Module:Str find word/sandbox

-- 2023-04-17 STABLE wrt basics, quotes "" '' * with base sep; working on resltstring & report
-- todo: report options, more options
-- todo: options count, pattern, out-table, out-htmllist, keepinputordersource
require('strict')
local p	= {}
local mArgs		= require('Module:Arguments')
local str		= require('Module:String')
local yesno 	= require('Module:Yesno')
local tTools 	= require('Module:TableTools')
local strDeEnCode = require('Module:DecodeEncode')
local iMaxWords	= 12 -- upper bound on the word-parsing loop in buildWordTable (alpha-status, Apr2023; can be higher when stable)
local tArgs		= {} -- parsed arguments; replaced by the parseArgs() result in p._main
local report	= nil -- report submodule; loaded by initReport() (when explain=T)

-- Interpret tArgs.explain and decide whether a report is wanted.
-- Side effect: tArgs.explain is overwritten with a plain boolean (report on/off).
-- Returns the report type:
--		yesno-true input		=> 'preview'	(report on)
--		'testcases', 'doc'		=> that word	(report on)
--		nil / yesno-false input	=> false		(report off)
--		any other word			=> that word	(report off)
local function parseReportType( tArgs )
	-- yesno() with the input as its own default keeps unknown words intact
	local sType = yesno( tArgs.explain, tArgs.explain ) or false
	local bReport = false

	if yesno( sType, false ) ~= nil then
		if sType == true then
			sType = 'preview'
			bReport = true
		elseif sType == 'testcases' or sType == 'doc' then
			bReport = true
		end
		-- every other value: unknown word or false, report stays off
	end

	tArgs.explain = bReport
	return sType
end

-- Load the report submodule into the file-level `report` variable and make
-- its first (dummy) call. The tArgs parameter is not used.
local function initReport( tArgs )
	local mReport = require('Module:Str find word/report')
	report = mReport
	report.xpCheckExplain() -- dummy
end

-- Returns whatever Module:If preview's main() yields for the pair (true, false).
-- NOTE(review): presumably true while previewing and false otherwise -- confirm
-- against Module:If preview. Open question from the author: here or in report?
local function isPreview( )
	-- alternative once considered: not ( ifPreview._warning( {'is_preview'} ) == '' )
	return require('Module:If preview').main( true, false )
end

-- Decode, normalise whitespace and trim, as early as possible.
--   1. decode through Module:DecodeEncode (presumably HTML entities such as
--      "&#65;" => "A" -- confirm against that module)
--   2. collapse every run of whitespace (%s+) into one plain space
--   3. trim both ends
-- nil input is returned as nil.
local function sDecodeTrim( sIn )
	if sIn == nil then
		return nil
	end
	local sDecoded = strDeEnCode._decode( sIn )
	local sCollapsed = mw.ustring.gsub( sDecoded, '%s+' , ' ' )
	return mw.text.trim( sCollapsed )
end

-- %-escape a word (character string) so it can be used literally inside a Lua string pattern.
-- Thin wrapper around Module:String's _escapePattern.
-- According to the original author these 12 magic characters get a % prefix: ().%+-*?[^$]
-- (pattern used there: "([%(%)%.%%%+%-%*%?%[%^%$%]])")
-- word: the string to escape
-- returns: the escaped string
local function escape_word( word )
	return str._escapePattern( word )
end

-- Remove one outer pair of quotes ('..' or "..") from s, including any
-- whitespace outside that pair.
-- s:			string to unquote; nil is passed through as nil
-- bTrimAfter:	when exactly true, the result (= the inner string) is trimmed
--				as well, so the case " abc " yields 'abc'
-- A string that does not start with a quote is left as is (apart from the
-- optional trim). A string with an opening quote but no closing quote is
-- not changed by the quote removal either.
local function removeOuterQuotes( s, bTrimAfter )
	if s == nil then return nil end

	if mw.ustring.match( s, "^%s*%'" ) ~= nil then
		s = mw.ustring.gsub( s, "^%s*%'(.*)%'%s*$", "%1" )
	elseif mw.ustring.match( s, '^%s*%"' ) ~= nil then
		-- match() gives nil (never '') when nothing matches, so compare with nil;
		-- comparing with '' made this branch run, and trim, for every string
		s = mw.ustring.gsub( mw.text.trim( s ), '^%"(.*)%"$', '%1' )
	end
	if bTrimAfter == true then
		s = mw.text.trim( s )
	end
	return s
end

-- Separator used for reading the input word lists.
-- The given sSep is decoded and trimmed, then stripped of every alphanumeric
-- (%w) and whitespace (%s) character. What remains is the separator.
-- When sSep is nil, or nothing remains, the decoded and trimmed sDefaultSep
-- is returned instead.
-- todo: check characters '" _ {}(); & accept?'
local function setSepIn( sSep, sDefaultSep )
	if sSep ~= nil then
		local sStripped = mw.ustring.gsub( sDecodeTrim( sSep ), '[%w%s]*', '' ) or ''
		if sStripped ~= '' then
			return sStripped
		end
	end
	return sDecodeTrim( sDefaultSep )
end

-- Separator used for joining the words in the output.
-- sSep is decoded and trimmed, then one outer pair of quotes is removed
-- (without trimming the inner string, so a quoted separator can keep its spaces).
-- sDefaultSep is returned, unprocessed, when sSep is nil or ends up empty.
local function setSepOut( sSep, sDefaultSep )
	local sClean = sDecodeTrim( sSep )
	if sClean == nil then
		return sDefaultSep
	end

	sClean = removeOuterQuotes( sClean, false )
	if sClean ~= '' then
		return sClean
	end
	return sDefaultSep
end

-- Look up a single word in a simple array of words.
-- tSource:	array of words (walked with ipairs, so up to the first nil)
-- word:	the word to look for (compared with ==)
-- returns: the word itself when it is present, nil otherwise
local function findWordInTable( tSource, word )
	local sFound = nil
	for _, sCandidate in ipairs( tSource ) do
		if sCandidate == word then
			sFound = word
			break
		end
	end
	return sFound
end

-- Helper for buildWordTable: take every quoted item out of the word list.
-- sWordlist:		the list, already framed by the separator on both sides
-- sQuotePair:		the two quote characters for %b, e.g. "''" or '""'
-- sPlaceholder:	code that replaces each quoted item in the list
-- returns: the rewritten list, and the array of unquoted items in list order
local function extractQuotedWords( sWordlist, sQuotePair, sPlaceholder )
	local tHits = {}
	-- a balanced quote pair (plus optional outer spaces) that fills a whole cell
	local sPattern = '%f[^' .. tArgs.sep_pattern .. ']%s*%b' .. sQuotePair
		.. '%s*%f[' .. tArgs.sep_pattern .. ']'

	local sHit = sDecodeTrim( mw.ustring.match( sWordlist, sPattern ) ) or ''
	while sHit ~= '' do
		table.insert( tHits, removeOuterQuotes( sHit, true ) )
		-- replace the current first hit with the code, so the next match() finds the next one
		sWordlist = mw.ustring.gsub( sWordlist, sPattern, sPlaceholder, 1 )
		sHit = sDecodeTrim( mw.ustring.match( sWordlist, sPattern ) ) or ''
	end
	return sWordlist, tHits
end

-- Helper for buildWordTable: put the quoted items back in place of their
-- placeholder codes (in order), modifying wordTable in place.
local function restoreQuotedWords( wordTable, tQuoted, sPlaceholder )
	local iNext = 1
	for iW, sW in ipairs( wordTable ) do
		if sW == sPlaceholder and tQuoted[iNext] ~= nil then
			wordTable[iW] = tQuoted[iNext]
			iNext = iNext + 1
		end
	end
end

-- Reads and parses a word list and returns a table with words (simple array)
-- words list can be: source, andwords-to-check, orwords-to-check, a synonyms list
-- step 1: return an empty table for a nil or blank list; frame the list with the separator
-- step 2: when case-insensitive, turn string into lowercase (Unicode-aware)
-- step 3: read (parse) quoted '..'
-- step 4: read (parse) quoted ".."
-- step 5: read (parse) separator-delimited words (at most iMaxWords cells)
-- step 6: merge quoted words back into the word table; keep in order
-- step 7: when booleans=T, change boolean words into 'true'/'false' (module:yesno rules)
-- step 8: replace synonyms (by input like "|_nov=November, 11")
-- step 9: remove duplicates from wordtable (Module:TableTools)
-- 		all words returned are trimmed
-- return the table (a straight array)
-- Reads the file-level tArgs (sep, sep_pattern, case, booleans, synonymsTables).
local function buildWordTable( sWordlist )
	local wordTable = {}
	local cQ1 = '_Q0027_' -- U+0027 = \'
	local cQ2 = '_Q0022_' -- U+0022 = \"
	local tQ1hits, tQ2hits

	-- Step 1: prepare sWordList
	-- Only the emptiness test uses the decoded/trimmed form: decoding the whole
	-- list before splitting could turn an encoded separator character into a
	-- real one. Each word is decoded and trimmed individually below.
	if sWordlist == nil or sDecodeTrim( sWordlist ) == '' then
		return wordTable
	end
	sWordlist = tArgs.sep .. sWordlist .. tArgs.sep

	-- Step 2: case sensitive
	if yesno( tArgs.case, true ) == false then
		-- mw.ustring.lower also lowercases non-ASCII letters; string.lower does not
		sWordlist = mw.ustring.lower( sWordlist )
	end

	-- Step 3: Q1 read quotes (single quotes '..')
	sWordlist, tQ1hits = extractQuotedWords( sWordlist, "''", cQ1 )

	-- Step 4: Q2 read quotes (double quotes "..")
	sWordlist, tQ2hits = extractQuotedWords( sWordlist, '""', cQ2 )

	-- Step 5: parse plain sep-delimited words
	local sPattern = '%f[^' .. tArgs.sep_pattern .. '][^' .. tArgs.sep_pattern .. ']+%f[' .. tArgs.sep_pattern .. ']'
	local sNoMatch = sDecodeTrim( tArgs.sep ) -- str._match hands back the separator when there is no match
	local hitCount = 0
	while hitCount < iMaxWords do
		local hitWord = sDecodeTrim( str._match( sWordlist, sPattern, 1, hitCount + 1, false, tArgs.sep ) ) or ''
		if hitWord == sNoMatch then
			-- no more words found in the string
			break
		end
		hitCount = hitCount + 1
		if hitWord ~= '' then
			table.insert( wordTable, hitWord )
		end
		-- else: blank word, skipped (note: blank quotes as in .., " ", .. are kept = blank cell '')
	end
	-- report is only loaded when explain is on; do not index it while it is nil
	if hitCount >= iMaxWords and report ~= nil then
		report.xpMessage( 'ERR701 wordcount ' .. hitCount .. ' > maxwords' .. iMaxWords )
	end

	-- Step 6: merge quoted words & wordtable, keep order
	restoreQuotedWords( wordTable, tQ1hits, cQ1 )
	restoreQuotedWords( wordTable, tQ2hits, cQ2 )

	-- Step 7: when read as booleans, convert words to 'true'/'false'
	if tArgs.booleans then
		for i, v in ipairs( wordTable ) do
			local bValue = yesno( v )
			if bValue ~= nil then
				wordTable[i] = tostring( bValue )
			end
		end
	end

	-- Step 8: replace synonyms
	-- Only string keys are target synonyms. The table may also carry integer
	-- keys (an index of the synonym names); using those as replacement would
	-- turn words into numbers.
	for aka1, tAkas in pairs( tArgs['synonymsTables'] ) do
		if type( aka1 ) == 'string' then
			for iW, w in ipairs( wordTable ) do
				if findWordInTable( tAkas, w ) ~= nil then
					wordTable[iW] = aka1
				end
			end
		end
	end

	-- Step 9: remove duplicates from list
	return tTools.removeDuplicates( wordTable )
end

-- AND-logic with ANDwords words: ALL words must be found
-- tWorkf: table with the arrays ANDwords and SOURCEwords
-- returns two values: T/F, hittable
-- 		T when *all* AND words are found in SOURCEwords
-- 		hittable with all hit words, in ANDwords order
-- note 1: when F, the hittable still contains the words that were found
-- note 2: empty AND-wordlist => True by logic (because: not falsified)
local function checkANDwords( tWorkf )
	local bANDchk = true -- main conclusion; stays true for an empty list
	local tHits = {} -- hit table

	for _, word in ipairs( tWorkf.ANDwords ) do
		local sHit = findWordInTable( tWorkf.SOURCEwords, word )
		if sHit == nil then
			-- Falsified! We could stop here logically, but we continue
			-- to complete the hit table (feature)
			bANDchk = false
		else
			table.insert( tHits, sHit )
		end
	end
	-- tHits is filled by table.insert only, so it has no gaps to compress
	return bANDchk, tHits
end

-- OR-logic with ORwords words: at least one word must be found
-- tWork: table with the arrays ORwords and SOURCEwords
-- returns two values: T/F, hittable
-- 		True when at least one OR word is found in SOURCEwords
-- 		hittable has all hit words, in ORwords order
-- note 1: empty OR-wordlist => True by logic (because: not falsified)
-- note 2: while just one hitword is a True result, the hittable contains all words found
local function checkORwords( tWork )
	local tHits = {}

	for _, word in ipairs( tWork.ORwords ) do
		local sHit = findWordInTable( tWork.SOURCEwords, word )
		if sHit ~= nil then
			-- Confirmed! Could stop here logically, but complete the hit table
			table.insert( tHits, sHit )
		end
	end

	-- tHits is filled by table.insert only, so it has no gaps to compress
	local bORchk = ( #tWork.ORwords == 0 ) or ( #tHits > 0 )
	return bORchk, tHits
end

-- Determine the requested return value (a string)
-- tResults.resultALL is the logical outcome; tResults.tTRUE the array of hit words
-- this function applies tArgs.out_true / tArgs.out_false return value:
--		result False:	tArgs.out_false, or '' when that is nil
--		result True:	tArgs.out_true when set, else the hit words joined with tArgs.out_sep
-- note: out_true='' implies: blank return value
-- note: no parameter out_true= (that is, out_true=nil) implies: by default, return the joined hit words
--- todo add pref, suff
local function yesnoReturnstring( tResults )
	if tResults.resultALL == false then -- result False
		return tArgs.out_false or ''
	end

	-- result True
	if tArgs.out_true ~= nil then
		-- some out_true value is entered, could be ''; returned as is
		-- (no debug marker in front, so '' really gives a blank result)
		return tArgs.out_true
	end
	return table.concat( tResults.tTRUE or {}, tArgs.out_sep )
end

-- Combine the OR hits and the AND hits into one new array.
-- tResult: table with optional arrays tORhits and tANDhits
-- returns: a fresh array with the OR hits first, followed by the AND hits.
--		Not sorted, not de-duplicated; never nil.
-- The input arrays are not modified.
local function tCombinedSourceorderedTRUEtables( tResult )
	local tOut = {}

	if tResult.tANDhits == nil and tResult.tORhits == nil then
		-- report is only loaded when explain is on
		if report ~= nil then
			report.xpMessage( 'ERR921 BUG tOut is nil??? - tCombinedSourceorderedTRUEtables' )
		end
		return tOut
	end

	for _, v in ipairs( tResult.tORhits or {} ) do
		tOut[#tOut + 1] = v
	end
	for _, v in ipairs( tResult.tANDhits or {} ) do
		tOut[#tOut + 1] = v
	end
	return tOut
end

-- Join two word-list strings into one list string.
-- s1, s2: list strings; a nil one is left out
-- returns: s1 and s2 joined with tArgs.sep ('' when both are nil)
-- NOTE(review): the separator is read from the file-level tArgs at call time.
-- While tArgs.sep is still nil, table.concat gets no separator and glues
-- the two lists together directly.
local function concatAndLists( s1, s2 )
	local tParts = {}
	if s1 ~= nil then
		tParts[#tParts + 1] = s1
	end
	if s2 ~= nil then
		tParts[#tParts + 1] = s2
	end
	return table.concat( tParts, tArgs.sep )
end

-- ===== ===== ===== ===== ===== ===== ===== ===== =====
-- PARSE arguments
-- Turns the raw arguments into the settings table used by the module.
-- origArgs: the argument table as received by p._main
-- returns: a new table with
--		sep, sep_pattern	input separator and its %-escaped form
--		out_sep				output separator (falls back to |sep=, then ', ')
--		case, booleans		options, false by default
--		out_true, out_false	return values (out_true nil = return the hit words)
--		prefix, suffix, out_format, test
--		explain, explain_type
--		source, sANDlist, sORlist	the word lists, still as strings
--		synonyms			from every argument named "_xxx": the name xxx is
--							appended to the array part, and synonyms[xxx] = its list
--		synonymsTables		empty here; populated by the caller
-- NOTE(review): 's' is read both as alias for |suffix= and for |source= -- confirm which is meant.
local function parseArgs( origArgs )
	local tDefault = {
		sep			= ',',
		case		= false,
		booleans	= false,
		out_sep		= ', ',
	}
	local tNewArgs = {}

	tNewArgs.sep			= setSepIn( origArgs['sep'], tDefault.sep )
	tNewArgs.sep_pattern	= escape_word( tNewArgs.sep )
	tNewArgs.out_sep		= setSepOut( origArgs['out-sep'] or origArgs['sep'], tDefault.out_sep )
	tNewArgs.case			= yesno( origArgs['case'] or origArgs['casesensitive'] ) or tDefault.case
	tNewArgs.booleans		= yesno( origArgs['bool'] or origArgs['booleans'] ) or tDefault.booleans
	tNewArgs.out_true		= sDecodeTrim( origArgs.out_true ) -- nil = default, so return the hit words; '' is legal input & return value
	tNewArgs.out_false		= sDecodeTrim( origArgs.out_false ) or ''
	tNewArgs.prefix			= sDecodeTrim( origArgs.prefix or origArgs.p ) or ''
	tNewArgs.suffix			= sDecodeTrim( origArgs.suffix or origArgs.s ) or ''
	tNewArgs.out_format		= 'default' -- todo: table, default, htmllisttype, flatlist, first
	tNewArgs.explain		= false -- TEST17Apr: hard-coded off instead of origArgs.explain
	tNewArgs.explain_type	= parseReportType( tNewArgs ) or nil
	tNewArgs.test			= origArgs.test

	-- the wordlists:
	tNewArgs['source']		= origArgs['source'] or origArgs['s'] or ''
	-- |word= and |andwords= together form the AND list. They are joined here
	-- with the separator parsed above: the file-level tArgs is not filled yet
	-- while this function runs, so it cannot supply the separator.
	local tAndParts = {}
	local sWord		= origArgs['word'] or origArgs['w']
	local sAndwords	= origArgs['andwords'] or origArgs['andw']
	if sWord ~= nil then
		tAndParts[#tAndParts + 1] = sWord
	end
	if sAndwords ~= nil then
		tAndParts[#tAndParts + 1] = sAndwords
	end
	tNewArgs['sANDlist']	= table.concat( tAndParts, tNewArgs.sep )
	tNewArgs['sORlist']		= origArgs['orwords'] or origArgs['orw'] or ''

	tNewArgs['synonyms']		= {}
	tNewArgs['synonymsTables']	= {} -- to be populated later
	for k, v in pairs( origArgs ) do
		-- only named arguments can start with an underscore; skip numeric keys
		if type( k ) == 'string' and str._match( k, '^_%S', 1, 1, false, false ) then
			local syn1 = mw.ustring.gsub( k, '^_', '', 1 )
			table.insert( tNewArgs['synonyms'], syn1 )
			tNewArgs['synonyms'][syn1] = v
		end
	end

	if tNewArgs.explain == true then
		initReport( tNewArgs.explain )
		-- parentheses needed: '..' binds tighter than 'or'
		report.xpMessage( 'EXPLAIN: ' .. tostring( origArgs.explain ) .. '=>' .. tostring( tNewArgs.explain_type or 'unk' ) )
		report.xpReportSynonyms( tNewArgs )
	end

	return tNewArgs
end

-- ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== =====
-- _main function: check for presence of words in source string
-- origArgs: table of arguments (see parseArgs)
-- steps:
-- 1. arguments are parsed; the result is stored in the file-level tArgs
-- 2. input word strings are prepared ( parsed into an array of words ),
--    synonym lists first
-- 3. words checks are made ( applying AND-logic, OR-logic )
-- 4. final conclusion drawn ( T/F ); F when the source is empty or when
--    there are no words to check at all
-- 5. based on T or F status, the return string is established
--    ( yesnoReturnstring: the hit words, or the out_true / out_false value,
--    each of which can be '' )
-- returns: the conclusion as upper-case text ('TRUE' / 'FALSE') directly
--    followed by the return string
-- NOTE(review): the TRUE/FALSE text in front looks like temporary test output;
--    confirm whether the final module should return the string only.
function p._main( origArgs )
	local tWork = {}
	local tResults = {}

	tArgs = parseArgs( origArgs )

	-- make synonyms into tables
	-- 'aka1' = target synonym (= the synonym that remains)
	for aka1, sAkalist in pairs( tArgs['synonyms'] ) do
		tArgs['synonymsTables'][aka1] = buildWordTable( sAkalist )
	end

	-- build the worktables
	tWork['SOURCEwords']	= buildWordTable( tArgs.source )
	tWork['ANDwords']		= buildWordTable( tArgs.sANDlist )
	tWork['ORwords']		= buildWordTable( tArgs.sORlist )

	-- apply logic & conclude
	local bNothingToCheck = ( #tWork.SOURCEwords == 0 )
		or ( #tWork.ANDwords + #tWork.ORwords == 0 )
	if bNothingToCheck then
		tResults.resultALL = false
		-- report is only loaded when explain is on
		if yesno( tArgs.explain, true ) and report ~= nil then
			report.xpMessage( 'ERR201 No words to check' )
		end
	else
		tResults['bAND'], tResults['tANDhits']	= checkANDwords( tWork )
		tResults['bOR'],  tResults['tORhits']	= checkORwords( tWork )
		tResults.resultALL = ( tResults.bAND ) and ( tResults.bOR )
	end

	if tResults.resultALL == true then
		tResults.tTRUE = tCombinedSourceorderedTRUEtables( tResults ) or {}
	end
	tResults.sRESULTstring = yesnoReturnstring( tResults )

	-- todo: when tArgs.explain is set, add report.xpPresent( tArgs, tWork, tResults )
	return string.upper( tostring( tResults.resultALL ) )  .. tResults.sRESULTstring
end

-- Entry point for #invoke: collects the arguments from the frame through
-- Module:Arguments and hands them to p._main.
function p.main( frame )
	return p._main( mArgs.getArgs( frame ) )
end

return p

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for the purpose of restating it. We strive to provide accurate and relevant information; however, the following points apply:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.