-- Module:अक्षर गणना
--
-- मुक्त ज्ञानकोश विकिपीडिया से (From Wikipedia, the free encyclopedia)

--[[Module for counting akshars in input text
	Call using {{#invoke:modname|count|param1|param2}}

	param1:
	String
	input text
	Default: '' (Empty string)
	Plain text: fully supported
	Wikitext: Partly supported (see Support below)

	param2:
	Boolean
	Specifies whether to ignore wikisyntax
	or not when counting
	Default:true
	By default all counts ignore wikisyntax
	If set to false, all counts will include wikisyntax

	Support:
	All input text is preprocessed as wikitext
	Anything inside xml tags is then removed
	Non-visible part of wikilinks and external
	links is removed
	[=[अगर]=]-->अगर
	[=[आगरा|अगर]=]-->अगर
	[http://google.com अगर]-->अगर
	Removes the bullet (*) and numbering (#) characters
	*अगर-->अगर
	#अगर-->अगर

	outputs the character length,
	akshar length and
	the number of spaces used
]]

local ustring = mw.ustring

--Runs the given text through the frame's own preprocess method.
--@param text  the text to hand to frame:preprocess
--@param frame object providing a preprocess(self, text) method
--@return the first value returned by frame:preprocess(text)
local function preprocess(text, frame)
	--the parentheses truncate the call to exactly one result
	return (frame:preprocess(text))
end

--Strips wikisyntax from the input by applying a fixed list of
--pattern substitutions, one after another, in the order listed.
--@param text the text to clean
--@return the text after all substitutions have been applied
local function dewiki(text)
	--each rule is {pattern, replacement}; the order is significant because
	--later rules operate on what earlier rules left behind
	local rules = {
		{'((\127[^\127]*\127))', ''}, --spans delimited by \127 (labelled "xml tags from preprocessing" by the original author)
		{'(%[%[[^%]%]|]*|)', ''}, --piped wikilinks: drop everything from "[[" up to and including "|"
		{'(%[%[([^%]%]]*))', '%2'}, --plain wikilinks: drop the opening "[["
		{'(([^%[]?)%[[^%s%[]+%s)', '%2'}, --named external links: drop "[" up to the first whitespace, keep the preceding character
		{'(([^%[]?)%[[^%]]+)', '%2'}, --unnamed external links: drop "[" and what follows up to "]", keep the preceding character
		{'(%])', ''}, --closing brackets left over from any kind of link
		{'[*#]', ''}, --every "*" and "#" (bullets and numbering)
	}

	local cleaned = text
	for _, rule in ipairs(rules) do
		--plain assignment keeps only the string, discarding gsub's match count
		cleaned = ustring.gsub(cleaned, rule[1], rule[2])
	end
	return cleaned
end

--Returns how many times pattern matches inside s.
local function occurrences(s, pattern)
	local _, n = ustring.gsub(s, pattern, '')
	return n
end

--Does the counting.
--Reads the input text from frame.args[1] and the optional
--"ignore wikisyntax" switch from frame.args[2].
--
--frame.args[2] is honoured both as the boolean false and as the strings
--"false", "no" or "0" (case-insensitive, surrounding whitespace ignored).
--Arguments passed through #invoke arrive as strings, so comparing against
--the boolean false alone would never match. Any other value, including a
--missing argument, keeps the default: the text is preprocessed and
--stripped of wikisyntax before counting.
--
--@param frame object with an args table (and a preprocess method, used
--             only when wikisyntax is being ignored)
--@return a string of the form 'कैरैक्टर: C, अक्षर: A, स्पेस: S'; when
--        frame.args[1] is nil a fixed all-zero string is returned instead
local function count(frame)
	local deva = '[ँ-॰%s]+' --runs of characters in the range ँ..॰ and/or whitespace
	local dia = '[ँंः़ािीुूृॅेैॉोौ॒॰]' --signs that each reduce the akshar count by one
	local halant = '[्]'
	local text = frame.args[1]
	if text == nil then
		return 'कैरैक्टर:0, अक्षर:0, स्पेस:0'
	end

	local ignorewikitext = frame.args[2]
	local keepsyntax = (ignorewikitext == false)
	if type(ignorewikitext) == 'string' then
		local flag = ignorewikitext:lower():match('^%s*(.-)%s*$')
		keepsyntax = (flag == 'false' or flag == 'no' or flag == '0')
	end

	if not keepsyntax then --ignore wikisyntax, default state
		text = preprocess(text, frame)
		text = dewiki(text)
	end

	--both counts start from the length of the text actually being counted
	local charnum = ustring.len(text)
	local aksharnum = charnum
	local spacenum = 0

	for run in ustring.gmatch(text, deva) do
		local marks = occurrences(run, dia)
		local halants = occurrences(run, halant)
		local spaces = occurrences(run, '[%s]')

		--each diacritic and each whitespace character costs 1; each halant
		--costs 1.5 (presumably the halant itself plus half of the consonant
		--it shortens -- TODO confirm the intended weighting)
		aksharnum = aksharnum - marks - 1.5 * halants - spaces
		spacenum = spacenum + spaces
	end

	return 'कैरैक्टर: ' .. charnum .. ', अक्षर: ' .. aksharnum .. ', स्पेस: ' .. spacenum
end

--Module export table: count is the function named in the #invoke call
--shown in the header comment; preprocess and dewiki are exported as well.
return {count = count, preprocess = preprocess, dewiki = dewiki}