सदस्य:वार्ताबाट/सोर्स

मुक्त ज्ञानकोश विकिपीडिया से

वार्ताबाट is being written in Perl with the help of a modified version of the CMS::MediaWiki module.

The modified version of the CMS::MediaWiki module can be found here: सदस्य:वार्ताबाट/सोर्स/MediaWikiHindi

#!/usr/bin/perl

# Bot created to enhance talk pages of Hindi wikipedia
# This is still a trial version dated 7/17/2011
#
# This is what I intend to do:
# ==Task 1==
# Get a list of links from Special:Allpages page
# Visit each link and build a list of all article pages
# Visit each page, and if it is not a redirect page and does not have
#  a speedy delete template,
# Get its talk page
# If the talk page does not have 'vaarta shiirshaka' template, add it
# Do this for all pages

use strict;
use lib qw(.);

use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTTP::Request::Common;
use HTML::LinkExtor;
use MediaWikiHindi;

use vars qw($sessioncookie);
*sessioncookie = \%MediaWikiHindi::sessioncookie;

# HTTP client used by GetContents() for direct page fetches; each request
# is given a 10 second timeout.
my $browser = LWP::UserAgent->new();
$browser->timeout(10);

# Bot account credentials.
# NOTE(review): these two variables are not read anywhere in the visible
# code -- their only use is commented out in the constructor call below, and
# Login() passes its own literal user/pass. Confirm which credentials are
# the intended ones.
my $username='वार्ताबाट';
my $password='123456';

# Wiki client object shared by the rest of the script (page fetches, edits
# and login all go through it). 'debug' is switched on; the main loop reads
# $mw->{'debug'} to decide whether to prefix printed lines with a counter.
my $mw = CMS::MediaWikiHindi->new(
#	'user' => $username,'pass' => $password},
	'host'  => 'hi.wikipedia.org',
	'path'  => 'w',
	'debug' => 1
);

#Logging in as bot
# Called with explicit parentheses: the bare "&Login;" form implicitly hands
# the caller's current @_ to the sub, which is never what is wanted here.
Login();



#Getting Special:Allpages
my @contents = $mw->getSpecialPage(title => '%E0%A4%B5%E0%A4%BF%E0%A4%B6%E0%A5%87%E0%A4%B7:AllPages');
#@contents = $mw->getPage(title => '1854');

#print sprintf('%08d ', ++$i), " $_\n" foreach @contents;

#Building an array of starting points
# Every index row on the AllPages listing is expected to open with the
# markup in $rowprefix; the text between that prefix and the next '">' is
# collected as one starting link.
my @linklist = ();
LOOP: foreach my $contentline (@contents) {

	# Only a line mentioning "allpageslist" is scanned for index links.
	next LOOP unless $contentline =~ m/allpageslist/;

	my $rowprefix = "<tr><td align=\"right\"><a href=\"/wiki/";
	my $prefixlen = length $rowprefix;
	my $line      = $contentline;	# work on a copy; @contents stays untouched

	while ($line =~ m/href\=\"\/wiki/) {
		# index() returns -1 when the text is absent; offset 0 is a valid hit.
		my $initpos = index($line, $rowprefix);

		# No further row prefix: stop scanning altogether. Note this leaves
		# the outer LOOP, not just this while loop.
		last LOOP if $initpos < 0;

		# An unterminated link would give a negative substr length and push
		# garbage, so stop instead.
		my $finalpos = index($line, "\">", $initpos + $prefixlen);
		last LOOP if $finalpos < 0;

		push (@linklist, substr ($line, $initpos + $prefixlen, $finalpos - $initpos - $prefixlen));
#		sleep 1;

		# Continue the search after the link just consumed.
		$line = substr ($line, $finalpos);
	}
}

#Building an array of all pages
# For a starting link, fetch its listing, locate the line that contains
# "/fieldset", discard the first wiki link on it and collect the target of
# every remaining '<a href="/wiki/...' up to its '" title' attribute.
my @allpages = ();
LINKLISTLOOP: foreach my $startlink (@linklist) {
	@contents = $mw->getSpecialPage(title => $startlink);

	foreach my $entry (@contents) {

		# Skip everything except the line holding the page links.
		next unless $entry =~ m/\/fieldset/;

		my $anchor    = '<a href="/wiki/';
		my $endmarker = '" title';
		my $skip      = length $anchor;
		my $rest      = $entry;

		#delete the first wiki link
		my $from = index($rest, $anchor);
		my $to   = index($rest, $endmarker, $from + $skip);
		$rest = substr($rest, $to);

		# Peel off one link per pass until no anchor is left.
		while ($rest =~ m/\<a href\=\"\/wiki\//) {
			$from = index($rest, $anchor);
			$to   = index($rest, $endmarker, $from + $skip);
			push @allpages, substr($rest, $from + $skip, $to - $from - $skip);
#			sleep 1;
			$rest = substr($rest, $to);
		}

		# Stops after the first listing that yielded a link line, so only
		# one starting link is ever expanded.
		# NOTE(review): presumably a trial-run limit -- confirm before
		# running over the whole wiki.
		last LINKLISTLOOP;
	}
}

my @pagecontents;
my @talkpagecontents;
#Start adding {{vaarta shiirshaka}} to talk pages
# For each collected page: skip it when it is a redirect or carries
# {{delete}}, fetch its talk page, skip it when the template is already
# there, otherwise prepend the template and save the talk page.
ALLPAGESLOOP: foreach my $page (@allpages) {
	@pagecontents = $mw->getPage(title => $page);

	# With debug on, lines are echoed without the running counter.
	my $i;
	print $mw->{'debug'} ? '' : sprintf('%08d ', ++$i), " $_\n" foreach @pagecontents;
	sleep 1;

	#check that it's not a redirect page and does not have a speedy delete template
	# The redirect keyword is matched case-insensitively so "#redirect" is
	# skipped as well.
	foreach my $pageline (@pagecontents) {
		if ($pageline =~ m/\#REDIRECT/i || $pageline =~ m/\{\{delete\}\}/) { next ALLPAGESLOOP; }
	}

	#if it's good, get this page's talk page
	my $talktitle = "%E0%A4%B5%E0%A4%BE%E0%A4%B0%E0%A5%8D%E0%A4%A4%E0%A4%BE:".$page;
	@talkpagecontents = $mw->getPage(title => $talktitle);

	print $mw->{'debug'} ? '' : sprintf('%08d ', ++$i), " $_\n" foreach @talkpagecontents;
	sleep 1;

	#check that it does not already have vaarta shiirshaka template
	foreach my $talkpageline (@talkpagecontents) {
		if ($talkpageline =~ m/\{\{वार्ता शीर्षक\}\}/) { next ALLPAGESLOOP; }
	}

	#everything is good, let's add the template
	unshift (@talkpagecontents, "{{वार्ता शीर्षक}}\n");

	# Build ONE string for the edit. Passing the array itself inside the
	# key => value list would flatten it, so only the first line would be
	# sent as 'text' and the remaining lines would be read as extra
	# key/value pairs. Line endings are normalised first so this works
	# whether or not getPage() keeps the trailing newline on each line.
	my @textlines = @talkpagecontents;
	chomp @textlines;
	my $newtext = join("\n", @textlines) . "\n";

	my $response = $mw->editPage(
		title   => $talktitle ,
		section => '' , #  2 means edit second section etc.
				# '' = no section means edit the full page
		text    => $newtext,
		summary => "{{वार्ता शीर्षक}} जोड़ा" , # optional
	);

	#if successful, write to logfile
	# NOTE(review): Login() treats a TRUE return from $mw->login as failure;
	# confirm that editPage() really signals success with 1.
	if ($response == 1) {
		if (open(my $logfh, '>>', 'bolbalalog.txt')) {
			print {$logfh} gmtime()." Added Vaarta Shiirshaka template to $page talk page.\n";
			close($logfh) or warn "Could not close bolbalalog.txt: $!\n";
		}
		else {
			# A logging problem is reported but does not abort the run.
			warn "Could not append to bolbalalog.txt: $!\n";
		}
	}

	# Ends the whole script after the first page that reaches the edit step.
	# NOTE(review): presumably a trial-run limit -- confirm before removing.
	exit;
}


#@contents = &GetContents("http://hi.wikipedia.org/wiki/%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A4%AA%E0%A5%83%E0%A4%B7%E0%A5%8D%E0%A4%A0");
#&PrinttoFile(@contents);

#$url = "http://hi.wikipedia.org/wiki/%E0%A4%AE%E0%A5%81%E0%A4%96%E0%A4%AA%E0%A5%83%E0%A4%B7%E0%A5%8D%E0%A4%A0";
#print &GetContents($url);



# Login - authenticate the shared $mw client against hi.wikipedia.org.
#
# Takes no arguments. Prints the outcome on STDOUT.
# Returns 1 when the login succeeded.
# Dies when the login failed, so the script never goes on to edit pages
# without being authenticated as the bot.
#
# NOTE(review): user/pass are literals here and differ from the file-level
# $username/$password -- confirm which credentials are intended.
sub Login {
  my $rc = $mw->login(
        protocol => 'http',       # optional, default is http
        host     => 'hi.wikipedia.org' ,  # optional here, but wins if (re-)set here
        path     => 'w',        # optional here, but wins
        user     => 'vaarta' ,       # default: Perlbot
        pass     => 'password' ,
  );

  # A true return code from login() means the attempt failed.
  if ($rc) {
    print "Login unsuccessful!\n";
    die "Login to hi.wikipedia.org failed; refusing to continue unauthenticated\n";
  }

  print "Login successful!\n";
  return 1;
}


# GetContents - fetch a URL with the shared $browser and return the body.
#
# Argument: the URL to GET.
# Returns the response content. When the response is an error its status
# line is printed, and the content of that response is still returned.
sub GetContents {
	my ($myurl) = @_;

	my $reply = $browser->request(HTTP::Request->new(GET => $myurl));

	print $reply->status_line if $reply->is_error();

	my $body = $reply->content();
	return $body;
}

# PrinttoFile - write every argument, in order and without separators, to
# testing.html in the current directory, replacing any previous content.
#
# Arguments: the strings to write.
# Returns 1 on success. When the file cannot be opened or closed a warning
# is issued and a false value is returned.
sub PrinttoFile {
	my @chunks  = @_;
	my $outfile = 'testing.html';

	# Three-argument open with a lexical handle; the result is checked so a
	# failure is reported instead of silently writing nowhere.
	open (my $fh, '>', $outfile) or do {
		warn "PrinttoFile: cannot open $outfile for writing: $!\n";
		return;
	};

	foreach my $mycontent (@chunks) {
		print {$fh} $mycontent;
	}

	# Buffered write errors only surface when the handle is closed.
	close ($fh) or do {
		warn "PrinttoFile: cannot close $outfile: $!\n";
		return;
	};

	return 1;
}


# Reached only when the page loop above ran to completion without hitting
# its own exit, i.e. no page got as far as the edit step.
print STDOUT "\nbye\n";

# Leave with the default (zero) status.
exit 0;