सदस्य:वार्ताबाट/सोर्स/MediaWikiHindi

मुक्त ज्ञानकोश विकिपीडिया से
package CMS::MediaWikiHindi;

#######################################################################
# Author: Reto Schär
# Copyright (C) by Reto Schär (find details at the end of this script)
#
# This library is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself, either Perl version 5.8.6 or,
# at your option, any later version of Perl 5 you may have available.
#
# Locations:
# http://meta.pgate.net/cms-mediawiki/
# http://search.cpan.org/dist/CMS-MediaWiki/lib/CMS/MediaWiki.pm
#
# Find more MediaWiki reference in general on:
# http://www.infocopter.com/know-how/mediawiki-reference/
#######################################################################
# Modifications for Hindi Wikipedia made by:
# vaartabot
#######################################################################
use strict;
my $package = __PACKAGE__;
our $VERSION = '0.8013';

use LWP::UserAgent;
use HTTP::Request::Common;
use HTTP::Cookies;
use HTML::Form;
use IO::File;

# GLOBAL VARIABLES
my %Var = ();
my $contentType = "";
my $ua;
my $cookie_jar;
my $sessionid;

use vars qw($sessioncookie);

$| = 1;

#-----  FORWARD DECLARATIONS & PROTOTYPING
sub Error($);
sub Debug($);

sub new {
	my $type = shift;
	my %params = @_;
	my $self = {};

	$self->{'protocol'} = $params{'protocol'} || 'http'; # optional
	$self->{'host'  } = $params{'host'} || 'localhost';
	$self->{'path'  } = $params{'path'} || '';
	$self->{'debug' } = $params{'debug'} || 0; # 0, 1, 2
	$Var{'SERVER_SIG'} = '*Unknown*';
	$Var{'EDIT_TIME_BEFORE'} = '*Unknown*';

	Debug "$package V$VERSION" if $self->{'debug'};

	$ua = LWP::UserAgent->new;
	$ua->agent('MediaWikiHindi/0.8013');
#	$ua->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; T312461');
	$ua->cookie_jar({file => 'mediawikihindi-cookies.txt', autosave => 1});
	$cookie_jar = $ua->cookie_jar;

	bless $self, $type;
}

sub login {
	my $self = shift;
	my %args = @_;
	if ($self->{'debug'}) {
		Debug "[login] $_ = $args{$_}" foreach keys %args;
	}

	$args{'protocol'} ||= $self->{'protocol'};
	$args{'path'} ||= $self->{'path'};
	$self->{'path'} = $args{'path'}; # globalize, if it was set here

	$args{'host'} ||= $self->{'host'};
	$self->{'host'} = $args{'host'}; # globalize

	my $index_path = "/index.php";
	   $index_path = "/$args{'path'}/index.php" if $args{'path'};

	#Let's logout from any previous sessions
#	my $logout_url = "$args{'protocol'}://$args{'host'}$index_path?title=%E0%A4%B5%E0%A4%BF%E0%A4%B6%E0%A5%87%E0%A4%B7:UserLogout";
#	my $request = HTTP::Request->new(GET => $logout_url);
#	my $resp = $ua->request($request);

	my $login_url = "$args{'protocol'}://$args{'host'}$index_path?title=%E0%A4%B5%E0%A4%BF%E0%A4%B6%E0%A5%87%E0%A4%B7:Userlogin\&action=submitlogin\&type=login";

	Debug "[login] POST $login_url\..." if $self->{'debug'};

	my $request = HTTP::Request->new(POST => $login_url);
	$request->header(Content_Type => 'application/x-www-form-urlencoded');
#	$request->header(Cookie => $mycookie);
	$request->content("wpName=$args{'user'}&wpPassword=$args{'pass'}&wpLoginattempt=लॉग इन&wpRemember=1");

	my $resp = $ua->request($request);

	if ($self->{'debug'} > 2) {
		my @contents = $resp->content;
		open (TESTING, ">testing.html");
		foreach my $contentline (@contents) {
			print TESTING "$contentline\n";
		}
	close TESTING;
	}
	

	my $login_okay = 0;
	foreach (keys %{$resp->{'_headers'}}) {
		Debug "(header) $_ = " . $resp->{'_headers'}->{$_} if $self->{'debug'} > 2;
		if ($_ =~ /^set-cookie$/i) {
			my $arr = $resp->{'_headers'}->{$_};
			if ($arr =~ /^ARRAY(.+)$/) {
				foreach (@{$arr}) {
					# wikiUserID or wikidbUserID
					if ($_ =~ /UserID=\d+\;/i) {
						# Success!
						$login_okay = 1;
					}
					Debug "(cookie) $_" if $self->{'debug'} > 1;
					if ($_ =~ m/session/) {
						$sessioncookie = $_;
						print "Session Cookie: $sessioncookie\n"  if $self->{'debug'} > 1;
					}
				}
			}
			else {
				Debug "=====> cookie: $arr" if $self->{'debug'};
			}
		}
		if ($_ =~ /^server$/i) {
			$Var{'SERVER_SIG'} = $resp->{'_headers'}->{$_};
		}
	}

	$sessionid = substr($sessioncookie, 15, 32);
#	$cookie_jar->set_cookie(0, "hiwiki_session", $sessionid, "\/", "hi.wikipedia.org", 80, 1, 0, 2592000, 0, ) ;
#	$cookie_jar->save;

	open(COOKIEFILE, "<mediawikihindi-cookies.txt") || print "Could not open file for reading\n$!\n";
	my @cookielines = <COOKIEFILE>;
	chomp (@cookielines);
	my @newcookielines;
	my $sessioncookiefound;
	foreach my $cookieline (@cookielines) {
		if ($cookieline =~ m/session/) {
			print "sessioncookieline found\n";
			$sessioncookiefound = 1;
			$cookieline = "Set-Cookie3: $sessioncookie\n";
#			$ua->cookie_jar->set_cookie(0, 'hiwiki_session', $sessioncookie, "\/", 'hi.wikipedia.org', '', 1, 0, 1840000, 0, \httponly ) ;
		}
	push (@newcookielines, $cookieline);
	}
	if (!$sessioncookiefound) {
		push (@newcookielines, "Set-Cookie3: $sessioncookie\n");
		print "pushing Set-Cookie3: $sessioncookie\n";
	}
	close COOKIEFILE;

	print "This is what I want to print: Set-Cookie3: $sessioncookie\n";

	open(COOKIEFILE, "+>mediawikihindi-sessioncookies.txt") || print "Could not open file for writing\n$!\n";
	foreach my $cookieline (@newcookielines) {
		print COOKIEFILE "$cookieline\n" || print "Could not write\n$!\n";
		print "writing to file: $cookieline\n";
	}
	close COOKIEFILE;

	return $login_okay ? 0 : 1;

}



sub editPage {
	my $self = shift;
	my %args = @_;

	if ($self->{'debug'}) {
		Debug "[editPage] $_ = \"$args{$_}\"" foreach keys %args;
		Debug "[editPage] VAR $_ = \"$Var{$_}\"" foreach keys %Var;
	}

	my $WHOST = $self->{'host'} || 'localhost';
	my $WPATH = $self->{'path'} || '';

	$args{'protocol'} ||= $self->{'protocol'};
	$args{'text   '} ||= '* No text *';
	$args{'summary'} ||= 'By CMS::MediaWiki';
	$args{'section'} ||= '';

	Debug "Editing page '$args{'title'}' (section '$args{'section'}')..." if $self->{'debug'};

	my $edit_section = length($args{'section'}) > 0 ? "\&section=$args{'section'}" : '';

	# (Pre-)fetch page...

	my $myurl = "$args{'protocol'}://$WHOST/$WPATH/index.php?title=$args{'title'}&action=edit$edit_section";
	my $request = HTTP::Request->new(GET => $myurl);
	my $resp = $ua->request($request);
	my @lines = split /\n/, $resp->content();
	my $token = my $edit_time = my $start_time = '';

	foreach (@lines) {
		#Debug "X $_";
		if (/wpEditToken/) {
			print "EditToken line: $_ \n";
			m/value\=\"(.+)\" *name/i;
			$token = $1;
		}
		if (/wpEdittime/) {
			s/type=.?hidden.? *value="(.+)" *name/$1/i;
			$edit_time = $1 || '';
			$Var{EDIT_TIME_BEFORE} = $edit_time;
		}
		if (/wpStarttime/) {
			s/type=.?hidden.? *value="(.+)" *name/$1/i;
			$start_time = $1 || '';
		}
		if (/<title>/i) {
			s/<title>(.+)<\/title>/$1/i;
			$Var{PAGE_TITLE} = $1 || '';
		}
		if (/index.php\?title=(.+?):Copyright.+/i) {
			$Var{WIKI_NAME} = $1 || '';
		}
	}

	if ($self->{'debug'}) {
		Debug "token = $token" if $self->{'debug'} > 1;
		Debug "edit_time (before update) = $edit_time";
	}
	$token = s/\\/\\\\/;

	my %tags = ();
	$tags{'wpTextbox1' } = $args{'text'};
	$tags{'wpEdittime' } = $edit_time;
	$tags{'wpStarttime'} = $start_time;
	$tags{'wpSave'     } = 'लेख सहेजें';
	$tags{'wpSection'  } = $args{'section'};
	$tags{'wpSummary'  } = $args{'summary'};
	$tags{'wpEditToken'} = $token;
	$tags{'wpMinoredit'} = 1;

	$tags{'title' } = $args{'title'};
	$tags{'action' } = 'submit';

#	my $form = HTML::Form->parse($resp);
 
#	my $text = $form->find_input('wpTextbox1')->value;
#	my $summary = $form->find_input('wpSummary')->value;
#	my $save = $form->find_input('wpSave')->value;
#	my $edittoken = $form->find_input('wpEditToken')->value;
#	my $starttime = $form->find_input('wpStarttime')->value;
#	my $edittime = $form->find_input('wpEdittime')->value;
#	my $minoredit = $form->find_input('wpMinoredit')->value;
 
#	$form->value('wpTextbox1', $args{'text'});
#	$form->value('wpSummary', $args{'summary'}); 
#	$form->value('wpMinoredit',1);
#	my $resp = $ua->request($form->click);

	my $submit_url = "$args{'protocol'}://$WHOST/$WPATH/index.php?title=$args{'title'}\&action=submit";

	my $request = HTTP::Request->new(POST => $submit_url);
	$request->header(Content_Type => 'application/x-www-form-urlencoded');
	$request->content("wpTextbox1=$args{'text'}&wpEdittime=$edit_time&wpSave=लेख सहेजें&wpSection=$args{'section'}&wpSummary=$args{'summary'}&wpEditToken=$token&wpMinoredit=1&title=$args{'title'}&action=submit");
#	my $content = join('\&' , %tags);
#	$request->content(%tags);
#	$request->header(Cookie => $sessioncookie);
#	$cookie_jar->set_cookie(0, 'hiwiki_session', $sessionid, "\/", 'hi.wikipedia.org', , 1, 0, 8640000, , \ 'httponly' ) ;
	$cookie_jar->load("mediawikihindi-sessioncookies.txt");

	my $resp = $ua->request($request);

	if ($self->{'debug'} > 2) {
		my @contents = $resp->content;

		open (TESTING, ">testing.html");
		foreach my $contentline (@contents) {
			print TESTING "$contentline\n";
		}
	close TESTING;
	}

	foreach (sort keys %{$resp->{'_headers'}}) {
		Debug "(header) $_ = " . $resp->{'_headers'}->{$_} if $self->{'debug'} > 2;
	}
	my $response_location = $resp->{'_headers'}->{'location'} || '';
	Debug "Response Location: $response_location" if $self->{'debug'};
	Debug "Comparing with \"/$args{'title'}\"" if $self->{'debug'};
	if ($response_location =~ /[\/=]$args{'title'}/i) {
		Debug "Success!" if $self->{'debug'};
		return 1;
	}
	else {
		Debug "NOK!" if $self->{'debug'};
		return 2;
	}
}

sub get {
	my $self = shift;
	my $Key  = shift;
	$Var{$Key};
}

sub let {
	my $self  = shift;
	my $Key   = shift;
	my $Value = shift;
	Debug "[let] $Key = $Value" if $self->{'debug'};
	$Var{$Key} = $Value;
}

sub getPage {
	# returns arrayref of lines of page source
	# Function created by Matt Hucke <hucke@nospam-cynico.net>
	my ($self, %args) = @_;

	$args{'protocol'} ||= $self->{'protocol'};
	$args{'section' } ||= 0;

	if ($self->{'debug'}) {
		Debug "[getPage] $_ = \"$args{$_}\"" foreach keys %args;
		Debug "[getPage] VAR $_ = \"$Var{$_}\"" foreach keys %Var;
	}

	my $WHOST = $self->{'host'} || 'localhost';
	my $WPATH = $self->{'path'} || '';

	Debug "Fetching page '$args{'title'}' (section '$args{'section'}')..." if $self->{'debug'};

	my $edit_section = $args{'section'} ? "\&section=$args{'section'}" : '';
	my $myurl = "$args{'protocol'}://$WHOST/$WPATH/index.php?title=$args{'title'}&action=edit$edit_section";
	my $request = HTTP::Request->new(GET => $myurl);
	my $resp = $ua->request($request);

	foreach (keys %{$resp->{'_headers'}}) {
		Debug "(header) $_ = " . $resp->{'_headers'}->{$_} if $self->{'debug'} > 2;
	}

	my @lines = split /\n/, $resp->content();

	my @content = ();
	my $saving = 0;

	# This is a very simple parser - it looks for <textarea...wpTextbox1> and </textarea>
	# and returns everything in between.
	for (my $jj = 0; $jj <= $#lines; $jj++) {
		my $line = $lines[$jj];
		if ($lines[$jj+1] =~ m/^[^>]+><\/textarea>/) {
			#this means there is no content on the page, skip everything
			last;
		}
		if ($line =~ m/<textarea.*wpTextbox1/) {
			$saving = 1;
			if ($line =~ m/<textarea[^>]+>(.*)/) {
				$line = $1;    # strip out <textarea.....>, keep what's after.
			} else {
				# ADVANCE to next line
				++$jj;
				$line = $lines[$jj];

				# strip out end of textarea tag at start of line
				$line =~ s#^[^>]+>##;
   			}

			# if any of $line remains, fall thru to 'push' part.
			next unless ($line);

		} elsif ($line =~ m#(.*)</textarea>#) {
			push (@content, $line) if ($saving && $1);
			$saving = 0;
		}
		push (@content, $line) if ($saving);
	}

	# Always return an arrayref for later processing
	return @content;
}


sub getSpecialPage {
	# returns arrayref of lines of page source
	# Function created by Matt Hucke <hucke@nospam-cynico.net>
	my ($self, %args) = @_;

	$args{'protocol'} ||= $self->{'protocol'};

	if ($self->{'debug'}) {
		Debug "[getPage] $_ = \"$args{$_}\"" foreach keys %args;
		Debug "[getPage] VAR $_ = \"$Var{$_}\"" foreach keys %Var;
	}

	my $WHOST = $self->{'host'} || 'localhost';
	my $WPATH = $self->{'path'} || '';

	Debug "Fetching page '$args{'title'}'..." if $self->{'debug'};

	my $myurl = "$args{'protocol'}://$WHOST/wiki/$args{'title'}";
	my $request = HTTP::Request->new(GET => $myurl);
	my $resp = $ua->request($request);

	my @lines = split /\n/, $resp->content();

	foreach (keys %{$resp->{'_headers'}}) {
		Debug "(header) $_ = " . $resp->{'_headers'}->{$_} if $self->{'debug'} > 2;
	}
	return @lines;
}

sub Error ($) {
	print "Content-type: text/html\n\n" unless $contentType;
	print "<b>ERROR</b> ($package): $_[0]\n";
	exit(1);
}

sub Debug ($)  { print "[ $package ] $_[0]\n"; }

####  Used Warning / Error Codes  ##########################
#	Next free W Code: 1000
#	Next free E Code: 1000

1;

__END__

=head1 NAME

CMS::MediaWiki - Perl extension for creating, reading and updating MediaWiki pages

=head1 SYNOPSIS

  use CMS::MediaWiki;

  my $mw = CMS::MediaWiki->new(
	# protocol => 'https',  # Optional, default is http
	host  => 'localhost',   # Default: localhost
	path  => 'wiki' ,       # Can be empty on 3rd-level domain Wikis
	debug => 0              # Optional. 0=no debug msgs, 1=some msgs, 2=more msgs
  );

=head1 DESCRIPTION

Create or update MediaWiki pages. An update of a MediaWiki page can also be
reduced to a specific page section. You may update many pages with the same
object handle ($mw in the shown example).

You could change the login name between an update. This might be necessary
if you would like to update a public page *and* a protected page by the
WikiSysop user in just one cycle.

=head2 Login example

  if ($mw->login(user => 'Reto', pass => 'yourpass')) {
	print STDERR "Could not login\n";
	exit;
  }
  else {
	# Logged in. Do stuff ...
  }

=head2 Another login example

  $rc = $mw->login(
	protocol => 'https',       # optional, default is http
	host     => 'localhost' ,  # optional here, but wins if (re-)set here
	path     => 'wiki',        # optional here, but wins
	user     => 'Reto' ,       # default: Perlbot
	pass     => 'yourpass' ,
  );

=head2 Edit a Wiki page or section

  $rc = $mw->editPage(
	title   => 'Online_Directory:Computers:Software:Internet:Authoring' ,
    	section => '' ,	#  2 means edit second section etc.
       	                # '' = no section means edit the full page
       	text    => "== Your Section Title ==\nbar foo\n\n",
	summary => "Your summary." , # optional
  );

=head2 Get a Wiki page or a section of a Wiki page

  $lines_ref = $mw->getPage(title => 'Perl_driven', section => 1); # omit section to get full page
  # Process Wiki lines ...
  print sprintf('%08d ', ++$i), " $_\n" foreach @$lines_ref;

In general, $rc returns 0 on success unequal 0 on failure.

=head3 Tip

After a successful call of the B<editPage> function you had the
following information available:

  print "Edit time (before) was ", $mw->get('EDIT_TIME_BEFORE'), "\n";
  print "Page title was "        , $mw->get('PAGE_TITLE')      , "\n";
  print "The Wiki name was "     , $mw->get('WIKI_NAME')       , "\n";

=head2 EXPORT

None by default.

=head1 SEE ALSO

=item *

http://meta.pgate.net/cms-mediawiki/

=item *

http://www.infocopter.com/perl/modules/

=item *

http://www.infocopter.com/know-how/mediawiki-reference/

=head1 AUTHOR

Reto Schaer, E<lt>retoh@nospam-cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2005-2007 by Reto Schaer

This library is free software; you can redistribute it and/or modify

it under the same terms as Perl itself, either Perl version 5.8.6 or,

at your option, any later version of Perl 5 you may have available.

http://www.infocopter.com/perl/licencing.html

=cut