User:OrphanBot/libBot.pl
#!/usr/bin/perl
# libBot: A library of useful routines for running a bot
use strict;
use warnings;
require "libPearle2.pl";
my $test_only = 0;
my $username = "";
sub config
{
my %params = @_;
$test_only = $params{test_only} if(defined($params{test_only}));
$username = $params{username} if(defined($params{username}));
}
# Log a warning on the talk page of the bot
sub userwarnlog
{
my ($text, $editTime, $startTime, $token, $user, $summary, $session);
$user = $_[1];
$user = $username if(!defined($user));
$summary = $_[2];
$summary = "Logging warning message" if(!defined($summary));
$session = $_[3];
if(defined($session))
{
# We've been handed an editing session
($text, $editTime, $startTime, $token) = @{$session};
Pearle::myLog("Warning with existing edit session\n");
}
else
{
($text, $editTime, $startTime, $token) = Pearle::getPage("User talk:$user");
}
if($test_only)
{
print STDERR $_[0];
return;
}
if($text =~ /^#redirect/i)
{
userwarnlog("*User talk page [[User talk:$user]] is a redirect\n");
return;
}
$text .= $_[0];
Pearle::postPage("User talk:$user", $editTime, $startTime, $token, $text, $summary, "no");
print STDERR $_[0];
}
# Log a notification message to the console
sub notelog
{
print STDERR @_;
}
# Fix all wikilinks in a string so that they shows as a link, not inline, if it's for a category or image
sub FixupLinks
{
my $link = shift;
$link =~ s/\[\[(Category|Image)/[[:$1/g;
return $link;
}
# Make a string into a Wikipedia-compatible regex
sub MakeWikiRegex
{
my $string = shift;
# Escape metacharacters
$string =~ s/\\/\\\\/g;
$string =~ s/\./\\\./g;
$string =~ s/\(/\\\(/g;
$string =~ s/\)/\\\)/g;
$string =~ s/\[/\\\[/g;
$string =~ s/\]/\\\]/g;
$string =~ s/\+/\\\+/g;
$string =~ s/\*/\\\*/g;
$string =~ s/\?/\\\?/g;
$string =~ s/\^/\\\^/g;
$string =~ s/\$/\\\$/g;
# Process the string to match both with spaces and with underscores
$string =~ s/[ _]/[ _]+/g;
# Process the string to match both upcase and lowercase first characters
if($string =~ /^[A-Za-z]/)
{
$string =~ s/^(.)/"[$1".lc($1)."]"/e;
}
return $string;
}
# Check for new talk page messages
sub DoIHaveMessages
{
my $text = shift;
if($text =~ /<div class="usermessage">You have/)
{
return 1;
}
else
{
return 0;
}
}
sub GetPageList
{
my $image = shift;
my $image_text = shift;
my @pages = ();
# Extract the page links
# <ul><li><a href="https://artikeldigital.com/en/Lee_Hyori" title="Lee Hyori">Lee Hyori</a></li>
# <li><a href="https://artikeldigital.com/en/Daesung_Entertainment" title="Daesung Entertainment">Daesung Entertainment</a></li>
# </ul>
while($image_text =~ /<li><a href="https://artikeldigital.com/en/(\/wiki\/[^"]+)" title="([^"]+)">/g)
{
my $title;
$title = $2;
# Unescape any HTML entities in the title
$title =~ s/</</g;
$title =~ s/>/>/g;
$title =~ s/"/"/g;
$title =~ s/&/&/g;
notelog("Matched article $title\n");
# Filter out bad namespaces
if($title =~ /^(User:|Talk:|User talk:|Template talk:|Image:|Image talk:|Category talk:|Wikipedia:|Wikipedia talk:|Portal talk:)/) # Leave these alone
{
notelog("Ignoring [[$title]] due to namespace\n");
}
elsif($title =~ /^Special:/)
{
# Ignore Special: pages completely
}
elsif($title =~ /^(MediaWiki:|MediaWiki talk:|Template:|Help:|Help talk:)/) # Log a warning about these, but otherwise leave them alone
{
userwarnlog("*Found image [[:$image]] in [[$title]]\n");
}
else # Good namespaces: article, Category:, Portal:
{
push @pages, $title;
}
}
return @pages;
}
# Get all pages. Don't filter for bad namespaces.
sub GetFullPageList
{
my $image = shift;
my $image_text = shift;
my @pages = ();
# Extract the page links
# <ul><li><a href="https://artikeldigital.com/en/Lee_Hyori" title="Lee Hyori">Lee Hyori</a></li>
# <li><a href="https://artikeldigital.com/en/Daesung_Entertainment" title="Daesung Entertainment">Daesung Entertainment</a></li>
# </ul>
while($image_text =~ /<li><a href="https://artikeldigital.com/en/(\/wiki\/[^"]+)" title="([^"]+)">/g)
{
my $title;
$title = $2;
# Unescape any HTML entities in the title
$title =~ s/</</g;
$title =~ s/>/>/g;
$title =~ s/"/"/g;
$title =~ s/&/&/g;
notelog("Matched article $title\n");
push @pages, $title;
}
return @pages;
}
sub SaveImage
{
my $image = shift;
my $image_text = shift;
my $image_path = shift;
my $image_url;
($image_url) = $image_text =~ /<a href="https://artikeldigital.com/en/(http:\/\/upload\.wikimedia\.org\/wikipedia\/en\/[^"]+)"/;
if(defined($image_url))
{
my $filename;
my $image_data;
notelog("Fetching image $image_url\n");
($filename) = $image_url =~ /(\/[^\/]+)$/;
$filename = $image_path . $filename;
if(! -e $filename)
{
if($test_only)
{
notelog("Would save to $filename...");
}
else
{
$image_url = Pearle::urlDecode($image_url);
$image_data = Pearle::getURL($image_url);
notelog("Saving to $filename...");
if(defined($filename) and $filename)
{
open OUTFILE, ">", $filename;
print OUTFILE $image_data;
close OUTFILE;
notelog("Image saved\n");
Pearle::myLog("Image $image saved as $filename\n");
}
else
{
notelog("Failed\n");
}
}
}
else
{
notelog("File already exists\n");
}
}
}
sub RemoveImageFromPage
{
my $image = shift;
my $page = shift;
my $image_regex = shift;
my $removal_prefix = shift;
my $removal_comment = shift;
my ($text, $editTime, $startTime, $token);
my ($match1, $match2);
my $old_length;
my $new_length;
my $change_len;
my $match_len;
# Fetch an article page
($text, $editTime, $startTime, $token) = Pearle::getPage($page);
if(!defined($text))
{
Pearle::myLog("Error: Bad edit page [[$page]]\n");
userwarnlog(FixupLinks("*Error: Bad edit page [[$page]]\n"));
sleep(300);
return 0;
}
if($text =~ /^\s*$/)
{
# Might be protected instead of empty
Pearle::myLog("Error: Empty page [[$page]]\n");
userwarnlog(FixupLinks("*Error: Empty page [[$page]]\n"));
sleep(300);
return 0;
}
if($text =~ /^#redirect/i)
{
Pearle::myLog("Redirect found for page [[$page]] (image [[:$image]])\n");
userwarnlog(FixupLinks("*Redirect found for page [[$page]] (image [[:$image]])\n"));
return 0;
}
# Remove the image
my $regex3 = "(\\[\\[${image_regex}.*?(\\[\\[.*?\\]\\].*?|)+\\]\\][ \\t]*)"; # Regex to match images
my $regex3ex = "\\w[ \\t]*${regex3}[ \\t]*\\w"; # Regex to try to spot inline images
my $regex3c = "<!--.*${regex3}.*-->"; # Regex to spot images in comments
my $regex3g = "(${image_regex}.*)"; # Regex to match gallery images
my $regex3gc = "<!--.*${regex3g}-->"; # Regex to spot gallery images in comments
my ($raw_image) = $image =~ /Image:(.*)/;
my $regex4a = "([Cc]over\\s*=\\s*)" . MakeWikiRegex($raw_image);
my $regex4b = "(image_skyline\\s*=\\s*)" . MakeWikiRegex($raw_image);
my $regex4i = "(image\\s*=\\s*)" . MakeWikiRegex($raw_image); # Regex to match "image = " sections in infoboxes
my $regex4p = "(picture\\s*=\\s*)" . MakeWikiRegex($raw_image); # Regex to match "picture = " sections in infoboxes
my $regex4m = "\\[\\[[ _]*[Mm]edia[ _]*:[ _]*" . MakeWikiRegex($raw_image) . "[ _]*\\|([^]]*)\\]\\]"; # Regex to match inline Media: links
my $regex4g = "(img\\s*=\\s*)" . MakeWikiRegex($raw_image); # Regex to match "img = " sections in infoboxes
Pearle::myLog("Regex 3: $regex3\n");
notelog("Regex 3: $regex3\n");
notelog("Regex 3 extended: $regex3ex\n");
notelog("Regex 3 gallery: $regex3g\n");
Pearle::myLog("Raw regex: $raw_image\n");
notelog("Regex 4 Album: $regex4a\n");
notelog("Regex 4 City: $regex4b\n");
notelog("Regex 4 Image: $regex4i\n");
notelog("Regex 4 Media: $regex4m\n");
notelog("Regex 4 Picture: $regex4p\n");
notelog("Regex 4 Img: $regex4g\n");
if($text =~ /$regex3ex/)
{
Pearle::myLog("Possible inline image in [[$page]]\n");
userwarnlog(FixupLinks("*Possible inline image [[:$image]] in [[$page]]\n"));
return 0; # Can't do gallery matching because that also matches regular images, and odds are, we don't have an infobox
}
if($text =~ /$regex3c/ or $text =~ /$regex3gc/)
{
Pearle::myLog("Image in comment in [[$page]]\n");
# userwarnlog(FixupLinks("*Image in comment in [[$page]]\n"));
return 0; # Can't do gallery matching because that also matches regular images
}
$text =~ /$regex3/;
$match_len = length($1);
$match2 = $text =~ s/$regex3/<!-- $removal_prefix $1 -->/g;
$new_length = length($text);
print "Num: $match2 Len: $match_len\n";
if($match2)
{
# If a whole lot of text was removed, log a warning
if($match_len > (500 + length($image)))
{
userwarnlog(FixupLinks("*Long caption of $match_len bytes replaced in [[$page]]\n"));
if($match_len > (1000 + length($image)))
{
notelog("Unusually long caption found. Exiting.\n");
Pearle::myLog("Unusually long caption of $match_len found in [[$page]] ($match2 matches).\n");
exit;
}
}
if($match_len < (4 + length($image)))
{
notelog("*Short replacement of $match_len bytes in [[$page]]\n");
Pearle::myLog("Short replacement of $match_len bytes (min " . (length($image) + 4) . ") in [[$page]] ($match2 matches). Exiting.\n");
Pearle::myLog("Text:\n$text\n");
exit;
}
# If many matches, log a warning
if($match2 > 2)
{
Pearle::myLog("More than one match ($match2) in page [[$page]]\n");
# userwarnlog(FixupLinks("*More than one match ($match2) in page [[$page]]\n"));
}
if($match2 > 100)
{
Pearle::myLog("Too many matches ($match2) in page [[$page]]. Skipping.\n");
userwarnlog("Too many matches ($match2) in page [[$page]]. Skipping.\n");
return 0;
}
# If there might be a reference, log a warning
# if($text =~ /(?:see (?:image|picture|graph|diagram|right|left)|\(left\)|\(right\)|\(below\)|\(above\))/)
# {
# Pearle::myLog("Possible image reference in page [[$page]]\n");
# userwarnlog("*Possible image reference in page [[$page]]\n");
# }
if($text =~ /-->\]/)
{
Pearle::myLog("Possible bracket mixup in page [[$page]]\n");
userwarnlog(FixupLinks("*Possible bracket mixup in page [[$page]]\n"));
}
# if($text =~ /\[\[(?: |)<!--/)
# {
# Pearle::myLog("Possible multiline image in page [[$page]]\n");
# userwarnlog(FixupLinks("*Possible multiline image in page [[$page]]\n"));
# }
}
elsif($text =~ /<gallery/)
{
Pearle::myLog("*Possible image gallery in page [[$page]]\n");
if($text =~ s/$regex3g/<!-- $removal_prefix $1 -->/)
{
$match2 += 1;
}
}
if($match2 > 0)
{
if($text =~ /\[\[(?: |)<!--/)
{
Pearle::myLog("Possible multiline image in page [[$page]]\n");
userwarnlog(FixupLinks("*Possible multiline image in page [[$page]]\n"));
}
}
# Infobox removal
if($text =~ /{{Album[ _]infobox|{{Infobox[ _]Album/i)
{
if($text =~ s/$regex4a/$1/)
{
Pearle::myLog("*Album infobox in page [[$page]]\n");
$match2 += 1;
}
}
if($text =~ /{{Infobox[ _]City/i)
{
if($text =~ s/$regex4b/$1/)
{
Pearle::myLog("*City infobox in page [[$page]]\n");
$match2 += 1;
}
}
if($text =~ /{{Taxobox/i)
{
if($text =~ s/$regex4i/$1/)
{
Pearle::myLog("*Taxobox in page [[$page]]\n");
$match2 += 1;
}
}
if($text =~ /{{NFL[ _]player/i)
{
if($text =~ s/$regex4i/$1/i)
{
Pearle::myLog("*NFL Playerbox in page [[$page]]\n");
$match2 += 1;
}
}
if($text =~ /{{Infobox[ _]President/i)
{
if($text =~ s/$regex4i/$1/i)
{
Pearle::myLog("*Presidentbox in page [[$page]]\n");
# userwarnlog("*Presidentbox in page [[$page]]\n");
$match2 += 1;
}
}
if($text =~ /{{Infobox[ _]Cricketer/i)
{
if($text =~ s/$regex4p/picture = cricket no pic.png/i)
{
Pearle::myLog("*Cricketer in page [[$page]]\n");
# userwarnlog("*Cricketer in page [[$page]]\n");
$match2 += 1;
}
}
if($text =~ /{{Infobox[ _]Celebrity/)
{
if($text =~ s/$regex4i/$1/i)
{
Pearle::myLog("*Celebrity in page [[$page]]\n");
$match2 += 1;
}
}
if($text =~ /{{Infobox[ _]Wrestler/)
{
if($text =~ s/$regex4i/$1/i)
{
Pearle::myLog("*Wrestler in page [[$page]]\n");
$match2 += 1;
}
}
if($text =~ /{{Infobox musical artist 2/)
{
if($text =~ s/$regex4g/$1/i)
{
Pearle::myLog("*InfoMusArt2 in page [[$page]]\n");
$match2 += 1;
}
}
if($text =~ /{{Infobox Model/)
{
if($text =~ s/$regex4i/$1/i)
{
Pearle::myLog("*Model in page [[$page]]\n");
$match2 += 1;
}
}
if($match2) # No need to null-edit articles anymore
{
if($test_only)
{
notelog("Test removal from page succeeded\n");
}
else
{
# Submit the changes
Pearle::postPage($page, $editTime, $startTime, $token, $text, $removal_comment, "no");
}
}
return ($match2)
}
# Returns 1 if the user has been notified, or a reference to the userpage edit session if they haven't
sub isNotified
{
my $image_text = shift;
my $uploader = shift;
my $image_regex = shift;
my $image_name = shift;
my $notes_ref = shift;
my $donts_ref = shift;
# Check notification list
if($notes_ref->{"$uploader,$image_name"})
{
notelog("Already notified for this image\n");
return 1;
}
if($donts_ref->{$uploader})
{
notelog("On exception list\n");
Pearle::myLog("On exception list: $uploader\n");
return 1;
}
# Check uploader's talkpage
my ($text, $editTime, $startTime, $token) = Pearle::getPage("User talk:$uploader");
if($text =~ /$image_regex/)
{
notelog("Already notified by someone else\n");
$donts_ref->{"$uploader,$image_name"} = 1;
return 1;
}
else
{
print "Not already notified\n";
return [$text, $editTime, $startTime, $token];
}
}
sub isDated
{
my $image_text = shift;
if($image_text =~ /\((\d\d?) (\w*) (\d\d\d\d)\)/) # Dated template
{
print "Dated tag $1 $2 $3\n";
return 1;
}
# as of 6 October 2006">
elsif($image_text =~ /as of (\d\d?) (\w*) (\d\d\d\d)/) # Template borked, working off category
{
print "Template borked; category $1 $2 $3\n";
return 1;
}
elsif($image_text =~ /{{{day}}} {{{month}}} \d\d\d\d/ or $image_text =~ /\( 2006\)/) # Generic template
{
print "Generic tag\n";
return 0;
}
else
{
print "No tag match\n";
return 0;
}
}
# Return the tag date if there is one, the upload date if not
# Returns in (day, month, year) format
sub getDate
{
my $image_text = shift;
if($image_text =~ /\((\d\d?) (\w*) (\d\d\d\d)\)/)
{
print "Template date $1-$2-$3\n";
return ($1, $2, $3);
}
elsif($image_text =~ /as of (\d\d?) (\w*) (\d\d\d\d)/) # Template borked, working off category
{
print "Category date $1-$2-$3\n";
return ($1, $2, $3);
}
elsif($image_text =~ />\d\d?:\d\d, (\d\d?) (\w*) (\d\d\d\d)</)
{
print "Upload date $1-$2-$3\n";
# return ($1, $2, $3);
# For now, be conservative:
my ($year, $month, $day) = Today();
return ($day, Month_to_Text($month), $year);
}
else
{
print "No date\n";
return (1, "January", 2006);
}
}
# Return a list of upload dates
sub getUploadDates
{
my @dates;
my $image_text = shift;
while($image_text =~ />\d\d?:\d\d, (\d\d?) (\w*) (\d\d\d\d)</g)
{
push @dates, [$1, $2, $3];
}
return @dates;
}
sub getLastEditDate
{
my ($day, $month, $year);
my $image = shift;
my @history = Pearle::parseHistory($image);
(undef, $day, $month, $year) = @{$history[0]};
return ($day, $month, $year);
}
# Find the most recent non-vandal, non-revert uploader
sub getUploader
{
my $image_text = shift;
my ($uploader, $dims, $bytes, $comment);
my @uploaders;
my $uploader_data;
my $i = 0;
# title="User:Jamie100">Jamie100</a> (<a href="https://artikeldigital.com/en/User_talk:Jamie100" title="User talk:Jamie100">Talk</a>) . . 424x216 (25800 bytes) <span class='comment'>(Reverted to earlier revision)</span></li>
# while($image_text =~ />([^<]+?)<\/a> \(<a href="https://artikeldigital.com/en/[^"]+?" (?:class="new" |)title="[^"]+?">Talk<\/a>\) \. \. (\d+x\d+) \(([0-9,]+) bytes\)(?: <span class="comment">([^<]*)|)</g)
while($image_text =~ />([^<]+?)<\/a> \(<a href="https://artikeldigital.com/en/[^"]+?" (?:class="new" |)title="[^"]+?">Talk<\/a> \| <a href="https://artikeldigital.com/en/[^"]*" title="[^"]*">contribs<\/a>\) \. \. (\d+.+?\d+) \(([0-9,]+) bytes\)(?: <span class="comment">([^<]*)|)</g)
{
($uploader, $dims, $bytes, $comment) = ($1, $2, $3, $4);
$bytes =~ s/,//g; # Remove commas to turn into a real number
$comment = "" if(!defined($comment)); # Reduce warnings
push @uploaders, [$uploader, $dims, $bytes, $comment];
notelog("Uploader found: $uploader, $dims, $bytes, $comment\n");
$i++;
die "Too many uploaders: $i\n" if($i > 100);
}
my $max = scalar(@uploaders);
print $max, "\n";
for($i = 0; $i < $max; $i++)
{
$uploader = $uploaders[$i][0];
if($uploaders[$i][3] =~ /Reverted/)
{
$dims = $uploaders[$i][1];
$bytes = $uploaders[$i][2];
notelog("Revert found: $uploader, $dims, $bytes\n");
$i++;
while(($dims ne $uploaders[$i][1] or $bytes ne $uploaders[$i][2]) and $i < $max)
{
notelog("Reversion data: $uploaders[$i][1], $uploaders[$i][2], $i\n");
$uploader = $uploaders[$i][0];
$i++;
}
}
elsif($uploaders[$i][3] =~ /optimi(z|s)|adjust|tweak|scale|crop|change|resize/i)
{
notelog("Optimize found. Skipping.\n");
}
else
{
notelog("Uploader: $uploader ($i)\n");
last;
}
}
$uploader = undef if($i >= $max);
print "Uploader: $uploader\n";
return $uploader;
}
# See if the specified category exists, and if not, create it
sub checkImageCategory
{
my $cat;
my ($text, $editTime, $startTime, $token);
$cat = "Category:Images with unknown source as of $_[0] $_[1] $_[2]";
($text, $editTime, $startTime, $token) = Pearle::getPage($cat);
if($text !~ /\[\[[Cc]ategory:[Ii]mages with unknown source/)
{
$text .= "\n[[Category:Images with unknown source| ]]\n";
if($test_only)
{
notelog("Would create category [[:$cat]]\n");
}
else
{
Pearle::postPage($cat, $editTime, $startTime, $token, $text, "Created category", "no");
userwarnlog("*Created category [[:$cat]]\n");
}
}
}
sub loadNotificationList
{
my $file = shift;
my %notelist;
my $i = 0;
notelog("File: $file\n");
open INFILE, "<", $file;
while(<INFILE>)
{
$_ =~ s/\s*#.*$//g;
chomp;
$notelist{$_} = 1;
$i++;
}
close INFILE;
notelog("$i notifications loaded\n");
return %notelist;
}
sub saveNotificationList
{
return if($test_only);
my $file = shift;
my %notelist = @_;
my $key;
open OUTFILE, ">", $file;
foreach $key (keys(%notelist))
{
print OUTFILE "$key\n";
}
close OUTFILE;
}
1;
Content Disclaimer
Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.
- The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for the purpose of restating it. We strive to provide accurate and relevant information, however:
- There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
- It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
- Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
- Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.