#! /usr/bin/perl -w

# mymovies
#
# Automatically download your vote history from IMDB.
#
# Copyright (C) 2001 Alan De Smet chaos at highprogrammer.com
# http://www.highprogrammer.com/alan/perl/#mymovies
#
# This software is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of
# the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with this program in file entitled "COPYING"; if
# not, write to the Free Software Foundation, Inc., 59 Temple
# Place - Suite 330, Boston, MA  02111-1307, USA.  A copy of the
# GNU General Public License is available at
# http://www.fsf.org/copyleft/gpl.html .
#


my $URL = 'http://us.imdb.com/mymovies/list?votehistory';

# Only need to set one of COOKIE_FILE or IMDBDATA.  COOKIE_FILE is preferred
# over IMDBDATA
my $COOKIE_FILE = "$ENV{HOME}/.netscape/cookies";
my $IMDBDATA = undef;

# Allows you to rename movies, useful if you know a movie by a a
# different name than it's "proper name", common for translated
# movies.  (The default collection translates a random collection of
# anime, Jackie Chan, and John Woo movies to their common english
# names.)
my(%TRANSLATIONS) = (
	'\'A\' gai waak' => 'Project A',
	'Cite des enfants perdus, La' => 'City of Lost Children, The',
	'Dian zhi gong fu gan chian chan' => 'Half a Loaf of Kung Fu',
	'Die xue jie tou' => 'Bullet in the Head',
	'Do ma daan' => 'Peking Opera Blues',
	'Feiying gaiwak' => 'Armor of God II: Operation Condor',
	'Feng yu shuang liu xing' => 'Killer Meteors, The',
	'Ging chaat goo si juk jaap' => 'Police Story 2',
	'Hokuto no Ken' => 'Fist of the North Star',
	'Hong faan kui' => 'Rumble in the Bronx',
	'Huo shao dao' => 'The Prisoner',
	'Jing cha gu shi III: Chao ji jing cha' => 'Supercop (Police Story 3)',
	'Jing cha gu shi IV: Jian dan ren wu' =>
						'Jackie Chan\'s First Strike (Police Story 4)',
	'Jui kuen II' => 'Legend of Drunken Master, The',
	'Kwai tsan tse' => 'Wheels on Meals',
	'Lashou shentan' => 'Hard Boiled',
	'Lodoss to senki' => 'Record of Lodoss War',
	'Lola rennt' => 'Run, Lola, Run',
	'Longxiong hudi' => 'Armor of God',
	'Majo no takkyubin' => 'Kiki\'s Delivery Service',
	'Qiji' => 'Miracle',
	'She hao ba bu' => 'Snake & Crane Arts of Shaolin',
	'Shuang long hui' => 'Twin Dragons',
	'Tonari no Totoro' => 'My Neighbor Totoro',
	'Unendliche Geschichte, Die' => 'NeverEnding Story, The',
	'Wo hu zang long' => 'Crouching Tiger, Hidden Dragon',
	'Yatgo ho yan' => 'Mr. Nice Guy',
	'Yong Chun' => 'Wing Chun',
);


################################################################################
#
# NO USER SERVICABLE PARTS BELOW
#

use strict;

use LWP::UserAgent;
use HTTP::Cookies;

my $VERSION = '1.0.0.0';

my $AGENT_NAME = "mymovies/$VERSION (http://www.highprogrammer.com/alan/perl/#mymovies)";

main();
exit;

sub main {
	my $USER_AGENT = new LWP::UserAgent;
	$USER_AGENT->agent($AGENT_NAME);

	my $request = new HTTP::Request GET=>$URL;

	my $cookie_jar = undef;
	if(defined $COOKIE_FILE and -e $COOKIE_FILE) {
		$cookie_jar = HTTP::Cookies::Netscape->new(File=>$COOKIE_FILE);
	}
	if(not defined $cookie_jar) {
		$cookie_jar = HTTP::Cookies->new;
		my(@rest) = ("/", ".imdb.com", undef, 0, 0, 60*60, 0);
		$cookie_jar->set_cookie(undef, "IMDBDATA", $IMDBDATA, @rest);
		$cookie_jar->set_cookie(undef, "IMDBPREFS", "3", @rest);
	}

	$cookie_jar->add_cookie_header($request);
	my $result = $USER_AGENT->request($request);

	if($result->is_success) {
	} else {
		die "$0: Unable to load movie list from IMDB\n";
	}
	my $body = $result->content;

	$body = strip_entity($body);

	my $owner = get_owner($body);
	my($total) = get_total($body);
	my(%movies) = get_movies($body);

	my $count = scalar keys %movies;

	if($count != $total) {
		die "$0: An error occurred. Expected $total movies, found $count.\n";
	}

	print "${owner}'s Movies ($total total)\n";
	print_movies(%movies);
}

sub get_owner {
	my($body) = @_;
	my($owner) = ($body =~ m|<b><font size="\+1" face="Arial, Helvetica, sans-serif">([^<>]+)'s Movies</font></b>|i); # ' Fix syntax coloring
	return $owner;
}

sub get_total {
	my($body) = @_;
	my($total) = ($body =~ m|<td nowrap><font size="-1"> </font><font face="Verdana, Sans-serif" size="-1"><b>(\d+) Titles</b></font></td>|);
	return $total;
}

sub get_movies {
	my($body) = @_;
	my(%movies) = ($body =~ m|
		<tr>\s*
			<td[^>]+>\s*<input[^>]+type="checkbox"[^>]+>\s*</td>\s*
			<td[^>]+>\s*<a\ href="/Title\?\d+">([^<]+)</a></td>\s*
			<td[^>]+>(\d+)</td>\s*
		</tr>|ximsg);
	return %movies;
}

sub canonize_name {
	my($imdb_name) = @_;
	my($title, $year, $rest) = ($imdb_name =~ /^(.+) \((\d\d\d\d(?:\/[IVX]+)?)\)\s*(.*)$/);
	if(not defined $title) {
		$title = $imdb_name;
		$year = 'unknown';
	} elsif(defined $rest and length $rest) {
		$title = "$title $rest";
	}

	my($original_title) = undef;
	if(exists $TRANSLATIONS{$title}) {
		$original_title = $title;
		$title = $TRANSLATIONS{$title};
	}

	return ($title, $year, $original_title);
}

sub print_movies {
	my(%movies) = @_;
	my(@sort_order) = sort { (-($movies{$a} <=> $movies{$b})) || smartcmp($a, $b)} (keys %movies);
	foreach my $movie (@sort_order) {
		my($title, $year, $original_title) = canonize_name($movie);
		printf "%2d %s (%s)", $movies{$movie}, $title, $year;
		if(defined $original_title and length $original_title) {
			print " ($original_title)";
		}
		print "\n";
	}
}

sub smartcmp {
	my($lhs, $rhs) = @_;
	$lhs =~ tr[a-z][A-Z];
	$rhs =~ tr[a-z][A-Z];
	$lhs =~ s/[":'!]//;
	$rhs =~ s/[":'!]//;
	return $lhs cmp $rhs;
}

sub strip_entity {
	my($src) = @_;

	$src =~ s/\&quot;/"/g;
	$src =~ s/\&nbsp;/ /g;
	$src =~ s/\&iexcl;/!/g;
	$src =~ s/\&cent;/c/g; # "&#162;" 
	$src =~ s/\&pound;/L/g; # "&#163;" 
	$src =~ s/\&curren;/\$/g; # "&#164;" 
	$src =~ s/\&yen;/Y/g; # "&#165;" 
	$src =~ s/\&brvbar;/|/g; # "&#166;" 
	$src =~ s/\&sect;/S/g; # "&#167;" 
	$src =~ s/\&uml;/../g; # "&#168;" 
	$src =~ s/\&copy;/(c)/g; # "&#169;" 
	$src =~ s/\&ordf;/_/g; # "&#170;" 
	$src =~ s/\&laquo;/<</g; # "&#171;" 
	$src =~ s/\&not;/_/g; # "&#172;" 
	$src =~ s/\&shy;//g; # "&#173;" 
	$src =~ s/\&reg;/(R)/g; # "&#174;" 
	$src =~ s/\&macr;/_/g; # "&#175;" 
	$src =~ s/\&deg;/o/g; # "&#176;" 
	$src =~ s/\&plusmn;/+-/g; # "&#177;" 
	$src =~ s/\&sup2;/2/g; # "&#178;" 
	$src =~ s/\&sup3;/3/g; # "&#179;" 
	$src =~ s/\&acute;/'/g; #'
	$src =~ s/\&micro;/u/g; # "&#181;" 
	$src =~ s/\&para;/P/g; # "&#182;" 
	$src =~ s/\&middot;/./g; # "&#183;" 
	$src =~ s/\&cedil;/,/g; # "&#184;" 
	$src =~ s/\&sup1;/1/g; # "&#185;" 
	$src =~ s/\&ordm;/o/g; # "&#186;" 
	$src =~ s/\&raquo;/>>/g; # "&#187;" 
	$src =~ s/\&frac14;/1\/4/g; # "&#188;" 
	$src =~ s/\&frac12;/1\/2/g; # "&#189;" 
	$src =~ s/\&frac34;/3\/4/g; # "&#190;" 
	$src =~ s/\&iquest;/?/g; # "&#191;" 
	$src =~ s/\&Agrave;/A/g; # "&#192;" 
	$src =~ s/\&Aacute;/A/g; # "&#193;" 
	$src =~ s/\&Acirc;/A/g; # "&#194;" 
	$src =~ s/\&Atilde;/A/g; # "&#195;" 
	$src =~ s/\&Auml;/A/g; # "&#196;" 
	$src =~ s/\&Aring;/A/g; # "&#197;" 
	$src =~ s/\&AElig;/AE/g; # "&#198;" 
	$src =~ s/\&Ccedil;/C/g; # "&#199;" 
	$src =~ s/\&Egrave;/E/g; # "&#200;" 
	$src =~ s/\&Eacute;/E/g; # "&#201;" 
	$src =~ s/\&Ecirc;/E/g; # "&#202;" 
	$src =~ s/\&Euml;/E/g; # "&#203;" 
	$src =~ s/\&Igrave;/I/g; # "&#204;" 
	$src =~ s/\&Iacute;/I/g; # "&#205;" 
	$src =~ s/\&Icirc;/I/g; # "&#206;" 
	$src =~ s/\&Iuml;/I/g; # "&#207;" 
	$src =~ s/\&ETH;/ETH/g; # "&#208;" 
	$src =~ s/\&Ntilde;/N/g; # "&#209;" 
	$src =~ s/\&Ograve;/O/g; # "&#210;" 
	$src =~ s/\&Oacute;/O/g; # "&#211;" 
	$src =~ s/\&Ocirc;/O/g; # "&#212;" 
	$src =~ s/\&Otilde;/O/g; # "&#213;" 
	$src =~ s/\&Ouml;/O/g; # "&#214;" 
	$src =~ s/\&times;/x/g; # "&#215;" 
	$src =~ s/\&Oslash;/O/g; # "&#216;" 
	$src =~ s/\&Ugrave;/U/g; # "&#217;" 
	$src =~ s/\&Uacute;/U/g; # "&#218;" 
	$src =~ s/\&Ucirc;/U/g; # "&#219;" 
	$src =~ s/\&Uuml;/U/g; # "&#220;" 
	$src =~ s/\&Yacute;/Y/g; # "&#221;" 
	$src =~ s/\&THORN;/_/g; # "&#222;" 
	$src =~ s/\&szlig;/s/g; # "&#223;" 
	$src =~ s/\&agrave;/a/g; # "&#224;" 
	$src =~ s/\&aacute;/a/g; # "&#225;" 
	$src =~ s/\&acirc;/a/g; # "&#226;" 
	$src =~ s/\&atilde;/a/g; # "&#227;" 
	$src =~ s/\&auml;/a/g; # "&#228;" 
	$src =~ s/\&aring;/a/g; # "&#229;" 
	$src =~ s/\&aelig;/ae/g; # "&#230;" 
	$src =~ s/\&ccedil;/c/g; # "&#231;" 
	$src =~ s/\&egrave;/e/g; # "&#232;" 
	$src =~ s/\&eacute;/e/g; # "&#233;" 
	$src =~ s/\&ecirc;/e/g; # "&#234;" 
	$src =~ s/\&euml;/i/g; # "&#235;" 
	$src =~ s/\&igrave;/i/g; # "&#236;" 
	$src =~ s/\&iacute;/i/g; # "&#237;" 
	$src =~ s/\&icirc;/i/g; # "&#238;" 
	$src =~ s/\&iuml;/i/g; # "&#239;" 
	$src =~ s/\&eth;/eth/g; # "&#240;" 
	$src =~ s/\&ntilde;/n/g; # "&#241;" 
	$src =~ s/\&ograve;/o/g; # "&#242;" 
	$src =~ s/\&oacute;/o/g; # "&#243;" 
	$src =~ s/\&ocirc;/o/g; # "&#244;" 
	$src =~ s/\&otilde;/o/g; # "&#245;" 
	$src =~ s/\&ouml;/o/g; # "&#246;" 
	$src =~ s/\&divide;/\//g; # "&#247;" 
	$src =~ s/\&oslash;/o/g; # "&#248;" 
	$src =~ s/\&ugrave;/u/g; # "&#249;" 
	$src =~ s/\&uacute;/u/g; # "&#250;" 
	$src =~ s/\&ucirc;/u/g; # "&#251;" 
	$src =~ s/\&uuml;/u/g; # "&#252;" 
	$src =~ s/\&yacute;/y/g; # "&#253;" 
	$src =~ s/\&thorn;/_/g; # "&#254;" 
	$src =~ s/\&yuml;/y/g; # "&#255;" 

	$src =~ s/\&amp;/\&/g; # MUST be last.
	return $src;
}


