#! /usr/bin/perl -w # mymovies # # Automatically download your vote history from IMDB. # # Copyright (C) 2001 Alan De Smet chaos at highprogrammer.com # http://www.highprogrammer.com/alan/perl/#mymovies # # This software is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2 of # the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program in file entitled "COPYING"; if # not, write to the Free Software Foundation, Inc., 59 Temple # Place - Suite 330, Boston, MA 02111-1307, USA. A copy of the # GNU General Public License is available at # http://www.fsf.org/copyleft/gpl.html . # my $URL = 'http://us.imdb.com/mymovies/list?votehistory'; # Only need to set one of COOKIE_FILE or IMDBDATA. COOKIE_FILE is preferred # over IMDBDATA my $COOKIE_FILE = "$ENV{HOME}/.netscape/cookies"; my $IMDBDATA = undef; # Allows you to rename movies, useful if you know a movie by a a # different name than it's "proper name", common for translated # movies. (The default collection translates a random collection of # anime, Jackie Chan, and John Woo movies to their common english # names.) my(%TRANSLATIONS) = ( '\'A\' gai waak' => 'Project A', 'Cite des enfants perdus, La' => 'City of Lost Children, The', 'Dian zhi gong fu gan chian chan' => 'Half a Loaf of Kung Fu', 'Die xue jie tou' => 'Bullet in the Head', 'Do ma daan' => 'Peking Opera Blues', 'Feiying gaiwak' => 'Armor of God II: Operation Condor', 'Feng yu shuang liu xing' => 'Killer Meteors, The', 'Ging chaat goo si juk jaap' => 'Police Story 2', 'Hokuto no Ken' => 'Fist of the North Star', 'Hong faan kui' => 'Rumble in the Bronx', 'Huo shao dao' => 'The Prisoner', 'Jing cha gu shi III: Chao ji jing cha' => 'Supercop (Police Story 3)', 'Jing cha gu shi IV: Jian dan ren wu' => 'Jackie Chan\'s First Strike (Police Story 4)', 'Jui kuen II' => 'Legend of Drunken Master, The', 'Kwai tsan tse' => 'Wheels on Meals', 'Lashou shentan' => 'Hard Boiled', 'Lodoss to senki' => 'Record of Lodoss War', 'Lola rennt' => 'Run, Lola, Run', 'Longxiong hudi' => 'Armor of God', 'Majo no takkyubin' => 'Kiki\'s Delivery Service', 'Qiji' => 'Miracle', 'She hao ba bu' => 'Snake & Crane Arts of Shaolin', 'Shuang long hui' => 'Twin Dragons', 'Tonari no Totoro' => 'My Neighbor Totoro', 'Unendliche Geschichte, Die' => 'NeverEnding Story, The', 'Wo hu zang long' => 'Crouching Tiger, Hidden Dragon', 'Yatgo ho yan' => 'Mr. Nice Guy', 'Yong Chun' => 'Wing Chun', ); ################################################################################ # # NO USER SERVICABLE PARTS BELOW # use strict; use LWP::UserAgent; use HTTP::Cookies; my $VERSION = '1.0.0.0'; my $AGENT_NAME = "mymovies/$VERSION (http://www.highprogrammer.com/alan/perl/#mymovies)"; main(); exit; sub main { my $USER_AGENT = new LWP::UserAgent; $USER_AGENT->agent($AGENT_NAME); my $request = new HTTP::Request GET=>$URL; my $cookie_jar = undef; if(defined $COOKIE_FILE and -e $COOKIE_FILE) { $cookie_jar = HTTP::Cookies::Netscape->new(File=>$COOKIE_FILE); } if(not defined $cookie_jar) { $cookie_jar = HTTP::Cookies->new; my(@rest) = ("/", ".imdb.com", undef, 0, 0, 60*60, 0); $cookie_jar->set_cookie(undef, "IMDBDATA", $IMDBDATA, @rest); $cookie_jar->set_cookie(undef, "IMDBPREFS", "3", @rest); } $cookie_jar->add_cookie_header($request); my $result = $USER_AGENT->request($request); if($result->is_success) { } else { die "$0: Unable to load movie list from IMDB\n"; } my $body = $result->content; $body = strip_entity($body); my $owner = get_owner($body); my($total) = get_total($body); my(%movies) = get_movies($body); my $count = scalar keys %movies; if($count != $total) { die "$0: An error occurred. Expected $total movies, found $count.\n"; } print "${owner}'s Movies ($total total)\n"; print_movies(%movies); } sub get_owner { my($body) = @_; my($owner) = ($body =~ m|([^<>]+)'s Movies|i); # ' Fix syntax coloring return $owner; } sub get_total { my($body) = @_; my($total) = ($body =~ m| (\d+) Titles|); return $total; } sub get_movies { my($body) = @_; my(%movies) = ($body =~ m| \s* ]+>\s*]+type="checkbox"[^>]+>\s*\s* ]+>\s*([^<]+)\s* ]+>(\d+)\s* |ximsg); return %movies; } sub canonize_name { my($imdb_name) = @_; my($title, $year, $rest) = ($imdb_name =~ /^(.+) \((\d\d\d\d(?:\/[IVX]+)?)\)\s*(.*)$/); if(not defined $title) { $title = $imdb_name; $year = 'unknown'; } elsif(defined $rest and length $rest) { $title = "$title $rest"; } my($original_title) = undef; if(exists $TRANSLATIONS{$title}) { $original_title = $title; $title = $TRANSLATIONS{$title}; } return ($title, $year, $original_title); } sub print_movies { my(%movies) = @_; my(@sort_order) = sort { (-($movies{$a} <=> $movies{$b})) || smartcmp($a, $b)} (keys %movies); foreach my $movie (@sort_order) { my($title, $year, $original_title) = canonize_name($movie); printf "%2d %s (%s)", $movies{$movie}, $title, $year; if(defined $original_title and length $original_title) { print " ($original_title)"; } print "\n"; } } sub smartcmp { my($lhs, $rhs) = @_; $lhs =~ tr[a-z][A-Z]; $rhs =~ tr[a-z][A-Z]; $lhs =~ s/[":'!]//; $rhs =~ s/[":'!]//; return $lhs cmp $rhs; } sub strip_entity { my($src) = @_; $src =~ s/\"/"/g; $src =~ s/\ / /g; $src =~ s/\¡/!/g; $src =~ s/\¢/c/g; # "¢" $src =~ s/\£/L/g; # "£" $src =~ s/\¤/\$/g; # "¤" $src =~ s/\¥/Y/g; # "¥" $src =~ s/\¦/|/g; # "¦" $src =~ s/\§/S/g; # "§" $src =~ s/\¨/../g; # "¨" $src =~ s/\©/(c)/g; # "©" $src =~ s/\ª/_/g; # "ª" $src =~ s/\«/<>/g; # "»" $src =~ s/\¼/1\/4/g; # "¼" $src =~ s/\½/1\/2/g; # "½" $src =~ s/\¾/3\/4/g; # "¾" $src =~ s/\¿/?/g; # "¿" $src =~ s/\À/A/g; # "À" $src =~ s/\Á/A/g; # "Á" $src =~ s/\Â/A/g; # "Â" $src =~ s/\Ã/A/g; # "Ã" $src =~ s/\Ä/A/g; # "Ä" $src =~ s/\Å/A/g; # "Å" $src =~ s/\Æ/AE/g; # "Æ" $src =~ s/\Ç/C/g; # "Ç" $src =~ s/\È/E/g; # "È" $src =~ s/\É/E/g; # "É" $src =~ s/\Ê/E/g; # "Ê" $src =~ s/\Ë/E/g; # "Ë" $src =~ s/\Ì/I/g; # "Ì" $src =~ s/\Í/I/g; # "Í" $src =~ s/\Î/I/g; # "Î" $src =~ s/\Ï/I/g; # "Ï" $src =~ s/\Ð/ETH/g; # "Ð" $src =~ s/\Ñ/N/g; # "Ñ" $src =~ s/\Ò/O/g; # "Ò" $src =~ s/\Ó/O/g; # "Ó" $src =~ s/\Ô/O/g; # "Ô" $src =~ s/\Õ/O/g; # "Õ" $src =~ s/\Ö/O/g; # "Ö" $src =~ s/\×/x/g; # "×" $src =~ s/\Ø/O/g; # "Ø" $src =~ s/\Ù/U/g; # "Ù" $src =~ s/\Ú/U/g; # "Ú" $src =~ s/\Û/U/g; # "Û" $src =~ s/\Ü/U/g; # "Ü" $src =~ s/\Ý/Y/g; # "Ý" $src =~ s/\Þ/_/g; # "Þ" $src =~ s/\ß/s/g; # "ß" $src =~ s/\à/a/g; # "à" $src =~ s/\á/a/g; # "á" $src =~ s/\â/a/g; # "â" $src =~ s/\ã/a/g; # "ã" $src =~ s/\ä/a/g; # "ä" $src =~ s/\å/a/g; # "å" $src =~ s/\æ/ae/g; # "æ" $src =~ s/\ç/c/g; # "ç" $src =~ s/\è/e/g; # "è" $src =~ s/\é/e/g; # "é" $src =~ s/\ê/e/g; # "ê" $src =~ s/\ë/i/g; # "ë" $src =~ s/\ì/i/g; # "ì" $src =~ s/\í/i/g; # "í" $src =~ s/\î/i/g; # "î" $src =~ s/\ï/i/g; # "ï" $src =~ s/\ð/eth/g; # "ð" $src =~ s/\ñ/n/g; # "ñ" $src =~ s/\ò/o/g; # "ò" $src =~ s/\ó/o/g; # "ó" $src =~ s/\ô/o/g; # "ô" $src =~ s/\õ/o/g; # "õ" $src =~ s/\ö/o/g; # "ö" $src =~ s/\÷/\//g; # "÷" $src =~ s/\ø/o/g; # "ø" $src =~ s/\ù/u/g; # "ù" $src =~ s/\ú/u/g; # "ú" $src =~ s/\û/u/g; # "û" $src =~ s/\ü/u/g; # "ü" $src =~ s/\ý/y/g; # "ý" $src =~ s/\þ/_/g; # "þ" $src =~ s/\ÿ/y/g; # "ÿ" $src =~ s/\&/\&/g; # MUST be last. return $src; }