#!/usr/bin/perl -w # cdbaby_cheapskate.pl: Sort cdbaby.com CDs by price # Placed into the public domain in 2004 by Forrest Cahoon use strict; use LWP::UserAgent; use HTTP::Request::Common; my ($style, $Ua, $all_url, $out_fname, $response, $content, $line, $before_list, $top, $bottom, $url, $artist, $title, $price, $tracks, @cds, $cd); ($style) = @ARGV; while (!defined $style) { print "Enter cdbaby style number (? for help): "; $style = ; $style =~ s/\s*$//s; if ($style !~ /^\d+$/) { print <<"EOT"; This program takes a page displaying all artists of a particular category at cdbaby.com and regenerates it sorted by price with the prices displayed. You need to find the number from the URL of the category you wish to browse. To find this, first go to www.cdbaby.com and click on "BROWSE" at the top. Select a genre from the yellow box on the upper left (e.g. "Rock"). Then select a sub-genre from the yellow box on the upper left of that page (e.g. "Acoustic"). You should now be taken to a url like "http://www.cdbaby.com/style/105". You want the number at the end (in this case "105"). EOT $style = undef; } } $Ua = LWP::UserAgent->new; $all_url = "http://www.cdbaby.com/style/$style/all"; $out_fname = "cheapskate$style.html"; print "Fetching $all_url ... "; $response = $Ua->request(GET $all_url); if ($response->is_success) { print "got it\n"; } else { die "failed: $!\n"; } $content = $response->content; $before_list = 1; $top = ""; $bottom = ""; while ($content =~ /^(.*)$/mg) { $line = "$1\n"; $line =~ s!\bhref=\"!href=\"http://www.cdbaby.com!i; $line =~ s!\?cdbaby=[0-9a-f]{32}!!; # try removing this if it breaks if ($line =~ m!^\s*
(.*?):\s*(.*?)!) { $url = $1; $artist = $2; $title = $3; ($price, $tracks) = getinfo($url, $artist, $title); $line =~ s!^(\s*
)!$1 \$$price $tracks !; push @cds, { line => $line, url => $url, artist => $artist, title => $title, price => $price, tracks => $tracks }; $before_list = 0; } else { if ($before_list) { $top .= $line; } else { $bottom .= $line; } } } open OUT, ">$out_fname" || die "Couldn't open $out_fname for writing\n"; print OUT $top; foreach $cd ( sort byprice @cds) { print OUT $cd->{line}; } print OUT $bottom; close OUT; print "\nNow load $out_fname into your browser, " . "and check out some cheap CDs!\n"; ############################################################################### sub byprice { my $aprice = ($a->{price} =~ /^\d+\.\d\d$/) ? $a->{price} : "10000"; my $bprice = ($b->{price} =~ /^\d+\.\d\d$/) ? $b->{price} : "10000"; return $aprice <=> $bprice or $a->{artist} cmp $b->{artist} or $a->{title} cmp $b->{title} } ############################################################################### sub getinfo { my ($url, $artist, $title) = @_; my ($response, $price, $tracklist, $tracks); print "$artist: $title ... "; $response = $Ua->request(GET $url); if (!$response->is_success) { print "error\n"; return ("[fetch error]", "[fetch error]"); } if ($response->content =~ m!CD Baby Price:.*?\$(\d+\.\d\d)!s) { $price = $1; } else { $price = "[parse error]"; } $tracks = "[parse error]"; if ($response->content =~ m/(.*?)