#!/usr/bin/perl -w
#
# usbank_html_reformatter.pl
#
# Copyright (c) 2003 by Forrest Cahoon
#
# This program takes as input html saved from a USBank online account
# webpage, and outputs html which is much more useful for keeping track
# of your finances.
#
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# Usage: perl usbank_html_reformatter.pl input.html output.html

use HTML::Parser;
use strict;
use warnings;

die "Usage: perl usbank_html_reformatter.pl input.html output.html\n"
    unless defined $ARGV[0] && defined $ARGV[1] && -e $ARGV[0];

my ($in_name, $out_name) = @ARGV;

# Parse before touching the output file, so that a failed or empty parse
# does not truncate an existing report.  extract() is defined further down;
# named subs are compiled before the program starts running, so the call
# resolves even though the definition appears later in the file.
my ($t_list, $account_type) = extract($in_name);

# The title is built from the first cell of the first and last rows, so
# there must be at least one row.
die "No transactions found in $in_name\n" unless @$t_list;

# No class="f32" span was seen: keep the title free of "uninitialized"
# warnings; the resulting text is the same as interpolating undef.
$account_type = "" unless defined $account_type;

# Three-argument open with "or": the previous  ">" . $name || die ...  form
# bound "||" to the file name, so a failed open went unnoticed.
open my $out, '>', $out_name
    or die "Couldn't open $out_name for output: $!\n";

my $title = "$account_type from $t_list->[$#$t_list]->[0] to $t_list->[0]->[0]";

# NOTE(review): several literals below are empty, or hold only a variable,
# in this copy of the file.  Markup they were meant to emit may have been
# lost -- confirm against the upstream script before relying on the output.
print {$out} <<"END_OF_TEXT";
$title

$title

END_OF_TEXT

# $cell is declared at file scope because the closing heredoc interpolates
# it after the loops have finished.
my $cell;
my $rowclass = "odd";
foreach my $row (@$t_list) {
    print {$out} "";
    print {$out} "";
    foreach $cell (@$row) {
        print {$out} "";
    }
    print {$out} "\n";

    # Alternate the class name from one row to the next.
    $rowclass = ($rowclass eq "odd") ? "even" : "odd";
}

# foreach restores $cell when the loop ends, so it is undefined here and
# this prints an empty line (with an "uninitialized" warning).
# NOTE(review): looks like an artefact of the lost markup -- confirm.
print {$out} <<"END_OF_TEXT";
$cell
END_OF_TEXT

# Buffered write errors only surface at close time.
close $out or die "Couldn't close $out_name: $!\n";

###############################################################################
{
    # Parser state shared by extract() and the three HTML::Parser handlers.
    my ($in_f3_td, $row_status, $transaction, $transaction_list,
        $in_account_type_span, $account_type, $textbuffer);

    # extract($fname)
    #
    # Parses the HTML file $fname and returns a two-element list:
    #   - a reference to an array of rows, each row a reference to an array
    #     of the trimmed text of its class="f3" cells; a row is kept only
    #     when its first such cell contains a dd/dd/dd pattern;
    #   - the text collected from a class="f32" span (undef if none closed).
    #
    # Dies if the file cannot be read.
    sub extract {
        my ($fname) = @_;

        my $parser = HTML::Parser->new(
            api_version => 3,
            start_h     => [\&start_handler, "tagname, attr"],
            text_h      => [\&text_handler,  "text"],
            end_h       => [\&end_handler,   "tagname"],
        );

        # Start from a clean slate so that a second call is not affected
        # by whatever the previous call left behind.
        $in_f3_td             = 0;
        $in_account_type_span = 0;
        $row_status           = "";
        $textbuffer           = "";
        $account_type         = undef;
        $transaction          = [];
        $transaction_list     = [];

        # parse_file returns undef (and sets $!) when the file can't be opened.
        defined $parser->parse_file($fname)
            or die "Couldn't read $fname: $!\n";

        return ($transaction_list, $account_type);
    }

    ############################################################
    # Start-tag callback: tracks entry into rows, class="f3" cells and
    # class="f32" spans, and records a line break for <br> inside a cell.
    sub start_handler {
        my ($tagname, $attr) = @_;

        if ($tagname eq "tr") {
            $row_status = "start";
        }
        elsif ($tagname eq "td"
               && defined($attr->{class}) && $attr->{class} eq "f3") {
            $in_f3_td = 1;
        }
        elsif ($in_f3_td && $tagname eq "br") {
            # NOTE(review): this copy appends a bare newline; confirm that
            # is the intended separator.
            $textbuffer .= "\n";
        }
        elsif ($tagname eq "span"
               && defined($attr->{class}) && $attr->{class} eq "f32") {
            $in_account_type_span = 1;
        }
    }

    ############################################################
    # Text callback: accumulates text only while inside a tracked element.
    sub text_handler {
        my ($text) = @_;

        if ($in_f3_td || $in_account_type_span) {
            $textbuffer .= $text;
        }
    }

    ############################################################
    # End-tag callback: finishes a cell, a row, or the account-type span.
    sub end_handler {
        my ($tagname) = @_;

        if ($in_f3_td && $tagname eq "td") {
            # Trim surrounding whitespace, including \xA0 characters.
            $textbuffer =~ s/^[\s\xA0]*//s;
            $textbuffer =~ s/[\s\xA0]*$//s;

            if ($row_status eq "start") {
                # The first tracked cell decides whether the row is kept.
                if ($textbuffer =~ m|\d\d/\d\d/\d\d|) {
                    $row_status = "good";
                    push @$transaction, $textbuffer;
                }
                else {
                    $row_status = "bad";
                }
            }
            elsif ($row_status eq "good") {
                push @$transaction, $textbuffer;
            }

            $in_f3_td   = 0;
            $textbuffer = "";
        }
        elsif ($tagname eq "tr") {
            if ($row_status eq "good") {
                push @$transaction_list, $transaction;
            }
            $transaction = [];
            $row_status  = "";
        }
        elsif ($in_account_type_span) {
            # Any other end tag seen while inside the span closes it.
            $account_type         = $textbuffer;
            $textbuffer           = "";
            $in_account_type_span = 0;
        }
    }
    ############################################################
}
###############################################################################