#!/usr/bin/perl -w
#
# usbank_html_reformatter.pl
#
# Copyright (c) 2003 by Forrest Cahoon
#
# This program takes as input html saved from a USBank online account
# webpage, and outputs html which is much more useful for keeping track
# of your finances.
#
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# Usage: perl usbank_html_reformatter.pl input.html output.html
use HTML::Parser;
use strict;
# Main program: validate the command line, parse the saved USBank page,
# then write the report to the output file.
die "Usage: perl usbank_html_reformatter.pl input.html output.html\n"
    unless defined $ARGV[0] && defined $ARGV[1] && -e $ARGV[0];

# $row, $cell and $rowclass are not used by the code visible here; they are
# kept declared in case later code relies on them.
my ($t_list, $account_type, $title, $row, $cell, $rowclass);

# Parse BEFORE opening the output file, so a failed or empty parse does not
# leave a truncated output file behind.
($t_list, $account_type) = extract($ARGV[0]);

# The title below indexes the first and last rows; fail with a clear message
# instead of tripping over an empty list.
die "No transactions found in $ARGV[0]\n" unless @$t_list;

# extract() leaves the account type undefined when no account-type span was
# seen; use an empty string so the title interpolation does not warn.
$account_type = "" unless defined $account_type;

# Three-argument open with low-precedence "or".  The previous form,
#   open OUT, ">" . $ARGV[1] || die ...
# bound "||" to the (always true) filename string, so a failed open was
# never reported.
open my $out, ">", $ARGV[1]
    or die "Couldn't open $ARGV[1] for output: $!\n";

# Element 0 of every row is the cell that matched the date pattern in
# end_handler.  The title runs from the last row's date to the first row's
# date (presumably the page lists newest transactions first -- confirm).
$title = "$account_type from $t_list->[$#$t_list]->[0] to $t_list->[0]->[0]";
print {$out} <<"END_OF_TEXT";
$title
$title
END_OF_TEXT

# Buffered write errors only surface at close time, so check it.
close $out or die "Couldn't close $ARGV[1]: $!\n";
###############################################################################
# Parsing state shared by extract() and the HTML::Parser callbacks below.
# The enclosing bare block keeps these lexicals private to this group of subs.
{
    my ($in_f3_td,              # true while inside a <td class="f3">
        $row_status,            # "", "start", "good" or "bad" for the current <tr>
        $transaction,           # array ref: cell texts of the row being read
        $transaction_list,      # array ref: all rows accepted so far
        $in_account_type_span,  # true while inside a <span class="f32">
        $account_type,          # text captured from the f32 span, if any
        $textbuffer);           # text accumulated for the current cell/span

    # extract($fname)
    #
    # Parses the HTML file $fname and returns a two-element list:
    #   - an array ref of rows, each row an array ref of trimmed cell texts
    #     taken from <td class="f3"> cells; only rows whose first such cell
    #     contains something shaped like dd/dd/dd are kept;
    #   - the text captured from a <span class="f32">, or undef if none was
    #     seen.
    # Dies if the file cannot be opened by the parser.
    sub extract {
        my ($fname) = @_;

        my $parser = HTML::Parser->new(
            api_version => 3,
            start_h     => [\&start_handler, "tagname, attr"],
            text_h      => [\&text_handler,  "text"],
            end_h       => [\&end_handler,   "tagname"],
        );

        # Reset ALL shared state, including the account-type fields, so a
        # second call never reports values left over from an earlier file.
        $in_f3_td             = 0;
        $in_account_type_span = 0;
        $account_type         = undef;
        $row_status           = "";
        $textbuffer           = "";
        $transaction          = [];
        $transaction_list     = [];

        # parse_file returns undef when the file cannot be opened; report
        # that instead of silently returning an empty result.
        defined $parser->parse_file($fname)
            or die "Couldn't read $fname: $!\n";

        return ($transaction_list, $account_type);
    }

    # start_handler($tagname, $attr)
    #
    # Start-tag callback: tracks entry into table rows, f3 cells and the
    # f32 span, and records a <br> inside an f3 cell as a newline.
    sub start_handler {
        my ($tagname, $attr) = @_;

        if ($tagname eq "tr") {
            # A new row: its first f3 cell decides whether it is kept.
            $row_status = "start";
        }
        elsif ($tagname eq "td"
               && defined($attr->{class})
               && $attr->{class} eq "f3") {
            $in_f3_td = 1;
        }
        elsif ($in_f3_td && $tagname eq "br") {
            $textbuffer .= "\n";
        }
        elsif ($tagname eq "span"
               && defined($attr->{class})
               && $attr->{class} eq "f32") {
            $in_account_type_span = 1;
        }
    }

    # text_handler($text)
    #
    # Text callback: accumulates text only while inside an f3 cell or the
    # f32 span; all other text is ignored.
    sub text_handler {
        my ($text) = @_;
        if ($in_f3_td || $in_account_type_span) {
            $textbuffer .= $text;
        }
    }

    # end_handler($tagname)
    #
    # End-tag callback: finishes the current cell, row or account-type span.
    sub end_handler {
        my ($tagname) = @_;

        if ($in_f3_td && $tagname eq "td") {
            # Trim surrounding whitespace, including \xA0 characters.
            $textbuffer =~ s/^[\s\xA0]*//s; $textbuffer =~ s/[\s\xA0]*$//s;

            if ($row_status eq "start") {
                # First f3 cell of the row: keep the row only if this cell
                # contains a dd/dd/dd pattern.
                if ($textbuffer =~ m|\d\d/\d\d/\d\d|) {
                    $row_status = "good";
                    push @$transaction, $textbuffer;
                }
                else {
                    $row_status = "bad";
                }
            }
            elsif ($row_status eq "good") {
                push @$transaction, $textbuffer;
            }
            # Cells of a "bad" row are dropped.

            $in_f3_td   = 0;
            $textbuffer = "";
        }
        elsif ($tagname eq "tr") {
            if ($row_status eq "good") {
                push @$transaction_list, $transaction;
            }
            $transaction = [];
            $row_status  = "";
        }
        elsif ($in_account_type_span) {
            # NOTE(review): any end tag not consumed by the two branches
            # above ends the capture, not only </span>.  Kept as is; it may
            # be deliberate tolerance of sloppy markup -- confirm.
            $account_type         = $textbuffer;
            $textbuffer           = "";
            $in_account_type_span = 0;
        }
    }
}
###############################################################################