#!/usr/bin/perl -w
use strict;
use Data::Dumper;
use DBI();
my $INFILE = "taxdata.csv";
open(my $fh, "<", $INFILE) or die "Cannot open $INFILE for reading: $!";
my $database = 'taxbrackets';
my $hostname = '127.0.0.1';
my $username = 'root';
my $password = '';
my $dsn = "DBI:mysql:database=$database;host=$hostname;port=3306";
my $dbh = DBI->connect($dsn, $username, $password) or die "Couldn't connect to MySQL server at $hostname (user $username): $!";
# $dbh->trace(2);
my %YEARS;
while (defined(my $year_line = <$fh>)) {
chomp($year_line);
print "$year_line\n";
# if ($line =~ /^,,(Nominal),,,,(\d{4}),,,,,,,,,,,,,,(Adjusted for Inflation)?,,,,(\d{4}),,,CPI: ([\d\.]+)(?: \(Projected\))?,,,,,,,$/) {
# ,,Nominal,,,,2011,,,,,,,,,,,,,,Adjusted for Inflation,,,,2011,,,CPI: 224.380 (Projected),,,,,,,
# ,,Nominal,,,,2010,,,,,,,,,,,,,,Adjusted for Inflation,,,,2010,,,CPI: 218.056,,,,,,,
if ($year_line =~ /,,(Nominal),,,,(\d{4})(?: \(\w\))?,,,,,,,,,,,,,,(Adjusted for Inflation)?,,,,(\d{4})(?: \(\w\))?,,,CPI: ([\d\.]+)(?: \(Projected\))?,,,,,,,/) {
my $yr = $2;
my $cpi = $5;
print "year: $yr cpi: $cpi\n";
# Filing status line.
my $filing_status_line = <$fh>;
chomp($filing_status_line);
my @statuses = ($filing_status_line =~ /,(Married Filing Jointly),,,,(Married Filing Separately),,,,(Single),,,,(Head of Household),,,,,,(Married Filing Jointly),,,,(Married Filing Separately),,,,(Single),,,,(Head of Household),,,/);
print "$filing_status_line\n";
print "^... is a filing status line: " . join(", ", map { defined($_) ? "'$_'" : 'undef' } @statuses) . "\n";
# Skip headers...
<$fh>; # skip "Marginal"|"Tax Brackets"
<$fh>; # skip "Tax Rate"|"Over"|"But Not Over"
# Compose nominal & adjusted lists for this year.
my @notes;
my %nominal;
my %adjusted;
while (defined(my $data_line = <$fh>)) {
chomp($data_line);
print "$data_line\n";
# Match & extract columns.
my $percent = '[\d\.]+%';
my $dollar = '\$[\d,]+|-';
my $q = '"?';
my $colgroup = "(?:($percent)?,$q($dollar)?$q,$q($dollar)?$q|,$q.+$q,)";
my $datagroup = "$colgroup,,$colgroup,,$colgroup,,$colgroup";
my $line_regex = "$datagroup,,,,$datagroup";
# Columns:
# Nom Adj
# MFJ MFS Sng HoH MFJ MFS Sng HoH
# R O B R O B R O B R O B R O B R O B R O B R O B
if ($data_line =~ /^,{34}/) {
print "^... is a blank line.\n";
last;
} elsif (my @matches = ($data_line =~ /^$line_regex/)) {
print "^... is a data line: " . join(", ", map { defined($_) ? "'$_'" : 'undef' } @matches) . "\n";
# print Dumper(@matches) . "\n\n";
@matches = map { if (defined($_)) { s/[,\$%]//g; /^[\d\.]+$/ ? $_ * 1.0 : $_; } } @matches;
# Nominal data group.
NOMINAL: for my $i (0..3) {
my $first = $i * 3;
# This column is blank / has a note instead of data.
next NOMINAL if $matches[$first] eq '';
my %rate;
my @keys = ('rate', 'from', 'to');
for my $i (0..2) {
$rate{$keys[$i]} = $matches[$first + $i];
}
my $status = $statuses[$i]; # status this column is under
$dbh->do("INSERT INTO `brackets` (`year`, `status`, `rate`, `from`, `to`, `dollars`, `cpi`)" .
"VALUES (?, ?, ?, ?, ?, ?, ?)",
undef, $yr, $status, $rate{'rate'}, $rate{'from'}, $rate{'to'} eq '-' ? undef : $rate{'to'}, 'nominal', $cpi);
if (!defined($nominal{$status})) {
$nominal{$status} = ();
}
push @{ $nominal{$status} }, { %rate };
}
# Adjusted data group.
ADJUSTED: for my $i (0..3) {
my $first = $i * 3 + 12;
# This column is blank / has a note instead of data.
next ADJUSTED if $matches[$first] eq '';
my %rate;
my @keys = ('rate', 'from', 'to');
for my $i (0..2) {
$rate{$keys[$i]} = $matches[$first + $i];
}
my $status = $statuses[$i]; # status this column is under
$dbh->do("INSERT INTO `brackets` (`year`, `status`, `rate`, `from`, `to`, `dollars`)" .
"VALUES (?, ?, ?, ?, ?, ?)",
undef, $yr, $status, $rate{'rate'}, $rate{'from'}, $rate{'to'} eq '-' ? undef : $rate{'to'}, 'adjusted');
if (!defined($adjusted{$status})) {
$adjusted{$status} = ();
}
push @{ $adjusted{$status} }, { %rate };
}
# 1, 2, 3: $nom{$statuses[0]}
# 4, 5, 6: $nom{$statuses[1]}
# 7, 8, 9: $nom{$statuses[2]}
# 10, 11, 12: $nom{$statuses[3]}
} elsif ($data_line =~ /^"?([^,].+)"?,{18}"?([^,].+)"?,{16}/) {
my $note = $1;
print "^... is a note line: '$note'\n";
$note =~ s/""/"/g;
$note =~ s/^\(\w\) //g;
push @notes, $note;
} else {
print "^ line did not match.\n";
}
# print Dumper(%nominal) . "\n\n";
# print Dumper(%adjusted) . "\n\n";
# last;
}
for (@notes) {
$dbh->do("INSERT INTO `notes` (`year`, `note`)" .
"VALUES (?, ?)",
undef, $yr, $_);
}
$YEARS{$yr} = {
'nominal' => { %nominal },
'adjusted' => { %adjusted },
'notes' => [ @notes ]
};
# last if (scalar keys %YEARS) == 40;
} else {
print "^ line did not match.\n";
}
# last;
}
print Dumper(%YEARS) . "\n\n";
$dbh->disconnect();
# %YEARS = {
# 1913: { # %year
# nominal: { # %nom
# 'Married Filing Jointly': [
# {
# rate: 1%,
# over: 10000,
# but_not_over: 100000
# },
# {
# rate: 2%,
# over: 20000,
# but_not_over: 200000
# },
# ],
# 'Married Filing Separately': {
# rate: 1%,
# over: 10000,
# but_not_over: 100000
# }
# },
# adjusted: { # %adj
#
# },
# notes: { # @notes
#
# }
# },
# 1914: {
#
# },
# ...
# }
# [
# {
# "year": 1913,
# "cpi": ...,
# "statuses": {
# "Married Filing Jointly": [
# ...
# ]
# },
# "notes": [
# ...
# ]
# }
# ]