#!/usr/bin/perl -w
#
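# To run down one column of a textual data table that uses '|' for separating columns,
#    and substitute the 2-character USPS abbreviations for state names.  Does not handle
#    abbreviations of 3 or more characters yet.  The columns are assumed to be numbered
#    starting with 1, not 0, for designating which column to translate!
#
# NOTE: the code below does not perform that translation yet.  It currently copies each
#    row to FILEOUT with one extra leading column holding the last name extracted from
#    the first column; the <column-to-translate> argument is required but not used.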
#
# Usage: statestocodes.pl  <FILEIN>  <FILEOUT>  <column-to-translate>
#
# Example:  statestocodes.pl pr.file.csv  pr.statecoded.txt 2
#
# David Harris, Version of 3 Jan 2006 02:30 GMT

# ------------------------------------------------

# Copy a '|'-separated table from FILEIN to FILEOUT, prefixing every row with a
# new first column that holds the last name taken from the row's original first
# column.  Arguments: <FILEIN> <FILEOUT> <target-column-number>.
use strict;
use warnings;

# Check the number of arguments on the command line:
if (@ARGV != 3) {
  die "statestocodes.pl needs <FILEIN> <FILEOUT> <target-column-number> as arguments";
}
# The third argument is still required so existing command lines keep working,
# but nothing below consults it.
my ($in_path, $out_path) = @ARGV[0, 1];

# Three-argument open with lexical handles: the file names cannot be
# misinterpreted as open modes, and $! reports why an open failed.
open(my $in_fh, '<', $in_path)
  or die "statestocodes.pl FILEIN file $in_path could not be opened: $!\n";
open(my $out_fh, '>', $out_path)
  or die "statestocodes.pl FILEOUT file $out_path could not be created: $!\n";

# Read through FILEIN one row at a time:
while (my $line = <$in_fh>) {
  chomp $line;

  # A limit of -1 keeps trailing empty columns, so a row such as "a|b||" is
  # written back with all of its columns instead of being truncated.
  my @columns = split /\|/, $line, -1;
  # split returns an empty list for an empty line; treat that as a single
  # empty column so the output row is still "|".
  @columns = ('') unless @columns;

  # Isolate just the last name: drop everything up to and including the last
  # space that is followed by word characters.  The previous pattern ended in
  # "$\)", which Perl reads as the $\ variable followed by ")"; it only worked
  # while $\ was undefined and warned on every row.  This is the same pattern
  # with that accidental interpolation removed.
  (my $last_name = $columns[0]) =~ s/.*\ (\w+)/$1/;

  # Last name first, then the original columns; no '|' after the last column.
  print {$out_fh} join('|', $last_name, @columns), "\n";
}

# Buffered write errors only surface when the output handle is closed.
close $out_fh
  or die "statestocodes.pl FILEOUT file $out_path could not be closed: $!\n";
close $in_fh;
exit;
