#!/usr/bin/perl
# decode.pl -- Version 1.00.10 98.04.28 15:45
# Filter to convert QP or Base64 text/plain and text/html content
# to 8-bit clean, for further filtering by transliteration programs
# (eg. elot2ascii.pl)
# (c) 1997, 1998, Hellenic Resources Institute, Inc.

foreach $Arg (@ARGV) {
  if($Arg =~ /^-v$/) {
    $verbose = 1;
  }
}

@temp = <STDIN>;

$i = 0;
while($i <= $#temp) {                                     # Prepare headers for decoding
  if(@temp[$i+1] =~ /^\n?$/) { last; }                    # Stop if headers are over.
  if(@temp[$i+1] =~ /^\s{1,}/) {                          # If following header line begins with a blank, we append
    @temp[$i] =~ s/\s*$//;                                # Clean trailing spaces before continuation line
    @temp[$i+1] =~ s/^\s*//;                              # Clean leading spaces on continuation line
    @temp[$i] = @temp[$i] . " " . @temp[$i+1];            # Merge continuation line
    @temp[$i] =~ s/\?=(\s*)=\?/?==?/gi;                   # Remove redundant spaces between encoded content
    @temp[$i] =~ s/([^\s\:\,])\s=\?/$1=\?/gi;             #   "        "        "      "    encoded content and plain text
    @temp[$i] =~ s/\?=[ \t]*([^<])/\?=$1/gi;              #   "        "        "      "    encoded content and plain text
    splice(@temp, $i+1, 1);
  } else {
    @temp[$i] =~ s/([^\?])=$/$1\?=/;                      # TERRIBLE kludge, for erroneously closed encoded header strings
							  # (closed by '=$' instead of '?=$') -- dep 98-04-28 15:45
    $i++;
  }
}
$i = 0;
while($i <= $#temp) {                                     # Decode headers
  $_ = @temp[$i];
  if(/^\n?$/) { last; }                                   # Stop if headers are over.
  while(/=\?[^\?]*\?([BQ])\?([^\?]*)\?=/) {
    $text = &Decode($2, $1, 1);
    s/=\?([^\?]*)\?([BQ])\?([^\?]*)\?=/$text/i;
    @temp[$i] = $_;
  }
  $i++;
}
$i = 0;
while($i <= $#temp) {
  if((@temp[$i] =~ /^Content-Type\:\s*\w*\/\w*\;\s*$/i)&&
     (@temp[$i+1] =~ /^\s{1,}/i)) {
    @temp[$i] =~ s/\s*$//;
    @temp[$i+1] =~ s/^\s*//;
    @temp[$i] = @temp[$i] . " " . @temp[$i+1];
    splice(@temp, $i+1, 1);
  } else {
    $i++;
  }
}
$i = 0;
while($i <= $#temp) {                                     # Establish message format: Is it multipart?
  $_ = @temp[$i];
  if(/^Content-Type\:\s*multipart\/(mixed|alternative)\;\s*boundary=\"([^\"]*)\"/i) {
    push(@boundary, $2);
    if($verbose) {
      print STDERR "$i: Content-Type: multipart/$1; boundary=\"@boundary[$#boundary]\"\n";
    }
    $multipart = 1;
  }
  if(/^Content-Type\:\s*text\/(plain|enriched|html)\;\s*charset=\"?([^\"]*)\"?/i) {
    $type = $1;
    $charset = $2;
    $multipart = 0;
    $j = $i + 1;
    while(($j < $#temp)&&(@temp[$j] !~ /^$/)) {
      if(@temp[$j] =~ /Content-Transfer-Encoding:/i) {
	$encoding = @temp[$j];
	$encoding =~ s/^Content-Transfer-Encoding:\s*(.*)\s*$/$1/i;
	@temp[$j] =~ s/^(Content-Transfer-Encoding:)\s*.*$/$1 8bit/i;
	if($verbose) {
	  print STDERR "$i: Content-Type: $type, encoded using $encoding\n";
	}
	last;
      }
      $j++;
    }
  }
  if(/^\n?$/) { last; }                                   # Stop if headers are over.
  $i++;
}
if($multipart == 1) {                                     # Process a multipart message.
  $i = 0;
  while(@temp[$i++] !~ /^\n?$/) { nop; }                  # Skip headers.
  while($i <= $#temp) {
    if(@temp[$i] =~ /^\-\-@boundary[0]\-\-$/) { last; }      # Message is over.
    if((@temp[$i] =~ /^\-\-@boundary[$#boundary]\-\-$/)&&($#boundary > 0)) {
      if($verbose) {
	print STDERR "$i: ** Exiting Sub-Message\n";
      }
      pop(@boundary);
    }
    if(@temp[$i] =~ /^\-\-@boundary[$#boundary]$/) {                  # Beginning of part in message. 
      @temp[$i-1] =~ s/\n?$/\n/;
      if($verbose) {
	print STDERR "$i: Found Boundary @boundary[$#boundary]\n";
      }
      $type = @temp[++$i];                                # Find the type.
      $type =~ s/^Content-Type:\s*([^;]*);.*\n/$1/;       # If optional parameter is present.
      $type =~ s/^Content-Type:\s*([^;]*)\n/$1/;          # If optional parameter is not present.
      if($type =~ /text\/(plain|enriched|html)/i) {                       # Text type, will convert to 8bit
	while(@temp[++$i] !~ /^Content-Transfer-Encoding:/i) {
	  if(@temp[$i] =~ /^$/i) {
	    splice(@temp, $i, 0, "Content-Transfer-Encoding: 7bit");
	    last;
	  }
	}
	$encoding = @temp[$i];
	$encoding =~ s/^Content-Transfer-Encoding:\s*(.*)\s*$/$1/i;
	@temp[$i] =~ s/^(Content-Transfer-Encoding:)\s*.*$/$1 8bit/i;
	if($verbose) {
	  printf STDERR "%u: Content-Type: %s, encoded using %s\n", $i-1, $type, $encoding;
	}
	while(@temp[$i++] !~ /^\n?$/) { nop; }
	if($encoding =~ /^quoted-printable$/i) {             # Remove soft breaks for QP encoded section
	  $j = $i;
	  while(@temp[$j] !~ /^\-\-@boundary[$#boundary](\-\-)?$/) {
	    if(@temp[$j] =~ /=\n?$/) {
	      @temp[$j] =~ s/=\n?$//;
	      @temp[$j] = @temp[$j] . @temp[$j+1];
	      splice(@temp, $j+1, 1);
	    } else {
	      $j++;
	    }
	  }
	}
	while((@temp[$i] !~ /^\-\-@boundary[$#boundary](\-\-)?$/)&&($i < $#temp)) {    # Decode until the end of the section
	  @temp[$i] = &Decode(@temp[$i], $encoding, 0);
	  $i++;
	}
      } else {
	if(@temp[$i] =~ /^Content-Type\:\s*multipart\/alternative\;\s*boundary=\"([^\"]*)\"/i) {
	  if($verbose) {
	    print STDERR "$i: ** Entering Sub-Message\n";
	    print STDERR "$i: $temp[$i]";
	  }
	  push(@boundary, $1);
	} else {
	  $encoding = @temp[$i+1];
	  $encoding =~ s/^Content-Transfer-Encoding:\s*(.*)\s*$/$1/i;
	  if($verbose) {
	    print STDERR "$i: Content-Type: $type, encoded using $encoding\n";
	  }
	  while(@temp[$i++] !~ /^\-\-@boundary[$#boundary](\-\-)?$/) { nop; }  # I'm in a hurry, skip to the end
	  if($verbose) {
	    print STDERR "$i: Found Boundary @boundary[$#boundary]\n";
	  }
	}
      }
    } else {
      $i++;
    }
  }
} else {                                                  # Not multipart, single content text
  $i = 0;
  while(@temp[$i++] !~ /^\n?$/) { nop; }                  # Skip headers
  if($encoding =~ /^quoted-printable$/i) {                   # If QP, remove soft breaks
    $j = $i;
    while($j <= $#temp) {
      if(@temp[$j] =~ /=\n?$/) {
	@temp[$j] =~ s/=\n?$//;
	@temp[$j] = @temp[$j] . @temp[$j+1];
	splice(@temp, $j+1, 1);
      } else {
	$j++;
      }
    }
  }
  while($i <= $#temp) {                                   # Decode the message text
    @temp[$i] = &Decode(@temp[$i], $encoding, 0);
    $i++;
  }
}
while (@temp[$#temp] =~ /^$/) {                           # Remove trailing blank lines
  splice(@temp, $#temp, 1);
}
print @temp;                                              # OK, output it and go home
exit;

sub Decode {                                              # Oh, yeah, here's how we decode
  local($string, $encoding, $headers) = @_;
  if(($encoding =~ /^q$/i)||($encoding =~ /quoted-printable/i)) {
    $string =~ s/=([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
    if($headers) {
      $string =~ s/_/ /g;
    }
  } else {
    if(($encoding =~ /^b$/i)||($encoding =~ /base64/i)) {
      $string =~ tr#A-Za-z0-9+/##cd;                 # remove non-base64 chars
      $string =~ tr#A-Za-z0-9+/# -_#;                # convert to uuencoded format
      $len = pack("c", 32 + 0.75*length($string));   # compute length byte
      $string = unpack("u", $len . $string);         # uudecode and print
    }
  }
  $string;
}
