#!/usr/bin/perl # # mimeStrip.pl Version 0.5 ### some suggested changes by Eric... ### kludge 10/2003: remove Time::ParseDate file date setting. ### other 10/2003: improved compatibility to strange headers and data, ### enable From only after empty body line # # - if no "filename" is given in the Content-Disposition, check # for a "name" in the Content-Type. # - boundary match regex changed # # mimeStrip.pl Version 0.4 # # - The content type boundary was not being found # if it was not lower-case # # mimeStrip.pl Version 0.3 # # 03.04.2003 # # - some filenames were not being found # - should run with no args # # mimeStrip.pl Version 0.2 # # 19.02.2003 # - some 'boundaries' were not being matched if they contained # characters that were being interpolated during the match, # using 'index' instead of m// # - the wrong 'envelope' was being assigned to a message, not a # huge problem, but not good either, D'Oh! # require 5.002; # for SUPER use MIME::Base64; use File::Basename; use File::stat; use Fcntl ':flock'; use Getopt::Long; ### use Time::ParseDate; # Usage is: $me --in folder --out folder.out --dir output-directory #if ($#ARGV == -1) { # Usage(); #} $result = GetOptions qw( --in=s --out=s --dir=s --cat! --help! ); if ( "$opt_help" ne "") { Usage(); } if ( "$opt_in" eq "" ) { } else { my $in; $folder=$opt_in; $fs = stat $folder; open(STDIN,"+<$folder") || die "Error: opening input folder $folder\n"; $in = STDIN; # copy to var: Perl 5.8 otherwise complains that the # lock(STDIN) would attempt to modify a constant (2/2005). unless (lock($in)) { close(STDIN); printf STDERR "Error: could not lock folder $folder\n"; } } $cat=("$opt_cat" eq "") ? ">" : ">>"; if ( "$opt_out" eq "" ) { } else { $output=$opt_out; # redirect STDOUT to $output open(STDOUT,"$cat$output") || die "Error: opening output folder $output\n"; } if ( "$opt_dir" eq "" ) { $opt_dir="."; } @header = (); @body = (); $last=0; $date=0; $enableFrom = 1; while() { ### main loop starts here chomp; if (($enableFrom) && (/^From /)) { ### changed to use "\n\nFrom \n" $env = $_; # this is for the next header! $inheader = 1; if (@header == NULL) { $envelope = $env; next; } processMessage(); ### process last message $envelope = $env; ### prepare for next message @header = (); @body = (); $last = 0; $date = 0; next; } $enableFrom = 0; if ($inheader) { ### in headers if (/^$/) { $inheader = 0; @body = (); } elsif (/^Date:(.*)/) { push @header, $_; $last++; $date=$1; } elsif (/^(\s+)(.*)/) { if ($last) { $header[$last-1] .= "\n$1$2"; } else { ### strange thing that this happens at all! printf STDERR "-----\nUnexpected header continuation:\n"; printf STDERR "$1$2\n-----\n"; } next; } elsif (/^(\S+):(.*)/) { push @header, $_; $last++; next; } else { printf STDERR "-----\nUnexpected header entry:\n"; printf STDERR "$_\n-----\n"; next; } } else { ### not in headers if (/^$/) { ### enable From only in body after an empty line $enableFrom = 1; } } push @body,$_; }; processMessage(); unlock(STDIN); close(STDIN); if ($fs) { chmod $fs->mode, $output; chown $fs->uid,$fs->gid, $output; } sub aprint { $out=$_[0]; shift; foreach $e (@_) { print $out "$e\n"; } } sub processMessage { my $boundary=""; # look for multipart in Content-Type header foreach $h (@header) { if ($h =~ /^Content-Type:(.*)/i) { $_ = $h; if (/multipart\/mixed/i) { #if (/boundary="(\S+)"/i) << replace with the regex below to deal with #boundaries without quotes if (/boundary\s*=[\s"]*([\S]+[^"]+)/) { $boundary=$1; last; } } } } print STDOUT "$envelope\n"; print STDERR "$envelope\n"; # print STDERR "."; # a bit of feedback to stderr aprint(STDOUT,@header); if ($boundary) { my @mimepart=(); my $nbody=$#body+1; for ($i=0; $i < $nbody; $i++) { $_ = $body[$i]; if (-1 != index $_,"--$boundary") { next if $#mimepart == -1; # process mimepart if ($trencode =~ /base64/i) { if ($cdisp =~ /filename="(.*)"/i) { # get rid of any path specs $filename= basename "$1"; } elsif ($cdisp =~ /filename=(.*)/i) { # get rid of any path specs $filename= basename "$1"; } elsif ($ctype =~ /name="(.*)"/i) { $filename= basename "$1"; } elsif ($ctype =~ /name=(.*)/i) { $filename= basename "$1"; } else { print STDERR "\n Warning: no filename given\n"; $filename="noname.bin"; } ### remove ALL unwanted chars like [] and ""... ### we change all but a-z A-Z 0-9 . - _ into _ $filename =~ tr/a-zA-Z0-9._-/_/c; # account for duplicates $filename = uniqueName("$opt_dir/$filename"); if (open(FILE,">$filename")) { print STDERR " Writing $filename\n"; binmode(FILE); $go=0; foreach $mp (@mimepart) { $_ = $mp; if ($go == 0) { # start processing after reaching a blank line in @mimepart ### changed to allow "--" as well (some viruses...) $go = 1 if (/^$/); $go = 1 if (/^--$/); next; } next if ( /^$/ ); # skip blank lines next if ( /^--$/ ); ### skip special lines last if (/--$boundary/); # stop at boundary $decoded = decode_base64($mp); print FILE $decoded; } close(FILE); if ($fs) { chmod $fs->mode, $filename; chown $fs->uid,$fs->gid, $filename; } if ($date) { ### my $mtime=parsedate($date); ### utime $mtime, $mtime, $filename if $mtime; ### kludge - setting file time would need Time::ParseDate ### so I simply commented this part out. } # tell the L^Huser where their attachment is print STDOUT "Content-Type: text/plain; charset=us-ascii\n"; print STDOUT "Content-Transfer-Encoding: 7bit\n\n"; print STDOUT "*****\n"; print STDOUT "***** Content-Type: $ctype\n"; print STDOUT "***** Content-Transfer-Encoding: $trencode\n"; print STDOUT "***** Content-Description: $cdesc\n"; print STDOUT "***** Content-Disposition: $cdisp\n"; print STDOUT "*****\n\n"; if ($date) { print STDOUT "***** Date: $date\n"; ### just show it... } print STDOUT "***** Attached file saved to disk: $filename\n\n"; } else { printf STDERR "\n Error: could not open attachment file $filename\n"; aprint(STDOUT,@mimepart); } } else { aprint(STDOUT,@mimepart); } print STDOUT "$body[$i]\n"; # print the boundary marker @mimepart = (); $filename=""; $trencode=""; $ctype=""; $cdisp=""; $cdesc=""; } else { if ( /^Content-Transfer-Encoding:(.*)/i ) { $trencode=$1; } elsif( /^Content-Type:(.*)/i ) { $ctype=$1; while ( /;$/ ) { $ctype .= $body[++$i]; $_ .= $body[$i]; } } elsif( /^Content-Disposition:(.*)/i ) { $cdisp=$1; while(/;$/) { $cdisp .= $body[++$i]; $_ .= $body[$i]; } } elsif( /^Content-Description:(.*)/i ) { $cdesc=$1; } push @mimepart,$_; } } aprint(STDOUT,@mimepart); } else { aprint(STDOUT,@body); } print STDOUT "\n"; } sub uniqueName { #$filename = uniqueName("$opt_dir/$filename"); my $f, $p, $g, $x; ($f,$p) = fileparse($_[0]); @chunks = split( /\./, $f ); $x = ".bin"; if ($#chunks > 0) { $f = $chunks[0]; $x = ".$chunks[$#chunks]"; for (my $i=1; $i<$#chunks;$i++) { $f .= ".$chunks[$i]"; } } my $g = "$p$f$x"; while( -f "$g" ) { ++$n; $g="$p$f-$n$x"; } return $g; } sub lock { # true on success $rval = flock($_[0],LOCK_EX | LOCK_NB); # exclusive lock, non blocking return $rval; } sub unlock { $rval = flock($_[0],LOCK_UN); return $rval; } sub Usage() { my $usage; my $me=basename $0; $usage=< user.stripped USAGE print $usage; exit(1); }