#!/usr/bin/perl
#
# ------------------------------------------------------------
# script:  mboxscrub.pl
# Authors: David Wood (dwood@plugged.net.au)
# Date:    15 January 1999
#
# Purpose: Removes unwanted MIME attachments from an mbox-style 
# electronic mailbox.
#
# Usage:
#	1.  Adheres to mbox format as most commonly used; therefore may
#	introduce extra blank lines at the end of messages into NS mailboxes.
#	2.  Uses whatever From_ line found in the original box, whether 'std'
#	mbox format or not.
#
# Location of the script:  Immaterial to operation.
#
# Copyright (c) 1999 Plugged In Software Pty Ltd.
# Released under the GNU GPL - see http://www.gnu.org/copyleft/gpl.html
#
# Modification History:
#
# ------------------------------------------------------------

# ---------------------------------------------------------
# |            MAIN PROGRAM                               |
# ---------------------------------------------------------
# Packages
use MIME::Parser;                  # The MIME-Tools Parser (reads/writes
                                   # MIME entities).
use Term::ReadLine;                # Readline library (used for user input).
use Getopt::Std;                   # Used to get command line arguments.

# Set initial variables.

$debug = 0;                        # Set to non-zero to show debugging info.
$archive = 0;                      # Set to non-zero if compressing output.
$archiver = '/bin/gzip';           # Program to use for file compression.
$archive_subscript = '.gz';        # File extension for compressed files.

$allowed_length = 0;               # Size in bytes above which MIME entities
                                   # are checked.
$mbox_file = "";                   # Mailbox filename.

# Variables used with the Term::ReadLine module to get user input:
$term = new Term::ReadLine;        # The Readline object.
$length_prompt = "Enter the maximum size in bytes of attachments " .
                 "you wish to keep. [#/q] ";
$file_prompt = "Please enter the filename of your mailbox. [#/q] ";
$attach_prompt = "Do you wish to delete this attachment? [y/N/q] ";
$OUT = $term->OUT || STDOUT;       # Where the terminal output goes.

# Variables related to the current message.
$msg = "";                         # Current message contents
$msg_num = 0;                      # The (linear) number of the current message.
$from_ = "";                       # The From_ line for the current message.
$date = localtime(time);           # The current time/date.
# The message created when an attachment is deleted.
$info_start = "This attachment has been deleted by $0 on $date.  " .
		"The original attachment's headers were:\n\n";

# Variables used with MIME-Tools.
my($parser) = new MIME::Parser;    # The Parser object.
$parser->output_dir("/tmp");       # The temporary directory to use
                                   # when creating temporary files for
                                   # messages and MIME entities.

# Get command line arguments.
# Options: -l sets the allowed length of attachments,
#          -f takes a filename of an mbox.
#          -u shows the usage statement.
#          -a compresses the output file.
#          -d shows debugging information.
getopts('ul:f:ad');

# Return a usage statement if asked.
if ( $opt_u ) {
	print<<ENDOFUSAGE;

$0 - an interactive script which allows you to scrub 
                 mbox-style mailboxes of unwanted MIME attachments.

      Synopsis:  $0 [-u] [-l <length>] [-f <file>]
      where <length> is the maximum size of attachments to ignore; and
            <file> is the path and filename of an mbox-style mailbox.

ENDOFUSAGE

	exit(0);

}

print "\nWelcome to $0!\n\n";

# Turn on debugging reports, if we have been requested to.
if ( $opt_d ) { $debug = 1; } 

# If we don't have an allowed attachment length, ask for it.
$allowed_length = $opt_l;
unless ( $allowed_length ) {

	$answer = $term->readline($length_prompt);
	warn $@ if $@;
	$term->addhistory($_) if /\S/;
	if ( $answer =~ /^q/i ) {
		print "ABORTED by user\n";
		exit(0);
	} else {
		$allowed_length = $answer;
	}
}

# If we don't have a mailbox file, ask for it.
$mbox_file = $opt_f;
unless ( $mbox_file ) {

	$answer = $term->readline($file_prompt);
	warn $@ if $@;
	$term->addhistory($_) if /\S/;
	if ( $answer =~ /^q/i ) {
		print "ABORTED by user\n";
		exit(0);
	} else {
		$mbox_file = $answer;
		$mbox_file =~ s/\s$//;
	}
}

# Check to see if we can read the input file.
# NOTE that Win32 may place pwd in perl dir...
unless ( -r $mbox_file ) {
	print "File $mbox_file is not readable: $!  Exiting...\n";
	exit(0);
}

if ( $opt_a ) { $archive = 1; }

# Open a mailbox and step through it, one message at a time.
open(MBOX, $mbox_file) or die "Can't open $mbox_file: $!";

# Open a new mailbox file, so that we can write to it.
$newfile = $mbox_file . "_scrubbed";
open(NEWBOX, ">$newfile") or die "Can't open $newfile: $!";

$msg_started = 0;
while(<MBOX>) {

	# Read in one message, then write it to a temporary
	# file and operate on it.

	# The current line.
	$line = $_;

	if ( ($line =~ /^From /) && (! $msg_started) ) {

		# Note that we've found the start of the first message.
		$msg_started = 1;

		# Keep the From_ line.
		$from_ = $line;

	} elsif ( ($line =~ /^From - /) && ($msg_started) ) {

		# We found the end of a message and the start of
		# a new one.  We now operate on the completed message.

		# Increment the message number.
		$msg_num++;

		# Operate on current message.
		&check_message();

		# Set the From_ line for the next message.
		$from_ = $line;

	} else {
		# We're in the middle of reading a message, so just append
		# to the current message.
		$msg .= $line;
	}

} # end while reading MBOX.

# Need to handle the last message read!
$msg_num++;
&check_message();

# Archive (compress) the output mailbox if the user so requested.
if ( $archive ) {
       &close_fhs();
       `$archiver $newfile`;
       $newfile .= $archive_subscript;
}

# Tell the user the filename that was created.
print "\nYour scrubbed mailbox is in the file " . $newfile . ".\n";

&exit();



# ---------------------------------------------------------------
# |  Everything below here is a subroutine.                     |
# ---------------------------------------------------------------
sub abort {

	print "\nABORTED by user.\n";

	# Delete output file!
	&close_fhs();
	unlink($newfile);
	
	&clean();
	&exit();

} # end sub abort


sub clean {

	# Clean up any temporary files left on disk.
	#$entity->purge;

	# Forget about the current message.
	undef($entity);
	undef(@parts_to_keep);
	undef($msg);

} # end sub clean


sub close_fhs {

	# Close the mailboxes.
	close(MBOX);
	close(NEWBOX);

} # end sub close_fhs


sub exit {

        &close_fhs();

	# Say Goodbye.
	print "Finished.\n\n";
	exit(0);

} # end sub exit


sub check_message {
	# Operate on a given message, asking the user
	# for each large attachment found.

	# DBG
	print "Message #" . $msg_num . ":\n" if $debug;

	$entity = $parser->parse_data($msg) or die 
		"ERROR:  Couldn't parse MIME stream: $!";

	# Compute Content-Length headers for each body part.
	$entity->sync_headers(Length=>'COMPUTE');

	# Prepare some explanatory information.
	$summarized = 0;
	$head = $entity->head;
	$msg_summary = "     Message Summary for message #" . $msg_num . ":\n" .
		"     From   : " . $head->get('From') .
		"     To     : " . $head->get('To') .
		"     Subject: " . $head->get('Subject') .
		"     Date   : " . $head->get('Date') . "\n";

	# DBG
	print "Dumping skeleton for message #" . $msg_num . ":\n" if $debug;
	$entity->dump_skeleton if $debug;

	@parts = $entity->parts;

	$i = 0;
	foreach $part (@parts) {

		$part_head = $part->head;

		# DBG
		print "PART $i:\n\n" if $debug;
		print $part_head->print . "\n\n" if $debug;

		# Print out info only if the length of the attachment
		# is greater than the set value.
		$length = $part_head->get('Content-length');
		if ( $length > $allowed_length ) {

			# Print the summary information for this message,
			# if we haven't already done it.
			unless ($summarized) {
				print $msg_summary;
				$summarized = 1;
			}
			print "Found a large attachment.  It's header's are:\n";
			print "-" x 60 . "\n";
			$part_head->print;
			print "-" x 60 . "\n";

			$answer = $term->readline($attach_prompt);
			warn $@ if $@;
			$term->addhistory($_) if /\S/;

			# DBG
			print "The user's answer was: $answer\n" if $debug;

			if ( $answer =~ /^y/i ) {
				# The user has requested that the 
				# attachment be deleted.
				print "The attachment will be deleted.\n\n";

				# Create a new attachment of text/plain
				# and put info into it.
				$part_headers = $part_head->stringify;
				@new_info = ($info_start, $part_headers);

				$new_part = build MIME::Entity 
						(Type       => "text/plain",
						Data        => \@new_info);
				push @parts_to_keep, $new_part;

			} elsif ( $answer =~ /^q/i ) {
				# The user has decided to quit early.
				&abort();
			} else {
				print "Keeping the attachment.\n\n";
				push @parts_to_keep, $part;
			}
		} else {
			# This attachment is not too big, so keep it.
			push @parts_to_keep, $part;
		}

		$i++;
	}

	# At this point, we should have a complete message, in
	# the form that we wish to keep it.  Write this message
	# to the output file.

	# Extract MIME boundary from the original message.
	$boundary = $head->get('Content-Type');

	chomp($boundary);
	if ( $boundary =~ /boundary\s*=\s*\"(.*)\"/ ) {
		$boundary = $1;
	} else {
		# Assign a default boundary, since we couldn't parse one.
		$boundary = "$$--$0--$msg_num";
	}

	# Print the newly formed message to the new mbox.

	# Start with the FROM_ line.
	print NEWBOX "$from_";

	# Next comes the headers.
	$head->print(\*NEWBOX);
	print NEWBOX "\n";

	# Then the message body.
	if ( $#parts_to_keep == -1 ) {
		# This is not a MIME message.  Need to keep the original body.
		$entity->print_body(\*NEWBOX);
	} else {
		# This _is_ a MIME message, so write out all of its parts.

		# Print the preamble
		$preamble   = $entity->preamble;
		foreach $pline (@$preamble) {
			print NEWBOX $pline, "\n";;
		}
		#print NEWBOX "The Preamble is: $preamble\n\n";

		foreach $part (@parts_to_keep) {
			print NEWBOX "--$boundary\n";
			$part->print(\*NEWBOX);
			#print NEWBOX "\n";
		}
		print NEWBOX "\n--$boundary--\n";
	}
	print NEWBOX "\n\n";

	&clean();

} # end sub check_message




