#!/usr/bin/perl # # Concept: # # Convert a hypermail archive to an mbox, weakly. # # Author: # # Scott Rose, rose@cs.[wisc,washington].edu. # # With modifications by Kent Landfield. # Further modifications by Peter McCluskey (pcm@rahul.net). # If it's broke, it's my fault. # For the very latest version, check the hypermail cvs repository: # http://cvs.hypermail.org/cgi-bin/cvsweb.cgi/hypermail/contrib/#hypetombox.pl # Or get the version in a recent hypermail distribution at # http://sourceforge.net/projects/hypermail/ or http://www.hypermail.org. # # A few more mods by Fyodor (fyodor@insecure.org) [9/24/00]: # * Added -H (additional header) option (useful for filtering messages) # * Extended the date formats accepted in "Received:" headers # * Added 'a' option to append to mailbox rather than clobbering it # * Updated usage info # # Usage: # # hypetombox.pl [-a] [-H ] [-m ] [-n ] [-S ] # -a means append to mailbox instead of clobbering it # -H adds the header(s) you specify. If more than one, put the three # characters %0A between them. # -R removes carriage returns (\r) from the end of lines; they sometimes get # added for reasons I haven't been able to explain. # -S defaults to 'html'. # # # $Header: /CVS/hypermail/contrib/hypetombox.pl,v 1.3 2001/11/08 00:28:14 pcm Exp $ require 5.000; use Getopt::Std; use MIME::Base64; getopts('ad:H:m:n:RS:'); # This is a list of the fields in the comment header of each message. @fields = ('received', 'sent', 'name', 'email', 'subject', 'id', 'inreplyto'); # Get a list of the message files. $htmlsuffix = $opt_S || "html"; $fpat= "[0-9][0-9][0-9][0-9].$htmlsuffix"; $fpat = "$opt_d/$fpat" if(defined $opt_d); @msgs = sort glob($fpat); $to_address = $opt_n || 'bogus'; # Open the output file for write. $mbox_name = $opt_m || 'mbox'; if ($opt_H) { $opt_H =~ s/%0A/\n/g; chomp($opt_H); $opt_H .= "\n"; } $DIR_PREFIXER = "att-"; # should be same as DIR_PREFIXER in parse.h $PATH_SEPARATOR = '/'; if ($opt_a) { $openflag = ">>"; } else { $openflag = ">"; } (open MBOX, "$openflag$mbox_name") || die "can't open $mbox_name"; # Loop on the input files. # count the messages in the file $cntr = 0; $boguscntr = 0; foreach $msg (@msgs) { # Open the message file for read. # reset the header count for each message. # added by Erik Peterson 1/3/00. $header_count=0; $cntr += 1; $msgnum = int(substr($msg, length($opt_d) + ($opt_d ne ''), 99999)); $boundary = "=---------HYPETOMBOXCREATEDBOUNDARY$msgnum"; $attdir = sprintf("%s%s$DIR_PREFIXER%04d", $opt_d, $opt_d ne '' ? $PATH_SEPARATOR : '', $msgnum); @attfiles = glob("$attdir$PATH_SEPARATOR*"); undef %att_type; undef %att_desc; (open M, $msg) || die "can't open $msg"; # Loop on lines in the file $state = 'HeaderComments'; foreach $fld (@fields) { $$fld = ''; } my $line; my $msg_text; while($line = ) { # This is a header comment; save the value in a variable with the # same name. $line =~ s|\r(\n)$|\n| if($opt_R); if($state eq 'HeaderComments' && ($line =~ /^(\s*)$/)) { $key = $1; $value = $2; $value =~ s/&/&/g; $value =~ s/<//g; $$key = $value; ++$header_count; } elsif($header_count > 0) { if($state eq 'HeaderComments') { $state = 'LeadingGoo'; if($received =~ /^(\w{3} \w{3} {1,2}\d{1,2} \d{2}:\d{2}:\d{2} \d{4})/) { $date = $1; } elsif($received =~ /^(\d{1,2} \w{3} {1,2}\d{2,4} \d{2}:\d{2}:\d{2} \w{3})/) { $date = $1; } elsif($received =~ /^(\w{3}), (\d{1,2}) (\w{3}) (\d{4}) (\d{2}:\d{2}:\d{2})/) { $date = sprintf("%s %s %.2d %s %s", $1, $3, $2, $5, $4); } else { $date = 'Bogus date'; $boguscntr += 1; warn("received $received, $line\n") if($boguscntr < 3); } print MBOX "From $email $date\n"; print MBOX "Date: $sent\n"; print MBOX "Message-Id: <$id>\n"; print MBOX "To: $to_address\n"; print MBOX "From: $email ($name)\n"; print MBOX "Subject: $subject\n"; if ($opt_H) { print MBOX $opt_H; } if ($inreplyto) { print MBOX "In-Reply-To: <$inreplyto>\n"; } if (@attfiles) { print MBOX "Mime-Version: 1.0\n"; print MBOX "Content-Type: multipart/mixed; boundary=\"$boundary\"\n"; } print MBOX "\n"; if (@attfiles) { print MBOX "This is a multi-part message in MIME format.\n"; print MBOX "--$boundary\n"; print MBOX "Content-Type: text/plain; charset=us-ascii\n"; print MBOX "Content-Transfer-Encoding: 7bit\n"; } } if($line =~ m|^|) { my $attfilename = $2; my $part_no = $1; if($msg_text =~ s|
    \n
  • (\S+) attachment: ([^\n]+)\n
(\s*)$||s) { $att_type{$attfilename} = $1; $att_desc{$attfilename} = $2 if($2 ne 'stored'); } $msg_text =~ s|
\n$||s; $msg_text .= &get_att($attdir, $part_no, $attfilename); next; } $state = 'Body' if($line =~ /^/); $state = 'TrailingGoo' if(($line =~ /^/) || ($line =~ /^<([pP])>/)); next if(($line =~ /^