#!/pkg/gnu/bin/perl
#
# Convert raw text to something with a little HTML formatting
#
# Written by Seth Golub <seth@hilco.com> 
#            
#
#
# Oscar Nierstrasz has a nice script for hypertextifying URLs.
# It is available at:
#   http://cui_www.unige.ch/ftp/PUBLIC/oscar/scripts/html.pl
#

$version = "1.01";

# ---- Defaults; each one can be changed by a command-line switch ----

# Lines shorter than this many characters are assumed to have been
# broken on purpose, so the break is kept with a <BR>.
$short_line_length = 40;

# Non-zero: also deal with mail headers and quoted text.
$mailmode = 1;

# Minimum run of whitespace inside a line that triggers <PRE>.
$preformat_whitespace_min = 5;

# Minimum number of rule characters (---) that make an <HR>.
$hrule_min = 6;

# Name of a file to append to every document; 0 means none.
$append_file = 0;

# These are defs, really.
# Bit flags that get OR-ed into $mode and $line_action.
$NONE   =   0;
$LIST   =   1;
$HRULE  =   2;
$PAR    =   4;
$PRE    =   8;
$END    =  16;
$BREAK  =  32;
$HEADER =  64;

# List kinds stored in @list by startlist and compared (with ==) by
# endlist and continuelist.  They must be defined and numerically
# distinct: when both were undefined every comparison was true, so an
# ordered list was always closed with </UL>.
$UL     =   1;
$OL     =   2;

# Consume leading command-line switches (anything starting with - or +).
#
# Every branch that recognises a switch ends in `next`, which runs the
# `continue` block and shifts the switch itself off @ARGV; branches for
# switches that take a value shift once more first to drop the value.
# Anything unrecognised, or a value switch without a usable value,
# falls through to &usage, which exits.
while ($ARGV[0] =~ /^[-+].+/)
{
    # -r <n>: minimum number of rule characters for an <HR>.
    # (The value test used to read /^%d+$/, which never matches a
    # number, so this switch always ended up in &usage.)
    if (($ARGV[0] eq "-r" || $ARGV[0] eq "--hrule") &&
        $ARGV[1] =~ /^\d+$/)
    {
        $hrule_min = $ARGV[1];
        shift @ARGV;
        next;
    }

    # -s <n>: lines shorter than this keep their line break.
    if (($ARGV[0] eq "-s" || $ARGV[0] eq "--shortline") &&
        $ARGV[1] =~ /^\d+$/)
    {
        $short_line_length = $ARGV[1];
        shift @ARGV;
        next;
    }

    # -p <n>: minimum run of whitespace that triggers <PRE>.
    if (($ARGV[0] eq "-p" || $ARGV[0] eq "--prewhite") &&
        $ARGV[1] =~ /^\d+$/)
    {
        $preformat_whitespace_min = $ARGV[1];
        shift @ARGV;
        next;
    }

    # -a <file>: file to append to the output.  An unreadable file is
    # reported but is not fatal; the previous setting is kept.
    if (($ARGV[0] eq "-a" || $ARGV[0] eq "--append") &&
        $ARGV[1] =~ /^.+$/)
    {
        if (-r $ARGV[1]) {
            $append_file = $ARGV[1];
        } else {
            print STDERR "Can't find or read $ARGV[1].\n";
        }
        shift @ARGV;
        next;
    }

    # +a: append nothing.
    if ($ARGV[0] eq "+a" || $ARGV[0] eq "--noappend")
    {
        $append_file = 0;
        next;
    }

    # -m / +m: turn mail handling on / off.
    if ($ARGV[0] eq "-m" || $ARGV[0] eq "--mail")
    {
        $mailmode = 1;
        next;
    }

    if ($ARGV[0] eq "+m" || $ARGV[0] eq "--nomail")
    {
        $mailmode = 0;
        next;
    }

    # -v: report the version and stop.
    if ($ARGV[0] eq "-v" || $ARGV[0] eq "--version")
    {
        print "txt2html version $version\n";
        exit;
    }

    &usage;

} continue {

    shift @ARGV;
}


# True when the argument is empty or holds nothing but whitespace.
# An undefined argument counts as blank as well.
sub blank
{
    local($text) = @_;
    return $text =~ /^\s*$/;
}

# Replace the HTML-special characters &, < and > in the global $line
# with their entities.  A single pass over the line means an ampersand
# introduced by a replacement is never escaped a second time.
sub escape
{
    local(%entity) = ('&', '&amp;', '>', '&gt;', '<', '&lt;');
    $line =~ s/([&<>])/$entity{$1}/g;
}

# Turn the global $line into an <HR> when it consists solely of at
# least $hrule_min rule characters (- _ ~ = *), optionally separated
# or surrounded by whitespace.
sub hrule
{
    # Anything else is left untouched.
    return if $line !~ /^\s*([-_~=\*]\s*){$hrule_min,}\s*$/;

    $prev =~ s/<p>//;           # no paragraph tag right before the rule
    $line = "<HR>\n";
    $line_action = $line_action | $HRULE;
}

# Keep the line break after a short line: append <BR> to the global
# $line when it is shorter than $short_line_length, is followed by more
# text, and has not already been handled as a header, rule or break.
sub shortline
{
    return if $mode & $PRE;             # preformatted text keeps its breaks
    return if &blank($line);
    return if length($line) >= $short_line_length;
    return if &blank($nextline);        # paragraph end; no break needed
    return if $line_action & ($HEADER | $HRULE | $BREAK);

    $line =~ s/$/<BR>/;
    $line_action = $line_action | $BREAK;
}

# Mail handling: a line that starts with optional word characters
# followed by an (already escaped) '>' or by '|' or ':' gets a <BR>,
# provided the next line is not blank.
sub mailstuff
{
    return if &blank($nextline);
    return unless $line =~ /^\w*&gt/ || $line =~ /^\w*[|:]/;

    $line =~ s/$/<BR>/;
    $line_action = $line_action | $BREAK;
}

# Open a new paragraph: the tag is attached to the end of $prev so that
# it is printed ahead of the current line.
sub paragraph
{
    $line_action = $line_action | $PAR;
    $prev = $prev . "<p>\n";
}

# Examine a line for a list marker.
#
# Returns (prefix, number, rawprefix):
#   - (0, 0, 0) when the line starts with neither a bullet (- = * o
#     followed by whitespace) nor a number followed by . ) or ];
#   - for a numbered item, rawprefix is the leading whitespace, the
#     number and the character after it; prefix is the same with the
#     number removed;
#   - for a bullet, prefix and rawprefix are both the leading
#     whitespace, the bullet and the character after it, and number is
#     undefined.
sub listprefix
{
    local($text) = @_;
    local($marker, $ordinal, $raw);

    if ($text !~ /^\s*[-=\*o]\s+\S+/ &&
        $text !~ /^\s*\d+[\.\)\]]\s+\S+/)
    {
        return (0, 0, 0);
    }

    ($ordinal) = $text =~ /^\s*(\d+)/;
    if (!$ordinal)
    {
        ($raw) = $text =~ /(\s*[-=o\*].)/;
        $marker = $raw;
    }
    else
    {
        ($raw) = $text =~ /(\s*\d+.)/;
        ($marker = $raw) =~ s/\d+//;    # Take the number out
    }

    return ($marker, $ordinal, $raw);
}

# Open a new list for an item with the given prefix.
#
# Arguments: (prefix, number, rawprefix) as returned by listprefix; the
# third is not used here.  The prefix is recorded in @listprefix at the
# current depth.  A bullet opens a <UL>; a numbered item opens an <OL>,
# but only when its number is 1.  The opening tag is attached to $prev.
sub startlist
{
    local($marker, $ordinal) = @_;
    local($tag);

    $listprefix[$listnum] = $marker;

    if (!$ordinal)
    {
        $list[$listnum++] = $UL;
        $tag = "<UL>\n";
    }
    else
    {
        # It doesn't start with 1.  Let's not screw with it.
        return if $ordinal != 1;
        $list[$listnum++] = $OL;
        $tag = "<OL>\n";
    }

    $prev = $prev . $tag;
    $line_action = $line_action | $LIST;
    $mode = $mode | $LIST;
}


# End N lists: close the N innermost open lists, innermost first.
# The closing tags are attached to $prev; list mode is left once no
# list remains open.
sub endlist
{
    local($remaining) = @_;
    local($kind);

    while ($remaining > 0)
    {
        $kind = $list[$listnum - 1];
        if ($kind == $UL)
        {
            $prev .= "</UL>\n";
        }
        elsif ($kind == $OL)
        {
            $prev .= "</OL>\n";
        }
        else
        {
            print STDERR "Encountered list of unknown type\n";
        }
        $remaining--;
        $listnum--;
    }

    $line_action = $line_action | $END;
    if (!$listnum)
    {
        # Clear the LIST bit without disturbing the others.
        $mode = $mode ^ ($mode & $LIST);
    }
}

# Turn the marker at the start of the global $line into <LI>, using the
# pattern that belongs to the kind of the innermost open list.
# Arguments passed by the caller are not used.
sub continuelist
{
    local($kind) = $list[$listnum - 1];

    # Two separate tests, not if/elsif.
    if ($kind == $UL)
    {
        $line =~ s/^\s*[-=o\*]\s*/<LI> /;
    }
    if ($kind == $OL)
    {
        $line =~ s/^\s*\d+.\s*/<LI> /;
    }
    $line_action = $line_action | $LIST;
}

# Decide what the current line means for the open lists: continue the
# innermost list, go back up to an enclosing one, open a new (possibly
# nested) list, or end all lists.  Works on the globals $line, $prev,
# $mode, $listnum and @listprefix through the list helpers.
sub liststuff
{
    local($i);

    local($prefix, $number, $rawprefix) = &listprefix($line);

    # $i becomes the depth the line belongs to:
    #   $listnum  - the innermost open list,
    #   smaller   - an enclosing list (the deeper ones get closed),
    #   0         - no list at all (close everything),
    #   -1        - a prefix no open list uses (open a new list).
    $i = $listnum;
    if (!$prefix)
    {
        return if !&blank($prev); # inside a list item

        # This ain't no list.  We'll want to end all of them.
        return if !($mode & $LIST);	# This just speeds up the inevitable
        $i = 0;
    }
    else
    {
        # Maybe we're going back up to a previous list.  Only the
        # entries of lists that are still open (indices 0 .. $listnum-1)
        # may be compared.  Testing $i first keeps the subscript from
        # going negative: $listprefix[-1] is the LAST element, often a
        # stale prefix of a list that has already been closed, and a
        # match against it used to close the enclosing list instead of
        # opening a new nested one.
        $i-- while ($i > 0 && $prefix ne $listprefix[$i-1]);
        $i = -1 if $i <= 0;	# no open list uses this prefix
    }

    if (($i >= 0) && ($i != $listnum))
    {
        &endlist($listnum - $i);
    }
    elsif (!$listnum || $i != $listnum)
    {
        &startlist($prefix, $number, $rawprefix);
    }

    &continuelist($prefix, $number, $rawprefix) if ($mode & $LIST);
}

# Leave preformatted mode: the closing tag goes on a line of its own at
# the end of $prev (ahead of its trailing newline, if it has one).
sub endpreformat
{
    $line_action = $line_action | $END;
    $mode = $mode ^ ($mode & $PRE);     # clear only the PRE bit
    $prev =~ s!$!\n</PRE>!;
}

# Enter preformatted mode: put <PRE> on a line of its own ahead of the
# global $line and drop a paragraph tag from $prev, if there is one.
sub preformat
{
    $mode = $mode | $PRE;
    $prev =~ s/<p>//;
    $line = "<PRE>\n" . $line;
}

# Recognise an underlined heading.
#
# The global $line becomes <Hn>...</Hn> when $nextline is an underline
# of exactly the same length as the heading text.  The first character
# of the underline selects the level:  * = + - ~ .  give H1 .. H6.
# The underline itself is consumed by reading the following line from
# STDIN into $nextline.
sub heading
{
    local($hpre, $heading) = $line =~ /(\s*)(.+)\s*$/;
    $heading =~ s/\s+$//;	# get rid of trailing whitespace.

    # The underline has to be the only thing on the next line and has
    # to be made of underline characters.  Taking just the last word of
    # any line allowed ordinary text (say "x -1." under "abc") to pass
    # for an underline and be swallowed.
    local($underline) = $nextline =~ /^\s*([=\-\*\.~\+]+)\s*$/;
    return if !length($underline);

    # $line has already been HTML-escaped, so measure the heading as it
    # was typed; otherwise a heading containing &, < or > can never be
    # as long as its underline.  '&amp;' goes last so that an escaped
    # '&lt;' in the input is not decoded twice.
    local($plain) = $heading;
    $plain =~ s/&lt;/</g;
    $plain =~ s/&gt;/>/g;
    $plain =~ s/&amp;/&/g;

    return if length($plain) != length($underline);

    $underline =~ s/(^.).*/$1/;	# only the first character matters now

    local($hlevel);
    $hlevel = 1 if $underline eq "*";
    $hlevel = 2 if $underline eq "=";
    $hlevel = 3 if $underline eq "+";
    $hlevel = 4 if $underline eq "-";
    $hlevel = 5 if $underline eq "~";
    $hlevel = 6 if $underline eq ".";
    return if !$hlevel;

    $nextline = <STDIN>;	# Eat the underline
    $line = "<H${hlevel}>" . $line;
    $line =~ s@$@</H$hlevel>@;	# closing tag goes before the newline
    $line_action |= $HEADER;
}

# Print a summary of the command-line switches on STDERR and exit with
# status 1.  The program name is shown without its directory part.
sub usage
{
    $0 =~ s!.*/!!;

    print STDERR <<USAGE;

Usage: $0 -v        | --version
       $0 -s <n>    | --shortline <n>
       $0 -p <n>    | --prewhite <n>
       $0 -r <n>    | --hrule <n>
       $0 -m/+m     | --mail / --nomail
       $0 -a <file> | --append <file>
       $0 +a        | --noappend

USAGE
    exit(1);
}


# Start the fireworks

# Fixed document prologue.  The <HEAD> stays empty: it would be nice to
# guess a title from the first header, but that would mean rewriting
# this as a multi-pass filter.  Yuck.
print "<HTML>\n",
      "<HEAD>\n",
      "</HEAD>\n",
      "<BODY>\n";

# Main loop.  Three lines are in flight at any time:
#   $prev      already processed, printed on this pass (later steps may
#              still attach tags to it before that happens),
#   $line      the line being processed,
#   $nextline  lookahead for headings, short lines and mail quoting.
# The order of the steps below matters: each one looks at $mode and
# $line_action as left by the steps before it.
$prev     = "";
$line     = <STDIN>;
$nextline = <STDIN>;

do {
    &escape;

    # A line without a long run of whitespace ends preformatted text.
    &endpreformat if (!($line =~ /\s{$preformat_whitespace_min,}/)
		      && ($mode & $PRE));

    &hrule if !($mode & $PRE);

    &liststuff if (!($mode & $PRE) &&
		   !&blank($line));

    &heading   if (!($mode & $PRE) &&
		   $nextline =~ /\s*[=\-\*\.~\+]+\s*/);

    &preformat if (!($line_action & ($HEADER | $LIST)) &&
		   !($mode & ($LIST | $PRE)) &&
		   $line =~ /\s{$preformat_whitespace_min,}/);

    # First untouched text line after a blank one opens a paragraph.
    &paragraph if (&blank($prev) &&
		   !&blank($line) &&
		   !($mode & ($LIST | $PRE)) &&
		   !$line_action);

    &mailstuff if ($mailmode &&
		   !($mode & $PRE) &&
		   !($line_action & $HEADER));

    &shortline;


    # Print it out and move on.

    print $prev;

    # The action is carried across blank lines on purpose; it is only
    # reset when the next line has text.
    if (!&blank($nextline))
    {
#	$previous_action = $line_action;    # I've no use for this yet..
	$line_action     = $NONE;
    }

    $prev = $line;
    $line = $nextline;
    $nextline = <STDIN>;

    # Stop once all three slots have run dry.  This has to test for
    # definedness rather than truth: a last line consisting of "0" with
    # no trailing newline is a false value, and used to be dropped
    # without ever being printed.
} until (!defined($nextline) && !defined($line) && !defined($prev));

# Close whatever is still open at the end of the input.
$prev = "";
&endlist($listnum) if ($mode & $LIST);
print $prev;

print "\n";

print "</PRE>\n" if ($mode & $PRE);

# Append the extra file only when one has been configured.  Without
# this test the default setting of 0 made the script open, and append,
# a file literally named "0" whenever one existed in the current
# directory.  The explicit "< " forces the file to be opened for
# reading whatever its name starts with.
if ($append_file)
{
    if (open(APPEND, "< $append_file"))
    {
	print while <APPEND>;
	close(APPEND);
    }
    else
    {
	print STDERR "Can't open $append_file: $!\n";
    }
}

print "</BODY>\n";
print "</HTML>\n";


