#!/usr/bin/perl
#
# man2html.pl - UNIX manpage to HTML convertor
# Copyright (C) TOYODA Eizi, 1998
#

	require "getopts.pl";
	&Getopts("b:f:d:Dnmp:x:");
	$opt_x =~ s/^\.//;
	$opt_x = substr($opt_x, 0, 3) if $opt_m;

	if ($opt_p) {
		$opt_x || ($opt_x = "htm");
		$PATHDELIM = &isMSDOS ? ";" : ":";
		@manpath = split(/$PATHDELIM/, @opt_p);
		foreach (@manpath) {
			&ManTreeScan($_);
		}
	} else {
		while ($manpage = shift) {
			&ManConv($manpage, &Output($manpage));
			$files++;
		}
		!$files && &ManConv("-", "-");
	}
exit 0;

sub ManTreeScan {
	local($mandir) = @_;
	print "tree scan in <$mandir> (not implemented)\n";
}

sub Output {
	local($fnam, $section) = @_;
	return "-" if (!$opt_x && !defined $section);
	local($ext) = $opt_x || "htm";
	$fnam =~ s/\.[^\.]*$//;
	$fnam = substr($fnam, 0, 8) if $opt_m;
	$fnam .= ".$ext";
	if ($opt_d && $opt_p) {
		$fnam = "$opt_d/htm$section/$fnam";
	} elsif ($opt_d) {
		$fnam = "$opt_d/$fnam";
	}
	$fnam;
}

sub ManConv {
	($INPUT, $OUTPUT) = @_;
	warn "files: $INPUT $OUTPUT\n" if ($opt_n || $opt_D);
	return if $opt_n;
	if ($INPUT eq "-") {
		$INPUT = "STDIN";
		select(STDOUT);
		&ManFilter;
		return;
	}
	open($INPUT, $INPUT) || die "$INPUT: cannot open\n";
	open($OUTPUT, ">$OUTPUT") || die ">$OUTPUT: cannot open\n";
	select($OUTPUT);
	&ManFilter;
	close($INPUT);
	close($OUTPUT);
}

sub ManFilter {
	$IGNORE = 1;		# for ever
	$ITEMIZE = 0;
	$EQUATION = 0;

	&chop_init;
	while (<$INPUT>) {
		&chop;
		/^$/ && (print("\n"), next);
		/^[\.']\\"/ && next;
		/^[\.']/ && (&RoffCommand, next);
		next if $IGNORE;
		$EQUATION && /delim/ && next;		# eqn emu kluge
		&htquote();
		print "$_\n";
	}
	&HtmlFooter();
}

sub htquote {
	$_ = @_ ? $_[0] : $_;
	s/\\\\/\\\0/g;
	s/\\-/--/g;
	s/<URL:([^>]*)>/\\(A HREF=\\Q\1\\Q\\)\1\\(\/A\\)/g;
	s/^relative:([!-~]+)/\\(A HREF=\\Q\1\\Q\\)\1\\(\/A\\)/;
	s/\\\&//g;
	s/\&/\&amp;/g;
	s/</\&lt;/g;
	s/>/\&gt;/g;
	s/"/\&quot;/g;
	s/\\\(/</g;
	s/\\\)/>/g;
	s/\\Q/"/g;
	while (/\\f[BI]/) {
		if ($& eq "\\fB") {
			s/\\fB/<B>/;
			s,\\fP,</B>, || s,$,</B>,;
		} else {
			s/\\fI/<I><U>/;
			s,\\fP,</U></I>, || s,$,</U></I>,;
		}
	}
	s/\\\0/\\/g;
	$_;
}

sub RoffCommand {
	local($cmd, @arg) = &RoffArg($_);
	return if ($cmd eq "");
	warn "$cmd <<", join(">><<", @arg), ">>\n" if $opt_D;

	if ($cmd eq "TH") {
		$IGNORE = 0;
		&HtmlHeader(@arg);
	} elsif ($cmd eq "SH") {
		&enditem;
		print "<H2>", &htquote($arg[0]), "</H2>\n";
	} elsif ($cmd eq "SS") {
		&enditem;
		print "<H3>", &htquote($arg[0]), "</H3>\n";
	} elsif ($cmd =~ /^[LP]P$/) {
		&enditem;
		print "<P>\n";
	} elsif ($cmd eq "IP") {
		$ITEMIZE || ($ITEMIZE = 1, print "<DL>");
		print"<DT>", &htquote($arg[0]), "<DD>\n";
	} elsif ($cmd eq "br") {
		print "<BR>\n";
	} elsif ($cmd eq "nf") {
		print "<PRE>\n";
	} elsif ($cmd eq "fi") {
		print "</PRE>\n";
	} elsif ($cmd eq "RS" || $cmd eq "EQ") {
		print "<BLOCKQUOTE>\n";
		$EQUATION = ($cmd eq "EQ");
	} elsif ($cmd eq "RE" || $cmd eq "EN") {
		print "</BLOCKQUOTE>\n";
		$EQUATION = 0 if ($cmd eq "EN");
	} elsif ($cmd eq "B") {
		print "<B>", &htquote($arg[0]), "</B>\n";
	} elsif ($cmd eq "I") {
		print "<I><U>", &htquote($arg[0]), "</U></I>\n";
	} elsif ($cmd eq "BR" && &isRefMan(@arg)) {
		if ($link = &RefMan(@arg)) {
			print "<A HREF=\"$link\"><B>",
				&htquote($arg[0]), "</B>",
				&htquote($arg[1]), "</A>\n";
		} else {
			print "<B>", &htquote($arg[0]), "</B>",
				&htquote($arg[1]), "\n";
		}
	} elsif ($cmd =~ /^[BRI][BRI]$/) {
		local($pat, $args);
		$pat = $cmd x 3;
		while ($pat) {
			($args = shift(@arg)) || last;
			if ($pat =~ /^B/) {
				print "<B>", &htquote($args), "</B>";
			} elsif ($pat =~ /^I/) {
				print "<I><U>", &htquote($args), "</U></I>";
			} else {
				print &htquote($args);
			}
			$pat =~ s/^.//;
		}
		print "\n";
	} else {
		warn "$0: unknown command $cmd\n";
	}
}

sub HtmlHeader {
	local($arg) = @_;
	($TITLE = shift(arg)) =~ tr/a-z/A-Z/;
	$TITLE .= "(" . shift(arg) . ")";
	print "<HTML>\n";
	print "<HEAD><TITLE>", $TITLE, "</TITLE></HEAD>\n";
	print "<BODY";
	print " BGCOLOR=\"\#$opt_b\"" if $opt_b;
	print " FGCOLOR=\"\#$opt_f\"" if $opt_f;
	print ">\n";
	print "<H1>", $TITLE, "</H1>\n";
	foreach ("date:", "source:", "title:") {
		($it = shift(arg)) || last;
		print("<BR>\n", $_, $it);
	}
	print "<HR>\n";
}

sub HtmlFooter {
	if ($IGNORE) {
		warn "$0: $INPUT has no .TH command\n";
		return;
	}
	print "<HR>\n";
	print "<P ALIGN=\"RIGHT\">\n";
	print "HTML-converted with TOYODA Eizi's <B>man2html.pl</B>\n";
	print "</BODY>\n";
	print "</HTML>\n";
}

sub isRefMan {
	(scalar @_ == 2) && ($_[1] =~ /^\([1-8][A-Za-z]?|[lno]\)/);
}

sub RefMan {
	local($name, $section) = @_;
	$section =~ s/^\(//;
	$section =~ s/\).*//;
	local($fnam) = &Output($name, $section);
	return ($_ = $fnam) if -f $fnam;
	$fnam = "$name.$section";
	return ($_ = $fnam) if -f $fnam;
	$fnam = "$name.man";
	return ($_ = $fnam) if -f $fnam;
	return 0;
}

sub enditem {
	return unless $ITEMIZE;
	print "</DL>\n";
	$ITEMIZE = 0;
}

sub RoffArg {
	local($_) = @_;
	local($cmd, @arg, $curarg);
	s/.\s*//;
	s/\\".*//;
	return () unless /^\S+/;
	$cmd = $&;
	s/^\S+\s*//;
	until (/^\s*$/) {
		s/^\s*//;
		# last if (/^\\"/);	# comment in Command
		if (/^"/) {
			s/^"//;
			/^[^"]*/;
			$curarg = $&;
			$_ = $';
			s/^"//;
			push(@arg, $curarg);
		} else {
			/\S*/;
			push(@arg, $&);
			$_ = $';
		}
		next;
	}
	($cmd, @arg);
}

#
# line input $_ preprocessor
#   	a. truncate trailing CR? LF
#	b. quoting kanzi-2nd BACKSLASH in SJIS
#

sub chop_init {
	$SJIS = 0;
}

sub chop {
	s/\r?\n$//g;
	#
	# kanzi hack
	#
	/[\x81-\x8D\x90-\x9F]/ && ($SJIS = 1);
	if (/\x1B/) {
		local(@block) = split(/\x1B/, $_); 
		foreach (0 .. scalar(@block)) {
			if ($block[$_] =~ /^\044[\@B]/) {
				$block[$_] = $';
				$block[$_] =~ tr/\041-\176/\241-\376/;
			} elsif ($block[$_] =~ /^\(I/) {
				$block[$_] = $';
				$block[$_] =~ tr/\041-\176/\241-\376/;
				$block[$_] =~ s/[\041-\176]/\x8E$&/g;
			} elsif ($block[$_] =~ /^\([\@ABJ]/) {
				$block[$_] = $';
			}
		}
		$_ = join("", @block);
	} elsif ($SJIS) {
		s/[\x81-\x9F\xE0-\xEF]\\/$&\\/g;
	}
	$_;
}

sub isMSDOS {
	# MS-DOS has no /dev/null.
	return 0 if ( -c '/dev/null' );
	# MS-DOS has /DEV/CON even in drive without /DEV.
	return 1 if ( -f '/DEV/CoN' && ! -d '/DEV' );
	# MS-DOS allows CON or NUL have extension.
	return 2 if ( -f '/coN.3b7' && -f '/NuL.j0Q' && -f '/nUl.!#$' );
	0;
}

