#!/usr/bin/env perl
# File          : makejmlrbook
# Author        : Nicola L C Talbot
# Date          : 22nd March 2010
# Last Modified : 24th March 2010
# Version       : 0.1
# Description   : Given the name of a document using the
#                 jmlrbook class file, this script runs
#                 pdflatex (and possibly bibtex) on the
#                 imported articles and the main document.
# http://theoval.cmp.uea.ac.uk/~nlct/
#
# This file is distributed as part of the jmlr LaTeX bundle.
#
# Copyright (c) 2006 Nicola L.C. Talbot
# This work may be distributed and/or modified under the
# conditions of the LaTeX Project Public License, either version 1.3
# of this license or any later version.
# The latest version of this license is in
#   http://www.latex-project.org/lppl.txt
# and version 1.3 or later is part of all distributions of LaTeX
# version 2005/12/01 or later.
#
# This work has the LPPL maintenance status `maintained'.
#
# The Current Maintainer of this work is Nicola Talbot.

use Getopt::Long;
use File::Basename;
use File::Copy;
use Cwd;
use strict;

my $version = "0.1 (2010-03-24)";

# Line terminator written to generated files and the directory
# divider used by fname(); both are switched to the Windows forms
# when running under MSWin32.
my $eol = "\n";
my $dd  = '/';

if ($^O eq 'MSWin32')
{
  $eol = "\r\n";
  $dd = "\\";
}

# Command line settings and their default values
my $showversion      = 0;          # --version
my $showhelp         = 0;          # --help
my $quiet            = 0;          # --quieter
my $batchtex         = 0;          # --batchtex
my $online           = 1;          # --online (color on-line version)
my $print            = 1;          # --print (grayscale print version)
my $html             = 1;          # --html
my $latexapp         = 'pdflatex'; # --latexapp
my $bibtexapp        = 'bibtex';   # --bibtexapp
my $latexopts        = '';         # --latexopts
my $bibtexopts       = '';         # --bibtexopts
my $format           = 'pdf';      # --format (output file extension)
my $logourl          = '';         # --logourl

unless(GetOptions(
   "online!"          => \$online,
   "print!"           => \$print,
   "html!"            => \$html,
   "logourl=s"        => \$logourl,
   "format=s"         => \$format,
   "latexapp=s"       => \$latexapp,
   "bibtexapp=s"      => \$bibtexapp,
   "latexopts=s"      => \$latexopts,
   "bibtexopts=s"     => \$bibtexopts,
   "quieter!"           => \$quiet,
   "batchtex!"        => \$batchtex,
   "version"          => \$showversion,
   "help"             => \$showhelp))
{
   # GetOptions has already reported the offending option on STDERR.
   # It doesn't set $!, so including $! here would only print stale
   # or empty text.
   die "Use --help for help\n";
}

# Name under which this script was invoked, used in messages
my $appname = basename($0);

# --help: show the syntax and the list of options, then stop.
# The option names listed here must match those passed to GetOptions.
if ($showhelp)
{
  die "$appname version $version Copyright (c) 2010 Nicola L C Talbot\n",
    "Distributed under the LaTeX Project Public License.\n\n",
    "Syntax: $appname [options] <filename>\n\n",
    "<filename> should be the name of the master document for a LaTeX \n",
    "file that uses the jmlrbook class. The .tex extension may be\n",
    "omitted.\n\n",
    "Basic options:\n",
    "--online\t\tGenerate the color on-line version (default)\n",
    "--noonline\t\tDon't generate the color on-line version\n",
    "--print\t\t\tGenerate the grayscale print version (default)\n",
    "--noprint\t\tDon't generate the grayscale print version\n",
    "--html\t\t\tGenerate the HTML version (default)\n",
    "--nohtml\t\tDon't generate the HTML version\n",
    "--logourl <url>\tMake the logo on the HTML index page link to <url>\n",
    "--batchtex\t\tRun TeX in batch mode\n",
    "--nobatchtex\t\tDon't run TeX in batch mode\n",
    "--quieter\t\tReduce messages to stdout and run TeX in batch mode\n",
    "--noquieter\t\tDon't reduce messages to stdout\n",
    "--version\t\tDisplay version number and exit\n",
    "--help\t\t\tDisplay help message and exit\n",
    "\nAdvanced options:\n",
    "--latexapp <name>\tApplication used to call LaTeX\n",
    "\t\t\t(Defaults to 'pdflatex')\n",
    "--format <string>\tOutput format (default: 'pdf')\n",
    "--bibtexapp <name>\tApplication used to call BibTeX\n",
    "\t\t\t(Defaults to 'bibtex')\n",
    "--latexopts <string>\tOptions to pass to LaTeX\n",
    "--bibtexopts <string>\tOptions to pass to BibTeX\n";
}

# --version: show the version details, then stop
if ($showversion)
{
  die "$appname version $version copyright (c) 2010 Nicola L C Talbot\n",
    "Distributed under the LaTeX Project Public License.\n";
}

# Exactly one file name is required

if (@ARGV == 0)
{
   die "$appname: missing filename\n",
       "use --help for help\n";
}

if (@ARGV > 1)
{
   die "$appname: too many arguments (only one filename expected)\n",
       "use --help for help\n";
}

# Both --quieter and --batchtex make TeX run in batch mode

if ($quiet or $batchtex)
{
   $latexopts = '-interaction batchmode '.$latexopts;
}

# --quieter additionally reduces BibTeX's output

if ($quiet)
{
   $bibtexopts = '-terse '.$bibtexopts;
}

# Split the file name given on the command line into its base name,
# directory and extension (the extension may have been omitted).
my ($main_name, $main_path, $main_suffix) 
  = fileparse($ARGV[0], '\.(tex|ltx)');

$main_suffix = '.tex' unless $main_suffix;

# Remember the starting directory so that it can be restored later
my $org_dir = cwd();

# Everything from here on is done relative to the directory that
# contains the main document
chdir $main_path
  or die "Can't change directory to '$main_path': $!\n";

my $mainaux = "$main_name.aux";

# If the aux file doesn't exist or the aux file is older than
# the main file, run latex

if (not -e $mainaux or (-M $mainaux > -M "$main_name$main_suffix"))
{
   latex($main_name);
}

# Parse aux file

open my $main_aux_fh, '<', $mainaux
  or die "Can't open '$mainaux' $!\n";

message("Reading '$mainaux'...\n");

my $main_bibdata = 0;  # set if the main aux file contains \bibdata
my @imports = ();      # one hash ref per \@jmlr@import line, in order
my %pagerefs = ();     # label prefix => { start => ..., end => ... }

# Literal braces in the patterns below are escaped because an
# unescaped '{' in a regular expression is deprecated in recent
# versions of Perl.

while (my $line = <$main_aux_fh>)
{
   if ($line =~ /\\bibdata\b/)
   {
     $main_bibdata = 1;
   }
   elsif ($line =~ /\\\@jmlr\@import\{(.*)\}\{(.*)\}\{(.*)\}/)
   {
      # An imported article: \@jmlr@import{label}{path}{name}
      my $import = 
        {
          label => $1,
          path  => $2,
          name  => $3
        };

      # Store the name without its extension
      $import->{'name'} =~s/\.(tex|ltx)\Z//;

      push @imports, $import;
   }
   elsif ($line =~ /\\contentsline\s*\{chapterauthor\}\{(.*)\}\{\}\{\}\}\s*$/
     and $#imports > -1)
   {
      # The author entry belongs to the most recently seen import
      $imports[$#imports]->{'author'} = $1;
   }
   elsif ($line =~ /^\\newlabel\s*\{([^}]*)jmlrstart\}\{\{([^}]*)\}\{([^}]*)\}/)
   {
      # The second field of the label data is used below as the
      # first page of the article's page range
      my $label = $1;
      
      $pagerefs{$label}->{'start'} = $3;
   }
   elsif ($line =~ /^\\newlabel\s*\{([^}]*)jmlrend\}\{\{([^}]*)\}\{([^}]*)\}/)
   {
      # As above, but for the last page of the range
      my $label = $1;

      $pagerefs{$label}->{'end'} = $3;
   }
}

close $main_aux_fh;

# Replace any instances of \articlepagesref

foreach my $import (@imports)
{
   # Nothing to replace if no author entry was found for this article
   next unless defined $import->{'author'};

   my $label = $import->{'label'};

   my $pages = $pagerefs{$label}->{'start'}.'--'
             . $pagerefs{$label}->{'end'};

   # \Q...\E: the label is literal text, not a pattern
   $import->{'author'}=~s/\\articlepagesref\s*\{\Q$label\E\}/$pages/;
}

if ($html)
{
   # The HTML build uses two directories below the main document's
   # directory: html-src and html. Create whichever of them doesn't
   # already exist.

   foreach my $dir ('html-src', 'html')
   {
      next if -d $dir;

      mkdir $dir
        or die "Can't create directory '$dir': $!\n";
   }
}

# Iterate through each imported article

# Absolute path of the main document's directory (which is the
# current directory at this point). The loop below returns here
# after each excursion into another directory; using the absolute
# path also works when the main file was given by an absolute path.
my $book_dir = cwd();

foreach my $import (@imports)
{
   my $article    = $import->{'name'};
   my $importbase = fname($import->{'path'}, $article);

   # Runs LaTeX on the article from within the article's own
   # directory, preceded by BibTeX if the argument is true, and
   # then returns to the main document's directory.
   my $build_article = sub
   {
      my ($with_bibtex) = @_;

      chdir $import->{'path'}
        or die "Can't change directory to '$import->{path}': $!\n";

      bibtex($article) if $with_bibtex;
      latex($article);

      chdir $book_dir
        or die "Can't change directory to '$book_dir': $!\n";
   };

   # Check the aux file of this article

   my $aux = "$importbase.aux";

   # The aux file should exist because running LaTeX on the
   # main file will create the aux file.

   open my $aux_fh, '<', $aux or die "Can't open '$aux' $!\n";

   message("Reading '$aux'...\n");

   my $bibdata = 0;

   while (my $line = <$aux_fh>)
   {
      if ($line =~ /\\bibdata\b/)
      {
         $bibdata = 1;
         last;
      }
   }
  
   close $aux_fh;

   # Do we need a bibtex run?

   if ($bibdata)
   {
      my $log = "$importbase.log";

      # If the log file doesn't exist, run LaTeX

      $build_article->(0) unless -e $log;

      open my $log_fh, '<', $log or die "Can't open '$log' $!\n";

      message("Reading '$log'...\n");

      my $runbibtex = 0;

      while (my $line = <$log_fh>)
      {
         if ($line =~ /There were undefined citations\./)
         {
            # Run bibtex and latex
            $runbibtex = 1;

            last;
         }
      }

      close $log_fh;

      $build_article->(1) if $runbibtex;
   }

   if ($html)
   {
      # If html is required, we also need pdf versions of the
      # individual articles.

      # Does the pdf file exist?

      $build_article->(0) unless -e "$importbase.$format";

      # Do we need a rerun?

      $build_article->(0) if needs_rerun($importbase);

      # The article's working directory below html-src is named
      # after the article's own directory, or after the article
      # itself if it lives in the main document's directory.
      # Empty, '.' and '..' components are dropped so that the
      # working directory is always inside html-src and can never
      # coincide with the directory holding the article's source.

      my $dirsep = ($dd eq "\\") ? qr{[/\\]} : qr{/};

      my @levels = grep { length($_) and $_ ne '.' and $_ ne '..' }
                   split($dirsep, $import->{'path'});

      @levels = ($article) unless @levels;

      my $importdir = join('/', @levels);

      # Check the appropriate subdirectory is in html-src,
      # creating one level at a time

      my $name = 'html-src';

      foreach my $level (@levels)
      {
         $name = fname($name, $level);

         unless (-d $name)
         {
            mkdir $name 
              or die "Can't create directory '$name': $!\n";
         }
      }

      # Relative path from the working directory back to the
      # main document's directory

      my $updirs = '../' x (scalar(@levels) + 1);

      my $text = '';

      # Read the LaTeX file and store everything up to
      # the end of the abstract

      my $tex = "$importbase.tex";

      # The extension was removed from the article's name when the
      # main aux file was read, so allow for a .ltx source as well

      if (not -e $tex and -e "$importbase.ltx")
      {
         $tex = "$importbase.ltx";
      }

      open my $tex_fh, '<', $tex or die "Can't open '$tex': $!\n";

      while (my $line = <$tex_fh>)
      {
         # This doesn't handle complicated cases, such as
         # the author using \abstract ... \endabstract
         # or commenting out the abstract with conditionals
         if ($line =~ /^([^%]*)\\end\{abstract\}/)
         {
            # Keep the line only up to and including \end{abstract}
            $text .= $&;
            last;
         }

         $text .= $line;
      }

      close $tex_fh;

      # Add the 'html' class option:
      unless ($text=~
       s/^([^%]*)\\documentclass\s*\[([^\]]*)\]/$1\\documentclass[$2,html]/m)
      {
        $text=~s/^([^%]*)\\documentclass\s*/$1\\documentclass[html]/m;
      }

      my $begindoc = '';

      # Set the authors
      if (defined($import->{'author'}))
      {
         my $author = $import->{'author'};

         # Emphasize everything up to the first semi-colon
         $author=~s/^([^;]*);/\\textbf{\\emph{$1};}/;

         $begindoc .= "\\jmlrauthors{$author}";
      }

      # Add content div
      $text=~s/^([^%]*)\\begin\{document\}/$&$begindoc\\HCode{<div id="content">}/m;

      # Create file containing the abstract

      my $absfile = "html-src/$importdir/".$article.'.tex';

      open my $abs_fh, '>', $absfile
        or die "Can't create '$absfile': $!\n";

      print {$abs_fh} "\\batchmode", $eol if ($batchtex or $quiet);

      my $texpath = $import->{path};

      # TeX needs forward slashes in paths
      $texpath=~s/\\/\//g if ($dd eq '\\');

      print {$abs_fh}
        "\\makeatletter",$eol,
        "\\def\\input\@path{{$updirs$texpath/}}$eol",
        "\\makeatother",$eol,
        $text, $eol, 
        "\\HCode{</div>}", $eol,
        "\\end{document}",$eol;

      close $abs_fh
        or die "Can't write '$absfile': $!\n";

      # Create the cfg file

      my $cfg = "html-src/$importdir/jmlr.cfg";

      open my $cfg_fh, '>', $cfg or die "Can't create '$cfg': $!\n";

print {$cfg_fh} <<END_CFG;
\\Preamble{html}
\\begin{document}
\\Css{div.maketitle {text-align:left;}}
\\Css{h2.titleHead {text-align:left;}}
\\Css{. {font-family:verdana,helvetica,sans-serif}}
\\Css{a {text-decoration:none;color:\\#3030a0}}
\\Css{.cmbx-10x-x-109{ font-weight: bold;}}
\\Css{.cmbxti-10x-x-109{ font-weight: bold; font-style: italic;}}
\\EndPreamble
END_CFG

      close $cfg_fh
        or die "Can't write '$cfg': $!\n";

      # Run htlatex

      # Change directory
      chdir "html-src/$importdir"
        or die "Can't change directory to 'html-src/$importdir': $!\n";

      # system returns the wait status: -1 if the command couldn't
      # be started, otherwise the exit code is in the high byte and
      # a terminating signal (if any) in the low seven bits.
      my $status = system("htlatex \"".$article."\" \"jmlr\"");

      if ($status)
      {
         die "htlatex could not be started: $!\n" if $status == -1;
         die 'htlatex was killed by signal ', $status & 127, "\n"
           if $status & 127;
         die 'htlatex failed with exit code ', $status >> 8, "\n";
      }

      # Go back to main directory
      chdir $book_dir
        or die "Can't change directory to '$book_dir': $!\n";

      # Copy the html file to the html directory, but rename
      # the css file to jmlr.css

      my $infile = "html-src/$importdir/".$article.".html";

      open my $in_fh, '<', $infile or die "Can't open '$infile': $!\n";

      my $outfile = "html/".$article.".html";

      open my $out_fh, '>', $outfile
        or die "Can't open '$outfile': $!\n";

      while (my $line = <$in_fh>)
      {
         # \Q...\E: the article name is literal text, not a pattern
         $line =~ s/href="\Q$article\E\.css"/href="jmlr.css"/;

         print {$out_fh} $line;
      }

      close $out_fh
        or die "Can't write '$outfile': $!\n";

      close $in_fh;

      # Copy css file

      copy("html-src/$importdir/$article.css",
           "html/jmlr.css")
      or die "Can't copy 'html-src/$importdir/$article.css'",
             " to 'html/jmlr.css': $!\n";

      # Copy pdf file

      copy("$importbase.$format",
           "html/$article.$format")
      or die "Can't copy '$importbase.$format'",
             " to 'html/$article.$format': $!\n";
   }
}

# do we need to run bibtex on the main document?

bibtex($main_name) if $main_bibdata;

# Build the requested versions of the main document: the on-line
# version first, then the print version. Each version is built
# once and then rebuilt for as long as the log file asks for a
# rerun, but no more than twice.

foreach my $variant ([$online, \&latexonline], [$print, \&latexprint])
{
   my ($wanted, $build) = @{$variant};

   next unless $wanted;

   $build->($main_name);

   foreach my $attempt (1 .. 2)
   {
      # do we need a rerun?
      last unless needs_rerun($main_name);

      message("Rerun required\n");
      $build->($main_name);
   }
}

if ($html)
{
   # Make the index file

   my $indexfile = fname('html-src', "index");

   my $preamble = '';

   open my $index_fh, '>', "$indexfile.tex"
    or die "Can't open '$indexfile.tex': $!\n";

   # Use the extension that was found on the command line (.tex
   # unless .ltx was given) rather than assuming .tex
   my $main_tex = "$main_name$main_suffix";

   open my $main_fh, '<', $main_tex
     or die "Can't open '$main_tex': $!\n";

   print {$index_fh} "\\batchmode", $eol if ($batchtex or $quiet);

   print {$index_fh}
        "\\makeatletter",$eol,
        "\\def\\input\@path{{../}}$eol",
        "\\makeatother",$eol,
        "\\def\\jmlrgrayscale{0}",$eol;

   # Collect the main document up to and including \maketitle,
   # adding the html class option and the content div on the way.
   # (Literal braces in the patterns are escaped because an unescaped
   # '{' is deprecated in recent versions of Perl.)

   while (my $line = <$main_fh>)
   {
      unless ($line =~
        s/^([^%]*)\\documentclass\[([^\]]*)\]/$1\\documentclass[$2,html]/)
      {
         $line =~ s/^([^%]*)\\documentclass/$&\[html\]/;
      }

      $line =~ s/^([^%]*)\\begin\{document\}/$&\\HCode{<div id="content">}/;

      if ($line =~ /^([^%]*)\\maketitle/)
      {
         # Discard anything that follows \maketitle on this line
         $preamble .= join('', $1, "\\maketitle", $eol);
         last;
      }

      $preamble .= $line;
   }

   close $main_fh;

   # Find the book logo
   if ($preamble=~/\\logo\s*\{(%\s*\n)?\\includegraphics\s*(\[[^\]]*\])?\{([^}]*)\}\}/m)
   {
      my $texpath = $3;
      my $orgtexpath = $texpath;
      $texpath=~s/\//\\/g if ($dd eq "\\");

      # The image may have been specified with or without its
      # extension. Copy the first file that exists to both html
      # directories and remember which extension was needed.

      my $ext = '';

      foreach my $candidate ('', '.png', '.jpg', '.gif')
      {
         next unless -e "$texpath$candidate";

         foreach my $dest ('html', 'html-src')
         {
            copy("$texpath$candidate", $dest)
              or die "Can't copy '$texpath$candidate' to '$dest': $!\n";
         }

         $ext = $candidate;

         last;
      }

      # The copies sit directly in the html directories, so the
      # image is now referenced by its file name alone

      my $img = basename($texpath);
      
      # \Q...\E: the original path is literal text, not a pattern.
      # ${1} is the optional argument of \includegraphics, if any.

      if ($logourl)
      {
         $preamble=~s/\\includegraphics(\[[^\]]*\])?\{\Q$orgtexpath\E\}/\\href{$logourl}{\\includegraphics${1}{$img$ext}}/mg;
      }
      else
      {
         $preamble=~s/\\includegraphics(\[[^\]]*\])?\{\Q$orgtexpath\E\}/\\includegraphics${1}{$img$ext}/mg;
      }
   }

   print {$index_fh} $preamble, $eol;

   # Parse TOC

   my $toc = "$main_name.toc";

   open my $toc_fh, '<', $toc or die "Can't open '$toc': $!\n";

   # Index into @imports of the article that the next chapterauthor
   # entry belongs to (the entries are taken to be in import order)
   my $idx = 0;

   while (my $line = <$toc_fh>)
   {
      if ($line =~ /^\\tocpart\s*\{(.*)\}\s*$/)
      {
         print {$index_fh} "\\begin{center}\\bfseries $1\\end{center}$eol";
      }
      elsif ($line =~ /\\contentsline\s*\{papertitle\}\{(.*)\}\{[^{}]*\}\{[^{}]*\}\s*$/)
      {
         print {$index_fh} "\\par\\noindent $1$eol";
      }
      elsif ($line =~ /\\contentsline\s*\{chapterauthor\}\{(.*)\}\{[^{}]*\}\{[^{}]*\}\s*$/)
      {
         my $details = $1;

         # Emphasize everything up to the first semi-colon
         $details=~s/([^;]*);/\\textbf{\\emph{$1};}/;

         my $label = $imports[$idx]->{'label'};
         my $pages = $pagerefs{$label}->{'start'}.'--'
                   . $pagerefs{$label}->{'end'};

         # \Q...\E: the label is literal text, not a pattern
         $details=~s/\\articlepagesref\s*\{\Q$label\E\}/$pages/;

         print {$index_fh} "\\par $details$eol";

         my $name = $imports[$idx]->{'name'};

         # Link to the abstract and to the article itself; the latter
         # has the extension given by --format
         print {$index_fh} "\\par [\\href{$name.html}{abs}] [\\href{$name.$format}{$format}]$eol$eol";

         $idx++;
      }
   }

   close $toc_fh;

   print {$index_fh} "\\HCode{</div>}\\end{document}$eol";

   close $index_fh
     or die "Can't write '$indexfile.tex': $!\n";

   # Go into html-src directory and run htlatex

   chdir('html-src')
     or die "Can't change directory to 'html-src': $!\n";

   # system returns the wait status: -1 if the command couldn't
   # be started, otherwise the exit code is in the high byte and
   # a terminating signal (if any) in the low seven bits.
   my $status = system('htlatex index');

   if ($status)
   {
      die "htlatex could not be started: $!\n" if $status == -1;
      die 'htlatex was killed by signal ', $status & 127, "\n"
        if $status & 127;
      die 'htlatex failed with error code ', $status >> 8, "\n";
   }

   chdir('..')
     or die "Can't change directory to '..': $!\n";

   # Copy to html directory

   my $inname = fname('html-src', 'index.html');
   my $outname = fname('html', 'index.html');

   open my $in_fh, '<', $inname or die "Can't open '$inname': $!\n";

   open my $out_fh, '>', $outname or die "Can't open '$outname': $!\n";

   while (my $line = <$in_fh>)
   {
      # All the html files share the one style sheet
      $line =~ s/href="index\.css"/href="jmlr.css"/;

      print {$out_fh} $line;
   }

   close $out_fh
     or die "Can't write '$outname': $!\n";

   close $in_fh;
}

# Return to original directory
chdir $org_dir;

# Subroutines

# Print the arguments to STDOUT, unless messages have been
# suppressed with --quieter.
sub message
{
   $quiet or print @_;
}

# Run LaTeX on the given file.
#
# The command line is made up of $latexapp, $latexopts and the
# (quoted) file name, and is run through the shell.
#
# Dies if the application can't be started, is killed by a signal
# or finishes with a non-zero exit code.
sub latex{
   my $file = shift;

   # system returns the wait status: -1 if the command couldn't
   # be started, otherwise the exit code is in the high byte and
   # a terminating signal (if any) in the low seven bits.
   my $status = system(join(' ', $latexapp, $latexopts, "\"$file\""));

   if ($status)
   {
      die "**LaTeX run failed: can't start '$latexapp': $!**\n"
        if $status == -1;

      die '**LaTeX run killed by signal ', $status & 127, ".**\n",
          "Check '$file.log' for details\n"
        if $status & 127;

      die '**LaTeX run failed with exit code ', $status >> 8, ".**\n",
          "Check '$file.log' for details\n";
   }
}

# Build the on-line version of the given document.
#
# LaTeX is run on the file with \jmlrgrayscale defined as 0 and
# the resulting "<file>.<format>" is then copied to
# "<file>-online.<format>".
#
# Dies if LaTeX can't be started, is killed by a signal or finishes
# with a non-zero exit code, or if the copy fails.
sub latexonline{
   my $file = shift;

   # system returns the wait status: -1 if the command couldn't
   # be started, otherwise the exit code is in the high byte and
   # a terminating signal (if any) in the low seven bits.
   my $status = system(join(' ', $latexapp, $latexopts,
     "\"\\def\\jmlrgrayscale{0}\\input{$file}\""));

   if ($status)
   {
      die "**LaTeX run failed: can't start '$latexapp': $!**\n"
        if $status == -1;

      die '**LaTeX run killed by signal ', $status & 127, ".**\n",
          "Check '$file.log' for details\n"
        if $status & 127;

      die '**LaTeX run failed with exit code ', $status >> 8, ".**\n",
          "Check '$file.log' for details\n";
   }

   message("copying '$file.$format' to '$file-online.$format'\n");

   copy("$file.$format", "$file-online.$format")
     or die "Can't copy '$file.$format' to '$file-online.$format': $!\n";
}

# Build the print version of the given document.
#
# LaTeX is run on the file with \jmlrgrayscale defined as 1 and
# the resulting "<file>.<format>" is then copied to
# "<file>-print.<format>".
#
# Dies if LaTeX can't be started, is killed by a signal or finishes
# with a non-zero exit code, or if the copy fails.
sub latexprint{
   my $file = shift;

   # system returns the wait status: -1 if the command couldn't
   # be started, otherwise the exit code is in the high byte and
   # a terminating signal (if any) in the low seven bits.
   my $status = system(join(' ', $latexapp, $latexopts,
     "\"\\def\\jmlrgrayscale{1}\\input{$file}\""));

   if ($status)
   {
      die "**LaTeX run failed: can't start '$latexapp': $!**\n"
        if $status == -1;

      die '**LaTeX run killed by signal ', $status & 127, ".**\n",
          "Check '$file.log' for details\n"
        if $status & 127;

      die '**LaTeX run failed with exit code ', $status >> 8, ".**\n",
          "Check '$file.log' for details\n";
   }

   message("copying '$file.$format' to '$file-print.$format'\n");

   copy("$file.$format", "$file-print.$format")
     or die "Can't copy '$file.$format' to '$file-print.$format': $!\n";
}

# Run BibTeX on the given file.
#
# The command line is made up of $bibtexapp, $bibtexopts and the
# (quoted) file name, and is run through the shell.
#
# Dies if the application can't be started, is killed by a signal
# or finishes with a non-zero exit code.
sub bibtex{
   my $file = shift;

   # system returns the wait status: -1 if the command couldn't
   # be started, otherwise the exit code is in the high byte and
   # a terminating signal (if any) in the low seven bits.
   my $status = system(join(' ', $bibtexapp, $bibtexopts, "\"$file\""));

   if ($status)
   {
      die "**BibTeX run failed: can't start '$bibtexapp': $!**\n"
        if $status == -1;

      die '**BibTeX run killed by signal ', $status & 127, ".**\n",
          "Check '$file.blg' for details\n"
        if $status & 127;

      die '**BibTeX run failed with exit code ', $status >> 8, ".**\n",
          "Check '$file.blg' for details\n";
   }
}

# Determine whether LaTeX needs to be run again on a document.
#
# The argument is the name of the document without an extension.
# Returns 1 if "<file>.log" can't be opened or contains the
# message "Rerun to get cross-references right.", and 0 otherwise.
#
# A lexical file handle, a named loop variable and the three
# argument form of open are used so that neither the caller's $_
# nor a global file handle is disturbed, and so that the file
# name is never interpreted as an open mode.
sub needs_rerun{
  my $file = shift;

  # Scan log file for rerun message

  my $log = "$file.log";

  # If there's no log file a run is needed
  open my $log_fh, '<', $log or return 1;

  my $rerun = 0;

  while (my $line = <$log_fh>)
  {
     if ($line =~ /Rerun to get cross-references right\./)
     {
        $rerun = 1;
        last;
     }
  }

  close $log_fh;

  return $rerun;
}

# Build a path by joining the arguments with the directory
# divider for the current operating system ($dd).
sub fname
{
   my @parts = @_;

   return join($dd, @parts);
}

1;
