#!/usr/local/bin/perl
#
#    xtrcode - extract contents of LaTeX environments from a LaTeX file
#    Copyright (C) 2000  Thomas Ruedas
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# xtrcode extracts contents of LaTeX environments from a LaTeX file,
# e.g. program code in verbatim environments from a program documentation.
# Version: 0.2 (Jan 12, 2000)
# Author: Thomas Ruedas (ruedas@geophysik.uni-frankfurt.de)
# URL: http://www.geophysik.uni-frankfurt.de/~ruedas/progs.html
#
# NOTE: This program requires Perl 5. You may need to modify the first line
# according to the location of Perl 5 on your system.
use 5.006;
use strict;
use warnings;

# Main program: parse the command line, then copy the bodies of the marked
# LaTeX environments from <texfile> either into one code file (default) or
# into the files named on the marker lines (-a).

$| = 1;    # unbuffered STDOUT

# ---- command line ---------------------------------------------------------
my $all = 0;     # -a: distribute the code over the files named by the markers
my $envtype;     # -e<environment>: environment whose body is extracted
my $mpatt;       # -p<pattern>: marker pattern (used as a regular expression)
my $texfile;     # first non-option argument
my $codefile;    # second non-option argument (ignored once -a has been seen)

# Note: "defined @ARGV" is a fatal error in current Perl releases; testing
# the array in boolean context is the portable way to detect "no arguments".
usage() unless @ARGV;    # usage() prints the help text and exits

foreach my $arg (@ARGV) {
    if ($arg eq "-a") {
        $all = 1;
    } elsif ($arg =~ /^-e/) {
        $envtype = substr($arg, 2);
    } elsif ($arg =~ /^-p/) {
        # Only '@' and '+' are escaped; every other character of the pattern
        # keeps its regular-expression meaning.
        $mpatt = substr($arg, 2);
        $mpatt =~ s/\@/\\\@/g;
        $mpatt =~ s/\+/\\\+/g;
    } elsif ($arg eq "-h") {
        usage();
    } elsif (defined $texfile) {
        if ($all) {
            print "<codefile> $arg ignored.\n";
        } else {
            $codefile = $arg;
        }
    } else {
        $texfile = $arg;
    }
}

# ---- check arguments, set defaults if necessary ---------------------------
unless (defined $texfile) { die "Error: No <texfile> specified.\n"; }
$envtype = "verbatim" unless defined $envtype;
$mpatt   = '%%@'      unless defined $mpatt;

# $envtype is interpolated as a regular expression (not quoted), so regex
# metacharacters in an environment name keep their special meaning.
my $begin_re = qr/\\begin\{$envtype\}/i;
my $end_re   = qr/\\end\{$envtype\}/i;

# ---- process LaTeX file ---------------------------------------------------
open(my $tex, '<', $texfile)
    or die "Error: Cannot open $texfile for reading: $!\n";

if (!$all) {
    # Extract into a single file.  With a <codefile> argument only the
    # environments whose marker names that file are taken; without one, all
    # marked environments go to "xtrcode.out".
    # A null pattern (and no <codefile>) is special: there is no separate
    # marker line, the \begin line itself is the line that "matches".
    my $null_marker = ($mpatt eq "" && !defined $codefile);
    my $marker_re;
    if (defined $codefile) {
        # Quote the file name and require it to end at whitespace or end of
        # line, so that "foo.c" selects neither "fooXc" nor "foo.cpp".
        $marker_re = qr/^$mpatt \Q$codefile\E(?!\S)/;
    } else {
        $codefile  = "xtrcode.out";
        $marker_re = qr/^$mpatt/;
    }

    open(my $code, '>', $codefile)
        or die "Error: Cannot open $codefile for writing: $!\n";
    my $ncode = 0;
    while (defined(my $line = <$tex>)) {
        next unless $line =~ $marker_re;
        unless ($null_marker) {
            # the environment must start on the line after the marker
            $line = <$tex>;
            last unless defined $line;
        }
        next unless $line =~ $begin_re;
        $ncode += _copy_environment($tex, $code, $begin_re, $end_re);
    }
    close($code) or die "Error: Cannot close $codefile: $!\n";
    print "$ncode lines of extracted code written to $codefile.\n";
} else {
    # Extract and put into individual code files named on the marker lines.
    # With an empty pattern every input line would be taken as a file name.
    die "Error: -a requires a non-empty marker pattern.\n" if $mpatt eq "";

    my $marker_re = qr/^$mpatt/;
    my %lines_in;    # code file name => number of lines written in this run
    while (defined(my $line = <$tex>)) {
        next unless $line =~ $marker_re;
        # The file name is whatever follows the text the marker actually
        # matched ($+[0] is the end offset of that match); the length of the
        # escaped pattern string would be wrong for patterns with '@' or '+'.
        my $name = substr($line, $+[0]);
        $name =~ s/^\s+//;
        $name =~ s/\s+$//;
        next if $name eq "";    # marker without a file name

        $line = <$tex>;
        last unless defined $line;
        next unless $line =~ $begin_re;

        # Start each file afresh the first time it is seen in this run and
        # append further pieces, so that re-running does not pile up old
        # output and the reported line counts match the file contents.
        my $mode = exists $lines_in{$name} ? '>>' : '>';
        open(my $code, $mode, $name)
            or die "Error: Cannot open $name for writing: $!\n";
        $lines_in{$name} = 0 unless exists $lines_in{$name};
        $lines_in{$name} += _copy_environment($tex, $code, $begin_re, $end_re);
        close($code) or die "Error: Cannot close $name: $!\n";
    }

    my $nfiles = keys %lines_in;
    print "$nfiles files have been extracted from $texfile:\n\n";

    # The formats read these package variables.
    our ($outfile, $nlines) = ('', '');
    # The two fields of the header line are only used as padding, hence the
    # empty strings on the argument line.
    format STDOUT_TOP =
@<<<file@>>>>>>>>>>>>>>>>>>>>>>>>>>>> no. of lines
'', ''
--------------------------------------------------
.
    # This first write emits the page header followed by one blank record,
    # even when no file was extracted.
    write;
    format STDOUT =
   @<<<<<<<<<<<<<<<<<<<<<<<<<<@>>>>>>>>>>>>>>>>>>>
$outfile,$nlines
.
    foreach my $name (sort keys %lines_in) {
        $outfile = $name;
        $nlines  = $lines_in{$name};
        write;
    }
}
close($tex);
exit;

# _copy_environment($in, $out, $begin_re, $end_re)
#   Copies lines from handle $in to handle $out until the \end that closes
#   the environment whose \begin line has already been read.  Nested
#   \begin/\end pairs of the same environment are copied along with the
#   body; the closing \end line itself is not.  Reading also stops at end of
#   file.  Returns the number of lines written; dies if a write fails.
sub _copy_environment {
    my ($in, $out, $begin_re, $end_re) = @_;
    my $depth  = 1;
    my $copied = 0;
    while (defined(my $line = <$in>)) {
        if ($line =~ $end_re) {
            last if --$depth == 0;
        } elsif ($line =~ $begin_re) {
            ++$depth;
        }
        print {$out} $line or die "Error: Write failed: $!\n";
        ++$copied;
    }
    return $copied;
}

# usage: print the command-line help to STDOUT and terminate the program.
sub usage {
    # One list element per output line; the lines are joined with newlines
    # and a final newline is appended.
    my @help_lines = (
        "Usage: xtrcode <options> texfile <codefile>",
        "\tOptions:",
        "\t  -a - extract all code; this is for code which is supposed to be",
        "\t       distributed over different target files, so do not use it",
        "\t       together with <codefile>",
        "\t  -e<environment> - extract content of <environment>; default is",
        "\t                    [Vv]erbatim",
        "\t  -p<pattern> - marker pattern for environments to extract (see below)",
        "\t                a null pattern is possible",
        "\t  -h - show this help",
        "",
        "texfile is a TeX source file containing code in some kind of LaTeX environment.",
        "The environment containing the code must be preceded by a line beginning with",
        "a marker pattern (%%\@ by default), optionally followed by a program source",
        "name <codefile>.",
    );
    print join("\n", @help_lines) . "\n";
    exit;
}