Web::Chain project:    Web/Pro/Interact.pm


     package Web::Pro::Interact;
#                                doom@kzsu.stanford.edu
#                                20 Oct 2004

=head1 NAME

Web::Pro::Interact - the user-level interface between 
   the df script and the Web::* modules.

=head1 SYNOPSIS

   use Web::Pro::Interact;



=head1 DESCRIPTION

A collection of proceedural routines that are run directly 
by the df script (which does very little processing of it's 
own).  These are essentially the user-level commands that 
make use of the Web::* modules.  This is oriented directly 
towards working on DF pages ("doomfiles"), even more so than 
the Web::* modules are.  The input format is almost always 
Rawtext, and the input location is as specified in Web::Definitions
as $DF_RAWTEXT.  The output format is almost always Html, 
in the $DF_LOC location, also as specified in Web::Definitions.

These locations can be overridden for testing purposes: 
a reference to the df scripts hash of options is passed in 
as the final argument.

=head2 EXPORT

None by default.

=over 

=cut 

use 5.006;
use strict; 
use warnings;
use Carp;

use Web::Chain::IO;
use Web::Definitions qw( $DEBUG
                         $DF_VERSION
                         $DF_LOC
                         $DF_RAWTEXT
                         $DF_CONTENTS_NODE_NAME
                         $DF_WHATSNEW_NODE_NAME
                         $DF_WHATSNEW_NOW_MARKER
                         $DF_NEW_NODES_LOG
                         $DF_PUSH_SCP_TARGET
                         $DF_PUSH_RSYNCH_TARGET
                         $DF_NODE_NAME_RULE
                        );

require Exporter;
our @ISA = qw(Exporter);
our %EXPORT_TAGS = ( 'all' => [ qw(
     check_links    
     push_out
     process_rawtext
     dummy_testes
) ] );
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT = qw(  );
our $VERSION = $DF_VERSION;

=item B<dummy_testes> - echoes arguments to stderr

=cut 

sub dummy_testes {
  my $opt_ref = pop;
  print STDERR "dummy_testes: " . join(" ", @_) . "\n";
}

=item B<process_rawtext> - given a rawtext filename, converts it to 
   html and splices it into the DF project after the node 
   named in the second argument. 

=cut 

sub process_rawtext {
  my $subname = ( caller(0) )[3];
  my $opt_ref = pop;
  my $DEBUG = $opt_ref->{d};
  ($DEBUG) && print STDERR "process_rawtext args: " . join(" ", @_) . "\n";

  my $rawtext_name = shift;  
  my $browse_location = shift;

  my $rawtext_location =  $opt_ref->{r} || $DF_RAWTEXT;
  my $rawtext_file = "$rawtext_location/$rawtext_name";

  my $working_location = $opt_ref->{h} || $DF_LOC;

  # Read rawtext file into Chain structure
  my $dfh = Web::Chain::IO->new;             # Create a DF IO handle
  $dfh->input_location($rawtext_location);  
  $dfh->input_format('Rawtext');             
  my $chain = $dfh->input($rawtext_file);

  my @new_node_names = $chain->get_names;  

  # splice into mass of DF html files
  $dfh->output_location($working_location);
  my $ret = $dfh->output_format('Html');     # dynamically loads a Web::Chain::IO::Html

  # Do rcs mass check-out
  # $dfh->vc_check_out_output_location; # TODO (someday) Even better: integrate with 'output'
  chdir($working_location);
  system('co -l *.html');

  $dfh->output_splice( $chain, $browse_location );

  # log new node names to WHATSNEW.html
  my $datestamp = 
    $dfh->log_new_names( \@new_node_names);  # defaults to $DF_WHATSNEW_NODE_NAME;

  $dfh->generate_contents_node;

  # do rcs mass check-in (unchanged files just revert)
  my $message = "$subname added: " . join(" ", @new_node_names);
  #  $dfh->vc_check_in_output_location($message); # TODO someday
  chdir($working_location);
  my ($cmd);
  $cmd= "ci -u -m\'$message\' -t-\'Created by $subname\' *.html";
  ($DEBUG) && print STDERR "cmd: $cmd\n";
  system($cmd);

  return $datestamp;
}


=item B<check_links> - searches the DF project for broken internal links.
   This works on $DF_LOC by default (as defined in Web::Definitions),
   but this can be overriden with the "-h" option.  Note: the "-r" 
   option is not meaningful for this method. 

=cut 

sub check_links {
  my $opt_ref = pop;
  my $DEBUG = $opt_ref->{d};
  ($DEBUG) && print STDERR "check_links args: " . join(" ", @_) . "\n";

  my $working_location = $opt_ref->{h} || $DF_LOC;

  my (%browse_exists, %links, $node_name, $place);

  # Initialize a DF IO handle
  my $dfh = Web::Chain::IO->new;
  $dfh->input_location($working_location);  
  $dfh->input_format('Html'); # dyanamically loads Web::Chain::IO::Html 

  # get the browse_sequence for the df location, 
  my $node_names_ref = $dfh->get_browse_sequence_from_input;

#  map{ $browse_exists{ $_ } = 1 } @{ $node_names_ref };
  @browse_exists{ @{ $node_names_ref } } = ();

  my $link_rule = 
    qr{<A HREF="($DF_NODE_NAME_RULE)\.html">\1</A>}o;

     # This rule matches links like this:
     #   <A HREF="DATA.html">DATA</A>
     # where 'DATA' will be captured to $1   

  # Build up the %links hash
  foreach $node_name (@{ $node_names_ref }) {
    ($DEBUG) && print STDERR "check_links: looking for links in $node_name\n";
    my $chain_singlet = $dfh->input($node_name, 1); # read just one node 
    my $body_ref = $chain_singlet->get_beg->get_body;

    my $lc = 8; # line count - initialize to size of headers - generalize, how?
    $_ = ${ $body_ref };
    while (m{^(.*?)$}msgx) {  # chunk, line at at time
      my $line = $1;
      while( $line =~ m{$link_rule}g ) { 

        # %links is a hash of arrays of arrays.
        # the link name is the key for each entry,
        # an "entry" as an array of all the places 
        # the link was found, where a "place" is a 
        # node name and a numeric offset.

        push @{ $links{ $1 } }, [$node_name, $lc];  # amazingly enough, this works
        ($DEBUG) && print STDERR "check_links: found link named: $1 in $node_name\n";
        $lc++; # increment line count
      }
    }
  }

  # report on any links that don't have destinations 
  # (i.e. aren't found in the browse_sequence)

  ($DEBUG) && print STDERR "list of links identified:\n";
  ($DEBUG) && print STDERR join (' ', (keys %links)) . "\n";

  my $report='';
  foreach my $link (sort keys %links) { 
    if ( not exists($browse_exists{ $link } ) ) { 
      ($DEBUG) && print STDERR "bad link found: $link\n";
      foreach $place ( sort ( @{ $links{ $link } } ) ) { 
        $report .= "Broken link: $link found in: " . $place->[0] . " at line " . $place->[1] . "\n";
      }
    }
  }
  return \$report;
}

=item B<push_out> - updates the website with the version in the 
  working location (via rsync, etc)

=cut 

sub push_out {
  my $opt_ref = pop;
  my $DEBUG = $opt_ref->{d};
  ($DEBUG) && print STDERR "push_out args: " . join(" ", @_) . "\n";

  my $doom_loc = $DF_LOC;  # Working location of doomfiles

  chdir($doom_loc);
  unlink <*~>;  # delete emacs backup files

  # Web::Definitions.pm (as of this writing):
  #   our $DF_PUSH_RSYNCH_TARGET = 'mirthles@shell.grin.net:/usr/home/mirthles/public_html';
  #   our $DF_PUSH_SCP_TARGET = "mirthles\@shell.grin.net:/usr/home/mirthles/public_html/doomfiles";

  #  (my $target = $DF_PUSH_RSYNCH_TARGET) =~ s/@/\@/g;
  my $target = $DF_PUSH_SCP_TARGET;
  my $cmd = "rsync -avz -e ssh $doom_loc/*.html $target";

  system($cmd);
}

1;
__END__

=back

=head1 REFERENCE

The %opts hash passed in from df should (as of this writing) 
have four fields: 

  d - debug
  v - verbose
  r - rawtext location (to override default $DF_RAWTEXT)
  h - html location (to override default $DF_LOC) 


=head1 SEE ALSO

L<Project Documentation|Web::Project>

=head1 AUTHOR

Joseph Brenner, E<lt>doom@kzsu.stanford.eduE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2004 by Joseph Brenner

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.2 or,
at your option, any later version of Perl 5 you may have available.

=head1 BUGS

None reported... yet.

=cut

     

Joseph Brenner, Sat Nov 6 17:04:11 2004