Web::Chain project: Web/Pro/Interact.pm
package Web::Pro::Interact;
# doom@kzsu.stanford.edu
# 20 Oct 2004
=head1 NAME
Web::Pro::Interact - the user-level interface between
the df script and the Web::* modules.
=head1 SYNOPSIS
use Web::Pro::Interact;
=head1 DESCRIPTION
A collection of proceedural routines that are run directly
by the df script (which does very little processing of it's
own). These are essentially the user-level commands that
make use of the Web::* modules. This is oriented directly
towards working on DF pages ("doomfiles"), even more so than
the Web::* modules are. The input format is almost always
Rawtext, and the input location is as specified in Web::Definitions
as $DF_RAWTEXT. The output format is almost always Html,
in the $DF_LOC location, also as specified in Web::Definitions.
These locations can be overridden for testing purposes:
a reference to the df scripts hash of options is passed in
as the final argument.
=head2 EXPORT
None by default.
=over
=cut
use 5.006;
use strict;
use warnings;
use Carp;
use Web::Chain::IO;
use Web::Definitions qw( $DEBUG
$DF_VERSION
$DF_LOC
$DF_RAWTEXT
$DF_CONTENTS_NODE_NAME
$DF_WHATSNEW_NODE_NAME
$DF_WHATSNEW_NOW_MARKER
$DF_NEW_NODES_LOG
$DF_PUSH_SCP_TARGET
$DF_PUSH_RSYNCH_TARGET
$DF_NODE_NAME_RULE
);
require Exporter;
our @ISA = qw(Exporter);
our %EXPORT_TAGS = ( 'all' => [ qw(
check_links
push_out
process_rawtext
dummy_testes
) ] );
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT = qw( );
our $VERSION = $DF_VERSION;
=item B<dummy_testes> - echoes arguments to stderr
=cut
sub dummy_testes {
my $opt_ref = pop;
print STDERR "dummy_testes: " . join(" ", @_) . "\n";
}
=item B<process_rawtext> - given a rawtext filename, converts it to
html and splices it into the DF project after the node
named in the second argument.
=cut
sub process_rawtext {
my $subname = ( caller(0) )[3];
my $opt_ref = pop;
my $DEBUG = $opt_ref->{d};
($DEBUG) && print STDERR "process_rawtext args: " . join(" ", @_) . "\n";
my $rawtext_name = shift;
my $browse_location = shift;
my $rawtext_location = $opt_ref->{r} || $DF_RAWTEXT;
my $rawtext_file = "$rawtext_location/$rawtext_name";
my $working_location = $opt_ref->{h} || $DF_LOC;
# Read rawtext file into Chain structure
my $dfh = Web::Chain::IO->new; # Create a DF IO handle
$dfh->input_location($rawtext_location);
$dfh->input_format('Rawtext');
my $chain = $dfh->input($rawtext_file);
my @new_node_names = $chain->get_names;
# splice into mass of DF html files
$dfh->output_location($working_location);
my $ret = $dfh->output_format('Html'); # dynamically loads a Web::Chain::IO::Html
# Do rcs mass check-out
# $dfh->vc_check_out_output_location; # TODO (someday) Even better: integrate with 'output'
chdir($working_location);
system('co -l *.html');
$dfh->output_splice( $chain, $browse_location );
# log new node names to WHATSNEW.html
my $datestamp =
$dfh->log_new_names( \@new_node_names); # defaults to $DF_WHATSNEW_NODE_NAME;
$dfh->generate_contents_node;
# do rcs mass check-in (unchanged files just revert)
my $message = "$subname added: " . join(" ", @new_node_names);
# $dfh->vc_check_in_output_location($message); # TODO someday
chdir($working_location);
my ($cmd);
$cmd= "ci -u -m\'$message\' -t-\'Created by $subname\' *.html";
($DEBUG) && print STDERR "cmd: $cmd\n";
system($cmd);
return $datestamp;
}
=item B<check_links> - searches the DF project for broken internal links.
This works on $DF_LOC by default (as defined in Web::Definitions),
but this can be overriden with the "-h" option. Note: the "-r"
option is not meaningful for this method.
=cut
sub check_links {
my $opt_ref = pop;
my $DEBUG = $opt_ref->{d};
($DEBUG) && print STDERR "check_links args: " . join(" ", @_) . "\n";
my $working_location = $opt_ref->{h} || $DF_LOC;
my (%browse_exists, %links, $node_name, $place);
# Initialize a DF IO handle
my $dfh = Web::Chain::IO->new;
$dfh->input_location($working_location);
$dfh->input_format('Html'); # dyanamically loads Web::Chain::IO::Html
# get the browse_sequence for the df location,
my $node_names_ref = $dfh->get_browse_sequence_from_input;
# map{ $browse_exists{ $_ } = 1 } @{ $node_names_ref };
@browse_exists{ @{ $node_names_ref } } = ();
my $link_rule =
qr{<A HREF="($DF_NODE_NAME_RULE)\.html">\1</A>}o;
# This rule matches links like this:
# <A HREF="DATA.html">DATA</A>
# where 'DATA' will be captured to $1
# Build up the %links hash
foreach $node_name (@{ $node_names_ref }) {
($DEBUG) && print STDERR "check_links: looking for links in $node_name\n";
my $chain_singlet = $dfh->input($node_name, 1); # read just one node
my $body_ref = $chain_singlet->get_beg->get_body;
my $lc = 8; # line count - initialize to size of headers - generalize, how?
$_ = ${ $body_ref };
while (m{^(.*?)$}msgx) { # chunk, line at at time
my $line = $1;
while( $line =~ m{$link_rule}g ) {
# %links is a hash of arrays of arrays.
# the link name is the key for each entry,
# an "entry" as an array of all the places
# the link was found, where a "place" is a
# node name and a numeric offset.
push @{ $links{ $1 } }, [$node_name, $lc]; # amazingly enough, this works
($DEBUG) && print STDERR "check_links: found link named: $1 in $node_name\n";
$lc++; # increment line count
}
}
}
# report on any links that don't have destinations
# (i.e. aren't found in the browse_sequence)
($DEBUG) && print STDERR "list of links identified:\n";
($DEBUG) && print STDERR join (' ', (keys %links)) . "\n";
my $report='';
foreach my $link (sort keys %links) {
if ( not exists($browse_exists{ $link } ) ) {
($DEBUG) && print STDERR "bad link found: $link\n";
foreach $place ( sort ( @{ $links{ $link } } ) ) {
$report .= "Broken link: $link found in: " . $place->[0] . " at line " . $place->[1] . "\n";
}
}
}
return \$report;
}
=item B<push_out> - updates the website with the version in the
working location (via rsync, etc)
=cut
sub push_out {
my $opt_ref = pop;
my $DEBUG = $opt_ref->{d};
($DEBUG) && print STDERR "push_out args: " . join(" ", @_) . "\n";
my $doom_loc = $DF_LOC; # Working location of doomfiles
chdir($doom_loc);
unlink <*~>; # delete emacs backup files
# Web::Definitions.pm (as of this writing):
# our $DF_PUSH_RSYNCH_TARGET = 'mirthles@shell.grin.net:/usr/home/mirthles/public_html';
# our $DF_PUSH_SCP_TARGET = "mirthles\@shell.grin.net:/usr/home/mirthles/public_html/doomfiles";
# (my $target = $DF_PUSH_RSYNCH_TARGET) =~ s/@/\@/g;
my $target = $DF_PUSH_SCP_TARGET;
my $cmd = "rsync -avz -e ssh $doom_loc/*.html $target";
system($cmd);
}
1;
__END__
=back
=head1 REFERENCE
The %opts hash passed in from df should (as of this writing)
have four fields:
d - debug
v - verbose
r - rawtext location (to override default $DF_RAWTEXT)
h - html location (to override default $DF_LOC)
=head1 SEE ALSO
L<Project Documentation|Web::Project>
=head1 AUTHOR
Joseph Brenner, E<lt>doom@kzsu.stanford.eduE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2004 by Joseph Brenner
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.2 or,
at your option, any later version of Perl 5 you may have available.
=head1 BUGS
None reported... yet.
=cut
Joseph Brenner,
Sat Nov 6 17:04:11 2004