Web::Chain project:    Web/Chain/IO/Rawtext.pm


     package Web::Chain::IO::Rawtext;
#                                doom@kzsu.stanford.edu
#                                12 Oct 2004

=head1 NAME

Web::Chain::IO::Rawtext - adds capability to the IO object to 
  handle the DF Rawtext format.

=head1 SYNOPSIS

   See L<Web::Chain::IO>

=head1 DESCRIPTION


This class is used to create an input handle object used
internally by a Web::Chain::IO handle, to handle input and
(ultimately) output from files in the the "Rawtext" DF 
(aka "Doomfiles") format.

In other words, Web::Chain::IO is the interface, this class
is (one) of the implementations.

=cut

use 5.006;
use strict; 
use warnings;
use Carp;
use File::Basename;
use Web::Definitions qw( $DEBUG 
                         $DF_VERSION
                         $DF_START_RULE
                         $DF_END_RULE
                       );
use Web::Pro::Transform qw(text2xml);
use Web::Chain;

our $VERSION = $DF_VERSION;


=head1 METHODS

=over 

=item B<new> - create an input handle, used by a L<Web::Chain::IO> object.
   Example usage:  

   my $input_handle = $input_class->new( $self );

=cut

sub new { 
  my $class = shift;
  my $io_object = shift;
  bless { 
         _io_object => $io_object,   # pointing back at the owning object
         _input_location => $io_object->input_location,
        } , $class
}



sub input {

=item B<input> - takes the filename to be read (typically from the 
  already defined input location, but this can be over-ridden 
  by using a full path with this filename), and optionally a 
  numeric limit on how many nodes are to be read from this file 
  (in the case of Rawtext, that option is a nearly useless feature, 
  it's provided only to be consistent with other input modules). 
  Input returns a newly created chain object containing the 
  newly read data. 
  Example:  

   $dfh->input($file_name) or $dfh->input($file_name, $howmany)

=cut

   my ($self, $name, $howmany) = @_;
   my $subname = ( caller(0) )[3];

   my $location = $self->{_input_location}; 
# Note that this works because the new method (when run 
# by the "input_format" method of the IO object) copies 
# the information from the IO object into the local field
# _input_location.  Possibly this is anachronistic:
# in the original architecture, this class did not have 
# access to the entire calling object. Now input_format 
# passes to new the IO object itself, so interface and 
# implementation objects point at each other. 
# So alternately, this could be done as:
#   $location = $self->{_dbh}->{_input_location}

# TODO 
# Why not use a similar to the Web::Chain::IO::Html::input?
#   my ($begin_node_name, $end_node_name) = 
#      $self->_handle_input_arguments( $begin, $termination );

   # if $name has path included use that instead of the standard location
   my $in_file;
   if ( dirname($name) ne '.' ) {  
     $in_file = $name;
   } else { 
     $in_file = "$location/$name";
   }
   my $fh;
   open $fh, "<$in_file" or croak "$subname: Couldn't open $in_file for read: $!";

   my $chain = Web::Chain->new;   # Create a chain object to store data (will be returned)

   my $this_node;
   my $last_node_name = undef;

   # The flip-flop operator '..' is used below. It is true once the first pattern matches 
   # and stays true until the second matches.  What this does here is it prevents the 
   # "$DF_START_RULE" from matching again until the "$DF_END_RULE" has matched.
   # If we didn't need this behavior, we wouldn't need the if($opened) [...] $opened=0;
   # blocks: you'd just have a case statement using the pattern matches directly.

   # Just for reference, the imported regeps used here are something like:
   # $DF_START_RULE 
   #    ^([A-Zc0-9-_]{3,}*)
   # $DF_END_RULE
   #    ^==+\s*$
   # See Doomfile::Definitions

   # TODO - this now passes test, but the repeated checks of eof($fh) is 
   # really ugly hackery.  Clean this up.  (Also: add more tests.)

   my $bodyref;  
   my $opened = 0; 
   my $closed = 1;
   while (<$fh>) {
     if (
         ( $opened = (m/$DF_START_RULE/) ) 
         ..
         ( $closed = (s/$DF_END_RULE//) or eof($fh) ) 
        ) {

       if ($opened) {     # new node begins, nodename captured to $1
         $this_node = $chain->add_link($1, $last_node_name);
         $bodyref = anonymous_scalar(); # for each node, the text will go in a new anonymous scalar
         $opened = 0;
       } elsif ( ($closed) or eof($fh) ) { # node ends, stash the main body in this node of the chain.

         if (eof ($fh) ) {     # hackery daquiri 
           s{([\x20\t]+)$}{}x; # Strip all trailing whitespace
           ${ $bodyref } .= $_; 
         }  

         # Transform text into internal storage format
         $bodyref = text2xml( $bodyref );
         $this_node->set_body($bodyref);  
         # prepare to advance to next node
         $last_node_name = $this_node->name();
         $closed = 0;
       } else {            # regular line: save up lines as content, the body of the node
          
         s{([\x20\t]+)$}{}x; # Strip all trailing whitespace from every line

         ${ $bodyref } .= $_;
       }
     }
   }

   return $chain;
}


sub anonymous_scalar {  # Does this work?  (Yes.  And it's Cookbook recommended.)
  my $scaley;
  return \$scaley;
}

1;
__END__

=back 

=head1 FUTURE DEVELOPMENT

  Methods are currently only provided for input.  
  Add output routines also (fairly easy really).

=head1 SEE ALSO

L<Project Documentation|Web::Project>


=head1 AUTHOR

Joseph Brenner, E<lt>doom@kzsu.stanford.eduE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2004 by Joseph Brenner

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.2 or,
at your option, any later version of Perl 5 you may have available.

=head1 BUGS

None reported... yet.

=cut

     

Joseph Brenner, Sat Nov 6 17:04:11 2004