Web::Chain project: Web/Chain/IO/Rawtext.pm
package Web::Chain::IO::Rawtext;
# doom@kzsu.stanford.edu
# 12 Oct 2004
=head1 NAME
Web::Chain::IO::Rawtext - adds capability to the IO object to
handle the DF Rawtext format.
=head1 SYNOPSIS
See L<Web::Chain::IO>
=head1 DESCRIPTION
This class is used to create an input handle object used
internally by a Web::Chain::IO handle, to handle input and
(ultimately) output from files in the the "Rawtext" DF
(aka "Doomfiles") format.
In other words, Web::Chain::IO is the interface, this class
is (one) of the implementations.
=cut
use 5.006;
use strict;
use warnings;
use Carp;
use File::Basename;
use Web::Definitions qw( $DEBUG
$DF_VERSION
$DF_START_RULE
$DF_END_RULE
);
use Web::Pro::Transform qw(text2xml);
use Web::Chain;
our $VERSION = $DF_VERSION;
=head1 METHODS
=over
=item B<new> - create an input handle, used by a L<Web::Chain::IO> object.
Example usage:
my $input_handle = $input_class->new( $self );
=cut
sub new {
my $class = shift;
my $io_object = shift;
bless {
_io_object => $io_object, # pointing back at the owning object
_input_location => $io_object->input_location,
} , $class
}
sub input {
=item B<input> - takes the filename to be read (typically from the
already defined input location, but this can be over-ridden
by using a full path with this filename), and optionally a
numeric limit on how many nodes are to be read from this file
(in the case of Rawtext, that option is a nearly useless feature,
it's provided only to be consistent with other input modules).
Input returns a newly created chain object containing the
newly read data.
Example:
$dfh->input($file_name) or $dfh->input($file_name, $howmany)
=cut
my ($self, $name, $howmany) = @_;
my $subname = ( caller(0) )[3];
my $location = $self->{_input_location};
# Note that this works because the new method (when run
# by the "input_format" method of the IO object) copies
# the information from the IO object into the local field
# _input_location. Possibly this is anachronistic:
# in the original architecture, this class did not have
# access to the entire calling object. Now input_format
# passes to new the IO object itself, so interface and
# implementation objects point at each other.
# So alternately, this could be done as:
# $location = $self->{_dbh}->{_input_location}
# TODO
# Why not use a similar to the Web::Chain::IO::Html::input?
# my ($begin_node_name, $end_node_name) =
# $self->_handle_input_arguments( $begin, $termination );
# if $name has path included use that instead of the standard location
my $in_file;
if ( dirname($name) ne '.' ) {
$in_file = $name;
} else {
$in_file = "$location/$name";
}
my $fh;
open $fh, "<$in_file" or croak "$subname: Couldn't open $in_file for read: $!";
my $chain = Web::Chain->new; # Create a chain object to store data (will be returned)
my $this_node;
my $last_node_name = undef;
# The flip-flop operator '..' is used below. It is true once the first pattern matches
# and stays true until the second matches. What this does here is it prevents the
# "$DF_START_RULE" from matching again until the "$DF_END_RULE" has matched.
# If we didn't need this behavior, we wouldn't need the if($opened) [...] $opened=0;
# blocks: you'd just have a case statement using the pattern matches directly.
# Just for reference, the imported regeps used here are something like:
# $DF_START_RULE
# ^([A-Zc0-9-_]{3,}*)
# $DF_END_RULE
# ^==+\s*$
# See Doomfile::Definitions
# TODO - this now passes test, but the repeated checks of eof($fh) is
# really ugly hackery. Clean this up. (Also: add more tests.)
my $bodyref;
my $opened = 0;
my $closed = 1;
while (<$fh>) {
if (
( $opened = (m/$DF_START_RULE/) )
..
( $closed = (s/$DF_END_RULE//) or eof($fh) )
) {
if ($opened) { # new node begins, nodename captured to $1
$this_node = $chain->add_link($1, $last_node_name);
$bodyref = anonymous_scalar(); # for each node, the text will go in a new anonymous scalar
$opened = 0;
} elsif ( ($closed) or eof($fh) ) { # node ends, stash the main body in this node of the chain.
if (eof ($fh) ) { # hackery daquiri
s{([\x20\t]+)$}{}x; # Strip all trailing whitespace
${ $bodyref } .= $_;
}
# Transform text into internal storage format
$bodyref = text2xml( $bodyref );
$this_node->set_body($bodyref);
# prepare to advance to next node
$last_node_name = $this_node->name();
$closed = 0;
} else { # regular line: save up lines as content, the body of the node
s{([\x20\t]+)$}{}x; # Strip all trailing whitespace from every line
${ $bodyref } .= $_;
}
}
}
return $chain;
}
sub anonymous_scalar { # Does this work? (Yes. And it's Cookbook recommended.)
my $scaley;
return \$scaley;
}
1;
__END__
=back
=head1 FUTURE DEVELOPMENT
Methods are currently only provided for input.
Add output routines also (fairly easy really).
=head1 SEE ALSO
L<Project Documentation|Web::Project>
=head1 AUTHOR
Joseph Brenner, E<lt>doom@kzsu.stanford.eduE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2004 by Joseph Brenner
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.2 or,
at your option, any later version of Perl 5 you may have available.
=head1 BUGS
None reported... yet.
=cut
Joseph Brenner,
Sat Nov 6 17:04:11 2004