File Coverage

File:C4/TTParser.pm
Coverage:13.1%

linestmtbrancondsubtimecode
1#!/usr/bin/env perl
2#simple parser for HTML with Template Toolkit directives. Tokens are put into @tokens and are accesible via next_token and peep_token
3package C4::TTParser;
4
2
2
2
2625
18
182
use base qw(HTML::Parser);
5
2
2
2
8
4
127
use C4::TmplToken;
6
2
2
2
9
3
101
use strict;
7
2
2
2
13
3
3337
use warnings;
8
9#seems to be handled post tokenizer
10##hash where key is tag we are interested in and the value is a hash of the attributes we want
11#my %interesting_tags = (
12# img => { alt => 1 },
13#);
14
15#tokens found so far (used like a stack)
16my ( @tokens );
17
18#shiftnext token or undef
19sub next_token{
20
0
    return shift @tokens;
21}
22
23#unshift token back on @tokens
24sub unshift_token{
25
0
    my $self = shift;
26
0
    unshift @tokens, shift;
27}
28
29#have a peep at next token
30sub peep_token{
31
0
    return $tokens[0];
32}
33
34#wrapper for parse
35#please use this method INSTEAD of the HTML::Parser->parse_file method (and HTML::Parser->parse)
36#signature build_tokens( self, filename)
37sub build_tokens{
38
0
    my ($self, $filename) = @_;
39
0
    $self->{filename} = $filename;
40
0
    $self->handler(start => "start", "self, line, tagname, attr, text"); #signature is start( self, linenumber, tagname, hash of attributes, origional text )
41
0
    $self->handler(text => "text", "self, line, text, is_cdata"); #signature is text( self, linenumber, origional text, is_cdata )
42
0
    $self->handler(end => "end", "self, line, tag, attr, text"); #signature is end( self, linenumber, tagename, origional text )
43
0
    $self->handler(declaration => "declaration", "self, line, text, is_cdata"); # declaration
44
0
    $self->handler(comment => "comment", "self, line, text, is_cdata"); # comments
45# $self->handler(default => "default", "self, line, text, is_cdata"); # anything else
46
0
    $self->marked_sections(1); #treat anything inside CDATA tags as text, should really make it a C4::TmplTokenType::CDATA
47
0
    $self->unbroken_text(1); #make contiguous whitespace into a single token (can span multiple lines)
48
0
    $self->parse_file($filename);
49
0
    return $self;
50}
51
52#handle parsing of text
53sub text{
54
0
    my $self = shift;
55
0
    my $line = shift;
56
0
    my $work = shift; # original text
57
0
    my $is_cdata = shift;
58
0
    while($work){
59        # if there is a template_toolkit tag
60
0
        if( $work =~ m/\[%.*?\]/ ){
61            #everything before this tag is text (or possibly CDATA), add a text token to tokens if $`
62
0
            if( $` ){
63
0
                my $t = C4::TmplToken->new( $`, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
64
0
                push @tokens, $t;
65            }
66
67            #the match itself is a DIRECTIVE $&
68
0
            my $t = C4::TmplToken->new( $&, C4::TmplTokenType::DIRECTIVE, $line, $self->{filename} );
69
0
            push @tokens, $t;
70
71            # put work still to do back into work
72
0
            $work = $' ? $' : 0;
73        } else {
74            # If there is some left over work, treat it as text token
75
0
            my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
76
77
0
            push @tokens, $t;
78
0
            last;
79        }
80    }
81}
82
83sub declaration {
84
0
    my $self = shift;
85
0
    my $line = shift;
86
0
    my $work = shift; #original text
87
0
    my $is_cdata = shift;
88
0
    my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
89
0
    push @tokens, $t;
90}
91
92sub comment {
93
0
    my $self = shift;
94
0
    my $line = shift;
95
0
    my $work = shift; #original text
96
0
    my $is_cdata = shift;
97
0
    my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
98
0
    push @tokens, $t;
99}
100
101sub default {
102
0
    my $self = shift;
103
0
    my $line = shift;
104
0
    my $work = shift; #original text
105
0
    my $is_cdata = shift;
106
0
    my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
107
0
    push @tokens, $t;
108}
109
110
111#handle opening html tags
112sub start{
113
0
    my $self = shift;
114
0
    my $line = shift;
115
0
    my $tag = shift;
116
0
    my $hash = shift; #hash of attr/value pairs
117
0
    my $text = shift; #origional text
118
0
    my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename});
119
0
    my %attr;
120    # tags seem to be uses in an 'interesting' way elsewhere..
121
0
    for my $key( %$hash ) {
122
0
        next unless defined $hash->{$key};
123
0
        if ($key eq "/"){
124
0
            $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 1 ];
125            }
126        else {
127
0
        $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
128            }
129    }
130
0
    $t->set_attributes( \%attr );
131
0
    push @tokens, $t;
132}
133
134#handle closing html tags
135sub end{
136
0
    my $self = shift;
137
0
    my $line = shift;
138
0
    my $tag = shift;
139
0
    my $hash = shift;
140
0
    my $text = shift;
141    # what format should this be in?
142
0
    my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename} );
143
0
    my %attr;
144    # tags seem to be uses in an 'interesting' way elsewhere..
145
0
    for my $key( %$hash ) {
146
0
        next unless defined $hash->{$key};
147
0
        $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
148    }
149
0
    $t->set_attributes( \%attr );
150
0
    push @tokens, $t;
151}
152
1531;