File: | C4/TTParser.pm |
Coverage: | 13.1% |
line | stmt | bran | cond | sub | time | code |
---|---|---|---|---|---|---|
1 | #!/usr/bin/env perl | |||||
2 | #simple parser for HTML with Template Toolkit directives. Tokens are put into @tokens and are accesible via next_token and peep_token | |||||
3 | package C4::TTParser; | |||||
4 | 2 2 2 | 2625 18 182 | use base qw(HTML::Parser); | |||
5 | 2 2 2 | 8 4 127 | use C4::TmplToken; | |||
6 | 2 2 2 | 9 3 101 | use strict; | |||
7 | 2 2 2 | 13 3 3337 | use warnings; | |||
8 | ||||||
9 | #seems to be handled post tokenizer | |||||
10 | ##hash where key is tag we are interested in and the value is a hash of the attributes we want | |||||
11 | #my %interesting_tags = ( | |||||
12 | # img => { alt => 1 }, | |||||
13 | #); | |||||
14 | ||||||
15 | #tokens found so far (used like a stack) | |||||
16 | my ( @tokens ); | |||||
17 | ||||||
18 | #shiftnext token or undef | |||||
19 | sub next_token{ | |||||
20 | 0 | return shift @tokens; | ||||
21 | } | |||||
22 | ||||||
23 | #unshift token back on @tokens | |||||
24 | sub unshift_token{ | |||||
25 | 0 | my $self = shift; | ||||
26 | 0 | unshift @tokens, shift; | ||||
27 | } | |||||
28 | ||||||
29 | #have a peep at next token | |||||
30 | sub peep_token{ | |||||
31 | 0 | return $tokens[0]; | ||||
32 | } | |||||
33 | ||||||
34 | #wrapper for parse | |||||
35 | #please use this method INSTEAD of the HTML::Parser->parse_file method (and HTML::Parser->parse) | |||||
36 | #signature build_tokens( self, filename) | |||||
37 | sub build_tokens{ | |||||
38 | 0 | my ($self, $filename) = @_; | ||||
39 | 0 | $self->{filename} = $filename; | ||||
40 | 0 | $self->handler(start => "start", "self, line, tagname, attr, text"); #signature is start( self, linenumber, tagname, hash of attributes, origional text ) | ||||
41 | 0 | $self->handler(text => "text", "self, line, text, is_cdata"); #signature is text( self, linenumber, origional text, is_cdata ) | ||||
42 | 0 | $self->handler(end => "end", "self, line, tag, attr, text"); #signature is end( self, linenumber, tagename, origional text ) | ||||
43 | 0 | $self->handler(declaration => "declaration", "self, line, text, is_cdata"); # declaration | ||||
44 | 0 | $self->handler(comment => "comment", "self, line, text, is_cdata"); # comments | ||||
45 | # $self->handler(default => "default", "self, line, text, is_cdata"); # anything else | |||||
46 | 0 | $self->marked_sections(1); #treat anything inside CDATA tags as text, should really make it a C4::TmplTokenType::CDATA | ||||
47 | 0 | $self->unbroken_text(1); #make contiguous whitespace into a single token (can span multiple lines) | ||||
48 | 0 | $self->parse_file($filename); | ||||
49 | 0 | return $self; | ||||
50 | } | |||||
51 | ||||||
52 | #handle parsing of text | |||||
53 | sub text{ | |||||
54 | 0 | my $self = shift; | ||||
55 | 0 | my $line = shift; | ||||
56 | 0 | my $work = shift; # original text | ||||
57 | 0 | my $is_cdata = shift; | ||||
58 | 0 | while($work){ | ||||
59 | # if there is a template_toolkit tag | |||||
60 | 0 | if( $work =~ m/\[%.*?\]/ ){ | ||||
61 | #everything before this tag is text (or possibly CDATA), add a text token to tokens if $` | |||||
62 | 0 | if( $` ){ | ||||
63 | 0 | my $t = C4::TmplToken->new( $`, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} ); | ||||
64 | 0 | push @tokens, $t; | ||||
65 | } | |||||
66 | ||||||
67 | #the match itself is a DIRECTIVE $& | |||||
68 | 0 | my $t = C4::TmplToken->new( $&, C4::TmplTokenType::DIRECTIVE, $line, $self->{filename} ); | ||||
69 | 0 | push @tokens, $t; | ||||
70 | ||||||
71 | # put work still to do back into work | |||||
72 | 0 | $work = $' ? $' : 0; | ||||
73 | } else { | |||||
74 | # If there is some left over work, treat it as text token | |||||
75 | 0 | my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} ); | ||||
76 | ||||||
77 | 0 | push @tokens, $t; | ||||
78 | 0 | last; | ||||
79 | } | |||||
80 | } | |||||
81 | } | |||||
82 | ||||||
83 | sub declaration { | |||||
84 | 0 | my $self = shift; | ||||
85 | 0 | my $line = shift; | ||||
86 | 0 | my $work = shift; #original text | ||||
87 | 0 | my $is_cdata = shift; | ||||
88 | 0 | my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} ); | ||||
89 | 0 | push @tokens, $t; | ||||
90 | } | |||||
91 | ||||||
92 | sub comment { | |||||
93 | 0 | my $self = shift; | ||||
94 | 0 | my $line = shift; | ||||
95 | 0 | my $work = shift; #original text | ||||
96 | 0 | my $is_cdata = shift; | ||||
97 | 0 | my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} ); | ||||
98 | 0 | push @tokens, $t; | ||||
99 | } | |||||
100 | ||||||
101 | sub default { | |||||
102 | 0 | my $self = shift; | ||||
103 | 0 | my $line = shift; | ||||
104 | 0 | my $work = shift; #original text | ||||
105 | 0 | my $is_cdata = shift; | ||||
106 | 0 | my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} ); | ||||
107 | 0 | push @tokens, $t; | ||||
108 | } | |||||
109 | ||||||
110 | ||||||
111 | #handle opening html tags | |||||
112 | sub start{ | |||||
113 | 0 | my $self = shift; | ||||
114 | 0 | my $line = shift; | ||||
115 | 0 | my $tag = shift; | ||||
116 | 0 | my $hash = shift; #hash of attr/value pairs | ||||
117 | 0 | my $text = shift; #origional text | ||||
118 | 0 | my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename}); | ||||
119 | 0 | my %attr; | ||||
120 | # tags seem to be uses in an 'interesting' way elsewhere.. | |||||
121 | 0 | for my $key( %$hash ) { | ||||
122 | 0 | next unless defined $hash->{$key}; | ||||
123 | 0 | if ($key eq "/"){ | ||||
124 | 0 | $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 1 ]; | ||||
125 | } | |||||
126 | else { | |||||
127 | 0 | $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ]; | ||||
128 | } | |||||
129 | } | |||||
130 | 0 | $t->set_attributes( \%attr ); | ||||
131 | 0 | push @tokens, $t; | ||||
132 | } | |||||
133 | ||||||
134 | #handle closing html tags | |||||
135 | sub end{ | |||||
136 | 0 | my $self = shift; | ||||
137 | 0 | my $line = shift; | ||||
138 | 0 | my $tag = shift; | ||||
139 | 0 | my $hash = shift; | ||||
140 | 0 | my $text = shift; | ||||
141 | # what format should this be in? | |||||
142 | 0 | my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename} ); | ||||
143 | 0 | my %attr; | ||||
144 | # tags seem to be uses in an 'interesting' way elsewhere.. | |||||
145 | 0 | for my $key( %$hash ) { | ||||
146 | 0 | next unless defined $hash->{$key}; | ||||
147 | 0 | $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ]; | ||||
148 | } | |||||
149 | 0 | $t->set_attributes( \%attr ); | ||||
150 | 0 | push @tokens, $t; | ||||
151 | } | |||||
152 | ||||||
153 | 1; |