File Coverage

File:	C4/TTParser.pm
Coverage:	13.1%

line	stmt	bran	sub	time	code
1					#!/usr/bin/env perl
2					#simple parser for HTML with Template Toolkit directives. Tokens are put into @tokens and are accesible via next_token and peep_token
3					package C4::TTParser;
4	2 2 2		2	2625 18 182	use base qw(HTML::Parser);
5	2 2 2		2	8 4 127	use C4::TmplToken;
6	2 2 2		2	9 3 101	use strict;
7	2 2 2		2	13 3 3337	use warnings;
8
9					#seems to be handled post tokenizer
10					##hash where key is tag we are interested in and the value is a hash of the attributes we want
11					#my %interesting_tags = (
12					# img => { alt => 1 },
13					#);
14
15					#tokens found so far (used like a stack)
16					my ( @tokens );
17
18					#shiftnext token or undef
19					sub next_token{
20	0		0		return shift @tokens;
21					}
22
23					#unshift token back on @tokens
24					sub unshift_token{
25	0		0		my $self = shift;
26	0				unshift @tokens, shift;
27					}
28
29					#have a peep at next token
30					sub peep_token{
31	0		0		return $tokens[0];
32					}
33
34					#wrapper for parse
35					#please use this method INSTEAD of the HTML::Parser->parse_file method (and HTML::Parser->parse)
36					#signature build_tokens( self, filename)
37					sub build_tokens{
38	0		0		my ($self, $filename) = @_;
39	0				$self->{filename} = $filename;
40	0				$self->handler(start => "start", "self, line, tagname, attr, text"); #signature is start( self, linenumber, tagname, hash of attributes, origional text )
41	0				$self->handler(text => "text", "self, line, text, is_cdata"); #signature is text( self, linenumber, origional text, is_cdata )
42	0				$self->handler(end => "end", "self, line, tag, attr, text"); #signature is end( self, linenumber, tagename, origional text )
43	0				$self->handler(declaration => "declaration", "self, line, text, is_cdata"); # declaration
44	0				$self->handler(comment => "comment", "self, line, text, is_cdata"); # comments
45					# $self->handler(default => "default", "self, line, text, is_cdata"); # anything else
46	0				$self->marked_sections(1); #treat anything inside CDATA tags as text, should really make it a C4::TmplTokenType::CDATA
47	0				$self->unbroken_text(1); #make contiguous whitespace into a single token (can span multiple lines)
48	0				$self->parse_file($filename);
49	0				return $self;
50					}
51
52					#handle parsing of text
53					sub text{
54	0		0		my $self = shift;
55	0				my $line = shift;
56	0				my $work = shift; # original text
57	0				my $is_cdata = shift;
58	0				while($work){
59					# if there is a template_toolkit tag
60	0	0			if( $work =~ m/\[%.*?\]/ ){
61					#everything before this tag is text (or possibly CDATA), add a text token to tokens if $`
62	0	0			if( $` ){
63	0	0			my $t = C4::TmplToken->new( $`, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
64	0				push @tokens, $t;
65					}
66
67					#the match itself is a DIRECTIVE $&
68	0				my $t = C4::TmplToken->new( $&, C4::TmplTokenType::DIRECTIVE, $line, $self->{filename} );
69	0				push @tokens, $t;
70
71					# put work still to do back into work
72	0	0			$work = $' ? $' : 0;
73					} else {
74					# If there is some left over work, treat it as text token
75	0	0			my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
76
77	0				push @tokens, $t;
78	0				last;
79					}
80					}
81					}
82
83					sub declaration {
84	0		0		my $self = shift;
85	0				my $line = shift;
86	0				my $work = shift; #original text
87	0				my $is_cdata = shift;
88	0	0			my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
89	0				push @tokens, $t;
90					}
91
92					sub comment {
93	0		0		my $self = shift;
94	0				my $line = shift;
95	0				my $work = shift; #original text
96	0				my $is_cdata = shift;
97	0	0			my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
98	0				push @tokens, $t;
99					}
100
101					sub default {
102	0		0		my $self = shift;
103	0				my $line = shift;
104	0				my $work = shift; #original text
105	0				my $is_cdata = shift;
106	0	0			my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
107	0				push @tokens, $t;
108					}
109
110
111					#handle opening html tags
112					sub start{
113	0		0		my $self = shift;
114	0				my $line = shift;
115	0				my $tag = shift;
116	0				my $hash = shift; #hash of attr/value pairs
117	0				my $text = shift; #origional text
118	0				my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename});
119	0				my %attr;
120					# tags seem to be uses in an 'interesting' way elsewhere..
121	0				for my $key( %$hash ) {
122	0	0			next unless defined $hash->{$key};
123	0	0			if ($key eq "/"){
124	0				$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 1 ];
125					}
126					else {
127	0				$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
128					}
129					}
130	0				$t->set_attributes( \%attr );
131	0				push @tokens, $t;
132					}
133
134					#handle closing html tags
135					sub end{
136	0		0		my $self = shift;
137	0				my $line = shift;
138	0				my $tag = shift;
139	0				my $hash = shift;
140	0				my $text = shift;
141					# what format should this be in?
142	0				my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename} );
143	0				my %attr;
144					# tags seem to be uses in an 'interesting' way elsewhere..
145	0				for my $key( %$hash ) {
146	0	0			next unless defined $hash->{$key};
147	0				$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
148					}
149	0				$t->set_attributes( \%attr );
150	0				push @tokens, $t;
151					}
152
153					1;