| File: | C4/TTParser.pm |
| Coverage: | 13.1% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | #!/usr/bin/env perl | |||||
| 2 | #simple parser for HTML with Template Toolkit directives. Tokens are put into @tokens and are accesible via next_token and peep_token | |||||
| 3 | package C4::TTParser; | |||||
| 4 | 2 2 2 | 2625 18 182 | use base qw(HTML::Parser); | |||
| 5 | 2 2 2 | 8 4 127 | use C4::TmplToken; | |||
| 6 | 2 2 2 | 9 3 101 | use strict; | |||
| 7 | 2 2 2 | 13 3 3337 | use warnings; | |||
| 8 | ||||||
| 9 | #seems to be handled post tokenizer | |||||
| 10 | ##hash where key is tag we are interested in and the value is a hash of the attributes we want | |||||
| 11 | #my %interesting_tags = ( | |||||
| 12 | # img => { alt => 1 }, | |||||
| 13 | #); | |||||
| 14 | ||||||
| 15 | #tokens found so far (used like a stack) | |||||
| 16 | my ( @tokens ); | |||||
| 17 | ||||||
| 18 | #shiftnext token or undef | |||||
| 19 | sub next_token{ | |||||
| 20 | 0 | return shift @tokens; | ||||
| 21 | } | |||||
| 22 | ||||||
| 23 | #unshift token back on @tokens | |||||
| 24 | sub unshift_token{ | |||||
| 25 | 0 | my $self = shift; | ||||
| 26 | 0 | unshift @tokens, shift; | ||||
| 27 | } | |||||
| 28 | ||||||
| 29 | #have a peep at next token | |||||
| 30 | sub peep_token{ | |||||
| 31 | 0 | return $tokens[0]; | ||||
| 32 | } | |||||
| 33 | ||||||
| 34 | #wrapper for parse | |||||
| 35 | #please use this method INSTEAD of the HTML::Parser->parse_file method (and HTML::Parser->parse) | |||||
| 36 | #signature build_tokens( self, filename) | |||||
| 37 | sub build_tokens{ | |||||
| 38 | 0 | my ($self, $filename) = @_; | ||||
| 39 | 0 | $self->{filename} = $filename; | ||||
| 40 | 0 | $self->handler(start => "start", "self, line, tagname, attr, text"); #signature is start( self, linenumber, tagname, hash of attributes, origional text ) | ||||
| 41 | 0 | $self->handler(text => "text", "self, line, text, is_cdata"); #signature is text( self, linenumber, origional text, is_cdata ) | ||||
| 42 | 0 | $self->handler(end => "end", "self, line, tag, attr, text"); #signature is end( self, linenumber, tagename, origional text ) | ||||
| 43 | 0 | $self->handler(declaration => "declaration", "self, line, text, is_cdata"); # declaration | ||||
| 44 | 0 | $self->handler(comment => "comment", "self, line, text, is_cdata"); # comments | ||||
| 45 | # $self->handler(default => "default", "self, line, text, is_cdata"); # anything else | |||||
| 46 | 0 | $self->marked_sections(1); #treat anything inside CDATA tags as text, should really make it a C4::TmplTokenType::CDATA | ||||
| 47 | 0 | $self->unbroken_text(1); #make contiguous whitespace into a single token (can span multiple lines) | ||||
| 48 | 0 | $self->parse_file($filename); | ||||
| 49 | 0 | return $self; | ||||
| 50 | } | |||||
| 51 | ||||||
| 52 | #handle parsing of text | |||||
| 53 | sub text{ | |||||
| 54 | 0 | my $self = shift; | ||||
| 55 | 0 | my $line = shift; | ||||
| 56 | 0 | my $work = shift; # original text | ||||
| 57 | 0 | my $is_cdata = shift; | ||||
| 58 | 0 | while($work){ | ||||
| 59 | # if there is a template_toolkit tag | |||||
| 60 | 0 | if( $work =~ m/\[%.*?\]/ ){ | ||||
| 61 | #everything before this tag is text (or possibly CDATA), add a text token to tokens if $` | |||||
| 62 | 0 | if( $` ){ | ||||
| 63 | 0 | my $t = C4::TmplToken->new( $`, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} ); | ||||
| 64 | 0 | push @tokens, $t; | ||||
| 65 | } | |||||
| 66 | ||||||
| 67 | #the match itself is a DIRECTIVE $& | |||||
| 68 | 0 | my $t = C4::TmplToken->new( $&, C4::TmplTokenType::DIRECTIVE, $line, $self->{filename} ); | ||||
| 69 | 0 | push @tokens, $t; | ||||
| 70 | ||||||
| 71 | # put work still to do back into work | |||||
| 72 | 0 | $work = $' ? $' : 0; | ||||
| 73 | } else { | |||||
| 74 | # If there is some left over work, treat it as text token | |||||
| 75 | 0 | my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} ); | ||||
| 76 | ||||||
| 77 | 0 | push @tokens, $t; | ||||
| 78 | 0 | last; | ||||
| 79 | } | |||||
| 80 | } | |||||
| 81 | } | |||||
| 82 | ||||||
| 83 | sub declaration { | |||||
| 84 | 0 | my $self = shift; | ||||
| 85 | 0 | my $line = shift; | ||||
| 86 | 0 | my $work = shift; #original text | ||||
| 87 | 0 | my $is_cdata = shift; | ||||
| 88 | 0 | my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} ); | ||||
| 89 | 0 | push @tokens, $t; | ||||
| 90 | } | |||||
| 91 | ||||||
| 92 | sub comment { | |||||
| 93 | 0 | my $self = shift; | ||||
| 94 | 0 | my $line = shift; | ||||
| 95 | 0 | my $work = shift; #original text | ||||
| 96 | 0 | my $is_cdata = shift; | ||||
| 97 | 0 | my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} ); | ||||
| 98 | 0 | push @tokens, $t; | ||||
| 99 | } | |||||
| 100 | ||||||
| 101 | sub default { | |||||
| 102 | 0 | my $self = shift; | ||||
| 103 | 0 | my $line = shift; | ||||
| 104 | 0 | my $work = shift; #original text | ||||
| 105 | 0 | my $is_cdata = shift; | ||||
| 106 | 0 | my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} ); | ||||
| 107 | 0 | push @tokens, $t; | ||||
| 108 | } | |||||
| 109 | ||||||
| 110 | ||||||
| 111 | #handle opening html tags | |||||
| 112 | sub start{ | |||||
| 113 | 0 | my $self = shift; | ||||
| 114 | 0 | my $line = shift; | ||||
| 115 | 0 | my $tag = shift; | ||||
| 116 | 0 | my $hash = shift; #hash of attr/value pairs | ||||
| 117 | 0 | my $text = shift; #origional text | ||||
| 118 | 0 | my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename}); | ||||
| 119 | 0 | my %attr; | ||||
| 120 | # tags seem to be uses in an 'interesting' way elsewhere.. | |||||
| 121 | 0 | for my $key( %$hash ) { | ||||
| 122 | 0 | next unless defined $hash->{$key}; | ||||
| 123 | 0 | if ($key eq "/"){ | ||||
| 124 | 0 | $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 1 ]; | ||||
| 125 | } | |||||
| 126 | else { | |||||
| 127 | 0 | $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ]; | ||||
| 128 | } | |||||
| 129 | } | |||||
| 130 | 0 | $t->set_attributes( \%attr ); | ||||
| 131 | 0 | push @tokens, $t; | ||||
| 132 | } | |||||
| 133 | ||||||
| 134 | #handle closing html tags | |||||
| 135 | sub end{ | |||||
| 136 | 0 | my $self = shift; | ||||
| 137 | 0 | my $line = shift; | ||||
| 138 | 0 | my $tag = shift; | ||||
| 139 | 0 | my $hash = shift; | ||||
| 140 | 0 | my $text = shift; | ||||
| 141 | # what format should this be in? | |||||
| 142 | 0 | my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename} ); | ||||
| 143 | 0 | my %attr; | ||||
| 144 | # tags seem to be uses in an 'interesting' way elsewhere.. | |||||
| 145 | 0 | for my $key( %$hash ) { | ||||
| 146 | 0 | next unless defined $hash->{$key}; | ||||
| 147 | 0 | $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ]; | ||||
| 148 | } | |||||
| 149 | 0 | $t->set_attributes( \%attr ); | ||||
| 150 | 0 | push @tokens, $t; | ||||
| 151 | } | |||||
| 152 | ||||||
| 153 | 1; | |||||