File: | C4/External/Syndetics.pm |
Coverage: | 17.5% |
line | stmt | bran | cond | sub | time | code |
---|---|---|---|---|---|---|
1 | package C4::External::Syndetics; | |||||
2 | # Copyright (C) 2006 LibLime | |||||
3 | # <jmf at liblime dot com> | |||||
4 | # | |||||
5 | # This file is part of Koha. | |||||
6 | # | |||||
7 | # Koha is free software; you can redistribute it and/or modify it under the | |||||
8 | # terms of the GNU General Public License as published by the Free Software | |||||
9 | # Foundation; either version 2 of the License, or (at your option) any later | |||||
10 | # version. | |||||
11 | # | |||||
12 | # Koha is distributed in the hope that it will be useful, but WITHOUT ANY | |||||
13 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR | |||||
14 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. | |||||
15 | # | |||||
16 | # You should have received a copy of the GNU General Public License along | |||||
17 | # with Koha; if not, write to the Free Software Foundation, Inc., | |||||
18 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |||||
19 | ||||||
20 | 4 4 4 | 41813 21201 165 | use XML::Simple; | |||
21 | 4 4 4 | 692 68285 124 | use XML::LibXML; | |||
22 | 4 4 4 | 37702 138976 40 | use LWP::Simple; | |||
23 | 4 4 4 | 2460 6 82 | use LWP::UserAgent; | |||
24 | 4 4 4 | 3387 10009 370 | use HTTP::Request::Common; | |||
25 | ||||||
26 | 4 4 4 | 19 7 94 | use strict; | |||
27 | 4 4 4 | 17 5 173 | use warnings; | |||
28 | ||||||
29 | 4 4 4 | 13 7 401 | use vars qw($VERSION @ISA @EXPORT); | |||
30 | ||||||
31 | BEGIN { | |||||
32 | 4 | 19 | require Exporter; | |||
33 | 4 | 6 | $VERSION = 0.03; | |||
34 | 4 | 43 | @ISA = qw(Exporter); | |||
35 | 4 | 7387 | @EXPORT = qw( | |||
36 | &get_syndetics_index | |||||
37 | &get_syndetics_summary | |||||
38 | &get_syndetics_toc | |||||
39 | &get_syndetics_editions | |||||
40 | &get_syndetics_excerpt | |||||
41 | &get_syndetics_reviews | |||||
42 | &get_syndetics_anotes | |||||
43 | ); | |||||
44 | } | |||||
45 | ||||||
46 | # package-level variable | |||||
47 | my $parser = XML::LibXML->new(); | |||||
48 | ||||||
49 - 63 | =head1 NAME C4::External::Syndetics - Functions for retrieving Syndetics content in Koha =head1 FUNCTIONS This module provides facilities for retrieving Syndetics.com content in Koha =head2 get_syndetics_summary my $syndetics_summary= &get_syndetics_summary( $isbn ); Get Summary data from Syndetics =cut | |||||
64 | ||||||
65 | sub get_syndetics_index { | |||||
66 | 0 | my ( $isbn,$upc,$oclc ) = @_; | ||||
67 | ||||||
68 | 0 | my $response = _fetch_syndetics_content('INDEX.XML', $isbn, $upc, $oclc); | ||||
69 | ||||||
70 | 0 | my $content = $response->content; | ||||
71 | 0 | my $xmlsimple = XML::Simple->new(); | ||||
72 | 0 | $response = $xmlsimple->XMLin( | ||||
73 | $content, | |||||
74 | ) unless !$content; | |||||
75 | ||||||
76 | 0 | my $syndetics_elements; | ||||
77 | 0 | for my $available_type ('SUMMARY','TOC','FICTION','AWARDS1','SERIES1','SPSUMMARY','SPREVIEW', 'AVPROFILE', 'AVSUMMARY','DBCHAPTER','LJREVIEW','PWREVIEW','SLJREVIEW','CHREVIEW','BLREVIEW','HBREVIEW','KIREVIEW','CRITICASREVIEW','ANOTES') { | ||||
78 | 0 | if (exists $response->{$available_type} && $response->{$available_type} =~ /$available_type/) { | ||||
79 | 0 | $syndetics_elements->{$available_type} = $available_type; | ||||
80 | #warn "RESPONSE: $available_type : $response->{$available_type}"; | |||||
81 | } | |||||
82 | } | |||||
83 | 0 | return $syndetics_elements if $syndetics_elements; | ||||
84 | } | |||||
85 | ||||||
86 | sub get_syndetics_summary { | |||||
87 | 0 | my ( $isbn, $upc, $oclc, $syndetics_elements ) = @_; | ||||
88 | ||||||
89 | 0 | my $summary_type = exists($syndetics_elements->{'AVSUMMARY'}) ? 'AVSUMMARY.XML' : 'SUMMARY.XML'; | ||||
90 | 0 | my $response = _fetch_syndetics_content($summary_type, $isbn, $upc, $oclc); | ||||
91 | 0 | unless ($response->content_type =~ /xml/) { | ||||
92 | 0 | return; | ||||
93 | } | |||||
94 | ||||||
95 | 0 | my $content = $response->content; | ||||
96 | ||||||
97 | 0 | my $summary; | ||||
98 | 0 | eval { | ||||
99 | 0 | my $doc = $parser->parse_string($content); | ||||
100 | 0 | $summary = $doc->findvalue('//Fld520'); | ||||
101 | }; | |||||
102 | 0 | if ($@) { | ||||
103 | 0 | warn "Error parsing Syndetics $summary_type"; | ||||
104 | } | |||||
105 | 0 | return $summary if $summary; | ||||
106 | } | |||||
107 | ||||||
108 | sub get_syndetics_toc { | |||||
109 | 0 | my ( $isbn,$upc,$oclc ) = @_; | ||||
110 | ||||||
111 | 0 | my $response = _fetch_syndetics_content('TOC.XML', $isbn, $upc, $oclc); | ||||
112 | 0 | unless ($response->content_type =~ /xml/) { | ||||
113 | 0 | return; | ||||
114 | } | |||||
115 | ||||||
116 | 0 | my $content = $response->content; | ||||
117 | 0 | my $xmlsimple = XML::Simple->new(); | ||||
118 | 0 | $response = $xmlsimple->XMLin( | ||||
119 | $content, | |||||
120 | forcearray => [ qw(Fld970) ], | |||||
121 | ) unless !$content; | |||||
122 | # manipulate response USMARC VarFlds VarDFlds Notes Fld520 a | |||||
123 | 0 | my $toc; | ||||
124 | 0 0 | $toc = \@{$response->{VarFlds}->{VarDFlds}->{SSIFlds}->{Fld970}} if $response; | ||||
125 | 0 | return $toc if $toc; | ||||
126 | } | |||||
127 | ||||||
128 | sub get_syndetics_excerpt { | |||||
129 | 0 | my ( $isbn,$upc,$oclc ) = @_; | ||||
130 | ||||||
131 | 0 | my $response = _fetch_syndetics_content('DBCHAPTER.XML', $isbn, $upc, $oclc); | ||||
132 | 0 | unless ($response->content_type =~ /xml/) { | ||||
133 | 0 | return; | ||||
134 | } | |||||
135 | ||||||
136 | 0 | my $content = $response->content; | ||||
137 | 0 | my $xmlsimple = XML::Simple->new(); | ||||
138 | 0 | $response = $xmlsimple->XMLin( | ||||
139 | $content, | |||||
140 | forcearray => [ qw(Fld520) ], | |||||
141 | ) unless !$content; | |||||
142 | # manipulate response USMARC VarFlds VarDFlds Notes Fld520 a | |||||
143 | 0 | my $excerpt; | ||||
144 | 0 0 | $excerpt = \@{$response->{VarFlds}->{VarDFlds}->{Notes}->{Fld520}} if $response; | ||||
145 | 0 | return XMLout($excerpt, NoEscape => 1) if $excerpt; | ||||
146 | } | |||||
147 | ||||||
148 | sub get_syndetics_reviews { | |||||
149 | 0 | my ( $isbn,$upc,$oclc,$syndetics_elements ) = @_; | ||||
150 | ||||||
151 | 0 | my @reviews; | ||||
152 | 0 | my $review_sources = [ | ||||
153 | {title => 'Library Journal Review', file => 'LJREVIEW.XML', element => 'LJREVIEW'}, | |||||
154 | {title => 'Publishers Weekly Review', file => 'PWREVIEW.XML', element => 'PWREVIEW'}, | |||||
155 | {title => 'School Library Journal Review', file => 'SLJREVIEW.XML', element => 'SLJREVIEW'}, | |||||
156 | {title => 'CHOICE Review', file => 'CHREVIEW.XML', element => 'CHREVIEW'}, | |||||
157 | {title => 'Booklist Review', file => 'BLREVIEW.XML', element => 'BLREVIEW'}, | |||||
158 | {title => 'Horn Book Review', file => 'HBREVIEW.XML', element => 'HBREVIEW'}, | |||||
159 | {title => 'Kirkus Book Review', file => 'KIREVIEW.XML', element => 'KIREVIEW'}, | |||||
160 | {title => 'Criticas Review', file => 'CRITICASREVIEW.XML', element => 'CRITICASREVIEW'}, | |||||
161 | {title => 'Spanish Review', file => 'SPREVIEW.XML', element => 'SPREVIEW'}, | |||||
162 | ]; | |||||
163 | ||||||
164 | 0 | for my $source (@$review_sources) { | ||||
165 | 0 | if ($syndetics_elements->{$source->{element}} and $source->{element} =~ $syndetics_elements->{$source->{element}}) { | ||||
166 | ||||||
167 | } else { | |||||
168 | #warn "Skipping $source->{element} doesn't match $syndetics_elements->{$source->{element}} \n"; | |||||
169 | 0 | next; | ||||
170 | } | |||||
171 | 0 | my $response = _fetch_syndetics_content($source->{file}, $isbn, $upc, $oclc); | ||||
172 | 0 | unless ($response->content_type =~ /xml/) { | ||||
173 | 0 | next; | ||||
174 | } | |||||
175 | ||||||
176 | 0 | my $content = $response->content; | ||||
177 | ||||||
178 | 0 | eval { | ||||
179 | 0 | my $doc = $parser->parse_string($content); | ||||
180 | ||||||
181 | # note that using findvalue strips any HTML elements embedded | |||||
182 | # in that review. That helps us handle slight differences | |||||
183 | # in the output provided by Syndetics 'old' and 'new' versions | |||||
184 | # of their service and cleans any questionable HTML that | |||||
185 | # may be present in the reviews, but does mean that any | |||||
186 | # <B> and <I> tags used to format the review are also gone. | |||||
187 | 0 | my $result = $doc->findvalue('//Fld520'); | ||||
188 | 0 | push @reviews, {title => $source->{title}, reviews => [ { content => $result } ]} if $result; | ||||
189 | }; | |||||
190 | 0 | if ($@) { | ||||
191 | 0 | warn "Error parsing Syndetics $source->{title} review"; | ||||
192 | } | |||||
193 | } | |||||
194 | 0 | return \@reviews; | ||||
195 | } | |||||
196 | ||||||
197 | sub get_syndetics_editions { | |||||
198 | 0 | my ( $isbn,$upc,$oclc ) = @_; | ||||
199 | ||||||
200 | 0 | my $response = _fetch_syndetics_content('FICTION.XML', $isbn, $upc, $oclc); | ||||
201 | 0 | unless ($response->content_type =~ /xml/) { | ||||
202 | 0 | return; | ||||
203 | } | |||||
204 | ||||||
205 | 0 | my $content = $response->content; | ||||
206 | ||||||
207 | 0 | my $xmlsimple = XML::Simple->new(); | ||||
208 | 0 | $response = $xmlsimple->XMLin( | ||||
209 | $content, | |||||
210 | forcearray => [ qw(Fld020) ], | |||||
211 | ) unless !$content; | |||||
212 | # manipulate response USMARC VarFlds VarDFlds Notes Fld520 a | |||||
213 | 0 | my $similar_items; | ||||
214 | 0 0 | $similar_items = \@{$response->{VarFlds}->{VarDFlds}->{NumbCode}->{Fld020}} if $response; | ||||
215 | 0 | return $similar_items if $similar_items; | ||||
216 | } | |||||
217 | ||||||
218 | sub get_syndetics_anotes { | |||||
219 | 0 | my ( $isbn,$upc,$oclc) = @_; | ||||
220 | ||||||
221 | 0 | my $response = _fetch_syndetics_content('ANOTES.XML', $isbn, $upc, $oclc); | ||||
222 | 0 | unless ($response->content_type =~ /xml/) { | ||||
223 | 0 | return; | ||||
224 | } | |||||
225 | ||||||
226 | 0 | my $content = $response->content; | ||||
227 | ||||||
228 | 0 | my $xmlsimple = XML::Simple->new(); | ||||
229 | 0 | $response = $xmlsimple->XMLin( | ||||
230 | $content, | |||||
231 | forcearray => [ qw(Fld980) ], | |||||
232 | ForceContent => 1, | |||||
233 | ) unless !$content; | |||||
234 | 0 | my @anotes; | ||||
235 | 0 0 | for my $fld980 (@{$response->{VarFlds}->{VarDFlds}->{SSIFlds}->{Fld980}}) { | ||||
236 | # this is absurd, but sometimes this data serializes differently | |||||
237 | 0 | if(ref($fld980->{a}->{content}) eq 'ARRAY') { | ||||
238 | 0 0 | for my $content (@{$fld980->{a}->{content}}) { | ||||
239 | 0 | push @anotes, {content => $content}; | ||||
240 | ||||||
241 | } | |||||
242 | } | |||||
243 | else { | |||||
244 | 0 | push @anotes, {content => $fld980->{a}->{content}}; | ||||
245 | } | |||||
246 | } | |||||
247 | 0 | return \@anotes; | ||||
248 | } | |||||
249 | ||||||
250 | sub _fetch_syndetics_content { | |||||
251 | 0 | my ( $element, $isbn, $upc, $oclc ) = @_; | ||||
252 | ||||||
253 | 0 | $isbn = '' unless defined $isbn; | ||||
254 | 0 | $upc = '' unless defined $upc; | ||||
255 | 0 | $oclc = '' unless defined $oclc; | ||||
256 | ||||||
257 | 0 | my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode'); | ||||
258 | ||||||
259 | 0 | my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/$element&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc"; | ||||
260 | 0 | my $ua = LWP::UserAgent->new; | ||||
261 | 0 | $ua->timeout(10); | ||||
262 | 0 | $ua->env_proxy; | ||||
263 | 0 | my $response = $ua->get($url); | ||||
264 | ||||||
265 | 0 | warn "could not retrieve $url" unless $response->content; | ||||
266 | 0 | return $response; | ||||
267 | ||||||
268 | } | |||||
269 | 1; |