| File: | C4/External/Syndetics.pm |
| Coverage: | 17.5% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | package C4::External::Syndetics; | |||||
| 2 | # Copyright (C) 2006 LibLime | |||||
| 3 | # <jmf at liblime dot com> | |||||
| 4 | # | |||||
| 5 | # This file is part of Koha. | |||||
| 6 | # | |||||
| 7 | # Koha is free software; you can redistribute it and/or modify it under the | |||||
| 8 | # terms of the GNU General Public License as published by the Free Software | |||||
| 9 | # Foundation; either version 2 of the License, or (at your option) any later | |||||
| 10 | # version. | |||||
| 11 | # | |||||
| 12 | # Koha is distributed in the hope that it will be useful, but WITHOUT ANY | |||||
| 13 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR | |||||
| 14 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. | |||||
| 15 | # | |||||
| 16 | # You should have received a copy of the GNU General Public License along | |||||
| 17 | # with Koha; if not, write to the Free Software Foundation, Inc., | |||||
| 18 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |||||
| 19 | ||||||
| 20 | 3 3 3 | 18641 11493 55 | use XML::Simple; | |||
| 21 | 3 3 3 | 438 33695 74 | use XML::LibXML; | |||
| 22 | 3 3 3 | 40157 75816 44 | use LWP::Simple; | |||
| 23 | 3 3 3 | 1537 6 64 | use LWP::UserAgent; | |||
| 24 | 3 3 3 | 21321 8709 299 | use HTTP::Request::Common; | |||
| 25 | ||||||
| 26 | 3 3 3 | 18 4 68 | use strict; | |||
| 27 | 3 3 3 | 13 4 124 | use warnings; | |||
| 28 | ||||||
| 29 | 3 3 3 | 24 5 347 | use vars qw($VERSION @ISA @EXPORT); | |||
| 30 | ||||||
| 31 | BEGIN { | |||||
| 32 | 3 | 24 | require Exporter; | |||
| 33 | 3 | 6 | $VERSION = 0.03; | |||
| 34 | 3 | 34 | @ISA = qw(Exporter); | |||
| 35 | 3 | 5469 | @EXPORT = qw( | |||
| 36 | &get_syndetics_index | |||||
| 37 | &get_syndetics_summary | |||||
| 38 | &get_syndetics_toc | |||||
| 39 | &get_syndetics_editions | |||||
| 40 | &get_syndetics_excerpt | |||||
| 41 | &get_syndetics_reviews | |||||
| 42 | &get_syndetics_anotes | |||||
| 43 | ); | |||||
| 44 | } | |||||
| 45 | ||||||
| 46 | # package-level variable | |||||
| 47 | my $parser = XML::LibXML->new(); | |||||
| 48 | ||||||
| 49 - 63 | =head1 NAME C4::External::Syndetics - Functions for retrieving Syndetics content in Koha =head1 FUNCTIONS This module provides facilities for retrieving Syndetics.com content in Koha =head2 get_syndetics_summary my $syndetics_summary= &get_syndetics_summary( $isbn ); Get Summary data from Syndetics =cut | |||||
| 64 | ||||||
| 65 | sub get_syndetics_index { | |||||
| 66 | 0 | my ( $isbn,$upc,$oclc ) = @_; | ||||
| 67 | ||||||
| 68 | 0 | my $response = _fetch_syndetics_content('INDEX.XML', $isbn, $upc, $oclc); | ||||
| 69 | ||||||
| 70 | 0 | my $content = $response->content; | ||||
| 71 | 0 | my $xmlsimple = XML::Simple->new(); | ||||
| 72 | 0 | $response = $xmlsimple->XMLin( | ||||
| 73 | $content, | |||||
| 74 | ) unless !$content; | |||||
| 75 | ||||||
| 76 | 0 | my $syndetics_elements; | ||||
| 77 | 0 | for my $available_type ('SUMMARY','TOC','FICTION','AWARDS1','SERIES1','SPSUMMARY','SPREVIEW', 'AVPROFILE', 'AVSUMMARY','DBCHAPTER','LJREVIEW','PWREVIEW','SLJREVIEW','CHREVIEW','BLREVIEW','HBREVIEW','KIREVIEW','CRITICASREVIEW','ANOTES') { | ||||
| 78 | 0 | if (exists $response->{$available_type} && $response->{$available_type} =~ /$available_type/) { | ||||
| 79 | 0 | $syndetics_elements->{$available_type} = $available_type; | ||||
| 80 | #warn "RESPONSE: $available_type : $response->{$available_type}"; | |||||
| 81 | } | |||||
| 82 | } | |||||
| 83 | 0 | return $syndetics_elements if $syndetics_elements; | ||||
| 84 | } | |||||
| 85 | ||||||
| 86 | sub get_syndetics_summary { | |||||
| 87 | 0 | my ( $isbn, $upc, $oclc, $syndetics_elements ) = @_; | ||||
| 88 | ||||||
| 89 | 0 | my $summary_type = exists($syndetics_elements->{'AVSUMMARY'}) ? 'AVSUMMARY.XML' : 'SUMMARY.XML'; | ||||
| 90 | 0 | my $response = _fetch_syndetics_content($summary_type, $isbn, $upc, $oclc); | ||||
| 91 | 0 | unless ($response->content_type =~ /xml/) { | ||||
| 92 | 0 | return; | ||||
| 93 | } | |||||
| 94 | ||||||
| 95 | 0 | my $content = $response->content; | ||||
| 96 | ||||||
| 97 | 0 | my $summary; | ||||
| 98 | 0 | eval { | ||||
| 99 | 0 | my $doc = $parser->parse_string($content); | ||||
| 100 | 0 | $summary = $doc->findvalue('//Fld520'); | ||||
| 101 | }; | |||||
| 102 | 0 | if ($@) { | ||||
| 103 | 0 | warn "Error parsing Syndetics $summary_type"; | ||||
| 104 | } | |||||
| 105 | 0 | return $summary if $summary; | ||||
| 106 | } | |||||
| 107 | ||||||
| 108 | sub get_syndetics_toc { | |||||
| 109 | 0 | my ( $isbn,$upc,$oclc ) = @_; | ||||
| 110 | ||||||
| 111 | 0 | my $response = _fetch_syndetics_content('TOC.XML', $isbn, $upc, $oclc); | ||||
| 112 | 0 | unless ($response->content_type =~ /xml/) { | ||||
| 113 | 0 | return; | ||||
| 114 | } | |||||
| 115 | ||||||
| 116 | 0 | my $content = $response->content; | ||||
| 117 | 0 | my $xmlsimple = XML::Simple->new(); | ||||
| 118 | 0 | $response = $xmlsimple->XMLin( | ||||
| 119 | $content, | |||||
| 120 | forcearray => [ qw(Fld970) ], | |||||
| 121 | ) unless !$content; | |||||
| 122 | # manipulate response USMARC VarFlds VarDFlds Notes Fld520 a | |||||
| 123 | 0 | my $toc; | ||||
| 124 | 0 0 | $toc = \@{$response->{VarFlds}->{VarDFlds}->{SSIFlds}->{Fld970}} if $response; | ||||
| 125 | 0 | return $toc if $toc; | ||||
| 126 | } | |||||
| 127 | ||||||
| 128 | sub get_syndetics_excerpt { | |||||
| 129 | 0 | my ( $isbn,$upc,$oclc ) = @_; | ||||
| 130 | ||||||
| 131 | 0 | my $response = _fetch_syndetics_content('DBCHAPTER.XML', $isbn, $upc, $oclc); | ||||
| 132 | 0 | unless ($response->content_type =~ /xml/) { | ||||
| 133 | 0 | return; | ||||
| 134 | } | |||||
| 135 | ||||||
| 136 | 0 | my $content = $response->content; | ||||
| 137 | 0 | my $xmlsimple = XML::Simple->new(); | ||||
| 138 | 0 | $response = $xmlsimple->XMLin( | ||||
| 139 | $content, | |||||
| 140 | forcearray => [ qw(Fld520) ], | |||||
| 141 | ) unless !$content; | |||||
| 142 | # manipulate response USMARC VarFlds VarDFlds Notes Fld520 a | |||||
| 143 | 0 | my $excerpt; | ||||
| 144 | 0 0 | $excerpt = \@{$response->{VarFlds}->{VarDFlds}->{Notes}->{Fld520}} if $response; | ||||
| 145 | 0 | return XMLout($excerpt, NoEscape => 1) if $excerpt; | ||||
| 146 | } | |||||
| 147 | ||||||
| 148 | sub get_syndetics_reviews { | |||||
| 149 | 0 | my ( $isbn,$upc,$oclc,$syndetics_elements ) = @_; | ||||
| 150 | ||||||
| 151 | 0 | my @reviews; | ||||
| 152 | 0 | my $review_sources = [ | ||||
| 153 | {title => 'Library Journal Review', file => 'LJREVIEW.XML', element => 'LJREVIEW'}, | |||||
| 154 | {title => 'Publishers Weekly Review', file => 'PWREVIEW.XML', element => 'PWREVIEW'}, | |||||
| 155 | {title => 'School Library Journal Review', file => 'SLJREVIEW.XML', element => 'SLJREVIEW'}, | |||||
| 156 | {title => 'CHOICE Review', file => 'CHREVIEW.XML', element => 'CHREVIEW'}, | |||||
| 157 | {title => 'Booklist Review', file => 'BLREVIEW.XML', element => 'BLREVIEW'}, | |||||
| 158 | {title => 'Horn Book Review', file => 'HBREVIEW.XML', element => 'HBREVIEW'}, | |||||
| 159 | {title => 'Kirkus Book Review', file => 'KIREVIEW.XML', element => 'KIREVIEW'}, | |||||
| 160 | {title => 'Criticas Review', file => 'CRITICASREVIEW.XML', element => 'CRITICASREVIEW'}, | |||||
| 161 | {title => 'Spanish Review', file => 'SPREVIEW.XML', element => 'SPREVIEW'}, | |||||
| 162 | ]; | |||||
| 163 | ||||||
| 164 | 0 | for my $source (@$review_sources) { | ||||
| 165 | 0 | if ($syndetics_elements->{$source->{element}} and $source->{element} =~ $syndetics_elements->{$source->{element}}) { | ||||
| 166 | ||||||
| 167 | } else { | |||||
| 168 | #warn "Skipping $source->{element} doesn't match $syndetics_elements->{$source->{element}} \n"; | |||||
| 169 | 0 | next; | ||||
| 170 | } | |||||
| 171 | 0 | my $response = _fetch_syndetics_content($source->{file}, $isbn, $upc, $oclc); | ||||
| 172 | 0 | unless ($response->content_type =~ /xml/) { | ||||
| 173 | 0 | next; | ||||
| 174 | } | |||||
| 175 | ||||||
| 176 | 0 | my $content = $response->content; | ||||
| 177 | ||||||
| 178 | 0 | eval { | ||||
| 179 | 0 | my $doc = $parser->parse_string($content); | ||||
| 180 | ||||||
| 181 | # note that using findvalue strips any HTML elements embedded | |||||
| 182 | # in that review. That helps us handle slight differences | |||||
| 183 | # in the output provided by Syndetics 'old' and 'new' versions | |||||
| 184 | # of their service and cleans any questionable HTML that | |||||
| 185 | # may be present in the reviews, but does mean that any | |||||
| 186 | # <B> and <I> tags used to format the review are also gone. | |||||
| 187 | 0 | my $result = $doc->findvalue('//Fld520'); | ||||
| 188 | 0 | push @reviews, {title => $source->{title}, reviews => [ { content => $result } ]} if $result; | ||||
| 189 | }; | |||||
| 190 | 0 | if ($@) { | ||||
| 191 | 0 | warn "Error parsing Syndetics $source->{title} review"; | ||||
| 192 | } | |||||
| 193 | } | |||||
| 194 | 0 | return \@reviews; | ||||
| 195 | } | |||||
| 196 | ||||||
| 197 | sub get_syndetics_editions { | |||||
| 198 | 0 | my ( $isbn,$upc,$oclc ) = @_; | ||||
| 199 | ||||||
| 200 | 0 | my $response = _fetch_syndetics_content('FICTION.XML', $isbn, $upc, $oclc); | ||||
| 201 | 0 | unless ($response->content_type =~ /xml/) { | ||||
| 202 | 0 | return; | ||||
| 203 | } | |||||
| 204 | ||||||
| 205 | 0 | my $content = $response->content; | ||||
| 206 | ||||||
| 207 | 0 | my $xmlsimple = XML::Simple->new(); | ||||
| 208 | 0 | $response = $xmlsimple->XMLin( | ||||
| 209 | $content, | |||||
| 210 | forcearray => [ qw(Fld020) ], | |||||
| 211 | ) unless !$content; | |||||
| 212 | # manipulate response USMARC VarFlds VarDFlds Notes Fld520 a | |||||
| 213 | 0 | my $similar_items; | ||||
| 214 | 0 0 | $similar_items = \@{$response->{VarFlds}->{VarDFlds}->{NumbCode}->{Fld020}} if $response; | ||||
| 215 | 0 | return $similar_items if $similar_items; | ||||
| 216 | } | |||||
| 217 | ||||||
| 218 | sub get_syndetics_anotes { | |||||
| 219 | 0 | my ( $isbn,$upc,$oclc) = @_; | ||||
| 220 | ||||||
| 221 | 0 | my $response = _fetch_syndetics_content('ANOTES.XML', $isbn, $upc, $oclc); | ||||
| 222 | 0 | unless ($response->content_type =~ /xml/) { | ||||
| 223 | 0 | return; | ||||
| 224 | } | |||||
| 225 | ||||||
| 226 | 0 | my $content = $response->content; | ||||
| 227 | ||||||
| 228 | 0 | my $xmlsimple = XML::Simple->new(); | ||||
| 229 | 0 | $response = $xmlsimple->XMLin( | ||||
| 230 | $content, | |||||
| 231 | forcearray => [ qw(Fld980) ], | |||||
| 232 | ForceContent => 1, | |||||
| 233 | ) unless !$content; | |||||
| 234 | 0 | my @anotes; | ||||
| 235 | 0 0 | for my $fld980 (@{$response->{VarFlds}->{VarDFlds}->{SSIFlds}->{Fld980}}) { | ||||
| 236 | # this is absurd, but sometimes this data serializes differently | |||||
| 237 | 0 | if(ref($fld980->{a}->{content}) eq 'ARRAY') { | ||||
| 238 | 0 0 | for my $content (@{$fld980->{a}->{content}}) { | ||||
| 239 | 0 | push @anotes, {content => $content}; | ||||
| 240 | ||||||
| 241 | } | |||||
| 242 | } | |||||
| 243 | else { | |||||
| 244 | 0 | push @anotes, {content => $fld980->{a}->{content}}; | ||||
| 245 | } | |||||
| 246 | } | |||||
| 247 | 0 | return \@anotes; | ||||
| 248 | } | |||||
| 249 | ||||||
| 250 | sub _fetch_syndetics_content { | |||||
| 251 | 0 | my ( $element, $isbn, $upc, $oclc ) = @_; | ||||
| 252 | ||||||
| 253 | 0 | $isbn = '' unless defined $isbn; | ||||
| 254 | 0 | $upc = '' unless defined $upc; | ||||
| 255 | 0 | $oclc = '' unless defined $oclc; | ||||
| 256 | ||||||
| 257 | 0 | my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode'); | ||||
| 258 | ||||||
| 259 | 0 | my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/$element&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc"; | ||||
| 260 | 0 | my $ua = LWP::UserAgent->new; | ||||
| 261 | 0 | $ua->timeout(10); | ||||
| 262 | 0 | $ua->env_proxy; | ||||
| 263 | 0 | my $response = $ua->get($url); | ||||
| 264 | ||||||
| 265 | 0 | warn "could not retrieve $url" unless $response->content; | ||||
| 266 | 0 | return $response; | ||||
| 267 | ||||||
| 268 | } | |||||
| 269 | 1; | |||||