File Coverage

File:C4/External/Syndetics.pm
Coverage:17.5%

linestmtbrancondsubtimecode
1package C4::External::Syndetics;
2# Copyright (C) 2006 LibLime
3# <jmf at liblime dot com>
4#
5# This file is part of Koha.
6#
7# Koha is free software; you can redistribute it and/or modify it under the
8# terms of the GNU General Public License as published by the Free Software
9# Foundation; either version 2 of the License, or (at your option) any later
10# version.
11#
12# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License along
17# with Koha; if not, write to the Free Software Foundation, Inc.,
18# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19
20
4
4
4
41813
21201
165
use XML::Simple;
21
4
4
4
692
68285
124
use XML::LibXML;
22
4
4
4
37702
138976
40
use LWP::Simple;
23
4
4
4
2460
6
82
use LWP::UserAgent;
24
4
4
4
3387
10009
370
use HTTP::Request::Common;
25
26
4
4
4
19
7
94
use strict;
27
4
4
4
17
5
173
use warnings;
28
29
4
4
4
13
7
401
use vars qw($VERSION @ISA @EXPORT);
30
31BEGIN {
32
4
19
    require Exporter;
33
4
6
    $VERSION = 0.03;
34
4
43
    @ISA = qw(Exporter);
35
4
7387
    @EXPORT = qw(
36        &get_syndetics_index
37        &get_syndetics_summary
38        &get_syndetics_toc
39        &get_syndetics_editions
40        &get_syndetics_excerpt
41        &get_syndetics_reviews
42        &get_syndetics_anotes
43    );
44}
45
46# package-level variable
47my $parser = XML::LibXML->new();
48
49 - 63
=head1 NAME

C4::External::Syndetics - Functions for retrieving Syndetics content in Koha

=head1 FUNCTIONS

This module provides facilities for retrieving Syndetics.com content in Koha

=head2 get_syndetics_summary

  my $syndetics_summary= &get_syndetics_summary( $isbn );

Get Summary data from Syndetics

=cut
64
65sub get_syndetics_index {
66
0
    my ( $isbn,$upc,$oclc ) = @_;
67
68
0
    my $response = _fetch_syndetics_content('INDEX.XML', $isbn, $upc, $oclc);
69
70
0
    my $content = $response->content;
71
0
    my $xmlsimple = XML::Simple->new();
72
0
    $response = $xmlsimple->XMLin(
73        $content,
74    ) unless !$content;
75
76
0
    my $syndetics_elements;
77
0
    for my $available_type ('SUMMARY','TOC','FICTION','AWARDS1','SERIES1','SPSUMMARY','SPREVIEW', 'AVPROFILE', 'AVSUMMARY','DBCHAPTER','LJREVIEW','PWREVIEW','SLJREVIEW','CHREVIEW','BLREVIEW','HBREVIEW','KIREVIEW','CRITICASREVIEW','ANOTES') {
78
0
        if (exists $response->{$available_type} && $response->{$available_type} =~ /$available_type/) {
79
0
            $syndetics_elements->{$available_type} = $available_type;
80            #warn "RESPONSE: $available_type : $response->{$available_type}";
81        }
82    }
83
0
    return $syndetics_elements if $syndetics_elements;
84}
85
86sub get_syndetics_summary {
87
0
    my ( $isbn, $upc, $oclc, $syndetics_elements ) = @_;
88
89
0
    my $summary_type = exists($syndetics_elements->{'AVSUMMARY'}) ? 'AVSUMMARY.XML' : 'SUMMARY.XML';
90
0
    my $response = _fetch_syndetics_content($summary_type, $isbn, $upc, $oclc);
91
0
    unless ($response->content_type =~ /xml/) {
92
0
        return;
93    }
94
95
0
    my $content = $response->content;
96
97
0
    my $summary;
98
0
    eval {
99
0
        my $doc = $parser->parse_string($content);
100
0
        $summary = $doc->findvalue('//Fld520');
101    };
102
0
    if ($@) {
103
0
        warn "Error parsing Syndetics $summary_type";
104    }
105
0
    return $summary if $summary;
106}
107
108sub get_syndetics_toc {
109
0
    my ( $isbn,$upc,$oclc ) = @_;
110
111
0
    my $response = _fetch_syndetics_content('TOC.XML', $isbn, $upc, $oclc);
112
0
    unless ($response->content_type =~ /xml/) {
113
0
        return;
114    }
115
116
0
    my $content = $response->content;
117
0
    my $xmlsimple = XML::Simple->new();
118
0
    $response = $xmlsimple->XMLin(
119        $content,
120        forcearray => [ qw(Fld970) ],
121    ) unless !$content;
122    # manipulate response USMARC VarFlds VarDFlds Notes Fld520 a
123
0
    my $toc;
124
0
0
    $toc = \@{$response->{VarFlds}->{VarDFlds}->{SSIFlds}->{Fld970}} if $response;
125
0
    return $toc if $toc;
126}
127
128sub get_syndetics_excerpt {
129
0
    my ( $isbn,$upc,$oclc ) = @_;
130
131
0
    my $response = _fetch_syndetics_content('DBCHAPTER.XML', $isbn, $upc, $oclc);
132
0
    unless ($response->content_type =~ /xml/) {
133
0
        return;
134    }
135
136
0
    my $content = $response->content;
137
0
    my $xmlsimple = XML::Simple->new();
138
0
    $response = $xmlsimple->XMLin(
139        $content,
140        forcearray => [ qw(Fld520) ],
141    ) unless !$content;
142    # manipulate response USMARC VarFlds VarDFlds Notes Fld520 a
143
0
    my $excerpt;
144
0
0
    $excerpt = \@{$response->{VarFlds}->{VarDFlds}->{Notes}->{Fld520}} if $response;
145
0
    return XMLout($excerpt, NoEscape => 1) if $excerpt;
146}
147
148sub get_syndetics_reviews {
149
0
    my ( $isbn,$upc,$oclc,$syndetics_elements ) = @_;
150
151
0
    my @reviews;
152
0
    my $review_sources = [
153    {title => 'Library Journal Review', file => 'LJREVIEW.XML', element => 'LJREVIEW'},
154    {title => 'Publishers Weekly Review', file => 'PWREVIEW.XML', element => 'PWREVIEW'},
155    {title => 'School Library Journal Review', file => 'SLJREVIEW.XML', element => 'SLJREVIEW'},
156    {title => 'CHOICE Review', file => 'CHREVIEW.XML', element => 'CHREVIEW'},
157    {title => 'Booklist Review', file => 'BLREVIEW.XML', element => 'BLREVIEW'},
158    {title => 'Horn Book Review', file => 'HBREVIEW.XML', element => 'HBREVIEW'},
159    {title => 'Kirkus Book Review', file => 'KIREVIEW.XML', element => 'KIREVIEW'},
160    {title => 'Criticas Review', file => 'CRITICASREVIEW.XML', element => 'CRITICASREVIEW'},
161    {title => 'Spanish Review', file => 'SPREVIEW.XML', element => 'SPREVIEW'},
162    ];
163
164
0
    for my $source (@$review_sources) {
165
0
        if ($syndetics_elements->{$source->{element}} and $source->{element} =~ $syndetics_elements->{$source->{element}}) {
166
167        } else {
168            #warn "Skipping $source->{element} doesn't match $syndetics_elements->{$source->{element}} \n";
169
0
            next;
170        }
171
0
        my $response = _fetch_syndetics_content($source->{file}, $isbn, $upc, $oclc);
172
0
        unless ($response->content_type =~ /xml/) {
173
0
            next;
174        }
175
176
0
        my $content = $response->content;
177
178
0
        eval {
179
0
            my $doc = $parser->parse_string($content);
180
181            # note that using findvalue strips any HTML elements embedded
182            # in that review. That helps us handle slight differences
183            # in the output provided by Syndetics 'old' and 'new' versions
184            # of their service and cleans any questionable HTML that
185            # may be present in the reviews, but does mean that any
186            # <B> and <I> tags used to format the review are also gone.
187
0
            my $result = $doc->findvalue('//Fld520');
188
0
            push @reviews, {title => $source->{title}, reviews => [ { content => $result } ]} if $result;
189        };
190
0
        if ($@) {
191
0
            warn "Error parsing Syndetics $source->{title} review";
192        }
193    }
194
0
    return \@reviews;
195}
196
197sub get_syndetics_editions {
198
0
    my ( $isbn,$upc,$oclc ) = @_;
199
200
0
    my $response = _fetch_syndetics_content('FICTION.XML', $isbn, $upc, $oclc);
201
0
    unless ($response->content_type =~ /xml/) {
202
0
        return;
203    }
204
205
0
    my $content = $response->content;
206
207
0
    my $xmlsimple = XML::Simple->new();
208
0
    $response = $xmlsimple->XMLin(
209        $content,
210        forcearray => [ qw(Fld020) ],
211    ) unless !$content;
212    # manipulate response USMARC VarFlds VarDFlds Notes Fld520 a
213
0
    my $similar_items;
214
0
0
    $similar_items = \@{$response->{VarFlds}->{VarDFlds}->{NumbCode}->{Fld020}} if $response;
215
0
    return $similar_items if $similar_items;
216}
217
218sub get_syndetics_anotes {
219
0
    my ( $isbn,$upc,$oclc) = @_;
220
221
0
    my $response = _fetch_syndetics_content('ANOTES.XML', $isbn, $upc, $oclc);
222
0
    unless ($response->content_type =~ /xml/) {
223
0
        return;
224    }
225
226
0
    my $content = $response->content;
227
228
0
    my $xmlsimple = XML::Simple->new();
229
0
    $response = $xmlsimple->XMLin(
230        $content,
231        forcearray => [ qw(Fld980) ],
232        ForceContent => 1,
233    ) unless !$content;
234
0
    my @anotes;
235
0
0
    for my $fld980 (@{$response->{VarFlds}->{VarDFlds}->{SSIFlds}->{Fld980}}) {
236        # this is absurd, but sometimes this data serializes differently
237
0
        if(ref($fld980->{a}->{content}) eq 'ARRAY') {
238
0
0
            for my $content (@{$fld980->{a}->{content}}) {
239
0
                push @anotes, {content => $content};
240
241            }
242        }
243        else {
244
0
            push @anotes, {content => $fld980->{a}->{content}};
245        }
246    }
247
0
    return \@anotes;
248}
249
250sub _fetch_syndetics_content {
251
0
    my ( $element, $isbn, $upc, $oclc ) = @_;
252
253
0
    $isbn = '' unless defined $isbn;
254
0
    $upc = '' unless defined $upc;
255
0
    $oclc = '' unless defined $oclc;
256
257
0
    my $syndetics_client_code = C4::Context->preference('SyndeticsClientCode');
258
259
0
    my $url = "http://www.syndetics.com/index.aspx?isbn=$isbn/$element&client=$syndetics_client_code&type=xw10&upc=$upc&oclc=$oclc";
260
0
    my $ua = LWP::UserAgent->new;
261
0
    $ua->timeout(10);
262
0
    $ua->env_proxy;
263
0
    my $response = $ua->get($url);
264
265
0
    warn "could not retrieve $url" unless $response->content;
266
0
    return $response;
267
268}
2691;