File Coverage

File:	C4/Ris.pm
Coverage:	6.5%

line	stmt	bran	cond	sub	time	code
1						package C4::Ris;
2
3						# Original script :
4						## marc2ris: converts MARC21 and UNIMARC datasets to RIS format
5						## See comments below for compliance with other MARC dialects
6						##
7						## usage: perl marc2ris < infile.marc > outfile.ris
8						##
9						## Dependencies: perl 5.6.0 or later
10						## MARC::Record
11						## MARC::Charset
12						##
13						## markus@mhoenicka.de 2002-11-16
14
15						## This program is free software; you can redistribute it and/or modify
16						## it under the terms of the GNU General Public License as published by
17						## the Free Software Foundation; either version 2 of the License, or
18						## (at your option) any later version.
19						##
20						## This program is distributed in the hope that it will be useful,
21						## but WITHOUT ANY WARRANTY; without even the implied warranty of
22						## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23						## GNU General Public License for more details.
24
25						## You should have received a copy of the GNU General Public License
26						## along with this program; if not, write to the Free Software
27						## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28
29						## Some background about MARC as understood by this script
30						## The default input format used in this script is MARC21, which
31						## superseded USMARC and CANMARC. The specification can be found at:
32						## http://lcweb.loc.gov/marc/
33						## UNIMARC follows the specification at:
34						## http://www.ifla.org/VI/3/p1996-1/sec-uni.htm
35						## UKMARC support is a bit shaky because there is no specification available
36						## for free. The wisdom used in this script was taken from a PDF document
37						## comparing UKMARC to MARC21 found at:
38						## www.bl.uk/services/bibliographic/marcchange.pdf
39
40
41						# Modified 2008 by BibLibre for Koha
42						# Modified 2011 by Catalyst
43						# Modified 2011 by Equinox Software, Inc.
44						#
45						# This file is part of Koha.
46						#
47						# Koha is free software; you can redistribute it and/or modify it under the
48						# terms of the GNU General Public License as published by the Free Software
49						# Foundation; either version 2 of the License, or (at your option) any later
50						# version.
51						#
52						# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
53						# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
54						# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
55						#
56						# You should have received a copy of the GNU General Public License along with
57						# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
58						# Suite 330, Boston, MA 02111-1307 USA
59						#
60						#
61
62						#use strict;
63						#use warnings; FIXME - Bug 2505
64
65	4 4 4			4	42880 28 28620	use vars qw($VERSION @ISA @EXPORT);
66
67						# set the version for version checking
68						$VERSION = 3.00;
69
70						@ISA = qw(Exporter);
71
72						# only export API methods
73
74						@EXPORT = qw(
75						&marc2ris
76						);
77
78
79 - 87						=head1 marc2bibtex - Convert from UNIMARC to RIS my ($ris) = marc2ris($record); Returns a RIS scalar C<$record> - a MARC::Record object =cut
88
89						sub marc2ris {
90	0			0	0	my ($record) = @_;
91	0				0	my $output;
92
93	0				0	my $marcflavour = C4::Context->preference("marcflavour");
94	0				0	my $intype = lc($marcflavour);
95	0				0	my $marcprint = 0; # Debug flag;
96
97						# Let's redirect stdout
98	0				0	open my $oldout, ">&STDOUT";
99	0				0	my $outvar;
100	0				0	close STDOUT;
101	0				0	open STDOUT,'>', \$outvar;
102
103
104						## First we should check the character encoding. This may be
105						## MARC-8 or UTF-8. The former is indicated by a blank, the latter
106						## by 'a' at position 09 (zero-based) of the leader
107	0				0	my $leader = $record->leader();
108	0	0			0	if ($intype eq "marc21") {
109	0	0			0	if ($leader =~ /^.{9}a/) {
110	0	0			0	print "<marc>---\r\n<marc>UTF-8 data\r\n" if $marcprint;
111	0				0	$utf = 1;
112						}
113						else {
114	0	0			0	print "<marc>---\r\n<marc>MARC-8 data\r\n" if $marcprint;
115						}
116						}
117						## else: other MARC formats do not specify the character encoding
118						## we assume it's not UTF-8
119
120						## start RIS dataset
121	0				0	&print_typetag($leader);
122
123						## retrieve all author fields and collect them in a list
124	0				0	my @author_fields;
125
126	0	0			0	if ($intype eq "unimarc") {
127						## Fields 700, 701, and 702 can contain author names
128	0				0	@author_fields = ($record->field('700'), $record->field('701'), $record->field('702'));
129						}
130						else { ## marc21, ukmarc
131						## Field 100 sometimes carries main author
132						## Field(s) 700 carry added entries - personal names
133	0				0	@author_fields = ($record->field('100'), $record->field('700'));
134						}
135
136						## loop over all author fields
137	0				0	foreach my $field (@author_fields) {
138	0	0			0	if (length($field)) {
139	0				0	my $author = &get_author($field);
140	0				0	print "AU - ",&charconv($author),"\r\n";
141						}
142						}
143
144						# ToDo: should we specify anonymous as author if we didn't find
145						# one? or use one of the corporate/meeting names below?
146
147						## add corporate names or meeting names as editors ??
148	0				0	my @editor_fields;
149
150	0	0			0	if ($intype eq "unimarc") {
151						## Fields 710, 711, and 712 can carry corporate names
152						## Field(s) 720, 721, 722, 730 have additional candidates
153	0				0	@editor_fields = ($record->field('710'), $record->field('711'), $record->field('712'), $record->field('720'), $record->field('721'), $record->field('722'), $record->field('730'));
154						}
155						else { ## marc21, ukmarc
156						## Fields 110 and 111 carry the main entries - corporate name and
157						## meeting name, respectively
158						## Field(s) 710, 711 carry added entries - personal names
159	0				0	@editor_fields = ($record->field('110'), $record->field('111'), $record->field('710'), $record->field('711'));
160						}
161
162						## loop over all editor fields
163	0				0	foreach my $field (@editor_fields) {
164	0	0			0	if (length($field)) {
165	0				0	my $editor = &get_editor($field);
166	0				0	print "ED - ",&charconv($editor),"\r\n";
167						}
168						}
169
170						## get info from the title field
171	0	0			0	if ($intype eq "unimarc") {
172	0				0	&print_title($record->field('200'));
173						}
174						else { ## marc21, ukmarc
175	0				0	&print_title($record->field('245'));
176						}
177
178						## series title
179	0	0			0	if ($intype eq "unimarc") {
180	0				0	&print_stitle($record->field('225'));
181						}
182						else { ## marc21, ukmarc
183	0				0	&print_stitle($record->field('490'));
184						}
185
186						## ISBN/ISSN
187	0	0 0			0	if ($intype eq "unimarc") {
188	0				0	&print_isbn($record->field('010'));
189	0				0	&print_issn($record->field('011'));
190						}
191						elsif ($intype eq "ukmarc") {
192	0				0	&print_isbn($record->field('021'));
193						## this is just an assumption
194	0				0	&print_issn($record->field('022'));
195						}
196						else { ## assume marc21
197	0				0	&print_isbn($record->field('020'));
198	0				0	&print_issn($record->field('022'));
199						}
200
201	0	0			0	if ($intype eq "marc21") {
202	0				0	&print_loc_callno($record->field('050'));
203	0				0	&print_dewey($record->field('082'));
204						}
205						## else: unimarc, ukmarc do not seem to store call numbers?
206
207						## publication info
208	0	0			0	if ($intype eq "unimarc") {
209	0				0	&print_pubinfo($record->field('210'));
210						}
211						else { ## marc21, ukmarc
212	0				0	&print_pubinfo($record->field('260'));
213						}
214
215						## 6XX fields contain KW candidates. We add all of them to a
216						## hash to eliminate duplicates
217	0				0	my %kwpool;
218
219	0	0 0			0	if ($intype eq "unimarc") {
220	0				0	foreach ('600', '601', '602', '604', '605', '606','607', '608', '610', '615', '620', '660'. '661', '670', '675', '676', '680', '686') {
221	0				0	&get_keywords(\%kwpool, "$_",$record->field($_));
222						}
223						}
224						elsif ($intype eq "ukmarc") {
225	0				0	foreach ('600', '610', '611', '630', '650', '651','653', '655', '660', '661', '668', '690', '691', '692', '695') {
226	0				0	&get_keywords(\%kwpool, "$_",$record->field($_));
227						}
228						}
229						else { ## assume marc21
230	0				0	foreach ('600', '610', '611', '630', '650', '651','653', '654', '655', '656', '657', '658') {
231	0				0	&get_keywords(\%kwpool, "$_",$record->field($_));
232						}
233						}
234
235						## print all keywords found in the hash. The value of each hash
236						## entry is the number of occurrences, but we're not really interested
237						## in that and rather print the key
238	0				0	while (my ($key, $value) = each %kwpool) {
239	0				0	print "KW - ", &charconv($key), "\r\n";
240						}
241
242						## 5XX have various candidates for notes and abstracts. We pool
243						## all notes-like stuff in one list.
244	0				0	my @notepool;
245
246						## these fields have notes candidates
247	0	0 0			0	if ($intype eq "unimarc") {
248	0				0	foreach ('300', '301', '302', '303', '304', '305', '306', '307', '308', '310', '311', '312', '313', '314', '315', '316', '317', '318', '320', '321', '322', '323', '324', '325', '326', '327', '328', '332', '333', '336', '337', '345') {
249	0				0	&pool_subx(\@notepool, $_, $record->field($_));
250						}
251						}
252						elsif ($intype eq "ukmarc") {
253	0				0	foreach ('500', '501', '502', '503', '504', '505', '506', '508', '514', '515', '516', '521', '524', '525', '528', '530', '531', '532', '533', '534', '535', '537', '538', '540', '541', '542', '544', '554', '555', '556', '557', '561', '563', '580', '583', '584', '586') {
254	0				0	&pool_subx(\@notepool, $_, $record->field($_));
255						}
256						}
257						else { ## assume marc21
258	0				0	foreach ('500', '501', '502', '504', '505', '506', '507', '508', '510', '511', '513', '514', '515', '516', '518', '521', '522', '524', '525', '526', '530', '533', '534', '535') {
259	0				0	&pool_subx(\@notepool, $_, $record->field($_));
260						}
261						}
262
263	0				0	my $allnotes = join "; ", @notepool;
264
265	0	0			0	if (length($allnotes) > 0) {
266	0				0	print "N1 - ", &charconv($allnotes), "\r\n";
267						}
268
269						## 320/520 have the abstract
270	0	0 0			0	if ($intype eq "unimarc") {
271	0				0	&print_abstract($record->field('320'));
272						}
273						elsif ($intype eq "ukmarc") {
274	0				0	&print_abstract($record->field('512'), $record->field('513'));
275						}
276						else { ## assume marc21
277	0				0	&print_abstract($record->field('520'));
278						}
279
280						# 856u has the URI
281	0	0			0	if ($record->field('856')) {
282	0				0	print_uri($record->field('856'));
283						}
284
285						## end RIS dataset
286	0				0	print "ER - \r\n";
287
288						# Let's re-redirect stdout
289	0				0	close STDOUT;
290	0				0	open STDOUT, ">&", $oldout;
291
292	0				0	return $outvar;
293
294						}
295
296
297						##********************************************************************
298						## print_typetag(): prints the first line of a RIS dataset including
299						## the preceeding newline
300						## Argument: the leader of a MARC dataset
301						## Returns: the value at leader position 06
302						##********************************************************************
303						sub print_typetag {
304	2			2	379488	my ($leader)= @_;
305						## the keys of typehash are the allowed values at position 06
306						## of the leader of a MARC record, the values are the RIS types
307						## that might appropriately represent these types.
308	2				22	my %ustypehash = (
309						"a" => "BOOK",
310						"c" => "MUSIC",
311						"d" => "MUSIC",
312						"e" => "MAP",
313						"f" => "MAP",
314						"g" => "ADVS",
315						"i" => "SOUND",
316						"j" => "SOUND",
317						"k" => "ART",
318						"m" => "DATA",
319						"o" => "GEN",
320						"p" => "GEN",
321						"r" => "ART",
322						"t" => "GEN",
323						);
324
325	2				15	my %unitypehash = (
326						"a" => "BOOK",
327						"b" => "BOOK",
328						"c" => "MUSIC",
329						"d" => "MUSIC",
330						"e" => "MAP",
331						"f" => "MAP",
332						"g" => "ADVS",
333						"i" => "SOUND",
334						"j" => "SOUND",
335						"k" => "ART",
336						"l" => "ELEC",
337						"m" => "ADVS",
338						"r" => "ART",
339						);
340
341						## The type of a MARC record is found at position 06 of the leader
342	2				4	my $typeofrecord = substr($leader, 6, 1);
343
344						## ToDo: for books, field 008 positions 24-27 might have a few more
345						## hints
346
347	2				2	my %typehash;
348
349						## the ukmarc here is just a guess
350	2	50 50	33		24	if ($intype eq "marc21" \|\| $intype eq "ukmarc") {
351	0				0	%typehash = %ustypehash;
352						}
353						elsif ($intype eq "unimarc") {
354	0				0	%typehash = %unitypehash;
355						}
356						else {
357						## assume MARC21 as default
358	2				41	%typehash = %ustypehash;
359						}
360
361	2	50			10	if (!exists $typehash{$typeofrecord}) {
362	2				755	print "TY - BOOK\r\n"; ## most reasonable default
363	2	50			9	warn ("no type found - assume BOOK") if $marcprint;
364						}
365						else {
366	0				0	print "TY - $typehash{$typeofrecord}\r\n";
367						}
368
369						## use $typeofrecord as the return value, just in case
370	2				34	$typeofrecord;
371						}
372
373						##********************************************************************
374						## normalize_author(): normalizes an authorname
375						## Arguments: authorname subfield a
376						## authorname subfield b
377						## authorname subfield c
378						## name type if known: 0=direct order
379						## 1=only surname or full name in
380						## inverted order
381						## 3=family, clan, dynasty name
382						## Returns: the normalized authorname
383						##********************************************************************
384						sub normalize_author {
385	0			0	0	my($rawauthora, $rawauthorb, $rawauthorc, $nametype) = @_;
386
387	0	0 0 0			0	if ($nametype == 0) {
388						# ToDo: convert every input to Last[,(F.\|First)[ (M.\|Middle)[,Suffix]]]
389	0	0			0	warn("name >>$rawauthora<< in direct order - leave as is") if $marcprint;
390	0				0	return $rawauthora;
391						}
392						elsif ($nametype == 1) {
393						## start munging subfield a (the real name part)
394						## remove spaces after separators
395	0				0	$rawauthora =~ s%([,.]+) *%$1%g;
396
397						## remove trailing separators after spaces
398	0				0	$rawauthora =~ s% [,;:/]$%%;
399
400						## remove periods after a non-abbreviated name
401	0				0	$rawauthora =~ s%(\w{2,})\.%$1%g;
402
403						## start munging subfield b (something like the suffix)
404						## remove trailing separators after spaces
405	0				0	$rawauthorb =~ s% [,;:/]$%%;
406
407						## we currently ignore subfield c until someone complains
408	0	0			0	if (length($rawauthorb) > 0) {
409	0				0	return join ",", ($rawauthora, $rawauthorb);
410						}
411						else {
412	0				0	return $rawauthora;
413						}
414						}
415						elsif ($nametype == 3) {
416	0				0	return $rawauthora;
417						}
418						}
419
420						##********************************************************************
421						## get_author(): gets authorname info from MARC fields 100, 700
422						## Argument: field (100 or 700)
423						## Returns: an author string in the format found in the record
424						##********************************************************************
425						sub get_author {
426	0			0	0	my ($authorfield) = @_;
427	0				0	my ($indicator);
428
429						## the sequence of the name parts is encoded either in indicator
430						## 1 (marc21) or 2 (unimarc)
431	0	0			0	if ($intype eq "unimarc") {
432	0				0	$indicator = 2;
433						}
434						else { ## assume marc21
435	0				0	$indicator = 1;
436						}
437
438	0	0			0	print "<marc>:Author(Ind$indicator): ", $authorfield->indicator("$indicator"),"\r\n" if $marcprint;
439	0	0			0	print "<marc>:Author(\$a): ", $authorfield->subfield('a'),"\r\n" if $marcprint;
440	0	0			0	print "<marc>:Author(\$b): ", $authorfield->subfield('b'),"\r\n" if $marcprint;
441	0	0			0	print "<marc>:Author(\$c): ", $authorfield->subfield('c'),"\r\n" if $marcprint;
442	0	0			0	print "<marc>:Author(\$h): ", $authorfield->subfield('h'),"\r\n" if $marcprint;
443	0	0			0	if ($intype eq "ukmarc") {
444	0				0	my $authorname = $authorfield->subfield('a') . "," . $authorfield->subfield('h');
445	0				0	normalize_author($authorname, $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
446						}
447						else {
448	0				0	normalize_author($authorfield->subfield('a'), $authorfield->subfield('b'), $authorfield->subfield('c'), $authorfield->indicator("$indicator"));
449						}
450						}
451
452						##********************************************************************
453						## get_editor(): gets editor info from MARC fields 110, 111, 710, 711
454						## Argument: field (110, 111, 710, or 711)
455						## Returns: an author string in the format found in the record
456						##********************************************************************
457						sub get_editor {
458	0			0	0	my ($editorfield) = @_;
459
460	0	0			0	if (!$editorfield) {
461	0				0	return;
462						}
463						else {
464	0	0			0	print "<marc>Editor(\$a): ", $editorfield->subfield('a'),"\r\n" if $marcprint;
465	0	0			0	print "<marc>Editor(\$b): ", $editorfield->subfield('b'),"\r\n" if $marcprint;
466	0	0			0	print "<marc>editor(\$c): ", $editorfield->subfield('c'),"\r\n" if $marcprint;
467	0				0	return $editorfield->subfield('a');
468						}
469						}
470
471						##********************************************************************
472						## print_title(): gets info from MARC field 245
473						## Arguments: field (245)
474						## Returns:
475						##********************************************************************
476						sub print_title {
477	2			2	4	my ($titlefield) = @_;
478	2	50			6	if (!$titlefield) {
479	2	50			6	print "<marc>empty title field (245)\r\n" if $marcprint;
480	2	50			21	warn("empty title field (245)") if $marcprint;
481						}
482						else {
483	0	0			0	print "<marc>Title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
484	0	0			0	print "<marc>Title(\$b): ",$titlefield->subfield('b'),"\r\n" if $marcprint;
485	0	0			0	print "<marc>Title(\$c): ",$titlefield->subfield('c'),"\r\n" if $marcprint;
486
487						## The title is usually written in a very odd notation. The title
488						## proper ($a) often ends with a space followed by a separator like
489						## a slash or a colon. The subtitle ($b) doesn't start with a space
490						## so simple concatenation looks odd. We have to conditionally remove
491						## the separator and make sure there's a space between title and
492						## subtitle
493
494	0				0	my $clean_title = $titlefield->subfield('a');
495
496	0				0	my $clean_subtitle = $titlefield->subfield('b');
497	0				0	$clean_title =~ s% *[/:;.]$%%;
498	0				0	$clean_subtitle =~ s%^ (.) *[/:;.]$%$1%;
499
500	0	0	0		0	if (length($clean_title) > 0
501						\|\| (length($clean_subtitle) > 0 && $intype ne "unimarc")) {
502	0				0	print "TI - ", &charconv($clean_title);
503
504						## subfield $b is relevant only for marc21/ukmarc
505	0	0	0		0	if (length($clean_subtitle) > 0 && $intype ne "unimarc") {
506	0				0	print ": ",&charconv($clean_subtitle);
507						}
508	0				0	print "\r\n";
509						}
510
511						## The statement of responsibility is just this: horrors. There is
512						## no formal definition how authors, editors and the like should
513						## be written and designated. The field is free-form and resistant
514						## to all parsing efforts, so this information is lost on me
515						}
516						}
517
518						##********************************************************************
519						## print_stitle(): prints info from series title field
520						## Arguments: field
521						## Returns:
522						##********************************************************************
523						sub print_stitle {
524	2			2	4	my ($titlefield) = @_;
525
526	2	50			7	if (!$titlefield) {
527	2	50			17	print "<marc>empty series title field\r\n" if $marcprint;
528						}
529						else {
530	0	0			0	print "<marc>Series title(\$a): ",$titlefield->subfield('a'),"\r\n" if $marcprint;
531	0				0	my $clean_title = $titlefield->subfield('a');
532
533	0				0	$clean_title =~ s% *[/:;.]$%%;
534
535	0	0			0	if (length($clean_title) > 0) {
536	0				0	print "T2 - ", &charconv($clean_title),"\r\n";
537						}
538
539	0	0			0	if ($intype eq "unimarc") {
540	0	0			0	print "<marc>Series vol(\$v): ",$titlefield->subfield('v'),"\r\n" if $marcprint;
541	0	0			0	if (length($titlefield->subfield('v')) > 0) {
542	0				0	print "VL - ", &charconv($titlefield->subfield('v')),"\r\n";
543						}
544						}
545						}
546						}
547
548						##********************************************************************
549						## print_isbn(): gets info from MARC field 020
550						## Arguments: field (020)
551						##********************************************************************
552						sub print_isbn {
553	0			0	0	my($isbnfield) = @_;
554
555	0	0	0		0	if (!$isbnfield \|\| length ($isbnfield->subfield('a')) == 0) {
556	0	0			0	print "<marc>no isbn found (020\$a)\r\n" if $marcprint;
557	0	0			0	warn("no isbn found") if $marcprint;
558						}
559						else {
560	0	0			0	if (length ($isbnfield->subfield('a')) < 10) {
561	0	0			0	print "<marc>truncated isbn (020\$a)\r\n" if $marcprint;
562	0	0			0	warn("truncated isbn") if $marcprint;
563						}
564
565	0				0	my $isbn = substr($isbnfield->subfield('a'), 0, 10);
566	0				0	print "SN - ", &charconv($isbn), "\r\n";
567						}
568						}
569
570						##********************************************************************
571						## print_issn(): gets info from MARC field 022
572						## Arguments: field (022)
573						##********************************************************************
574						sub print_issn {
575	0			0	0	my($issnfield) = @_;
576
577	0	0	0		0	if (!$issnfield \|\| length ($issnfield->subfield('a')) == 0) {
578	0	0			0	print "<marc>no issn found (022\$a)\r\n" if $marcprint;
579	0	0			0	warn("no issn found") if $marcprint;
580						}
581						else {
582	0	0			0	if (length ($issnfield->subfield('a')) < 9) {
583	0	0			0	print "<marc>truncated issn (022\$a)\r\n" if $marcprint;
584	0	0			0	warn("truncated issn") if $marcprint;
585						}
586
587	0				0	my $issn = substr($issnfield->subfield('a'), 0, 9);
588	0				0	print "SN - ", &charconv($issn), "\r\n";
589						}
590						}
591
592						###
593						# print_uri() prints info from 856 u
594						###
595						sub print_uri {
596	0			0	0	my @f856s = @_;
597
598	0				0	foreach my $f856 (@f856s) {
599	0	0			0	if (my $uri = $f856->subfield('u')) {
600	0				0	print "UR - ", charconv($uri), "\r\n";
601						}
602						}
603						}
604
605						##********************************************************************
606						## print_loc_callno(): gets info from MARC field 050
607						## Arguments: field (050)
608						##********************************************************************
609						sub print_loc_callno {
610	0			0	0	my($callnofield) = @_;
611
612	0	0	0		0	if (!$callnofield \|\| length ($callnofield->subfield('a')) == 0) {
613	0	0			0	print "<marc>no LOC call number found (050\$a)\r\n" if $marcprint;
614	0	0			0	warn("no LOC call number found") if $marcprint;
615						}
616						else {
617	0				0	print "AV - ", &charconv($callnofield->subfield('a')), " ", &charconv($callnofield->subfield('b')), "\r\n";
618						}
619						}
620
621						##********************************************************************
622						## print_dewey(): gets info from MARC field 082
623						## Arguments: field (082)
624						##********************************************************************
625						sub print_dewey {
626	0			0	0	my($deweyfield) = @_;
627
628	0	0	0		0	if (!$deweyfield \|\| length ($deweyfield->subfield('a')) == 0) {
629	0	0			0	print "<marc>no Dewey number found (082\$a)\r\n" if $marcprint;
630	0	0			0	warn("no Dewey number found") if $marcprint;
631						}
632						else {
633	0				0	print "U1 - ", &charconv($deweyfield->subfield('a')), " ", &charconv($deweyfield->subfield('2')), "\r\n";
634						}
635						}
636
637						##********************************************************************
638						## print_pubinfo(): gets info from MARC field 260
639						## Arguments: field (260)
640						##********************************************************************
641						sub print_pubinfo {
642	0			0	0	my($pubinfofield) = @_;
643
644	0	0			0	if (!$pubinfofield) {
645	0	0			0	print "<marc>no publication information found (260)\r\n" if $marcprint;
646	0	0			0	warn("no publication information found") if $marcprint;
647						}
648						else {
649						## the following information is available in MARC21:
650						## $a place -> CY
651						## $b publisher -> PB
652						## $c date -> PY
653						## the corresponding subfields for UNIMARC:
654						## $a place -> CY
655						## $c publisher -> PB
656						## $d date -> PY
657
658						## all of them are repeatable. We pool all places into a
659						## comma-separated list in CY. We also pool all publishers
660						## into a comma-separated list in PB. We break the rule with
661						## the date field because this wouldn't make much sense. In
662						## this case, we use the first occurrence for PY, the second
663						## for Y2, and ignore the rest
664
665	0				0	my @pubsubfields = $pubinfofield->subfields();
666	0				0	my @cities;
667	0				0	my @publishers;
668	0				0	my $pycounter = 0;
669
670	0				0	my $pubsub_place;
671	0				0	my $pubsub_publisher;
672	0				0	my $pubsub_date;
673
674	0	0			0	if ($intype eq "unimarc") {
675	0				0	$pubsub_place = "a";
676	0				0	$pubsub_publisher = "c";
677	0				0	$pubsub_date = "d";
678						}
679						else { ## assume marc21
680	0				0	$pubsub_place = "a";
681	0				0	$pubsub_publisher = "b";
682	0				0	$pubsub_date = "c";
683						}
684
685						## loop over all subfield list entries
686	0				0	for my $tuple (@pubsubfields) {
687						## each tuple consists of the subfield code and the value
688	0	0 0 0			0	if (@$tuple[0] eq $pubsub_place) {
689						## strip any trailing crap
690	0				0	$_ = @$tuple[1];
691	0				0	s% *[,;:/]$%%;
692						## pool all occurrences in a list
693	0				0	push (@cities, $_);
694						}
695						elsif (@$tuple[0] eq $pubsub_publisher) {
696						## strip any trailing crap
697	0				0	$_ = @$tuple[1];
698	0				0	s% *[,;:/]$%%;
699						## pool all occurrences in a list
700	0				0	push (@publishers, $_);
701						}
702						elsif (@$tuple[0] eq $pubsub_date) {
703						## the dates are free-form, so we want to extract
704						## a four-digit year and leave the rest as
705						## "other info"
706	0				0	$protoyear = @$tuple[1];
707	0	0			0	print "<marc>Year (260\$c): $protoyear\r\n" if $marcprint;
708
709						## strip any separator chars at the end
710	0				0	$protoyear =~ s% [\.;:/]$%%;
711
712						## isolate a four-digit year. We discard anything
713						## preceeding the year, but keep everything after
714						## the year as other info.
715	0				0	$protoyear =~ s%\D([0-9\-]{4})(.)%$1///$2%;
716
717						## check what we've got. If there is no four-digit
718						## year, make it up. If digits are replaced by '-',
719						## replace those with 0s
720
721	0	0			0	if (index($protoyear, "/") == 4) {
722						## have year info
723						## replace all '-' in the four-digit year
724						## by '0'
725	0				0	substr($protoyear,0,4) =~ s!-!0!g;
726						}
727						else {
728						## have no year info
729	0	0			0	print "<marc>no four-digit year found, use 0000\r\n" if $marcprint;
730	0				0	$protoyear = "0000///$protoyear";
731	0	0			0	warn("no four-digit year found, use 0000") if $marcprint;
732						}
733
734	0	0 0	0 0		0	if ($pycounter == 0 && length($protoyear)) {
735	0				0	print "PY - $protoyear\r\n";
736						}
737						elsif ($pycounter == 1 && length($_)) {
738	0				0	print "Y2 - $protoyear\r\n";
739						}
740						## else: discard
741						}
742						## else: discard
743						}
744
745						## now dump the collected CY and PB lists
746	0	0			0	if (@cities > 0) {
747	0				0	print "CY - ", &charconv(join(", ", @cities)), "\r\n";
748						}
749	0	0			0	if (@publishers > 0) {
750	0				0	print "PB - ", &charconv(join(", ", @publishers)), "\r\n";
751						}
752						}
753						}
754
755						##********************************************************************
756						## get_keywords(): prints info from MARC fields 6XX
757						## Arguments: list of fields (6XX)
758						##********************************************************************
759						sub get_keywords {
760	0			0	0	my($href, $fieldname, @keywords) = @_;
761
762						## a list of all possible subfields
763	0				0	my @subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'x', 'y', 'z', '2', '3', '4');
764
765						## loop over all 6XX fields
766	0				0	foreach $kwfield (@keywords) {
767	0	0			0	if ($kwfield != undef) {
768						## authornames get special treatment
769	0	0			0	if ($fieldname eq "600") {
770	0				0	my $val = normalize_author($kwfield->subfield('a'), $kwfield->subfield('b'), $kwfield->subfield('c'), $kwfield->indicator('1'));
771	0 0				0 0	${$href}{$val} += 1;
772	0	0			0	print "<marc>Field $kwfield subfield a:", $kwfield->subfield('a'), "\r\n<marc>Field $kwfield subfield b:", $kwfield->subfield('b'), "\r\n<marc>Field $kwfield subfield c:", $kwfield->subfield('c'), "\r\n" if $marcprint;
773						}
774						else {
775						## retrieve all available subfields
776	0				0	@kwsubfields = $kwfield->subfields();
777
778						## loop over all available subfield tuples
779	0				0	foreach $kwtuple (@kwsubfields) {
780						## loop over all subfields to check
781	0				0	foreach $subfield (@subfields) {
782						## [0] contains subfield code
783	0	0			0	if (@$kwtuple[0] eq $subfield) {
784						## [1] contains value, remove trailing separators
785	0				0	@$kwtuple[1] =~ s% [,;.:/]$%%;
786	0	0			0	if (length(@$kwtuple[1]) > 0) {
787						## add to hash
788	0 0				0 0	${$href}{@$kwtuple[1]} += 1;
789	0	0			0	print "<marc>Field $fieldname subfield $subfield:", @$kwtuple[1], "\r\n" if $marcprint;
790						}
791						## we can leave the subfields loop here
792	0				0	last;
793						}
794						}
795						}
796						}
797						}
798						}
799						}
800
801						##********************************************************************
802						## pool_subx(): adds contents of several subfields to a list
803						## Arguments: reference to a list
804						## field name
805						## list of fields (5XX)
806						##********************************************************************
807						sub pool_subx {
808	0			0	0	my($aref, $fieldname, @notefields) = @_;
809
810						## we use a list that contains the interesting subfields
811						## for each field
812						# ToDo: this is apparently correct only for marc21
813	0				0	my @subfields;
814
815	0	0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0			0	if ($fieldname eq "500") {
816	0				0	@subfields = ('a');
817						}
818						elsif ($fieldname eq "501") {
819	0				0	@subfields = ('a');
820						}
821						elsif ($fieldname eq "502") {
822	0				0	@subfields = ('a');
823						}
824						elsif ($fieldname eq "504") {
825	0				0	@subfields = ('a', 'b');
826						}
827						elsif ($fieldname eq "505") {
828	0				0	@subfields = ('a', 'g', 'r', 't', 'u');
829						}
830						elsif ($fieldname eq "506") {
831	0				0	@subfields = ('a', 'b', 'c', 'd', 'e');
832						}
833						elsif ($fieldname eq "507") {
834	0				0	@subfields = ('a', 'b');
835						}
836						elsif ($fieldname eq "508") {
837	0				0	@subfields = ('a');
838						}
839						elsif ($fieldname eq "510") {
840	0				0	@subfields = ('a', 'b', 'c', 'x', '3');
841						}
842						elsif ($fieldname eq "511") {
843	0				0	@subfields = ('a');
844						}
845						elsif ($fieldname eq "513") {
846	0				0	@subfields = ('a', 'b');
847						}
848						elsif ($fieldname eq "514") {
849	0				0	@subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'u', 'z');
850						}
851						elsif ($fieldname eq "515") {
852	0				0	@subfields = ('a');
853						}
854						elsif ($fieldname eq "516") {
855	0				0	@subfields = ('a');
856						}
857						elsif ($fieldname eq "518") {
858	0				0	@subfields = ('a', '3');
859						}
860						elsif ($fieldname eq "521") {
861	0				0	@subfields = ('a', 'b', '3');
862						}
863						elsif ($fieldname eq "522") {
864	0				0	@subfields = ('a');
865						}
866						elsif ($fieldname eq "524") {
867	0				0	@subfields = ('a', '2', '3');
868						}
869						elsif ($fieldname eq "525") {
870	0				0	@subfields = ('a');
871						}
872						elsif ($fieldname eq "526") {
873	0				0	@subfields = ('a', 'b', 'c', 'd', 'i', 'x', 'z', '5');
874						}
875						elsif ($fieldname eq "530") {
876	0				0	@subfields = ('a', 'b', 'c', 'd', 'u', '3');
877						}
878						elsif ($fieldname eq "533") {
879	0				0	@subfields = ('a', 'b', 'c', 'd', 'e', 'f', 'm', 'n', '3');
880						}
881						elsif ($fieldname eq "534") {
882	0				0	@subfields = ('a', 'b', 'c', 'e', 'f', 'k', 'l', 'm', 'n', 'p', 't', 'x', 'z');
883						}
884						elsif ($fieldname eq "535") {
885	0				0	@subfields = ('a', 'b', 'c', 'd', 'g', '3');
886						}
887
888						## loop over all notefields
889	0				0	foreach $notefield (@notefields) {
890	0	0			0	if ($notefield != undef) {
891						## retrieve all available subfield tuples
892	0				0	@notesubfields = $notefield->subfields();
893
894						## loop over all subfield tuples
895	0				0	foreach $notetuple (@notesubfields) {
896						## loop over all subfields to check
897	0				0	foreach $subfield (@subfields) {
898						## [0] contains subfield code
899	0	0			0	if (@$notetuple[0] eq $subfield) {
900						## [1] contains value, remove trailing separators
901	0	0			0	print "<marc>field $fieldname subfield $subfield: ", @$notetuple[1], "\r\n" if $marcprint;
902	0				0	@$notetuple[1] =~ s% [,;.:/]$%%;
903	0	0			0	if (length(@$notetuple[1]) > 0) {
904						## add to list
905	0 0				0 0	push @{$aref}, @$notetuple[1];
906						}
907	0				0	last;
908						}
909						}
910						}
911						}
912						}
913						}
914
915						##********************************************************************
916						## print_abstract(): prints abstract fields
917						## Arguments: list of fields (520)
918						##********************************************************************
919						sub print_abstract {
920						# ToDo: take care of repeatable subfields
921	0			0	0	my(@abfields) = @_;
922
923						## we check the following subfields
924	0				0	my @subfields = ('a', 'b');
925
926						## we generate a list for all useful strings
927	0				0	my @abstrings;
928
929						## loop over all abfields
930	0				0	foreach $abfield (@abfields) {
931	0				0	foreach $field (@subfields) {
932	0	0			0	if (length ($abfield->subfield($field)) > 0) {
933	0				0	my $ab = $abfield->subfield($field);
934
935	0	0			0	print "<marc>field 520 subfield $field: $ab\r\n" if $marcprint;
936
937						## strip trailing separators
938	0				0	$ab =~ s% [;,:./]$%%;
939
940						## add string to the list
941	0				0	push (@abstrings, $ab);
942						}
943						}
944						}
945
946	0				0	my $allabs = join "; ", @abstrings;
947
948	0	0			0	if (length($allabs) > 0) {
949	0				0	print "N2 - ", &charconv($allabs), "\r\n";
950						}
951
952						}
953
954
955
956						##********************************************************************
957						## charconv(): converts to a different charset based on a global var
958						## Arguments: string
959						## Returns: string
960						##********************************************************************
961						sub charconv {
962	4	50 50		4	16	if ($utf) {
963						## return unaltered if already utf-8
964	0				0	return @_;
965						}
966						elsif ($uniout eq "t") {
967						## convert to utf-8
968	0				0	return marc8_to_utf8("@_");
969						}
970						else {
971						## return unaltered if no utf-8 requested
972	4				32	return @_;
973						}
974						}
975						1;