File Coverage

File:	C4/Languages.pm
Coverage:	23.2%

line	stmt	bran	cond	sub	time	code
1						package C4::Languages;
2
3						# Copyright 2006 (C) LibLime
4						# Joshua Ferraro <jmf@liblime.com>
5						# Portions Copyright 2009 Chris Cormack and the Koha Dev Team
6						# This file is part of Koha.
7						#
8						# Koha is free software; you can redistribute it and/or modify it under the
9						# terms of the GNU General Public License as published by the Free Software
10						# Foundation; either version 2 of the License, or (at your option) any later
11						# version.
12						#
13						# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
14						# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15						# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16						#
17						# You should have received a copy of the GNU General Public License along
18						# with Koha; if not, write to the Free Software Foundation, Inc.,
19						# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20
21
22	3 3 3			3	354 27 92	use strict;
23						#use warnings; FIXME - Bug 2505
24	3 3 3			3	33 20 210	use Carp;
25	3 3 3			3	227 19 46	use C4::Context;
26	3 3 3			3	24 14 740	use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
27
28						eval {
29						if (C4::Context->ismemcached) {
30						require Memoize::Memcached;
31						import Memoize::Memcached qw(memoize_memcached);
32
33						memoize_memcached('getTranslatedLanguages', memcached => C4::Context->memcached);
34						memoize_memcached('getFrameworkLanguages' , memcached => C4::Context->memcached);
35						memoize_memcached('getAllLanguages', memcached => C4::Context->memcached);
36						}
37						};
38
39						BEGIN {
40	3			3	52	$VERSION = 3.00;
41	3				30	require Exporter;
42	3				39	@ISA = qw(Exporter);
43	3				16	@EXPORT = qw(
44						&getFrameworkLanguages
45						&getTranslatedLanguages
46						&getAllLanguages
47						);
48	3				17	@EXPORT_OK = qw(getFrameworkLanguages getTranslatedLanguages getAllLanguages get_bidi regex_lang_subtags language_get_description accept_language);
49	3				13122	$DEBUG = 0;
50						}
51
52 - 62						=head1 NAME C4::Languages - Perl Module containing language list functions for Koha =head1 SYNOPSIS use C4::Languages; =head1 DESCRIPTION =cut
63
64 - 77						=head1 FUNCTIONS =head2 getFrameworkLanguages Returns a reference to an array of hashes: my $languages = getFrameworkLanguages(); for my $language(@$languages) { print "$language->{language_code}\n"; # language code in iso 639-2 print "$language->{language_name}\n"; # language name in native script print "$language->{language_locale_name}\n"; # language name in current locale } =cut
78
79						sub getFrameworkLanguages {
80						# get a hash with all language codes, names, and locale names
81	0			0	0	my $all_languages = getAllLanguages();
82	0				0	my @languages;
83
84						# find the available directory names
85	0				0	my $dir=C4::Context->config('intranetdir')."/installer/data/";
86	0				0	opendir (MYDIR,$dir);
87	0 0	0			0 0	my @listdir= grep { !/^\.\|CVS/ && -d "$dir/$_"} readdir(MYDIR);
88	0				0	closedir MYDIR;
89
90						# pull out all data for the dir names that exist
91	0				0	for my $dirname (@listdir) {
92	0				0	for my $language_set (@$all_languages) {
93
94	0	0			0	if ($dirname eq $language_set->{language_code}) {
95	0				0	push @languages, {
96						'language_code'=>$dirname,
97						'language_description'=>$language_set->{language_description},
98						'native_descrition'=>$language_set->{language_native_description} }
99						}
100						}
101						}
102	0				0	return \@languages;
103						}
104
105 - 117						=head2 getTranslatedLanguages Returns a reference to an array of hashes: my $languages = getTranslatedLanguages(); print "Available translated languages:\n"; for my $language(@$trlanguages) { print "$language->{language_code}\n"; # language code in iso 639-2 print "$language->{language_name}\n"; # language name in native script print "$language->{language_locale_name}\n"; # language name in current locale } =cut
118
119						sub getTranslatedLanguages {
120	0			0	0	my ($interface, $theme, $current_language, $which) = @_;
121	0				0	my $htdocs;
122	0				0	my $all_languages = getAllLanguages();
123	0				0	my @languages;
124	0				0	my @enabled_languages;
125
126	0	0 0	0 0		0	if ($interface && $interface eq 'opac' ) {
127	0				0	@enabled_languages = split ",", C4::Context->preference('opaclanguages');
128	0				0	$htdocs = C4::Context->config('opachtdocs');
129	0	0	0		0	if ( $theme and -d "$htdocs/$theme" ) {
130	0				0	(@languages) = _get_language_dirs($htdocs,$theme);
131						}
132						else {
133	0				0	for my $theme ( _get_themes('opac') ) {
134	0				0	push @languages, _get_language_dirs($htdocs,$theme);
135						}
136						}
137						}
138						elsif ($interface && $interface eq 'intranet' ) {
139	0				0	@enabled_languages = split ",", C4::Context->preference('language');
140	0				0	$htdocs = C4::Context->config('intrahtdocs');
141	0	0	0		0	if ( $theme and -d "$htdocs/$theme" ) {
142	0				0	@languages = _get_language_dirs($htdocs,$theme);
143						}
144						else {
145	0				0	foreach my $theme ( _get_themes('intranet') ) {
146	0				0	push @languages, _get_language_dirs($htdocs,$theme);
147						}
148						}
149						}
150						else {
151	0				0	@enabled_languages = split ",", C4::Context->preference('opaclanguages');
152	0				0	my $htdocs = C4::Context->config('intrahtdocs');
153	0				0	foreach my $theme ( _get_themes('intranet') ) {
154	0				0	push @languages, _get_language_dirs($htdocs,$theme);
155						}
156	0				0	$htdocs = C4::Context->config('opachtdocs');
157	0				0	foreach my $theme ( _get_themes('opac') ) {
158	0				0	push @languages, _get_language_dirs($htdocs,$theme);
159						}
160	0				0	my %seen;
161	0 0				0 0	$seen{$_}++ for @languages;
162	0				0	@languages = keys %seen;
163						}
164	0				0	return _build_languages_arrayref($all_languages,\@languages,$current_language,\@enabled_languages);
165						}
166
167 - 179						=head2 getAllLanguages Returns a reference to an array of hashes: my $alllanguages = getAllLanguages(); print "Available translated languages:\n"; for my $language(@$alllanguages) { print "$language->{language_code}\n"; print "$language->{language_name}\n"; print "$language->{language_locale_name}\n"; } =cut
180
181						sub getAllLanguages {
182	0			0	0	my @languages_loop;
183	0				0	my $dbh=C4::Context->dbh;
184	0		0		0	my $current_language = shift \|\| 'en';
185	0				0	my $sth = $dbh->prepare('SELECT * FROM language_subtag_registry WHERE type=\'language\'');
186	0				0	$sth->execute();
187	0				0	while (my $language_subtag_registry = $sth->fetchrow_hashref) {
188
189						# pull out all the script descriptions for each language
190	0				0	my $sth2= $dbh->prepare("SELECT * FROM language_descriptions LEFT JOIN language_rfc4646_to_iso639 on language_rfc4646_to_iso639.rfc4646_subtag = language_descriptions.subtag WHERE type='language' AND subtag =? AND language_descriptions.lang = ?");
191	0				0	$sth2->execute($language_subtag_registry->{subtag},$current_language);
192
193	0				0	my $sth3 = $dbh->prepare("SELECT description FROM language_descriptions WHERE type='language' AND subtag=? AND lang=?");
194
195						# add the correct description info
196	0				0	while (my $language_descriptions = $sth2->fetchrow_hashref) {
197	0				0	$sth3->execute($language_subtag_registry->{subtag},$language_subtag_registry->{subtag});
198	0				0	my $native_description;
199	0				0	while (my $description = $sth3->fetchrow_hashref) {
200	0				0	$native_description = $description->{description};
201						}
202
203						# fill in the ISO6329 code
204	0				0	$language_subtag_registry->{iso639_2_code} = $language_descriptions->{iso639_2_code};
205						# fill in the native description of the language, as well as the current language's translation of that if it exists
206	0	0			0	if ($native_description) {
207	0				0	$language_subtag_registry->{language_description} = $native_description;
208	0	0			0	$language_subtag_registry->{language_description}.=" ($language_descriptions->{description})" if $language_descriptions->{description};
209						}
210						else {
211	0				0	$language_subtag_registry->{language_description} = $language_descriptions->{description};
212						}
213						}
214	0				0	push @languages_loop, $language_subtag_registry;
215						}
216	0				0	return \@languages_loop;
217						}
218
219 - 226						=head2 _get_themes Internal function, returns an array of all available themes. (@themes) = &_get_themes('opac'); (@themes) = &_get_themes('intranet'); =cut
227
228						sub _get_themes {
229	1			1	3	my $interface = shift;
230	1				1	my $htdocs;
231	1				1	my @themes;
232	1	50			4	if ( $interface eq 'intranet' ) {
233	0				0	$htdocs = C4::Context->config('intrahtdocs');
234						}
235						else {
236	1				8	$htdocs = C4::Context->config('opachtdocs');
237						}
238	1				119	opendir D, "$htdocs";
239	1				23	my @dirlist = readdir D;
240	1				2	foreach my $directory (@dirlist) {
241						# if there's an en dir, it's a valid theme
242	3	100			35	-d "$htdocs/$directory/en" and push @themes, $directory;
243						}
244	1				11	return @themes;
245						}
246
247 - 251						=head2 _get_language_dirs Internal function, returns an array of directory names, excluding non-language directories =cut
252
253						sub _get_language_dirs {
254	1			1	35	my ($htdocs,$theme) = @_;
255	1				32	my @lang_strings;
256	1				52	opendir D, "$htdocs/$theme";
257	1				19157	for my $lang_string ( readdir D ) {
258	23	100			48	next if $lang_string =~/^\./;
259	21	50			38	next if $lang_string eq 'all';
260	21	50			34	next if $lang_string =~/png$/;
261	21	50			39	next if $lang_string =~/css$/;
262	21	50			37	next if $lang_string =~/CVS$/;
263	21	50			39	next if $lang_string =~/\.txt$/i; #Don't read the readme.txt !
264	21	100			52	next if $lang_string =~/img\|images\|famfam\|sound\|pdf/;
265	20				31	push @lang_strings, $lang_string;
266						}
267	1				20	return (@lang_strings);
268						}
269
270 - 276						=head2 _build_languages_arrayref Internal function for building the ref to array of hashes FIXME: this could be rewritten and simplified using map =cut
277
278						sub _build_languages_arrayref {
279	0			0	0	my ($all_languages,$translated_languages,$current_language,$enabled_languages) = @_;
280	0				0	my @translated_languages = @$translated_languages;
281	0				0	my @languages_loop; # the final reference to an array of hashrefs
282	0				0	my @enabled_languages = @$enabled_languages;
283						# how many languages are enabled, if one, take note, some contexts won't need to display it
284	0				0	my %seen_languages; # the language tags we've seen
285	0				0	my %found_languages;
286	0				0	my $language_groups;
287	0				0	my $track_language_groups;
288	0				0	my $current_language_regex = regex_lang_subtags($current_language);
289						# Loop through the translated languages
290	0				0	for my $translated_language (@translated_languages) {
291						# separate the language string into its subtag types
292	0				0	my $language_subtags_hashref = regex_lang_subtags($translated_language);
293
294						# is this language string 'enabled'?
295	0				0	for my $enabled_language (@enabled_languages) {
296						#warn "Checking out if $translated_language eq $enabled_language";
297	0	0			0	$language_subtags_hashref->{'enabled'} = 1 if $translated_language eq $enabled_language;
298						}
299
300						# group this language, key by langtag
301	0	0			0	$language_subtags_hashref->{'sublanguage_current'} = 1 if $translated_language eq $current_language;
302	0				0	$language_subtags_hashref->{'rfc4646_subtag'} = $translated_language;
303	0				0	$language_subtags_hashref->{'native_description'} = language_get_description($language_subtags_hashref->{language},$language_subtags_hashref->{language},'language');
304	0				0	$language_subtags_hashref->{'script_description'} = language_get_description($language_subtags_hashref->{script},$language_subtags_hashref->{'language'},'script');
305	0				0	$language_subtags_hashref->{'region_description'} = language_get_description($language_subtags_hashref->{region},$language_subtags_hashref->{'language'},'region');
306	0				0	$language_subtags_hashref->{'variant_description'} = language_get_description($language_subtags_hashref->{variant},$language_subtags_hashref->{'language'},'variant');
307	0				0	$track_language_groups->{$language_subtags_hashref->{'language'}}++;
308	0 0				0 0	push ( @{ $language_groups->{$language_subtags_hashref->{language}} }, $language_subtags_hashref );
309						}
310						# $key is a language subtag like 'en'
311	0				0	while( my ($key, $value) = each %$language_groups) {
312
313						# is this language group enabled? are any of the languages within it enabled?
314	0				0	my $enabled;
315	0				0	for my $enabled_language (@enabled_languages) {
316	0				0	my $regex_enabled_language = regex_lang_subtags($enabled_language);
317	0	0			0	$enabled = 1 if $key eq $regex_enabled_language->{language};
318						}
319	0	0 0			0	push @languages_loop, {
320						# this is only use if there is one
321						rfc4646_subtag => @$value[0]->{rfc4646_subtag},
322						native_description => language_get_description($key,$key,'language'),
323						language => $key,
324						sublanguages_loop => $value,
325						plural => $track_language_groups->{$key} >1 ? 1 : 0,
326						current => $current_language_regex->{language} eq $key ? 1 : 0,
327						group_enabled => $enabled,
328						};
329						}
330	0				0	return \@languages_loop;
331						}
332
333						sub language_get_description {
334	0			0	0	my ($script,$lang,$type) = @_;
335	0				0	my $dbh = C4::Context->dbh;
336	0				0	my $desc;
337	0				0	my $sth = $dbh->prepare("SELECT description FROM language_descriptions WHERE subtag=? AND lang=? AND type=?");
338						#warn "QUERY: SELECT description FROM language_descriptions WHERE subtag=$script AND lang=$lang AND type=$type";
339	0				0	$sth->execute($script,$lang,$type);
340	0				0	while (my $descriptions = $sth->fetchrow_hashref) {
341	0				0	$desc = $descriptions->{'description'};
342						}
343	0	0			0	unless ($desc) {
344	0				0	$sth = $dbh->prepare("SELECT description FROM language_descriptions WHERE subtag=? AND lang=? AND type=?");
345	0				0	$sth->execute($script,'en',$type);
346	0				0	while (my $descriptions = $sth->fetchrow_hashref) {
347	0				0	$desc = $descriptions->{'description'};
348						}
349						}
350	0				0	return $desc;
351						}
352 - 358						=head2 regex_lang_subtags This internal sub takes a string composed according to RFC 4646 as an input and returns a reference to a hash containing keys and values for ( language, script, region, variant, extension, privateuse ) =cut
359
360						sub regex_lang_subtags {
361	0			0	0	my $string = shift;
362
363						# Regex for recognizing RFC 4646 well-formed tags
364						# http://www.rfc-editor.org/rfc/rfc4646.txt
365
366						# regexes based on : http://unicode.org/cldr/data/tools/java/org/unicode/cldr/util/data/langtagRegex.txt
367						# The structure requires no forward references, so it reverses the order.
368						# The uppercase comments are fragments copied from RFC 4646
369						#
370						# Note: the tool requires that any real "=" or "#" or ";" in the regex be escaped.
371
372	0				0	my $alpha = qr/[a-zA-Z]/ ; # ALPHA
373	0				0	my $digit = qr/[0-9]/ ; # DIGIT
374	0				0	my $alphanum = qr/[a-zA-Z0-9]/ ; # ALPHA / DIGIT
375	0				0	my $x = qr/[xX]/ ; # private use singleton
376	0				0	my $singleton = qr/[a-w y-z A-W Y-Z]/ ; # other singleton
377	0				0	my $s = qr/[-]/ ; # separator -- lenient parsers will use [-_]
378
379						# Now do the components. The structure is slightly different to allow for capturing the right components.
380						# The notation (?:....) is a non-capturing version of (...): so the "?:" can be deleted if someone doesn't care about capturing.
381
382	0				0	my $extlang = qr{(?: $s $alpha{3} )}x ; # *3("-" 3ALPHA)
383	0				0	my $language = qr{(?: $alpha{2,3} \| $alpha{4,8} )}x ;
384						#my $language = qr{(?: $alpha{2,3}$extlang{0,3} \| $alpha{4,8} )}x ; # (23ALPHA [ extlang ]) / 4ALPHA / 58ALPHA
385
386	0				0	my $script = qr{(?: $alpha{4} )}x ; # 4ALPHA
387
388	0				0	my $region = qr{(?: $alpha{2} \| $digit{3} )}x ; # 2ALPHA / 3DIGIT
389
390	0				0	my $variantSub = qr{(?: $digit$alphanum{3} \| $alphanum{5,8} )}x ; # ("-" variant), 58alphanum / (DIGIT 3alphanum)
391	0				0	my $variant = qr{(?: $variantSub (?: $s$variantSub )* )}x ; # ("-" variant), 58alphanum / (DIGIT 3alphanum)
392
393	0				0	my $extensionSub = qr{(?: $singleton (?: $s$alphanum{2,8} )+ )}x ; # singleton 1("-" (28alphanum))
394	0				0	my $extension = qr{(?: $extensionSub (?: $s$extensionSub )* )}x ; # singleton 1("-" (28alphanum))
395
396	0				0	my $privateuse = qr{(?: $x (?: $s$alphanum{1,8} )+ )}x ; # ("x"/"X") 1("-" (18alphanum))
397
398						# Define certain grandfathered codes, since otherwise the regex is pretty useless.
399						# Since these are limited, this is safe even later changes to the registry --
400						# the only oddity is that it might change the type of the tag, and thus
401						# the results from the capturing groups.
402						# http://www.iana.org/assignments/language-subtag-registry
403						# Note that these have to be compared case insensitively, requiring (?i) below.
404
405	0				0	my $grandfathered = qr{(?: (?i)
406						en $s GB $s oed
407						\| i $s (?: ami \| bnn \| default \| enochian \| hak \| klingon \| lux \| mingo \| navajo \| pwn \| tao \| tay \| tsu )
408						\| sgn $s (?: BE $s fr \| BE $s nl \| CH $s de)
409						)}x;
410
411						# For well-formedness, we don't need the ones that would otherwise pass, so they are commented out here
412
413						# \| art $s lojban
414						# \| cel $s gaulish
415						# \| en $s (?: boont \| GB $s oed \| scouse )
416						# \| no $s (?: bok \| nyn)
417						# \| zh $s (?: cmn \| cmn $s Hans \| cmn $s Hant \| gan \| guoyu \| hakka \| min \| min $s nan \| wuu \| xiang \| yue)
418
419						# Here is the final breakdown, with capturing groups for each of these components
420						# The language, variants, extensions, grandfathered, and private-use may have interior '-'
421
422						#my $root = qr{(?: ($language) (?: $s ($script) )? 40% (?: $s ($region) )? 40% (?: $s ($variant) )? 10% (?: $s ($extension) )? 5% (?: $s ($privateuse) )? 5% ) 90% \| ($grandfathered) 5% \| ($privateuse) 5% };
423
424	0				0	$string =~ qr{^ (?:($language)) (?:$s($script))? (?:$s($region))? (?:$s($variant))? (?:$s($extension))? (?:$s($privateuse))? $}xi; # \|($grandfathered) \| ($privateuse) $}xi;
425	0				0	my %subtag = (
426						'rfc4646_subtag' => $string,
427						'language' => $1,
428						'script' => $2,
429						'region' => $3,
430						'variant' => $4,
431						'extension' => $5,
432						'privateuse' => $6,
433						);
434	0				0	return \%subtag;
435						}
436
437						# Script Direction Resources:
438						# http://www.w3.org/International/questions/qa-scripts
439						sub get_bidi {
440	0			0	0	my ($language_script)= @_;
441	0				0	my $dbh = C4::Context->dbh;
442	0				0	my $bidi;
443	0				0	my $sth = $dbh->prepare('SELECT bidi FROM language_script_bidi WHERE rfc4646_subtag=?');
444	0				0	$sth->execute($language_script);
445	0				0	while (my $result = $sth->fetchrow_hashref) {
446	0				0	$bidi = $result->{'bidi'};
447						}
448	0				0	return $bidi;
449						};
450
451						sub accept_language {
452						# referenced http://search.cpan.org/src/CGILMORE/I18N-AcceptLanguage-1.04/lib/I18N/AcceptLanguage.pm
453	1			1	2	my ($clientPreferences,$supportedLanguages) = @_;
454	1				2	my @languages = ();
455	1	50			4	if ($clientPreferences) {
456						# There should be no whitespace anways, but a cleanliness/sanity check
457	0				0	$clientPreferences =~ s/\s//g;
458						# Prepare the list of client-acceptable languages
459	0				0	foreach my $tag (split(/,/, $clientPreferences)) {
460	0				0	my ($language, $quality) = split(/\;/, $tag);
461	0	0			0	$quality =~ s/^q=//i if $quality;
462	0	0			0	$quality = 1 unless $quality;
463	0	0			0	next if $quality <= 0;
464						# We want to force the wildcard to be last
465	0	0			0	$quality = 0 if ($language eq '*');
466						# Pushing lowercase language here saves processing later
467	0				0	push(@languages, { quality => $quality,
468						language => $language,
469						lclanguage => lc($language) });
470						}
471						} else {
472	1				79	carp "accept_language(x,y) called with no clientPreferences (x).";
473						}
474						# Prepare the list of server-supported languages
475	1				2118	my %supportedLanguages = ();
476	1				20	my %secondaryLanguages = ();
477	1				19	foreach my $language (@$supportedLanguages) {
478						# warn "Language supported: " . $language->{language};
479	0				0	my $subtag = $language->{rfc4646_subtag};
480	0				0	$supportedLanguages{lc($subtag)} = $subtag;
481	0	0			0	if ( $subtag =~ /^([^-]+)-?/ ) {
482	0				0	$secondaryLanguages{lc($1)} = $subtag;
483						}
484						}
485
486						# Reverse sort the list, making best quality at the front of the array
487	1 0				20 0	@languages = sort { $b->{quality} <=> $a->{quality} } @languages;
488	1				18	my $secondaryMatch = '';
489	1				16	foreach my $tag (@languages) {
490	0	0 0 0 0 0	0 0		0	if (exists($supportedLanguages{$tag->{lclanguage}})) {
491						# Client en-us eq server en-us
492	0	0			0	return $supportedLanguages{$tag->{language}} if exists($supportedLanguages{$tag->{language}});
493	0				0	return $supportedLanguages{$tag->{lclanguage}};
494						} elsif (exists($secondaryLanguages{$tag->{lclanguage}})) {
495						# Client en eq server en-us
496	0	0			0	return $secondaryLanguages{$tag->{language}} if exists($secondaryLanguages{$tag->{language}});
497	0				0	return $supportedLanguages{$tag->{lclanguage}};
498						} elsif ($tag->{lclanguage} =~ /^([^-]+)-/ && exists($secondaryLanguages{$1}) && $secondaryMatch eq '') {
499						# Client en-gb eq server en-us
500	0				0	$secondaryMatch = $secondaryLanguages{$1};
501						} elsif ($tag->{lclanguage} =~ /^([^-]+)-/ && exists($secondaryLanguages{$1}) && $secondaryMatch eq '') {
502						# FIXME: We just checked the exact same conditional!
503						# Client en-us eq server en
504	0				0	$secondaryMatch = $supportedLanguages{$1};
505						} elsif ($tag->{lclanguage} eq '*') {
506						# * matches every language not already specified.
507						# It doesn't care which we pick, so let's pick the default,
508						# if available, then the first in the array.
509						#return $acceptor->defaultLanguage() if $acceptor->defaultLanguage();
510	0				0	return $supportedLanguages->[0];
511						}
512						}
513						# No primary matches. Secondary? (ie, en-us requested and en supported)
514	1	50			18	return $secondaryMatch if $secondaryMatch;
515	1				24	return undef; # else, we got nothing.
516						}
517						1;
518