File Coverage

File:C4/Languages.pm
Coverage:23.2%

linestmtbrancondsubtimecode
1package C4::Languages;
2
3# Copyright 2006 (C) LibLime
4# Joshua Ferraro <jmf@liblime.com>
5# Portions Copyright 2009 Chris Cormack and the Koha Dev Team
6# This file is part of Koha.
7#
8# Koha is free software; you can redistribute it and/or modify it under the
9# terms of the GNU General Public License as published by the Free Software
10# Foundation; either version 2 of the License, or (at your option) any later
11# version.
12#
13# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
14# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
15# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License along
18# with Koha; if not, write to the Free Software Foundation, Inc.,
19# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20
21
22
3
3
3
17846
21
115
use strict;
23#use warnings; FIXME - Bug 2505
24
3
3
3
52
35
257
use Carp;
25
3
3
3
206
24
79
use C4::Context;
26
3
3
3
14
6
667
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
27
28eval {
29    if (C4::Context->ismemcached) {
30        require Memoize::Memcached;
31        import Memoize::Memcached qw(memoize_memcached);
32
33        memoize_memcached('getTranslatedLanguages', memcached => C4::Context->memcached);
34        memoize_memcached('getFrameworkLanguages' , memcached => C4::Context->memcached);
35        memoize_memcached('getAllLanguages', memcached => C4::Context->memcached);
36    }
37};
38
39BEGIN {
40
3
6
    $VERSION = 3.00;
41
3
43
    require Exporter;
42
3
47
    @ISA = qw(Exporter);
43
3
11
    @EXPORT = qw(
44        &getFrameworkLanguages
45        &getTranslatedLanguages
46        &getAllLanguages
47    );
48
3
10
    @EXPORT_OK = qw(getFrameworkLanguages getTranslatedLanguages getAllLanguages get_bidi regex_lang_subtags language_get_description accept_language);
49
3
8683
    $DEBUG = 0;
50}
51
52 - 62
=head1 NAME

C4::Languages - Perl Module containing language list functions for Koha 

=head1 SYNOPSIS

use C4::Languages;

=head1 DESCRIPTION

=cut
63
64 - 77
=head1 FUNCTIONS

=head2 getFrameworkLanguages

Returns a reference to an array of hashes:

 my $languages = getFrameworkLanguages();
 for my $language(@$languages) {
    print "$language->{language_code}\n"; # language code in iso 639-2
    print "$language->{language_name}\n"; # language name in native script
    print "$language->{language_locale_name}\n"; # language name in current locale
 }

=cut
78
79sub getFrameworkLanguages {
80    # get a hash with all language codes, names, and locale names
81
0
0
    my $all_languages = getAllLanguages();
82
0
0
    my @languages;
83
84    # find the available directory names
85
0
0
    my $dir=C4::Context->config('intranetdir')."/installer/data/";
86
0
0
    opendir (MYDIR,$dir);
87
0
0
0
0
    my @listdir= grep { !/^\.|CVS/ && -d "$dir/$_"} readdir(MYDIR);
88
0
0
    closedir MYDIR;
89
90    # pull out all data for the dir names that exist
91
0
0
    for my $dirname (@listdir) {
92
0
0
        for my $language_set (@$all_languages) {
93
94
0
0
            if ($dirname eq $language_set->{language_code}) {
95
0
0
                push @languages, {
96                    'language_code'=>$dirname,
97                    'language_description'=>$language_set->{language_description},
98                    'native_descrition'=>$language_set->{language_native_description} }
99            }
100        }
101    }
102
0
0
    return \@languages;
103}
104
105 - 117
=head2 getTranslatedLanguages

Returns a reference to an array of hashes:

 my $languages = getTranslatedLanguages();
 print "Available translated languages:\n";
 for my $language(@$trlanguages) {
    print "$language->{language_code}\n"; # language code in iso 639-2
    print "$language->{language_name}\n"; # language name in native script
    print "$language->{language_locale_name}\n"; # language name in current locale
 }

=cut
118
119sub getTranslatedLanguages {
120
0
0
    my ($interface, $theme, $current_language, $which) = @_;
121
0
0
    my $htdocs;
122
0
0
    my $all_languages = getAllLanguages();
123
0
0
    my @languages;
124
0
0
    my @enabled_languages;
125
126
0
0
    if ($interface && $interface eq 'opac' ) {
127
0
0
        @enabled_languages = split ",", C4::Context->preference('opaclanguages');
128
0
0
        $htdocs = C4::Context->config('opachtdocs');
129
0
0
        if ( $theme and -d "$htdocs/$theme" ) {
130
0
0
            (@languages) = _get_language_dirs($htdocs,$theme);
131        }
132        else {
133
0
0
            for my $theme ( _get_themes('opac') ) {
134
0
0
                push @languages, _get_language_dirs($htdocs,$theme);
135            }
136        }
137    }
138    elsif ($interface && $interface eq 'intranet' ) {
139
0
0
        @enabled_languages = split ",", C4::Context->preference('language');
140
0
0
        $htdocs = C4::Context->config('intrahtdocs');
141
0
0
        if ( $theme and -d "$htdocs/$theme" ) {
142
0
0
            @languages = _get_language_dirs($htdocs,$theme);
143        }
144        else {
145
0
0
            foreach my $theme ( _get_themes('intranet') ) {
146
0
0
                push @languages, _get_language_dirs($htdocs,$theme);
147            }
148        }
149    }
150    else {
151
0
0
        @enabled_languages = split ",", C4::Context->preference('opaclanguages');
152
0
0
        my $htdocs = C4::Context->config('intrahtdocs');
153
0
0
        foreach my $theme ( _get_themes('intranet') ) {
154
0
0
            push @languages, _get_language_dirs($htdocs,$theme);
155        }
156
0
0
        $htdocs = C4::Context->config('opachtdocs');
157
0
0
        foreach my $theme ( _get_themes('opac') ) {
158
0
0
            push @languages, _get_language_dirs($htdocs,$theme);
159        }
160
0
0
        my %seen;
161
0
0
0
0
        $seen{$_}++ for @languages;
162
0
0
        @languages = keys %seen;
163    }
164
0
0
    return _build_languages_arrayref($all_languages,\@languages,$current_language,\@enabled_languages);
165}
166
167 - 179
=head2 getAllLanguages

Returns a reference to an array of hashes:

 my $alllanguages = getAllLanguages();
 print "Available translated languages:\n";
 for my $language(@$alllanguages) {
    print "$language->{language_code}\n";
    print "$language->{language_name}\n";
    print "$language->{language_locale_name}\n";
 }

=cut
180
181sub getAllLanguages {
182
0
0
    my @languages_loop;
183
0
0
    my $dbh=C4::Context->dbh;
184
0
0
    my $current_language = shift || 'en';
185
0
0
    my $sth = $dbh->prepare('SELECT * FROM language_subtag_registry WHERE type=\'language\'');
186
0
0
    $sth->execute();
187
0
0
    while (my $language_subtag_registry = $sth->fetchrow_hashref) {
188
189        # pull out all the script descriptions for each language
190
0
0
        my $sth2= $dbh->prepare("SELECT * FROM language_descriptions LEFT JOIN language_rfc4646_to_iso639 on language_rfc4646_to_iso639.rfc4646_subtag = language_descriptions.subtag WHERE type='language' AND subtag =? AND language_descriptions.lang = ?");
191
0
0
        $sth2->execute($language_subtag_registry->{subtag},$current_language);
192
193
0
0
        my $sth3 = $dbh->prepare("SELECT description FROM language_descriptions WHERE type='language' AND subtag=? AND lang=?");
194
195        # add the correct description info
196
0
0
        while (my $language_descriptions = $sth2->fetchrow_hashref) {
197
0
0
            $sth3->execute($language_subtag_registry->{subtag},$language_subtag_registry->{subtag});
198
0
0
            my $native_description;
199
0
0
            while (my $description = $sth3->fetchrow_hashref) {
200
0
0
                $native_description = $description->{description};
201            }
202
203            # fill in the ISO6329 code
204
0
0
            $language_subtag_registry->{iso639_2_code} = $language_descriptions->{iso639_2_code};
205            # fill in the native description of the language, as well as the current language's translation of that if it exists
206
0
0
            if ($native_description) {
207
0
0
                $language_subtag_registry->{language_description} = $native_description;
208
0
0
                $language_subtag_registry->{language_description}.=" ($language_descriptions->{description})" if $language_descriptions->{description};
209            }
210            else {
211
0
0
                $language_subtag_registry->{language_description} = $language_descriptions->{description};
212            }
213        }
214
0
0
        push @languages_loop, $language_subtag_registry;
215    }
216
0
0
    return \@languages_loop;
217}
218
219 - 226
=head2 _get_themes

Internal function, returns an array of all available themes.

  (@themes) = &_get_themes('opac');
  (@themes) = &_get_themes('intranet');

=cut
227
228sub _get_themes {
229
1
387976
    my $interface = shift;
230
1
1
    my $htdocs;
231
1
2
    my @themes;
232
1
5
    if ( $interface eq 'intranet' ) {
233
0
0
        $htdocs = C4::Context->config('intrahtdocs');
234    }
235    else {
236
1
8
        $htdocs = C4::Context->config('opachtdocs');
237    }
238
1
157
    opendir D, "$htdocs";
239
1
44
    my @dirlist = readdir D;
240
1
3
    foreach my $directory (@dirlist) {
241        # if there's an en dir, it's a valid theme
242
3
68
        -d "$htdocs/$directory/en" and push @themes, $directory;
243    }
244
1
12
    return @themes;
245}
246
247 - 251
=head2 _get_language_dirs

Internal function, returns an array of directory names, excluding non-language directories

=cut
252
253sub _get_language_dirs {
254
1
3
    my ($htdocs,$theme) = @_;
255
1
1
    my @lang_strings;
256
1
31
    opendir D, "$htdocs/$theme";
257
1
85
    for my $lang_string ( readdir D ) {
258
23
63
        next if $lang_string =~/^\./;
259
21
36
        next if $lang_string eq 'all';
260
21
33
        next if $lang_string =~/png$/;
261
21
34
        next if $lang_string =~/css$/;
262
21
33
        next if $lang_string =~/CVS$/;
263
21
37
        next if $lang_string =~/\.txt$/i; #Don't read the readme.txt !
264
21
97
        next if $lang_string =~/img|images|famfam|sound|pdf/;
265
20
33
        push @lang_strings, $lang_string;
266    }
267
1
13
        return (@lang_strings);
268}
269
270 - 276
=head2 _build_languages_arrayref 

Internal function for building the ref to array of hashes

FIXME: this could be rewritten and simplified using map

=cut
277
278sub _build_languages_arrayref {
279
0
0
        my ($all_languages,$translated_languages,$current_language,$enabled_languages) = @_;
280
0
0
        my @translated_languages = @$translated_languages;
281
0
0
        my @languages_loop; # the final reference to an array of hashrefs
282
0
0
        my @enabled_languages = @$enabled_languages;
283        # how many languages are enabled, if one, take note, some contexts won't need to display it
284
0
0
        my %seen_languages; # the language tags we've seen
285
0
0
        my %found_languages;
286
0
0
        my $language_groups;
287
0
0
        my $track_language_groups;
288
0
0
        my $current_language_regex = regex_lang_subtags($current_language);
289        # Loop through the translated languages
290
0
0
        for my $translated_language (@translated_languages) {
291            # separate the language string into its subtag types
292
0
0
            my $language_subtags_hashref = regex_lang_subtags($translated_language);
293
294            # is this language string 'enabled'?
295
0
0
            for my $enabled_language (@enabled_languages) {
296                #warn "Checking out if $translated_language eq $enabled_language";
297
0
0
                $language_subtags_hashref->{'enabled'} = 1 if $translated_language eq $enabled_language;
298            }
299
300            # group this language, key by langtag
301
0
0
            $language_subtags_hashref->{'sublanguage_current'} = 1 if $translated_language eq $current_language;
302
0
0
            $language_subtags_hashref->{'rfc4646_subtag'} = $translated_language;
303
0
0
            $language_subtags_hashref->{'native_description'} = language_get_description($language_subtags_hashref->{language},$language_subtags_hashref->{language},'language');
304
0
0
            $language_subtags_hashref->{'script_description'} = language_get_description($language_subtags_hashref->{script},$language_subtags_hashref->{'language'},'script');
305
0
0
            $language_subtags_hashref->{'region_description'} = language_get_description($language_subtags_hashref->{region},$language_subtags_hashref->{'language'},'region');
306
0
0
            $language_subtags_hashref->{'variant_description'} = language_get_description($language_subtags_hashref->{variant},$language_subtags_hashref->{'language'},'variant');
307
0
0
            $track_language_groups->{$language_subtags_hashref->{'language'}}++;
308
0
0
0
0
            push ( @{ $language_groups->{$language_subtags_hashref->{language}} }, $language_subtags_hashref );
309        }
310        # $key is a language subtag like 'en'
311
0
0
        while( my ($key, $value) = each %$language_groups) {
312
313            # is this language group enabled? are any of the languages within it enabled?
314
0
0
            my $enabled;
315
0
0
            for my $enabled_language (@enabled_languages) {
316
0
0
                my $regex_enabled_language = regex_lang_subtags($enabled_language);
317
0
0
                $enabled = 1 if $key eq $regex_enabled_language->{language};
318            }
319
0
0
            push @languages_loop, {
320                            # this is only use if there is one
321                            rfc4646_subtag => @$value[0]->{rfc4646_subtag},
322                            native_description => language_get_description($key,$key,'language'),
323                            language => $key,
324                            sublanguages_loop => $value,
325                            plural => $track_language_groups->{$key} >1 ? 1 : 0,
326                            current => $current_language_regex->{language} eq $key ? 1 : 0,
327                            group_enabled => $enabled,
328                           };
329        }
330
0
0
        return \@languages_loop;
331}
332
333sub language_get_description {
334
0
0
    my ($script,$lang,$type) = @_;
335
0
0
    my $dbh = C4::Context->dbh;
336
0
0
    my $desc;
337
0
0
    my $sth = $dbh->prepare("SELECT description FROM language_descriptions WHERE subtag=? AND lang=? AND type=?");
338    #warn "QUERY: SELECT description FROM language_descriptions WHERE subtag=$script AND lang=$lang AND type=$type";
339
0
0
    $sth->execute($script,$lang,$type);
340
0
0
    while (my $descriptions = $sth->fetchrow_hashref) {
341
0
0
        $desc = $descriptions->{'description'};
342    }
343
0
0
    unless ($desc) {
344
0
0
        $sth = $dbh->prepare("SELECT description FROM language_descriptions WHERE subtag=? AND lang=? AND type=?");
345
0
0
        $sth->execute($script,'en',$type);
346
0
0
        while (my $descriptions = $sth->fetchrow_hashref) {
347
0
0
            $desc = $descriptions->{'description'};
348        }
349    }
350
0
0
    return $desc;
351}
352 - 358
=head2 regex_lang_subtags

This internal sub takes a string composed according to RFC 4646 as
an input and returns a reference to a hash containing keys and values
for ( language, script, region, variant, extension, privateuse )

=cut
359
360sub regex_lang_subtags {
361
0
0
    my $string = shift;
362
363    # Regex for recognizing RFC 4646 well-formed tags
364    # http://www.rfc-editor.org/rfc/rfc4646.txt
365
366    # regexes based on : http://unicode.org/cldr/data/tools/java/org/unicode/cldr/util/data/langtagRegex.txt
367    # The structure requires no forward references, so it reverses the order.
368    # The uppercase comments are fragments copied from RFC 4646
369    #
370    # Note: the tool requires that any real "=" or "#" or ";" in the regex be escaped.
371
372
0
0
    my $alpha = qr/[a-zA-Z]/ ; # ALPHA
373
0
0
    my $digit = qr/[0-9]/ ; # DIGIT
374
0
0
    my $alphanum = qr/[a-zA-Z0-9]/ ; # ALPHA / DIGIT
375
0
0
    my $x = qr/[xX]/ ; # private use singleton
376
0
0
    my $singleton = qr/[a-w y-z A-W Y-Z]/ ; # other singleton
377
0
0
    my $s = qr/[-]/ ; # separator -- lenient parsers will use [-_]
378
379    # Now do the components. The structure is slightly different to allow for capturing the right components.
380    # The notation (?:....) is a non-capturing version of (...): so the "?:" can be deleted if someone doesn't care about capturing.
381
382
0
0
    my $extlang = qr{(?: $s $alpha{3} )}x ; # *3("-" 3ALPHA)
383
0
0
    my $language = qr{(?: $alpha{2,3} | $alpha{4,8} )}x ;
384    #my $language = qr{(?: $alpha{2,3}$extlang{0,3} | $alpha{4,8} )}x ; # (2*3ALPHA [ extlang ]) / 4ALPHA / 5*8ALPHA
385
386
0
0
    my $script = qr{(?: $alpha{4} )}x ; # 4ALPHA
387
388
0
0
    my $region = qr{(?: $alpha{2} | $digit{3} )}x ; # 2ALPHA / 3DIGIT
389
390
0
0
    my $variantSub = qr{(?: $digit$alphanum{3} | $alphanum{5,8} )}x ; # *("-" variant), 5*8alphanum / (DIGIT 3alphanum)
391
0
0
    my $variant = qr{(?: $variantSub (?: $s$variantSub )* )}x ; # *("-" variant), 5*8alphanum / (DIGIT 3alphanum)
392
393
0
0
    my $extensionSub = qr{(?: $singleton (?: $s$alphanum{2,8} )+ )}x ; # singleton 1*("-" (2*8alphanum))
394
0
0
    my $extension = qr{(?: $extensionSub (?: $s$extensionSub )* )}x ; # singleton 1*("-" (2*8alphanum))
395
396
0
0
    my $privateuse = qr{(?: $x (?: $s$alphanum{1,8} )+ )}x ; # ("x"/"X") 1*("-" (1*8alphanum))
397
398    # Define certain grandfathered codes, since otherwise the regex is pretty useless.
399    # Since these are limited, this is safe even later changes to the registry --
400    # the only oddity is that it might change the type of the tag, and thus
401    # the results from the capturing groups.
402    # http://www.iana.org/assignments/language-subtag-registry
403    # Note that these have to be compared case insensitively, requiring (?i) below.
404
405
0
0
    my $grandfathered = qr{(?: (?i)
406        en $s GB $s oed
407    | i $s (?: ami | bnn | default | enochian | hak | klingon | lux | mingo | navajo | pwn | tao | tay | tsu )
408    | sgn $s (?: BE $s fr | BE $s nl | CH $s de)
409)}x;
410
411    # For well-formedness, we don't need the ones that would otherwise pass, so they are commented out here
412
413    # | art $s lojban
414    # | cel $s gaulish
415    # | en $s (?: boont | GB $s oed | scouse )
416    # | no $s (?: bok | nyn)
417    # | zh $s (?: cmn | cmn $s Hans | cmn $s Hant | gan | guoyu | hakka | min | min $s nan | wuu | xiang | yue)
418
419    # Here is the final breakdown, with capturing groups for each of these components
420    # The language, variants, extensions, grandfathered, and private-use may have interior '-'
421
422    #my $root = qr{(?: ($language) (?: $s ($script) )? 40% (?: $s ($region) )? 40% (?: $s ($variant) )? 10% (?: $s ($extension) )? 5% (?: $s ($privateuse) )? 5% ) 90% | ($grandfathered) 5% | ($privateuse) 5% };
423
424
0
0
    $string =~ qr{^ (?:($language)) (?:$s($script))? (?:$s($region))? (?:$s($variant))? (?:$s($extension))? (?:$s($privateuse))? $}xi; # |($grandfathered) | ($privateuse) $}xi;
425
0
0
    my %subtag = (
426        'rfc4646_subtag' => $string,
427        'language' => $1,
428        'script' => $2,
429        'region' => $3,
430        'variant' => $4,
431        'extension' => $5,
432        'privateuse' => $6,
433    );
434
0
0
    return \%subtag;
435}
436
437# Script Direction Resources:
438# http://www.w3.org/International/questions/qa-scripts
439sub get_bidi {
440
0
0
    my ($language_script)= @_;
441
0
0
    my $dbh = C4::Context->dbh;
442
0
0
    my $bidi;
443
0
0
    my $sth = $dbh->prepare('SELECT bidi FROM language_script_bidi WHERE rfc4646_subtag=?');
444
0
0
    $sth->execute($language_script);
445
0
0
    while (my $result = $sth->fetchrow_hashref) {
446
0
0
        $bidi = $result->{'bidi'};
447    }
448
0
0
    return $bidi;
449};
450
451sub accept_language {
452    # referenced http://search.cpan.org/src/CGILMORE/I18N-AcceptLanguage-1.04/lib/I18N/AcceptLanguage.pm
453
1
3
    my ($clientPreferences,$supportedLanguages) = @_;
454
1
2
    my @languages = ();
455
1
3
    if ($clientPreferences) {
456        # There should be no whitespace anways, but a cleanliness/sanity check
457
0
0
        $clientPreferences =~ s/\s//g;
458        # Prepare the list of client-acceptable languages
459
0
0
        foreach my $tag (split(/,/, $clientPreferences)) {
460
0
0
            my ($language, $quality) = split(/\;/, $tag);
461
0
0
            $quality =~ s/^q=//i if $quality;
462
0
0
            $quality = 1 unless $quality;
463
0
0
            next if $quality <= 0;
464            # We want to force the wildcard to be last
465
0
0
            $quality = 0 if ($language eq '*');
466            # Pushing lowercase language here saves processing later
467
0
0
            push(@languages, { quality => $quality,
468               language => $language,
469               lclanguage => lc($language) });
470        }
471    } else {
472
1
495
        carp "accept_language(x,y) called with no clientPreferences (x).";
473    }
474    # Prepare the list of server-supported languages
475
1
2613
    my %supportedLanguages = ();
476
1
3
    my %secondaryLanguages = ();
477
1
3
    foreach my $language (@$supportedLanguages) {
478        # warn "Language supported: " . $language->{language};
479
0
0
        my $subtag = $language->{rfc4646_subtag};
480
0
0
        $supportedLanguages{lc($subtag)} = $subtag;
481
0
0
        if ( $subtag =~ /^([^-]+)-?/ ) {
482
0
0
            $secondaryLanguages{lc($1)} = $subtag;
483        }
484    }
485
486    # Reverse sort the list, making best quality at the front of the array
487
1
0
3
0
    @languages = sort { $b->{quality} <=> $a->{quality} } @languages;
488
1
2
    my $secondaryMatch = '';
489
1
2
    foreach my $tag (@languages) {
490
0
0
        if (exists($supportedLanguages{$tag->{lclanguage}})) {
491            # Client en-us eq server en-us
492
0
0
            return $supportedLanguages{$tag->{language}} if exists($supportedLanguages{$tag->{language}});
493
0
0
            return $supportedLanguages{$tag->{lclanguage}};
494        } elsif (exists($secondaryLanguages{$tag->{lclanguage}})) {
495            # Client en eq server en-us
496
0
0
            return $secondaryLanguages{$tag->{language}} if exists($secondaryLanguages{$tag->{language}});
497
0
0
            return $supportedLanguages{$tag->{lclanguage}};
498        } elsif ($tag->{lclanguage} =~ /^([^-]+)-/ && exists($secondaryLanguages{$1}) && $secondaryMatch eq '') {
499            # Client en-gb eq server en-us
500
0
0
            $secondaryMatch = $secondaryLanguages{$1};
501        } elsif ($tag->{lclanguage} =~ /^([^-]+)-/ && exists($secondaryLanguages{$1}) && $secondaryMatch eq '') {
502            # FIXME: We just checked the exact same conditional!
503            # Client en-us eq server en
504
0
0
            $secondaryMatch = $supportedLanguages{$1};
505        } elsif ($tag->{lclanguage} eq '*') {
506        # * matches every language not already specified.
507        # It doesn't care which we pick, so let's pick the default,
508        # if available, then the first in the array.
509        #return $acceptor->defaultLanguage() if $acceptor->defaultLanguage();
510
0
0
        return $supportedLanguages->[0];
511        }
512    }
513    # No primary matches. Secondary? (ie, en-us requested and en supported)
514
1
3
    return $secondaryMatch if $secondaryMatch;
515
1
10
    return undef; # else, we got nothing.
516}
5171;
518