#!/usr/local/bin/perl -wC
# $FreeBSD$
#
# Build draft locale definition sources (and the Makefile that installs
# them) in "<type>.draft/" from:
#   --cldr     directory holding posix/*.UTF-8.src and posix/UTF-8.cm
#   --unidata  directory holding UnicodeData.txt
#   --etc      directory holding the XML description read by get_xmldata()
#              (from charmaps.pm) and the charmaps/*.TXT conversion tables
#   --type     one of the keys of %FILESNAMES below
#   --lc       optional la_CC or la_Family_CC; restricts the run to one
#              locale and suppresses Makefile generation

use strict;
use File::Copy;
use XML::Parser;
use Tie::IxHash;
use Data::Dumper;
use Getopt::Long;
use Digest::SHA qw(sha1_hex);
require "charmaps.pm";


if ($#ARGV < 2) {
    usage();
}

my $DEFENCODING = "UTF-8";
my @filter = ();

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

# Maps every supported --type to the locale category file it produces.
my %FILESNAMES = (
    "monetdef"   => "LC_MONETARY",
    "timedef"    => "LC_TIME",
    "msgdef"     => "LC_MESSAGES",
    "numericdef" => "LC_NUMERIC",
    "colldef"    => "LC_COLLATE",
    "ctypedef"   => "LC_CTYPE"
);

my $result = GetOptions (
        "cldr=s"    => \$CLDRDIR,
        "unidata=s" => \$UNIDATADIR,
        "etc=s"     => \$ETCDIR,
        "type=s"    => \$TYPE,
        "lc=s"      => \$doonly
    );

# Everything below dereferences these options unconditionally, so refuse
# to run without them (or with a type nothing would act upon).
usage()
    if (!$result || !defined $CLDRDIR || !defined $UNIDATADIR
        || !defined $ETCDIR || !defined $TYPE
        || !exists $FILESNAMES{$TYPE});

my %convertors = ();

my %ucd = ();
my %values = ();
my %hashtable = ();
my %languages = ();
my %translations = ();
my %encodings = ();
my %alternativemonths = ();
get_languages();

my %utf8map = ();
my %utf8aliases = ();
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes are iterated in insertion order: %keys fixes the order of
# the fields in the output, %hashtable the order of the SAME+= lines.
my %keys = ();
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");

# Callbacks referenced by name from the "<callback<source<format" field
# specifications in %keys.  {data} carries the locale currently being
# printed (keys c, k, l, e) while a callback runs.
my %callback = (
    mdorder  => \&callback_mdorder,
    altmon   => \&callback_altmon,
    cformat  => \&callback_cformat,
    dtformat => \&callback_dtformat,
    cbabmon  => \&callback_abmon,
    data     => undef,
);

# Comment text written above each field in the generated sources.
my %DESC = (

    # numericdef
    "decimal_point"     => "decimal_point",
    "thousands_sep"     => "thousands_sep",
    "grouping"          => "grouping",

    # monetdef
    "int_curr_symbol"   => "int_curr_symbol (last character always SPACE)",
    "currency_symbol"   => "currency_symbol",
    "mon_decimal_point" => "mon_decimal_point",
    "mon_thousands_sep" => "mon_thousands_sep",
    "mon_grouping"      => "mon_grouping",
    "positive_sign"     => "positive_sign",
    "negative_sign"     => "negative_sign",
    "int_frac_digits"   => "int_frac_digits",
    "frac_digits"       => "frac_digits",
    "p_cs_precedes"     => "p_cs_precedes",
    "p_sep_by_space"    => "p_sep_by_space",
    "n_cs_precedes"     => "n_cs_precedes",
    "n_sep_by_space"    => "n_sep_by_space",
    "p_sign_posn"       => "p_sign_posn",
    "n_sign_posn"       => "n_sign_posn",

    # msgdef
    "yesexpr"           => "yesexpr",
    "noexpr"            => "noexpr",
    "yesstr"            => "yesstr",
    "nostr"             => "nostr",

    # timedef
    "abmon"             => "Short month names",
    "mon"               => "Long month names (as in a date)",
    "abday"             => "Short weekday names",
    "day"               => "Long weekday names",
    "t_fmt"             => "X_fmt",
    "d_fmt"             => "x_fmt",
    "c_fmt"             => "c_fmt",
    "am_pm"             => "AM/PM",
    "d_t_fmt"           => "date_fmt",
    "altmon"            => "Long month names (without case ending)",
    "md_order"          => "md_order",
    "t_fmt_ampm"        => "ampm_fmt",
);

# Field formats used in %keys:
#   s  string, i integer, ai integer list, as ';'-separated string list,
#   "<callback<source<format": run the callback on the source field and
#   print the result using the given format.
if ($TYPE eq "colldef") {
    transform_collation();
    make_makefile();
}
elsif ($TYPE eq "ctypedef") {
    transform_ctypes();
    make_makefile();
}
elsif ($TYPE eq "numericdef") {
    %keys = (
        "decimal_point" => "s",
        "thousands_sep" => "s",
        "grouping"      => "ai",
    );
    process_fields();
}
elsif ($TYPE eq "monetdef") {
    %keys = (
        "int_curr_symbol"   => "s",
        "currency_symbol"   => "s",
        "mon_decimal_point" => "s",
        "mon_thousands_sep" => "s",
        "mon_grouping"      => "ai",
        "positive_sign"     => "s",
        "negative_sign"     => "s",
        "int_frac_digits"   => "i",
        "frac_digits"       => "i",
        "p_cs_precedes"     => "i",
        "p_sep_by_space"    => "i",
        "n_cs_precedes"     => "i",
        "n_sep_by_space"    => "i",
        "p_sign_posn"       => "i",
        "n_sign_posn"       => "i"
    );
    process_fields();
}
elsif ($TYPE eq "msgdef") {
    %keys = (
        "yesexpr" => "s",
        "noexpr"  => "s",
        "yesstr"  => "s",
        "nostr"   => "s"
    );
    process_fields();
}
elsif ($TYPE eq "timedef") {
    %keys = (
        "abmon"      => "<cbabmon<abmon<as",
        "mon"        => "as",
        "abday"      => "as",
        "day"        => "as",
        "t_fmt"      => "s",
        "d_fmt"      => "s",
        "c_fmt"      => "<cformat<d_t_fmt<s",
        "am_pm"      => "as",
        "d_t_fmt"    => "<dtformat<d_t_fmt<s",
        "altmon"     => "<altmon<mon<as",
        "md_order"   => "<mdorder<d_fmt<s",
        "t_fmt_ampm" => "s",
    );
    process_fields();
}

############################
# Small helpers

# Print the command line synopsis and terminate with exit status 1.
sub usage {
    print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";
    exit(1);
}

# Common tail of all field based types: read, print, write the Makefile.
sub process_fields {
    get_fields();
    print_fields();
    make_makefile();
}

# File name stem of a locale; the family is left out when it is "x".
sub locale_name {
    my ($l, $f, $c) = @_;

    return $l . "_" . ($f ne "x" ? $f . "_" : "") . $c;
}

# True unless the locale is excluded by --lc or by a "definitions"
# attribute that does not mention the current type.
sub locale_wanted {
    my ($l, $f, $c) = @_;

    return 0
        if ($#filter == 2 && ($filter[0] ne $l
            || $filter[1] ne $f || $filter[2] ne $c));
    return 0
        if (defined $languages{$l}{$f}{definitions}
            && $languages{$l}{$f}{definitions} !~ /$TYPE/);
    return 1;
}

# Locate the UTF-8 source for a locale: first in the CLDR posix
# directory, then in the etc directory, finally under the configured
# fallback name in the CLDR posix directory.  Reports on STDERR and
# returns undef when none of them exists.
sub find_source {
    my ($l, $f, $file) = @_;

    my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
    $filename = "$ETCDIR/$file.$DEFENCODING.src"
        if (! -f $filename);
    if (! -f $filename
        && defined $languages{$l}{$f}{fallback}) {
        $file = $languages{$l}{$f}{fallback};
        $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
    }
    return $filename if (-f $filename);

    print STDERR "Cannot open $file.$DEFENCODING.src or fallback\n";
    return undef;
}

# "Do not edit" header of the generated sources; $how is the verb used
# in the first line ("extracted" or "generated").
sub banner {
    my $how = shift;

    return <<EOF;
# Warning: Do not edit. This file is automatically $how from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
}

# Create (or truncate) $path and write @content to it; dies on failure
# so that a missing draft directory is not silently ignored.
sub write_file {
    my ($path, @content) = @_;

    open(my $fout, '>', $path)
        or die("Cannot create $path: $!");
    print $fout @content;
    close($fout)
        or die("Cannot write $path: $!");
}

# Return (as an array ref) the comment_char/escape_char lines of
# $filename plus everything from "$section" up to "END $section".
# With $squeeze set, runs of spaces inside the section are collapsed.
# Returns undef, after reporting on STDERR, if the file cannot be read.
sub read_section {
    my ($filename, $section, $squeeze) = @_;

    my $fin;
    if (!open($fin, '<', $filename)) {
        print STDERR "Cannot open $filename: $!\n";
        return undef;
    }
    my @lines;
    my $inside = 0;        # per file, so a missing END cannot leak
    while (my $line = <$fin>) {
        if (($line =~ /^comment_char\s/) || ($line =~ /^escape_char\s/)) {
            push @lines, $line;
        }
        $inside = 1 if (!$inside && $line =~ /^$section/);
        if ($inside) {
            # Test for the end marker before the line is rewritten.
            my $is_end = ($line =~ /^END $section/);
            $line =~ s/[ ]+/ /g if ($squeeze);
            push @lines, $line;
            $inside = 0 if ($is_end);
        }
    }
    close($fin);
    return \@lines;
}

############################
# Callbacks for timedef fields

# c_fmt: d_t_fmt without the time zone conversions.
sub callback_cformat {
    my $s = shift;
    return $s if (!defined $s);
    $s =~ s/ %Z//;
    $s =~ s/ %z//;
    return $s;
};

# d_t_fmt: ja_JP additionally gets the weekday in front of the hour.
sub callback_dtformat {
    my $s = shift;
    return $s if (!defined $s);
    my $nl = $callback{data}{l} . "_" . $callback{data}{c};

    if ($nl eq 'ja_JP') {
        $s =~ s/(> )(%H)/$1%A $2/;
    }
    return $s;
};

# md_order: keep only the 'd' and 'm' characters of d_fmt.
sub callback_mdorder {
    my $s = shift;
    return undef if (!defined $s);
    $s =~ s/[^dm]//g;
    return $s;
};

sub callback_altmon {
    # if the language/country is known in %alternativemonths then
    # return that, otherwise repeat mon
    my $s = shift;

    my $alt = $alternativemonths{$callback{data}{l}}{$callback{data}{c}};
    return $s if (!defined $alt);

    my @cleaned;
    foreach my $name (split(";", $alt)) {
        $name =~ s/^\s+//;
        $name =~ s/\s+$//;
        push @cleaned, $name;
    }
    return join(";", @cleaned);
}

sub callback_abmon {
    # for specified CJK locales, pad result with a space to enable
    # columns to line up (style established in FreeBSD in 2001)
    my $s = shift;
    my $nl = $callback{data}{l} . "_" . $callback{data}{c};

    if ($nl eq 'ja_JP' || $nl eq 'ko_KR' || $nl eq 'zh_CN' ||
        $nl eq 'zh_HK' || $nl eq 'zh_TW') {
        my @cleaned;
        foreach my $name (split(";", $s)) {
            # Names starting with a single digit (2-9, or a 1 that is
            # not followed by 0, 1 or 2) get the padding.
            if ($name =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ ||
                ($name =~ /^"<one>/
                 && $name !~ /^"<one>(<zero>|<one>|<two>)/)) {
                $name =~ s/^"/"<space>/;
            }
            push @cleaned, $name;
        }
        return join(";", @cleaned);
    }
    return $s;
}

############################
# Input tables

# Fill %ucd{code2name} and %ucd{name2code} from the first two fields of
# every line of UnicodeData.txt.
sub get_unidata {
    my $directory = shift;

    open(my $fin, '<', "$directory/UnicodeData.txt")
        or die("Cannot open $directory/UnicodeData.txt: $!");
    while (my $line = <$fin>) {
        chomp($line);
        my @a = split(/;/, $line);
        next if (@a < 2);

        $ucd{code2name}{"$a[0]"} = $a[1];    # Unicode name
        $ucd{name2code}{"$a[1]"} = $a[0];    # Unicode code
    }
    close($fin);
}

# Fill %utf8map (character name -> hex digits of its UTF-8 bytes) from
# the CHARMAP section of $file.  A name whose code equals the code of
# the entry right before it is recorded in %utf8aliases.
sub get_utf8map {
    my $file = shift;

    # Only the field based types need this map, so a missing file is
    # reported but not fatal.
    my $fin;
    if (!open($fin, '<', $file)) {
        print STDERR "Cannot open $file: $!\n";
        return;
    }

    my $prev_k = undef;
    my $prev_v = "";
    my $incharmap = 0;
    while (my $l = <$fin>) {
        chomp($l);
        $l =~ s/\r//;
        next if ($l =~ /^\#/);
        next if ($l eq "");

        if ($l eq "CHARMAP") {
            $incharmap = 1;
            next;
        }

        next if (!$incharmap);
        last if ($l eq "END CHARMAP");

        # Skip anything that is not "<name> code" instead of reusing
        # the captures of an earlier line.
        next if ($l !~ /^<([^\s]+)>\s+(.*)/);
        my ($k, $v) = ($1, $2);
        $k =~ s/_/ /g;        # unicode char string
        $v =~ s/\\x//g;       # UTF-8 char code
        $utf8map{$k} = $v;

        $utf8aliases{$k} = $prev_k if ($prev_v eq $v);

        $prev_v = $v;
        $prev_k = $k;
    }
    close($fin);
}

# For every encoding named in %encodings read "$dir/<encoding>.TXT" and
# fill $convertors{<encoding>}{<unicode code>} = <local code>, both as
# upper case hex without the 0x prefix.
sub get_encodings {
    my $dir = shift;

    foreach my $e (sort(keys(%encodings))) {
        my $fin;
        if (!open($fin, '<', "$dir/$e.TXT")) {
            print "Cannot open charmap for $e\n";
            next;
        }
        $encodings{$e} = 1;
        while (my $l = <$fin>) {
            chomp($l);
            $l =~ s/\r//;
            next if ($l =~ /^\#/);
            next if ($l eq "");

            my @a = split(" ", $l);
            next if ($#a < 1);
            $a[0] =~ s/^0[xX]//;    # local char code
            $a[1] =~ s/^0[xX]//;    # unicode char code
            $convertors{$e}{uc($a[1])} = uc($a[0]);
        }
        close($fin);
    }
}

# Load the locale description through get_xmldata() and turn --lc into
# the three element @filter (language, family or "x", country).
sub get_languages {
    my %data = get_xmldata($ETCDIR);
    %languages = %{$data{L}};
    %translations = %{$data{T}};
    %alternativemonths = %{$data{AM}};
    %encodings = %{$data{E}};

    return if (!defined $doonly);

    my @a = split(/_/, $doonly);
    if ($#a == 1) {
        @filter = ($a[0], "x", $a[1]);
    } elsif ($#a == 2) {
        @filter = @a;
    }

    print Dumper(@filter);
    return;
}

############################
# colldef / ctypedef

# Write the LC_CTYPE draft sources.  The UTF-8 variant of every locale
# is a copy of xx_Comm_US.UTF-8.src; every other encoding gets the
# LC_CTYPE section of the locale's own UTF-8 source.
sub transform_ctypes {
    foreach my $l (sort keys(%languages)) {
    foreach my $f (sort keys(%{$languages{$l}})) {
    foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
        next if (!locale_wanted($l, $f, $c));
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;    # unread
        my $file = locale_name($l, $f, $c);

        my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
        if (! -f $filename) {
            print STDERR "Cannot open $filename\n";
            next;
        }
        my $fin;
        if (!open($fin, '<', $filename)) {
            print STDERR "Cannot open $filename: $!\n";
            next;
        }
        print "Reading from $filename for ${l}_${f}_${c}\n";
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;    # read
        my @lines = <$fin>;
        close($fin);

        # The digest only serves to group identical outputs.
        my $shex = sha1_hex(join("\n", @lines));
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
        $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
        write_file("$TYPE.draft/$file.$DEFENCODING.src", @lines);

        foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
            next if ($enc eq $DEFENCODING);
            $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
            if (! -f $filename) {
                print STDERR "Cannot open $filename\n";
                next;
            }
            my $section = read_section($filename, "LC_CTYPE", 0);
            next if (!defined $section);

            # The encoding is part of the digest, so two encodings
            # never share one entry.
            my $uhex = sha1_hex(join("\n", @$section) . $enc);
            $languages{$l}{$f}{data}{$c}{$enc} = $uhex;
            $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
            write_file("$TYPE.draft/$file.$enc.src",
                banner("extracted"), @$section);
        }
    }
    }
    }
}

# Write the LC_COLLATE draft sources: the LC_COLLATE section of the
# locale's UTF-8 source, copied unchanged for every other encoding.
sub transform_collation {
    foreach my $l (sort keys(%languages)) {
    foreach my $f (sort keys(%{$languages{$l}})) {
    foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
        next if (!locale_wanted($l, $f, $c));
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;    # unread

        # Output always uses the locale's own name, even when the
        # data comes from its fallback.
        my $actfile = locale_name($l, $f, $c);
        my $filename = find_source($l, $f, $actfile);
        next if (!defined $filename);

        my $section = read_section($filename, "LC_COLLATE", 1);
        next if (!defined $section);
        print "Reading from $filename for ${l}_${f}_${c}\n";
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;    # read

        my $shex = sha1_hex(join("\n", @$section));
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
        $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
        write_file("$TYPE.draft/$actfile.$DEFENCODING.src",
            banner("extracted"), @$section);

        foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
            next if ($enc eq $DEFENCODING);
            copy("$TYPE.draft/$actfile.$DEFENCODING.src",
                "$TYPE.draft/$actfile.$enc.src")
                or die("Cannot copy to $TYPE.draft/$actfile.$enc.src: $!");
            $languages{$l}{$f}{data}{$c}{$enc} = $shex;
            $hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
        }
    }
    }
    }
}

############################
# Field based types

# Read the raw value of every field in %keys from the UTF-8 source of
# every wanted locale into $values{<language>}{<country>}{<field>}.
# Values continued with a trailing '/' are joined, and '_' inside a
# <...> token is turned into a space.
sub get_fields {
    foreach my $l (sort keys(%languages)) {
    foreach my $f (sort keys(%{$languages{$l}})) {
    foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
        next if (!locale_wanted($l, $f, $c));

        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;    # unread
        my $filename = find_source($l, $f, locale_name($l, $f, $c));
        next if (!defined $filename);

        my $fin;
        if (!open($fin, '<', $filename)) {
            print STDERR "Cannot open $filename: $!\n";
            next;
        }
        print "Reading from $filename for ${l}_${f}_${c}\n";
        $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;    # read
        my @lines = <$fin>;
        chomp(@lines);
        close($fin);
        s/\r// foreach (@lines);

        my $continue = 0;
        foreach my $k (keys(%keys)) {
            foreach my $raw (@lines) {
                # Work on a copy so that looking up one field never
                # changes what the next field gets to see.
                my $line = $raw;
                next if (!$continue && $line !~ /^\Q$k\E\s/);
                if ($continue) {
                    $line =~ s/^\s+//;
                } else {
                    $line =~ s/^\Q$k\E\s+//;
                }

                $values{$l}{$c}{$k} = ""
                    if (!defined $values{$l}{$c}{$k});

                $continue = ($line =~ /\/$/);
                $line =~ s/\/$// if ($continue);

                # Loop on the substitution, not on the presence of
                # '_': an underscore outside <...> must reach the
                # die below instead of spinning forever.
                1 while ($line =~ s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
                die "_ in data - $line" if ($line =~ /_/);
                $values{$l}{$c}{$k} .= $line;

                last if (!$continue);
            }
        }
    }
    }
    }
}

# Convert the character named $s to its byte sequence in encoding $e.
# Dies when no mapping can be found.  A code that is not a non-empty,
# even number of hex digits is reported on STDERR and yields the
# string "length = <n>".
sub decodecldr {
    my $e = shift;
    my $s = shift;

    my $v = undef;

    if ($e eq "UTF-8") {
        #
        # Conversion to UTF-8 can be done from the Unicode name to
        # the UTF-8 character code.
        #
        $v = $utf8map{$s};
        die "Cannot convert $s in $e (charmap)" if (!defined $v);
    } else {
        #
        # Conversion to these encodings can be done from the Unicode
        # name to Unicode code to the encodings code.
        #
        my $ucc = undef;
        $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
        $ucc = $ucd{name2code}{$utf8aliases{$s}}
            if (!defined $ucc
                && $utf8aliases{$s}
                && defined $ucd{name2code}{$utf8aliases{$s}});

        if (!defined $ucc) {
            if (defined $translations{$e}{$s}{hex}) {
                $v = $translations{$e}{$s}{hex};
                $ucc = 0;
            } elsif (defined $translations{$e}{$s}{ucc}) {
                $ucc = $translations{$e}{$s}{ucc};
            }
        }

        die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
        $v = $convertors{$e}{$ucc} if (!defined $v);

        $v = $translations{$e}{$s}{hex}
            if (!defined $v && defined $translations{$e}{$s}{hex});

        if (!defined $v && defined $translations{$e}{$s}{unicode}) {
            # Look the substitute name up from scratch; $ucc is always
            # defined at this point and must not mask the alias lookup.
            my $ucn = $translations{$e}{$s}{unicode};
            my $alt = $ucd{name2code}{$ucn};
            $alt = $ucd{name2code}{$utf8aliases{$ucn}}
                if (!defined $alt
                    && $utf8aliases{$ucn}
                    && defined $ucd{name2code}{$utf8aliases{$ucn}});
            $v = $convertors{$e}{$alt} if (defined $alt);
        }

        die "Cannot convert $s in $e (charmap)" if (!defined $v);
    }

    # Two hex digits per byte; any number of bytes is accepted so that
    # four byte sequences are converted as well.
    if (length($v) >= 2 && length($v) % 2 == 0) {
        return pack("C*", map { hex($_) } unpack("(A2)*", $v));
    }
    print STDERR "Cannot convert $e $s\n";
    return "length = " . length($v);
}

# Raw lookup in the translation table; undef when there is no entry.
sub translate {
    my $enc = shift;
    my $v = shift;

    return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
    return undef;
}

# Replace every <name> token in $v by its bytes in $enc.  $okay is a
# reference to the caller's success flag, cleared when a token cannot
# be converted (the remaining tokens are then left untouched).
sub decode_tokens {
    my ($enc, $k, $v, $okay) = @_;

    while ($v =~ /^(.*?)<(.*?)>(.*)/) {
        my ($p1, $cm, $p3) = ($1, $2, $3);

        my $rv = decodecldr($enc, $cm);
        if (!defined $rv) {
            print STDERR
                "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
            $$okay = 0;
            last;    # $v is unchanged, retrying would never end
        }

        $v = $p1 . $rv . $p3;
    }
    return $v;
}

# Write "$TYPE.draft/<locale>.<encoding>.src" for one locale and one
# encoding from the values collected by get_fields().  The file is
# written as ".new" first and renamed to ".failed" instead of ".src"
# when a conversion failed.
sub print_locale_fields {
    my ($l, $f, $c, $enc) = @_;

    my $file = locale_name($l, $f, $c);
    if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
        print "Skipping $file - not read\n";
        return;
    }
    print "Writing to $file in $enc\n";

    if ($enc ne $DEFENCODING && !defined $convertors{$enc}) {
        print "Failed! Cannot convert to $enc.\n";
        return;
    }

    my $base = "$TYPE.draft/$file.$enc";
    open(my $fout, '>', "$base.new")
        or die("Cannot create $base.new: $!");
    print $fout banner("generated");

    my $okay = 1;
    my $output = "";
    foreach my $k (keys(%keys)) {
        my $fmt = $keys{$k};

        die("Unknown $k in \%DESC")
            if (!defined $DESC{$k});

        $output .= "#\n# $DESC{$k}\n";

        # Replace one row with another
        if ($fmt =~ /^>/) {
            $k = substr($fmt, 1);
            $fmt = $keys{$k};
        }

        # Callback function
        # NOTE(review): the result is stored back into %values, so a
        # field that is its own source (abmon, d_t_fmt) is fed the
        # already processed value for the next encoding - confirm that
        # every such callback is meant to be idempotent.
        if ($fmt =~ /^\</) {
            $callback{data} = { c => $c, k => $k, l => $l, e => $enc };
            my @a = split(/\</, substr($fmt, 1));
            $values{$l}{$c}{$k} =
                $callback{$a[0]}->($values{$l}{$c}{$a[1]});
            $fmt = $a[2];
            $callback{data} = undef;
        }

        my $v = $values{$l}{$c}{$k};
        $v = "undef" if (!defined $v);

        if ($fmt eq "i" || $fmt eq "ai") {
            $output .= "$v\n";
            next;
        }
        if ($fmt eq "s") {
            $v =~ s/^"//;
            $v =~ s/"$//;
            $output .= decode_tokens($enc, $k, $v, \$okay) . "\n";
            next;
        }
        if ($fmt eq "as") {
            foreach my $item (split(/;/, $v)) {
                $item =~ s/^"//;
                $item =~ s/"$//;
                $output .= decode_tokens($enc, $k, $item, \$okay) . "\n";
            }
            next;
        }

        die("$k is '$fmt'");
    }

    my $digest = sha1_hex($output);
    $languages{$l}{$f}{data}{$c}{$enc} = $digest;
    $hashtable{$digest}{"${l}_${f}_${c}.$enc"} = 1;
    print $fout "$output# EOF\n";
    close($fout)
        or die("Cannot write $base.new: $!");

    my $final = $okay ? "$base.src" : "$base.failed";
    rename("$base.new", $final)
        or print STDERR "Cannot rename $base.new to $final: $!\n";
}

# Write the draft source of every wanted locale in every encoding.
sub print_fields {
    foreach my $l (sort keys(%languages)) {
    foreach my $f (sort keys(%{$languages{$l}})) {
    foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
        next if (!locale_wanted($l, $f, $c));
        foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
            print_locale_fields($l, $f, $c, $enc);
        }
    }
    }
    }
}

############################
# Makefile

# Sort orders deciding which of several identical outputs is installed
# as the real file (the first one); the others become links to it.

# colldef: weight towards UTF-8 by sorting upper-cased names in
# reverse; en_US, ru_RU and ca_AD in UTF-8 are preferred.
sub by_colldef_preference {
    if ($a eq 'en_x_US.UTF-8' ||
        $a eq 'ru_x_RU.UTF-8' ||
        $a eq 'ca_x_AD.UTF-8') { return -1; }
    elsif ($b eq 'en_x_US.UTF-8' ||
           $b eq 'ru_x_RU.UTF-8' ||
           $b eq 'ca_x_AD.UTF-8') { return 1; }
    else { return uc($b) cmp uc($a); }
}

# ctypedef: en_US.UTF-8, then any en_US, then en_GB.ISO8859-15 and
# ru_RU, then reverse upper-cased order.
sub by_ctypedef_preference {
    if ($a eq 'en_x_US.UTF-8') { return -1; }
    elsif ($b eq 'en_x_US.UTF-8') { return 1; }
    if ($a =~ /^en_x_US/) { return -1; }
    elsif ($b =~ /^en_x_US/) { return 1; }

    if ($a =~ /^en_x_GB.ISO8859-15/ ||
        $a =~ /^ru_x_RU/) { return -1; }
    elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
           $b =~ /^ru_x_RU/) { return 1; }
    else { return uc($b) cmp uc($a); }
}

# Field based types: en_US.UTF-8 first, "_Comm_" locales last.
sub by_field_preference {
    if ($a =~ /_Comm_/ ||
        $b eq 'en_x_US.UTF-8') { return 1; }
    elsif ($b =~ /_Comm_/ ||
           $a eq 'en_x_US.UTF-8') { return -1; }
    else { return uc($b) cmp uc($a); }
}

# Write "$TYPE.draft/Makefile".  Outputs with identical digests are
# installed once (LOCALES+=) and linked for the others (SAME+=).
# Nothing is written when --lc restricted the run to one locale.
sub make_makefile {
    return if ($#filter > -1);
    print "Creating Makefile for $TYPE\n";
    my $SRCOUT;
    my $SRCOUT2;
    my $SRCOUT3 = "";
    my $MAPLOC;
    if ($TYPE eq "colldef") {
        $SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
            "\t-f \${MAPLOC}/map.UTF-8 " .
            "\${.OBJDIR}/\${.IMPSRC:T:R}";
        $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
            "locale/etc/final-maps\n";
        $SRCOUT2 = "LC_COLLATE";
    }
    elsif ($TYPE eq "ctypedef") {
        $SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
            "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " .
            "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
            " || true";
        $SRCOUT2 = "LC_CTYPE";
        $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
            "locale/etc/final-maps\n";
        $SRCOUT3 = "## SYMPAIRS\n\n" .
            ".for s t in \${SYMPAIRS}\n" .
            "\${t:S/src\$/LC_CTYPE/}: " .
            "\$s\n" .
            "\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
            "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
            "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
            " || true\n" .
            ".endfor\n\n";
    }
    else {
        $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
        $SRCOUT2 = "out";
        $MAPLOC = "";
    }
    open(my $fout, '>', "$TYPE.draft/Makefile")
        or die("Cannot create $TYPE.draft/Makefile: $!");

    # Tabs are spelled \t: the recipe line has to start with one and
    # this keeps it intact whatever an editor does to the whitespace.
    print $fout <<EOF;
# \$FreeBSD\$
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=\t\${SHAREDIR}/locale
FILESNAME=\t$FILESNAMES{$TYPE}
.SUFFIXES:\t.src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
\t$SRCOUT

## PLACEHOLDER

EOF

    foreach my $hash (keys(%hashtable)) {
        my @names = keys(%{$hashtable{$hash}});
        my @files;
        if ($TYPE eq "colldef") {
            @files = sort by_colldef_preference @names;
        } elsif ($TYPE eq "ctypedef") {
            @files = sort by_ctypedef_preference @names;
        } else {
            @files = sort by_field_preference @names;
        }
        next if ($#files <= 0);

        my $link = shift(@files);
        $link =~ s/_x_/_/;    # strip family if none there
        foreach my $file (@files) {
            # "la_Fam_CC.encoding" -> language, family, country, encoding
            my @a = split(/_/, $file);
            my @b = split(/\./, $a[-1], 2);
            $file =~ s/_x_/_/;
            print $fout "SAME+=\t\t$link $file\n";
            # Linked, hence no longer listed under LOCALES+= below.
            undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
        }
    }

    foreach my $l (sort keys(%languages)) {
    foreach my $f (sort keys(%{$languages{$l}})) {
    foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
        next if (!locale_wanted($l, $f, $c));
        my $file = locale_name($l, $f, $c);
        if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
            && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
            print "Skipping $file - not read\n";
            next;
        }
        my @encs = sort keys(%{$languages{$l}{$f}{data}{$c}});
        foreach my $e (@encs) {
            next if (!defined $languages{$l}{$f}{data}{$c}{$e});
            print $fout "LOCALES+=\t$file.$e\n";
        }

        if (defined $languages{$l}{$f}{nc_link}) {
            foreach my $e (@encs) {
                print $fout "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
            }
        }

        if (defined $languages{$l}{$f}{e_link}) {
            foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
                my @a = split(/:/, $el);
                print $fout "SAME+=\t\t$file.$a[0] $file.$a[1]\t# legacy (same charset)\n";
            }
        }
    }
    }
    }

    print $fout <<EOF;

FILES=\t\t\${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=\t\${FILES}

.for f t in \${SAME}
SYMLINKS+=\t../\$f/\${FILESNAME} \\
    \${LOCALEDIR}/\$t/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

    close($fout)
        or die("Cannot write $TYPE.draft/Makefile: $!");
}