xref: /NextBSD/tools/tools/locale/tools/cldr2def.pl (revision 4557fabb34e865d7f40be64b39c9e34fa41dbb60)
1#!/usr/local/bin/perl -wC
2# $FreeBSD$
3
4use strict;
5use File::Copy;
6use XML::Parser;
7use Tie::IxHash;
8use Data::Dumper;
9use Getopt::Long;
10use Digest::SHA qw(sha1_hex);
11require "charmaps.pm";
12
13
# Command-line handling.
#
# --cldr, --unidata, --etc and --type are all consumed by the code below
# (CLDR posix sources, UnicodeData.txt, the etc directory with charmaps,
# and the kind of definition to generate).  --lc optionally restricts the
# run to a single locale given as la_CC or la_Family_CC.
my $USAGE = "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";

# Cheap early rejection: fewer than three arguments can never be valid.
if ($#ARGV < 2) {
	print $USAGE;
	exit(1);
}

my $DEFENCODING = "UTF-8";	# encoding of the CLDR posix source files
my @filter = ();		# (language, family, country) when --lc is used

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

my $result = GetOptions (
		"cldr=s"	=> \$CLDRDIR,
		"unidata=s"	=> \$UNIDATADIR,
		"etc=s"		=> \$ETCDIR,
		"type=s"	=> \$TYPE,
		"lc=s"		=> \$doonly
	    );

# GetOptions returns false when it could not parse the command line; the
# four directory/type options have no usable default, so refuse to go on
# with undefined values instead of failing later in obscure ways.
if (!$result || !defined $CLDRDIR || !defined $UNIDATADIR ||
    !defined $ETCDIR || !defined $TYPE) {
	print $USAGE;
	exit(1);
}
35
# Global lookup tables shared by every sub in this script.

# $convertors{encoding}{unicode code} = code in that encoding (both as
# upper-case hex strings); filled by get_encodings().
my %convertors = ();

# $ucd{code2name}{code} and $ucd{name2code}{name}; filled by get_unidata().
my %ucd = ();
# $values{language}{country}{field} = raw field text; filled by get_fields().
my %values = ();
# $hashtable{sha1}{"l_f_c.encoding"} = 1; groups outputs with identical
# content so make_makefile() can emit them as links.
my %hashtable = ();
# The next four are loaded by get_languages() from get_xmldata().
my %languages = ();
my %translations = ();
my %encodings = ();
my %alternativemonths = ();
get_languages();

# $utf8map{character name} = UTF-8 bytes as a hex string;
# $utf8aliases{name} = preceding charmap name that has the same bytes.
my %utf8map = ();
my %utf8aliases = ();
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# %keys holds the fields to extract for the selected --type.  Both hashes
# are tied to Tie::IxHash so iteration follows insertion order.
my %keys = ();
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");
56
# Maps each supported --type value to the name written on the FILESNAME=
# line of the Makefile generated by make_makefile().
my %FILESNAMES = (
	"monetdef"	=> "LC_MONETARY",
	"timedef"	=> "LC_TIME",
	"msgdef"	=> "LC_MESSAGES",
	"numericdef"	=> "LC_NUMERIC",
	"colldef"	=> "LC_COLLATE",
	"ctypedef"	=> "LC_CTYPE"
);
65
# Hooks that can be named in a %keys field spec of the form
# "<hook<source_field<format".  print_fields() calls the hook with the
# value of source_field and stores the result as the field's value.
# The "data" slot is filled by print_fields() with the current country (c),
# key (k), language (l) and encoding (e) before each call and reset after.
my %callback = (
	mdorder => \&callback_mdorder,
	altmon => \&callback_altmon,
	cformat => \&callback_cformat,
	dtformat => \&callback_dtformat,
	cbabmon => \&callback_abmon,
	data => undef,
);
74
# Description emitted as a "# ..." comment line above each field in the
# generated files.  print_fields() dies if a key of %keys is missing here.
my %DESC = (

	# numericdef
	"decimal_point"	=> "decimal_point",
	"thousands_sep"	=> "thousands_sep",
	"grouping"	=> "grouping",

	# monetdef
	"int_curr_symbol"	=> "int_curr_symbol (last character always " .
				   "SPACE)",
	"currency_symbol"	=> "currency_symbol",
	"mon_decimal_point"	=> "mon_decimal_point",
	"mon_thousands_sep"	=> "mon_thousands_sep",
	"mon_grouping"		=> "mon_grouping",
	"positive_sign"		=> "positive_sign",
	"negative_sign"		=> "negative_sign",
	"int_frac_digits"	=> "int_frac_digits",
	"frac_digits"		=> "frac_digits",
	"p_cs_precedes"		=> "p_cs_precedes",
	"p_sep_by_space"	=> "p_sep_by_space",
	"n_cs_precedes"		=> "n_cs_precedes",
	"n_sep_by_space"	=> "n_sep_by_space",
	"p_sign_posn"		=> "p_sign_posn",
	"n_sign_posn"		=> "n_sign_posn",

	# msgdef
	"yesexpr"	=> "yesexpr",
	"noexpr"	=> "noexpr",
	"yesstr"	=> "yesstr",
	"nostr"		=> "nostr",

	# timedef
	"abmon"		=> "Short month names",
	"mon"		=> "Long month names (as in a date)",
	"abday"		=> "Short weekday names",
	"day"		=> "Long weekday names",
	"t_fmt"		=> "X_fmt",
	"d_fmt"		=> "x_fmt",
	"c_fmt"		=> "c_fmt",
	"am_pm"		=> "AM/PM",
	"d_t_fmt"	=> "date_fmt",
	"altmon"	=> "Long month names (without case ending)",
	"md_order"	=> "md_order",
	"t_fmt_ampm"	=> "ampm_fmt",
);
120
# Main dispatch on --type.
#
# colldef and ctypedef copy/extract whole sections of the CLDR sources.
# The other types fill %keys with the fields to extract, in output order
# (%keys is tied to Tie::IxHash).  Each value is a format code:
#   "s"  string, "as" ;-separated list of strings,
#   "i"  integer, "ai" list of integers,
#   "<hook<source<format"  run %callback{hook} on field "source" first.
# An unknown or missing type is reported instead of being silently ignored.
die "No --type given\n" if (!defined $TYPE);

if ($TYPE eq "colldef") {
	transform_collation();
	make_makefile();
}
elsif ($TYPE eq "ctypedef") {
	transform_ctypes();
	make_makefile();
}
elsif ($TYPE eq "numericdef") {
	%keys = (
	    "decimal_point"	=> "s",
	    "thousands_sep"	=> "s",
	    "grouping"		=> "ai",
	);
	get_fields();
	print_fields();
	make_makefile();
}
elsif ($TYPE eq "monetdef") {
	%keys = (
	    "int_curr_symbol"	=> "s",
	    "currency_symbol"	=> "s",
	    "mon_decimal_point"	=> "s",
	    "mon_thousands_sep"	=> "s",
	    "mon_grouping"	=> "ai",
	    "positive_sign"	=> "s",
	    "negative_sign"	=> "s",
	    "int_frac_digits"	=> "i",
	    "frac_digits"	=> "i",
	    "p_cs_precedes"	=> "i",
	    "p_sep_by_space"	=> "i",
	    "n_cs_precedes"	=> "i",
	    "n_sep_by_space"	=> "i",
	    "p_sign_posn"	=> "i",
	    "n_sign_posn"	=> "i"
	);
	get_fields();
	print_fields();
	make_makefile();
}
elsif ($TYPE eq "msgdef") {
	%keys = (
	    "yesexpr"		=> "s",
	    "noexpr"		=> "s",
	    "yesstr"		=> "s",
	    "nostr"		=> "s"
	);
	get_fields();
	print_fields();
	make_makefile();
}
elsif ($TYPE eq "timedef") {
	# "d_fmt" used to be listed twice with the same value; the second
	# entry only overwrote the first in place, so it is listed once.
	%keys = (
	    "abmon"		=> "<cbabmon<abmon<as",
	    "mon"		=> "as",
	    "abday"		=> "as",
	    "day"		=> "as",
	    "t_fmt"		=> "s",
	    "d_fmt"		=> "s",
	    "c_fmt"		=> "<cformat<d_t_fmt<s",
	    "am_pm"		=> "as",
	    "d_t_fmt"		=> "<dtformat<d_t_fmt<s",
	    "altmon"		=> "<altmon<mon<as",
	    "md_order"		=> "<mdorder<d_fmt<s",
	    "t_fmt_ampm"	=> "s",
	);
	get_fields();
	print_fields();
	make_makefile();
}
else {
	die "Unknown --type '$TYPE' (expected one of: " .
	    join(", ", sort keys(%FILESNAMES)) . ")\n";
}
197
# Hook for c_fmt: takes a date/time format string and returns it with the
# first " %Z" and then the first " %z" (time zone conversions, including
# the leading space) removed.
sub callback_cformat {
	my ($fmt) = @_;

	foreach my $zone (qr/ %Z/, qr/ %z/) {
		$fmt =~ s/$zone//;
	}
	return $fmt;
}
204
# Hook for d_t_fmt: returns the format unchanged except for ja_JP, where
# "%A " is inserted between the first "> " and the "%H" that follows it.
# The locale is taken from $callback{data}{l} and $callback{data}{c}.
sub callback_dtformat {
	my ($fmt) = @_;
	my $locale = $callback{data}{l} . "_" . $callback{data}{c};

	$fmt =~ s/(> )(%H)/$1%A $2/ if ($locale eq 'ja_JP');
	return $fmt;
}
214
# Hook for md_order: reduces a date format to the sequence of its "d" and
# "m" characters (every other character is deleted).  Returns undef when
# no format was supplied.
sub callback_mdorder {
	my ($fmt) = @_;

	return undef unless (defined $fmt);
	$fmt =~ tr/dm//cd;	# delete the complement of {d, m}
	return $fmt;
}
221
# Hook for altmon: when %alternativemonths has an entry for the current
# language/country (from $callback{data}), return its ;-separated names
# with surrounding whitespace trimmed from each; otherwise return the
# regular month list that was passed in.
sub callback_altmon {
	my ($mon) = @_;
	my $alt = $alternativemonths{$callback{data}{l}}{$callback{data}{c}};

	return $mon if (!defined $alt);

	my @trimmed = map {
		my $name = $_;
		$name =~ s/^\s+//;
		$name =~ s/\s+$//;
		$name;
	} split(";", $alt);
	return join(";", @trimmed);
}
241
# Hook for abmon: for the listed CJK locales, prefix single-digit month
# names with <space> so that columns line up (style established in FreeBSD
# in 2001).  A name counts as single-digit when it starts with <two>..<nine>,
# or with <one> not followed by <zero>, <one> or <two>.  Other locales get
# the list back unchanged.
sub callback_abmon {
	my ($abmon) = @_;
	my %padded = map { $_ => 1 } qw(ja_JP ko_KR zh_CN zh_HK zh_TW);
	my $locale = $callback{data}{l} . "_" . $callback{data}{c};

	return $abmon if (!$padded{$locale});

	my @names = map {
		my $name = $_;
		my $single =
		    ($name =~ /^"<(two|three|four|five|six|seven|eight|nine)>/) ||
		    ($name =~ /^"<one>/ && $name !~ /^"<one>(<zero>|<one>|<two>)/);
		$name =~ s/^"/"<space>/ if ($single);
		$name;
	} split(";", $abmon);
	return join(";", @names);
}
265
266############################
267
# Load "$directory/UnicodeData.txt" into %ucd.
#
# Each line is split on ';'; the first field is stored as the code and the
# second as the name, giving the two lookup tables $ucd{code2name} and
# $ucd{name2code}.  Dies if the file cannot be opened.
sub get_unidata {
	my $directory = shift;
	my $path = "$directory/UnicodeData.txt";

	# Three-argument open with a lexical handle: the path is never
	# interpreted as a mode, and no global handle is shared with other subs.
	open(my $fin, "<", $path)
	    or die("Cannot open $path: $!");

	while (my $l = <$fin>) {
		chomp($l);
		my @a = split(/;/, $l);
		next if (@a < 2);	# blank or malformed line: nothing to map

		$ucd{code2name}{"$a[0]"} = $a[1];	# Unicode name
		$ucd{name2code}{"$a[1]"} = $a[0];	# Unicode code
	}
	close($fin);
}
284
# Load a charmap file into %utf8map and %utf8aliases.
#
# Only lines between "CHARMAP" and "END CHARMAP" of the form
# "<NAME>  \xHH\xHH..." are used.  Underscores in NAME become spaces and
# the "\x" prefixes are dropped, so $utf8map{name} is a plain hex string.
# When an entry has the same bytes as the entry right before it, the
# earlier name is recorded in $utf8aliases{name}.
#
# An unreadable file is reported on STDERR and leaves the maps untouched.
sub get_utf8map {
	my $file = shift;

	my $fin;
	if (!open($fin, "<", $file)) {
		print STDERR "Cannot open $file: $!\n";
		return;
	}

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	while (my $l = <$fin>) {
		chomp($l);
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Skip anything that is not a "<NAME> value" entry; using $1/$2
		# after a failed match would silently repeat the previous entry
		# and register it as an alias of itself.
		my ($k, $v) = ($l =~ /^<([^\s]+)>\s+(.*)/);
		next if (!defined $k);

		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}
	close($fin);
}
322
# Load "$dir/<encoding>.TXT" for every key of %encodings into %convertors.
#
# Each data line holds the local character code followed by the Unicode
# code, optionally prefixed with 0x; the result is
# $convertors{encoding}{UNICODE CODE} = LOCAL CODE (upper-case hex).
# $encodings{encoding} is set to 1 for every file that could be opened;
# a missing file is reported and skipped.
sub get_encodings {
	my $dir = shift;
	foreach my $e (sort(keys(%encodings))) {
		my $fin;
		if (!open($fin, "<", "$dir/$e.TXT")) {
			print "Cannot open charmap for $e\n";
			next;
		}
		$encodings{$e} = 1;
		while (my $l = <$fin>) {
			chomp($l);
			$l =~ s/\r//;
			next if ($l =~ /^\#/);
			next if ($l eq "");

			my @a = split(" ", $l);
			next if ($#a < 1);
			# A second column that is already a comment (presumably
			# entries such as "0x80 #UNDEFINED") carries no mapping.
			next if ($a[1] =~ /^\#/);
			$a[0] =~ s/^0[xX]//;	# local char code
			$a[1] =~ s/^0[xX]//;	# unicode char code
			$convertors{$e}{uc($a[1])} = uc($a[0]);
		}
		close($fin);
	}
}
348
# Populate %languages, %translations, %alternativemonths and %encodings
# from get_xmldata($ETCDIR), then turn the --lc argument (if any) into
# @filter = (language, family, country).  A two-part value "la_CC" gets the
# family "x"; a three-part value is used as is; any other shape leaves
# @filter empty.  The resulting filter is dumped to STDOUT.
sub get_languages {
	my %xml = get_xmldata($ETCDIR);
	%languages = %{$xml{L}};
	%translations = %{$xml{T}};
	%alternativemonths = %{$xml{AM}};
	%encodings = %{$xml{E}};

	if (defined $doonly) {
		my @parts = split(/_/, $doonly);
		if (scalar(@parts) == 2) {
			@filter[0 .. 2] = ($parts[0], "x", $parts[1]);
		} elsif (scalar(@parts) == 3) {
			@filter[0 .. 2] = @parts;
		}

		print Dumper(@filter);
	}
	return;
}
372
# Generate the LC_CTYPE source files in "$TYPE.draft/".
#
# For every language/family/country selected by @filter and by the
# "definitions" attribute:
#   - the UTF-8 variant is a verbatim copy of xx_Comm_US.UTF-8.src;
#   - every other encoding gets the comment_char/escape_char lines plus the
#     LC_CTYPE section of the locale's own UTF-8 source, under a
#     "do not edit" header.
# The SHA-1 of each output is stored in %languages and %hashtable so that
# make_makefile() can link identical results.
sub transform_ctypes {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		# Honour --lc, and skip locales whose "definitions" attribute
		# does not mention this type.
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		# Locale name on disk: la_CC, or la_Family_CC when a family
		# other than the placeholder "x" is set.
		my $file;
		$file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		my $actfile = $file;

		# The UTF-8 ctype data is the same common file for all locales.
		my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
		if (! -f $filename) {
			print STDERR "Cannot open $filename\n";
			next;
		}
		open(FIN, "$filename");
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines;
		my $shex;
		my $uhex;
		while (<FIN>) {
			push @lines, $_;
		}
		close(FIN);
		# The 0/1 marker is replaced by the content hash.
		$shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
		# NOTE(review): the result of this open is not checked.
		open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
		print FOUT @lines;
		close(FOUT);
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
			if (! -f $filename) {
				print STDERR "Cannot open $filename\n";
				next;
			}
			@lines = ();
			open(FIN, "$filename");
			while (<FIN>) {
				if ((/^comment_char\s/) || (/^escape_char\s/)){
					push @lines, $_;
				}
				# Range operator: true from the LC_CTYPE line
				# through the END LC_CTYPE line, inclusive.
				if (/^LC_CTYPE/../^END LC_CTYPE/) {
					push @lines, $_;
				}
			}
			close(FIN);
			# The encoding name is part of the hash, so outputs for
			# different encodings never hash the same.
			$uhex = sha1_hex(join("\n", @lines) . $enc);
			$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
			$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
			open(FOUT, ">$TYPE.draft/$actfile.$enc.src");
			print FOUT <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			print FOUT @lines;
			close(FOUT);
		}
	}
	}
	}
}
444
445
# Generate the LC_COLLATE source files in "$TYPE.draft/".
#
# For every selected language/family/country the UTF-8 source is looked up
# in $CLDRDIR/posix, then $ETCDIR, then under the locale's "fallback" name
# in $CLDRDIR/posix.  The comment_char/escape_char lines and the LC_COLLATE
# section (with runs of spaces squeezed to one) are written under a
# "do not edit" header, and that file is then copied for every other
# encoding of the locale.  All encodings share the same SHA-1 in
# %languages and %hashtable.
sub transform_collation {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		# Honour --lc, and skip locales whose "definitions" attribute
		# does not mention this type.
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		# Locale name on disk: la_CC, or la_Family_CC when a family
		# other than the placeholder "x" is set.
		my $file;
		$file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		# Output keeps the locale's own name even when the input comes
		# from the fallback ($file is overwritten below).
		my $actfile = $file;

		# Source lookup order: CLDR, etc directory, CLDR fallback.
		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		open(FIN, "$filename");
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines;
		my $shex;
		while (<FIN>) {
			if ((/^comment_char\s/) || (/^escape_char\s/)){
				push @lines, $_;
			}
			# Range operator: true from the LC_COLLATE line through
			# the END LC_COLLATE line, inclusive.
			if (/^LC_COLLATE/../^END LC_COLLATE/) {
				$_ =~ s/[ ]+/ /g;
				push @lines, $_;
			}
		}
		close(FIN);
		# The 0/1 marker is replaced by the content hash.
		$shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
		# NOTE(review): the result of this open is not checked.
		open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
		print FOUT <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
		print FOUT @lines;
		close(FOUT);

		# Every other encoding is a plain copy of the UTF-8 output and
		# is registered under the same hash.
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
			      "$TYPE.draft/$actfile.$enc.src");
			$languages{$l}{$f}{data}{$c}{$enc} = $shex;
			$hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}
515
# Read the fields named in %keys from each selected locale's UTF-8 source
# into $values{language}{country}{field}.
#
# The source is looked up in $CLDRDIR/posix, then $ETCDIR, then under the
# locale's "fallback" name in $CLDRDIR/posix.  A field starts on the line
# beginning with its keyword and continues over following lines for as
# long as a line ends in "/".  Underscores inside <...> character names are
# turned into spaces; an underscore anywhere else is a fatal error.
#
# $languages{l}{f}{data}{c}{UTF-8} is set to 0 before the lookup and to 1
# once the file has been read, which print_fields() and make_makefile()
# use to skip locales without data.
sub get_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		# Honour --lc, and skip locales whose "definitions" attribute
		# does not mention this type.
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread
		my $file;
		$file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		# Source lookup order: CLDR, etc directory, CLDR fallback.
		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		# A file that exists but cannot be opened is left marked as
		# unread instead of being treated as an empty source.
		my $fin;
		if (!open($fin, "<", $filename)) {
			print STDERR "Cannot open $filename: $!\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		chomp(@lines);
		close($fin);

		# NOTE(review): %values is keyed by language and country only,
		# so two families sharing both would append to the same
		# entries - confirm whether that can happen with real data.
		foreach my $k (keys(%keys)) {
			# Continuation state is per field; a dangling "/" on
			# the last line must not leak into the next field.
			my $continue = 0;
			foreach my $line (@lines) {
				$line =~ s/\r//;
				next if (!$continue && $line !~ /^$k\s/);
				if ($continue) {
					$line =~ s/^\s+//;
				} else {
					$line =~ s/^$k\s+//;
				}

				$values{$l}{$c}{$k} = ""
					if (!defined $values{$l}{$c}{$k});

				$continue = ($line =~ /\/$/);
				$line =~ s/\/$// if ($continue);

				# Replace one underscore per pass for as long as
				# a <...> name still contains one.  Looping on
				# the substitution itself (rather than on the
				# presence of "_") guarantees termination, so
				# the check below can actually fire.
				1 while ($line =~
				    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
				die "_ in data - $line" if ($line =~ /_/);
				$values{$l}{$c}{$k} .= $line;

				last if (!$continue);
			}
		}
	}
	}
	}
}
583
# Convert a symbolic character name to its byte sequence in an encoding.
#
# Arguments: $e - target encoding, $s - character name as used in the CLDR
# sources (underscores already replaced by spaces).
# Returns the character as a string of bytes.  Dies when the name cannot
# be mapped at all; if the mapped hex code has an unusable length, a
# message is printed to STDERR and the string "length = N" is returned.
sub decodecldr {
	my $e = shift;
	my $s = shift;

	my $v = undef;

	if ($e eq "UTF-8") {
		#
		# Conversion to UTF-8 can be done from the Unicode name to
		# the UTF-8 character code.
		#
		$v = $utf8map{$s};
		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	} else {
		#
		# Conversion to these encodings can be done from the Unicode
		# name to Unicode code to the encodings code.
		#
		my $ucc = undef;
		$ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
		$ucc = $ucd{name2code}{$utf8aliases{$s}}
			if (!defined $ucc
			 && $utf8aliases{$s}
			 && defined $ucd{name2code}{$utf8aliases{$s}});

		# Names unknown to UnicodeData.txt may be supplied by the
		# per-encoding translation table, either directly as a hex
		# code or as a Unicode code.
		if (!defined $ucc) {
			if (defined $translations{$e}{$s}{hex}) {
				$v = $translations{$e}{$s}{hex};
				$ucc = 0;
			} elsif (defined $translations{$e}{$s}{ucc}) {
				$ucc = $translations{$e}{$s}{ucc};
			}
		}

		die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
		$v = $convertors{$e}{$ucc} if (!defined $v);

		$v = $translations{$e}{$s}{hex}
			if (!defined $v && defined $translations{$e}{$s}{hex});

		# Last resort: the translation table names a substitute
		# Unicode character.  Its code is looked up in a variable of
		# its own; reusing $ucc (always defined at this point) made
		# the alias fallback unreachable.
		if (!defined $v && defined $translations{$e}{$s}{unicode}) {
			my $ucn = $translations{$e}{$s}{unicode};
			my $alt = undef;
			$alt = $ucd{name2code}{$ucn}
				if (defined $ucd{name2code}{$ucn});
			$alt = $ucd{name2code}{$utf8aliases{$ucn}}
				if (!defined $alt
				 && defined $utf8aliases{$ucn}
				 && defined $ucd{name2code}{$utf8aliases{$ucn}});
			$v = $convertors{$e}{$alt} if (defined $alt);
		}

		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	}

	# $v is a string of two hex digits per byte.  Any whole number of
	# bytes is accepted, which covers 4-byte UTF-8 sequences as well as
	# the 1-3 byte codes handled before.
	my $len = length($v);
	if ($len > 0 && $len % 2 == 0) {
		return pack("C*", map { hex($_) } unpack("(a2)*", $v));
	}
	print STDERR "Cannot convert $e $s\n";
	return "length = " . $len;

}
646
# Look up $v in the translation table of encoding $enc.
# Returns the stored entry, or undef when there is none.
sub translate {
	my ($enc, $v) = @_;
	my $entry = $translations{$enc}{$v};

	return defined($entry) ? $entry : undef;
}
654
# Write one definition file per selected locale and encoding.
#
# For each field in %keys (in insertion order) a "# description" header
# from %DESC is emitted followed by the field's value from %values, with
# every <character name> replaced by its bytes in the target encoding
# (see decodecldr).  Output goes to "$TYPE.draft/<locale>.<enc>.new",
# which is renamed to ".src" on success or ".failed" otherwise.  The
# SHA-1 of the generated text is stored in %languages and %hashtable.
sub print_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		# Honour --lc, and skip locales whose "definitions" attribute
		# does not mention this type.
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			# get_fields() leaves the UTF-8 slot at 0 when no source
			# file was found for this locale.
			if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
				print "Skipping ${l}_" .
				    ($f eq "x" ? "" : "${f}_") .
				    "${c} - not read\n";
				next;
			}
			my $file = $l;
			$file .= "_" . $f if ($f ne "x");
			$file .= "_" . $c;
			print "Writing to $file in $enc\n";

			# Non-UTF-8 encodings need a table loaded by
			# get_encodings().
			if ($enc ne $DEFENCODING &&
			    !defined $convertors{$enc}) {
				print "Failed! Cannot convert to $enc.\n";
				next;
			};

			# NOTE(review): the result of this open is not checked.
			open(FOUT, ">$TYPE.draft/$file.$enc.new");
			my $okay = 1;
			my $output = "";
			print FOUT <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			foreach my $k (keys(%keys)) {
				# $f here is the field's format spec; it shadows
				# the family loop variable until the end of this
				# loop body.
				my $f = $keys{$k};

				die("Unknown $k in \%DESC")
					if (!defined $DESC{$k});

				$output .= "#\n# $DESC{$k}\n";

				# Replace one row with another
				if ($f =~ /^>/) {
					$k = substr($f, 1);
					$f = $keys{$k};
				}

				# Callback function
				# Spec is "<hook<source<format": call the hook
				# on the source field, store the result as this
				# field's value and continue with "format".
				if ($f =~ /^\</) {
					$callback{data}{c} = $c;
					$callback{data}{k} = $k;
					$callback{data}{l} = $l;
					$callback{data}{e} = $enc;
					my @a = split(/\</, substr($f, 1));
					my $rv =
					    &{$callback{$a[0]}}($values{$l}{$c}{$a[1]});
					$values{$l}{$c}{$k} = $rv;
					$f = $a[2];
					$callback{data} = ();
				}

				my $v = $values{$l}{$c}{$k};
				$v = "undef" if (!defined $v);

				# Integers and integer lists are copied verbatim.
				if ($f eq "i") {
					$output .= "$v\n";
					next;
				}
				if ($f eq "ai") {
					$output .= "$v\n";
					next;
				}
				# String: strip the surrounding quotes, then
				# replace the leftmost <name> until none is left.
				if ($f eq "s") {
					$v =~ s/^"//;
					$v =~ s/"$//;
					my $cm = "";
					while ($v =~ /^(.*?)<(.*?)>(.*)/) {
						my $p1 = $1;
						$cm = $2;
						my $p3 = $3;

						my $rv = decodecldr($enc, $cm);
#						$rv = translate($enc, $cm)
#							if (!defined $rv);
						if (!defined $rv) {
							print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
							$okay = 0;
							next;
						}

						$v = $p1 . $rv . $p3;
					}
					$output .= "$v\n";
					next;
				}
				# List of strings: same treatment for each
				# ;-separated element, one output line each.
				if ($f eq "as") {
					foreach my $v (split(/;/, $v)) {
						$v =~ s/^"//;
						$v =~ s/"$//;
						my $cm = "";
						while ($v =~ /^(.*?)<(.*?)>(.*)/) {
							my $p1 = $1;
							$cm = $2;
							my $p3 = $3;

							my $rv =
							    decodecldr($enc,
								$cm);
#							$rv = translate($enc,
#							    $cm)
#							    if (!defined $rv);
							if (!defined $rv) {
								print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
								$okay = 0;
								next;
							}

							$v = $1 . $rv . $3;
						}
						$output .= "$v\n";
					}
					next;
				}

				# Any other format code is a programming error.
				die("$k is '$f'");

			}

			# Back in the outer scope, $f is the family again.
			$languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
			$hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
			print FOUT "$output# EOF\n";
			close(FOUT);

			if ($okay) {
				rename("$TYPE.draft/$file.$enc.new",
				    "$TYPE.draft/$file.$enc.src");
			} else {
				rename("$TYPE.draft/$file.$enc.new",
				    "$TYPE.draft/$file.$enc.failed");
			}
		}
	}
	}
	}
}
804
# Write "$TYPE.draft/Makefile" for the files generated in this run.
#
# Does nothing when --lc restricted the run to a single locale.
# The Makefile consists of:
#   - a header with the suffix rule that builds each .src file
#     (localedef for colldef/ctypedef, a comment-stripping grep otherwise);
#   - "SAME+=" lines for outputs whose content hash is shared, plus the
#     legacy links described by the nc_link and e_link attributes;
#   - "LOCALES+=" lines for every output that is not a link;
#   - a fixed trailer that turns SAME into SYMLINKS.
sub make_makefile {
	return if ($#filter > -1);
	print "Creating Makefile for $TYPE\n";
	my $SRCOUT;		# command of the suffix rule
	my $SRCOUT2;		# target suffix of the suffix rule
	my $SRCOUT3 = "";	# extra rules placed before the final .include
	my $MAPLOC;		# MAPLOC= line, or empty when unused
	if ($TYPE eq "colldef") {
		$SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
			"\t-f \${MAPLOC}/map.UTF-8 " .
			"\${.OBJDIR}/\${.IMPSRC:T:R}";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT2 = "LC_COLLATE";
	}
	elsif ($TYPE eq "ctypedef") {
		$SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " .
			"\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
			" || true";
		$SRCOUT2 = "LC_CTYPE";
		$MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
				"locale/etc/final-maps\n";
		$SRCOUT3 = "## SYMPAIRS\n\n" .
			".for s t in \${SYMPAIRS}\n" .
			"\${t:S/src\$/LC_CTYPE/}: " .
			"\$s\n" .
			"\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
			" || true\n" .
			".endfor\n\n";
	}
	else {
		$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
		$SRCOUT2 = "out";
		$MAPLOC = "";
	}
	# NOTE(review): the result of this open is not checked.
	open(FOUT, ">$TYPE.draft/Makefile");
	print FOUT <<EOF;
# \$FreeBSD\$
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=	\${SHAREDIR}/locale
FILESNAME=	$FILESNAMES{$TYPE}
.SUFFIXES:	.src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
	$SRCOUT

## PLACEHOLDER

EOF

	# Each %hashtable entry lists the outputs with identical content.
	# The first name after sorting becomes the real file; the others
	# become "SAME+=" links to it.
	foreach my $hash (keys(%hashtable)) {
		# For colldef, weight LOCALES to UTF-8
		#     Sort as upper-case and reverse to achieve it
		#     Make en_US, ru_RU, and ca_AD preferred
		my @files;
		if ($TYPE eq "colldef") {
			@files = sort {
				if ($a eq 'en_x_US.UTF-8' ||
				    $a eq 'ru_x_RU.UTF-8' ||
				    $a eq 'ca_x_AD.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8' ||
				       $b eq 'ru_x_RU.UTF-8' ||
				       $b eq 'ca_x_AD.UTF-8') { return 1; }
				else { return uc($b) cmp uc($a); }
				} keys(%{$hashtable{$hash}});
		} elsif ($TYPE eq "ctypedef") {
			# Preference: en_US.UTF-8, then any en_US, then
			# en_GB.ISO8859-15 or ru_RU, then reverse
			# case-insensitive order.
			# NOTE(review): the ru_x_RU test on $b below lacks the
			# "^" anchor used on $a - presumably an oversight.
			@files = sort {
				if ($a eq 'en_x_US.UTF-8') { return -1; }
				elsif ($b eq 'en_x_US.UTF-8') { return 1; }
				if ($a =~ /^en_x_US/) { return -1; }
				elsif ($b =~ /^en_x_US/) { return 1; }

				if ($a =~ /^en_x_GB.ISO8859-15/ ||
				    $a =~ /^ru_x_RU/) { return -1; }
				elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
				       $b =~ /ru_x_RU/) { return 1; }
				else { return uc($b) cmp uc($a); }

				} keys(%{$hashtable{$hash}});
		} else {
			# Names containing "_Comm_" sort last and
			# en_US.UTF-8 sorts first.
			@files = sort {
				if ($a =~ /_Comm_/ ||
				    $b eq 'en_x_US.UTF-8') { return 1; }
				elsif ($b =~ /_Comm_/ ||
				       $a eq 'en_x_US.UTF-8') { return -1; }
				else { return uc($b) cmp uc($a); }
				} keys(%{$hashtable{$hash}});
		}
		if ($#files > 0) {
			my $link = shift(@files);
			$link =~ s/_x_/_/;	# strip family if none there
			foreach my $file (@files) {
				# Split "l_f_c.enc" back into its parts before
				# the family placeholder is stripped.
				my @a = split(/_/, $file);
				my @b = split(/\./, $a[-1]);
				$file =~ s/_x_/_/;
				print FOUT "SAME+=\t\t$link $file\n";
				# Linked outputs must not show up in LOCALES.
				undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
			}
		}
	}

	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		# Honour --lc, and skip locales whose "definitions" attribute
		# does not mention this type.
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
		 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
			print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
			    "${c} - not read\n";
			next;
		}
		# Real files: every encoding that was not turned into a link.
		foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			my $file = $l . "_";
			$file .= $f . "_" if ($f ne "x");
			$file .= $c;
			next if (!defined $languages{$l}{$f}{data}{$c}{$e});
			print FOUT "LOCALES+=\t$file.$e\n";
		}

		# nc_link: an additional locale name linked to this one for
		# every encoding.
		if (defined $languages{$l}{$f}{nc_link}) {
			foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
				my $file = $l . "_";
				$file .= $f . "_" if ($f ne "x");
				$file .= $c;
				print FOUT "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
			}
		}

		# e_link: space-separated "encoding:alias" pairs; the alias
		# encoding is linked to the first one.
		if (defined $languages{$l}{$f}{e_link}) {
			foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
				my @a = split(/:/, $el);
				my $file = $l . "_";
				$file .= $f . "_" if ($f ne "x");
				$file .= $c;
				print FOUT "SAME+=\t\t$file.$a[0] $file.$a[1]\t# legacy (same charset)\n";
			}
		}

	}
	}
	}

	print FOUT <<EOF;

FILES=		\${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=	\${FILES}

.for f t in \${SAME}
SYMLINKS+=	../\$f/\${FILESNAME} \\
    \${LOCALEDIR}/\$t/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

	close(FOUT);
}
974