2 # *******************************************************************************
\r
3 # * Copyright (C) 2002-2004, International Business Machines Corporation and *
\r
4 # * others. All Rights Reserved. *
\r
5 # *******************************************************************************
\r
9 if "%OS%" == "Windows_NT" goto WinNT
\r
10 perl -W -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
\r
13 perl -W -x -S "%0" %*
\r
14 if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
\r
15 if %errorlevel% == 9009 echo You do not have Perl in your PATH.
\r
21 # This Perl script updates the filters in the transliterator index file.
\r
22 # It does so in a dumb way:
\r
27 # For transliterators using NFKD, or not using Lower in this way, you
\r
28 # will have to hand-edit the index file.
\r
30 # This script writes a new index file. The new file has to then be
\r
31 # hand-edited and checked before use; it contains comments indicating
\r
32 # old lines that were replaced.
\r
38 my $DIR = "../../text/resources";
\r
39 my $CLASSES = "../../../../../classes";
\r
41 #GetOptions('dir=s' => \$DIR,
\r
43 # '<>' => \&usage) || die;
\r
45 #usage() if (@ARGV);
\r
49 print STDERR "$DIR is not a directory\n";
\r
55 # $me =~ s|.+[/\\]||;
\r
56 # print "Usage: $me [-dir <dir>] [-id <id>]\n";
\r
57 # print " --dir <dir> Specify the directory containing the\n";
\r
58 # print " Transliterator_*.txt files\n";
\r
59 # print " --id <id> Specify a single ID to transform, e.g.\n";
\r
60 # print " Fullwidth-Halfwidth\n";
\r
66 ######################################################################
\r
67 # Update the filters of the alias lines in the index file
\r
68 # Assume lines are of the form:
\r
69 # <ID>:alias:<FILTER>;<REMAINDER>
\r
70 # <REMAINDER> can be
\r
75 $IN = "Transliterator_index.txt";
\r
77 open(IN, '<', "$DIR/$IN") or die "Cannot open $DIR/$IN for reading: $!\n";
\r
78 open(OUT, '>', "$DIR/$OUT") or die "Cannot open $DIR/$OUT for writing: $!\n";
\r
81 # Look for lines that are aliases with NF*
\r
82 if (/^([^:]+):alias:(\[.+?);\s*((NF[^\s]*?)\s*;.+)$/i) {
\r
89 # If it comes before NF* then adjust accordingly
\r
90 if (/^([^:]+):alias:(\[.+?);\s*(Lower\s*;.+)$/i) {
\r
92 if (length($2) < length($oldset)) {
\r
97 print STDERR "$id $NFXD $lower\n";
\r
98 my $set = getSourceSet($id, $NFXD, $lower);
\r
99 $_ = "$id:alias:$set;$remainder\n";
\r
106 print STDERR "Wrote $DIR/$OUT\n";
\r
109 ######################################################################
\r
110 # Get the source set (call out to Java), optionally with a closure.
\r
115 my $set = `java -classpath $CLASSES com.ibm.tools.translit.genIndexFilters $ID $NFXD $lower`;
\r