1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
# Perl5 script to make binary search table for width of UCS characters on terminal
# from EastAsianWidth.txt:
# perl eaw_ucswidth.pl /path/to/EastAsianWidth.txt
$eaw_a2n_h = 'eaw_a2n_ucswidth.mk_btri.h' if (!defined($eaw_a2n_h));
$eaw_a2w_h = 'eaw_a2w_ucswidth.mk_btri.h' if (!defined($eaw_a2w_h));
require 5;
my (@wid_a2n, @wid_a2w, $first, $line, $beg, $end, $width, $desc, $i);
while (defined($line = <>)) {
if ($line =~ /^\s*([0-9A-Fa-f]{4})\s*;\s*(F|H|W|N|Na|A)\s*[\;\#]\s*(.*)/i) {
($beg, $width, $desc) = (hex($1), $2, $3);
$end = $beg;
if (defined($first) && $desc =~ /\bLast>/i) {
$beg = $first;
}
elsif ($desc =~ /\bFirst>/i) {
$first = $beg;
next;
}
}
elsif ($line =~ /^\s*([0-9A-Fa-f]{4})\s*\.\.\s*([0-9A-Fa-f]{4})\s*;\s*(F|H|W|N|Na|A)\s*[\;\#]\s*(.*)/i) {
($beg, $end, $width, $desc) = (hex($1), hex($2), $3, $4);
}
else {
next;
}
if ($beg >= 0xA0) {
if ($desc =~ /^COMBINING\s/i) {
&push_wid(\@wid_a2n, $beg, $end, 0);
&push_wid(\@wid_a2w, $beg, $end, 0);
}
elsif ($width =~ /^(F|W)$/i) {
&push_wid(\@wid_a2n, $beg, $end, 2);
&push_wid(\@wid_a2w, $beg, $end, 2);
}
elsif ($width =~ /^A$/i) {
&push_wid(\@wid_a2w, $beg, $end, 2);
}
}
$first = undef;
}
&wr($eaw_a2n_h, \@wid_a2n);
&wr($eaw_a2w_h, \@wid_a2w);
exit;
sub push_wid {
my ($arr, $b, $e, $w) = @_;
if (!@$arr || $arr->[$#$arr] != $w || $arr->[$#$arr - 1] + 1 != $b) {
push(@$arr, $b, $e, $w);
}
else {
$arr->[$#$arr - 1] = $e;
}
}
sub wr {
my ($fn, $arr) = @_;
local (*H);
open(H, ">$fn") || die "open(H, \">$fn_h\"): $!";
print H <<'EOF';
%%TYPE number
%%BEGIN
EOF
printf(H "0x%04X-0x%04X,%d\n", splice(@$arr, 0, 3)) while (@$arr);
}
|