#!/usr/bin/env perl ### $Id: kotexindy.pl,v 1.8 2011/08/09 00:32:27 nomos Exp $ ### texindy wrapper for ko.tex ### written by Dohyun Kim ### public domain #use warnings; #use strict; use 5.8.0; use Getopt::Long qw(:config no_ignore_case); my @args = @ARGV; my ($opt_version, $opt_help, $opt_quiet, $opt_verbose, $opt_stdin, $opt_german, $opt_no_ranges, $opt_letter_ordering, @opt_debug, $opt_out_file, $opt_log_file, $opt_language, $opt_codepage, @opt_module, $opt_input_markup); GetOptions ( 'version|V' => \$opt_version, 'help|?' => \$opt_help, 'quiet' => \$opt_quiet, 'verbose' => \$opt_verbose, 'stdin|i' => \$opt_stdin, 'german' => \$opt_german, 'no-ranges|r' => \$opt_no_ranges, 'letter-ordering|l' => \$opt_letter_ordering, 'debug=s' => \@opt_debug, 'out-file=s' => \$opt_out_file, 'log-file|t=s' => \$opt_log_file, 'language|L=s' => \$opt_language, 'codepage|C=s' => \$opt_codepage, 'module|M=s' => \@opt_module, 'input-markup|I=s' => \$opt_input_markup); if ($opt_version or $opt_help) { system "texindy @args"; exit; } ### obtain output file name my @idxfiles = @ARGV; my $indfile = $opt_out_file; if (!$indfile and @idxfiles) { $indfile = $idxfiles[0]; $indfile =~ s/\.idx$/\.ind/; } $indfile or die "Failed to obtain output file name!"; # support stdin option $opt_stdin and @idxfiles = ('-'); # remove idxfiles from @args for my $i (@idxfiles) { for (0 .. $#args) { $i eq $args[$_] and $args[$_] = ''; } } my (@idxarr, @indarr); ### variables for subroutines my @hanja_to_hangul = get_hanja_hangul_table("hanja_hg.tab"); my @hanjacompat_to_hangul = get_hanja_hangul_table("hjcom_hg.tab"); my @hanjaextA_to_hangul = get_hanja_hangul_table("hjexa_hg.tab"); my @jamo_cho_comp = ( # 0x115F HCF => 0x314F ㅏ 0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142, 0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314A, 0x314B, 0x314C, 0x314D, 0x314E, 0x1113, 0x3165, 0x3166, 0x1116, 0x1117, 0x1118, 0x1119, 0x3140, 0x111B, 0x316E, 0x3171, 0x3172, 0x111F, 0x3173, 0x3144, 0x3174, 0x3175, 0x1124, 0x1125, 0x1126, 0x3176, 0x1128, 0x3177, 0x112A, 0x3178, 0x3179, 0x317A, 0x317B, 0x317C, 0x1130, 0x1131, 0x317D, 0x1133, 0x1134, 0x1135, 0x317E, 0x1137, 0x1138, 0x1139, 0x113A, 0x113B, 0x113C, 0x113D, 0x113E, 0x113F, 0x317F, 0x1141, 0x1142, 0x1143, 0x1144, 0x1145, 0x1146, 0x3180, 0x1148, 0x1149, 0x114A, 0x114B, 0x3181, 0x114D, 0x114E, 0x114F, 0x1150, 0x1151, 0x1152, 0x1153, 0x1154, 0x1155, 0x1156, 0x3184, 0x3185, 0x3186, 0x115A, 0x3167, 0x3135, 0x3136, 0x115E, 0x314F); my $cho = "\x{1100}-\x{115F}\x{A960}-\x{A97C}"; my $jung = "\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}"; my $jong = "\x{11A8}-\x{11FF}\x{D7CB}-\x{D7FB}"; my $hanja = "\x{3400}-\x{4DB5}\x{4E00}-\x{9FA5}\x{F900}-\x{FA2D}"; my $ist_keyword = '\indexentry'; my $ist_actual = '@'; my $ist_encap = '|'; my $ist_level = '!'; my $ist_quote = '"'; my $ist_arg_open = '{'; my $ist_arg_close = '}'; ### processing input files foreach my $file (@idxfiles) { open IDX, "<$file" or die "$file: $!"; binmode IDX, ":utf8"; while () { # \indexentry{ ..... }{ .. } # -> $pre $body $post if (/(\Q$ist_keyword\E\s*\Q$ist_arg_open\E) (.*?[^\Q$ist_quote\E]) (\Q$ist_arg_close$ist_arg_open\E.+?\Q$ist_arg_close\E)$/x) { my($pre,$body,$post) = ($1,$2,$3); # \indexentry{ ..... | .. }{ .. } # -> $pre $body $post my @xbody = split /(?0; $i--) { $post = $ist_encap.$xbody[$i].$post; } $body = $xbody[0]; # !을 경계로 가름 @xbody = split /(?> 8 and die "\ntexindy failed!\n"; ### processing output file open IND, "<:utf8", $indfile or die "$indfile: $!"; while () { if (/\\lettergroup/) { &insertfillers; &jamo_to_jamocomp; } push @indarr, $_; } close IND; open IND, ">:utf8", $indfile or die "$indfile: $!"; print IND @indarr; close IND; ########## SUBROUTINES ########## sub syllable_to_jamo { s/([\x{AC00}-\x{D7A3}])/do_syllable_to_jamo($1)/ge; } sub do_syllable_to_jamo { my $syl = ord shift; my $cho = ($syl - 0xac00) / (21 * 28) + 0x1100; my $jung = ($syl - 0xac00) % (21 * 28) / 28 + 0x1161; my $jong = ($syl - 0xac00) % 28; if ($jong) { $jong += 0x11a7; return chr($cho).chr($jung).chr($jong); } return chr($cho).chr($jung); } sub hanja_to_hangul { s/([\x{3400}-\x{4DB5}])/chr($hanjaextA_to_hangul[ord($1)-0x3400])/ge; s/([\x{4E00}-\x{9FA5}])/chr($hanja_to_hangul[ord($1)-0x4E00])/ge; s/([\x{F900}-\x{FA2D}])/chr($hanjacompat_to_hangul[ord($1)-0xF900])/ge; } sub get_hanja_hangul_table { my $file = shift; my @HJHG; $file = `kpsewhich $file`; chomp $file; open TAB, $file or die "$file : $!\n"; @HJHG = ; close TAB; chomp @HJHG; return @HJHG; } sub insertfillers { s/([$cho])([$jong])/$1\x{1160}\x{115F}\x{1160}$2/g; s/(^|[^$jung$jong])([$jong])/$1\x{115F}\x{1160}$2/g; s/(^|[^$cho$jung])([$jung])/$1\x{115F}$2/g; s/([$cho])([^$cho$jung]|$)/$1\x{1160}$2/g; } sub insertjongsongfiller { # 0xF86A as jongseong filler s/([$cho][$jung])([^$jong]|$)/$1\x{F86A}$2/g; } sub jamo_to_jamocomp { s/([\x{1100}-\x{115F}])\x{1160}([^$jong]|$)/&cjamo_by_jamo_cho($1).$2/ge; } sub cjamo_by_jamo_cho { my $jamo = ord shift; my $cjamo = $jamo_cho_comp[$jamo - 0x1100]; if ($cjamo < 0x11FF) { return chr($cjamo) . chr(0x1160); } else { return chr($cjamo); } } ### EOF