#!/usr/bin/env perl
#
# A simple wrapper for kingtr command (part of IBM's Translation King).
# Copyright (c) 2006-2012, Hiroyuki Ohsaki.
# All rights reserved.
#
# $Id: pkingtr,v 1.10 2012/08/12 05:47:35 oosaki Exp oosaki $
#
# Pipeline: read text, convert it to EUC-JP, mask LaTeX fragments that the
# translator must not touch, apply the pre-translation rewrite rules, pipe
# the result through kingtr, restore the masked fragments, and finally apply
# the post-translation rewrite rules.
#
# NOTE(review): the Japanese literals in the rule tables are matched against
# text that has been converted to EUC-JP, so this file presumably has to be
# saved in EUC-JP as well -- confirm before changing the file encoding.

use strict;
use warnings;

use File::Basename;
use Getopt::Std;
use IPC::Open2;
use Jcode;
use Readonly;

# Print a usage message and terminate.
#
# NOTE(review): the original usage text was lost; this one is reconstructed
# from getopts('dno') and from how each option is used below.
sub usage {
    my $prog = basename($0);
    die <<"EOF";
usage: $prog [-dno] [file...]
  -d    debug mode (print intermediate strings to stderr)
  -n    do not use the user dictionary
  -o    also print the preprocessed original text
EOF
}

# Rewrite rules applied to the Japanese text BEFORE translation.
# Each rule is a (regexp => replacement) pair; rules are applied in the
# order listed.
Readonly my @PRE_TRANS_TABLE => (
    '(ネットワーク|図)中' => '$1内',
    'が(.*?)に与える影響' => 'の$1への影響',
    '\\\%'                => '%',
);

# Rewrite rules applied to the English text AFTER translation.
# Order matters: 'Table' must become 'Tab.' before the 'Tab.' + whitespace
# rule turns it into 'Tab.~'.
Readonly my @POST_TRANS_TABLE => (
    '\b(about)\b'                              => 'approximately',
    ',\s+([^,]+), etc\.'                       => ', and $1',
    '\b(which)\b'                              => 'that',
    '\b(Table)\b'                              => 'Tab.',
    '\b((figure|Fig\.)\s+)'                    => 'Fig.~',
    '\b(Eq\.\s+)'                              => 'Eq.~',
    '\b(Section\s+)'                           => 'Section~',
    '\b(Tab\.\s+)'                             => 'Tab.~',
    '\b(Here,)'                                => 'where',
    '\b(to\s+([\d,.]+)\s+from\s+([\d,.]+))'    => 'from $3 to $2',
    '%'                                        => '\\\%',
    'In order to'                              => 'To',
    'in order to'                              => 'to',
    'Note that,'                               => 'Note that',
);

# FIXME: assume Japanese strings as its input
#
# Join hard-wrapped lines and drop whitespace next to multi-byte characters.
# The patterns are byte-oriented: [\x20-\x7f] is printable ASCII and
# [\x80-\xff] is any high byte (i.e. part of a multi-byte character).
# Returns the cleaned-up string.
sub trim_string {
    my $str = shift;

    # A line break between two ASCII characters becomes a single space ...
    $str =~ s/([\x20-\x7f])\s*\n\s*([\x20-\x7f])/$1 $2/g;

    # ... while a line break touching a high byte is removed entirely.
    $str =~ s/([\x20-\x7f])\s*\n\s*([\x80-\xff])/$1$2/g;
    $str =~ s/([\x80-\xff])\s*\n\s*([\x20-\x7f])/$1$2/g;
    $str =~ s/([\x80-\xff])\s*\n\s*([\x80-\xff])/$1$2/g;

    # Remove any remaining whitespace adjacent to a high byte.
    $str =~ s/\s+([\x80-\xff])/$1/g;
    $str =~ s/([\x80-\xff])\s+/$1/g;
    return $str;
}

# Replace LaTeX fragments with placeholder labels.
#
#   $str   - text to mask
#   $hashp - hash ref that receives placeholder => original fragment
#
# \ref{...} and inline math ($...$) become a bare six-digit label;
# \cite{...} becomes the label wrapped in square brackets.  Whitespace
# around a masked fragment is consumed.  Returns the masked text.
sub mask_strings {
    my ( $str, $hashp ) = @_;

    # String increment keeps the zero padding: '000001', '000002', ...
    my $label = '000001';

    # Braces are escaped: an unescaped literal '{' in a pattern is
    # deprecated or rejected by newer versions of Perl.
    for my $fragment ( qr/\\ref\{.*?\}/, qr/\$[^\$]+\$/ ) {
        $str =~ s{\s*($fragment)\s*}{
            my $key = $label++;
            $hashp->{$key} = $1;
            $key;
        }eg;
    }
    $str =~ s{\s*(\\cite\{.*?\})\s*}{
        my $key = '[' . $label++ . ']';
        $hashp->{$key} = $1;
        $key;
    }eg;
    return $str;
}

# Restore the fragments hidden by mask_strings().
#
#   $str   - text containing placeholders
#   $hashp - hash ref of placeholder => original fragment
#
# All placeholders are replaced in a single pass, so the result does not
# depend on hash order and restored text is never substituted again.
# Returns the unmasked text.
sub unmask_strings {
    my ( $str, $hashp ) = @_;

    # Longest key first so a bracketed label wins over a bare one.
    my @keys = sort { length $b <=> length $a or $a cmp $b } keys %{$hashp};

    # An empty alternation would match everywhere; nothing to do instead.
    return $str if !@keys;

    my $alternation = join '|', map { quotemeta } @keys;
    $str =~ s/($alternation)/$hashp->{$1}/g;
    return $str;
}

# Apply a list of regexp substitutions to a string.
#
#   $str   - text to rewrite
#   $rules - array ref holding a flat list of (regexp => replacement)
#            pairs, applied in order; a hash ref is still accepted and is
#            applied in sorted-key order so the result is deterministic
#
# Replacements may use capture variables such as $1, which is why each rule
# is compiled with a string eval.  Only trusted, hard-coded rules may be
# passed in: rule text is executed as Perl code.  A rule that fails to
# compile is reported with warn() and skipped.  Returns the rewritten text.
sub replace_with_regexps {
    my ( $str, $rules ) = @_;

    my @pairs
        = ref $rules eq 'ARRAY'
        ? @{$rules}
        : map { ( $_ => $rules->{$_} ) } sort keys %{$rules};

    while ( my ( $pattern, $replacement ) = splice @pairs, 0, 2 ) {
        eval qq(\$str =~ s/$pattern/$replacement/g; 1)
            or warn "replace_with_regexps: rule '$pattern' failed: $@";
    }
    return $str;
}

# Rebuild the user dictionary by running "make clean all" in ~/.king when a
# Makefile exists there.  make's standard output is discarded.
sub update_userdic {
    my $home = $ENV{HOME};
    return if !defined $home;

    my $dir = "$home/.king";
    return if !-f "$dir/Makefile";

    # The directory is handed to the shell as a positional parameter so
    # that unusual characters in $HOME cannot alter the command.
    system( 'sh', '-c', 'cd "$1" && make clean all >/dev/null', 'sh', $dir )
        == 0
        or warn "$0: failed to update user dictionary in $dir\n";
    return;
}

our ( $opt_d, $opt_n, $opt_o );
getopts('dno') or usage();
my $show_original = $opt_o;
my $no_dictionary = $opt_n;
my $debug         = $opt_d;

update_userdic();

# Slurp all input (files named on the command line, or stdin).
my $original = do { local $/; <> };
$original = '' if !defined $original;

my %hash;
$original = Jcode->new($original)->euc();
$original = mask_strings( $original, \%hash );
$original = trim_string($original);
$original = replace_with_regexps( $original, \@PRE_TRANS_TABLE );
warn ">> $original\n\n" if $debug;

my $translated;
{
    # kingtr is run with an EUC-JP locale; with -n, HOME points at /tmp so
    # that the dictionary under the real home directory is not picked up.
    local $ENV{LANG} = 'ja_JP.eucJP';
    local $ENV{HOME} = '/tmp' if $no_dictionary;

    # NOTE(review): the whole input is written before any output is read;
    # a very large input could block if kingtr fills its output pipe first.
    my ( $child_out, $child_in );
    my $pid = open2( $child_out, $child_in, 'kingtr' );
    print {$child_in} $original;
    close $child_in;
    $translated = do { local $/; <$child_out> };
    close $child_out;

    # Reap the child and report an abnormal exit.
    waitpid $pid, 0;
    warn "$0: kingtr exited with status " . ( $? >> 8 ) . "\n" if $? != 0;
}
$translated = '' if !defined $translated;

# Re-join lines: a break between two alphanumerics becomes a space, every
# other newline is dropped.
$translated =~ s/([0-9A-Za-z])\n([0-9A-Za-z])/$1 $2/g;
$translated =~ s/\n//g;
warn ">> $translated\n\n" if $debug;

$translated = unmask_strings( $translated, \%hash );
$translated = replace_with_regexps( $translated, \@POST_TRANS_TABLE );

print "$original\n" if $show_original;
print "$translated\n";