Advertisement
cwchen

Convert from zh-TW to zh-CN

Dec 24th, 2018
235
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 2.18 KB | None | 0 0
  1. use utf8;
  2. use open qw(:utf8);
  3. use Encode qw(encode_utf8 decode_utf8);
  4. use JSON;
  5.  
  6. BEGIN {
  7.     binmode(STDIN, ":utf8");
  8.     binmode(STDOUT, ":utf8");
  9.     binmode(STDERR, ":utf8");
  10.  
  11.     # Load the 6-character term table.
  12.     open $FH_6, "<", "ITDict_6_ts.json";
  13.     binmode($FH_6, ":utf8");
  14.     $IT_term_6_ts_ref = decode_json encode_utf8(<$FH_6>);
  15.     %IT_term_6_ts = %$IT_term_6_ts_ref;
  16.     $check_6 = join "|", keys %IT_term_6_ts;
  17.     close $FH_6;
  18.  
  19.     # Load the 5-character term table.
  20.     open $FH_5, "<", "ITDict_5_ts.json";
  21.     binmode($FH_5, ":utf8");
  22.     $IT_term_5_ts_ref = decode_json encode_utf8(<$FH_5>);
  23.     %IT_term_5_ts = %$IT_term_5_ts_ref;
  24.     $check_5 = join "|", keys %IT_term_5_ts;
  25.     close $FH_5;
  26.  
  27.     # Load the 4-character term table.
  28.     open $FH_4, "<", "ITDict_4_ts.json";
  29.     binmode($FH_4, ":utf8");
  30.     $IT_term_4_ts_ref = decode_json encode_utf8(<$FH_4>);
  31.     %IT_term_4_ts = %$IT_term_4_ts_ref;
  32.     $check_4 = join "|", keys %IT_term_4_ts;
  33.     close $FH_4;
  34.  
  35.     # Load the 3-character term table.
  36.     open $FH_3, "<", "ITDict_3_ts.json";
  37.     binmode($FH_3, ":utf8");
  38.     $IT_term_3_ts_ref = decode_json encode_utf8(<$FH_3>);
  39.     %IT_term_3_ts = %$IT_term_3_ts_ref;
  40.     $check_3 = join "|", keys %IT_term_3_ts;
  41.     close $FH_3;
  42.  
  43.     # Load the 2-character term table.
  44.     open $FH_2, "<", "ITDict_2_ts.json";
  45.     binmode($FH_2, ":utf8");
  46.     $IT_term_2_ts_ref = decode_json encode_utf8(<$FH_2>);
  47.     %IT_term_2_ts = %$IT_term_2_ts_ref;
  48.     $check_2 = join "|", keys %IT_term_2_ts;
  49.     close $FH_2;
  50.  
  51.     # Load the character table.
  52.     open $FH, "<", "tongwei_ts.json";
  53.     binmode($FH, ":utf8");
  54.     $tongwei_ts_ref = decode_json encode_utf8(join "", <$FH>);
  55.     %tongwei_ts = %$tongwei_ts_ref;
  56.     $check = join "|", keys %tongwei_ts;
  57.     close $FH;
  58. }
  59.  
  60. # Decode the input string.
  61. $_ = decode_utf8 $_;
  62.  
  63. # Perform term-to-term conversion.
  64. s/($check_6)/$IT_term_6_ts{$1}/g;
  65. s/($check_5)/$IT_term_5_ts{$1}/g;
  66. s/($check_4)/$IT_term_4_ts{$1}/g;
  67. s/($check_3)/$IT_term_3_ts{$1}/g;
  68. s/($check_2)/$IT_term_2_ts{$1}/g;
  69.  
  70. # Perform character-to-character conversion.
  71. s/($check)/$tongwei_ts{$1}/g;
  72.  
  73. # Encode the output string.
  74. $_ = encode_utf8 $_;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement