Current Path : /compat/linux/proc/self/root/usr/local/share/namazu/pl/ |
FreeBSD hs32.drive.ne.jp 9.1-RELEASE FreeBSD 9.1-RELEASE #1: Wed Jan 14 12:18:08 JST 2015 root@hs32.drive.ne.jp:/sys/amd64/compile/hs32 amd64 |
Current File : //compat/linux/proc/self/root/usr/local/share/namazu/pl/codeconv.pl |
# # -*- Perl -*- # $Id: codeconv.pl,v 1.11.8.15 2007/11/16 17:24:53 opengl2772 Exp $ # Copyright (C) 1997-1999 Satoru Takabayashi All rights reserved. # Copyright (C) 2000-2005 Namazu Project All rights reserved. # This is free software with ABSOLUTELY NO WARRANTY. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either versions 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA # 02111-1307, USA # # This file must be encoded in EUC-JP encoding # # package for code conversion # # imported from Rei FURUKAWA <furukawa@dkv.yamaha.co.jp> san's pnamazu. # [1998-09-24] package codeconv; use strict; my @ktoe = (0xA3, 0xD6, 0xD7, 0xA2, 0xA6, 0xF2, 0xA1, 0xA3, 0xA5, 0xA7, 0xA9, 0xE3, 0xE5, 0xE7, 0xC3, 0xBC, 0xA2, 0xA4, 0xA6, 0xA8, 0xAA, 0xAB, 0xAD, 0xAF, 0xB1, 0xB3, 0xB5, 0xB7, 0xB9, 0xBB, 0xBD, 0xBF, 0xC1, 0xC4, 0xC6, 0xC8, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD2, 0xD5, 0xD8, 0xDB, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE4, 0xE6, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEF, 0xF3, 0xAB, 0xAC, ); # convert JIS X0201 KANA characters to JIS X0208 KANA sub ktoe ($$) { my ($c1, $c2) = @_; $c1 = ord($c1) & 0x7f; my($hi) = ($c1 <= 0x25 || $c1 == 0x30 || 0x5e <= $c1)? "\xa1": "\xa5"; $c1 -= 0x21; my($lo) = $ktoe[$c1]; if ($c2){ if ($c1 == 5){ $lo = 0xdd; }else{ $lo++; $lo++ if (ord($c2) == 0xdf); } } return $hi . chr($lo); } sub eucjp_han2zen_kana ($) { my ($strref) = @_; if (util::islang("ja")) { $$strref =~ s/\x8e([\xa1-\xdf])(\x8e([\xde\xdf]))?/&ktoe($1,$3)/geo; } } # convert Shift_JIS to EUC-JP sub stoe ($) { my ($c1, $c2) = unpack('CC', shift); if (0xa1 <= $c1 && $c1 <= 0xdf) { $c2 = $c1; $c1 = 0x8e; } elsif (0x9f <= $c2) { $c1 = $c1 * 2 - ($c1 >= 0xe0 ? 0xe0 : 0x60); $c2 += 2; } else { $c1 = $c1 * 2 - ($c1 >= 0xe0 ? 0xe1 : 0x61); $c2 += 0x60 + ($c2 < 0x7f); } # Outside of the range of an EUC-JP code. return chr(0xa2) . chr(0xae) if ($c1 < 0x80 || $c1 >= 0x100 || $c2 < 0x80 || $c2 >= 0x100); return chr($c1) . chr($c2); } sub shiftjis_to_eucjp ($) { my ($str) = @_; if (util::islang("ja")) { $str =~ s/([\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]|[\xa1-\xdf])/&stoe($1)/geo; } return $str; } # convert EUC-JP to Shift_JIS sub etos ($) { my ($c1, $c2) = unpack('CC', shift); if ($c1 == 0x8e) { # JIS X 0201 KATAKANA return chr($c2); } elsif ($c1 == 0x8f) { # JIS X 0212 HOJO KANJI return "\x81\xac"; } elsif ($c1 % 2) { $c1 = ($c1>>1) + ($c1 < 0xdf ? 0x31 : 0x71); $c2 -= 0x60 + ($c2 < 0xe0); } else { $c1 = ($c1>>1) + ($c1 < 0xdf ? 0x30 : 0x70); $c2 -= 2; } return chr($c1) . chr($c2); } sub eucjp_to_shiftjis ($) { my ($str) = @_; if (util::islang("ja")) { $str =~ s/([\xa1-\xfe][\xa1-\xfe]|\x8e[\xa1-\xdf]|\x8f[\xa1-\xfe][\xa1-\xfe])/&etos($1)/ge; } return $str; } # Remove a garbage EUC-JP 1st charactor at the end. sub chomp_eucjp ($) { my ($str) = @_; if (util::islang("ja")) { if ($str =~ /\x8f$/ or $str =~ tr/\x8e\xa1-\xfe// % 2) { chop($str); chop($str) if ($str =~ /\x8f$/); } } return $str; } # convert to EUC-JP by using NKF sub toeuc ($) { my ($contref, $opt) = @_; if (util::islang("ja")) { my $nkf_opt = "-emXZ1"; if ($var::USE_NKF_MODULE) { $$contref = NKF::nkf($nkf_opt, $$contref); # namazu-devel-ja #3152 -> backed out, #3181 } else { my $nkftmp = util::tmpnam("NMZ.nkf"); { my $nh = util::efopen("|$conf::NKF $nkf_opt > $nkftmp"); print $nh $$contref; util::fclose($nh); } { my $nh = util::efopen("< $nkftmp"); $$contref = util::readfile($nh); util::fclose($nh); } unlink($nkftmp); } } } sub eucjp_zen2han_ascii ($) { my ($strref) = @_; if (util::islang("ja")) { $$strref =~ s/([\xa1-\xfe][\xa1-\xfe]|\x8e[\xa1-\xdf]|\x8f[\xa1-\xfe][\xa1-\xfe])/ my $tmp = $1; if ($tmp =~ m!\xa3([\xb0-\xb9\xc1-\xda\xe1-\xfa])!) { $tmp = $1 & "\x7F"; } elsif ($tmp =~ m!\xa1([\xa0-\xfe])!) { my $kigou = ( # X0208 kigou conversion table # 0xa1a0 - 0xa1fe "\x00","\x20","\x00","\x00","\x2C","\x2E","\x00","\x3A", "\x3B","\x3F","\x21","\x00","\x00","\x27","\x60","\x00", "\x5E","\x00","\x5F","\x00","\x00","\x00","\x00","\x00", "\x00","\x00","\x00","\x00","\x00","\x2D","\x00","\x2F", "\x5C","\x00","\x00","\x7C","\x00","\x00","\x60","\x27", "\x22","\x22","\x28","\x29","\x00","\x00","\x5B","\x5D", "\x7B","\x7D","\x3C","\x3E","\x00","\x00","\x00","\x00", "\x00","\x00","\x00","\x00","\x2B","\x2D","\x00","\x00", "\x00","\x3D","\x00","\x3C","\x3E","\x00","\x00","\x00", "\x00","\x00","\x00","\x00","\x00","\x00","\x00","\x00", "\x24","\x00","\x00","\x25","\x23","\x26","\x2A","\x40", "\x00","\x00","\x00","\x00","\x00","\x00","\x00","\x00" )[unpack("C", $1) - unpack("C", "\xa0")]; $tmp = $kigou unless ($kigou eq "\x00"); } $tmp; /gse; } } sub normalize_eucjp ($) { my ($contref) = @_; if (util::islang("ja")) { codeconv::eucjp_han2zen_kana($contref); codeconv::eucjp_zen2han_ascii($contref); } $contref; } sub normalize_nl ($) { my ($conts) = @_; $$conts =~ s/\x0d\x0a/\x0a/g; # Windows $$conts =~ s/\x0d/\x0a/g; # Mac $$conts =~ s/\x0a/\n/g; $$conts; } sub remove_control_char ($) { my ($textref) = @_; $$textref =~ tr/\x01-\x08\x0b-\x0c\x0e-\x1f\x7f/ /; # Remove control char. } sub normalize_document ($) { my ($textref) = @_; codeconv::normalize_nl($textref); codeconv::remove_control_char($textref); } sub codeconv_document ($) { my ($textref) = @_; #codeconv::to_inner_encoding($textref, 'unknown'); codeconv::toeuc($textref); codeconv::normalize_document($textref); } sub normalize_eucjp_document ($) { my ($textref) = @_; codeconv::normalize_eucjp($textref); codeconv::normalize_document($textref); } 1;