Current Path : /usr/local/share/namazu/filter/ |
FreeBSD hs32.drive.ne.jp 9.1-RELEASE FreeBSD 9.1-RELEASE #1: Wed Jan 14 12:18:08 JST 2015 root@hs32.drive.ne.jp:/sys/amd64/compile/hs32 amd64 |
Current File : //usr/local/share/namazu/filter/taro7_10.pl |
# # -*- Perl -*- # $Id: taro7_10.pl,v 1.1.2.14 2007/01/14 04:12:04 opengl2772 Exp $ # Copyright (C) 2003 Yukio USUDA # 2003-2007 Namazu Project All rights reserved. # This is free software with ABSOLUTELY NO WARRANTY. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either versions 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA # 02111-1307, USA # package taro7_10; use strict; use English; require 'util.pl'; require 'gfilter.pl'; sub mediatype() { return ( 'application/ichitaro7', 'application/x-js-taro' ); } sub status() { my $olepath = undef; $olepath = util::checklib('OLE/Storage_Lite.pm'); return 'no' unless $olepath; return 'yes' if ($English::PERL_VERSION >= 5.008); my $utfconvpath = undef; $utfconvpath = util::checklib('unicode.pl'); return 'yes' if $utfconvpath; return 'no'; } sub recursive() { return 0; } sub pre_codeconv() { return 0; } sub post_codeconv() { return 0; } sub add_magic($) { my ($magic) = @_; # FIXME: very ad hoc. $magic->addFileExts('\\.jtd$', 'application/x-js-taro'); $magic->addFileExts('\\.jfw$', 'application/ichitaro7'); return; } sub filter($$$$$) { my ($orig_cfile, $contref, $weighted_str, $headings, $fields) = @_; my $err = undef; $err = taro7_10_filter($orig_cfile, $contref, $weighted_str, $headings, $fields); return $err; } sub taro7_10_filter($$$$$) { my ($orig_cfile, $contref, $weighted_str, $headings, $fields) = @_; my $cfile = defined $orig_cfile ? $$orig_cfile : ''; my $err = undef; eval 'use OLE::Storage_Lite'; my $oleobject = OLE::Storage_Lite->new($contref); return (undef) unless($oleobject); my ($authorname, $title) = getinfo($oleobject); codeconv::normalize_eucjp(\$authorname); codeconv::normalize_eucjp(\$title); $fields->{'author'} = $authorname; $fields->{'title'} = $title; my $content = getcontent($oleobject); codeconv::normalize_eucjp(\$content); $$contref = $content; gfilter::line_adjust_filter($contref); gfilter::line_adjust_filter($weighted_str); gfilter::white_space_adjust_filter($contref); $fields->{'title'} = gfilter::filename_to_title($cfile, $weighted_str) unless $fields->{'title'}; gfilter::show_filter_debug_info($contref, $weighted_str, $fields, $headings); return undef; } sub byteswap($) { my($tmp)=@_; $$tmp = pack("n".length($$tmp)*2, unpack("v".length($$tmp)*2,$$tmp)); } sub getinfo($) { my($oleobject)=@_; my @pps = $oleobject->getPpsSearch( [OLE::Storage_Lite::Asc2Ucs("\x04JSRV_SummaryInformation")], 1, 1); return (undef) if($#pps < 0); my $author = undef; my $title = undef; my $position; if ($pps[0]->{Data}) { if ($pps[0]->{Data} =~ /\x02\x00\x00\x31\x8b\x89\xfa\x51\x57\x30/g) { $position = pos($pps[0]->{Data}); my $title_length = unpack("v", substr($pps[0]->{Data}, $position + 70, 2)); $title = substr($pps[0]->{Data}, $position + 86, $title_length); } if ($pps[0]->{Data} =~ /\x04\x00\x00\x31\x5c\x4f\x10\x62\x05\x80/g) { $position = pos($pps[0]->{Data}); my $author_length = unpack("v", substr($pps[0]->{Data}, $position + 70, 2)); $author = substr($pps[0]->{Data}, $position + 86, $author_length); } } taro7_10::byteswap(\$author); taro7_10::u16toe(\$author); $author =~ s/[\x00\x0E\x0c]//g; taro7_10::byteswap(\$title); taro7_10::u16toe(\$title); $title =~ s/[\x00\x0E\x0c]//g; return ($author, $title); } sub getcontent($) { my($oleobject)=@_; my @pps = $oleobject->getPpsSearch( [OLE::Storage_Lite::Asc2Ucs('DocumentText')], 1, 1); return (undef) if($#pps < 0); my $content=""; for my $i (0..$#pps) { if ($pps[$i]->{Data}) { my $size = unpack("N", substr($pps[$i]->{Data}, 0x1c, 4)) * 2; my $buf = substr($pps[$i]->{Data}, 0x20, $size); taro7_10::remove_ctlcodearea(\$buf); $content .= $buf . "\x00\x0a"; } } u16toe(\$content); $content =~ s/[\x00\x0E\x0c]//g; return $content; } sub remove_ctlcodearea($){ my ($textref) = @_; my $ctl_in = "\x00\x1c"; my $ctl_out = "\x00\x1f"; my $tmptext1 = $$textref; my $tmptext2=""; my $pos1=0; my $pos2=0; my @incodes; while ($tmptext1 =~ /$ctl_in/sg){ push(@incodes, pos($tmptext1)-2); } push(@incodes, length($tmptext1)); my $i=1; while (@incodes){ $pos2=shift(@incodes) ; my $tmptext3=""; $tmptext3=substr($tmptext1, $pos1, $pos2-$pos1); $tmptext3=~s/$ctl_in.*$ctl_out//s; $tmptext3=~s/$ctl_in.*//s; $tmptext2 .= $tmptext3; $i++; $pos1 = $pos2; } $$textref = $tmptext2; } # convert utf-16 to euc # require Perl5.8 or unicode.pl sub u16toe($) { my ($tmp) = @_; if ($English::PERL_VERSION >= 5.008){ eval 'use Encode qw/from_to Unicode JP/;'; Encode::from_to($$tmp, "UTF-16BE" ,"euc-jp"); }else{ eval require 'unicode.pl'; my @unicodeList = unpack("n*", $$tmp); $$tmp = unicode::u2e(@unicodeList); } } 1;