config root man

Current Path : /usr/local/share/namazu/filter/

FreeBSD hs32.drive.ne.jp 9.1-RELEASE FreeBSD 9.1-RELEASE #1: Wed Jan 14 12:18:08 JST 2015 root@hs32.drive.ne.jp:/sys/amd64/compile/hs32 amd64
Upload File :
Current File : //usr/local/share/namazu/filter/excel.pl

#
# -*- Perl -*-
# $Id: excel.pl,v 1.13.4.23 2007/11/16 17:11:00 opengl2772 Exp $
# Copyright (C) 1997-2000 Satoru Takabayashi,
#               1999 NOKUBI Takatsugu, 
#               2000-2007 Namazu Project All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either versions 2, or (at your option)
#  any later version.
# 
#  This program is distributed in the hope that it will be useful
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#  02111-1307, USA
#
#  This file must be encoded in EUC-JP encoding
#

package excel;
use strict;
use English;
use File::Basename;
require 'util.pl';
require 'gfilter.pl';
require 'html.pl';
eval 'require NKF;';

my $xlconvpath  = undef;
my @xlconvopts  = undef;
my $utfconvpath = undef;
my $convname = undef;
my $wvsummarypath = undef;

my $nkfversion = 0.00;
eval '$nkfversion = $NKF::VERSION;';
$nkfversion = 0.00 if (!defined $nkfversion);

sub mediatype() {
    return ('application/excel');
}

sub status() {
    # The check of a dependence filter.
    return 'no' if (html::status() ne 'yes');

    $wvsummarypath = util::checkcmd('wvSummary');

    $xlconvpath = util::checkcmd('xlhtml') || util::checkcmd('xlHtml');
#    return 'no' unless defined $xlconvpath;
    if (defined $xlconvpath) {
        @xlconvopts = ("-m");
        if (!util::islang("ja")) {
            return 'yes';
        } else {
            return 'yes' if ($English::PERL_VERSION >= 5.008);
            return 'yes' if ($nkfversion >= 2.04);

            $utfconvpath = util::checkcmd('lv');
            if (defined $utfconvpath) {
                return 'yes';
            } else {
                return 'no';
            }
        } 
    } else {
        $xlconvpath = util::checkcmd('doccat');
        @xlconvopts = ("-o", "e");
        return 'yes' if defined $xlconvpath;
        return 'no'; 
    }
}

sub recursive() {
    return 0;
}

sub pre_codeconv() {
    return 0;
}

sub post_codeconv () {
    return 0;
}

sub add_magic ($) {
    my ($magic) = @_;

    # FIXME: very ad hoc.
    $magic->addFileExts('\\.xls$', 'application/excel');
    return;
}

sub filter ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields)
        = @_;
    my $err = undef;

    $convname = basename($xlconvpath) unless (defined $convname);

    if ($convname =~ /xlhtml/i) {
        $err = filter_xl($orig_cfile, $cont, $weighted_str, $headings, $fields);
    } else {
        $err = filter_doccat($orig_cfile, $cont, $weighted_str, $headings, $fields);
    }
    return $err;
}

sub filter_xl ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields)
        = @_;
    my $cfile = defined $orig_cfile ? $$orig_cfile : '';

    util::vprint("Processing ms-excel file ... (using  '$xlconvpath')\n");

    my $tmpfile  = util::tmpnam('NMZ.excel');
    {
        my $fh = util::efopen("> $tmpfile");
        print $fh $$cont;
        util::fclose($fh);
    }

    # get summary info in all OLE documents.
    getSummaryInfo($tmpfile, $cont, $weighted_str, $headings, $fields);
    my $title = $fields->{'title'};

    # -m: No encoding for multibyte. It's necessary to
    # handle a Japanese Excel 5.0 or 95 document correctly.
    {
        my @cmd = ($xlconvpath, @xlconvopts, $tmpfile);
        my $fh_out = IO::File->new_tmpfile();
        my $status = util::syscmd(
            command => \@cmd,
            option => {
                "stdout" => $fh_out,
                "stderr" => "/dev/null",
            },
        );
        my $size = util::filesize($fh_out);
        if ($size == 0) {
            util::fclose($fh_out);
            unlink $tmpfile;
            return "Unable to convert file ($xlconvpath error occurred).";
        }
        if ($size > $conf::FILE_SIZE_MAX) {
            util::fclose($fh_out);
            unlink $tmpfile;
            return 'Too large excel file.';
        }
        $$cont = util::readfile($fh_out);
        util::fclose($fh_out);
    }
    unlink $tmpfile;

    codeconv::normalize_document($cont);

    # Code conversion for Japanese document.
    if (util::islang("ja")) {
        # Pattern for xlHtml version 0.2.6.
        if ($$cont =~ m!^<FONT SIZE="?-1"?><I>Last Updated(&nbsp;using| with) Excel 5.0 or 95</I></FONT><br>$!m) 
        {
            $$cont = codeconv::shiftjis_to_eucjp($$cont);
            codeconv::normalize_eucjp($cont);
        } else {
            utf8_to_eucjp($cont);
        }
    } 

    # Extract the author and exclude xlHtml's footer at once.
    $$cont =~ s!^<FONT SIZE="?-1"?><I>Spreadsheet's Author:&nbsp;(.*?)</I></FONT><br>.*!!ms;  # '
    if (defined $1) {
        $fields->{'author'} = $1 unless (defined $fields->{'author'});
    }

    # Title shoud be removed.
    # Because xlHtml generate poor <TITLE>/foo/bar/NMZ.excel.tmp</TITLE>.
    $$cont =~ s!<TITLE.*?>.*?</TITLE>!!is;

    $$cont =~ s!<TD>&nbsp;</TD>!!g;

    html::html_filter($cont, $weighted_str, $fields, $headings);
    $fields->{'title'} = $title if (defined $title);

    gfilter::line_adjust_filter($cont);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($cont);
    $fields->{'title'} = gfilter::filename_to_title($cfile, $weighted_str)
        unless $fields->{'title'};
    gfilter::show_filter_debug_info($cont, $weighted_str,
                                    $fields, $headings);

    return undef;
}

sub filter_doccat ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields)
        = @_;
    my $cfile = defined $orig_cfile ? $$orig_cfile : '';

    util::vprint("Processing ms-excel file ... (using  '$xlconvpath')\n");

    my $tmpfile  = util::tmpnam('NMZ.excel');
    {
        my $fh = util::efopen("> $tmpfile");
        print $fh $$cont;
        util::fclose($fh);
    }
    {
        my @cmd = ($xlconvpath, @xlconvopts, $tmpfile);
        my $fh_out = IO::File->new_tmpfile();
        my $status = util::syscmd(
            command => \@cmd,
            option => {
                "stdout" => $fh_out,
                "stderr" => "/dev/null",
            },
        );
        my $size = util::filesize($fh_out);
        if ($size == 0) {
            util::fclose($fh_out);
            unlink $tmpfile;
            return "Unable to convert file ($xlconvpath error occurred)";
        }
        if ($size > $conf::FILE_SIZE_MAX) {
            util::fclose($fh_out);
            unlink $tmpfile;
            return 'Too large excel file.';
        }
        $$cont = util::readfile($fh_out);
        util::fclose($fh_out);
    }
    unlink $tmpfile;

    codeconv::normalize_document($cont);

    gfilter::line_adjust_filter($cont);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($cont);
    $fields->{'title'} = gfilter::filename_to_title($cfile, $weighted_str)
        unless $fields->{'title'};
    gfilter::show_filter_debug_info($cont, $weighted_str,
                                    $fields, $headings);

    return undef;
}

sub getSummaryInfo ($$$$$) {
    my ($cfile, $cont, $weighted_str, $headings, $fields)
        = @_;

    return undef unless (defined $wvsummarypath);

    my @cmd = ($wvsummarypath, $cfile);
    my $fh_out = IO::File->new_tmpfile();
    my $status = util::syscmd(
        command => \@cmd,
        option => {
            "stdout" => $fh_out,
            "stderr" => "/dev/null",
        },
    );
    my $summary = util::readfile($fh_out);
    codeconv::normalize_document(\$summary);
    my $orgsummary = $summary;

    my $size = util::filesize($fh_out);
    util::fclose($fh_out);
    if ($size == 0) {
        return undef;
    }
    if ($size > $conf::FILE_SIZE_MAX) {
        return 'Too large summary file.';
    }

    # Codepage
    #   932 : 0x000003a4 : Shift_JIS
    # 10001 : 0x00002711 : x-mac-japanese
    # 65001 : 0xfffffde9 : UTF-8


    my $codepage = "000003a4"; # Shift_JIS
    my $title = undef;
    my $subject = undef;
    my $lastauthor = undef;
    my $author = undef;
    my $keywords = undef;

    if ($summary =~ /^Codepage is 0x([0-9a-f]*)/m) {
        $codepage = sprintf("%8.8x", hex($1));
    }

    if ($codepage eq "fffffde9") { 
        utf8_to_eucjp(\$summary);
    } else {
        codeconv::codeconv_document(\$summary);
        codeconv::normalize_eucjp($cont);
    }

    if ($summary =~ /^The title is (.*)$/m) {
        my $orgtitle;

        $title = $1;

        # PowerPoint Only
#        $orgsummary =~ /^The title is (.*)$/m;
#        $orgtitle = $1;
#        undef $title if $orgtitle eq # which has no slide title
#            "\xbd\xd7\xb2\xc4\xde\x20\xc0\xb2\xc4\xd9\x82\xc8\x82\xb5";
    }
    if ($summary =~ /^The subject is (.*)$/m) {
        $subject = $1;
    }
    if ($summary =~ /^The last author was (.*)$/m) {
        $lastauthor = $1;
    }
    if ($summary =~ /^The author is (.*)$/m) {
        $author = $1;
    }
    if ($summary =~ /^The keywords are (.*)$/m) {
        $keywords = $1;
    }

    my $weight = $conf::Weight{'html'}->{'title'};
    if (defined $title) {
        $$weighted_str .= "\x7f$weight\x7f$title\x7f/$weight\x7f\n";
    }
    if (defined $subject) {
        $$weighted_str .= "\x7f$weight\x7f$subject\x7f/$weight\x7f\n";
    }

    $fields->{'title'} = $title;
    $fields->{'title'} = $subject unless (defined $title);

    $fields->{'author'} = $lastauthor;
    $fields->{'author'} = $author unless (defined $lastauthor);

    if (defined $keywords) {
        $weight = $conf::Weight{'metakey'};
        $$weighted_str .= "\x7f$weight\x7f$keywords\x7f/$weight\x7f\n";
    }

    return undef;
}

sub utf8_to_eucjp($) {
    my ($cont) = @_;

    return undef unless (util::islang("ja"));

    if ($English::PERL_VERSION >= 5.008){
        eval 'use Encode qw/from_to Unicode JP/;';
        Encode::from_to($$cont, "utf-8" ,"euc-jp");
        codeconv::normalize_eucjp_document($cont);
        return undef;
    }

    if ($nkfversion >= 2.04) {
        $$cont = NKF::nkf("-WemXZ1", $$cont);
        codeconv::normalize_document($cont);
        return undef;
    }

    return undef unless (defined $utfconvpath);

    my $tmpfile  = util::tmpnam('NMZ.tmp.utf8');
    { 
        my $fh = util::efopen("> $tmpfile");
        print $fh $$cont;
        util::fclose($fh);
    }

    my @cmd = ($utfconvpath, "-Iu8", "-Oej", $tmpfile);
    my $fh_out = IO::File->new_tmpfile();
    my $status = util::syscmd(
        command => \@cmd,
        option => {
            "stdout" => $fh_out,
            "stderr" => "/dev/null",
        },
    );
    $$cont = util::readfile($fh_out);
    util::fclose($fh_out);
    codeconv::normalize_eucjp_document($cont);

    unlink $tmpfile;

    return undef;
}

1;

Man Man