Current Path : /usr/local/lib/perl5/site_perl/5.8.9/MIME/ |
FreeBSD hs32.drive.ne.jp 9.1-RELEASE FreeBSD 9.1-RELEASE #1: Wed Jan 14 12:18:08 JST 2015 root@hs32.drive.ne.jp:/sys/amd64/compile/hs32 amd64 |
Current File : //usr/local/lib/perl5/site_perl/5.8.9/MIME/Words.pm |
package MIME::Words; =head1 NAME MIME::Words - deal with RFC 2047 encoded words =head1 SYNOPSIS Before reading further, you should see L<MIME::Tools> to make sure that you understand where this module fits into the grand scheme of things. Go on, do it now. I'll wait. Ready? Ok... use MIME::Words qw(:all); ### Decode the string into another string, forgetting the charsets: $decoded = decode_mimewords( 'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>', ); ### Split string into array of decoded [DATA,CHARSET] pairs: @decoded = decode_mimewords( 'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>', ); ### Encode a single unsafe word: $encoded = encode_mimeword("\xABFran\xE7ois\xBB"); ### Encode a string, trying to find the unsafe words inside it: $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB in town"); =head1 DESCRIPTION Fellow Americans, you probably won't know what the hell this module is for. Europeans, Russians, et al, you probably do. C<:-)>. For example, here's a valid MIME header you might get: From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu> To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk> CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard <PIRARD@vm1.ulg.ac.be> Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= =?US-ASCII?Q?.._cool!?= The fields basically decode to (sorry, I can only approximate the Latin characters with 7 bit sequences /o and 'e): From: Keith Moore <moore@cs.utk.edu> To: Keld J/orn Simonsen <keld@dkuug.dk> CC: Andr'e Pirard <PIRARD@vm1.ulg.ac.be> Subject: If you can read this you understand the example... cool! =head1 PUBLIC INTERFACE =over 4 =cut require 5.001; ### Pragmas: use strict; use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS @ISA); ### Exporting: use Exporter; %EXPORT_TAGS = (all => [qw(decode_mimewords encode_mimeword encode_mimewords )]); Exporter::export_ok_tags('all'); ### Inheritance: @ISA = qw(Exporter); ### Other modules: use MIME::Base64; use MIME::QuotedPrint; #------------------------------ # # Globals... # #------------------------------ ### The package version, both in 1.23 style *and* usable by MakeMaker: $VERSION = "5.427"; ### Nonprintables (controls + x7F + 8bit): my $NONPRINT = "\\x00-\\x1F\\x7F-\\xFF"; #------------------------------ # _decode_Q STRING # Private: used by _decode_header() to decode "Q" encoding, which is # almost, but not exactly, quoted-printable. :-P sub _decode_Q { my $str = shift; $str =~ s/_/\x20/g; # RFC-1522, Q rule 2 $str =~ s/=([\da-fA-F]{2})/pack("C", hex($1))/ge; # RFC-1522, Q rule 1 $str; } # _encode_Q STRING # Private: used by _encode_header() to decode "Q" encoding, which is # almost, but not exactly, quoted-printable. :-P sub _encode_Q { my $str = shift; $str =~ s{([ _\?\=$NONPRINT])}{sprintf("=%02X", ord($1))}eog; $str; } # _decode_B STRING # Private: used by _decode_header() to decode "B" encoding. sub _decode_B { my $str = shift; decode_base64($str); } # _encode_B STRING # Private: used by _decode_header() to decode "B" encoding. sub _encode_B { my $str = shift; encode_base64($str, ''); } #------------------------------ =item decode_mimewords ENCODED, [OPTS...] I<Function.> Go through the string looking for RFC 2047-style "Q" (quoted-printable, sort of) or "B" (base64) encoding, and decode them. B<In an array context,> splits the ENCODED string into a list of decoded C<[DATA, CHARSET]> pairs, and returns that list. Unencoded data are returned in a 1-element array C<[DATA]>, giving an effective CHARSET of C<undef>. $enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>'; foreach (decode_mimewords($enc)) { print "", ($_[1] || 'US-ASCII'), ": ", $_[0], "\n"; } B<In a scalar context,> joins the "data" elements of the above list together, and returns that. I<Warning: this is information-lossy,> and probably I<not> what you want, but if you know that all charsets in the ENCODED string are identical, it might be useful to you. (Before you use this, please see L<MIME::WordDecoder/unmime>, which is probably what you want.) In the event of a syntax error, $@ will be set to a description of the error, but parsing will continue as best as possible (so as to get I<something> back when decoding headers). $@ will be false if no error was detected. Any arguments past the ENCODED string are taken to define a hash of options: =over 4 =item Field Name of the mail field this string came from. I<Currently ignored.> =back =cut sub decode_mimewords { my $encstr = shift; my %params = @_; my @tokens; $@ = ''; ### error-return ### Collapse boundaries between adjacent encoded words: $encstr =~ s{(\?\=)\s*(\=\?)}{$1$2}gs; pos($encstr) = 0; ### print STDOUT "ENC = [", $encstr, "]\n"; ### Decode: my ($charset, $encoding, $enc, $dec); while (1) { last if (pos($encstr) >= length($encstr)); my $pos = pos($encstr); ### save it ### Case 1: are we looking at "=?..?..?="? if ($encstr =~ m{\G # from where we left off.. =\?([^?]*) # "=?" + charset + \?([bq]) # "?" + encoding + \?([^?]+) # "?" + data maybe with spcs + \?= # "?=" }xgi) { ($charset, $encoding, $enc) = ($1, lc($2), $3); $dec = (($encoding eq 'q') ? _decode_Q($enc) : _decode_B($enc)); push @tokens, [$dec, $charset]; next; } ### Case 2: are we looking at a bad "=?..." prefix? ### We need this to detect problems for case 3, which stops at "=?": pos($encstr) = $pos; # reset the pointer. if ($encstr =~ m{\G=\?}xg) { $@ .= qq|unterminated "=?..?..?=" in "$encstr" (pos $pos)\n|; push @tokens, ['=?']; next; } ### Case 3: are we looking at ordinary text? pos($encstr) = $pos; # reset the pointer. if ($encstr =~ m{\G # from where we left off... ([\x00-\xFF]*? # shortest possible string, \n*) # followed by 0 or more NLs, (?=(\Z|=\?)) # terminated by "=?" or EOS }xg) { length($1) or die "MIME::Words: internal logic err: empty token\n"; push @tokens, [$1]; next; } ### Case 4: bug! die "MIME::Words: unexpected case:\n($encstr) pos $pos\n\t". "Please alert developer.\n"; } return (wantarray ? @tokens : join('',map {$_->[0]} @tokens)); } #------------------------------ =item encode_mimeword RAW, [ENCODING], [CHARSET] I<Function.> Encode a single RAW "word" that has unsafe characters. The "word" will be encoded in its entirety. ### Encode "<<Franc,ois>>": $encoded = encode_mimeword("\xABFran\xE7ois\xBB"); You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">. You may specify the CHARSET, which defaults to C<iso-8859-1>. =cut sub encode_mimeword { my $word = shift; my $encoding = uc(shift || 'Q'); my $charset = uc(shift || 'ISO-8859-1'); my $encfunc = (($encoding eq 'Q') ? \&_encode_Q : \&_encode_B); "=?$charset?$encoding?" . &$encfunc($word) . "?="; } #------------------------------ =item encode_mimewords RAW, [OPTS] I<Function.> Given a RAW string, try to find and encode all "unsafe" sequences of characters: ### Encode a string with some unsafe "words": $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB"); Returns the encoded string. Any arguments past the RAW string are taken to define a hash of options: =over 4 =item Charset Encode all unsafe stuff with this charset. Default is 'ISO-8859-1', a.k.a. "Latin-1". =item Encoding The encoding to use, C<"q"> or C<"b">. The default is C<"q">. =item Field Name of the mail field this string will be used in. I<Currently ignored.> =back B<Warning:> this is a quick-and-dirty solution, intended for character sets which overlap ASCII. B<It does not comply with the RFC 2047 rules regarding the use of encoded words in message headers>. You may want to roll your own variant, using C<encoded_mimeword()>, for your application. I<Thanks to Jan Kasprzak for reminding me about this problem.> =cut sub encode_mimewords { my ($rawstr, %params) = @_; my $charset = $params{Charset} || 'ISO-8859-1'; my $encoding = lc($params{Encoding} || 'q'); ### Encode any "words" with unsafe characters. ### We limit such words to 18 characters, to guarantee that the ### worst-case encoding give us no more than 54 + ~10 < 75 characters my $word; $rawstr =~ s{([ a-zA-Z0-9\x7F-\xFF]{1,18})}{ ### get next "word" $word = $1; (($word !~ /(?:[$NONPRINT])|(?:^\s+$)/o) ? $word ### no unsafe chars : encode_mimeword($word, $encoding, $charset)); ### has unsafe chars }xeg; $rawstr =~ s/\?==\?/?= =?/g; $rawstr; } 1; __END__ =back =head1 SEE ALSO L<MIME::Base64>, L<MIME::QuotedPrint>, L<MIME::Tools> =head1 NOTES Exports its principle functions by default, in keeping with MIME::Base64 and MIME::QuotedPrint. =head1 AUTHOR Eryq (F<eryq@zeegee.com>), ZeeGee Software Inc (F<http://www.zeegee.com>). David F. Skoll (dfs@roaringpenguin.com) http://www.roaringpenguin.com All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Thanks also to... Kent Boortz For providing the idea, and the baseline RFC-1522-decoding code! KJJ at PrimeNet For requesting that this be split into its own module. Stephane Barizien For reporting a nasty bug.