Japanese (ISO-2022-JP) handling for Mail-SpamAssassin 2.63.

  EvalTests.pm     - also strip JIS runs that end with ESC ( J, and set
                     mime_encoded_iso2022jp for unnamed base64 or
                     quoted-printable parts whose charset is iso-2022-jp.
  PerMsgStatus.pm  - set found_encoding_iso2022jp when $ctype matches
                     iso-2022-jp; rewrite JIS runs as EUC-JP in the decoded
                     body and in base64 encoded header words.
  20_head_tests.cf - match JAPANESE_UCE_SUBJECT against EUC-JP text.

NOTE(review): the line structure of this patch was reconstructed from a
copy whose newlines and indentation had been collapsed.  Blank context
lines were re-inserted where the hunk counts require them, and the
indentation is a best guess.  Apply with whitespace-insensitive matching
(patch -p1 -l) and check the result against pristine 2.63 sources.

diff -Nur Mail-SpamAssassin-2.63.orig/lib/Mail/SpamAssassin/EvalTests.pm Mail-SpamAssassin-2.63/lib/Mail/SpamAssassin/EvalTests.pm
--- Mail-SpamAssassin-2.63.orig/lib/Mail/SpamAssassin/EvalTests.pm	2004-01-18 08:56:08.000000000 +0900
+++ Mail-SpamAssassin-2.63/lib/Mail/SpamAssassin/EvalTests.pm	2004-09-15 11:02:01.025914924 +0900
@@ -1997,8 +1997,8 @@
 
     my $line = $_; # copy so we don't muck up the original
 
-    # remove shift-JIS charset codes
-    $line =~ s/\x1b\$B.*\x1b\(B//gs;
+    # remove JIS charset codes
+    $line =~ s/\x1b\$B.*\x1b\([BJ]//gs;
 
     # remove URIs
     $line =~ s/URI:\S+//gs;
@@ -2261,6 +2261,13 @@
       $self->{mime_base64_latin} = 1;
     }
 
+    if (!$name &&
+        $cte =~ /(?:base64|quoted-printable)/ &&
+        $charset =~ /\biso-2022-jp\b/)
+    {
+      $self->{mime_encoded_iso2022jp} = 1;
+    }
+
     if ($cte =~ /quoted-printable/ && $cd =~ /inline/ && !$charset) {
       $self->{mime_qp_inline_no_charset} = 1;
     }
@@ -2371,6 +2378,7 @@
   $self->{mime_qp_long_line} = 0;
   $self->{mime_qp_ratio} = 0;
   $self->{mime_suspect_name} = 0;
+  $self->{mime_encoded_iso2022jp} = 0;
 
   # message headers
   $ctype = $self->get('Content-Type');
diff -Nur Mail-SpamAssassin-2.63.orig/lib/Mail/SpamAssassin/PerMsgStatus.pm Mail-SpamAssassin-2.63/lib/Mail/SpamAssassin/PerMsgStatus.pm
--- Mail-SpamAssassin-2.63.orig/lib/Mail/SpamAssassin/PerMsgStatus.pm	2004-01-21 06:40:04.000000000 +0900
+++ Mail-SpamAssassin-2.63/lib/Mail/SpamAssassin/PerMsgStatus.pm	2004-09-15 11:18:17.064482443 +0900
@@ -918,6 +918,7 @@
 
   $self->{found_encoding_base64} = 0;
   $self->{found_encoding_quoted_printable} = 0;
+  $self->{found_encoding_iso2022jp} = 0;
 
   my $cte = $self->{msg}->get_header ('Content-Transfer-Encoding');
   if (defined $cte && $cte =~ /quoted-printable/i) {
@@ -936,6 +937,9 @@
     $self->{body_text_array} = [ ];
     return $self->{body_text_array};
   }
+  elsif ($ctype =~ /iso-2022-jp/i) {
+    $self->{found_encoding_iso2022jp} = 1;
+  }
 
   # if it's a multipart MIME message, skip non-text parts and
   # just assemble the body array from the text bits.
@@ -1134,6 +1138,17 @@
     $self->{decoded_body_text_array} = \@ary;
     return \@ary;
   }
+  elsif ($self->{found_encoding_iso2022jp}) {
+    # convert JIS to EUC-JP
+    $_ = '';
+    foreach my $line (@{$textary}) {
+      $line =~ s/\e\$B([\x21-\x7e]*)\e\([BJ]/$self->convert_jis_to_eucjp($1)/geo;
+      $_ .= $line;
+    }
+    my @ary = $self->split_into_array_of_short_lines ($_);
+    $self->{decoded_body_text_array} = \@ary;
+    return \@ary;
+  }
   else {
     $self->{decoded_body_text_array} = $textary;
     return $textary;
@@ -1416,7 +1431,7 @@
   # =?UTF-8?B?bmcgY29jb29uIC0gcmVzZW50IA==?= (yuck)
 
   if ($enc =~ s{\s*=\?([^\?]+)\?[Bb]\?([^\?]+)\?=}{
-    $self->generic_base64_decode ($2);
+    $self->generic_base64_decode_with_charset ($1, $2);
   }eg)
   {
     my $rawenc = $enc;
@@ -2493,6 +2508,25 @@
   }
 }
 
+# Base64-decode $to_decode (the payload of an encoded header word) and, when
+# $charset matches iso-2022-jp, rewrite each ESC $ B ... ESC ( B/J run as EUC-JP.
+sub generic_base64_decode_with_charset {
+  my ($self, $charset, $to_decode) = @_;
+  $to_decode = $self->generic_base64_decode($to_decode);
+  if ($charset =~ /iso-2022-jp/i) {
+    $to_decode =~ s/\e\$B([\041-\176]*)\e\([BJ]/$self->convert_jis_to_eucjp($1)/geo;
+  }
+  return $to_decode;
+}
+
+# Map the 7-bit bytes 0x21-0x7e of a JIS run (escape sequences already
+# removed by the caller) onto 0xa1-0xfe, i.e. set the high bit of each byte.
+sub convert_jis_to_eucjp {
+  my ($self, $s) = @_;
+  $s =~ tr/\x21-\x7e/\xa1-\xfe/;
+  return $s;
+}
+
 ###########################################################################
 
 sub dbg { Mail::SpamAssassin::dbg (@_); }
diff -Nur Mail-SpamAssassin-2.63.orig/rules/20_head_tests.cf Mail-SpamAssassin-2.63/rules/20_head_tests.cf
--- Mail-SpamAssassin-2.63.orig/rules/20_head_tests.cf	2004-01-18 08:56:13.000000000 +0900
+++ Mail-SpamAssassin-2.63/rules/20_head_tests.cf	2004-09-15 11:12:32.050830274 +0900
@@ -227,7 +227,8 @@
 # FWIW, according to Peter Evans, this should be sufficient to catch the
 # UCE tag and a common attempt at evasion (using the "sue" instead of
 # "mi" Chinese character).
-header JAPANESE_UCE_SUBJECT	Subject =~ /\e\$B.*(?:L\$>5Bz|EE;R%a!<%k)9-9p/
+# The pattern is the EUC-JP byte form of the same tag, hex-escaped so that it survives re-encoding of this file.
+header JAPANESE_UCE_SUBJECT	Subject =~ /(?:\xcc\xa4\xbe\xb5\xc2\xfa|\xc5\xc5\xbb\xd2\xa5\xe1\xa1\xbc\xa5\xeb)\xb9\xad\xb9\xf0/
 describe JAPANESE_UCE_SUBJECT	Subject contains a Japanese UCE tag
 
 # quinlan: "advertisement" in Russian KOI8-R