Split B-/Q-decoding and charset decoding into two different phases

This commit is contained in:
cketti 2018-01-25 16:45:29 +01:00
parent a36254dbc0
commit 4bdf64e990
3 changed files with 67 additions and 77 deletions

View file

@ -17,6 +17,7 @@ repositories {
dependencies {
compile 'org.apache.james:apache-mime4j-core:0.8.1'
compile 'org.apache.james:apache-mime4j-dom:0.8.1'
compile "com.squareup.okio:okio:${okioVersion}"
compile 'commons-io:commons-io:2.4'
compile 'com.jcraft:jzlib:1.0.7'
compile 'com.beetstra.jutf7:jutf7:1.0.0'
@ -27,7 +28,6 @@ dependencies {
androidTestCompile 'com.madgag.spongycastle:pg:1.51.0.0'
testCompile "org.jetbrains.kotlin:kotlin-stdlib-jre7:${kotlinVersion}"
testCompile "com.squareup.okio:okio:${okioVersion}"
testCompile "org.robolectric:robolectric:${robolectricVersion}"
testCompile "junit:junit:${junitVersion}"
testCompile "com.google.truth:truth:${truthVersion}"

View file

@ -1,13 +1,17 @@
package com.fsck.k9.mail.internet;
import com.fsck.k9.mail.Message;
import com.fsck.k9.mail.MessagingException;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import org.apache.james.mime4j.codec.Base64InputStream;
import com.fsck.k9.mail.Message;
import com.fsck.k9.mail.MessagingException;
import okio.Buffer;
import okio.ByteString;
import okio.Okio;
import org.apache.james.mime4j.codec.QuotedPrintableInputStream;
import org.apache.james.mime4j.util.CharsetUtil;
import timber.log.Timber;
@ -21,65 +25,6 @@ import timber.log.Timber;
* it has to be determined with the sender address, the mailer and so on.
*/
class DecoderUtil {
private static class EncodedWord {
private String charset;
private String encoding;
private String encodedText;
}
/**
* Decodes an encoded word encoded with the 'B' encoding (described in
* RFC 2047) found in a header field body.
*
* @param encodedWord the encoded word to decode.
* @param charset the Java charset to use.
* @return the decoded string.
*/
private static String decodeB(String encodedWord, String charset) {
byte[] bytes = encodedWord.getBytes(Charset.forName("US-ASCII"));
Base64InputStream is = new Base64InputStream(new ByteArrayInputStream(bytes));
try {
return CharsetSupport.readToString(is, charset);
} catch (IOException e) {
return null;
}
}
/**
* Decodes an encoded word encoded with the 'Q' encoding (described in
* RFC 2047) found in a header field body.
*
* @param encodedWord the encoded word to decode.
* @param charset the Java charset to use.
* @return the decoded string.
*/
static String decodeQ(String encodedWord, String charset) {
/*
* Replace _ with =20
*/
StringBuilder sb = new StringBuilder();
for (int i = 0; i < encodedWord.length(); i++) {
char c = encodedWord.charAt(i);
if (c == '_') {
sb.append("=20");
} else {
sb.append(c);
}
}
byte[] bytes = sb.toString().getBytes(Charset.forName("US-ASCII"));
QuotedPrintableInputStream is = new QuotedPrintableInputStream(new ByteArrayInputStream(bytes));
try {
return CharsetSupport.readToString(is, charset);
} catch (IOException e) {
return null;
}
}
/**
* Decodes a string containing encoded words as defined by RFC 2047.
* Encoded words in have the form
@ -145,18 +90,18 @@ class DecoderUtil {
}
} else {
if (word == null) {
sb.append(decodeEncodedWord(previousWord));
sb.append(charsetDecode(previousWord));
sb.append(sep);
sb.append(body.substring(begin, end));
} else {
if (!CharsetUtil.isWhitespace(sep)) {
sb.append(decodeEncodedWord(previousWord));
sb.append(charsetDecode(previousWord));
sb.append(sep);
} else if (previousWord.encoding.equals(word.encoding) &&
previousWord.charset.equals(word.charset)) {
word.encodedText = previousWord.encodedText + word.encodedText;
word.data = concat(previousWord.data, word.data);
} else {
sb.append(decodeEncodedWord(previousWord));
sb.append(charsetDecode(previousWord));
}
}
}
@ -170,19 +115,17 @@ class DecoderUtil {
int previousEnd) {
if (previousWord != null) {
sb.append(decodeEncodedWord(previousWord));
sb.append(charsetDecode(previousWord));
}
sb.append(body.substring(previousEnd));
}
private static String decodeEncodedWord(EncodedWord word) {
if (word.encoding.equals("Q")) {
return decodeQ(word.encodedText, word.charset);
} else if (word.encoding.equals("B")) {
return DecoderUtil.decodeB(word.encodedText, word.charset);
} else {
Timber.w("Warning: Unknown encoding '%s'", word.encoding);
private static String charsetDecode(EncodedWord word) {
try {
InputStream inputStream = new Buffer().write(word.data).inputStream();
return CharsetSupport.readToString(inputStream, word.charset);
} catch (IOException e) {
return null;
}
}
@ -216,13 +159,54 @@ class DecoderUtil {
encodedWord.charset = charset;
if (encoding.equalsIgnoreCase("Q")) {
encodedWord.encoding = "Q";
encodedWord.data = decodeQ(encodedText);
} else if (encoding.equalsIgnoreCase("B")) {
encodedWord.encoding = "B";
encodedWord.data = decodeB(encodedText);
} else {
Timber.w("Warning: Unknown encoding in encoded word '%s'", body.substring(begin, end));
return null;
}
encodedWord.encodedText = encodedText;
return encodedWord;
}
private static ByteString decodeQ(String encodedWord) {
/*
* Replace _ with =20
*/
StringBuilder sb = new StringBuilder();
for (int i = 0; i < encodedWord.length(); i++) {
char c = encodedWord.charAt(i);
if (c == '_') {
sb.append("=20");
} else {
sb.append(c);
}
}
byte[] bytes = sb.toString().getBytes(Charset.forName("US-ASCII"));
QuotedPrintableInputStream is = new QuotedPrintableInputStream(new ByteArrayInputStream(bytes));
try {
return Okio.buffer(Okio.source(is)).readByteString();
} catch (IOException e) {
return null;
}
}
private static ByteString decodeB(String encodedText) {
ByteString decoded = ByteString.decodeBase64(encodedText);
return decoded == null ? ByteString.EMPTY : decoded;
}
private static ByteString concat(ByteString first, ByteString second) {
return new Buffer().write(first).write(second).readByteString();
}
private static class EncodedWord {
private String charset;
private String encoding;
private ByteString data;
}
}

View file

@ -119,7 +119,7 @@ public class DecoderUtilTest {
@Test
public void decodeEncodedWords_withInvalidBase64String_returnsEmptyString() {
assertInputDecodesToExpected("=?us-ascii?b?abc?=", "");
assertInputDecodesToExpected("=?us-ascii?b?ab#?=", "");
}
@Test
@ -192,6 +192,12 @@ public class DecoderUtilTest {
assertInputDecodesToExpected("=?us-ascii?Q?oh_no_?= =?utf-8?Q?=F0=9F=92=A9?=", "oh no 💩");
}
@Test
public void decodeEncodedWords_withTwoCompleteEncodedWords_shouldProvideBoth() {
assertInputDecodesToExpected("=?UTF-8?B?W+aWsOioguWWrl0g6aGn5a6iOiB4eHhAeHh4LmNvbSDmnInmlrDoqILllq46ICMyMDE4MA==?= " +
"=?UTF-8?B?MTE4MTIzNDU2Nzg=?=", "[新訂單] 顧客: xxx@xxx.com 有新訂單: #2018011812345678");
}
@Test
public void decodeEncodedWords_withRFC2047examples_decodesCorrectly() {
assertInputDecodesToExpected("(=?ISO-8859-1?Q?a?=)", "(a)");