Split B-/Q-decoding and charset decoding into two different phases
This commit is contained in:
parent
a36254dbc0
commit
4bdf64e990
3 changed files with 67 additions and 77 deletions
|
@ -17,6 +17,7 @@ repositories {
|
|||
dependencies {
|
||||
compile 'org.apache.james:apache-mime4j-core:0.8.1'
|
||||
compile 'org.apache.james:apache-mime4j-dom:0.8.1'
|
||||
compile "com.squareup.okio:okio:${okioVersion}"
|
||||
compile 'commons-io:commons-io:2.4'
|
||||
compile 'com.jcraft:jzlib:1.0.7'
|
||||
compile 'com.beetstra.jutf7:jutf7:1.0.0'
|
||||
|
@ -27,7 +28,6 @@ dependencies {
|
|||
androidTestCompile 'com.madgag.spongycastle:pg:1.51.0.0'
|
||||
|
||||
testCompile "org.jetbrains.kotlin:kotlin-stdlib-jre7:${kotlinVersion}"
|
||||
testCompile "com.squareup.okio:okio:${okioVersion}"
|
||||
testCompile "org.robolectric:robolectric:${robolectricVersion}"
|
||||
testCompile "junit:junit:${junitVersion}"
|
||||
testCompile "com.google.truth:truth:${truthVersion}"
|
||||
|
|
|
@ -1,13 +1,17 @@
|
|||
|
||||
package com.fsck.k9.mail.internet;
|
||||
|
||||
import com.fsck.k9.mail.Message;
|
||||
import com.fsck.k9.mail.MessagingException;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import org.apache.james.mime4j.codec.Base64InputStream;
|
||||
import com.fsck.k9.mail.Message;
|
||||
import com.fsck.k9.mail.MessagingException;
|
||||
import okio.Buffer;
|
||||
import okio.ByteString;
|
||||
import okio.Okio;
|
||||
import org.apache.james.mime4j.codec.QuotedPrintableInputStream;
|
||||
import org.apache.james.mime4j.util.CharsetUtil;
|
||||
import timber.log.Timber;
|
||||
|
@ -21,65 +25,6 @@ import timber.log.Timber;
|
|||
* it has to be determined with the sender address, the mailer and so on.
|
||||
*/
|
||||
class DecoderUtil {
|
||||
|
||||
private static class EncodedWord {
|
||||
private String charset;
|
||||
private String encoding;
|
||||
private String encodedText;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes an encoded word encoded with the 'B' encoding (described in
|
||||
* RFC 2047) found in a header field body.
|
||||
*
|
||||
* @param encodedWord the encoded word to decode.
|
||||
* @param charset the Java charset to use.
|
||||
* @return the decoded string.
|
||||
*/
|
||||
private static String decodeB(String encodedWord, String charset) {
|
||||
byte[] bytes = encodedWord.getBytes(Charset.forName("US-ASCII"));
|
||||
|
||||
Base64InputStream is = new Base64InputStream(new ByteArrayInputStream(bytes));
|
||||
try {
|
||||
return CharsetSupport.readToString(is, charset);
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes an encoded word encoded with the 'Q' encoding (described in
|
||||
* RFC 2047) found in a header field body.
|
||||
*
|
||||
* @param encodedWord the encoded word to decode.
|
||||
* @param charset the Java charset to use.
|
||||
* @return the decoded string.
|
||||
*/
|
||||
static String decodeQ(String encodedWord, String charset) {
|
||||
|
||||
/*
|
||||
* Replace _ with =20
|
||||
*/
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < encodedWord.length(); i++) {
|
||||
char c = encodedWord.charAt(i);
|
||||
if (c == '_') {
|
||||
sb.append("=20");
|
||||
} else {
|
||||
sb.append(c);
|
||||
}
|
||||
}
|
||||
|
||||
byte[] bytes = sb.toString().getBytes(Charset.forName("US-ASCII"));
|
||||
|
||||
QuotedPrintableInputStream is = new QuotedPrintableInputStream(new ByteArrayInputStream(bytes));
|
||||
try {
|
||||
return CharsetSupport.readToString(is, charset);
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a string containing encoded words as defined by RFC 2047.
|
||||
* Encoded words have the form
|
||||
|
@ -145,18 +90,18 @@ class DecoderUtil {
|
|||
}
|
||||
} else {
|
||||
if (word == null) {
|
||||
sb.append(decodeEncodedWord(previousWord));
|
||||
sb.append(charsetDecode(previousWord));
|
||||
sb.append(sep);
|
||||
sb.append(body.substring(begin, end));
|
||||
} else {
|
||||
if (!CharsetUtil.isWhitespace(sep)) {
|
||||
sb.append(decodeEncodedWord(previousWord));
|
||||
sb.append(charsetDecode(previousWord));
|
||||
sb.append(sep);
|
||||
} else if (previousWord.encoding.equals(word.encoding) &&
|
||||
previousWord.charset.equals(word.charset)) {
|
||||
word.encodedText = previousWord.encodedText + word.encodedText;
|
||||
word.data = concat(previousWord.data, word.data);
|
||||
} else {
|
||||
sb.append(decodeEncodedWord(previousWord));
|
||||
sb.append(charsetDecode(previousWord));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -170,19 +115,17 @@ class DecoderUtil {
|
|||
int previousEnd) {
|
||||
|
||||
if (previousWord != null) {
|
||||
sb.append(decodeEncodedWord(previousWord));
|
||||
sb.append(charsetDecode(previousWord));
|
||||
}
|
||||
|
||||
sb.append(body.substring(previousEnd));
|
||||
}
|
||||
|
||||
private static String decodeEncodedWord(EncodedWord word) {
|
||||
if (word.encoding.equals("Q")) {
|
||||
return decodeQ(word.encodedText, word.charset);
|
||||
} else if (word.encoding.equals("B")) {
|
||||
return DecoderUtil.decodeB(word.encodedText, word.charset);
|
||||
} else {
|
||||
Timber.w("Warning: Unknown encoding '%s'", word.encoding);
|
||||
private static String charsetDecode(EncodedWord word) {
|
||||
try {
|
||||
InputStream inputStream = new Buffer().write(word.data).inputStream();
|
||||
return CharsetSupport.readToString(inputStream, word.charset);
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -216,13 +159,54 @@ class DecoderUtil {
|
|||
encodedWord.charset = charset;
|
||||
if (encoding.equalsIgnoreCase("Q")) {
|
||||
encodedWord.encoding = "Q";
|
||||
encodedWord.data = decodeQ(encodedText);
|
||||
} else if (encoding.equalsIgnoreCase("B")) {
|
||||
encodedWord.encoding = "B";
|
||||
encodedWord.data = decodeB(encodedText);
|
||||
} else {
|
||||
Timber.w("Warning: Unknown encoding in encoded word '%s'", body.substring(begin, end));
|
||||
return null;
|
||||
}
|
||||
encodedWord.encodedText = encodedText;
|
||||
return encodedWord;
|
||||
}
|
||||
|
||||
private static ByteString decodeQ(String encodedWord) {
|
||||
/*
|
||||
* Replace _ with =20
|
||||
*/
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < encodedWord.length(); i++) {
|
||||
char c = encodedWord.charAt(i);
|
||||
if (c == '_') {
|
||||
sb.append("=20");
|
||||
} else {
|
||||
sb.append(c);
|
||||
}
|
||||
}
|
||||
|
||||
byte[] bytes = sb.toString().getBytes(Charset.forName("US-ASCII"));
|
||||
|
||||
QuotedPrintableInputStream is = new QuotedPrintableInputStream(new ByteArrayInputStream(bytes));
|
||||
try {
|
||||
return Okio.buffer(Okio.source(is)).readByteString();
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static ByteString decodeB(String encodedText) {
|
||||
ByteString decoded = ByteString.decodeBase64(encodedText);
|
||||
return decoded == null ? ByteString.EMPTY : decoded;
|
||||
}
|
||||
|
||||
private static ByteString concat(ByteString first, ByteString second) {
|
||||
return new Buffer().write(first).write(second).readByteString();
|
||||
}
|
||||
|
||||
|
||||
private static class EncodedWord {
|
||||
private String charset;
|
||||
private String encoding;
|
||||
private ByteString data;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -119,7 +119,7 @@ public class DecoderUtilTest {
|
|||
|
||||
// Checks that a 'b'-encoded us-ascii word whose payload is not valid Base64
// is expected to decode to the empty string.
// NOTE(review): this hunk is headed "-119,7 +119,7", which suggests the two assertions
// below are the before/after variants of a single changed line rather than two
// assertions living side by side - confirm against the real test file.
@Test
|
||||
public void decodeEncodedWords_withInvalidBase64String_returnsEmptyString() {
|
||||
assertInputDecodesToExpected("=?us-ascii?b?abc?=", "");
|
||||
assertInputDecodesToExpected("=?us-ascii?b?ab#?=", "");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -192,6 +192,12 @@ public class DecoderUtilTest {
|
|||
assertInputDecodesToExpected("=?us-ascii?Q?oh_no_?= =?utf-8?Q?=F0=9F=92=A9?=", "oh no 💩");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decodeEncodedWords_withTwoCompleteEncodedWords_shouldProvideBoth() {
|
||||
assertInputDecodesToExpected("=?UTF-8?B?W+aWsOioguWWrl0g6aGn5a6iOiB4eHhAeHh4LmNvbSDmnInmlrDoqILllq46ICMyMDE4MA==?= " +
|
||||
"=?UTF-8?B?MTE4MTIzNDU2Nzg=?=", "[新訂單] 顧客: xxx@xxx.com 有新訂單: #2018011812345678");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decodeEncodedWords_withRFC2047examples_decodesCorrectly() {
|
||||
assertInputDecodesToExpected("(=?ISO-8859-1?Q?a?=)", "(a)");
|
||||
|
|
Loading…
Reference in a new issue