Merge pull request #3134 from k9mail/fix_encoded_word_decoding

Split B-/Q-decoding and charset decoding into two different phases
2018-01-25 23:35:59 +01:00 · 2018-01-25 23:35:59 +01:00 · 26f6963e2a
commit 26f6963e2a
parent a36254dbc0 4bdf64e990
3 changed files with 67 additions and 77 deletions
--- a/k9mail-library/build.gradle
+++ b/k9mail-library/build.gradle
@ -17,6 +17,7 @@ repositories {
 dependencies {
    compile 'org.apache.james:apache-mime4j-core:0.8.1'
    compile 'org.apache.james:apache-mime4j-dom:0.8.1'
+    compile "com.squareup.okio:okio:${okioVersion}"
    compile 'commons-io:commons-io:2.4'
    compile 'com.jcraft:jzlib:1.0.7'
    compile 'com.beetstra.jutf7:jutf7:1.0.0'
@ -27,7 +28,6 @@ dependencies {
    androidTestCompile 'com.madgag.spongycastle:pg:1.51.0.0'

    testCompile "org.jetbrains.kotlin:kotlin-stdlib-jre7:${kotlinVersion}"
-    testCompile "com.squareup.okio:okio:${okioVersion}"
    testCompile "org.robolectric:robolectric:${robolectricVersion}"
    testCompile "junit:junit:${junitVersion}"
    testCompile "com.google.truth:truth:${truthVersion}"
--- a/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java
+++ b/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java
@ -1,13 +1,17 @@

 package com.fsck.k9.mail.internet;

-import com.fsck.k9.mail.Message;
-import com.fsck.k9.mail.MessagingException;
+
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.nio.charset.Charset;

-import org.apache.james.mime4j.codec.Base64InputStream;
+import com.fsck.k9.mail.Message;
+import com.fsck.k9.mail.MessagingException;
+import okio.Buffer;
+import okio.ByteString;
+import okio.Okio;
 import org.apache.james.mime4j.codec.QuotedPrintableInputStream;
 import org.apache.james.mime4j.util.CharsetUtil;
 import timber.log.Timber;
@ -21,65 +25,6 @@ import timber.log.Timber;
 * it has to be determined with the sender address, the mailer and so on.
 */
 class DecoderUtil {
-
-    private static class EncodedWord {
-        private String charset;
-        private String encoding;
-        private String encodedText;
-    }
-
-    /**
-     * Decodes an encoded word encoded with the 'B' encoding (described in
-     * RFC 2047) found in a header field body.
-     *
-     * @param encodedWord the encoded word to decode.
-     * @param charset the Java charset to use.
-     * @return the decoded string.
-     */
-    private static String decodeB(String encodedWord, String charset) {
-        byte[] bytes = encodedWord.getBytes(Charset.forName("US-ASCII"));
-
-        Base64InputStream is = new Base64InputStream(new ByteArrayInputStream(bytes));
-        try {
-            return CharsetSupport.readToString(is, charset);
-        } catch (IOException e) {
-            return null;
-        }
-    }
-
-    /**
-     * Decodes an encoded word encoded with the 'Q' encoding (described in
-     * RFC 2047) found in a header field body.
-     *
-     * @param encodedWord the encoded word to decode.
-     * @param charset the Java charset to use.
-     * @return the decoded string.
-     */
-    static String decodeQ(String encodedWord, String charset) {
-
-        /*
-         * Replace _ with =20
-         */
-        StringBuilder sb = new StringBuilder();
-        for (int i = 0; i < encodedWord.length(); i++) {
-            char c = encodedWord.charAt(i);
-            if (c == '_') {
-                sb.append("=20");
-            } else {
-                sb.append(c);
-            }
-        }
-
-        byte[] bytes = sb.toString().getBytes(Charset.forName("US-ASCII"));
-
-        QuotedPrintableInputStream is = new QuotedPrintableInputStream(new ByteArrayInputStream(bytes));
-        try {
-            return CharsetSupport.readToString(is, charset);
-        } catch (IOException e) {
-            return null;
-        }
-    }
-
    /**
     * Decodes a string containing encoded words as defined by RFC 2047.
     * Encoded words in have the form
@ -145,18 +90,18 @@ class DecoderUtil {
                }
            } else {
                if (word == null) {
-                    sb.append(decodeEncodedWord(previousWord));
+                    sb.append(charsetDecode(previousWord));
                    sb.append(sep);
                    sb.append(body.substring(begin, end));
                } else {
                    if (!CharsetUtil.isWhitespace(sep)) {
-                        sb.append(decodeEncodedWord(previousWord));
+                        sb.append(charsetDecode(previousWord));
                        sb.append(sep);
                    } else if (previousWord.encoding.equals(word.encoding) &&
                            previousWord.charset.equals(word.charset)) {
-                        word.encodedText = previousWord.encodedText + word.encodedText;
+                        word.data = concat(previousWord.data, word.data);
                    } else {
-                        sb.append(decodeEncodedWord(previousWord));
+                        sb.append(charsetDecode(previousWord));
                    }
                }
            }
@ -170,19 +115,17 @@ class DecoderUtil {
            int previousEnd) {

        if (previousWord != null) {
-            sb.append(decodeEncodedWord(previousWord));
+            sb.append(charsetDecode(previousWord));
        }

        sb.append(body.substring(previousEnd));
    }

-    private static String decodeEncodedWord(EncodedWord word) {
-        if (word.encoding.equals("Q")) {
-            return decodeQ(word.encodedText, word.charset);
-        } else if (word.encoding.equals("B")) {
-            return DecoderUtil.decodeB(word.encodedText, word.charset);
-        } else {
-            Timber.w("Warning: Unknown encoding '%s'", word.encoding);
+    /**
+     * Turns the already B-/Q-decoded bytes of an encoded word into a string, using the
+     * charset stored in the word.
+     *
+     * @param word the encoded word whose {@code data} bytes should be charset-decoded.
+     * @return the decoded text; an empty string if reading the bytes fails, never {@code null}.
+     */
+    private static String charsetDecode(EncodedWord word) {
+        try {
+            InputStream inputStream = new Buffer().write(word.data).inputStream();
+            return CharsetSupport.readToString(inputStream, word.charset);
+        } catch (IOException e) {
-            return null;
+            // The result is appended to a StringBuilder by the callers; a null would end up
+            // as the literal text "null" in the decoded header, so fall back to "".
+            return "";
         }
     }
@ -216,13 +159,54 @@ class DecoderUtil {
        encodedWord.charset = charset;
        if (encoding.equalsIgnoreCase("Q")) {
            encodedWord.encoding = "Q";
+            encodedWord.data = decodeQ(encodedText);
        } else if (encoding.equalsIgnoreCase("B")) {
            encodedWord.encoding = "B";
+            encodedWord.data = decodeB(encodedText);
        } else {
            Timber.w("Warning: Unknown encoding in encoded word '%s'", body.substring(begin, end));
            return null;
        }
-        encodedWord.encodedText = encodedText;
        return encodedWord;
    }
+
+    /**
+     * Decodes the text of an encoded word that uses the 'Q' encoding (described in
+     * RFC 2047) into raw bytes. No charset decoding happens here.
+     *
+     * @param encodedWord the 'Q' encoded text of the encoded word.
+     * @return the decoded bytes; {@link ByteString#EMPTY} if the text could not be read,
+     *         never {@code null}.
+     */
+    private static ByteString decodeQ(String encodedWord) {
+        // 'Q' encoding writes a space as '_'; rewrite it as "=20" so the
+        // quoted-printable decoder below can handle it.
+        String quotedPrintable = encodedWord.replace("_", "=20");
+        byte[] bytes = quotedPrintable.getBytes(Charset.forName("US-ASCII"));
+
+        QuotedPrintableInputStream is = new QuotedPrintableInputStream(new ByteArrayInputStream(bytes));
+        try {
+            return Okio.buffer(Okio.source(is)).readByteString();
+        } catch (IOException e) {
+            // Same fallback as decodeB(): the result is later written into an okio Buffer
+            // by concat()/charsetDecode(), so null must not escape from here.
+            Timber.w(e, "Couldn't decode 'Q' encoded text");
+            return ByteString.EMPTY;
+        }
+    }
+
+    /**
+     * Decodes the text of an encoded word that uses the 'B' (Base64) encoding into raw
+     * bytes. No charset decoding happens here.
+     *
+     * @param encodedText the Base64 text of the encoded word.
+     * @return the decoded bytes, or {@link ByteString#EMPTY} when
+     *         {@link ByteString#decodeBase64(String)} yields {@code null}.
+     */
+    private static ByteString decodeB(String encodedText) {
+        ByteString bytes = ByteString.decodeBase64(encodedText);
+        if (bytes != null) {
+            return bytes;
+        }
+        return ByteString.EMPTY;
+    }
+
+    /**
+     * Joins two byte strings into one: the bytes of {@code first} followed by the bytes
+     * of {@code second}.
+     */
+    private static ByteString concat(ByteString first, ByteString second) {
+        Buffer joined = new Buffer();
+        joined.write(first);
+        joined.write(second);
+        return joined.readByteString();
+    }
+
+
+    /**
+     * Holder for one parsed encoded word: its charset, its encoding marker and the
+     * bytes obtained by undoing the B-/Q-encoding.
+     */
+    private static class EncodedWord {
+        // Charset handed to CharsetSupport.readToString() when the bytes are turned into text.
+        private String charset;
+        // Normalized encoding marker, set to "Q" or "B" by the parser.
+        private String encoding;
+        // Output of decodeQ()/decodeB(); still raw bytes, not yet charset-decoded.
+        private ByteString data;
+    }
 }
--- a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java
+++ b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java
@ -119,7 +119,7 @@ public class DecoderUtilTest {

     @Test
     public void decodeEncodedWords_withInvalidBase64String_returnsEmptyString() {
-        assertInputDecodesToExpected("=?us-ascii?b?abc?=", "");
+        // '#' is not a character of the Base64 alphabet, so the payload is not valid Base64.
+        assertInputDecodesToExpected("=?us-ascii?b?ab#?=", "");
     }

    @Test
@ -192,6 +192,12 @@ public class DecoderUtilTest {
        assertInputDecodesToExpected("=?us-ascii?Q?oh_no_?= =?utf-8?Q?=F0=9F=92=A9?=", "oh no 💩");
    }

+    @Test
+    public void decodeEncodedWords_withTwoCompleteEncodedWords_shouldProvideBoth() {
+        // Two adjacent UTF-8 'B' encoded words, each a complete Base64 string on its own
+        // (the first one ends in "==" padding); the expected output is both contents joined.
+        assertInputDecodesToExpected("=?UTF-8?B?W+aWsOioguWWrl0g6aGn5a6iOiB4eHhAeHh4LmNvbSDmnInmlrDoqILllq46ICMyMDE4MA==?= " +
+                "=?UTF-8?B?MTE4MTIzNDU2Nzg=?=", "[新訂單] 顧客: xxx@xxx.com 有新訂單: #2018011812345678");
+    }
+
    @Test
    public void decodeEncodedWords_withRFC2047examples_decodesCorrectly() {
        assertInputDecodesToExpected("(=?ISO-8859-1?Q?a?=)", "(a)");