Add support for RFC 2047 non-compliant splitting of UTF-8 encoded characters

2017-08-31 13:28:50 +01:00 · 2017-08-31 13:28:50 +01:00 · f88c3594fc
commit f88c3594fc
parent 79582f12e4
2 changed files with 78 additions and 14 deletions
--- a/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java
+++ b/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java
@ -21,6 +21,13 @@ import timber.log.Timber;
 * it has to be determined with the sender address, the mailer and so on.
 */
 class DecoderUtil {
+
+    private static class EncodedWord { // mutable holder for the three parts of one "=?charset?encoding?text?=" encoded word
+        private String charset; // charset value assigned by extractEncodedWord()
+        private String encoding; // set by extractEncodedWord() to the literal "Q" or "B"
+        private String encodedText; // still-encoded payload; the decode loop appends the payload of a following word with equal charset and encoding
+    }
+
    /**
     * Decodes an encoded word encoded with the 'B' encoding (described in
     * RFC 2047) found in a header field body.
@ -93,14 +100,18 @@ class DecoderUtil {
            return body;
        }

+        EncodedWord previousWord = null;
        int previousEnd = 0;
-        boolean previousWasEncoded = false;

        StringBuilder sb = new StringBuilder();

        while (true) {
            int begin = body.indexOf("=?", previousEnd);
            if (begin == -1) {
+                if (previousWord != null) {
+                    sb.append(decodeEncodedWord(previousWord));
+                    previousWord = null;
+                }
                sb.append(body.substring(previousEnd));
                return sb.toString();
            }
@ -110,18 +121,30 @@ class DecoderUtil {
            // to find the two '?' in the "header", before looking for the final "?=".
            int qm1 = body.indexOf('?', begin + 2);
            if (qm1 == -1) {
+                if (previousWord != null) {
+                    sb.append(decodeEncodedWord(previousWord));
+                    previousWord = null;
+                }
                sb.append(body.substring(previousEnd));
                return sb.toString();
            }

            int qm2 = body.indexOf('?', qm1 + 1);
            if (qm2 == -1) {
+                if (previousWord != null) {
+                    sb.append(decodeEncodedWord(previousWord));
+                    previousWord = null;
+                }
                sb.append(body.substring(previousEnd));
                return sb.toString();
            }

            int end = body.indexOf("?=", qm2 + 1);
            if (end == -1) {
+                if (previousWord != null) {
+                    sb.append(decodeEncodedWord(previousWord));
+                    previousWord = null;
+                }
                sb.append(body.substring(previousEnd));
                return sb.toString();
            }
@ -129,24 +152,52 @@ class DecoderUtil {

            String sep = body.substring(previousEnd, begin);

-            String decoded = decodeEncodedWord(body, begin, end, message);
-            if (decoded == null) {
+            EncodedWord word = extractEncodedWord(body, begin, end, message);
+
+            if (word == null) {
+                if (previousWord != null) {
+                    sb.append(decodeEncodedWord(previousWord));
+                    sb.append(sep);
+                    previousWord = null;
+                }
+            } else if (previousWord != null) {
+                if (previousWord.encoding.equals(word.encoding) && previousWord.charset.equals(word.charset)) {
+                    previousWord.encodedText += word.encodedText;
+                } else {
+                    sb.append(decodeEncodedWord(previousWord));
+                    sb.append(sep);
+                    previousWord = word;
+                }
+            } else {
+                previousWord = word;
+            }
+
+            if (previousWord == null) {
                sb.append(sep);
                sb.append(body.substring(begin, end));
-            } else {
-                if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
-                    sb.append(sep);
-                }
-                sb.append(decoded);
            }

            previousEnd = end;
-            previousWasEncoded = decoded != null;
        }
    }

    // return null on error
    private static String decodeEncodedWord(String body, int begin, int end, Message message) {
+        // extractEncodedWord() returns null when body[begin, end) is not a well-formed encoded word
+        // (or uses an unknown encoding). Guard against that so this method keeps returning null on
+        // error instead of throwing a NullPointerException in decodeEncodedWord(EncodedWord).
+        EncodedWord word = extractEncodedWord(body, begin, end, message);
+        return word == null ? null : decodeEncodedWord(word);
+    }
+
+    private static String decodeEncodedWord(EncodedWord word) { // decodes the (possibly concatenated) payload of word; word must be non-null
+        if (word.encoding.equals("Q")) { // exact match is enough: extractEncodedWord() stores the encoding as the literal "Q" or "B"
+            return decodeQ(word.encodedText, word.charset);
+        } else if (word.encoding.equals("B")) {
+            return DecoderUtil.decodeB(word.encodedText, word.charset);
+        } else {
+            Timber.w("Warning: Unknown encoding '%s'", word.encoding);
+            return null; // NOTE(review): the decode loop appends this result to a StringBuilder, so a null here would show up as the text "null"
+        }
+    }
+
+    private static EncodedWord extractEncodedWord(String body, int begin, int end, Message message) {
        int qm1 = body.indexOf('?', begin + 2);
        if (qm1 == end - 2)
            return null;
@ -171,13 +222,17 @@ class DecoderUtil {
            return null;
        }

+        EncodedWord encodedWord = new EncodedWord();
+        encodedWord.charset = charset;
        if (encoding.equalsIgnoreCase("Q")) {
-            return decodeQ(encodedText, charset);
+            encodedWord.encoding = "Q";
        } else if (encoding.equalsIgnoreCase("B")) {
-            return DecoderUtil.decodeB(encodedText, charset);
+            encodedWord.encoding = "B";
        } else {
            Timber.w("Warning: Unknown encoding in encoded word '%s'", body.substring(begin, end));
            return null;
        }
+        encodedWord.encodedText = encodedText;
+        return encodedWord;
    }
 }
--- a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java
+++ b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java
@ -169,10 +169,19 @@ public class DecoderUtilTest {
    }

    @Test
-    public void decodeEncodedWords_withMultipleEncodedSections_decodesAll() {
+    public void decodeEncodedWords_withMultipleEncodedSections_decodesBoth() {
+        body = "=?us-ascii?q?abc?= =?us-ascii?q?def?=";
+        expect = "abcdef";
+        message = null;
+        assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message));
+    }
+
+    @Test
+    public void decodeEncodedWords_withMultipleEncodedSections_decodesSequentialSectionTogether() {
+        //Splitting mid-character is RFC2047 non-compliant but seen in practice.
        body = "=?utf-8?B?5Liq5Lq66YKu566xOkJVRyAjMzAyNDY6OumCruS7tuato+aWh+mZhOS7tuWQ?=\n" +
                "=?utf-8?B?jeensOecgeeVpeaYvuekuuS8mOWMlg==?=";
-        expect = "个人邮箱:BUG #30246::邮件正文附件<E99984><E4BBB6>称省略显示优化";
+        expect = "个人邮箱:BUG #30246::邮件正文附件名称省略显示优化";
        message = null;
        assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message));
    }
@ -181,7 +190,7 @@ public class DecoderUtilTest {
    public void decodeEncodedWords_withGB2312_decodes_correctly() {
        body = "=?gb2312?B?Obv9t9az6cnu29rHsLqju6rHyLPHSlfN8rrAvsa16qOsuPzT0DIwvNIzOTnU?= " +
                "=?gb2312?B?qr6r0aG439DHytTLr77Gteq1yMTjwLSjoaOoQUSjqQ?=";
-        expect = "9积分抽深圳前海华侨城JW万豪酒店，更有20家399<EFBFBD><EFBFBD>精选高星试睡酒店等你来！（AD<EFBFBD>";
+        expect = "9积分抽深圳前海华侨城JW万豪酒店，更有20家399元精选高星试睡酒店等你来！（AD<EFBFBD>";
        message = null;
        assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message));
    }