From f88c3594fca933014cc64b1168c60856cc0091bd Mon Sep 17 00:00:00 2001 From: Philip Whitehouse Date: Thu, 31 Aug 2017 13:28:50 +0100 Subject: [PATCH] Add support for RFC 2047 non-compliant splitting of UTF-8 encoded characters --- .../fsck/k9/mail/internet/DecoderUtil.java | 77 ++++++++++++++++--- .../k9/mail/internet/DecoderUtilTest.java | 15 +++- 2 files changed, 78 insertions(+), 14 deletions(-) diff --git a/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java b/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java index 548f2dc08..2027e1a21 100644 --- a/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java +++ b/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java @@ -21,6 +21,13 @@ import timber.log.Timber; * it has to be determined with the sender address, the mailer and so on. */ class DecoderUtil { + + private static class EncodedWord { + private String charset; + private String encoding; + private String encodedText; + } + /** * Decodes an encoded word encoded with the 'B' encoding (described in * RFC 2047) found in a header field body. @@ -93,14 +100,18 @@ class DecoderUtil { return body; } + EncodedWord previousWord = null; int previousEnd = 0; - boolean previousWasEncoded = false; StringBuilder sb = new StringBuilder(); while (true) { int begin = body.indexOf("=?", previousEnd); if (begin == -1) { + if (previousWord != null) { + sb.append(decodeEncodedWord(previousWord)); + previousWord = null; + } sb.append(body.substring(previousEnd)); return sb.toString(); } @@ -110,18 +121,30 @@ class DecoderUtil { // to find the two '?' in the "header", before looking for the final "?=". int qm1 = body.indexOf('?', begin + 2); if (qm1 == -1) { + if (previousWord != null) { + sb.append(decodeEncodedWord(previousWord)); + previousWord = null; + } sb.append(body.substring(previousEnd)); return sb.toString(); } int qm2 = body.indexOf('?', qm1 + 1); if (qm2 == -1) { + if (previousWord != null) { + sb.append(decodeEncodedWord(previousWord)); + previousWord = null; + } sb.append(body.substring(previousEnd)); return sb.toString(); } int end = body.indexOf("?=", qm2 + 1); if (end == -1) { + if (previousWord != null) { + sb.append(decodeEncodedWord(previousWord)); + previousWord = null; + } sb.append(body.substring(previousEnd)); return sb.toString(); } @@ -129,24 +152,52 @@ class DecoderUtil { String sep = body.substring(previousEnd, begin); - String decoded = decodeEncodedWord(body, begin, end, message); - if (decoded == null) { + EncodedWord word = extractEncodedWord(body, begin, end, message); + + if (word == null) { + if (previousWord != null) { + sb.append(decodeEncodedWord(previousWord)); + sb.append(sep); + previousWord = null; + } + } else if (previousWord != null) { + if (previousWord.encoding.equals(word.encoding) && previousWord.charset.equals(word.charset)) { + previousWord.encodedText += word.encodedText; + } else { + sb.append(decodeEncodedWord(previousWord)); + sb.append(sep); + previousWord = word; + } + } else { + previousWord = word; + } + + if (previousWord == null) { sb.append(sep); sb.append(body.substring(begin, end)); - } else { - if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) { - sb.append(sep); - } - sb.append(decoded); } previousEnd = end; - previousWasEncoded = decoded != null; } } // return null on error private static String decodeEncodedWord(String body, int begin, int end, Message message) { + return decodeEncodedWord(extractEncodedWord(body, begin, end, message)); + } + + private static String decodeEncodedWord(EncodedWord word) { + if (word.encoding.equals("Q")) { + return decodeQ(word.encodedText, word.charset); + } else if (word.encoding.equals("B")) { + return DecoderUtil.decodeB(word.encodedText, word.charset); + } else { + Timber.w("Warning: Unknown encoding '%s'", word.encoding); + return null; + } + } + + private static EncodedWord extractEncodedWord(String body, int begin, int end, Message message) { int qm1 = body.indexOf('?', begin + 2); if (qm1 == end - 2) return null; @@ -171,13 +222,17 @@ class DecoderUtil { return null; } + EncodedWord encodedWord = new EncodedWord(); + encodedWord.charset = charset; if (encoding.equalsIgnoreCase("Q")) { - return decodeQ(encodedText, charset); + encodedWord.encoding = "Q"; } else if (encoding.equalsIgnoreCase("B")) { - return DecoderUtil.decodeB(encodedText, charset); + encodedWord.encoding = "B"; } else { Timber.w("Warning: Unknown encoding in encoded word '%s'", body.substring(begin, end)); return null; } + encodedWord.encodedText = encodedText; + return encodedWord; } } diff --git a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java index 5f48e7828..36f1402f5 100644 --- a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java +++ b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java @@ -169,10 +169,19 @@ public class DecoderUtilTest { } @Test - public void decodeEncodedWords_withMultipleEncodedSections_decodesAll() { + public void decodeEncodedWords_withMultipleEncodedSections_decodesBoth() { + body = "=?us-ascii?q?abc?= =?us-ascii?q?def?="; + expect = "abcdef"; + message = null; + assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + + @Test + public void decodeEncodedWords_withMultipleEncodedSections_decodesSequentialSectionTogether() { + //Splitting mid-character is RFC2047 non-compliant but seen in practice. body = "=?utf-8?B?5Liq5Lq66YKu566xOkJVRyAjMzAyNDY6OumCruS7tuato+aWh+mZhOS7tuWQ?=\n" + "=?utf-8?B?jeensOecgeeVpeaYvuekuuS8mOWMlg==?="; - expect = "个人邮箱:BUG #30246::邮件正文附件��称省略显示优化"; + expect = "个人邮箱:BUG #30246::邮件正文附件名称省略显示优化"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); } @@ -181,7 +190,7 @@ public class DecoderUtilTest { public void decodeEncodedWords_withGB2312_decodes_correctly() { body = "=?gb2312?B?Obv9t9az6cnu29rHsLqju6rHyLPHSlfN8rrAvsa16qOsuPzT0DIwvNIzOTnU?= " + "=?gb2312?B?qr6r0aG439DHytTLr77Gteq1yMTjwLSjoaOoQUSjqQ?="; - expect = "9积分抽深圳前海华侨城JW万豪酒店,更有20家399��精选高星试睡酒店等你来!(AD�"; + expect = "9积分抽深圳前海华侨城JW万豪酒店,更有20家399元精选高星试睡酒店等你来!(AD�"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); }