From 1471e7a06336f7a68deefb881d3f7a1a89537e7a Mon Sep 17 00:00:00 2001 From: Philip Whitehouse Date: Thu, 31 Aug 2017 12:55:35 +0100 Subject: [PATCH 1/9] Split DecoderUtil tests up --- .../k9/mail/internet/DecoderUtilTest.java | 77 +++++++++++++++++-- 1 file changed, 69 insertions(+), 8 deletions(-) diff --git a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java index cd693c3be..0cd4fc893 100644 --- a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java +++ b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java @@ -12,112 +12,173 @@ import static org.junit.Assert.assertEquals; @RunWith(K9LibRobolectricTestRunner.class) public class DecoderUtilTest { - @Test - public void testDecodeEncodedWords() { - String body, expect; - MimeMessage message; + private String body, expect; + private MimeMessage message; + @Test + public void decodeEncodedWords_with_unencoded_data_returns_original_text() { body = "abc"; expect = "abc"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + + @Test + public void decodeEncodedWords_withAsciiCharset_encoded_data_returns_text() { body = "=?us-ascii?q?abc?="; expect = "abc"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + @Test + public void decodeEncodedWords_withStartOnly_encoding_format_returnAsText() { body = "=?"; expect = "=?"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + @Test + public void decodeEncodedWords_withStartAndSeparatorOnly_returnAsText() { body = "=??"; expect = "=??"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + @Test + public void decodeEncodedWords_withStartAnd2SeparatorOnly_returnAsText() { body = "=???"; expect = "=???"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + @Test + public void decodeEncodedWords_withStartAnd3SeparatorOnly_returnAsText() { body = "=????"; expect = "=????"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + @Test + public void decodeEncodedWords_withSeparatorsOnly_returnAsText() { body = "=????="; expect = "=????="; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + @Test + public void decodeEncodedWords_withMissingCharset_returnAsText() { body = "=??q??="; expect = "=??q??="; ; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + + @Test + public void decodeEncodedWords_withTextAndMissingCharset_returnAsText() { body = "=??q?a?="; expect = "a"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + + @Test + public void decodeEncodedWords_withNoTextCharsetOrEncoding_returnAsText() { body = "=??="; expect = "=??="; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + + @Test + public void decodeEncodedWords_with_MissingEncodingAndData_returnAsText() { body = "=?x?="; expect = "=?x?="; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + + @Test + public void decodeEncodedWords_withMissingEncoding_returnAsText() { body = "=?x??="; expect = "=?x??="; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + + @Test + public void decodeEncodedWords_with_incompleteEncodingFormat_returnAsText() { body = "=?x?q?="; expect = "=?x?q?="; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + + @Test + public void decodeEncodedWords_with_unrecognisedEncoding_withEmptyEncodedData_returnAsText() { body = "=?x?q??="; expect = "=?x?q??="; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + + @Test + public void decodeEncodedWords_withUnrecognisedEncoding_withEncodedData_return_encoded_data() { body = "=?x?q?X?="; expect = "X"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } - // invalid base64 string + @Test + public void decodeEncodedWords_withInvalidBase64String_returnsEmptyString() { body = "=?us-ascii?b?abc?="; expect = ""; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } - // broken encoded header + @Test + public void decodeEncodedWords_withBrokenEncoding_returnsEncodedData() { body = "=?us-ascii?q?abc?= =?"; expect = "abc =?"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + @Test + public void decodeEncodedWords_withUnrecognisedCharset_returnsEncodedData() { body = "=?x?= =?"; expect = "=?x?= =?"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } - //Multi encoded header + @Test + public void decodeEncodedWords_withMultipleEncodedSections_decodesAll() { body = "=?utf-8?B?5Liq5Lq66YKu566xOkJVRyAjMzAyNDY6OumCruS7tuato+aWh+mZhOS7tuWQ?=\n" + "=?utf-8?B?jeensOecgeeVpeaYvuekuuS8mOWMlg==?="; expect = "个人邮箱:BUG #30246::邮件正文附件��称省略显示优化"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } - //Non utf-8 encoding + @Test + public void decodeEncodedWords_withGB2312_decodes_correctly() { body = "=?gb2312?B?Obv9t9az6cnu29rHsLqju6rHyLPHSlfN8rrAvsa16qOsuPzT0DIwvNIzOTnU?= " + "=?gb2312?B?qr6r0aG439DHytTLr77Gteq1yMTjwLSjoaOoQUSjqQ?="; expect = "9积分抽深圳前海华侨城JW万豪酒店,更有20家399��精选高星试睡酒店等你来!(AD�"; From 79582f12e42d909e5508bfa0c9b4ad48e7c01459 Mon Sep 17 00:00:00 2001 From: Philip Whitehouse Date: Thu, 31 Aug 2017 13:27:32 +0100 Subject: [PATCH 2/9] Correct test name --- .../test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java index 0cd4fc893..5f48e7828 100644 --- a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java +++ b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java @@ -153,7 +153,7 @@ public class DecoderUtilTest { } @Test - public void decodeEncodedWords_withBrokenEncoding_returnsEncodedData() { + public void decodeEncodedWords_withPartiallyEncoded_returnsBothSections() { body = "=?us-ascii?q?abc?= =?"; expect = "abc =?"; message = null; From f88c3594fca933014cc64b1168c60856cc0091bd Mon Sep 17 00:00:00 2001 From: Philip Whitehouse Date: Thu, 31 Aug 2017 13:28:50 +0100 Subject: [PATCH 3/9] Add support for RFC 2047 non-compliant splitting of UTF-8 encoded characters --- .../fsck/k9/mail/internet/DecoderUtil.java | 77 ++++++++++++++++--- .../k9/mail/internet/DecoderUtilTest.java | 15 +++- 2 files changed, 78 insertions(+), 14 deletions(-) diff --git a/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java b/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java index 548f2dc08..2027e1a21 100644 --- a/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java +++ b/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java @@ -21,6 +21,13 @@ import timber.log.Timber; * it has to be determined with the sender address, the mailer and so on. */ class DecoderUtil { + + private static class EncodedWord { + private String charset; + private String encoding; + private String encodedText; + } + /** * Decodes an encoded word encoded with the 'B' encoding (described in * RFC 2047) found in a header field body. @@ -93,14 +100,18 @@ class DecoderUtil { return body; } + EncodedWord previousWord = null; int previousEnd = 0; - boolean previousWasEncoded = false; StringBuilder sb = new StringBuilder(); while (true) { int begin = body.indexOf("=?", previousEnd); if (begin == -1) { + if (previousWord != null) { + sb.append(decodeEncodedWord(previousWord)); + previousWord = null; + } sb.append(body.substring(previousEnd)); return sb.toString(); } @@ -110,18 +121,30 @@ class DecoderUtil { // to find the two '?' in the "header", before looking for the final "?=". int qm1 = body.indexOf('?', begin + 2); if (qm1 == -1) { + if (previousWord != null) { + sb.append(decodeEncodedWord(previousWord)); + previousWord = null; + } sb.append(body.substring(previousEnd)); return sb.toString(); } int qm2 = body.indexOf('?', qm1 + 1); if (qm2 == -1) { + if (previousWord != null) { + sb.append(decodeEncodedWord(previousWord)); + previousWord = null; + } sb.append(body.substring(previousEnd)); return sb.toString(); } int end = body.indexOf("?=", qm2 + 1); if (end == -1) { + if (previousWord != null) { + sb.append(decodeEncodedWord(previousWord)); + previousWord = null; + } sb.append(body.substring(previousEnd)); return sb.toString(); } @@ -129,24 +152,52 @@ class DecoderUtil { String sep = body.substring(previousEnd, begin); - String decoded = decodeEncodedWord(body, begin, end, message); - if (decoded == null) { + EncodedWord word = extractEncodedWord(body, begin, end, message); + + if (word == null) { + if (previousWord != null) { + sb.append(decodeEncodedWord(previousWord)); + sb.append(sep); + previousWord = null; + } + } else if (previousWord != null) { + if (previousWord.encoding.equals(word.encoding) && previousWord.charset.equals(word.charset)) { + previousWord.encodedText += word.encodedText; + } else { + sb.append(decodeEncodedWord(previousWord)); + sb.append(sep); + previousWord = word; + } + } else { + previousWord = word; + } + + if (previousWord == null) { sb.append(sep); sb.append(body.substring(begin, end)); - } else { - if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) { - sb.append(sep); - } - sb.append(decoded); } previousEnd = end; - previousWasEncoded = decoded != null; } } // return null on error private static String decodeEncodedWord(String body, int begin, int end, Message message) { + return decodeEncodedWord(extractEncodedWord(body, begin, end, message)); + } + + private static String decodeEncodedWord(EncodedWord word) { + if (word.encoding.equals("Q")) { + return decodeQ(word.encodedText, word.charset); + } else if (word.encoding.equals("B")) { + return DecoderUtil.decodeB(word.encodedText, word.charset); + } else { + Timber.w("Warning: Unknown encoding '%s'", word.encoding); + return null; + } + } + + private static EncodedWord extractEncodedWord(String body, int begin, int end, Message message) { int qm1 = body.indexOf('?', begin + 2); if (qm1 == end - 2) return null; @@ -171,13 +222,17 @@ class DecoderUtil { return null; } + EncodedWord encodedWord = new EncodedWord(); + encodedWord.charset = charset; if (encoding.equalsIgnoreCase("Q")) { - return decodeQ(encodedText, charset); + encodedWord.encoding = "Q"; } else if (encoding.equalsIgnoreCase("B")) { - return DecoderUtil.decodeB(encodedText, charset); + encodedWord.encoding = "B"; } else { Timber.w("Warning: Unknown encoding in encoded word '%s'", body.substring(begin, end)); return null; } + encodedWord.encodedText = encodedText; + return encodedWord; } } diff --git a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java index 5f48e7828..36f1402f5 100644 --- a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java +++ b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java @@ -169,10 +169,19 @@ public class DecoderUtilTest { } @Test - public void decodeEncodedWords_withMultipleEncodedSections_decodesAll() { + public void decodeEncodedWords_withMultipleEncodedSections_decodesBoth() { + body = "=?us-ascii?q?abc?= =?us-ascii?q?def?="; + expect = "abcdef"; + message = null; + assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + + @Test + public void decodeEncodedWords_withMultipleEncodedSections_decodesSequentialSectionTogether() { + //Splitting mid-character is RFC2047 non-compliant but seen in practice. body = "=?utf-8?B?5Liq5Lq66YKu566xOkJVRyAjMzAyNDY6OumCruS7tuato+aWh+mZhOS7tuWQ?=\n" + "=?utf-8?B?jeensOecgeeVpeaYvuekuuS8mOWMlg==?="; - expect = "个人邮箱:BUG #30246::邮件正文附件��称省略显示优化"; + expect = "个人邮箱:BUG #30246::邮件正文附件名称省略显示优化"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); } @@ -181,7 +190,7 @@ public class DecoderUtilTest { public void decodeEncodedWords_withGB2312_decodes_correctly() { body = "=?gb2312?B?Obv9t9az6cnu29rHsLqju6rHyLPHSlfN8rrAvsa16qOsuPzT0DIwvNIzOTnU?= " + "=?gb2312?B?qr6r0aG439DHytTLr77Gteq1yMTjwLSjoaOoQUSjqQ?="; - expect = "9积分抽深圳前海华侨城JW万豪酒店,更有20家399��精选高星试睡酒店等你来!(AD�"; + expect = "9积分抽深圳前海华侨城JW万豪酒店,更有20家399元精选高星试睡酒店等你来!(AD�"; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); } From 0e9e4daa8086975d191acaf4164dbf90f94bc5da Mon Sep 17 00:00:00 2001 From: Philip Whitehouse Date: Thu, 31 Aug 2017 22:01:07 +0100 Subject: [PATCH 4/9] Fix decoding with mix of decoded and encoded --- .../fsck/k9/mail/internet/DecoderUtil.java | 20 ++++++++++++------- .../k9/mail/internet/DecoderUtilTest.java | 16 +++++++-------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java b/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java index 2027e1a21..16c613584 100644 --- a/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java +++ b/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java @@ -158,18 +158,24 @@ class DecoderUtil { if (previousWord != null) { sb.append(decodeEncodedWord(previousWord)); sb.append(sep); + sb.append(body.substring(begin, end)); previousWord = null; - } - } else if (previousWord != null) { - if (previousWord.encoding.equals(word.encoding) && previousWord.charset.equals(word.charset)) { - previousWord.encodedText += word.encodedText; } else { - sb.append(decodeEncodedWord(previousWord)); + sb.append(sep); + } + } else { + if (previousWord != null) { + if (previousWord.encoding.equals(word.encoding) && previousWord.charset.equals(word.charset)) { + previousWord.encodedText += word.encodedText; + } else { + sb.append(decodeEncodedWord(previousWord)); + sb.append(sep); + previousWord = word; + } + } else { sb.append(sep); previousWord = word; } - } else { - previousWord = word; } if (previousWord == null) { diff --git a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java index 36f1402f5..e53070262 100644 --- a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java +++ b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java @@ -25,7 +25,6 @@ public class DecoderUtilTest { @Test public void decodeEncodedWords_withAsciiCharset_encoded_data_returns_text() { - body = "=?us-ascii?q?abc?="; expect = "abc"; message = null; @@ -76,7 +75,6 @@ public class DecoderUtilTest { public void decodeEncodedWords_withMissingCharset_returnAsText() { body = "=??q??="; expect = "=??q??="; - ; message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); } @@ -92,7 +90,6 @@ public class DecoderUtilTest { @Test public void decodeEncodedWords_withNoTextCharsetOrEncoding_returnAsText() { - body = "=??="; expect = "=??="; message = null; @@ -101,7 +98,6 @@ public class DecoderUtilTest { @Test public void decodeEncodedWords_with_MissingEncodingAndData_returnAsText() { - body = "=?x?="; expect = "=?x?="; message = null; @@ -110,7 +106,6 @@ public class DecoderUtilTest { @Test public void decodeEncodedWords_withMissingEncoding_returnAsText() { - body = "=?x??="; expect = "=?x??="; message = null; @@ -119,7 +114,6 @@ public class DecoderUtilTest { @Test public void decodeEncodedWords_with_incompleteEncodingFormat_returnAsText() { - body = "=?x?q?="; expect = "=?x?q?="; message = null; @@ -128,7 +122,6 @@ public class DecoderUtilTest { @Test public void decodeEncodedWords_with_unrecognisedEncoding_withEmptyEncodedData_returnAsText() { - body = "=?x?q??="; expect = "=?x?q??="; message = null; @@ -137,7 +130,6 @@ public class DecoderUtilTest { @Test public void decodeEncodedWords_withUnrecognisedEncoding_withEncodedData_return_encoded_data() { - body = "=?x?q?X?="; expect = "X"; message = null; @@ -160,6 +152,14 @@ public class DecoderUtilTest { assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); } + @Test + public void decodeEncodedWords_withPartiallyEncodedAfter_returnsBothSections() { + body = "def=?us-ascii?q?abc?="; + expect = "defabc"; + message = null; + assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } + @Test public void decodeEncodedWords_withUnrecognisedCharset_returnsEncodedData() { body = "=?x?= =?"; From 498df5db73acaa6599029c48a2b793737e60d76d Mon Sep 17 00:00:00 2001 From: Philip Whitehouse Date: Sat, 2 Sep 2017 11:17:50 +0100 Subject: [PATCH 5/9] Add RFC2047 tests and fix a resulting whitespace bug --- .../fsck/k9/mail/internet/DecoderUtil.java | 6 +-- .../k9/mail/internet/DecoderUtilTest.java | 38 +++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java b/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java index 16c613584..7d7a350b7 100644 --- a/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java +++ b/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java @@ -161,7 +161,7 @@ class DecoderUtil { sb.append(body.substring(begin, end)); previousWord = null; } else { - sb.append(sep); +// sb.append(sep); } } else { if (previousWord != null) { @@ -169,11 +169,11 @@ class DecoderUtil { previousWord.encodedText += word.encodedText; } else { sb.append(decodeEncodedWord(previousWord)); - sb.append(sep); + sb.append(sep.trim()); previousWord = word; } } else { - sb.append(sep); + sb.append(sep.trim()); previousWord = word; } } diff --git a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java index e53070262..3a6a12a00 100644 --- a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java +++ b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java @@ -194,4 +194,42 @@ public class DecoderUtilTest { message = null; assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); } + + @Test + public void decodeEncodedWords_withRFC2047examples_decodesCorrectly() { + body = "(=?ISO-8859-1?Q?a?=)"; + expect = "(a)"; + message = null; + assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + + body = "(=?ISO-8859-1?Q?a?= b)"; + expect = "(a b)"; + message = null; + assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + + body = "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)"; + expect = "(ab)"; + message = null; + assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + + body = "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)"; + expect = "(ab)"; + message = null; + assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + + body = "(=?ISO-8859-1?Q?a?= \n =?ISO-8859-1?Q?b?=)"; + expect = "(ab)"; + message = null; + assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + + body = "(=?ISO-8859-1?Q?a_b?=)"; + expect = "(a b)"; + message = null; + assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + + body = "(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)"; + expect = "(a b)"; + message = null; + assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + } } From 5f6b1dffc661b5b41f45f0350403745eb5529696 Mon Sep 17 00:00:00 2001 From: cketti Date: Sat, 14 Oct 2017 05:41:33 +0200 Subject: [PATCH 6/9] Refactor DecoderUtilTest --- .../k9/mail/internet/DecoderUtilTest.java | 157 +++++------------- 1 file changed, 37 insertions(+), 120 deletions(-) diff --git a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java index 3a6a12a00..8c749f71c 100644 --- a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java +++ b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java @@ -12,224 +12,141 @@ import static org.junit.Assert.assertEquals; @RunWith(K9LibRobolectricTestRunner.class) public class DecoderUtilTest { - private String body, expect; - private MimeMessage message; - @Test public void decodeEncodedWords_with_unencoded_data_returns_original_text() { - body = "abc"; - expect = "abc"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("abc", "abc"); } @Test public void decodeEncodedWords_withAsciiCharset_encoded_data_returns_text() { - body = "=?us-ascii?q?abc?="; - expect = "abc"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=?us-ascii?q?abc?=", "abc"); } @Test public void decodeEncodedWords_withStartOnly_encoding_format_returnAsText() { - body = "=?"; - expect = "=?"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=?", "=?"); } @Test public void decodeEncodedWords_withStartAndSeparatorOnly_returnAsText() { - body = "=??"; - expect = "=??"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=??", "=??"); } @Test public void decodeEncodedWords_withStartAnd2SeparatorOnly_returnAsText() { - body = "=???"; - expect = "=???"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=???", "=???"); } @Test public void decodeEncodedWords_withStartAnd3SeparatorOnly_returnAsText() { - body = "=????"; - expect = "=????"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=????", "=????"); } @Test public void decodeEncodedWords_withSeparatorsOnly_returnAsText() { - body = "=????="; - expect = "=????="; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=????=", "=????="); } @Test public void decodeEncodedWords_withMissingCharset_returnAsText() { - body = "=??q??="; - expect = "=??q??="; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=??q??=", "=??q??="); } @Test public void decodeEncodedWords_withTextAndMissingCharset_returnAsText() { - - body = "=??q?a?="; - expect = "a"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=??q?a?=", "a"); } @Test public void decodeEncodedWords_withNoTextCharsetOrEncoding_returnAsText() { - body = "=??="; - expect = "=??="; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=??=", "=??="); } @Test public void decodeEncodedWords_with_MissingEncodingAndData_returnAsText() { - body = "=?x?="; - expect = "=?x?="; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=?x?=", "=?x?="); } @Test public void decodeEncodedWords_withMissingEncoding_returnAsText() { - body = "=?x??="; - expect = "=?x??="; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=?x??=", "=?x??="); } @Test public void decodeEncodedWords_with_incompleteEncodingFormat_returnAsText() { - body = "=?x?q?="; - expect = "=?x?q?="; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=?x?q?=", "=?x?q?="); } @Test public void decodeEncodedWords_with_unrecognisedEncoding_withEmptyEncodedData_returnAsText() { - body = "=?x?q??="; - expect = "=?x?q??="; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=?x?q??=", "=?x?q??="); } @Test public void decodeEncodedWords_withUnrecognisedEncoding_withEncodedData_return_encoded_data() { - body = "=?x?q?X?="; - expect = "X"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=?x?q?X?=", "X"); } @Test public void decodeEncodedWords_withInvalidBase64String_returnsEmptyString() { - body = "=?us-ascii?b?abc?="; - expect = ""; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=?us-ascii?b?abc?=", ""); } @Test public void decodeEncodedWords_withPartiallyEncoded_returnsBothSections() { - body = "=?us-ascii?q?abc?= =?"; - expect = "abc =?"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=?us-ascii?q?abc?= =?", "abc =?"); } @Test public void decodeEncodedWords_withPartiallyEncodedAfter_returnsBothSections() { - body = "def=?us-ascii?q?abc?="; - expect = "defabc"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("def=?us-ascii?q?abc?=", "defabc"); } @Test public void decodeEncodedWords_withUnrecognisedCharset_returnsEncodedData() { - body = "=?x?= =?"; - expect = "=?x?= =?"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=?x?= =?", "=?x?= =?"); } @Test public void decodeEncodedWords_withMultipleEncodedSections_decodesBoth() { - body = "=?us-ascii?q?abc?= =?us-ascii?q?def?="; - expect = "abcdef"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("=?us-ascii?q?abc?= =?us-ascii?q?def?=", "abcdef"); } @Test public void decodeEncodedWords_withMultipleEncodedSections_decodesSequentialSectionTogether() { //Splitting mid-character is RFC2047 non-compliant but seen in practice. - body = "=?utf-8?B?5Liq5Lq66YKu566xOkJVRyAjMzAyNDY6OumCruS7tuato+aWh+mZhOS7tuWQ?=\n" + + String input = "=?utf-8?B?5Liq5Lq66YKu566xOkJVRyAjMzAyNDY6OumCruS7tuato+aWh+mZhOS7tuWQ?=\n" + "=?utf-8?B?jeensOecgeeVpeaYvuekuuS8mOWMlg==?="; - expect = "个人邮箱:BUG #30246::邮件正文附件名称省略显示优化"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected(input, "个人邮箱:BUG #30246::邮件正文附件名称省略显示优化"); } @Test public void decodeEncodedWords_withGB2312_decodes_correctly() { - body = "=?gb2312?B?Obv9t9az6cnu29rHsLqju6rHyLPHSlfN8rrAvsa16qOsuPzT0DIwvNIzOTnU?= " + + String input = "=?gb2312?B?Obv9t9az6cnu29rHsLqju6rHyLPHSlfN8rrAvsa16qOsuPzT0DIwvNIzOTnU?= " + "=?gb2312?B?qr6r0aG439DHytTLr77Gteq1yMTjwLSjoaOoQUSjqQ?="; - expect = "9积分抽深圳前海华侨城JW万豪酒店,更有20家399元精选高星试睡酒店等你来!(AD�"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected(input, "9积分抽深圳前海华侨城JW万豪酒店,更有20家399元精选高星试睡酒店等你来!(AD�"); } @Test public void decodeEncodedWords_withRFC2047examples_decodesCorrectly() { - body = "(=?ISO-8859-1?Q?a?=)"; - expect = "(a)"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("(=?ISO-8859-1?Q?a?=)", "(a)"); - body = "(=?ISO-8859-1?Q?a?= b)"; - expect = "(a b)"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("(=?ISO-8859-1?Q?a?= b)", "(a b)"); - body = "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)"; - expect = "(ab)"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)", "(ab)"); - body = "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)"; - expect = "(ab)"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)", "(ab)"); - body = "(=?ISO-8859-1?Q?a?= \n =?ISO-8859-1?Q?b?=)"; - expect = "(ab)"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("(=?ISO-8859-1?Q?a?= \n =?ISO-8859-1?Q?b?=)", "(ab)"); - body = "(=?ISO-8859-1?Q?a_b?=)"; - expect = "(a b)"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("(=?ISO-8859-1?Q?a_b?=)", "(a b)"); - body = "(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)"; - expect = "(a b)"; - message = null; - assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message)); + assertInputDecodesToExpected("(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)", "(a b)"); + } + + + private void assertInputDecodesToExpected(String input, String expected) { + String decodedText = DecoderUtil.decodeEncodedWords(input, null); + assertEquals(expected, decodedText); } } From 37d2c3609bacdef12e6c1a832314fca7441d1a95 Mon Sep 17 00:00:00 2001 From: cketti Date: Sat, 14 Oct 2017 06:01:05 +0200 Subject: [PATCH 7/9] Modify tests for text wrongly split into encoded words We were using test data submitted by users. But we don't really know what the expected result should be. In the second test the expected text ended in a replacement character. That indicates the text is not decoded correctly. Maybe there was an additional encoded word that was missing. --- .../fsck/k9/mail/internet/DecoderUtilTest.java | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java index 8c749f71c..8025c64d6 100644 --- a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java +++ b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java @@ -114,17 +114,10 @@ public class DecoderUtilTest { @Test public void decodeEncodedWords_withMultipleEncodedSections_decodesSequentialSectionTogether() { - //Splitting mid-character is RFC2047 non-compliant but seen in practice. - String input = "=?utf-8?B?5Liq5Lq66YKu566xOkJVRyAjMzAyNDY6OumCruS7tuato+aWh+mZhOS7tuWQ?=\n" + - "=?utf-8?B?jeensOecgeeVpeaYvuekuuS8mOWMlg==?="; - assertInputDecodesToExpected(input, "个人邮箱:BUG #30246::邮件正文附件名称省略显示优化"); - } - - @Test - public void decodeEncodedWords_withGB2312_decodes_correctly() { - String input = "=?gb2312?B?Obv9t9az6cnu29rHsLqju6rHyLPHSlfN8rrAvsa16qOsuPzT0DIwvNIzOTnU?= " + - "=?gb2312?B?qr6r0aG439DHytTLr77Gteq1yMTjwLSjoaOoQUSjqQ?="; - assertInputDecodesToExpected(input, "9积分抽深圳前海华侨城JW万豪酒店,更有20家399元精选高星试睡酒店等你来!(AD�"); + // Splitting mid-character is RFC2047 non-compliant but seen in practice. + // "=?utf-8?B?b2hhaSDw?=" individually decodes to "ohai �" + // "=?utf-8?B?n5Kp==?=" individually decodes to "���" + assertInputDecodesToExpected("=?utf-8?B?b2hhaSDw?= =?utf-8?B?n5Kp?=", "ohai 💩"); } @Test From 2de1c02c83bc4701d6d255cbed4e0f76063ef2b0 Mon Sep 17 00:00:00 2001 From: cketti Date: Sat, 14 Oct 2017 06:53:04 +0200 Subject: [PATCH 8/9] Add missing tests to DecoderUtilTest Some tests fail showing bugs in the current implementation. --- .../k9/mail/internet/DecoderUtilTest.java | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java index 8025c64d6..a0c205e7e 100644 --- a/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java +++ b/k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java @@ -11,6 +11,16 @@ import static org.junit.Assert.assertEquals; @RunWith(K9LibRobolectricTestRunner.class) public class DecoderUtilTest { + private static final String INVALID = "=?utf-8?Q??="; + + + @Test + public void decodeEncodedWords_withInvalidEncodedWord_shouldReturnInputText() { + // We use INVALID as instance of an invalid encoded word in tests. If at some point we decide to change the code + // to recognize empty encoded text as valid and decode it to an empty string, a lot of tests will break. + // Hopefully this test will help the developer figure out why the other tests broke. + assertInputDecodesToExpected(INVALID, INVALID); + } @Test public void decodeEncodedWords_with_unencoded_data_returns_original_text() { @@ -27,21 +37,41 @@ public class DecoderUtilTest { assertInputDecodesToExpected("=?", "=?"); } + @Test + public void decodeEncodedWords_withEncodedWordAndOnlyStartOfEncodedWord_shouldDecodeAndAddSuffix() { + assertInputDecodesToExpected("=?utf-8?Q?abc?= =?", "abc =?"); + } + @Test public void decodeEncodedWords_withStartAndSeparatorOnly_returnAsText() { assertInputDecodesToExpected("=??", "=??"); } + @Test + public void decodeEncodedWords_withEncodedWordAndOnlyStartAndSeparatorOfEncodedWord_shouldDecodeAndAddSuffix() { + assertInputDecodesToExpected("=?utf-8?Q?abc?= =??", "abc =??"); + } + @Test public void decodeEncodedWords_withStartAnd2SeparatorOnly_returnAsText() { assertInputDecodesToExpected("=???", "=???"); } + @Test + public void decodeEncodedWords_withEncodedWordAndOnlyStartAndTwoSeparatorsOfEncodedWord_shouldDecodeAndAddSuffix() { + assertInputDecodesToExpected("=?utf-8?Q?abc?= =???", "abc =???"); + } + @Test public void decodeEncodedWords_withStartAnd3SeparatorOnly_returnAsText() { assertInputDecodesToExpected("=????", "=????"); } + @Test + public void decodeEncodedWords_withEncodedWordAndOnlyStartAndThreeSeparatorsOfEncodedWord_shouldDecodeAndAddSuffix() { + assertInputDecodesToExpected("=?utf-8?Q?abc?= =????", "abc =????"); + } + @Test public void decodeEncodedWords_withSeparatorsOnly_returnAsText() { assertInputDecodesToExpected("=????=", "=????="); @@ -117,9 +147,51 @@ public class DecoderUtilTest { // Splitting mid-character is RFC2047 non-compliant but seen in practice. // "=?utf-8?B?b2hhaSDw?=" individually decodes to "ohai �" // "=?utf-8?B?n5Kp==?=" individually decodes to "���" + // (invalid bytes in a UTF-8 sequence are replaced with the replacement character) assertInputDecodesToExpected("=?utf-8?B?b2hhaSDw?= =?utf-8?B?n5Kp?=", "ohai 💩"); } + @Test + public void decodeEncodedWords_withMultipleEncodedSectionsButCharsetAndEncodingDifferingInCase_decodesSequentialSectionTogether() { + assertInputDecodesToExpected("=?utf-8?B?b2hhaSDw?= =?UTF-8?b?n5Kp?=", "ohai 💩"); + } + + @Test + public void decodeEncodedWords_withEncodedWordWhitespaceInvalidEncodedWord_shouldOnlyDecodeEncodedWord() { + assertInputDecodesToExpected("=?utf-8?Q?abc?= " + INVALID, "abc " + INVALID); + } + + @Test + public void decodeEncodedWords_withInvalidEncodedWordWhitespaceInvalidEncodedWord_shouldReturnInputText() { + String input = INVALID + " " + INVALID; + assertInputDecodesToExpected(input, input); + } + + @Test + public void decodeEncodedWords_withEncodedWordNonWhitespaceSeparatorEncodedWord_shouldDecodeBothAndKeepSeparator() { + assertInputDecodesToExpected("=?utf-8?Q?ab?= -- =?utf-8?Q?cd?=", "ab -- cd"); + } + + @Test + public void decodeEncodedWords_withInvalidEncodedWordWhitespaceEncodedWord_shouldOnlyDecodeEncodedWord() { + assertInputDecodesToExpected(INVALID + " =?utf-8?Q?abc?=", INVALID + " abc"); + } + + @Test + public void decodeEncodedWords_withEncodedWordFollowedByEncodedWordWithDifferentEncoding_shouldDecodeIndividually() { + assertInputDecodesToExpected("=?utf-8?Q?ab?= =?utf-8?B?Y2Q=?=", "abcd"); + } + + @Test + public void decodeEncodedWords_withEncodedWordSeparatorEncodedWordWithDifferentEncoding_shouldDecodeIndividuallyAndKeepSeparator() { + assertInputDecodesToExpected("=?utf-8?Q?ab?= / =?utf-8?B?Y2Q=?=", "ab / cd"); + } + + @Test + public void decodeEncodedWords_withEncodedWordFollowedByEncodedWordWithDifferentCharset_shouldDecodeIndividually() { + assertInputDecodesToExpected("=?us-ascii?Q?oh_no_?= =?utf-8?Q?=F0=9F=92=A9?=", "oh no 💩"); + } + @Test public void decodeEncodedWords_withRFC2047examples_decodesCorrectly() { assertInputDecodesToExpected("(=?ISO-8859-1?Q?a?=)", "(a)"); From cdf92c0a1017950128218a2c997aea13a6f653ba Mon Sep 17 00:00:00 2001 From: cketti Date: Sat, 14 Oct 2017 07:39:10 +0200 Subject: [PATCH 9/9] Fix decoding of encoded words --- .../fsck/k9/mail/internet/DecoderUtil.java | 70 +++++++------------ 1 file changed, 27 insertions(+), 43 deletions(-) diff --git a/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java b/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java index 7d7a350b7..6fc2e8a8a 100644 --- a/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java +++ b/k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java @@ -55,7 +55,7 @@ class DecoderUtil { * @param charset the Java charset to use. * @return the decoded string. */ - private static String decodeQ(String encodedWord, String charset) { + static String decodeQ(String encodedWord, String charset) { /* * Replace _ with =20 @@ -108,11 +108,7 @@ class DecoderUtil { while (true) { int begin = body.indexOf("=?", previousEnd); if (begin == -1) { - if (previousWord != null) { - sb.append(decodeEncodedWord(previousWord)); - previousWord = null; - } - sb.append(body.substring(previousEnd)); + decodePreviousAndAppendSuffix(sb, previousWord, body, previousEnd); return sb.toString(); } @@ -121,31 +117,19 @@ class DecoderUtil { // to find the two '?' in the "header", before looking for the final "?=". int qm1 = body.indexOf('?', begin + 2); if (qm1 == -1) { - if (previousWord != null) { - sb.append(decodeEncodedWord(previousWord)); - previousWord = null; - } - sb.append(body.substring(previousEnd)); + decodePreviousAndAppendSuffix(sb, previousWord, body, previousEnd); return sb.toString(); } int qm2 = body.indexOf('?', qm1 + 1); if (qm2 == -1) { - if (previousWord != null) { - sb.append(decodeEncodedWord(previousWord)); - previousWord = null; - } - sb.append(body.substring(previousEnd)); + decodePreviousAndAppendSuffix(sb, previousWord, body, previousEnd); return sb.toString(); } int end = body.indexOf("?=", qm2 + 1); if (end == -1) { - if (previousWord != null) { - sb.append(decodeEncodedWord(previousWord)); - previousWord = null; - } - sb.append(body.substring(previousEnd)); + decodePreviousAndAppendSuffix(sb, previousWord, body, previousEnd); return sb.toString(); } end += 2; @@ -154,42 +138,42 @@ class DecoderUtil { EncodedWord word = extractEncodedWord(body, begin, end, message); - if (word == null) { - if (previousWord != null) { + if (previousWord == null) { + sb.append(sep); + if (word == null) { + sb.append(body.substring(begin, end)); + } + } else { + if (word == null) { sb.append(decodeEncodedWord(previousWord)); sb.append(sep); sb.append(body.substring(begin, end)); - previousWord = null; } else { -// sb.append(sep); - } - } else { - if (previousWord != null) { - if (previousWord.encoding.equals(word.encoding) && previousWord.charset.equals(word.charset)) { - previousWord.encodedText += word.encodedText; + if (!CharsetUtil.isWhitespace(sep)) { + sb.append(decodeEncodedWord(previousWord)); + sb.append(sep); + } else if (previousWord.encoding.equals(word.encoding) && + previousWord.charset.equals(word.charset)) { + word.encodedText = previousWord.encodedText + word.encodedText; } else { sb.append(decodeEncodedWord(previousWord)); - sb.append(sep.trim()); - previousWord = word; } - } else { - sb.append(sep.trim()); - previousWord = word; } } - if (previousWord == null) { - sb.append(sep); - sb.append(body.substring(begin, end)); - } - + previousWord = word; previousEnd = end; } } - // return null on error - private static String decodeEncodedWord(String body, int begin, int end, Message message) { - return decodeEncodedWord(extractEncodedWord(body, begin, end, message)); + private static void decodePreviousAndAppendSuffix(StringBuilder sb, EncodedWord previousWord, String body, + int previousEnd) { + + if (previousWord != null) { + sb.append(decodeEncodedWord(previousWord)); + } + + sb.append(body.substring(previousEnd)); } private static String decodeEncodedWord(EncodedWord word) {