Take special care when decoding encoded words with charset ISO-2022-JP

2021-11-03 13:56:35 +01:00 · 2021-11-03 13:56:35 +01:00 · 9861fc4d5a
commit 9861fc4d5a
parent 94548c11a8
5 changed files with 49 additions and 3 deletions
--- a/mail/common/build.gradle
+++ b/mail/common/build.gradle
@@ -19,6 +19,7 @@ dependencies {
    testImplementation "com.google.truth:truth:${versions.truth}"
    testImplementation "org.mockito:mockito-core:${versions.mockito}"
    testImplementation "org.mockito.kotlin:mockito-kotlin:${versions.mockitoKotlin}"
+    testImplementation "com.ibm.icu:icu4j-charset:70.1"
 }

 android {
--- a/mail/common/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.kt
+++ b/mail/common/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.kt
@@ -81,7 +81,7 @@ internal object DecoderUtil {
            } else if (!CharsetUtil.isWhitespace(sep)) {
                output.append(charsetDecode(previousWord))
                output.append(sep)
-            } else if (previousWord.isTypeEqualTo(word)) {
+            } else if (previousWord.canBeCombinedWith(word)) {
                word.data = previousWord.data + word.data
            } else {
                output.append(charsetDecode(previousWord))
@@ -179,13 +179,19 @@ internal object DecoderUtil {
        return Buffer().write(this).write(second).readByteString()
    }

+    private val ASCII_ESCAPE_SEQUENCE = byteArrayOf(0x1B, 0x28, 0x42)
+
    private class EncodedWord(
        val charset: String,
        val encoding: Encoding,
        var data: ByteString
    ) {
-        fun isTypeEqualTo(other: EncodedWord): Boolean {
-            return encoding == other.encoding && charset == other.charset
+        fun canBeCombinedWith(other: EncodedWord): Boolean {
+            return encoding == other.encoding && charset == other.charset && !isAsciiEscapeSequence()
+        }
+
+        private fun isAsciiEscapeSequence(): Boolean {
+            return charset.startsWith("ISO-2022-JP", ignoreCase = true) && data.endsWith(ASCII_ESCAPE_SEQUENCE)
        }
    }

--- a/mail/common/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java
+++ b/mail/common/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java
@@ -220,6 +220,16 @@ public class DecoderUtilTest {
        assertInputDecodesToExpected("=?utf-8*de?b?R3LDvMOfZQ==?=", "Grüße");
    }

+    @Test
+    public void decodeEncodedWords_withMultipleIso2022JpEncodedWordsProperlyEndingWithSwitchingToAscii() {
+        // If we try to combine the base64-decoded data of both encoded words and only then perform the charset
+        // decoding, we end up with an escape sequence switching to ASCII (end of first encoded word) followed by an
+        // escape sequence switching to JIS X 0208:1983 (start of second encoded word). The decoder on Android reports
+        // an error for this case, leading to a replacement character being inserted.
+        // We use the ISO-2022-JP-TEST charset to get Android's behavior on the JVM. See TestCharsetProvider.
+        assertInputDecodesToExpected("=?ISO-2022-JP-TEST?B?GyRCRnxLXDhsJEhGfEtcOGwkSEZ8S1w4bCROJUElJyVDGyhC?=\r\n" +
+                " =?ISO-2022-JP-TEST?B?GyRCJS8bKEI=?=", "日本語と日本語と日本語のチェック");
+    }

    private void assertInputDecodesToExpected(String input, String expected) {
        String decodedText = DecoderUtil.decodeEncodedWords(input, null);
--- a/mail/common/src/test/java/com/fsck/k9/mail/internet/TestCharsetProvider.kt
+++ b/mail/common/src/test/java/com/fsck/k9/mail/internet/TestCharsetProvider.kt
@@ -0,0 +1,28 @@
+package com.fsck.k9.mail.internet
+
+import com.ibm.icu.charset.CharsetProviderICU
+import java.nio.charset.Charset
+import java.nio.charset.spi.CharsetProvider
+
+/**
+ * CharsetProvider that adds the "ISO-2022-JP-TEST" charset.
+ *
+ * The "ISO-2022-JP" decoder on the JVM is more lenient than the ICU4J decoder that is used on Android. For tests we
+ * use the ICU4J implementation that is also used on Android.
+ */
+class TestCharsetProvider : CharsetProvider() {
+    private val icuCharsetProvider = CharsetProviderICU()
+    private val charset = icuCharsetProvider.charsetForName("ISO-2022-JP")
+
+    override fun charsets(): Iterator<Charset> {
+        return listOf(charset).iterator()
+    }
+
+    override fun charsetForName(charsetName: String?): Charset? {
+        return if (charsetName?.equals("ISO-2022-JP-TEST", ignoreCase = true) == true) {
+            charset
+        } else {
+            null
+        }
+    }
+}
--- a/mail/common/src/test/resources/META-INF/services/java.nio.charset.spi.CharsetProvider
+++ b/mail/common/src/test/resources/META-INF/services/java.nio.charset.spi.CharsetProvider
@@ -0,0 +1 @@
+com.fsck.k9.mail.internet.TestCharsetProvider
				`@@ -0,0 +1 @@`
				`com.fsck.k9.mail.internet.TestCharsetProvider`