Take special care when decoding encoded words with charset ISO-2022-JP
This commit is contained in:
parent
94548c11a8
commit
9861fc4d5a
5 changed files with 49 additions and 3 deletions
|
@ -19,6 +19,7 @@ dependencies {
|
|||
testImplementation "com.google.truth:truth:${versions.truth}"
|
||||
testImplementation "org.mockito:mockito-core:${versions.mockito}"
|
||||
testImplementation "org.mockito.kotlin:mockito-kotlin:${versions.mockitoKotlin}"
|
||||
testImplementation "com.ibm.icu:icu4j-charset:70.1"
|
||||
}
|
||||
|
||||
android {
|
||||
|
|
|
@ -81,7 +81,7 @@ internal object DecoderUtil {
|
|||
} else if (!CharsetUtil.isWhitespace(sep)) {
|
||||
output.append(charsetDecode(previousWord))
|
||||
output.append(sep)
|
||||
} else if (previousWord.isTypeEqualTo(word)) {
|
||||
} else if (previousWord.canBeCombinedWith(word)) {
|
||||
word.data = previousWord.data + word.data
|
||||
} else {
|
||||
output.append(charsetDecode(previousWord))
|
||||
|
@ -179,13 +179,19 @@ internal object DecoderUtil {
|
|||
return Buffer().write(this).write(second).readByteString()
|
||||
}
|
||||
|
||||
// The three bytes ESC ( B (0x1B 0x28 0x42); as the name says, the escape sequence that switches to ASCII.
private val ASCII_ESCAPE_SEQUENCE = byteArrayOf(0x1B, 0x28, 0x42)
|
||||
|
||||
// One parsed encoded word: its charset name, its transfer encoding, and its payload bytes.
private class EncodedWord(
|
||||
val charset: String,
|
||||
val encoding: Encoding,
|
||||
// Mutable: the decode loop prepends the previous word's bytes when two words are combined.
var data: ByteString
|
||||
) {
|
||||
// NOTE(review): this signature and the return line below look like the pre-change (removed) side of the
// diff; `canBeCombinedWith` appears to be its replacement. Confirm against the real file.
fun isTypeEqualTo(other: EncodedWord): Boolean {
|
||||
return encoding == other.encoding && charset == other.charset
|
||||
// True when [other] has the same encoding and charset as this word AND this word is not an
// ISO-2022-JP word whose data ends with the switch-to-ASCII escape sequence.
fun canBeCombinedWith(other: EncodedWord): Boolean {
|
||||
return encoding == other.encoding && charset == other.charset && !isAsciiEscapeSequence()
|
||||
}
|
||||
|
||||
// True when the charset name starts with "ISO-2022-JP" (case-insensitive) and the data ends with
// ASCII_ESCAPE_SEQUENCE.
private fun isAsciiEscapeSequence(): Boolean {
|
||||
return charset.startsWith("ISO-2022-JP", ignoreCase = true) && data.endsWith(ASCII_ESCAPE_SEQUENCE)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -220,6 +220,16 @@ public class DecoderUtilTest {
|
|||
assertInputDecodesToExpected("=?utf-8*de?b?R3LDvMOfZQ==?=", "Grüße");
|
||||
}
|
||||
|
||||
// Two folded ISO-2022-JP encoded words, the first ending with the switch-to-ASCII escape sequence, must decode
// to the expected text without a replacement character.
@Test
|
||||
public void decodeEncodedWords_withMultipleIso2022JpEncodedWordsProperlyEndingWithSwitchingToAscii() {
|
||||
// If we try to combine the base64-decoded data of both encoded words and only then perform the charset
|
||||
// decoding, we end up with an escape sequence switching to ASCII (end of first encoded word) followed by an
|
||||
// escape sequence switching to JIS X 0208:1983 (start of second encoded word). The decoder on Android reports
|
||||
// an error for this case, leading to a replacement character being inserted.
|
||||
// We use the ISO-2022-JP-TEST charset to get Android's behavior on the JVM. See TestCharsetProvider.
|
||||
assertInputDecodesToExpected("=?ISO-2022-JP-TEST?B?GyRCRnxLXDhsJEhGfEtcOGwkSEZ8S1w4bCROJUElJyVDGyhC?=\r\n" +
|
||||
" =?ISO-2022-JP-TEST?B?GyRCJS8bKEI=?=", "日本語と日本語と日本語のチェック");
|
||||
}
|
||||
|
||||
private void assertInputDecodesToExpected(String input, String expected) {
|
||||
String decodedText = DecoderUtil.decodeEncodedWords(input, null);
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
package com.fsck.k9.mail.internet
|
||||
|
||||
import com.ibm.icu.charset.CharsetProviderICU
|
||||
import java.nio.charset.Charset
|
||||
import java.nio.charset.spi.CharsetProvider
|
||||
|
||||
/**
|
||||
* CharsetProvider that adds the "ISO-2022-JP-TEST" charset.
|
||||
*
|
||||
* The "ISO-2022-JP" decoder on the JVM is more lenient than the ICU4J decoder that is used on Android. For tests we
|
||||
* use the ICU4J implementation that is also used on Android.
|
||||
*/
|
||||
class TestCharsetProvider : CharsetProvider() {
|
||||
// ICU4J's charset provider, used here only to look up its "ISO-2022-JP" implementation.
private val icuCharsetProvider = CharsetProviderICU()
|
||||
// The ICU4J "ISO-2022-JP" charset, looked up once when the provider is constructed.
private val charset = icuCharsetProvider.charsetForName("ISO-2022-JP")
|
||||
|
||||
// Exposes exactly one charset: the ICU4J-backed one held in `charset`.
override fun charsets(): Iterator<Charset> {
|
||||
return listOf(charset).iterator()
|
||||
}
|
||||
|
||||
// Returns the ICU4J charset only for the name "ISO-2022-JP-TEST" (case-insensitive); any other name,
// including null, yields null.
override fun charsetForName(charsetName: String?): Charset? {
|
||||
return if (charsetName?.equals("ISO-2022-JP-TEST", ignoreCase = true) == true) {
|
||||
charset
|
||||
} else {
|
||||
null
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
com.fsck.k9.mail.internet.TestCharsetProvider
|
Loading…
Reference in a new issue