Merge pull request #5958 from k9mail/duplicate_charset_value

Add support for duplicate "charset" parameters with matching values
This commit is contained in:
cketti 2022-03-11 18:44:18 +01:00 committed by GitHub
commit 35da101507
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 99 additions and 2 deletions

View file

@ -23,7 +23,6 @@ import org.apache.commons.io.input.BoundedInputStream;
import timber.log.Timber;
import static com.fsck.k9.mail.internet.CharsetSupport.fixupCharset;
import static com.fsck.k9.mail.internet.MimeUtility.getHeaderParameter;
import static com.fsck.k9.mail.internet.MimeUtility.isSameMimeType;
import static com.fsck.k9.mail.internet.Viewable.Alternative;
import static com.fsck.k9.mail.internet.Viewable.Html;
@ -77,7 +76,7 @@ public class MessageExtractor {
/*
* We've got a text part, so let's see if it needs to be processed further.
*/
String charset = getHeaderParameter(part.getContentType(), "charset");
String charset = PartExtensions.getCharset(part);
/*
* Determine the charset from the HTML message.
*/

View file

@ -54,6 +54,26 @@ object MimeParameterDecoder {
)
}
/**
 * Decodes [headerBody] into a [MimeValue] by reading the header value and then, if anything
 * follows it, the parameters returned by `readBasicParameters`.
 *
 * The returned [MimeValue] carries:
 * - `value`: the header value read by the parser,
 * - `parameters`: each parameter name mapped to the `value` of its parsed parameter,
 * - `ignoredParameters`: the duplicate parameters reported by `readBasicParameters`,
 * - `parserErrorIndex`: the error index reported by `readBasicParameters`.
 *
 * @param headerBody the raw header body to parse.
 * @return the decoded value; it has no parameters when nothing follows the header value.
 */
fun decodeBasic(headerBody: String): MimeValue {
    val headerParser = MimeHeaderParser(headerBody)
    val headerValue = headerParser.readHeaderValue()
    headerParser.skipCFWS()

    // Nothing is left after the value, so there are no parameters to read.
    if (headerParser.endReached()) return MimeValue(headerValue)

    val (parsedParameters, duplicates, errorIndex) = readBasicParameters(headerParser)

    return MimeValue(
        value = headerValue,
        parameters = parsedParameters.mapValues { (_, parameter) -> parameter.value },
        ignoredParameters = duplicates,
        parserErrorIndex = errorIndex
    )
}
@JvmStatic
fun extractHeaderValue(headerBody: String): String {
val parser = MimeHeaderParser(headerBody)

View file

@ -0,0 +1,24 @@
@file:JvmName("PartExtensions")
package com.fsck.k9.mail.internet
import com.fsck.k9.mail.Part
/**
 * The `charset` parameter value of this [Part]'s `Content-Type` header.
 *
 * Evaluates to `null` when the part has no `Content-Type` header. Otherwise the header is decoded
 * with [MimeParameterDecoder.decodeBasic]; if the decoded parameters contain no `charset` entry,
 * the duplicate parameters reported by the decoder are consulted and their `charset` value is used
 * as long as all of them agree.
 */
val Part.charset: String?
    get() = contentType?.let { header ->
        val (_, uniqueParameters, repeatedParameters) = MimeParameterDecoder.decodeBasic(header)
        uniqueParameters["charset"] ?: extractNonConflictingCharsetValue(repeatedParameters)
    }
/**
 * Picks the `charset` value out of [duplicateParameters] when every `charset` entry agrees.
 *
 * Values are compared after lowercasing, so a returned value is always lowercase. Only entries
 * whose name is exactly `"charset"` are considered.
 *
 * @param duplicateParameters name/value pairs of parameters that occurred more than once.
 * @return the single agreed-upon charset, or `null` if there is no `charset` entry or the
 * entries disagree.
 */
private fun extractNonConflictingCharsetValue(duplicateParameters: List<Pair<String, String>>): String? {
    val distinctCharsets = mutableSetOf<String>()
    for ((name, value) in duplicateParameters) {
        if (name == "charset") distinctCharsets += value.lowercase()
    }

    // Exactly one distinct value means all duplicates agree; zero or several yields null.
    return distinctCharsets.singleOrNull()
}

View file

@ -0,0 +1,54 @@
package com.fsck.k9.mail.internet
import com.google.common.truth.Truth.assertThat
import org.junit.Test
/**
 * Tests for the `charset` extension property on parts, driven through `Content-Type` header values.
 *
 * Expected values are compared case-insensitively (see [assertGetCharset]).
 */
class PartExtensionsTest {
    // No parameters at all: there is nothing to return.
    @Test
    fun `get charset without charset parameter`() =
        assertGetCharset(
            headerValue = "text/plain",
            expectedCharset = null
        )

    @Test
    fun `get charset with single charset parameter`() =
        assertGetCharset(
            headerValue = "text/plain; charset=UTF-8",
            expectedCharset = "utf-8"
        )

    // The surrounding quotes must not be part of the returned value.
    @Test
    fun `get charset with single quoted charset parameter`() =
        assertGetCharset(
            headerValue = "text/plain; charset=\"iso-8859-1\"",
            expectedCharset = "ISO-8859-1"
        )

    @Test
    fun `get charset with two charset parameters where values match exactly`() =
        assertGetCharset(
            headerValue = "text/plain; charset=utf-8; charset=utf-8",
            expectedCharset = "utf-8"
        )

    @Test
    fun `get charset with two charset parameters where values differ in case`() =
        assertGetCharset(
            headerValue = "text/plain; charset=utf-8; charset=UTF-8",
            expectedCharset = "utf-8"
        )

    @Test
    fun `get charset with two charset parameters where values differ in quoting`() =
        assertGetCharset(
            headerValue = "text/plain; charset=utf-8; charset=\"utf-8\"",
            expectedCharset = "utf-8"
        )

    // Duplicates that disagree cannot be resolved, so no charset is reported.
    @Test
    fun `get charset with two charset parameters with conflicting values`() =
        assertGetCharset(
            headerValue = "text/plain; charset=utf-8; charset=iso-8859-1",
            expectedCharset = null
        )

    // The `charset*=` extended form is expected to yield no charset.
    @Test
    fun `get charset with extended parameter syntax`() =
        assertGetCharset(
            headerValue = "text/plain; charset*=us-ascii'en-us'utf-8",
            expectedCharset = null
        )

    /**
     * Creates a body part whose content type is [headerValue] and asserts that its `charset`
     * equals [expectedCharset], ignoring case.
     */
    private fun assertGetCharset(headerValue: String, expectedCharset: String?) {
        val actualCharset = MimeBodyPart.create(null, headerValue).charset

        assertThat(actualCharset).ignoringCase().isEqualTo(expectedCharset)
    }
}