From 267c6584925b90185e3d6b26c40aefa03642c8db Mon Sep 17 00:00:00 2001 From: cketti Date: Sun, 4 Oct 2020 22:19:39 +0200 Subject: [PATCH 1/3] Only consider US-ASCII and UTF-8 in EncoderUtil.determineCharset() --- .../java/com/fsck/k9/mail/internet/EncoderUtil.java | 10 ++-------- .../java/com/fsck/k9/mail/internet/EncoderUtilTest.kt | 4 ++-- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/mail/common/src/main/java/com/fsck/k9/mail/internet/EncoderUtil.java b/mail/common/src/main/java/com/fsck/k9/mail/internet/EncoderUtil.java index ac08775e9..ed016a77a 100644 --- a/mail/common/src/main/java/com/fsck/k9/mail/internet/EncoderUtil.java +++ b/mail/common/src/main/java/com/fsck/k9/mail/internet/EncoderUtil.java @@ -164,20 +164,14 @@ class EncoderUtil { } private static Charset determineCharset(String text) { - // it is an important property of iso-8859-1 that it directly maps - // unicode code points 0000 to 00ff to byte values 00 to ff. - boolean ascii = true; final int len = text.length(); for (int index = 0; index < len; index++) { char ch = text.charAt(index); - if (ch > 0xff) { + if (ch > 0x7f) { return Charsets.UTF_8; } - if (ch > 0x7f) { - ascii = false; - } } - return ascii ? Charsets.US_ASCII : Charsets.ISO_8859_1; + return Charsets.US_ASCII; } private static Encoding determineEncoding(byte[] bytes) { diff --git a/mail/common/src/test/java/com/fsck/k9/mail/internet/EncoderUtilTest.kt b/mail/common/src/test/java/com/fsck/k9/mail/internet/EncoderUtilTest.kt index 879b67549..70fa5236f 100644 --- a/mail/common/src/test/java/com/fsck/k9/mail/internet/EncoderUtilTest.kt +++ b/mail/common/src/test/java/com/fsck/k9/mail/internet/EncoderUtilTest.kt @@ -6,12 +6,12 @@ import org.junit.Test class EncoderUtilTest { @Test fun singleNonAsciiCharacter() { - assertInputEncodesToExpected("123456789Ä", "=?ISO-8859-1?Q?123456789=C4?=") + assertInputEncodesToExpected("123456789Ä", "=?UTF-8?Q?123456789=C3=84?=") } @Test fun onlyNonAsciiCharacters() { - assertInputEncodesToExpected("ÄÖÜÄÖÜÄÖÜÄ", "=?ISO-8859-1?B?xNbcxNbcxNbcxA==?=") + assertInputEncodesToExpected("ÄÖÜÄÖÜÄÖÜÄ", "=?UTF-8?B?w4TDlsOcw4TDlsOcw4TDlsOcw4Q=?=") } @Test From 2a3cc1d4224070ea01c5c000099046d2fbb0103d Mon Sep 17 00:00:00 2001 From: cketti Date: Sun, 4 Oct 2020 22:41:51 +0200 Subject: [PATCH 2/3] Don't pass charset to EncoderUtil.encodeEncodedWord() --- .../com/fsck/k9/mail/internet/EncoderUtil.java | 16 ++++------------ .../java/com/fsck/k9/mail/internet/MimeHeader.kt | 4 +--- .../com/fsck/k9/mail/internet/EncoderUtilTest.kt | 2 +- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/mail/common/src/main/java/com/fsck/k9/mail/internet/EncoderUtil.java b/mail/common/src/main/java/com/fsck/k9/mail/internet/EncoderUtil.java index ed016a77a..281604a36 100644 --- a/mail/common/src/main/java/com/fsck/k9/mail/internet/EncoderUtil.java +++ b/mail/common/src/main/java/com/fsck/k9/mail/internet/EncoderUtil.java @@ -12,9 +12,7 @@ import org.apache.james.mime4j.Charsets; * as defined in RFC 2047 * or display-names of an e-mail address, for example. * - * This class is copied from the org.apache.james.mime4j.decoder.EncoderUtil class. It's modified here in order to - * encode emoji characters in the Subject headers. The method to decode emoji depends on the MimeMessage class because - * it has to be determined with the sender address. + * This class is copied from the org.apache.james.mime4j.decoder.EncoderUtil class. */ class EncoderUtil { private static final BitSet Q_RESTRICTED_CHARS = initChars("=_?\"#$%&'(),.:;<>@[\\]^`{|}~"); @@ -54,21 +52,15 @@ class EncoderUtil { * * @param text * text to encode. - * @param charset - * the Java charset that should be used to encode the specified - * string into a byte array. A suitable charset is detected - * automatically if this parameter is null. * @return the encoded word (or sequence of encoded words if the given text * does not fit in a single encoded word). */ - public static String encodeEncodedWord(String text, Charset charset) { + public static String encodeEncodedWord(String text) { if (text == null) throw new IllegalArgumentException(); - if (charset == null) - charset = determineCharset(text); - - String mimeCharset = CharsetSupport.getExternalCharset(charset.name()); + Charset charset = determineCharset(text); + String mimeCharset = charset.name(); byte[] bytes = encode(text, charset); diff --git a/mail/common/src/main/java/com/fsck/k9/mail/internet/MimeHeader.kt b/mail/common/src/main/java/com/fsck/k9/mail/internet/MimeHeader.kt index 8dbb7c72f..155ec2a10 100644 --- a/mail/common/src/main/java/com/fsck/k9/mail/internet/MimeHeader.kt +++ b/mail/common/src/main/java/com/fsck/k9/mail/internet/MimeHeader.kt @@ -4,7 +4,6 @@ import com.fsck.k9.mail.internet.MimeHeader.Field.NameValueField import com.fsck.k9.mail.internet.MimeHeader.Field.RawField import java.io.IOException import java.io.OutputStream -import java.nio.charset.Charset import java.util.ArrayList import java.util.LinkedHashSet @@ -76,8 +75,7 @@ class MimeHeader { private fun Appendable.appendNameValueField(field: Field) { val value = field.value val encodedValue = if (hasToBeEncoded(value)) { - val charset = this@MimeHeader.charset?.let { Charset.forName(it) } - EncoderUtil.encodeEncodedWord(value, charset) + EncoderUtil.encodeEncodedWord(value) } else { value } diff --git a/mail/common/src/test/java/com/fsck/k9/mail/internet/EncoderUtilTest.kt b/mail/common/src/test/java/com/fsck/k9/mail/internet/EncoderUtilTest.kt index 70fa5236f..422c7b899 100644 --- a/mail/common/src/test/java/com/fsck/k9/mail/internet/EncoderUtilTest.kt +++ b/mail/common/src/test/java/com/fsck/k9/mail/internet/EncoderUtilTest.kt @@ -27,7 +27,7 @@ class EncoderUtilTest { } private fun assertInputEncodesToExpected(input: String, expected: String) { - val encodedText = EncoderUtil.encodeEncodedWord(input, null) + val encodedText = EncoderUtil.encodeEncodedWord(input) assertEquals(expected, encodedText) } } From 3c0d9b99f9b20217b00dfde1e4a2a7a576f3d46b Mon Sep 17 00:00:00 2001 From: cketti Date: Sun, 4 Oct 2020 23:03:04 +0200 Subject: [PATCH 3/3] Remove unused Message.setCharset() mechanism --- .../main/java/com/fsck/k9/mail/Message.java | 2 -- .../main/java/com/fsck/k9/mail/Multipart.java | 15 ---------- .../fsck/k9/mail/internet/CharsetSupport.java | 13 -------- .../com/fsck/k9/mail/internet/MimeHeader.kt | 5 ---- .../fsck/k9/mail/internet/MimeMessage.java | 11 ------- .../com/fsck/k9/mail/internet/TextBody.java | 30 +++++++------------ .../test/java/com/fsck/k9/mail/MessageTest.kt | 18 +++++------ 7 files changed, 17 insertions(+), 77 deletions(-) diff --git a/mail/common/src/main/java/com/fsck/k9/mail/Message.java b/mail/common/src/main/java/com/fsck/k9/mail/Message.java index 63e1bc3ca..538aa663a 100644 --- a/mail/common/src/main/java/com/fsck/k9/mail/Message.java +++ b/mail/common/src/main/java/com/fsck/k9/mail/Message.java @@ -157,8 +157,6 @@ public abstract class Message implements Part, Body { @Override public abstract void setEncoding(String encoding) throws MessagingException; - public abstract void setCharset(String charset) throws MessagingException; - public long calculateSize() { try { diff --git a/mail/common/src/main/java/com/fsck/k9/mail/Multipart.java b/mail/common/src/main/java/com/fsck/k9/mail/Multipart.java index 48f8df828..2e41b8da2 100644 --- a/mail/common/src/main/java/com/fsck/k9/mail/Multipart.java +++ b/mail/common/src/main/java/com/fsck/k9/mail/Multipart.java @@ -7,9 +7,6 @@ import java.util.List; import org.apache.james.mime4j.util.MimeUtil; -import com.fsck.k9.mail.internet.CharsetSupport; -import com.fsck.k9.mail.internet.TextBody; - public abstract class Multipart implements Body { private Part mParent; @@ -54,18 +51,6 @@ public abstract class Multipart implements Body { /* Nothing else to do. Each subpart has its own separate encoding */ } - public void setCharset(String charset) throws MessagingException { - if (mParts.isEmpty()) - return; - - BodyPart part = mParts.get(0); - Body body = part.getBody(); - if (body instanceof TextBody) { - CharsetSupport.setCharset(charset, part); - ((TextBody)body).setCharset(charset); - } - } - public abstract byte[] getPreamble(); public abstract byte[] getEpilogue(); } diff --git a/mail/common/src/main/java/com/fsck/k9/mail/internet/CharsetSupport.java b/mail/common/src/main/java/com/fsck/k9/mail/internet/CharsetSupport.java index fcc635fee..f6f0f262a 100644 --- a/mail/common/src/main/java/com/fsck/k9/mail/internet/CharsetSupport.java +++ b/mail/common/src/main/java/com/fsck/k9/mail/internet/CharsetSupport.java @@ -30,19 +30,6 @@ public class CharsetSupport { }; - public static void setCharset(String charset, Part part) { - part.setHeader(MimeHeader.HEADER_CONTENT_TYPE, - part.getMimeType() + ";\r\n charset=" + getExternalCharset(charset)); - } - - static String getExternalCharset(String charset) { - if (JisSupport.isShiftJis(charset)) { - return SHIFT_JIS; - } else { - return charset; - } - } - static String fixupCharset(String charset, Message message) throws MessagingException { if (charset == null || "0".equals(charset)) charset = "US-ASCII"; // No encoding, so use us-ascii, which is the standard. diff --git a/mail/common/src/main/java/com/fsck/k9/mail/internet/MimeHeader.kt b/mail/common/src/main/java/com/fsck/k9/mail/internet/MimeHeader.kt index 155ec2a10..31e8e5cd0 100644 --- a/mail/common/src/main/java/com/fsck/k9/mail/internet/MimeHeader.kt +++ b/mail/common/src/main/java/com/fsck/k9/mail/internet/MimeHeader.kt @@ -9,7 +9,6 @@ import java.util.LinkedHashSet class MimeHeader { private val fields: MutableList = ArrayList() - private var charset: String? = null val headerNames: Set get() = fields.mapTo(LinkedHashSet()) { it.name } @@ -90,10 +89,6 @@ class MimeHeader { return text.any { !it.isVChar() && !it.isWspOrCrlf() } } - fun setCharset(charset: String?) { - this.charset = charset - } - companion object { const val SUBJECT = "Subject" const val HEADER_CONTENT_TYPE = "Content-Type" diff --git a/mail/common/src/main/java/com/fsck/k9/mail/internet/MimeMessage.java b/mail/common/src/main/java/com/fsck/k9/mail/internet/MimeMessage.java index b7fd4a329..d8a497c5c 100644 --- a/mail/common/src/main/java/com/fsck/k9/mail/internet/MimeMessage.java +++ b/mail/common/src/main/java/com/fsck/k9/mail/internet/MimeMessage.java @@ -466,17 +466,6 @@ public class MimeMessage extends Message { setHeader(MimeHeader.HEADER_CONTENT_TRANSFER_ENCODING, encoding); } - @Override - public void setCharset(String charset) throws MessagingException { - mHeader.setCharset(charset); - if (mBody instanceof Multipart) { - ((Multipart)mBody).setCharset(charset); - } else if (mBody instanceof TextBody) { - CharsetSupport.setCharset(charset, this); - ((TextBody)mBody).setCharset(charset); - } - } - private class MimeMessageBuilder implements ContentHandler { private final LinkedList stack = new LinkedList<>(); private final BodyFactory bodyFactory; diff --git a/mail/common/src/main/java/com/fsck/k9/mail/internet/TextBody.java b/mail/common/src/main/java/com/fsck/k9/mail/internet/TextBody.java index 8df8a0909..346e7bf76 100644 --- a/mail/common/src/main/java/com/fsck/k9/mail/internet/TextBody.java +++ b/mail/common/src/main/java/com/fsck/k9/mail/internet/TextBody.java @@ -6,7 +6,6 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.UnsupportedEncodingException; import androidx.annotation.Nullable; @@ -14,9 +13,10 @@ import com.fsck.k9.mail.Body; import com.fsck.k9.mail.MessagingException; import com.fsck.k9.mail.filter.CountingOutputStream; import com.fsck.k9.mail.filter.SignSafeOutputStream; + +import org.apache.james.mime4j.Charsets; import org.apache.james.mime4j.codec.QuotedPrintableOutputStream; import org.apache.james.mime4j.util.MimeUtil; -import timber.log.Timber; public class TextBody implements Body, SizeAware { @@ -25,7 +25,6 @@ public class TextBody implements Body, SizeAware { private final String text; private String encoding; - private String charset = "UTF-8"; // Length of the message composed (as opposed to quoted). I don't like the name of this variable and am open to // suggestions as to what it should otherwise be. -achen 20101207 @Nullable @@ -41,7 +40,7 @@ public class TextBody implements Body, SizeAware { @Override public void writeTo(OutputStream out) throws IOException, MessagingException { if (text != null) { - byte[] bytes = text.getBytes(charset); + byte[] bytes = text.getBytes(Charsets.UTF_8); if (MimeUtil.ENC_QUOTED_PRINTABLE.equalsIgnoreCase(encoding)) { writeSignSafeQuotedPrintable(out, bytes); } else if (MimeUtil.ENC_8BIT.equalsIgnoreCase(encoding)) { @@ -58,18 +57,13 @@ public class TextBody implements Body, SizeAware { @Override public InputStream getInputStream() throws MessagingException { - try { - byte[] b; - if (text != null) { - b = text.getBytes(charset); - } else { - b = EMPTY_BYTE_ARRAY; - } - return new ByteArrayInputStream(b); - } catch (UnsupportedEncodingException uee) { - Timber.e(uee, "Unsupported charset: %s", charset); - return null; + byte[] b; + if (text != null) { + b = text.getBytes(Charsets.UTF_8); + } else { + b = EMPTY_BYTE_ARRAY; } + return new ByteArrayInputStream(b); } @Override @@ -83,10 +77,6 @@ public class TextBody implements Body, SizeAware { this.encoding = encoding; } - public void setCharset(String charset) { - this.charset = charset; - } - @Nullable public Integer getComposedMessageLength() { return composedMessageLength; @@ -108,7 +98,7 @@ public class TextBody implements Body, SizeAware { @Override public long getSize() { try { - byte[] bytes = text.getBytes(charset); + byte[] bytes = text.getBytes(Charsets.UTF_8); if (MimeUtil.ENC_QUOTED_PRINTABLE.equalsIgnoreCase(encoding)) { return getLengthWhenQuotedPrintableEncoded(bytes); diff --git a/mail/common/src/test/java/com/fsck/k9/mail/MessageTest.kt b/mail/common/src/test/java/com/fsck/k9/mail/MessageTest.kt index 1e9eeb6e1..43408b4df 100644 --- a/mail/common/src/test/java/com/fsck/k9/mail/MessageTest.kt +++ b/mail/common/src/test/java/com/fsck/k9/mail/MessageTest.kt @@ -2,7 +2,6 @@ package com.fsck.k9.mail import com.fsck.k9.mail.internet.BinaryTempFileBody import com.fsck.k9.mail.internet.BinaryTempFileMessageBody -import com.fsck.k9.mail.internet.CharsetSupport import com.fsck.k9.mail.internet.MimeBodyPart import com.fsck.k9.mail.internet.MimeHeader import com.fsck.k9.mail.internet.MimeMessage @@ -78,9 +77,9 @@ class MessageTest { Content-Transfer-Encoding: 7bit ------Boundary103 - Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset=utf-8 + Content-Transfer-Encoding: quoted-printable Testing=2E This is a text body with some greek characters=2E @@ -108,9 +107,9 @@ class MessageTest { Content-Transfer-Encoding: 7bit ------Boundary102 - Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset=utf-8 + Content-Transfer-Encoding: quoted-printable Testing=2E This is a text body with some greek characters=2E @@ -138,9 +137,9 @@ class MessageTest { Content-Transfer-Encoding: 7bit ------Boundary101 - Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset=utf-8 + Content-Transfer-Encoding: quoted-printable Testing=2E This is a text body with some greek characters=2E @@ -177,9 +176,9 @@ class MessageTest { Content-Transfer-Encoding: 7bit ------Boundary103 - Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset=utf-8 + Content-Transfer-Encoding: quoted-printable Testing=2E This is a text body with some greek characters=2E @@ -207,9 +206,9 @@ class MessageTest { Content-Transfer-Encoding: 7bit ------Boundary102 - Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset=utf-8 + Content-Transfer-Encoding: quoted-printable Testing=2E This is a text body with some greek characters=2E @@ -237,9 +236,9 @@ class MessageTest { Content-Transfer-Encoding: 7bit ------Boundary101 - Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset=utf-8 + Content-Transfer-Encoding: quoted-printable Testing=2E This is a text body with some greek characters=2E @@ -315,13 +314,10 @@ class MessageTest { End of test. """.trimIndent().crlf() - ).apply { - setCharset("utf-8") - } + ) return MimeBodyPart().apply { MimeMessageHelper.setBody(this, textBody) - CharsetSupport.setCharset("utf-8", this) } }