Add support for RFC 2047 non-compliant splitting of UTF-8 encoded characters
This commit is contained in:
parent
79582f12e4
commit
f88c3594fc
2 changed files with 78 additions and 14 deletions
|
@ -21,6 +21,13 @@ import timber.log.Timber;
|
||||||
* it has to be determined with the sender address, the mailer and so on.
|
* it has to be determined with the sender address, the mailer and so on.
|
||||||
*/
|
*/
|
||||||
class DecoderUtil {
|
class DecoderUtil {
|
||||||
|
|
||||||
|
private static class EncodedWord {
|
||||||
|
private String charset;
|
||||||
|
private String encoding;
|
||||||
|
private String encodedText;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decodes an encoded word encoded with the 'B' encoding (described in
|
* Decodes an encoded word encoded with the 'B' encoding (described in
|
||||||
* RFC 2047) found in a header field body.
|
* RFC 2047) found in a header field body.
|
||||||
|
@ -93,14 +100,18 @@ class DecoderUtil {
|
||||||
return body;
|
return body;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EncodedWord previousWord = null;
|
||||||
int previousEnd = 0;
|
int previousEnd = 0;
|
||||||
boolean previousWasEncoded = false;
|
|
||||||
|
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
int begin = body.indexOf("=?", previousEnd);
|
int begin = body.indexOf("=?", previousEnd);
|
||||||
if (begin == -1) {
|
if (begin == -1) {
|
||||||
|
if (previousWord != null) {
|
||||||
|
sb.append(decodeEncodedWord(previousWord));
|
||||||
|
previousWord = null;
|
||||||
|
}
|
||||||
sb.append(body.substring(previousEnd));
|
sb.append(body.substring(previousEnd));
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
@ -110,18 +121,30 @@ class DecoderUtil {
|
||||||
// to find the two '?' in the "header", before looking for the final "?=".
|
// to find the two '?' in the "header", before looking for the final "?=".
|
||||||
int qm1 = body.indexOf('?', begin + 2);
|
int qm1 = body.indexOf('?', begin + 2);
|
||||||
if (qm1 == -1) {
|
if (qm1 == -1) {
|
||||||
|
if (previousWord != null) {
|
||||||
|
sb.append(decodeEncodedWord(previousWord));
|
||||||
|
previousWord = null;
|
||||||
|
}
|
||||||
sb.append(body.substring(previousEnd));
|
sb.append(body.substring(previousEnd));
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
int qm2 = body.indexOf('?', qm1 + 1);
|
int qm2 = body.indexOf('?', qm1 + 1);
|
||||||
if (qm2 == -1) {
|
if (qm2 == -1) {
|
||||||
|
if (previousWord != null) {
|
||||||
|
sb.append(decodeEncodedWord(previousWord));
|
||||||
|
previousWord = null;
|
||||||
|
}
|
||||||
sb.append(body.substring(previousEnd));
|
sb.append(body.substring(previousEnd));
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
int end = body.indexOf("?=", qm2 + 1);
|
int end = body.indexOf("?=", qm2 + 1);
|
||||||
if (end == -1) {
|
if (end == -1) {
|
||||||
|
if (previousWord != null) {
|
||||||
|
sb.append(decodeEncodedWord(previousWord));
|
||||||
|
previousWord = null;
|
||||||
|
}
|
||||||
sb.append(body.substring(previousEnd));
|
sb.append(body.substring(previousEnd));
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
@ -129,24 +152,52 @@ class DecoderUtil {
|
||||||
|
|
||||||
String sep = body.substring(previousEnd, begin);
|
String sep = body.substring(previousEnd, begin);
|
||||||
|
|
||||||
String decoded = decodeEncodedWord(body, begin, end, message);
|
EncodedWord word = extractEncodedWord(body, begin, end, message);
|
||||||
if (decoded == null) {
|
|
||||||
|
if (word == null) {
|
||||||
|
if (previousWord != null) {
|
||||||
|
sb.append(decodeEncodedWord(previousWord));
|
||||||
|
sb.append(sep);
|
||||||
|
previousWord = null;
|
||||||
|
}
|
||||||
|
} else if (previousWord != null) {
|
||||||
|
if (previousWord.encoding.equals(word.encoding) && previousWord.charset.equals(word.charset)) {
|
||||||
|
previousWord.encodedText += word.encodedText;
|
||||||
|
} else {
|
||||||
|
sb.append(decodeEncodedWord(previousWord));
|
||||||
|
sb.append(sep);
|
||||||
|
previousWord = word;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
previousWord = word;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (previousWord == null) {
|
||||||
sb.append(sep);
|
sb.append(sep);
|
||||||
sb.append(body.substring(begin, end));
|
sb.append(body.substring(begin, end));
|
||||||
} else {
|
|
||||||
if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
|
|
||||||
sb.append(sep);
|
|
||||||
}
|
|
||||||
sb.append(decoded);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
previousEnd = end;
|
previousEnd = end;
|
||||||
previousWasEncoded = decoded != null;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// return null on error
|
// return null on error
|
||||||
private static String decodeEncodedWord(String body, int begin, int end, Message message) {
|
private static String decodeEncodedWord(String body, int begin, int end, Message message) {
|
||||||
|
return decodeEncodedWord(extractEncodedWord(body, begin, end, message));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String decodeEncodedWord(EncodedWord word) {
|
||||||
|
if (word.encoding.equals("Q")) {
|
||||||
|
return decodeQ(word.encodedText, word.charset);
|
||||||
|
} else if (word.encoding.equals("B")) {
|
||||||
|
return DecoderUtil.decodeB(word.encodedText, word.charset);
|
||||||
|
} else {
|
||||||
|
Timber.w("Warning: Unknown encoding '%s'", word.encoding);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static EncodedWord extractEncodedWord(String body, int begin, int end, Message message) {
|
||||||
int qm1 = body.indexOf('?', begin + 2);
|
int qm1 = body.indexOf('?', begin + 2);
|
||||||
if (qm1 == end - 2)
|
if (qm1 == end - 2)
|
||||||
return null;
|
return null;
|
||||||
|
@ -171,13 +222,17 @@ class DecoderUtil {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EncodedWord encodedWord = new EncodedWord();
|
||||||
|
encodedWord.charset = charset;
|
||||||
if (encoding.equalsIgnoreCase("Q")) {
|
if (encoding.equalsIgnoreCase("Q")) {
|
||||||
return decodeQ(encodedText, charset);
|
encodedWord.encoding = "Q";
|
||||||
} else if (encoding.equalsIgnoreCase("B")) {
|
} else if (encoding.equalsIgnoreCase("B")) {
|
||||||
return DecoderUtil.decodeB(encodedText, charset);
|
encodedWord.encoding = "B";
|
||||||
} else {
|
} else {
|
||||||
Timber.w("Warning: Unknown encoding in encoded word '%s'", body.substring(begin, end));
|
Timber.w("Warning: Unknown encoding in encoded word '%s'", body.substring(begin, end));
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
encodedWord.encodedText = encodedText;
|
||||||
|
return encodedWord;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -169,10 +169,19 @@ public class DecoderUtilTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void decodeEncodedWords_withMultipleEncodedSections_decodesAll() {
|
public void decodeEncodedWords_withMultipleEncodedSections_decodesBoth() {
|
||||||
|
body = "=?us-ascii?q?abc?= =?us-ascii?q?def?=";
|
||||||
|
expect = "abcdef";
|
||||||
|
message = null;
|
||||||
|
assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void decodeEncodedWords_withMultipleEncodedSections_decodesSequentialSectionTogether() {
|
||||||
|
// Splitting an encoded word mid-character is not RFC 2047 compliant, but it is seen in practice.
|
||||||
body = "=?utf-8?B?5Liq5Lq66YKu566xOkJVRyAjMzAyNDY6OumCruS7tuato+aWh+mZhOS7tuWQ?=\n" +
|
body = "=?utf-8?B?5Liq5Lq66YKu566xOkJVRyAjMzAyNDY6OumCruS7tuato+aWh+mZhOS7tuWQ?=\n" +
|
||||||
"=?utf-8?B?jeensOecgeeVpeaYvuekuuS8mOWMlg==?=";
|
"=?utf-8?B?jeensOecgeeVpeaYvuekuuS8mOWMlg==?=";
|
||||||
expect = "个人邮箱:BUG #30246::邮件正文附件<E99984><E4BBB6>称省略显示优化";
|
expect = "个人邮箱:BUG #30246::邮件正文附件名称省略显示优化";
|
||||||
message = null;
|
message = null;
|
||||||
assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message));
|
assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message));
|
||||||
}
|
}
|
||||||
|
@ -181,7 +190,7 @@ public class DecoderUtilTest {
|
||||||
public void decodeEncodedWords_withGB2312_decodes_correctly() {
|
public void decodeEncodedWords_withGB2312_decodes_correctly() {
|
||||||
body = "=?gb2312?B?Obv9t9az6cnu29rHsLqju6rHyLPHSlfN8rrAvsa16qOsuPzT0DIwvNIzOTnU?= " +
|
body = "=?gb2312?B?Obv9t9az6cnu29rHsLqju6rHyLPHSlfN8rrAvsa16qOsuPzT0DIwvNIzOTnU?= " +
|
||||||
"=?gb2312?B?qr6r0aG439DHytTLr77Gteq1yMTjwLSjoaOoQUSjqQ?=";
|
"=?gb2312?B?qr6r0aG439DHytTLr77Gteq1yMTjwLSjoaOoQUSjqQ?=";
|
||||||
expect = "9积分抽深圳前海华侨城JW万豪酒店,更有20家399��精选高星试睡酒店等你来!(AD�";
|
expect = "9积分抽深圳前海华侨城JW万豪酒店,更有20家399元精选高星试睡酒店等你来!(AD�";
|
||||||
message = null;
|
message = null;
|
||||||
assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message));
|
assertEquals(expect, DecoderUtil.decodeEncodedWords(body, message));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue