Only consider US-ASCII and UTF-8 in EncoderUtil.determineCharset()

This commit is contained in:
cketti 2020-10-04 22:19:39 +02:00
parent 8f7e9ee73c
commit 267c658492
2 changed files with 4 additions and 10 deletions

View file

@@ -164,20 +164,14 @@ class EncoderUtil {
}
private static Charset determineCharset(String text) {
// it is an important property of iso-8859-1 that it directly maps
// unicode code points 0000 to 00ff to byte values 00 to ff.
boolean ascii = true;
final int len = text.length();
for (int index = 0; index < len; index++) {
char ch = text.charAt(index);
if (ch > 0xff) {
if (ch > 0x7f) {
return Charsets.UTF_8;
}
if (ch > 0x7f) {
ascii = false;
}
}
return ascii ? Charsets.US_ASCII : Charsets.ISO_8859_1;
return Charsets.US_ASCII;
}
private static Encoding determineEncoding(byte[] bytes) {

View file

@@ -6,12 +6,12 @@ import org.junit.Test
class EncoderUtilTest {
/**
 * Input with a single character above 0x7f: the expected encoded-word is labelled UTF-8 and
 * carries "Ä" as its two UTF-8 bytes (C3 84) in "Q" form.
 */
@Test
fun singleNonAsciiCharacter() {
    assertInputEncodesToExpected("123456789Ä", "=?UTF-8?Q?123456789=C3=84?=")
}
/**
 * Input made up solely of characters above 0x7f: the expected encoded-word is labelled UTF-8
 * and carries the base64 ("B") form of the UTF-8 bytes of the input.
 */
@Test
fun onlyNonAsciiCharacters() {
    assertInputEncodesToExpected("ÄÖÜÄÖÜÄÖÜÄ", "=?UTF-8?B?w4TDlsOcw4TDlsOcw4TDlsOcw4Q=?=")
}
@Test