Only consider US-ASCII and UTF-8 in EncoderUtil.determineCharset()
This commit is contained in:
parent
8f7e9ee73c
commit
267c658492
2 changed files with 4 additions and 10 deletions
|
@@ -164,20 +164,14 @@ class EncoderUtil {
|
|||
}
|
||||
|
||||
private static Charset determineCharset(String text) {
|
||||
// it is an important property of iso-8859-1 that it directly maps
|
||||
// unicode code points 0000 to 00ff to byte values 00 to ff.
|
||||
boolean ascii = true;
|
||||
final int len = text.length();
|
||||
for (int index = 0; index < len; index++) {
|
||||
char ch = text.charAt(index);
|
||||
if (ch > 0xff) {
|
||||
if (ch > 0x7f) {
|
||||
return Charsets.UTF_8;
|
||||
}
|
||||
if (ch > 0x7f) {
|
||||
ascii = false;
|
||||
}
|
||||
}
|
||||
return ascii ? Charsets.US_ASCII : Charsets.ISO_8859_1;
|
||||
return Charsets.US_ASCII;
|
||||
}
|
||||
|
||||
private static Encoding determineEncoding(byte[] bytes) {
|
||||
|
|
|
@@ -6,12 +6,12 @@ import org.junit.Test
|
|||
class EncoderUtilTest {
|
||||
@Test
|
||||
fun singleNonAsciiCharacter() {
|
||||
assertInputEncodesToExpected("123456789Ä", "=?ISO-8859-1?Q?123456789=C4?=")
|
||||
assertInputEncodesToExpected("123456789Ä", "=?UTF-8?Q?123456789=C3=84?=")
|
||||
}
|
||||
|
||||
@Test
|
||||
fun onlyNonAsciiCharacters() {
|
||||
assertInputEncodesToExpected("ÄÖÜÄÖÜÄÖÜÄ", "=?ISO-8859-1?B?xNbcxNbcxNbcxA==?=")
|
||||
assertInputEncodesToExpected("ÄÖÜÄÖÜÄÖÜÄ", "=?UTF-8?B?w4TDlsOcw4TDlsOcw4TDlsOcw4Q=?=")
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in a new issue