Merge pull request #2118 from philipwhiuk/handleInvalidHtmlBetter

Handle invalid HTML better
This commit is contained in:
Philip 2017-01-22 20:40:22 +00:00 committed by GitHub
commit 0dd7a4aafe
3 changed files with 73 additions and 10 deletions

View file

@ -27,7 +27,7 @@ dependencies {
compile "com.squareup.okio:okio:${okioVersion}" compile "com.squareup.okio:okio:${okioVersion}"
compile 'commons-io:commons-io:2.4' compile 'commons-io:commons-io:2.4'
compile "com.android.support:support-v4:${androidSupportLibraryVersion}" compile "com.android.support:support-v4:${androidSupportLibraryVersion}"
compile 'net.sourceforge.htmlcleaner:htmlcleaner:2.16' compile 'net.sourceforge.htmlcleaner:htmlcleaner:2.18'
compile 'de.cketti.library.changelog:ckchangelog:1.2.1' compile 'de.cketti.library.changelog:ckchangelog:1.2.1'
compile 'com.github.bumptech.glide:glide:3.6.1' compile 'com.github.bumptech.glide:glide:3.6.1'
compile 'com.splitwise:tokenautocomplete:2.0.7' compile 'com.splitwise:tokenautocomplete:2.0.7'

View file

@ -47,6 +47,7 @@ public class HtmlSanitizer {
properties.setOmitDoctypeDeclaration(false); properties.setOmitDoctypeDeclaration(false);
properties.setTranslateSpecialEntities(false); properties.setTranslateSpecialEntities(false);
properties.setRecognizeUnicodeChars(false); properties.setRecognizeUnicodeChars(false);
properties.setIgnoreQuestAndExclam(false);
return properties; return properties;
} }

View file

@ -21,7 +21,10 @@ public class HtmlSanitizerTest {
"<head><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\"></head>" + "<head><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\"></head>" +
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
assertEquals("<html><head></head><body>Message</body></html>", htmlSanitizer.sanitize(html));
String result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result);
} }
@Test @Test
@ -30,7 +33,10 @@ public class HtmlSanitizerTest {
"<head></head><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\">" + "<head></head><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\">" +
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
assertEquals("<html><head></head><body>Message</body></html>", htmlSanitizer.sanitize(html));
String result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result);
} }
@Test @Test
@ -39,7 +45,10 @@ public class HtmlSanitizerTest {
"<head></head>" + "<head></head>" +
"<body><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\">Message</body>" + "<body><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\">Message</body>" +
"</html>"; "</html>";
assertEquals("<html><head></head><body>Message</body></html>", htmlSanitizer.sanitize(html));
String result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result);
} }
@Test @Test
@ -48,7 +57,10 @@ public class HtmlSanitizerTest {
"<head><meta http-equiv=\"REFRESH\" content=\"1; URL=http://example.com/\"></head>" + "<head><meta http-equiv=\"REFRESH\" content=\"1; URL=http://example.com/\"></head>" +
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
assertEquals("<html><head></head><body>Message</body></html>", htmlSanitizer.sanitize(html));
String result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result);
} }
@Test @Test
@ -57,7 +69,10 @@ public class HtmlSanitizerTest {
"<head><meta http-equiv=\"Refresh\" content=\"1; URL=http://example.com/\"></head>" + "<head><meta http-equiv=\"Refresh\" content=\"1; URL=http://example.com/\"></head>" +
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
assertEquals("<html><head></head><body>Message</body></html>", htmlSanitizer.sanitize(html));
String result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result);
} }
@Test @Test
@ -66,7 +81,10 @@ public class HtmlSanitizerTest {
"<head><meta http-equiv=refresh content=\"1; URL=http://example.com/\"></head>" + "<head><meta http-equiv=refresh content=\"1; URL=http://example.com/\"></head>" +
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
assertEquals("<html><head></head><body>Message</body></html>", htmlSanitizer.sanitize(html));
String result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result);
} }
@Test @Test
@ -75,7 +93,10 @@ public class HtmlSanitizerTest {
"<head><meta http-equiv=\"refresh \" content=\"1; URL=http://example.com/\"></head>" + "<head><meta http-equiv=\"refresh \" content=\"1; URL=http://example.com/\"></head>" +
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
assertEquals("<html><head></head><body>Message</body></html>", htmlSanitizer.sanitize(html));
String result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result);
} }
@Test @Test
@ -84,7 +105,10 @@ public class HtmlSanitizerTest {
"<head><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\"></head>" + "<head><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\"></head>" +
"<body><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\">Message</body>" + "<body><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\">Message</body>" +
"</html>"; "</html>";
assertEquals("<html><head></head><body>Message</body></html>", htmlSanitizer.sanitize(html));
String result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result);
} }
@Test @Test
@ -96,7 +120,45 @@ public class HtmlSanitizerTest {
"</head>" + "</head>" +
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
String result = htmlSanitizer.sanitize(html);
assertEquals("<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" /></head>" + assertEquals("<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" /></head>" +
"<body>Message</body></html>", htmlSanitizer.sanitize(html)); "<body>Message</body></html>", result);
}
/**
 * Sanitizing a document that is prefixed with an XML declaration must yield the bare HTML
 * document, i.e. the {@code <?xml ... ?>} prefix does not appear in the output.
 */
@Test
public void shouldProduceValidHtmlFromHtmlWithXmlDeclaration() {
    String xmlDeclaration = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
    String document = "<html><head></head><body></body></html>";

    String sanitized = htmlSanitizer.sanitize(xmlDeclaration + document);

    // The expected output is exactly the input minus the XML declaration.
    assertEquals(document, sanitized);
}
/**
 * A table whose rows are direct children of {@code <table>} must come back from the sanitizer
 * with those rows wrapped in a {@code <tbody>} element.
 */
@Test
public void shouldNormalizeTables() {
    String row = "<tr><td></td><td></td></tr>";
    String input = "<html><head></head><body><table>" + row + "</table></body></html>";
    String expected = "<html><head></head><body><table><tbody>" + row + "</tbody></table></body></html>";

    String sanitized = htmlSanitizer.sanitize(input);

    assertEquals(expected, sanitized);
}
// Checks how the sanitizer treats malformed "<!==>" markers inside table cells: the expected
// output contains them as escaped text ("&lt;!==&gt;") in front of the unchanged cell text.
@Test
public void shouldHtmlEncodeXmlDirectives() {
// Input: rows placed directly under <table>; each cell starts with two "<!==>" markers.
String html = "<html><head></head><body><table>" +
"<tr><td><!==><!==>Hmailserver service shutdown:</td><td><!==><!==>Ok</td></tr>" +
"</table></body></html>";
String result = htmlSanitizer.sanitize(html);
// Expected: markers escaped as text, and the rows wrapped in <tbody>.
assertEquals("<html><head></head><body><table><tbody>" +
"<tr><td>&lt;!==&gt;&lt;!==&gt;Hmailserver service shutdown:</td><td>&lt;!==&gt;&lt;!==&gt;Ok</td></tr>" +
"</tbody></table></body></html>", result);
} }
} }