Switched to "classic" domain name detection and added multiple tests.

This commit is contained in:
Tim Bolender 2017-03-21 11:59:21 +01:00
parent cf9c3d078e
commit 9d3cc8ed00
4 changed files with 107 additions and 86 deletions

View file

@ -1,8 +1,6 @@
package com.fsck.k9.message.html; package com.fsck.k9.message.html;
import java.net.IDN;
import java.util.Locale;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -18,6 +16,8 @@ import java.util.regex.Pattern;
class HttpUriParser implements UriParser { class HttpUriParser implements UriParser {
// This string represents the character group sub-delims as described in RFC 3986 // This string represents the character group sub-delims as described in RFC 3986
private static final String SUB_DELIM = "!$&'()*+,;="; private static final String SUB_DELIM = "!$&'()*+,;=";
private static final Pattern DOMAIN_PATTERN =
Pattern.compile("\\w([\\w-]*\\w)*(\\.\\w([\\w-]*\\w)*)*(:(\\d{0,5}))?");
private static final Pattern IPv4_PATTERN = private static final Pattern IPv4_PATTERN =
Pattern.compile("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})(:(\\d{0,5}))?"); Pattern.compile("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})(:(\\d{0,5}))?");
@ -40,19 +40,15 @@ class HttpUriParser implements UriParser {
} }
// Authority // Authority
int authorityEnd = text.indexOf('/', currentPos); currentPos = matchUserInfoIfAvailable(text, currentPos);
if (authorityEnd == -1) {
authorityEnd = text.length();
}
currentPos = matchUserInfoIfAvailable(text, currentPos, authorityEnd); int matchedAuthorityEnd = Math.max(tryMatchDomainName(text, currentPos),
Math.max(tryMatchIpv4Address(text, currentPos, true),
if (!tryMatchDomainName(text, currentPos, authorityEnd) && tryMatchIpv6Address(text, currentPos)));
!tryMatchIpv4Address(text, currentPos, authorityEnd, true) && if (matchedAuthorityEnd == currentPos) {
!tryMatchIpv6Address(text, currentPos, authorityEnd)) {
return startPos; return startPos;
} }
currentPos = authorityEnd; currentPos = matchedAuthorityEnd;
// Path // Path
if (currentPos < text.length() && text.charAt(currentPos) == '/') { if (currentPos < text.length() && text.charAt(currentPos) == '/') {
@ -79,9 +75,9 @@ class HttpUriParser implements UriParser {
return currentPos; return currentPos;
} }
private int matchUserInfoIfAvailable(String text, int startPos, int authorityEnd) { private int matchUserInfoIfAvailable(String text, int startPos) {
int userInfoEnd = text.indexOf('@', startPos); int userInfoEnd = text.indexOf('@', startPos);
if (userInfoEnd != -1 && userInfoEnd < authorityEnd) { if (userInfoEnd != -1) {
if (matchUnreservedPCTEncodedSubDelimClassesGreedy(text, startPos, ":") != userInfoEnd) { if (matchUnreservedPCTEncodedSubDelimClassesGreedy(text, startPos, ":") != userInfoEnd) {
// Illegal character in user info // Illegal character in user info
return startPos; return startPos;
@ -91,91 +87,63 @@ class HttpUriParser implements UriParser {
return startPos; return startPos;
} }
private boolean tryMatchDomainName(String text, int startPos, int authorityEnd) { private int tryMatchDomainName(String text, int startPos) {
// Partly from OkHttp's HttpUrl
try { try {
// Check for port Matcher matcher = DOMAIN_PATTERN.matcher(text);
int portPos = text.indexOf(':', startPos); if (!matcher.find(startPos) || matcher.start() != startPos) {
boolean hasPort = portPos != -1 && portPos < authorityEnd; return startPos;
if (hasPort) { }
int port = 0;
for (int i = portPos + 1; i < authorityEnd; i++) { String portString = matcher.group(matcher.groupCount());
int c = text.codePointAt(i); if (portString != null && !portString.isEmpty()) {
if (c < '0' || c > '9') { int port = Integer.parseInt(portString);
return false;
}
port = port * 10 + c - '0';
}
if (port > 65535) { if (port > 65535) {
return false; return startPos;
} }
} }
// Check actual domain return matcher.end();
String result = IDN.toASCII(text.substring(startPos, authorityEnd)).toLowerCase(Locale.US);
if (result.isEmpty()) {
return false;
}
// Confirm that the IDN ToASCII result doesn't contain any illegal characters.
for (int i = 0; i < result.length(); i++) {
char c = result.charAt(i);
// The WHATWG Host parsing rules accepts some character codes which are invalid by
// definition for OkHttp's host header checks (and the WHATWG Host syntax definition). Here
// we rule out characters that would cause problems in host headers.
if (c <= '\u001f' || c >= '\u007f') {
return false;
}
// Check for the characters mentioned in the WHATWG Host parsing spec:
// U+0000, U+0009, U+000A, U+000D, U+0020, "#", "%", "/", ":", "?", "@", "[", "\", and "]"
// (excluding the characters covered above).
if (" #%/:?@[\\]".indexOf(c) != -1) {
return false;
}
}
return true;
} catch (IllegalArgumentException e) { } catch (IllegalArgumentException e) {
return false; return startPos;
} }
} }
private boolean tryMatchIpv4Address(String text, int startPos, int authorityEnd, boolean portAllowed) { private int tryMatchIpv4Address(String text, int startPos, boolean portAllowed) {
Matcher matcher = IPv4_PATTERN.matcher(text.subSequence(startPos, authorityEnd)); Matcher matcher = IPv4_PATTERN.matcher(text);
if (!matcher.matches()) { if (!matcher.find(startPos) || matcher.start() != startPos) {
return false; return startPos;
} }
for (int i = 1; i <= 4; i++) { for (int i = 1; i <= 4; i++) {
int segment = Integer.parseInt(matcher.group(1)); int segment = Integer.parseInt(matcher.group(1));
if (segment > 255) { if (segment > 255) {
return false; return startPos;
} }
} }
if (!portAllowed && matcher.group(5) != null) { if (!portAllowed && matcher.group(5) != null) {
return false; return startPos;
} }
String portString = matcher.group(6); String portString = matcher.group(6);
if (portString != null && !portString.isEmpty()) { if (portString != null && !portString.isEmpty()) {
int port = Integer.parseInt(portString); int port = Integer.parseInt(portString);
if (port > 65535) { if (port > 65535) {
return false; return startPos;
} }
} }
return true; return matcher.end();
} }
private boolean tryMatchIpv6Address(String text, int startPos, int authorityEnd) { private int tryMatchIpv6Address(String text, int startPos) {
if (text.codePointAt(startPos) != '[') { if (startPos == text.length() || text.codePointAt(startPos) != '[') {
return false; return startPos;
} }
int addressEnd = text.indexOf(']'); int addressEnd = text.indexOf(']');
if (addressEnd == -1 || addressEnd >= authorityEnd) { if (addressEnd == -1) {
return false; return startPos;
} }
// Actual parsing // Actual parsing
@ -191,7 +159,7 @@ class HttpUriParser implements UriParser {
// Check segment separator // Check segment separator
if (beginSegmentsCount > 0) { if (beginSegmentsCount > 0) {
if (text.codePointAt(currentPos) != ':') { if (text.codePointAt(currentPos) != ':') {
return false; return startPos;
} else { } else {
++currentPos; ++currentPos;
} }
@ -201,7 +169,7 @@ class HttpUriParser implements UriParser {
int possibleSegmentEnd = int possibleSegmentEnd =
parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, compressionPos)); parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, compressionPos));
if (possibleSegmentEnd == currentPos) { if (possibleSegmentEnd == currentPos) {
return false; return startPos;
} }
currentPos = possibleSegmentEnd; currentPos = possibleSegmentEnd;
++beginSegmentsCount; ++beginSegmentsCount;
@ -215,7 +183,7 @@ class HttpUriParser implements UriParser {
// Check segment separator // Check segment separator
if (endSegmentsCount > 0) { if (endSegmentsCount > 0) {
if (text.codePointAt(currentPos) != ':') { if (text.codePointAt(currentPos) != ':') {
return false; return startPos;
} else { } else {
++currentPos; ++currentPos;
} }
@ -230,7 +198,7 @@ class HttpUriParser implements UriParser {
// Parse segment // Parse segment
int possibleSegmentEnd = parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, addressEnd)); int possibleSegmentEnd = parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, addressEnd));
if (possibleSegmentEnd == currentPos) { if (possibleSegmentEnd == currentPos) {
return false; return startPos;
} }
currentPos = possibleSegmentEnd; currentPos = possibleSegmentEnd;
++endSegmentsCount; ++endSegmentsCount;
@ -245,34 +213,31 @@ class HttpUriParser implements UriParser {
// Only optional port left, skip address bracket // Only optional port left, skip address bracket
++currentPos; ++currentPos;
} else { } else {
return false; return startPos;
} }
} else { } else {
// 3) Still some parts missing; an IPv4 address is required as the tail // 3) Still some parts missing; an IPv4 address is required as the tail
if (!tryMatchIpv4Address(text, currentPos, addressEnd, false)) { if (tryMatchIpv4Address(text, currentPos, false) != addressEnd) {
return false; return startPos;
} }
currentPos = addressEnd + 1; currentPos = addressEnd + 1;
} }
// Check optional port // Check optional port
if (currentPos == authorityEnd) { if (currentPos == text.length() || text.codePointAt(currentPos) != ':') {
return true; return currentPos;
}
if (text.codePointAt(currentPos) != ':' || currentPos + 1 == authorityEnd) {
return false;
} }
++currentPos; ++currentPos;
int port = 0; int port = 0;
for (int i = currentPos; i < authorityEnd; i++) { for (; currentPos < text.length(); currentPos++) {
int c = text.codePointAt(i); int c = text.codePointAt(currentPos);
if (c < '0' || c > '9') { if (c < '0' || c > '9') {
return false; break;
} }
port = port * 10 + c - '0'; port = port * 10 + c - '0';
} }
return port <= 65535; return (port <= 65535) ? currentPos : startPos;
} }
private int parse16BitHexSegment(String text, int startPos, int endPos) { private int parse16BitHexSegment(String text, int startPos, int endPos) {

View file

@ -13,7 +13,7 @@ import android.text.TextUtils;
public class UriLinkifier { public class UriLinkifier {
private static final Pattern URI_SCHEME; private static final Pattern URI_SCHEME;
private static final Map<String, UriParser> SUPPORTED_URIS; private static final Map<String, UriParser> SUPPORTED_URIS;
private static final String SCHEME_SEPARATORS = " ("; private static final String SCHEME_SEPARATORS = " (\\n";
private static final String ALLOWED_SEPARATORS_PATTERN = "(?:^|[" + SCHEME_SEPARATORS + "])"; private static final String ALLOWED_SEPARATORS_PATTERN = "(?:^|[" + SCHEME_SEPARATORS + "])";
static { static {

View file

@ -12,11 +12,26 @@ public class HttpUriParserTest {
private final StringBuffer outputBuffer = new StringBuffer(); private final StringBuffer outputBuffer = new StringBuffer();
@Test
public void emptyUriIgnored() {
assertLinkIgnored("http://");
}
@Test
public void emptyAuthorityIgnored() {
assertLinkIgnored("http:///");
}
@Test @Test
public void simpleDomain() { public void simpleDomain() {
assertLinkify("http://www.google.com"); assertLinkify("http://www.google.com");
} }
@Test
public void invalidDomainIgnored() {
assertLinkIgnored("http://-www.google.com");
}
@Test @Test
public void domainWithTrailingSlash() { public void domainWithTrailingSlash() {
assertLinkify("http://www.google.com/"); assertLinkify("http://www.google.com/");
@ -102,6 +117,16 @@ public class HttpUriParserTest {
assertLinkify("http://[::192.9.5.5]:80/"); assertLinkify("http://[::192.9.5.5]:80/");
} }
@Test
public void ipv6WithoutClosingSquareBracketIgnored() {
assertLinkIgnored("http://[1080:0:0:0:8:80:200C:417A/");
}
@Test
public void ipv6InvalidClosingSquareBracketIgnored() {
assertLinkIgnored("http://[1080:0:0:0:8:800:270C:417A/]");
}
@Test @Test
public void domainWithTrailingSpace() { public void domainWithTrailingSpace() {
String text = "http://google.com/ "; String text = "http://google.com/ ";
@ -133,7 +158,7 @@ public class HttpUriParserTest {
} }
@Test @Test
public void uriInMiddleOfInput() throws Exception { public void uriInMiddleAfterInput() {
String prefix = "prefix "; String prefix = "prefix ";
String uri = "http://google.com/"; String uri = "http://google.com/";
String text = prefix + uri; String text = prefix + uri;
@ -143,6 +168,18 @@ public class HttpUriParserTest {
assertLinkOnly(uri, outputBuffer); assertLinkOnly(uri, outputBuffer);
} }
@Test
public void uriInMiddleOfInput() {
String prefix = "prefix ";
String uri = "http://google.com/";
String postfix = " postfix";
String text = prefix + uri + postfix;
parser.linkifyUri(text, prefix.length(), outputBuffer);
assertLinkOnly(uri, outputBuffer);
}
int linkify(String uri) { int linkify(String uri) {
return parser.linkifyUri(uri, 0, outputBuffer); return parser.linkifyUri(uri, 0, outputBuffer);
@ -152,4 +189,11 @@ public class HttpUriParserTest {
linkify(uri); linkify(uri);
assertLinkOnly(uri, outputBuffer); assertLinkOnly(uri, outputBuffer);
} }
void assertLinkIgnored(String uri) {
int endPos = linkify(uri);
assertEquals("", outputBuffer.toString());
assertEquals(0, endPos);
}
} }

View file

@ -117,11 +117,23 @@ public class UriLinkifierTest {
} }
@Test @Test
public void schemaMatchWithInvalidUriInMiddleOfTextFollowedVyValidUri() throws Exception { public void schemaMatchWithInvalidUriInMiddleOfTextFollowedByValidUri() {
String text = "prefix http:42 http://example.org"; String text = "prefix http:42 http://example.org";
UriLinkifier.linkifyText(text, outputBuffer); UriLinkifier.linkifyText(text, outputBuffer);
assertEquals("prefix http:42 <a href=\"http://example.org\">http://example.org</a>", outputBuffer.toString()); assertEquals("prefix http:42 <a href=\"http://example.org\">http://example.org</a>", outputBuffer.toString());
} }
@Test
public void multipleValidUrisInRow() {
String text = "prefix http://uri1.example.org some text http://uri2.example.org/path postfix";
UriLinkifier.linkifyText(text, outputBuffer);
assertEquals(
"prefix <a href=\"http://uri1.example.org\">http://uri1.example.org</a> some text " +
"<a href=\"http://uri2.example.org/path\">http://uri2.example.org/path</a> postfix",
outputBuffer.toString());
}
} }