Switched to "classic" domain name detection and added multiple tests.

This commit is contained in:
Tim Bolender 2017-03-21 11:59:21 +01:00
parent cf9c3d078e
commit 9d3cc8ed00
4 changed files with 107 additions and 86 deletions

View file

@ -1,8 +1,6 @@
package com.fsck.k9.message.html;
import java.net.IDN;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -18,6 +16,8 @@ import java.util.regex.Pattern;
class HttpUriParser implements UriParser {
// This string represents the "sub-delims" character group as described in RFC 3986
private static final String SUB_DELIM = "!$&'()*+,;=";
private static final Pattern DOMAIN_PATTERN =
Pattern.compile("\\w([\\w-]*\\w)*(\\.\\w([\\w-]*\\w)*)*(:(\\d{0,5}))?");
private static final Pattern IPv4_PATTERN =
Pattern.compile("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})(:(\\d{0,5}))?");
@ -40,19 +40,15 @@ class HttpUriParser implements UriParser {
}
// Authority
int authorityEnd = text.indexOf('/', currentPos);
if (authorityEnd == -1) {
authorityEnd = text.length();
}
currentPos = matchUserInfoIfAvailable(text, currentPos);
currentPos = matchUserInfoIfAvailable(text, currentPos, authorityEnd);
if (!tryMatchDomainName(text, currentPos, authorityEnd) &&
!tryMatchIpv4Address(text, currentPos, authorityEnd, true) &&
!tryMatchIpv6Address(text, currentPos, authorityEnd)) {
int matchedAuthorityEnd = Math.max(tryMatchDomainName(text, currentPos),
Math.max(tryMatchIpv4Address(text, currentPos, true),
tryMatchIpv6Address(text, currentPos)));
if (matchedAuthorityEnd == currentPos) {
return startPos;
}
currentPos = authorityEnd;
currentPos = matchedAuthorityEnd;
// Path
if (currentPos < text.length() && text.charAt(currentPos) == '/') {
@ -79,9 +75,9 @@ class HttpUriParser implements UriParser {
return currentPos;
}
private int matchUserInfoIfAvailable(String text, int startPos, int authorityEnd) {
private int matchUserInfoIfAvailable(String text, int startPos) {
int userInfoEnd = text.indexOf('@', startPos);
if (userInfoEnd != -1 && userInfoEnd < authorityEnd) {
if (userInfoEnd != -1) {
if (matchUnreservedPCTEncodedSubDelimClassesGreedy(text, startPos, ":") != userInfoEnd) {
// Illegal character in user info
return startPos;
@ -91,91 +87,63 @@ class HttpUriParser implements UriParser {
return startPos;
}
// NOTE(review): the +/- diff markers are missing in this view, so the lines below appear to interleave two
// versions of tryMatchDomainName -- confirm the exact line pairing against the repository:
//  * boolean tryMatchDomainName(text, startPos, authorityEnd): checks that everything after the first ':' is
//    a decimal port <= 65535, then runs the host through IDN.toASCII and rejects control characters,
//    non-ASCII characters and the characters listed in the WHATWG host parsing rules.
//  * int tryMatchDomainName(text, startPos): requires DOMAIN_PATTERN to match exactly at startPos and returns
//    the end index of that match; every failure path returns startPos so the caller sees "nothing consumed".
private boolean tryMatchDomainName(String text, int startPos, int authorityEnd) {
// Partly from OkHttp's HttpUrl
private int tryMatchDomainName(String text, int startPos) {
try {
// Check for port
int portPos = text.indexOf(':', startPos);
boolean hasPort = portPos != -1 && portPos < authorityEnd;
if (hasPort) {
int port = 0;
for (int i = portPos + 1; i < authorityEnd; i++) {
int c = text.codePointAt(i);
if (c < '0' || c > '9') {
return false;
}
port = port * 10 + c - '0';
}
Matcher matcher = DOMAIN_PATTERN.matcher(text);
// find() may locate a match further right in the text; only a match beginning at startPos counts.
if (!matcher.find(startPos) || matcher.start() != startPos) {
return startPos;
}
// The last capture group of DOMAIN_PATTERN holds the (possibly empty) port digits.
String portString = matcher.group(matcher.groupCount());
if (portString != null && !portString.isEmpty()) {
int port = Integer.parseInt(portString);
if (port > 65535) {
return false;
return startPos;
}
}
// Check actual domain
String result = IDN.toASCII(text.substring(startPos, authorityEnd)).toLowerCase(Locale.US);
if (result.isEmpty()) {
return false;
}
// Confirm that the IDN ToASCII result doesn't contain any illegal characters.
for (int i = 0; i < result.length(); i++) {
char c = result.charAt(i);
// The WHATWG Host parsing rules accepts some character codes which are invalid by
// definition for OkHttp's host header checks (and the WHATWG Host syntax definition). Here
// we rule out characters that would cause problems in host headers.
if (c <= '\u001f' || c >= '\u007f') {
return false;
}
// Check for the characters mentioned in the WHATWG Host parsing spec:
// U+0000, U+0009, U+000A, U+000D, U+0020, "#", "%", "/", ":", "?", "@", "[", "\", and "]"
// (excluding the characters covered above).
if (" #%/:?@[\\]".indexOf(c) != -1) {
return false;
}
}
return true;
return matcher.end();
} catch (IllegalArgumentException e) {
// Reached when IDN.toASCII rejects the input or Integer.parseInt fails (NumberFormatException is an
// IllegalArgumentException); both are treated as "no domain name here".
return false;
return startPos;
}
}
// Matches a dotted-quad IPv4 address, optionally followed by ":port".
//
// NOTE(review): the +/- diff markers are missing in this view, so two versions are interleaved below --
// confirm the line pairing against the repository:
//  * boolean form: the whole span [startPos, authorityEnd) must match IPv4_PATTERN; returns true/false.
//  * int form: IPv4_PATTERN must match exactly at startPos; returns the end index of the match, or startPos
//    when no valid address was found.
// In both forms every octet must be <= 255, the port (if present and non-empty) must be <= 65535, and a
// ":port" suffix is rejected when portAllowed is false.
private boolean tryMatchIpv4Address(String text, int startPos, int authorityEnd, boolean portAllowed) {
Matcher matcher = IPv4_PATTERN.matcher(text.subSequence(startPos, authorityEnd));
if (!matcher.matches()) {
return false;
private int tryMatchIpv4Address(String text, int startPos, boolean portAllowed) {
Matcher matcher = IPv4_PATTERN.matcher(text);
// find() may locate a match further right in the text; only a match beginning at startPos counts.
if (!matcher.find(startPos) || matcher.start() != startPos) {
return startPos;
}
for (int i = 1; i <= 4; i++) {
// Capture groups 1-4 are the four octets. Read group(i) so that every octet is range-checked;
// reading group(1) on each pass would validate the first octet four times and accept e.g. 1.2.3.999.
int segment = Integer.parseInt(matcher.group(i));
if (segment > 255) {
return false;
return startPos;
}
}
// Group 5 is the whole ":port" suffix, group 6 only its digits.
if (!portAllowed && matcher.group(5) != null) {
return false;
return startPos;
}
String portString = matcher.group(6);
if (portString != null && !portString.isEmpty()) {
int port = Integer.parseInt(portString);
if (port > 65535) {
return false;
return startPos;
}
}
return true;
return matcher.end();
}
private boolean tryMatchIpv6Address(String text, int startPos, int authorityEnd) {
if (text.codePointAt(startPos) != '[') {
return false;
private int tryMatchIpv6Address(String text, int startPos) {
if (startPos == text.length() || text.codePointAt(startPos) != '[') {
return startPos;
}
int addressEnd = text.indexOf(']');
if (addressEnd == -1 || addressEnd >= authorityEnd) {
return false;
if (addressEnd == -1) {
return startPos;
}
// Actual parsing
@ -191,7 +159,7 @@ class HttpUriParser implements UriParser {
// Check segment separator
if (beginSegmentsCount > 0) {
if (text.codePointAt(currentPos) != ':') {
return false;
return startPos;
} else {
++currentPos;
}
@ -201,7 +169,7 @@ class HttpUriParser implements UriParser {
int possibleSegmentEnd =
parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, compressionPos));
if (possibleSegmentEnd == currentPos) {
return false;
return startPos;
}
currentPos = possibleSegmentEnd;
++beginSegmentsCount;
@ -215,7 +183,7 @@ class HttpUriParser implements UriParser {
// Check segment separator
if (endSegmentsCount > 0) {
if (text.codePointAt(currentPos) != ':') {
return false;
return startPos;
} else {
++currentPos;
}
@ -230,7 +198,7 @@ class HttpUriParser implements UriParser {
// Parse segment
int possibleSegmentEnd = parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, addressEnd));
if (possibleSegmentEnd == currentPos) {
return false;
return startPos;
}
currentPos = possibleSegmentEnd;
++endSegmentsCount;
@ -245,34 +213,31 @@ class HttpUriParser implements UriParser {
// Only optional port left, skip address bracket
++currentPos;
} else {
return false;
return startPos;
}
} else {
// 3) Segments are still missing, so the remaining tail has to be an embedded IPv4 address
if (!tryMatchIpv4Address(text, currentPos, addressEnd, false)) {
return false;
if (tryMatchIpv4Address(text, currentPos, false) != addressEnd) {
return startPos;
}
currentPos = addressEnd + 1;
}
// Check optional port
if (currentPos == authorityEnd) {
return true;
}
if (text.codePointAt(currentPos) != ':' || currentPos + 1 == authorityEnd) {
return false;
if (currentPos == text.length() || text.codePointAt(currentPos) != ':') {
return currentPos;
}
++currentPos;
int port = 0;
for (int i = currentPos; i < authorityEnd; i++) {
int c = text.codePointAt(i);
for (; currentPos < text.length(); currentPos++) {
int c = text.codePointAt(currentPos);
if (c < '0' || c > '9') {
return false;
break;
}
port = port * 10 + c - '0';
}
return port <= 65535;
return (port <= 65535) ? currentPos : startPos;
}
private int parse16BitHexSegment(String text, int startPos, int endPos) {

View file

@ -13,7 +13,7 @@ import android.text.TextUtils;
public class UriLinkifier {
private static final Pattern URI_SCHEME;
private static final Map<String, UriParser> SUPPORTED_URIS;
private static final String SCHEME_SEPARATORS = " (";
private static final String SCHEME_SEPARATORS = " (\\n";
private static final String ALLOWED_SEPARATORS_PATTERN = "(?:^|[" + SCHEME_SEPARATORS + "])";
static {

View file

@ -12,11 +12,26 @@ public class HttpUriParserTest {
private final StringBuffer outputBuffer = new StringBuffer();
@Test
public void emptyUriIgnored() {
    // Only the scheme and "//" are present, so nothing should be linkified.
    String schemeOnly = "http://";
    assertLinkIgnored(schemeOnly);
}
@Test
public void emptyAuthorityIgnored() {
    // The path separator follows the scheme directly, leaving the authority empty.
    String missingAuthority = "http:///";
    assertLinkIgnored(missingAuthority);
}
@Test
public void simpleDomain() {
    // A plain host name without port, path, query or fragment is linkified as a whole.
    String plainHost = "http://www.google.com";
    assertLinkify(plainHost);
}
@Test
public void invalidDomainIgnored() {
    // The host starts with '-', which is expected to be rejected.
    String hyphenFirstHost = "http://-www.google.com";
    assertLinkIgnored(hyphenFirstHost);
}
@Test
public void domainWithTrailingSlash() {
assertLinkify("http://www.google.com/");
@ -102,6 +117,16 @@ public class HttpUriParserTest {
assertLinkify("http://[::192.9.5.5]:80/");
}
@Test
public void ipv6WithoutClosingSquareBracketIgnored() {
    // The IPv6 literal is opened with '[' but never closed.
    String unterminatedLiteral = "http://[1080:0:0:0:8:80:200C:417A/";
    assertLinkIgnored(unterminatedLiteral);
}
@Test
public void ipv6InvalidClosingSquareBracketIgnored() {
    // The closing ']' only shows up after a '/', so it does not terminate the address properly.
    String misplacedBracket = "http://[1080:0:0:0:8:800:270C:417A/]";
    assertLinkIgnored(misplacedBracket);
}
@Test
public void domainWithTrailingSpace() {
String text = "http://google.com/ ";
@ -133,7 +158,7 @@ public class HttpUriParserTest {
}
@Test
public void uriInMiddleOfInput() throws Exception {
public void uriInMiddleAfterInput() {
String prefix = "prefix ";
String uri = "http://google.com/";
String text = prefix + uri;
@ -143,6 +168,18 @@ public class HttpUriParserTest {
assertLinkOnly(uri, outputBuffer);
}
@Test
public void uriInMiddleOfInput() {
    // The URI sits between leading and trailing text; parsing starts right after the leading part.
    final String leading = "prefix ";
    final String link = "http://google.com/";
    final String input = leading + link + " postfix";
    final int linkStart = leading.length();

    parser.linkifyUri(input, linkStart, outputBuffer);

    // Only the URI itself may end up in the output, not the surrounding text.
    assertLinkOnly(link, outputBuffer);
}
int linkify(String uri) {
return parser.linkifyUri(uri, 0, outputBuffer);
@ -152,4 +189,11 @@ public class HttpUriParserTest {
linkify(uri);
assertLinkOnly(uri, outputBuffer);
}
// Asserts that parsing the given text from position 0 writes nothing to the output buffer and
// returns 0.
void assertLinkIgnored(String uri) {
    final int consumedUntil = linkify(uri);
    final String rendered = outputBuffer.toString();

    assertEquals("", rendered);
    assertEquals(0, consumedUntil);
}
}

View file

@ -117,11 +117,23 @@ public class UriLinkifierTest {
}
// The invalid "http:42" must stay plain text while the valid URI after it is still turned into a link.
// NOTE(review): the two signature lines below appear to be the removed and the added side of a rename
// ("Vy" -> "By"); the diff markers are missing in this view.
@Test
public void schemaMatchWithInvalidUriInMiddleOfTextFollowedVyValidUri() throws Exception {
public void schemaMatchWithInvalidUriInMiddleOfTextFollowedByValidUri() {
String text = "prefix http:42 http://example.org";
UriLinkifier.linkifyText(text, outputBuffer);
assertEquals("prefix http:42 <a href=\"http://example.org\">http://example.org</a>", outputBuffer.toString());
}
@Test
public void multipleValidUrisInRow() {
    // Two valid URIs separated by plain text: each gets its own anchor, the rest is copied unchanged.
    final String input = "prefix http://uri1.example.org some text http://uri2.example.org/path postfix";
    final String expectedHtml =
            "prefix <a href=\"http://uri1.example.org\">http://uri1.example.org</a> some text " +
            "<a href=\"http://uri2.example.org/path\">http://uri2.example.org/path</a> postfix";

    UriLinkifier.linkifyText(input, outputBuffer);

    assertEquals(expectedHtml, outputBuffer.toString());
}
}