From 9d3cc8ed00a4f058b21318d125b96892cafc7d27 Mon Sep 17 00:00:00 2001 From: Tim Bolender Date: Tue, 21 Mar 2017 11:59:21 +0100 Subject: [PATCH] Switched to "classic" domain name detection and added multiple tests. --- .../fsck/k9/message/html/HttpUriParser.java | 131 +++++++----------- .../fsck/k9/message/html/UriLinkifier.java | 2 +- .../k9/message/html/HttpUriParserTest.java | 46 +++++- .../k9/message/html/UriLinkifierTest.java | 14 +- 4 files changed, 107 insertions(+), 86 deletions(-) diff --git a/k9mail/src/main/java/com/fsck/k9/message/html/HttpUriParser.java b/k9mail/src/main/java/com/fsck/k9/message/html/HttpUriParser.java index c4070ed0a..11133bb19 100644 --- a/k9mail/src/main/java/com/fsck/k9/message/html/HttpUriParser.java +++ b/k9mail/src/main/java/com/fsck/k9/message/html/HttpUriParser.java @@ -1,8 +1,6 @@ package com.fsck.k9.message.html; -import java.net.IDN; -import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -18,6 +16,8 @@ import java.util.regex.Pattern; class HttpUriParser implements UriParser { // This string represent character group sub-delim as described in RFC 3986 private static final String SUB_DELIM = "!$&'()*+,;="; + private static final Pattern DOMAIN_PATTERN = + Pattern.compile("\\w([\\w-]*\\w)*(\\.\\w([\\w-]*\\w)*)*(:(\\d{0,5}))?"); private static final Pattern IPv4_PATTERN = Pattern.compile("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})(:(\\d{0,5}))?"); @@ -40,19 +40,15 @@ class HttpUriParser implements UriParser { } // Authority - int authorityEnd = text.indexOf('/', currentPos); - if (authorityEnd == -1) { - authorityEnd = text.length(); - } + currentPos = matchUserInfoIfAvailable(text, currentPos); - currentPos = matchUserInfoIfAvailable(text, currentPos, authorityEnd); - - if (!tryMatchDomainName(text, currentPos, authorityEnd) && - !tryMatchIpv4Address(text, currentPos, authorityEnd, true) && - !tryMatchIpv6Address(text, currentPos, authorityEnd)) { + int matchedAuthorityEnd = Math.max(tryMatchDomainName(text, currentPos), + Math.max(tryMatchIpv4Address(text, currentPos, true), + tryMatchIpv6Address(text, currentPos))); + if (matchedAuthorityEnd == currentPos) { return startPos; } - currentPos = authorityEnd; + currentPos = matchedAuthorityEnd; // Path if (currentPos < text.length() && text.charAt(currentPos) == '/') { @@ -79,9 +75,9 @@ class HttpUriParser implements UriParser { return currentPos; } - private int matchUserInfoIfAvailable(String text, int startPos, int authorityEnd) { + private int matchUserInfoIfAvailable(String text, int startPos) { int userInfoEnd = text.indexOf('@', startPos); - if (userInfoEnd != -1 && userInfoEnd < authorityEnd) { + if (userInfoEnd != -1) { if (matchUnreservedPCTEncodedSubDelimClassesGreedy(text, startPos, ":") != userInfoEnd) { // Illegal character in user info return startPos; @@ -91,91 +87,63 @@ class HttpUriParser implements UriParser { return startPos; } - private boolean tryMatchDomainName(String text, int startPos, int authorityEnd) { - // Partly from OkHttp's HttpUrl + private int tryMatchDomainName(String text, int startPos) { try { - // Check for port - int portPos = text.indexOf(':', startPos); - boolean hasPort = portPos != -1 && portPos < authorityEnd; - if (hasPort) { - int port = 0; - for (int i = portPos + 1; i < authorityEnd; i++) { - int c = text.codePointAt(i); - if (c < '0' || c > '9') { - return false; - } - port = port * 10 + c - '0'; - } + Matcher matcher = DOMAIN_PATTERN.matcher(text); + if (!matcher.find(startPos) || matcher.start() != startPos) { + return startPos; + } + + String portString = matcher.group(matcher.groupCount()); + if (portString != null && !portString.isEmpty()) { + int port = Integer.parseInt(portString); if (port > 65535) { - return false; + return startPos; } } - // Check actual domain - String result = IDN.toASCII(text.substring(startPos, authorityEnd)).toLowerCase(Locale.US); - if (result.isEmpty()) { - return false; - } - - // Confirm that the IDN ToASCII result doesn't contain any illegal characters. - for (int i = 0; i < result.length(); i++) { - char c = result.charAt(i); - // The WHATWG Host parsing rules accepts some character codes which are invalid by - // definition for OkHttp's host header checks (and the WHATWG Host syntax definition). Here - // we rule out characters that would cause problems in host headers. - if (c <= '\u001f' || c >= '\u007f') { - return false; - } - // Check for the characters mentioned in the WHATWG Host parsing spec: - // U+0000, U+0009, U+000A, U+000D, U+0020, "#", "%", "/", ":", "?", "@", "[", "\", and "]" - // (excluding the characters covered above). - if (" #%/:?@[\\]".indexOf(c) != -1) { - return false; - } - } - - return true; + return matcher.end(); } catch (IllegalArgumentException e) { - return false; + return startPos; } } - private boolean tryMatchIpv4Address(String text, int startPos, int authorityEnd, boolean portAllowed) { - Matcher matcher = IPv4_PATTERN.matcher(text.subSequence(startPos, authorityEnd)); - if (!matcher.matches()) { - return false; + private int tryMatchIpv4Address(String text, int startPos, boolean portAllowed) { + Matcher matcher = IPv4_PATTERN.matcher(text); + if (!matcher.find(startPos) || matcher.start() != startPos) { + return startPos; } for (int i = 1; i <= 4; i++) { int segment = Integer.parseInt(matcher.group(1)); if (segment > 255) { - return false; + return startPos; } } if (!portAllowed && matcher.group(5) != null) { - return false; + return startPos; } String portString = matcher.group(6); if (portString != null && !portString.isEmpty()) { int port = Integer.parseInt(portString); if (port > 65535) { - return false; + return startPos; } } - return true; + return matcher.end(); } - private boolean tryMatchIpv6Address(String text, int startPos, int authorityEnd) { - if (text.codePointAt(startPos) != '[') { - return false; + private int tryMatchIpv6Address(String text, int startPos) { + if (startPos == text.length() || text.codePointAt(startPos) != '[') { + return startPos; } int addressEnd = text.indexOf(']'); - if (addressEnd == -1 || addressEnd >= authorityEnd) { - return false; + if (addressEnd == -1) { + return startPos; } // Actual parsing @@ -191,7 +159,7 @@ class HttpUriParser implements UriParser { // Check segment separator if (beginSegmentsCount > 0) { if (text.codePointAt(currentPos) != ':') { - return false; + return startPos; } else { ++currentPos; } @@ -201,7 +169,7 @@ class HttpUriParser implements UriParser { int possibleSegmentEnd = parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, compressionPos)); if (possibleSegmentEnd == currentPos) { - return false; + return startPos; } currentPos = possibleSegmentEnd; ++beginSegmentsCount; @@ -215,7 +183,7 @@ class HttpUriParser implements UriParser { // Check segment separator if (endSegmentsCount > 0) { if (text.codePointAt(currentPos) != ':') { - return false; + return startPos; } else { ++currentPos; } @@ -230,7 +198,7 @@ class HttpUriParser implements UriParser { // Parse segment int possibleSegmentEnd = parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, addressEnd)); if (possibleSegmentEnd == currentPos) { - return false; + return startPos; } currentPos = possibleSegmentEnd; ++endSegmentsCount; @@ -245,34 +213,31 @@ class HttpUriParser implements UriParser { // Only optional port left, skip address bracket ++currentPos; } else { - return false; + return startPos; } } else { // 3) Still some stuff missing, check for IPv4 as tail necessary - if (!tryMatchIpv4Address(text, currentPos, addressEnd, false)) { - return false; + if (tryMatchIpv4Address(text, currentPos, false) != addressEnd) { + return startPos; } currentPos = addressEnd + 1; } // Check optional port - if (currentPos == authorityEnd) { - return true; - } - if (text.codePointAt(currentPos) != ':' || currentPos + 1 == authorityEnd) { - return false; + if (currentPos == text.length() || text.codePointAt(currentPos) != ':') { + return currentPos; } ++currentPos; int port = 0; - for (int i = currentPos; i < authorityEnd; i++) { - int c = text.codePointAt(i); + for (; currentPos < text.length(); currentPos++) { + int c = text.codePointAt(currentPos); if (c < '0' || c > '9') { - return false; + break; } port = port * 10 + c - '0'; } - return port <= 65535; + return (port <= 65535) ? currentPos : startPos; } private int parse16BitHexSegment(String text, int startPos, int endPos) { diff --git a/k9mail/src/main/java/com/fsck/k9/message/html/UriLinkifier.java b/k9mail/src/main/java/com/fsck/k9/message/html/UriLinkifier.java index 6a25679da..01ee239e6 100644 --- a/k9mail/src/main/java/com/fsck/k9/message/html/UriLinkifier.java +++ b/k9mail/src/main/java/com/fsck/k9/message/html/UriLinkifier.java @@ -13,7 +13,7 @@ import android.text.TextUtils; public class UriLinkifier { private static final Pattern URI_SCHEME; private static final Map SUPPORTED_URIS; - private static final String SCHEME_SEPARATORS = " ("; + private static final String SCHEME_SEPARATORS = " (\\n"; private static final String ALLOWED_SEPARATORS_PATTERN = "(?:^|[" + SCHEME_SEPARATORS + "])"; static { diff --git a/k9mail/src/test/java/com/fsck/k9/message/html/HttpUriParserTest.java b/k9mail/src/test/java/com/fsck/k9/message/html/HttpUriParserTest.java index 159e4ddb1..4ae455b90 100644 --- a/k9mail/src/test/java/com/fsck/k9/message/html/HttpUriParserTest.java +++ b/k9mail/src/test/java/com/fsck/k9/message/html/HttpUriParserTest.java @@ -12,11 +12,26 @@ public class HttpUriParserTest { private final StringBuffer outputBuffer = new StringBuffer(); + @Test + public void emptyUriIgnored() { + assertLinkIgnored("http://"); + } + + @Test + public void emptyAuthorityIgnored() { + assertLinkIgnored("http:///"); + } + @Test public void simpleDomain() { assertLinkify("http://www.google.com"); } + @Test + public void invalidDomainIgnored() { + assertLinkIgnored("http://-www.google.com"); + } + @Test public void domainWithTrailingSlash() { assertLinkify("http://www.google.com/"); @@ -102,6 +117,16 @@ public class HttpUriParserTest { assertLinkify("http://[::192.9.5.5]:80/"); } + @Test + public void ipv6WithoutClosingSquareBracketIgnored() { + assertLinkIgnored("http://[1080:0:0:0:8:80:200C:417A/"); + } + + @Test + public void ipv6InvalidClosingSquareBracketIgnored() { + assertLinkIgnored("http://[1080:0:0:0:8:800:270C:417A/]"); + } + @Test public void domainWithTrailingSpace() { String text = "http://google.com/ "; @@ -133,7 +158,7 @@ public class HttpUriParserTest { } @Test - public void uriInMiddleOfInput() throws Exception { + public void uriInMiddleAfterInput() { String prefix = "prefix "; String uri = "http://google.com/"; String text = prefix + uri; @@ -143,6 +168,18 @@ public class HttpUriParserTest { assertLinkOnly(uri, outputBuffer); } + @Test + public void uriInMiddleOfInput() { + String prefix = "prefix "; + String uri = "http://google.com/"; + String postfix = " postfix"; + String text = prefix + uri + postfix; + + parser.linkifyUri(text, prefix.length(), outputBuffer); + + assertLinkOnly(uri, outputBuffer); + } + int linkify(String uri) { return parser.linkifyUri(uri, 0, outputBuffer); @@ -152,4 +189,11 @@ public class HttpUriParserTest { linkify(uri); assertLinkOnly(uri, outputBuffer); } + + void assertLinkIgnored(String uri) { + int endPos = linkify(uri); + + assertEquals("", outputBuffer.toString()); + assertEquals(0, endPos); + } } diff --git a/k9mail/src/test/java/com/fsck/k9/message/html/UriLinkifierTest.java b/k9mail/src/test/java/com/fsck/k9/message/html/UriLinkifierTest.java index 1e86b45e9..3b02dc035 100644 --- a/k9mail/src/test/java/com/fsck/k9/message/html/UriLinkifierTest.java +++ b/k9mail/src/test/java/com/fsck/k9/message/html/UriLinkifierTest.java @@ -117,11 +117,23 @@ public class UriLinkifierTest { } @Test - public void schemaMatchWithInvalidUriInMiddleOfTextFollowedVyValidUri() throws Exception { + public void schemaMatchWithInvalidUriInMiddleOfTextFollowedByValidUri() { String text = "prefix http:42 http://example.org"; UriLinkifier.linkifyText(text, outputBuffer); assertEquals("prefix http:42 http://example.org", outputBuffer.toString()); } + + @Test + public void multipleValidUrisInRow() { + String text = "prefix http://uri1.example.org some text http://uri2.example.org/path postfix"; + + UriLinkifier.linkifyText(text, outputBuffer); + + assertEquals( + "prefix http://uri1.example.org some text " + + "http://uri2.example.org/path postfix", + outputBuffer.toString()); + } }