Switched to "classic" domain name detection and added multiple tests.
This commit is contained in:
parent
cf9c3d078e
commit
9d3cc8ed00
4 changed files with 107 additions and 86 deletions
|
@ -1,8 +1,6 @@
|
|||
package com.fsck.k9.message.html;
|
||||
|
||||
|
||||
import java.net.IDN;
|
||||
import java.util.Locale;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
@ -18,6 +16,8 @@ import java.util.regex.Pattern;
|
|||
class HttpUriParser implements UriParser {
|
||||
// This string represent character group sub-delim as described in RFC 3986
|
||||
private static final String SUB_DELIM = "!$&'()*+,;=";
|
||||
private static final Pattern DOMAIN_PATTERN =
|
||||
Pattern.compile("\\w([\\w-]*\\w)*(\\.\\w([\\w-]*\\w)*)*(:(\\d{0,5}))?");
|
||||
private static final Pattern IPv4_PATTERN =
|
||||
Pattern.compile("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})(:(\\d{0,5}))?");
|
||||
|
||||
|
@ -40,19 +40,15 @@ class HttpUriParser implements UriParser {
|
|||
}
|
||||
|
||||
// Authority
|
||||
int authorityEnd = text.indexOf('/', currentPos);
|
||||
if (authorityEnd == -1) {
|
||||
authorityEnd = text.length();
|
||||
}
|
||||
currentPos = matchUserInfoIfAvailable(text, currentPos);
|
||||
|
||||
currentPos = matchUserInfoIfAvailable(text, currentPos, authorityEnd);
|
||||
|
||||
if (!tryMatchDomainName(text, currentPos, authorityEnd) &&
|
||||
!tryMatchIpv4Address(text, currentPos, authorityEnd, true) &&
|
||||
!tryMatchIpv6Address(text, currentPos, authorityEnd)) {
|
||||
int matchedAuthorityEnd = Math.max(tryMatchDomainName(text, currentPos),
|
||||
Math.max(tryMatchIpv4Address(text, currentPos, true),
|
||||
tryMatchIpv6Address(text, currentPos)));
|
||||
if (matchedAuthorityEnd == currentPos) {
|
||||
return startPos;
|
||||
}
|
||||
currentPos = authorityEnd;
|
||||
currentPos = matchedAuthorityEnd;
|
||||
|
||||
// Path
|
||||
if (currentPos < text.length() && text.charAt(currentPos) == '/') {
|
||||
|
@ -79,9 +75,9 @@ class HttpUriParser implements UriParser {
|
|||
return currentPos;
|
||||
}
|
||||
|
||||
private int matchUserInfoIfAvailable(String text, int startPos, int authorityEnd) {
|
||||
private int matchUserInfoIfAvailable(String text, int startPos) {
|
||||
int userInfoEnd = text.indexOf('@', startPos);
|
||||
if (userInfoEnd != -1 && userInfoEnd < authorityEnd) {
|
||||
if (userInfoEnd != -1) {
|
||||
if (matchUnreservedPCTEncodedSubDelimClassesGreedy(text, startPos, ":") != userInfoEnd) {
|
||||
// Illegal character in user info
|
||||
return startPos;
|
||||
|
@ -91,91 +87,63 @@ class HttpUriParser implements UriParser {
|
|||
return startPos;
|
||||
}
|
||||
|
||||
private boolean tryMatchDomainName(String text, int startPos, int authorityEnd) {
|
||||
// Partly from OkHttp's HttpUrl
|
||||
private int tryMatchDomainName(String text, int startPos) {
|
||||
try {
|
||||
// Check for port
|
||||
int portPos = text.indexOf(':', startPos);
|
||||
boolean hasPort = portPos != -1 && portPos < authorityEnd;
|
||||
if (hasPort) {
|
||||
int port = 0;
|
||||
for (int i = portPos + 1; i < authorityEnd; i++) {
|
||||
int c = text.codePointAt(i);
|
||||
if (c < '0' || c > '9') {
|
||||
return false;
|
||||
}
|
||||
port = port * 10 + c - '0';
|
||||
}
|
||||
Matcher matcher = DOMAIN_PATTERN.matcher(text);
|
||||
if (!matcher.find(startPos) || matcher.start() != startPos) {
|
||||
return startPos;
|
||||
}
|
||||
|
||||
String portString = matcher.group(matcher.groupCount());
|
||||
if (portString != null && !portString.isEmpty()) {
|
||||
int port = Integer.parseInt(portString);
|
||||
if (port > 65535) {
|
||||
return false;
|
||||
return startPos;
|
||||
}
|
||||
}
|
||||
|
||||
// Check actual domain
|
||||
String result = IDN.toASCII(text.substring(startPos, authorityEnd)).toLowerCase(Locale.US);
|
||||
if (result.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Confirm that the IDN ToASCII result doesn't contain any illegal characters.
|
||||
for (int i = 0; i < result.length(); i++) {
|
||||
char c = result.charAt(i);
|
||||
// The WHATWG Host parsing rules accepts some character codes which are invalid by
|
||||
// definition for OkHttp's host header checks (and the WHATWG Host syntax definition). Here
|
||||
// we rule out characters that would cause problems in host headers.
|
||||
if (c <= '\u001f' || c >= '\u007f') {
|
||||
return false;
|
||||
}
|
||||
// Check for the characters mentioned in the WHATWG Host parsing spec:
|
||||
// U+0000, U+0009, U+000A, U+000D, U+0020, "#", "%", "/", ":", "?", "@", "[", "\", and "]"
|
||||
// (excluding the characters covered above).
|
||||
if (" #%/:?@[\\]".indexOf(c) != -1) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return matcher.end();
|
||||
} catch (IllegalArgumentException e) {
|
||||
return false;
|
||||
return startPos;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean tryMatchIpv4Address(String text, int startPos, int authorityEnd, boolean portAllowed) {
|
||||
Matcher matcher = IPv4_PATTERN.matcher(text.subSequence(startPos, authorityEnd));
|
||||
if (!matcher.matches()) {
|
||||
return false;
|
||||
private int tryMatchIpv4Address(String text, int startPos, boolean portAllowed) {
|
||||
Matcher matcher = IPv4_PATTERN.matcher(text);
|
||||
if (!matcher.find(startPos) || matcher.start() != startPos) {
|
||||
return startPos;
|
||||
}
|
||||
|
||||
for (int i = 1; i <= 4; i++) {
|
||||
int segment = Integer.parseInt(matcher.group(1));
|
||||
if (segment > 255) {
|
||||
return false;
|
||||
return startPos;
|
||||
}
|
||||
}
|
||||
|
||||
if (!portAllowed && matcher.group(5) != null) {
|
||||
return false;
|
||||
return startPos;
|
||||
}
|
||||
|
||||
String portString = matcher.group(6);
|
||||
if (portString != null && !portString.isEmpty()) {
|
||||
int port = Integer.parseInt(portString);
|
||||
if (port > 65535) {
|
||||
return false;
|
||||
return startPos;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return matcher.end();
|
||||
}
|
||||
|
||||
private boolean tryMatchIpv6Address(String text, int startPos, int authorityEnd) {
|
||||
if (text.codePointAt(startPos) != '[') {
|
||||
return false;
|
||||
private int tryMatchIpv6Address(String text, int startPos) {
|
||||
if (startPos == text.length() || text.codePointAt(startPos) != '[') {
|
||||
return startPos;
|
||||
}
|
||||
|
||||
int addressEnd = text.indexOf(']');
|
||||
if (addressEnd == -1 || addressEnd >= authorityEnd) {
|
||||
return false;
|
||||
if (addressEnd == -1) {
|
||||
return startPos;
|
||||
}
|
||||
|
||||
// Actual parsing
|
||||
|
@ -191,7 +159,7 @@ class HttpUriParser implements UriParser {
|
|||
// Check segment separator
|
||||
if (beginSegmentsCount > 0) {
|
||||
if (text.codePointAt(currentPos) != ':') {
|
||||
return false;
|
||||
return startPos;
|
||||
} else {
|
||||
++currentPos;
|
||||
}
|
||||
|
@ -201,7 +169,7 @@ class HttpUriParser implements UriParser {
|
|||
int possibleSegmentEnd =
|
||||
parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, compressionPos));
|
||||
if (possibleSegmentEnd == currentPos) {
|
||||
return false;
|
||||
return startPos;
|
||||
}
|
||||
currentPos = possibleSegmentEnd;
|
||||
++beginSegmentsCount;
|
||||
|
@ -215,7 +183,7 @@ class HttpUriParser implements UriParser {
|
|||
// Check segment separator
|
||||
if (endSegmentsCount > 0) {
|
||||
if (text.codePointAt(currentPos) != ':') {
|
||||
return false;
|
||||
return startPos;
|
||||
} else {
|
||||
++currentPos;
|
||||
}
|
||||
|
@ -230,7 +198,7 @@ class HttpUriParser implements UriParser {
|
|||
// Parse segment
|
||||
int possibleSegmentEnd = parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, addressEnd));
|
||||
if (possibleSegmentEnd == currentPos) {
|
||||
return false;
|
||||
return startPos;
|
||||
}
|
||||
currentPos = possibleSegmentEnd;
|
||||
++endSegmentsCount;
|
||||
|
@ -245,34 +213,31 @@ class HttpUriParser implements UriParser {
|
|||
// Only optional port left, skip address bracket
|
||||
++currentPos;
|
||||
} else {
|
||||
return false;
|
||||
return startPos;
|
||||
}
|
||||
} else {
|
||||
// 3) Still some stuff missing, check for IPv4 as tail necessary
|
||||
if (!tryMatchIpv4Address(text, currentPos, addressEnd, false)) {
|
||||
return false;
|
||||
if (tryMatchIpv4Address(text, currentPos, false) != addressEnd) {
|
||||
return startPos;
|
||||
}
|
||||
currentPos = addressEnd + 1;
|
||||
}
|
||||
|
||||
// Check optional port
|
||||
if (currentPos == authorityEnd) {
|
||||
return true;
|
||||
}
|
||||
if (text.codePointAt(currentPos) != ':' || currentPos + 1 == authorityEnd) {
|
||||
return false;
|
||||
if (currentPos == text.length() || text.codePointAt(currentPos) != ':') {
|
||||
return currentPos;
|
||||
}
|
||||
++currentPos;
|
||||
|
||||
int port = 0;
|
||||
for (int i = currentPos; i < authorityEnd; i++) {
|
||||
int c = text.codePointAt(i);
|
||||
for (; currentPos < text.length(); currentPos++) {
|
||||
int c = text.codePointAt(currentPos);
|
||||
if (c < '0' || c > '9') {
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
port = port * 10 + c - '0';
|
||||
}
|
||||
return port <= 65535;
|
||||
return (port <= 65535) ? currentPos : startPos;
|
||||
}
|
||||
|
||||
private int parse16BitHexSegment(String text, int startPos, int endPos) {
|
||||
|
|
|
@ -13,7 +13,7 @@ import android.text.TextUtils;
|
|||
public class UriLinkifier {
|
||||
private static final Pattern URI_SCHEME;
|
||||
private static final Map<String, UriParser> SUPPORTED_URIS;
|
||||
private static final String SCHEME_SEPARATORS = " (";
|
||||
private static final String SCHEME_SEPARATORS = " (\\n";
|
||||
private static final String ALLOWED_SEPARATORS_PATTERN = "(?:^|[" + SCHEME_SEPARATORS + "])";
|
||||
|
||||
static {
|
||||
|
|
|
@ -12,11 +12,26 @@ public class HttpUriParserTest {
|
|||
private final StringBuffer outputBuffer = new StringBuffer();
|
||||
|
||||
|
||||
@Test
|
||||
public void emptyUriIgnored() {
|
||||
assertLinkIgnored("http://");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void emptyAuthorityIgnored() {
|
||||
assertLinkIgnored("http:///");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void simpleDomain() {
|
||||
assertLinkify("http://www.google.com");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void invalidDomainIgnored() {
|
||||
assertLinkIgnored("http://-www.google.com");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void domainWithTrailingSlash() {
|
||||
assertLinkify("http://www.google.com/");
|
||||
|
@ -102,6 +117,16 @@ public class HttpUriParserTest {
|
|||
assertLinkify("http://[::192.9.5.5]:80/");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void ipv6WithoutClosingSquareBracketIgnored() {
|
||||
assertLinkIgnored("http://[1080:0:0:0:8:80:200C:417A/");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void ipv6InvalidClosingSquareBracketIgnored() {
|
||||
assertLinkIgnored("http://[1080:0:0:0:8:800:270C:417A/]");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void domainWithTrailingSpace() {
|
||||
String text = "http://google.com/ ";
|
||||
|
@ -133,7 +158,7 @@ public class HttpUriParserTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void uriInMiddleOfInput() throws Exception {
|
||||
public void uriInMiddleAfterInput() {
|
||||
String prefix = "prefix ";
|
||||
String uri = "http://google.com/";
|
||||
String text = prefix + uri;
|
||||
|
@ -143,6 +168,18 @@ public class HttpUriParserTest {
|
|||
assertLinkOnly(uri, outputBuffer);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void uriInMiddleOfInput() {
|
||||
String prefix = "prefix ";
|
||||
String uri = "http://google.com/";
|
||||
String postfix = " postfix";
|
||||
String text = prefix + uri + postfix;
|
||||
|
||||
parser.linkifyUri(text, prefix.length(), outputBuffer);
|
||||
|
||||
assertLinkOnly(uri, outputBuffer);
|
||||
}
|
||||
|
||||
|
||||
int linkify(String uri) {
|
||||
return parser.linkifyUri(uri, 0, outputBuffer);
|
||||
|
@ -152,4 +189,11 @@ public class HttpUriParserTest {
|
|||
linkify(uri);
|
||||
assertLinkOnly(uri, outputBuffer);
|
||||
}
|
||||
|
||||
void assertLinkIgnored(String uri) {
|
||||
int endPos = linkify(uri);
|
||||
|
||||
assertEquals("", outputBuffer.toString());
|
||||
assertEquals(0, endPos);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -117,11 +117,23 @@ public class UriLinkifierTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void schemaMatchWithInvalidUriInMiddleOfTextFollowedVyValidUri() throws Exception {
|
||||
public void schemaMatchWithInvalidUriInMiddleOfTextFollowedByValidUri() {
|
||||
String text = "prefix http:42 http://example.org";
|
||||
|
||||
UriLinkifier.linkifyText(text, outputBuffer);
|
||||
|
||||
assertEquals("prefix http:42 <a href=\"http://example.org\">http://example.org</a>", outputBuffer.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void multipleValidUrisInRow() {
|
||||
String text = "prefix http://uri1.example.org some text http://uri2.example.org/path postfix";
|
||||
|
||||
UriLinkifier.linkifyText(text, outputBuffer);
|
||||
|
||||
assertEquals(
|
||||
"prefix <a href=\"http://uri1.example.org\">http://uri1.example.org</a> some text " +
|
||||
"<a href=\"http://uri2.example.org/path\">http://uri2.example.org/path</a> postfix",
|
||||
outputBuffer.toString());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue