Switched to "classic" domain name detection and added multiple tests.
This commit is contained in:
parent
cf9c3d078e
commit
9d3cc8ed00
4 changed files with 107 additions and 86 deletions
|
@ -1,8 +1,6 @@
|
||||||
package com.fsck.k9.message.html;
|
package com.fsck.k9.message.html;
|
||||||
|
|
||||||
|
|
||||||
import java.net.IDN;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
@ -18,6 +16,8 @@ import java.util.regex.Pattern;
|
||||||
class HttpUriParser implements UriParser {
|
class HttpUriParser implements UriParser {
|
||||||
// This string represent character group sub-delim as described in RFC 3986
|
// This string represent character group sub-delim as described in RFC 3986
|
||||||
private static final String SUB_DELIM = "!$&'()*+,;=";
|
private static final String SUB_DELIM = "!$&'()*+,;=";
|
||||||
|
private static final Pattern DOMAIN_PATTERN =
|
||||||
|
Pattern.compile("\\w([\\w-]*\\w)*(\\.\\w([\\w-]*\\w)*)*(:(\\d{0,5}))?");
|
||||||
private static final Pattern IPv4_PATTERN =
|
private static final Pattern IPv4_PATTERN =
|
||||||
Pattern.compile("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})(:(\\d{0,5}))?");
|
Pattern.compile("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})(:(\\d{0,5}))?");
|
||||||
|
|
||||||
|
@ -40,19 +40,15 @@ class HttpUriParser implements UriParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Authority
|
// Authority
|
||||||
int authorityEnd = text.indexOf('/', currentPos);
|
currentPos = matchUserInfoIfAvailable(text, currentPos);
|
||||||
if (authorityEnd == -1) {
|
|
||||||
authorityEnd = text.length();
|
|
||||||
}
|
|
||||||
|
|
||||||
currentPos = matchUserInfoIfAvailable(text, currentPos, authorityEnd);
|
int matchedAuthorityEnd = Math.max(tryMatchDomainName(text, currentPos),
|
||||||
|
Math.max(tryMatchIpv4Address(text, currentPos, true),
|
||||||
if (!tryMatchDomainName(text, currentPos, authorityEnd) &&
|
tryMatchIpv6Address(text, currentPos)));
|
||||||
!tryMatchIpv4Address(text, currentPos, authorityEnd, true) &&
|
if (matchedAuthorityEnd == currentPos) {
|
||||||
!tryMatchIpv6Address(text, currentPos, authorityEnd)) {
|
|
||||||
return startPos;
|
return startPos;
|
||||||
}
|
}
|
||||||
currentPos = authorityEnd;
|
currentPos = matchedAuthorityEnd;
|
||||||
|
|
||||||
// Path
|
// Path
|
||||||
if (currentPos < text.length() && text.charAt(currentPos) == '/') {
|
if (currentPos < text.length() && text.charAt(currentPos) == '/') {
|
||||||
|
@ -79,9 +75,9 @@ class HttpUriParser implements UriParser {
|
||||||
return currentPos;
|
return currentPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int matchUserInfoIfAvailable(String text, int startPos, int authorityEnd) {
|
private int matchUserInfoIfAvailable(String text, int startPos) {
|
||||||
int userInfoEnd = text.indexOf('@', startPos);
|
int userInfoEnd = text.indexOf('@', startPos);
|
||||||
if (userInfoEnd != -1 && userInfoEnd < authorityEnd) {
|
if (userInfoEnd != -1) {
|
||||||
if (matchUnreservedPCTEncodedSubDelimClassesGreedy(text, startPos, ":") != userInfoEnd) {
|
if (matchUnreservedPCTEncodedSubDelimClassesGreedy(text, startPos, ":") != userInfoEnd) {
|
||||||
// Illegal character in user info
|
// Illegal character in user info
|
||||||
return startPos;
|
return startPos;
|
||||||
|
@ -91,91 +87,63 @@ class HttpUriParser implements UriParser {
|
||||||
return startPos;
|
return startPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean tryMatchDomainName(String text, int startPos, int authorityEnd) {
|
private int tryMatchDomainName(String text, int startPos) {
|
||||||
// Partly from OkHttp's HttpUrl
|
|
||||||
try {
|
try {
|
||||||
// Check for port
|
Matcher matcher = DOMAIN_PATTERN.matcher(text);
|
||||||
int portPos = text.indexOf(':', startPos);
|
if (!matcher.find(startPos) || matcher.start() != startPos) {
|
||||||
boolean hasPort = portPos != -1 && portPos < authorityEnd;
|
return startPos;
|
||||||
if (hasPort) {
|
}
|
||||||
int port = 0;
|
|
||||||
for (int i = portPos + 1; i < authorityEnd; i++) {
|
String portString = matcher.group(matcher.groupCount());
|
||||||
int c = text.codePointAt(i);
|
if (portString != null && !portString.isEmpty()) {
|
||||||
if (c < '0' || c > '9') {
|
int port = Integer.parseInt(portString);
|
||||||
return false;
|
|
||||||
}
|
|
||||||
port = port * 10 + c - '0';
|
|
||||||
}
|
|
||||||
if (port > 65535) {
|
if (port > 65535) {
|
||||||
return false;
|
return startPos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check actual domain
|
return matcher.end();
|
||||||
String result = IDN.toASCII(text.substring(startPos, authorityEnd)).toLowerCase(Locale.US);
|
|
||||||
if (result.isEmpty()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Confirm that the IDN ToASCII result doesn't contain any illegal characters.
|
|
||||||
for (int i = 0; i < result.length(); i++) {
|
|
||||||
char c = result.charAt(i);
|
|
||||||
// The WHATWG Host parsing rules accepts some character codes which are invalid by
|
|
||||||
// definition for OkHttp's host header checks (and the WHATWG Host syntax definition). Here
|
|
||||||
// we rule out characters that would cause problems in host headers.
|
|
||||||
if (c <= '\u001f' || c >= '\u007f') {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Check for the characters mentioned in the WHATWG Host parsing spec:
|
|
||||||
// U+0000, U+0009, U+000A, U+000D, U+0020, "#", "%", "/", ":", "?", "@", "[", "\", and "]"
|
|
||||||
// (excluding the characters covered above).
|
|
||||||
if (" #%/:?@[\\]".indexOf(c) != -1) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
return false;
|
return startPos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean tryMatchIpv4Address(String text, int startPos, int authorityEnd, boolean portAllowed) {
|
private int tryMatchIpv4Address(String text, int startPos, boolean portAllowed) {
|
||||||
Matcher matcher = IPv4_PATTERN.matcher(text.subSequence(startPos, authorityEnd));
|
Matcher matcher = IPv4_PATTERN.matcher(text);
|
||||||
if (!matcher.matches()) {
|
if (!matcher.find(startPos) || matcher.start() != startPos) {
|
||||||
return false;
|
return startPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 1; i <= 4; i++) {
|
for (int i = 1; i <= 4; i++) {
|
||||||
int segment = Integer.parseInt(matcher.group(1));
|
int segment = Integer.parseInt(matcher.group(1));
|
||||||
if (segment > 255) {
|
if (segment > 255) {
|
||||||
return false;
|
return startPos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!portAllowed && matcher.group(5) != null) {
|
if (!portAllowed && matcher.group(5) != null) {
|
||||||
return false;
|
return startPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
String portString = matcher.group(6);
|
String portString = matcher.group(6);
|
||||||
if (portString != null && !portString.isEmpty()) {
|
if (portString != null && !portString.isEmpty()) {
|
||||||
int port = Integer.parseInt(portString);
|
int port = Integer.parseInt(portString);
|
||||||
if (port > 65535) {
|
if (port > 65535) {
|
||||||
return false;
|
return startPos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return matcher.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean tryMatchIpv6Address(String text, int startPos, int authorityEnd) {
|
private int tryMatchIpv6Address(String text, int startPos) {
|
||||||
if (text.codePointAt(startPos) != '[') {
|
if (startPos == text.length() || text.codePointAt(startPos) != '[') {
|
||||||
return false;
|
return startPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
int addressEnd = text.indexOf(']');
|
int addressEnd = text.indexOf(']');
|
||||||
if (addressEnd == -1 || addressEnd >= authorityEnd) {
|
if (addressEnd == -1) {
|
||||||
return false;
|
return startPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Actual parsing
|
// Actual parsing
|
||||||
|
@ -191,7 +159,7 @@ class HttpUriParser implements UriParser {
|
||||||
// Check segment separator
|
// Check segment separator
|
||||||
if (beginSegmentsCount > 0) {
|
if (beginSegmentsCount > 0) {
|
||||||
if (text.codePointAt(currentPos) != ':') {
|
if (text.codePointAt(currentPos) != ':') {
|
||||||
return false;
|
return startPos;
|
||||||
} else {
|
} else {
|
||||||
++currentPos;
|
++currentPos;
|
||||||
}
|
}
|
||||||
|
@ -201,7 +169,7 @@ class HttpUriParser implements UriParser {
|
||||||
int possibleSegmentEnd =
|
int possibleSegmentEnd =
|
||||||
parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, compressionPos));
|
parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, compressionPos));
|
||||||
if (possibleSegmentEnd == currentPos) {
|
if (possibleSegmentEnd == currentPos) {
|
||||||
return false;
|
return startPos;
|
||||||
}
|
}
|
||||||
currentPos = possibleSegmentEnd;
|
currentPos = possibleSegmentEnd;
|
||||||
++beginSegmentsCount;
|
++beginSegmentsCount;
|
||||||
|
@ -215,7 +183,7 @@ class HttpUriParser implements UriParser {
|
||||||
// Check segment separator
|
// Check segment separator
|
||||||
if (endSegmentsCount > 0) {
|
if (endSegmentsCount > 0) {
|
||||||
if (text.codePointAt(currentPos) != ':') {
|
if (text.codePointAt(currentPos) != ':') {
|
||||||
return false;
|
return startPos;
|
||||||
} else {
|
} else {
|
||||||
++currentPos;
|
++currentPos;
|
||||||
}
|
}
|
||||||
|
@ -230,7 +198,7 @@ class HttpUriParser implements UriParser {
|
||||||
// Parse segment
|
// Parse segment
|
||||||
int possibleSegmentEnd = parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, addressEnd));
|
int possibleSegmentEnd = parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, addressEnd));
|
||||||
if (possibleSegmentEnd == currentPos) {
|
if (possibleSegmentEnd == currentPos) {
|
||||||
return false;
|
return startPos;
|
||||||
}
|
}
|
||||||
currentPos = possibleSegmentEnd;
|
currentPos = possibleSegmentEnd;
|
||||||
++endSegmentsCount;
|
++endSegmentsCount;
|
||||||
|
@ -245,34 +213,31 @@ class HttpUriParser implements UriParser {
|
||||||
// Only optional port left, skip address bracket
|
// Only optional port left, skip address bracket
|
||||||
++currentPos;
|
++currentPos;
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return startPos;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// 3) Still some stuff missing, check for IPv4 as tail necessary
|
// 3) Still some stuff missing, check for IPv4 as tail necessary
|
||||||
if (!tryMatchIpv4Address(text, currentPos, addressEnd, false)) {
|
if (tryMatchIpv4Address(text, currentPos, false) != addressEnd) {
|
||||||
return false;
|
return startPos;
|
||||||
}
|
}
|
||||||
currentPos = addressEnd + 1;
|
currentPos = addressEnd + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check optional port
|
// Check optional port
|
||||||
if (currentPos == authorityEnd) {
|
if (currentPos == text.length() || text.codePointAt(currentPos) != ':') {
|
||||||
return true;
|
return currentPos;
|
||||||
}
|
|
||||||
if (text.codePointAt(currentPos) != ':' || currentPos + 1 == authorityEnd) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
++currentPos;
|
++currentPos;
|
||||||
|
|
||||||
int port = 0;
|
int port = 0;
|
||||||
for (int i = currentPos; i < authorityEnd; i++) {
|
for (; currentPos < text.length(); currentPos++) {
|
||||||
int c = text.codePointAt(i);
|
int c = text.codePointAt(currentPos);
|
||||||
if (c < '0' || c > '9') {
|
if (c < '0' || c > '9') {
|
||||||
return false;
|
break;
|
||||||
}
|
}
|
||||||
port = port * 10 + c - '0';
|
port = port * 10 + c - '0';
|
||||||
}
|
}
|
||||||
return port <= 65535;
|
return (port <= 65535) ? currentPos : startPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int parse16BitHexSegment(String text, int startPos, int endPos) {
|
private int parse16BitHexSegment(String text, int startPos, int endPos) {
|
||||||
|
|
|
@ -13,7 +13,7 @@ import android.text.TextUtils;
|
||||||
public class UriLinkifier {
|
public class UriLinkifier {
|
||||||
private static final Pattern URI_SCHEME;
|
private static final Pattern URI_SCHEME;
|
||||||
private static final Map<String, UriParser> SUPPORTED_URIS;
|
private static final Map<String, UriParser> SUPPORTED_URIS;
|
||||||
private static final String SCHEME_SEPARATORS = " (";
|
private static final String SCHEME_SEPARATORS = " (\\n";
|
||||||
private static final String ALLOWED_SEPARATORS_PATTERN = "(?:^|[" + SCHEME_SEPARATORS + "])";
|
private static final String ALLOWED_SEPARATORS_PATTERN = "(?:^|[" + SCHEME_SEPARATORS + "])";
|
||||||
|
|
||||||
static {
|
static {
|
||||||
|
|
|
@ -12,11 +12,26 @@ public class HttpUriParserTest {
|
||||||
private final StringBuffer outputBuffer = new StringBuffer();
|
private final StringBuffer outputBuffer = new StringBuffer();
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void emptyUriIgnored() {
|
||||||
|
assertLinkIgnored("http://");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void emptyAuthorityIgnored() {
|
||||||
|
assertLinkIgnored("http:///");
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void simpleDomain() {
|
public void simpleDomain() {
|
||||||
assertLinkify("http://www.google.com");
|
assertLinkify("http://www.google.com");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void invalidDomainIgnored() {
|
||||||
|
assertLinkIgnored("http://-www.google.com");
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void domainWithTrailingSlash() {
|
public void domainWithTrailingSlash() {
|
||||||
assertLinkify("http://www.google.com/");
|
assertLinkify("http://www.google.com/");
|
||||||
|
@ -102,6 +117,16 @@ public class HttpUriParserTest {
|
||||||
assertLinkify("http://[::192.9.5.5]:80/");
|
assertLinkify("http://[::192.9.5.5]:80/");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void ipv6WithoutClosingSquareBracketIgnored() {
|
||||||
|
assertLinkIgnored("http://[1080:0:0:0:8:80:200C:417A/");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void ipv6InvalidClosingSquareBracketIgnored() {
|
||||||
|
assertLinkIgnored("http://[1080:0:0:0:8:800:270C:417A/]");
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void domainWithTrailingSpace() {
|
public void domainWithTrailingSpace() {
|
||||||
String text = "http://google.com/ ";
|
String text = "http://google.com/ ";
|
||||||
|
@ -133,7 +158,7 @@ public class HttpUriParserTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void uriInMiddleOfInput() throws Exception {
|
public void uriInMiddleAfterInput() {
|
||||||
String prefix = "prefix ";
|
String prefix = "prefix ";
|
||||||
String uri = "http://google.com/";
|
String uri = "http://google.com/";
|
||||||
String text = prefix + uri;
|
String text = prefix + uri;
|
||||||
|
@ -143,6 +168,18 @@ public class HttpUriParserTest {
|
||||||
assertLinkOnly(uri, outputBuffer);
|
assertLinkOnly(uri, outputBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void uriInMiddleOfInput() {
|
||||||
|
String prefix = "prefix ";
|
||||||
|
String uri = "http://google.com/";
|
||||||
|
String postfix = " postfix";
|
||||||
|
String text = prefix + uri + postfix;
|
||||||
|
|
||||||
|
parser.linkifyUri(text, prefix.length(), outputBuffer);
|
||||||
|
|
||||||
|
assertLinkOnly(uri, outputBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int linkify(String uri) {
|
int linkify(String uri) {
|
||||||
return parser.linkifyUri(uri, 0, outputBuffer);
|
return parser.linkifyUri(uri, 0, outputBuffer);
|
||||||
|
@ -152,4 +189,11 @@ public class HttpUriParserTest {
|
||||||
linkify(uri);
|
linkify(uri);
|
||||||
assertLinkOnly(uri, outputBuffer);
|
assertLinkOnly(uri, outputBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void assertLinkIgnored(String uri) {
|
||||||
|
int endPos = linkify(uri);
|
||||||
|
|
||||||
|
assertEquals("", outputBuffer.toString());
|
||||||
|
assertEquals(0, endPos);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -117,11 +117,23 @@ public class UriLinkifierTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void schemaMatchWithInvalidUriInMiddleOfTextFollowedVyValidUri() throws Exception {
|
public void schemaMatchWithInvalidUriInMiddleOfTextFollowedByValidUri() {
|
||||||
String text = "prefix http:42 http://example.org";
|
String text = "prefix http:42 http://example.org";
|
||||||
|
|
||||||
UriLinkifier.linkifyText(text, outputBuffer);
|
UriLinkifier.linkifyText(text, outputBuffer);
|
||||||
|
|
||||||
assertEquals("prefix http:42 <a href=\"http://example.org\">http://example.org</a>", outputBuffer.toString());
|
assertEquals("prefix http:42 <a href=\"http://example.org\">http://example.org</a>", outputBuffer.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void multipleValidUrisInRow() {
|
||||||
|
String text = "prefix http://uri1.example.org some text http://uri2.example.org/path postfix";
|
||||||
|
|
||||||
|
UriLinkifier.linkifyText(text, outputBuffer);
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
"prefix <a href=\"http://uri1.example.org\">http://uri1.example.org</a> some text " +
|
||||||
|
"<a href=\"http://uri2.example.org/path\">http://uri2.example.org/path</a> postfix",
|
||||||
|
outputBuffer.toString());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue