Merge pull request #5195 from k9mail/message_id_parser
Add 'MessageIdParser'
This commit is contained in:
commit
7e1aacce98
3 changed files with 398 additions and 0 deletions
|
@ -0,0 +1,205 @@
|
|||
package com.fsck.k9.mail.internet
|
||||
|
||||
/**
 * Read Message identifier(s).
 *
 * Used in the `Message-ID`, `In-Reply-To`, and `References` header fields.
 * This does not support the obsolete syntax.
 *
 * See RFC 5322
 * ```
 * msg-id          = [CFWS] "<" id-left "@" id-right ">" [CFWS]
 * id-left         = dot-atom-text / obs-id-left
 * id-right        = dot-atom-text / no-fold-literal / obs-id-right
 *
 * dot-atom-text   = 1*atext *("." 1*atext)
 * no-fold-literal = "[" *dtext "]"
 * CFWS            = (1*([FWS] comment) [FWS]) / FWS
 * FWS             = ([*WSP CRLF] 1*WSP) / obs-FWS ; Folding white space
 * comment         = "(" *([FWS] ccontent) [FWS] ")"
 * ccontent        = ctext / quoted-pair / comment
 * quoted-pair     = ("\" (VCHAR / WSP)) / obs-qp
 * ```
 *
 * The parser keeps a cursor ([currentIndex]) into [input]; every `expect…`/`skip…`/`read…` method advances it.
 * All syntax errors are reported as [MimeHeaderParserException] carrying the index at which parsing failed.
 */
class MessageIdParser private constructor(private val input: String) {
    private val endIndex = input.length
    private var currentIndex = 0

    /**
     * Parses the input as exactly one message identifier, optionally surrounded by CFWS.
     *
     * @return the identifier in the normalized form `<id-left@id-right>` (surrounding CFWS removed).
     * @throws MimeHeaderParserException if the input is not a valid message identifier or if anything other than
     *   CFWS follows it.
     */
    fun parse(): String {
        val messageId = readMessageId()

        if (!endReached()) {
            throw MimeHeaderParserException("Expected end of input", currentIndex)
        }

        return messageId
    }

    /**
     * Parses the input as one or more message identifiers, optionally separated and surrounded by CFWS.
     *
     * @return the identifiers in input order, each in the form `<id-left@id-right>`.
     * @throws MimeHeaderParserException if the input is empty or contains anything that is not a message
     *   identifier or CFWS.
     */
    fun parseList(): List<String> {
        if (input.isEmpty()) {
            throw MimeHeaderParserException("Expected message identifier", errorIndex = 0)
        }

        val messageIds = mutableListOf<String>()
        // readMessageId() consumes trailing CFWS, so the loop ends right after the last identifier.
        while (!endReached()) {
            messageIds.add(readMessageId())
        }

        return messageIds
    }

    /** Reads `[CFWS] "<" id-left "@" id-right ">" [CFWS]` and returns the part between (and including) `<` and `>`. */
    private fun readMessageId(): String {
        skipCfws()
        expect('<')
        val idLeft = readIdLeft()
        expect('@')
        val idRight = readIdRight()
        expect('>')
        skipCfws()

        return "<$idLeft@$idRight>"
    }

    /** Reads `id-left`; only the `dot-atom-text` alternative is supported. */
    private fun readIdLeft(): String {
        return readDotAtom()
    }

    /** Reads `id-right`: a `no-fold-literal` when it starts with `[`, a `dot-atom-text` otherwise. */
    private fun readIdRight(): String {
        return if (peek() == '[') {
            readDText()
        } else {
            readDotAtom()
        }
    }

    /**
     * Reads `dot-atom-text = 1*atext *("." 1*atext)` and returns the consumed text.
     *
     * The loop mirrors the grammar: one atom, then any number of `"." atom` groups. This accepts atoms of any
     * length between dots (e.g. `1.2.3`), and rejects leading, trailing, or consecutive dots.
     */
    private fun readDotAtom(): String {
        val startIndex = currentIndex

        skipAtom()
        while (peek() == '.') {
            skip()
            skipAtom()
        }

        return input.substring(startIndex, currentIndex)
    }

    /** Consumes `1*atext`. Fails if the current character is not `atext`. */
    private fun skipAtom() {
        expect("atext") { it.isAText() }
        // peek() throws at end of input; a dot-atom inside a msg-id must always be followed by '@' or '>'.
        while (peek().isAText()) {
            skip()
        }
    }

    /** Reads `no-fold-literal = "[" *dtext "]"` and returns it including the square brackets. */
    private fun readDText(): String {
        val startIndex = currentIndex

        expect('[')

        while (peek().isDText()) {
            skip()
        }

        expect(']')

        return input.substring(startIndex, currentIndex)
    }

    /**
     * Skips optional CFWS (any mix of folding white space and comments).
     *
     * Keeps going as long as the previous round consumed something and there is input left.
     */
    private fun skipCfws() {
        do {
            val lastIndex = currentIndex

            skipFws()

            if (!endReached() && peek() == '(') {
                expectComment()
            }
        } while (currentIndex != lastIndex && !endReached())
    }

    /**
     * Skips optional folding white space: `*WSP`, optionally followed by `CRLF 1*WSP`.
     *
     * Once a CR is seen, the LF and at least one WSP character are mandatory.
     */
    private fun skipFws() {
        skipWsp()
        if (!endReached() && peek() == CR) {
            expectCr()
            expectLf()
            expectWsp()
            skipWsp()
        }
    }

    /**
     * Consumes a comment, including nested comments.
     *
     * Nesting is tracked with a depth counter instead of recursion, so deeply nested comments do not grow the
     * call stack.
     */
    private fun expectComment() {
        expect('(')
        var level = 1

        do {
            skipFws()

            val char = peek()
            when {
                char == '(' -> {
                    expect('(')
                    level++
                }
                char == '\\' -> {
                    expectQuotedPair()
                }
                char.isCText() -> {
                    skip()
                }
                else -> {
                    // Anything that is not comment content has to be the closing parenthesis.
                    expect(')')
                    level--
                }
            }
        } while (level > 0)
    }

    /** Consumes `quoted-pair = "\" (VCHAR / WSP)`. */
    private fun expectQuotedPair() {
        expect('\\')
        expect("VCHAR or WSP") { it.isVChar() || it.isWsp() }
    }

    private fun expectCr() = expect("CR", CR)

    private fun expectLf() = expect("LF", LF)

    private fun expectWsp() = expect("WSP") { it.isWsp() }

    /** Skips zero or more WSP characters. */
    private fun skipWsp() {
        while (!endReached() && peek().isWsp()) {
            skip()
        }
    }

    private fun endReached() = currentIndex >= endIndex

    /**
     * Returns the current character without consuming it.
     *
     * @throws MimeHeaderParserException if the end of the input has been reached.
     */
    private fun peek(): Char {
        if (endReached()) {
            throw MimeHeaderParserException("End of input reached unexpectedly", currentIndex)
        }

        return input[currentIndex]
    }

    /** Consumes the current character without checking it. */
    private fun skip() {
        currentIndex++
    }

    /** Consumes [character], using the quoted character itself in the error message. */
    private fun expect(character: Char) {
        expect("'$character'") { it == character }
    }

    /** Consumes [character], using [displayInError] to describe it in the error message. */
    private fun expect(displayInError: String, character: Char) {
        expect(displayInError) { it == character }
    }

    /**
     * Consumes the current character if it satisfies [predicate].
     *
     * @throws MimeHeaderParserException ("Expected [displayInError]") if the end of the input has been reached or
     *   the current character does not satisfy [predicate].
     */
    private inline fun expect(displayInError: String, predicate: (Char) -> Boolean) {
        if (!endReached() && predicate(peek())) {
            skip()
        } else {
            throw MimeHeaderParserException("Expected $displayInError", currentIndex)
        }
    }

    companion object {
        /** Parses [input] as a single message identifier. See [MessageIdParser.parse]. */
        @JvmStatic
        fun parse(input: String): String = MessageIdParser(input).parse()

        /** Parses [input] as a list of message identifiers. See [MessageIdParser.parseList]. */
        @JvmStatic
        fun parseList(input: String): List<String> = MessageIdParser(input).parseList()
    }
}
|
|
@ -6,6 +6,10 @@ internal const val RECOMMENDED_MAX_LINE_LENGTH = 78
|
|||
// RFC 2045: tspecials := "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "\" / <"> / "/" / "[" / "]" / "?" / "="
private val TSPECIALS = "()<>@,;:\\\"/[]?=".toCharArray()

// The non-alphanumeric characters permitted in RFC 5322 `atext`.
private val ATEXT_SPECIAL = "!#\$%&'*+-/=?^_`{|}~".toCharArray()

// RFC 5234: HTAB = %x09
internal const val HTAB = '\u0009'
|
||||
|
||||
|
@ -39,3 +43,20 @@ internal fun Char.isWspOrCrlf() = this == SPACE || this == HTAB || this == CR ||
|
|||
|
||||
// RFC 2231: attribute-char := <any (US-ASCII) CHAR except SPACE, CTLs, "*", "'", "%", or tspecials>
internal fun Char.isAttributeChar(): Boolean {
    if (!isVChar()) return false

    return when (this) {
        '*', '\'', '%' -> false
        else -> !isTSpecial()
    }
}
|
||||
|
||||
// RFC 5322: ctext = %d33-39 / %d42-91 / %d93-126
// i.e. printable US-ASCII except "(" (40), ")" (41) and "\" (92)
internal fun Char.isCText(): Boolean = when (this) {
    in '!'..'\'' -> true
    in '*'..'[' -> true
    in ']'..'~' -> true
    else -> false
}
|
||||
|
||||
// RFC 5234: DIGIT = %x30-39 ; 0-9
// ASCII digits only.
internal fun Char.isDIGIT(): Boolean = this >= '0' && this <= '9'
|
||||
|
||||
// RFC 5234: ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
// ASCII letters only.
internal fun Char.isALPHA(): Boolean = when (this) {
    in 'A'..'Z', in 'a'..'z' -> true
    else -> false
}
|
||||
|
||||
// RFC 5322: atext = ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "/" / "=" / "?" / "^" /
//                   "_" / "`" / "{" / "|" / "}" / "~"
internal fun Char.isAText(): Boolean = when {
    isALPHA() -> true
    isDIGIT() -> true
    else -> ATEXT_SPECIAL.contains(this)
}
|
||||
|
||||
// RFC 5322: Printable US-ASCII characters not including "[", "]", or "\"
//           dtext = %d33-90 / %d94-126 / obs-dtext
// The obsolete alternative (obs-dtext) is not accepted.
internal fun Char.isDText(): Boolean = when (this) {
    in '!'..'Z' -> true
    in '^'..'~' -> true
    else -> false
}
|
||||
|
|
|
@ -0,0 +1,172 @@
|
|||
package com.fsck.k9.mail.internet
|
||||
|
||||
import com.google.common.truth.Truth.assertThat
|
||||
import org.junit.Test
|
||||
|
||||
private const val MESSAGE_ID = "<left-side@domain.example>"

/**
 * Tests for [MessageIdParser].
 *
 * Valid inputs are checked against both [MessageIdParser.parse] and [MessageIdParser.parseList]; invalid inputs
 * are expected to raise [MimeHeaderParserException].
 */
class MessageIdParserTest {
    @Test
    fun `typical message identifiers`() {
        assertMessageIdValid("<left-side@right-side>")
        assertMessageIdValid("<left-side@domain.example>")
    }

    @Test
    fun `message identifier with domain literal`() {
        assertMessageIdValid("<left-side@[dtext]>")
    }

    @Test
    fun `message identifier with extra space`() {
        assertMessageIdWithExtraValid(MESSAGE_ID, " ")
    }

    @Test
    fun `message identifier with multiple extra spaces`() {
        // More than one space, so this is not a duplicate of the single-space test above.
        assertMessageIdWithExtraValid(MESSAGE_ID, "   ")
    }

    @Test
    fun `message identifier with extra tab`() {
        assertMessageIdWithExtraValid(MESSAGE_ID, "\t")
    }

    @Test
    fun `message identifier with extra comment`() {
        assertMessageIdWithExtraValid(MESSAGE_ID, "(comment)")
    }

    @Test
    fun `message identifier with extra nested comments`() {
        assertMessageIdWithExtraValid(MESSAGE_ID, "(comment one (nested comment (nested nested comment)))")
    }

    @Test
    fun `message identifier with extra comment and folding whitespace`() {
        assertMessageIdWithExtraValid(MESSAGE_ID, " \r\n\t(comment \\(\r\n more comment)\r\n \t")
    }

    @Test
    fun `message identifier with excessive extra comment nesting`() {
        val extra = "(".repeat(10_000) + ")".repeat(10_000)
        assertMessageIdWithExtraValid(MESSAGE_ID, extra)
    }

    @Test
    fun `multiple message identifiers`() {
        val messageId1 = "<left-side@right-side>"
        val messageId2 = "<left-side@domain.example>"

        assertMessageIdsValid("$messageId1 $messageId2", listOf(messageId1, messageId2))
    }

    @Test
    fun `multiple message identifiers without separation`() {
        val messageId1 = "<left-side@right-side>"
        val messageId2 = "<left-side@domain.example>"

        assertMessageIdsValid("$messageId1$messageId2", listOf(messageId1, messageId2))
    }

    @Test
    fun `multiple message identifiers separated by tab`() {
        val messageId1 = "<left-side@right-side>"
        val messageId2 = "<left-side@domain.example>"

        assertMessageIdsValid("$messageId1\t$messageId2", listOf(messageId1, messageId2))
    }

    @Test
    fun `multiple message identifiers separated by line break`() {
        val messageId1 = "<left-side@right-side>"
        val messageId2 = "<left-side@domain.example>"

        assertMessageIdsValid("$messageId1\r\n $messageId2", listOf(messageId1, messageId2))
    }

    @Test
    fun `multiple message identifiers separated by comment`() {
        val messageId1 = "<left-side@right-side>"
        val messageId2 = "<left-side@domain.example>"

        assertMessageIdsValid("$messageId1(comment <this.is@ignored>)$messageId2", listOf(messageId1, messageId2))
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `message identifier with additional data should throw`() {
        MessageIdParser.parse("$MESSAGE_ID extra")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `message identifiers with additional data should throw`() {
        MessageIdParser.parseList("<one@domain.example> <two@domain.example> extra")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `message identifier missing angle brackets`() {
        MessageIdParser.parse("left-side@domain.example")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `message identifier missing left side`() {
        MessageIdParser.parse("<@domain.example>")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `message identifier containing only left side`() {
        MessageIdParser.parse("<left-side>")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `message identifier missing right side`() {
        MessageIdParser.parse("<left-side@>")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `empty input`() {
        MessageIdParser.parse("")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `empty input for list`() {
        MessageIdParser.parseList("")
    }

    /**
     * Asserts that [input] parses to [expected] as a single identifier and, via the list parser, to a
     * one-element list containing [expected].
     */
    private fun assertMessageIdValid(input: String, expected: String = input) {
        showMimeHeaderParserError(input) {
            assertThat(MessageIdParser.parse(input)).isEqualTo(expected)
        }

        assertMessageIdsValid(input, listOf(expected))
    }

    /** Asserts that the list parser turns [input] into exactly [expected]. */
    private fun assertMessageIdsValid(input: String, expected: List<String>) {
        showMimeHeaderParserError(input) {
            val messageIds = MessageIdParser.parseList(input)
            assertThat(messageIds).isEqualTo(expected)
        }
    }

    /**
     * Test input with [extra] prepended, appended, and both at the same time.
     */
    @Suppress("SameParameterValue")
    private fun assertMessageIdWithExtraValid(messageId: String, extra: String) {
        assertMessageIdValid("$extra$messageId", messageId)
        assertMessageIdValid("$messageId$extra", messageId)
        assertMessageIdValid("$extra$messageId$extra", messageId)
    }

    /**
     * Runs [block]; if it fails with a [MimeHeaderParserException], prints [input] with a marker under the
     * reported error index before rethrowing the exception.
     */
    private fun showMimeHeaderParserError(input: String, block: () -> Unit) {
        try {
            block()
        } catch (e: MimeHeaderParserException) {
            // Replace tabs with spaces so the error indicator lines up
            val tweakedInput = input.replace("\t", " ")
            println("Input: $tweakedInput")
            println("Error: " + "-".repeat(e.errorIndex) + "^")
            throw e
        }
    }
}
|
Loading…
Reference in a new issue