Merge pull request #5195 from k9mail/message_id_parser

Add 'MessageIdParser'
This commit is contained in:
cketti 2021-03-05 22:44:13 +01:00 committed by GitHub
commit 7e1aacce98
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 398 additions and 0 deletions

View file

@ -0,0 +1,205 @@
package com.fsck.k9.mail.internet
/**
 * Read Message identifier(s).
 *
 * Used in the `Message-ID`, `In-Reply-To`, and `References` header fields.
 * This does not support the obsolete syntax.
 *
 * See RFC 5322
 * ```
 * msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS]
 * id-left = dot-atom-text / obs-id-left
 * id-right = dot-atom-text / no-fold-literal / obs-id-right
 *
 * dot-atom-text = 1*atext *("." 1*atext)
 * no-fold-literal = "[" *dtext "]"
 * CFWS = (1*([FWS] comment) [FWS]) / FWS
 * FWS = ([*WSP CRLF] 1*WSP) / obs-FWS ; Folding white space
 * comment = "(" *([FWS] ccontent) [FWS] ")"
 * ccontent = ctext / quoted-pair / comment
 * quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
 * ```
 *
 * An instance keeps a cursor ([currentIndex]) into [input]; the companion object creates a fresh instance for
 * every call, so callers only use [MessageIdParser.parse] and [MessageIdParser.parseList].
 */
class MessageIdParser private constructor(private val input: String) {
    private val endIndex = input.length
    private var currentIndex = 0

    /**
     * Parses the whole input as exactly one message identifier.
     *
     * @return the identifier as `<id-left@id-right>`, without any surrounding comments or whitespace.
     * @throws MimeHeaderParserException if the input is not a message identifier or is followed by extra data.
     */
    fun parse(): String {
        val messageId = readMessageId()

        if (!endReached()) {
            throw MimeHeaderParserException("Expected end of input", currentIndex)
        }

        return messageId
    }

    /**
     * Parses the whole input as a sequence of one or more message identifiers.
     *
     * @return the identifiers in input order, each as `<id-left@id-right>`.
     * @throws MimeHeaderParserException if the input is empty or contains anything that is not a message identifier.
     */
    fun parseList(): List<String> {
        if (input.isEmpty()) {
            throw MimeHeaderParserException("Expected message identifier", errorIndex = 0)
        }

        val messageIds = mutableListOf<String>()
        while (!endReached()) {
            messageIds.add(readMessageId())
        }

        return messageIds
    }

    /** Reads `[CFWS] "<" id-left "@" id-right ">" [CFWS]` and returns it without the CFWS parts. */
    private fun readMessageId(): String {
        skipCfws()
        expect('<')
        val idLeft = readIdLeft()
        expect('@')
        val idRight = readIdRight()
        expect('>')
        skipCfws()

        return "<$idLeft@$idRight>"
    }

    private fun readIdLeft(): String {
        return readDotAtom()
    }

    /** Reads either a no-fold-literal (when the next character is `[`) or a dot-atom-text. */
    private fun readIdRight(): String {
        return if (peek() == '[') {
            readDText()
        } else {
            readDotAtom()
        }
    }

    /**
     * Reads `dot-atom-text = 1*atext *("." 1*atext)`.
     *
     * Every `.` must be followed by at least one `atext` character. Reading stops at the first character that is
     * neither `atext` nor `.`; that character is left for the caller. Reaching the end of the input while looking
     * for that terminating character is reported by [peek] as an error.
     */
    private fun readDotAtom(): String {
        val startIndex = currentIndex

        expect("atext") { it.isAText() }
        while (true) {
            val character = peek()
            if (character == '.') {
                skip()
                // A dot may neither end the dot-atom nor be followed by another dot.
                expect("atext") { it.isAText() }
            } else if (character.isAText()) {
                skip()
            } else {
                break
            }
        }

        return input.substring(startIndex, currentIndex)
    }

    /** Reads `no-fold-literal = "[" *dtext "]"` and returns it including the brackets. */
    private fun readDText(): String {
        val startIndex = currentIndex

        expect('[')
        while (peek().isDText()) {
            skip()
        }
        expect(']')

        return input.substring(startIndex, currentIndex)
    }

    /** Skips any mix of folding whitespace and comments; does nothing if there is none. */
    private fun skipCfws() {
        do {
            val lastIndex = currentIndex

            skipFws()

            if (!endReached() && peek() == '(') {
                expectComment()
            }
            // Keep going for as long as the previous round consumed something.
        } while (currentIndex != lastIndex && !endReached())
    }

    /** Skips whitespace, optionally followed by one CRLF that itself must be followed by whitespace. */
    private fun skipFws() {
        skipWsp()
        if (!endReached() && peek() == CR) {
            expectCr()
            expectLf()

            expectWsp()
            skipWsp()
        }
    }

    /**
     * Consumes a comment including all nested comments.
     *
     * Nesting is tracked with a counter instead of recursion, so the nesting depth is not limited by the call stack.
     */
    private fun expectComment() {
        expect('(')
        var level = 1

        do {
            skipFws()

            val char = peek()
            when {
                char == '(' -> {
                    expect('(')
                    level++
                }
                char == '\\' -> {
                    expectQuotedPair()
                }
                char.isCText() -> {
                    skip()
                }
                else -> {
                    // Anything that is not comment content has to close the current comment.
                    expect(')')
                    level--
                }
            }
        } while (level > 0)
    }

    private fun expectQuotedPair() {
        expect('\\')
        expect("VCHAR or WSP") { it.isVChar() || it.isWsp() }
    }

    private fun expectCr() = expect("CR", CR)

    private fun expectLf() = expect("LF", LF)

    private fun expectWsp() = expect("WSP") { it.isWsp() }

    private fun skipWsp() {
        while (!endReached() && peek().isWsp()) {
            skip()
        }
    }

    private fun endReached() = currentIndex >= endIndex

    /**
     * Returns the character at the cursor without consuming it.
     *
     * @throws MimeHeaderParserException if the cursor is at the end of the input.
     */
    private fun peek(): Char {
        if (endReached()) {
            throw MimeHeaderParserException("End of input reached unexpectedly", currentIndex)
        }

        return input[currentIndex]
    }

    private fun skip() {
        currentIndex++
    }

    private fun expect(character: Char) {
        expect("'$character'") { it == character }
    }

    private fun expect(displayInError: String, character: Char) {
        expect(displayInError) { it == character }
    }

    /**
     * Consumes the character at the cursor if it satisfies [predicate].
     *
     * @throws MimeHeaderParserException with the message "Expected [displayInError]" if the end of the input has
     * been reached or the character does not match.
     */
    private inline fun expect(displayInError: String, predicate: (Char) -> Boolean) {
        if (!endReached() && predicate(peek())) {
            skip()
        } else {
            throw MimeHeaderParserException("Expected $displayInError", currentIndex)
        }
    }

    companion object {
        /** Parses [input] as a single message identifier. See [MessageIdParser.parse]. */
        fun parse(input: String): String = MessageIdParser(input).parse()

        /** Parses [input] as one or more message identifiers. See [MessageIdParser.parseList]. */
        @JvmStatic
        fun parseList(input: String): List<String> = MessageIdParser(input).parseList()
    }
}

View file

@ -6,6 +6,10 @@ internal const val RECOMMENDED_MAX_LINE_LENGTH = 78
// RFC 2045: tspecials := "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "\" / <"> / "/" / "[" / "]" / "?" / "="
// Spelled as one string; the characters appear in the same order as in the grammar rule above.
private val TSPECIALS = "()<>@,;:\\\"/[]?=".toCharArray()
// The non-alphanumeric characters accepted by Char.isAText().
private val ATEXT_SPECIAL = "!#\$%&'*+-/=?^_`{|}~".toCharArray()
// RFC 5234: HTAB = %x09 (horizontal tab)
internal const val HTAB = '\u0009'
@ -39,3 +43,20 @@ internal fun Char.isWspOrCrlf() = this == SPACE || this == HTAB || this == CR ||
// RFC 2231: attribute-char := <any (US-ASCII) CHAR except SPACE, CTLs, "*", "'", "%", or tspecials>
internal fun Char.isAttributeChar(): Boolean {
    if (!isVChar()) return false

    // Excluded in addition to the tspecials.
    if (this == '*' || this == '\'' || this == '%') return false

    return !isTSpecial()
}
// RFC 5322: ctext = %d33-39 / %d42-91 / %d93-126
// The same three ranges written with their boundary characters: '!'..'\'' , '*'..'[' and ']'..'~'.
internal fun Char.isCText(): Boolean = this in '!'..'\'' || this in '*'..'[' || this in ']'..'~'
// RFC 5234: DIGIT = %x30-39 ; 0-9
internal fun Char.isDIGIT(): Boolean = '0' <= this && this <= '9'
// RFC 5234: ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
internal fun Char.isALPHA(): Boolean = when (this) {
    in 'A'..'Z', in 'a'..'z' -> true
    else -> false
}
// RFC 5322: atext = ALPHA / DIGIT / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "/" / "=" / "?" / "^" /
// "_" / "`" / "{" / "|" / "}" / "~"
internal fun Char.isAText(): Boolean = when {
    isALPHA() -> true
    isDIGIT() -> true
    else -> ATEXT_SPECIAL.contains(this)
}
// RFC 5322: Printable US-ASCII characters not including "[", "]", or "\"
// dtext = %d33-90 / %d94-126 / obs-dtext
// The two ranges written with their boundary characters: '!'..'Z' and '^'..'~'.
internal fun Char.isDText(): Boolean = this in '!'..'Z' || this in '^'..'~'

View file

@ -0,0 +1,172 @@
package com.fsck.k9.mail.internet
import com.google.common.truth.Truth.assertThat
import org.junit.Test
// Message identifier used by the tests that add extra whitespace or comments around an identifier.
private const val MESSAGE_ID = "<left-side@domain.example>"

/**
 * Tests for [MessageIdParser].
 *
 * The `assert…Valid` helpers run every input through both [MessageIdParser.parse] and
 * [MessageIdParser.parseList].
 */
class MessageIdParserTest {
    @Test
    fun `typical message identifiers`() {
        assertMessageIdValid("<left-side@right-side>")
        assertMessageIdValid("<left-side@domain.example>")
    }

    @Test
    fun `message identifier with domain literal`() {
        assertMessageIdValid("<left-side@[dtext]>")
    }

    @Test
    fun `message identifier with extra space`() {
        assertMessageIdWithExtraValid(MESSAGE_ID, " ")
    }

    @Test
    fun `message identifier with multiple extra spaces`() {
        // More than one space, so this differs from the single-space test above.
        assertMessageIdWithExtraValid(MESSAGE_ID, "   ")
    }

    @Test
    fun `message identifier with extra tab`() {
        assertMessageIdWithExtraValid(MESSAGE_ID, "\t")
    }

    @Test
    fun `message identifier with extra comment`() {
        assertMessageIdWithExtraValid(MESSAGE_ID, "(comment)")
    }

    @Test
    fun `message identifier with extra nested comments`() {
        assertMessageIdWithExtraValid(MESSAGE_ID, "(comment one (nested comment (nested nested comment)))")
    }

    @Test
    fun `message identifier with extra comment and folding whitespace`() {
        assertMessageIdWithExtraValid(MESSAGE_ID, " \r\n\t(comment \\(\r\n more comment)\r\n \t")
    }

    @Test
    fun `message identifier with excessive extra comment nesting`() {
        // 10,000 nested comments must be parsed without failing.
        val extra = "(".repeat(10_000) + ")".repeat(10_000)
        assertMessageIdWithExtraValid(MESSAGE_ID, extra)
    }

    @Test
    fun `multiple message identifiers`() {
        val messageId1 = "<left-side@right-side>"
        val messageId2 = "<left-side@domain.example>"

        assertMessageIdsValid("$messageId1 $messageId2", listOf(messageId1, messageId2))
    }

    @Test
    fun `multiple message identifiers without separation`() {
        val messageId1 = "<left-side@right-side>"
        val messageId2 = "<left-side@domain.example>"

        assertMessageIdsValid("$messageId1$messageId2", listOf(messageId1, messageId2))
    }

    @Test
    fun `multiple message identifiers separated by tab`() {
        val messageId1 = "<left-side@right-side>"
        val messageId2 = "<left-side@domain.example>"

        assertMessageIdsValid("$messageId1\t$messageId2", listOf(messageId1, messageId2))
    }

    @Test
    fun `multiple message identifiers separated by line break`() {
        val messageId1 = "<left-side@right-side>"
        val messageId2 = "<left-side@domain.example>"

        assertMessageIdsValid("$messageId1\r\n $messageId2", listOf(messageId1, messageId2))
    }

    @Test
    fun `multiple message identifiers separated by comment`() {
        val messageId1 = "<left-side@right-side>"
        val messageId2 = "<left-side@domain.example>"

        // The identifier inside the comment must not show up in the result.
        assertMessageIdsValid("$messageId1(comment <this.is@ignored>)$messageId2", listOf(messageId1, messageId2))
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `message identifier with additional data should throw`() {
        MessageIdParser.parse("$MESSAGE_ID extra")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `message identifiers with additional data should throw`() {
        MessageIdParser.parseList("<one@domain.example> <two@domain.example> extra")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `message identifier missing angle brackets`() {
        MessageIdParser.parse("left-side@domain.example")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `message identifier missing left side`() {
        MessageIdParser.parse("<@domain.example>")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `message identifier containing only left side`() {
        MessageIdParser.parse("<left-side>")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `message identifier missing right side`() {
        MessageIdParser.parse("<left-side@>")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `empty input`() {
        MessageIdParser.parse("")
    }

    @Test(expected = MimeHeaderParserException::class)
    fun `empty input for list`() {
        MessageIdParser.parseList("")
    }

    /**
     * Asserts that [input] parses to [expected] as a single identifier and as a one-element list.
     */
    private fun assertMessageIdValid(input: String, expected: String = input) {
        showMimeHeaderParserError(input) {
            assertThat(MessageIdParser.parse(input)).isEqualTo(expected)
        }

        assertMessageIdsValid(input, listOf(expected))
    }

    /**
     * Asserts that [MessageIdParser.parseList] turns [input] into exactly [expected].
     */
    private fun assertMessageIdsValid(input: String, expected: List<String>) {
        showMimeHeaderParserError(input) {
            val messageIds = MessageIdParser.parseList(input)

            assertThat(messageIds).isEqualTo(expected)
        }
    }

    /**
     * Test input with [extra] prepended, appended, and both at the same time.
     */
    @Suppress("SameParameterValue")
    private fun assertMessageIdWithExtraValid(messageId: String, extra: String) {
        assertMessageIdValid("$extra$messageId", messageId)
        assertMessageIdValid("$messageId$extra", messageId)
        assertMessageIdValid("$extra$messageId$extra", messageId)
    }

    /**
     * Runs [block]; if it throws a [MimeHeaderParserException], prints [input] with a marker below the
     * reported error position and rethrows the exception.
     */
    private fun showMimeHeaderParserError(input: String, block: () -> Unit) {
        try {
            block()
        } catch (e: MimeHeaderParserException) {
            // Replace tabs with spaces so the error indicator lines up
            val tweakedInput = input.replace("\t", " ")

            println("Input: $tweakedInput")
            println("Error: " + "-".repeat(e.errorIndex) + "^")
            throw e
        }
    }
}