Merge pull request #3132 from k9mail/email_section_extractor
TextToHtml: Extract sections from a plain text email
This commit is contained in:
commit
5931d46a42
5 changed files with 450 additions and 1 deletions
|
@ -25,6 +25,7 @@ dependencies {
|
|||
compile project(':k9mail-library')
|
||||
compile project(':plugins:HoloColorPicker')
|
||||
compile project(':plugins:openpgp-api-lib:openpgp-api')
|
||||
compile "org.jetbrains.kotlin:kotlin-stdlib-jre7:${kotlinVersion}"
|
||||
compile "com.squareup.okio:okio:${okioVersion}"
|
||||
compile 'commons-io:commons-io:2.4'
|
||||
compile "com.android.support:support-v4:${androidSupportLibraryVersion}"
|
||||
|
@ -40,7 +41,6 @@ dependencies {
|
|||
|
||||
androidTestCompile 'com.android.support.test.espresso:espresso-core:2.2.2'
|
||||
|
||||
testCompile "org.jetbrains.kotlin:kotlin-stdlib-jre7:${kotlinVersion}"
|
||||
testCompile "org.robolectric:robolectric:${robolectricVersion}"
|
||||
testCompile "junit:junit:${junitVersion}"
|
||||
testCompile "com.google.truth:truth:${truthVersion}"
|
||||
|
|
117
k9mail/src/main/java/com/fsck/k9/message/html/EmailSection.kt
Normal file
117
k9mail/src/main/java/com/fsck/k9/message/html/EmailSection.kt
Normal file
|
@ -0,0 +1,117 @@
|
|||
package com.fsck.k9.message.html
|
||||
|
||||
/**
 * Represents a section of an email's plain text body.
 *
 * A section does not copy any characters. It keeps a reference to the complete message text plus an ordered list of
 * segments (index ranges into that text); the content of the section is the concatenation of those ranges. The
 * smallest number of leading spaces reported for any segment is treated as common indentation and skipped at the
 * start of every segment.
 *
 * See [EmailSectionExtractor].
 */
class EmailSection private constructor(builder: Builder) : CharSequence {
    /** The quote depth the [Builder] was created with. */
    val quoteDepth = builder.quoteDepth

    private val text = builder.text

    private val segments: List<Segment> = if (builder.indent == 0) {
        builder.segments.toList()
    } else {
        builder.segments.map { segment ->
            // Never skip past the end of a segment. Without this clamp a segment that is shorter than the common
            // indent would end up with startIndex > endIndex, i.e. a negative length.
            val skipped = minOf(builder.indent, segment.endIndex - segment.startIndex)
            Segment(segment.startIndex + skipped, segment.endIndex)
        }
    }

    /** Number of characters in this section, i.e. the sum of all segment lengths. */
    override val length: Int = segments.fold(0) { total, segment -> total + segment.length }

    /**
     * Returns the character at [index] of the section's content.
     *
     * @throws IllegalArgumentException if [index] is not in `0 until length`.
     */
    override fun get(index: Int): Char {
        require(index in 0 until length) { "index: $index; length: $length" }

        val (segmentIndex, offset) = findSegmentIndexAndOffset(index)
        return text[segments[segmentIndex].startIndex + offset]
    }

    /**
     * Returns the characters in `[startIndex, endIndex)`.
     *
     * An empty range returns `""`, the full range returns this instance, anything else returns a new [EmailSection]
     * that shares the underlying text. As for any [CharSequence], `startIndex == length` is accepted (together with
     * `endIndex == length`) and yields an empty result.
     *
     * @throws IllegalArgumentException if the indices are outside `0..length` or `startIndex > endIndex`.
     */
    override fun subSequence(startIndex: Int, endIndex: Int): CharSequence {
        require(startIndex in 0..length) { "startIndex: $startIndex; length: $length" }
        require(endIndex in 0..length) { "endIndex: $endIndex; length: $length" }
        require(startIndex <= endIndex) { "startIndex > endIndex" }

        if (startIndex == endIndex) return ""
        if (startIndex == 0 && endIndex == length) return this

        val (startSegmentIndex, startOffset) = findSegmentIndexAndOffset(startIndex)
        val (endSegmentIndex, endOffset) = findSegmentIndexAndOffset(endIndex, isEndIndex = true)

        // The segments of this section already have the indentation removed, so every copied range is added with
        // zero leading spaces. Only the first and last range may need to be trimmed.
        val builder = Builder(text, quoteDepth)
        for (segmentIndex in startSegmentIndex..endSegmentIndex) {
            val segment = segments[segmentIndex]
            val from = if (segmentIndex == startSegmentIndex) segment.startIndex + startOffset else segment.startIndex
            val to = if (segmentIndex == endSegmentIndex) segment.startIndex + endOffset else segment.endIndex
            builder.addSegment(0, from, to)
        }

        return builder.build()
    }

    /**
     * Maps an index into the section's content to the segment containing it and the offset inside that segment.
     *
     * With [isEndIndex] set, an index that lies exactly at the end of a segment is attributed to that segment
     * instead of the start of a following one, which is what an exclusive end index needs.
     */
    private fun findSegmentIndexAndOffset(index: Int, isEndIndex: Boolean = false): Pair<Int, Int> {
        var offset = index
        for ((segmentIndex, segment) in segments.withIndex()) {
            val segmentLength = segment.length
            if (offset < segmentLength || (isEndIndex && offset == segmentLength)) {
                return Pair(segmentIndex, offset)
            }
            offset -= segmentLength
        }

        // Callers validate the index against length first, so running off the end means the state is inconsistent.
        throw AssertionError()
    }

    /** Returns the section's content as a new [String]. */
    override fun toString(): String {
        val result = StringBuilder()
        for (segment in segments) {
            result.append(text, segment.startIndex, segment.endIndex)
        }
        return result.toString()
    }


    /** A range `[startIndex, endIndex)` into the text a section refers to. */
    internal data class Segment(val startIndex: Int, val endIndex: Int) {
        val length: Int
            get() = endIndex - startIndex
    }

    /**
     * Collects the segments of a section.
     *
     * @param text The complete text the segment indices refer to.
     * @param quoteDepth The quote depth to report for the section that is built.
     */
    class Builder(val text: String, val quoteDepth: Int) {
        internal val segments: MutableList<Segment> = mutableListOf()

        // Smallest leadingSpaces value seen so far; stays at Int.MAX_VALUE until the first segment is added.
        internal var indent = Int.MAX_VALUE

        /** `true` once at least one segment has been added. */
        val hasSegments: Boolean
            get() = segments.isNotEmpty()

        /**
         * Adds the range `[startIndex, endIndex)` of [text].
         *
         * @param leadingSpaces Number of spaces at the start of the range; the minimum over all segments is stripped
         * from every segment when the section is built.
         * @return This builder, to allow chaining.
         */
        fun addSegment(leadingSpaces: Int, startIndex: Int, endIndex: Int): Builder {
            indent = minOf(indent, leadingSpaces)
            segments.add(Segment(startIndex, endIndex))
            return this
        }

        /** Adds an existing segment unchanged; this also disables indentation stripping for the section. */
        internal fun addSegment(segment: Segment) {
            indent = 0
            segments.add(segment)
        }

        /** Creates the [EmailSection] from the segments added so far. */
        fun build() = EmailSection(this)
    }
}
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
package com.fsck.k9.message.html
|
||||
|
||||
/**
 * Extract sections from a plain text email.
 *
 * A section consists of all consecutive lines of the same quote depth. Quote characters and spaces at the beginning of
 * a line are stripped and not part of the section's content.
 *
 * ### Example:
 *
 * ```
 * On 2018-01-25 Alice <alice@example.com> wrote:
 * > Hi Bob
 *
 * Hi Alice
 * ```
 *
 * This message consists of three sections with the following contents:
 * * `On 2018-01-25 Alice <alice@example.com> wrote:`
 * * `Hi Bob`
 * * `Hi Alice`
 *
 * Line terminators and blank lines are kept as part of the section they belong to.
 */
class EmailSectionExtractor private constructor(val text: String) {
    private val sections = mutableListOf<EmailSection>()
    private var sectionBuilder = EmailSection.Builder(text, 0)

    // Start of the current unquoted section. Unquoted sections are added as one single range when they end.
    private var sectionStartIndex = 0

    // Index of the most recent '\n'; newlineIndex + 1 is therefore the start of the current line.
    private var newlineIndex = -1

    // Where the content of the current line starts, including the spaces that follow the last quote character.
    private var startOfContentIndex = 0

    // True while only spaces and '>' have been seen on the current line.
    private var isStartOfLine = true

    // Number of spaces seen since the start of the line or since the last '>'.
    private var spaces = 0

    // Quote depth of the section currently being built vs. quote depth of the line currently being read.
    private var quoteDepth = 0
    private var currentQuoteDepth = 0

    /**
     * Walks over [text] once and returns the sections in the order in which they appear.
     */
    fun extract(): List<EmailSection> {
        text.forEachIndexed { index, character ->
            if (isStartOfLine) {
                detectQuoteCharacters(index, character)
            } else if (character == '\n') {
                addQuotedLineToSection(endIndex = index + 1)
            }

            if (character == '\n') {
                newlineIndex = index
                resetForStartOfLine()
            }
        }

        completeLastSection()

        return sections
    }

    /**
     * Handles a character in the prefix of a line, i.e. before the first character that is neither a space nor `>`.
     */
    private fun detectQuoteCharacters(index: Int, character: Char) {
        when (character) {
            ' ' -> spaces++
            '>' -> {
                currentQuoteDepth++
                spaces = 0
            }
            '\n' -> {
                // The line contains nothing but quote characters and spaces.
                if (quoteDepth != currentQuoteDepth) {
                    // End the previous section at the start of this line. Using index + 1 here would make a
                    // preceding unquoted section swallow this line's quote characters.
                    finishSection(newlineIndex + 1)
                    sectionStartIndex = index - spaces
                }
                // Blank quoted lines belong to the (possibly just started) quoted section. For unquoted text this
                // is a no-op because unquoted sections are added as one range when they end.
                addQuotedLineToSection(startIndex = index - spaces, endIndex = index + 1)
            }
            else -> {
                isStartOfLine = false
                startOfContentIndex = index - spaces
                if (quoteDepth != currentQuoteDepth) {
                    finishSection(newlineIndex + 1)
                    sectionStartIndex = startOfContentIndex
                }
            }
        }
    }

    /** Adds the pending unquoted range up to [endIndex], unless the section is quoted or the range is empty. */
    private fun addUnquotedLineToSection(endIndex: Int) {
        if (quoteDepth == 0 && sectionStartIndex != endIndex) {
            sectionBuilder.addSegment(0, sectionStartIndex, endIndex)
        }
    }

    /** Adds one line of a quoted section; does nothing when the current line is not quoted. */
    private fun addQuotedLineToSection(startIndex: Int = startOfContentIndex, endIndex: Int) {
        if (currentQuoteDepth > 0) {
            sectionBuilder.addSegment(spaces, startIndex, endIndex)
        }
    }

    /**
     * Closes the section being built and starts a new one with the quote depth of the current line.
     *
     * @param endIndex End (exclusive) of the pending unquoted range, if the closed section is an unquoted one.
     */
    private fun finishSection(endIndex: Int) {
        addUnquotedLineToSection(endIndex)
        appendSection()
        sectionBuilder = EmailSection.Builder(text, currentQuoteDepth)
        quoteDepth = currentQuoteDepth
    }

    /** Adds whatever is still pending once the end of [text] has been reached. */
    private fun completeLastSection() {
        if (quoteDepth == 0) {
            // Unquoted text is only added when its section ends, so it has to be flushed here even if the text ends
            // with a line break. A trailing line that so far consists only of quote characters is left out.
            val endIndex = if (isStartOfLine && currentQuoteDepth > 0) newlineIndex + 1 else text.length
            addUnquotedLineToSection(endIndex)
        } else if (!isStartOfLine) {
            // Last quoted line without a terminating line break.
            sectionBuilder.addSegment(spaces, startOfContentIndex, text.length)
        }

        appendSection()
    }

    /** Adds the section being built to the result, skipping sections without content. */
    private fun appendSection() {
        if (sectionBuilder.hasSegments) {
            sections.add(sectionBuilder.build())
        }
    }

    private fun resetForStartOfLine() {
        isStartOfLine = true
        currentQuoteDepth = 0
        spaces = 0
    }

    companion object {
        /** Splits [text] into sections of consecutive lines with the same quote depth. */
        fun extract(text: String) = EmailSectionExtractor(text).extract()
    }
}
|
|
@ -0,0 +1,113 @@
|
|||
package com.fsck.k9.message.html
|
||||
|
||||
|
||||
import com.google.common.truth.Truth.assertThat
|
||||
import org.junit.Test
|
||||
|
||||
|
||||
/**
 * Tests for [EmailSectionExtractor]: each test feeds a small plain text message to the extractor and checks the quote
 * depth and content of every section that comes back.
 */
class EmailSectionExtractorTest {
    @Test
    fun simpleMessageWithoutQuotes() {
        val message = """
            Hi Alice,

            are we still on for new Thursday?

            Best
            Bob
            """.trimIndent()

        val sections = EmailSectionExtractor.extract(message)

        assertThat(sections.size).isEqualTo(1)
        assertSection(sections[0], expectedQuoteDepth = 0, expectedText = message)
    }

    @Test
    fun quoteFollowedByReply() {
        val message = """
            Alice <alice@example.org> wrote:
            > Hi there

            Hi, what's up?
            """.trimIndent()

        val sections = EmailSectionExtractor.extract(message)

        assertThat(sections.size).isEqualTo(3)
        assertSection(sections[0], expectedQuoteDepth = 0, expectedText = "Alice <alice@example.org> wrote:\n")
        assertSection(sections[1], expectedQuoteDepth = 1, expectedText = "Hi there\n")
        assertSection(sections[2], expectedQuoteDepth = 0, expectedText = "\nHi, what's up?")
    }

    @Test
    fun replyFollowedByTwoQuoteLevels() {
        // ${" "} keeps the space after the quote character of the otherwise empty quoted line.
        val message = """
            Three

            Bob <bob@example.org> wrote:
            > Two
            >${" "}
            > Alice <alice@example.org> wrote:
            >> One
            """.trimIndent()

        val sections = EmailSectionExtractor.extract(message)

        assertThat(sections.size).isEqualTo(3)
        assertSection(
            sections[0],
            expectedQuoteDepth = 0,
            expectedText = "Three\n\nBob <bob@example.org> wrote:\n"
        )
        assertSection(
            sections[1],
            expectedQuoteDepth = 1,
            expectedText = "Two\n\nAlice <alice@example.org> wrote:\n"
        )
        assertSection(sections[2], expectedQuoteDepth = 2, expectedText = "One")
    }

    @Test
    fun chaosQuoting() {
        // ${"\n"} makes the message end with a line break; trimIndent() would otherwise drop it.
        val message = """
            >>> One
            > Three
            Four
            >> Two${"\n"}
            """.trimIndent()

        val sections = EmailSectionExtractor.extract(message)

        assertThat(sections.size).isEqualTo(4)
        assertSection(sections[0], expectedQuoteDepth = 3, expectedText = "One\n")
        assertSection(sections[1], expectedQuoteDepth = 1, expectedText = "Three\n")
        assertSection(sections[2], expectedQuoteDepth = 0, expectedText = "Four\n")
        assertSection(sections[3], expectedQuoteDepth = 2, expectedText = "Two\n")
    }

    /** Checks the quote depth first and then the textual content of [section]. */
    private fun assertSection(section: EmailSection, expectedQuoteDepth: Int, expectedText: String) {
        assertThat(section.quoteDepth).isEqualTo(expectedQuoteDepth)
        assertThat(section.toString()).isEqualTo(expectedText)
    }
}
|
|
@ -0,0 +1,94 @@
|
|||
package com.fsck.k9.message.html
|
||||
|
||||
|
||||
import com.google.common.truth.Truth.assertThat
|
||||
import org.junit.Test
|
||||
|
||||
|
||||
/**
 * Tests for [EmailSection].
 *
 * Sections are written in a compact bracket notation, see [asEmailSection].
 */
class EmailSectionTest {
    @Test
    fun charAt() {
        assertThat("[a]".asEmailSection()[0]).isEqualTo('a')
        assertThat(".[a]".asEmailSection()[0]).isEqualTo('a')
        assertThat("[a].".asEmailSection()[0]).isEqualTo('a')
        assertThat("[ a]".asEmailSection()[0]).isEqualTo('a')
        assertThat("[abc]".asEmailSection()[0]).isEqualTo('a')

        assertThat("[a][b]".asEmailSection()[1]).isEqualTo('b')
        assertThat("[a][bc]".asEmailSection()[1]).isEqualTo('b')
        assertThat("[ab]".asEmailSection()[1]).isEqualTo('b')
        assertThat("[ab][c]".asEmailSection()[1]).isEqualTo('b')
        assertThat("[a][b][c]".asEmailSection()[1]).isEqualTo('b')
        assertThat(".[a][b][c]".asEmailSection()[1]).isEqualTo('b')
        assertThat(".[a].[b][c]".asEmailSection()[1]).isEqualTo('b')
        assertThat(".[a].[b].[c]".asEmailSection()[1]).isEqualTo('b')
        assertThat("[ a][ b][ c]".asEmailSection()[1]).isEqualTo('b')
        assertThat("[a]..[bc]".asEmailSection()[1]).isEqualTo('b')

        assertThat("[abc]".asEmailSection()[2]).isEqualTo('c')
        assertThat("[ab][c]".asEmailSection()[2]).isEqualTo('c')
        assertThat("[a][bc]".asEmailSection()[2]).isEqualTo('c')
        assertThat("[a][b][c]".asEmailSection()[2]).isEqualTo('c')
        assertThat(".[a].[b].[c].".asEmailSection()[2]).isEqualTo('c')
        assertThat("[ a][ b][ c]".asEmailSection()[2]).isEqualTo('c')
    }

    @Test
    fun length() {
        assertThat("[]".asEmailSection().length).isEqualTo(0)
        assertThat("...[]...".asEmailSection().length).isEqualTo(0)
        assertThat("[ ]".asEmailSection().length).isEqualTo(0)
        // The common indent is one space, so exactly one space of the second segment remains. With a single space
        // in both segments the expected length would be 0, not 1.
        assertThat("[ ][  ]".asEmailSection().length).isEqualTo(1)
        assertThat("[One]".asEmailSection().length).isEqualTo(3)
        assertThat("[One][Two]".asEmailSection().length).isEqualTo(6)
    }

    @Test
    fun subSequence() {
        val section = "[ One][ Two][ Three]".asEmailSection()

        assertThat(section.subSequence(0, 11)).isSameAs(section)
        assertThat(section.subSequence(0, 3).asString()).isEqualTo("One")
        assertThat(section.subSequence(0, 2).asString()).isEqualTo("On")
        assertThat(section.subSequence(1, 3).asString()).isEqualTo("ne")
        assertThat(section.subSequence(1, 2).asString()).isEqualTo("n")
        assertThat(section.subSequence(0, 4).asString()).isEqualTo("OneT")
        assertThat(section.subSequence(1, 4).asString()).isEqualTo("neT")
        assertThat(section.subSequence(1, 6).asString()).isEqualTo("neTwo")
        assertThat(section.subSequence(1, 7).asString()).isEqualTo("neTwoT")
        assertThat(section.subSequence(1, 11).asString()).isEqualTo("neTwoThree")
        assertThat(section.subSequence(3, 11).asString()).isEqualTo("TwoThree")
        assertThat(section.subSequence(4, 11).asString()).isEqualTo("woThree")
        assertThat(section.subSequence(4, 9).asString()).isEqualTo("woThr")
        assertThat(section.subSequence(6, 9).asString()).isEqualTo("Thr")
        assertThat(section.subSequence(7, 10).asString()).isEqualTo("hre")
        assertThat(section.subSequence(6, 11).asString()).isEqualTo("Three")
    }


    /**
     * Builds the string character by character so that `length` and `get` of the receiver are exercised instead of
     * its `toString` implementation.
     */
    private fun CharSequence.asString() = StringBuilder(length).apply {
        this@asString.forEach { append(it) }
    }.toString()

    /**
     * Creates an [EmailSection] (quote depth 0) from bracket notation.
     *
     * The receiver itself is the underlying text. Every `[...]` pair marks one segment covering the characters
     * between the brackets; spaces directly after `[` are reported as the segment's leading spaces. Characters outside
     * of brackets are part of the text but of no segment.
     */
    private fun String.asEmailSection(): EmailSection {
        val builder = EmailSection.Builder(this, 0)

        var startIndex = -1
        var isStartOfLine = true
        var spaces = 0
        this.forEachIndexed { index, c ->
            when (c) {
                '[' -> {
                    startIndex = index + 1
                    isStartOfLine = true
                    spaces = 0
                }
                ' ' -> if (isStartOfLine) spaces++
                ']' -> builder.addSegment(spaces, startIndex, index)
                else -> isStartOfLine = false
            }
        }

        return builder.build()
    }
}
|
Loading…
Reference in a new issue