diff --git a/app/core/src/main/java/com/fsck/k9/message/extractors/PreviewTextExtractor.java b/app/core/src/main/java/com/fsck/k9/message/extractors/PreviewTextExtractor.java
deleted file mode 100644
index db96ca8e0..000000000
--- a/app/core/src/main/java/com/fsck/k9/message/extractors/PreviewTextExtractor.java
+++ /dev/null
@@ -1,69 +0,0 @@
-package com.fsck.k9.message.extractors;
-
-
-import androidx.annotation.NonNull;
-
-import com.fsck.k9.message.html.HtmlConverter;
-import com.fsck.k9.mail.Part;
-import com.fsck.k9.mail.internet.MessageExtractor;
-
-import static com.fsck.k9.mail.internet.MimeUtility.isSameMimeType;
-
-
-class PreviewTextExtractor {
-    private static final int MAX_PREVIEW_LENGTH = 512;
-    private static final int MAX_CHARACTERS_CHECKED_FOR_PREVIEW = 8192;
-
-
-    @NonNull
-    public String extractPreview(@NonNull Part textPart) throws PreviewExtractionException {
-        String text = MessageExtractor.getTextFromPart(textPart, MAX_CHARACTERS_CHECKED_FOR_PREVIEW);
-        if (text == null) {
-            throw new PreviewExtractionException("Couldn't get text from part");
-        }
-
-        String plainText = convertFromHtmlIfNecessary(textPart, text);
-
-        return stripTextForPreview(plainText);
-    }
-
-    private String convertFromHtmlIfNecessary(Part textPart, String text) {
-        String mimeType = textPart.getMimeType();
-        if (!isSameMimeType(mimeType, "text/html")) {
-            return text;
-        }
-
-        return HtmlConverter.htmlToText(text);
-    }
-
-    private String stripTextForPreview(String text) {
-        if (text == null) {
-            return "";
-        }
-
-        // Remove (correctly delimited by '-- \n') signatures
-        text = text.replaceAll("(?ms)^-- [\\r\\n]+.*", "");
-        // try to remove lines of dashes in the preview
-        text = text.replaceAll("(?m)^----.*?$", "");
-        // remove quoted text from the preview
-        text = text.replaceAll("(?m)^[#>].*$", "");
-        // Remove a common quote header from the preview
-        text = text.replaceAll("(?m)^On .*wrote.?$", "");
-        // Remove a more generic quote header from the preview
-        text = text.replaceAll("(?m)^.*\\w+:$", "");
-        // Remove horizontal rules.
-        text = text.replaceAll("\\s*([-=_]{30,}+)\\s*", " ");
-
-        // URLs in the preview should just be shown as "..." - They're not
-        // clickable and they usually overwhelm the preview
-        text = text.replaceAll("https?://\\S+", "...");
-        // Don't show newlines in the preview
-        text = text.replaceAll("(\\r|\\n)+", " ");
-        // Collapse whitespace in the preview
-        text = text.replaceAll("\\s+", " ");
-        // Remove any whitespace at the beginning and end of the string.
-        text = text.trim();
-
-        return (text.length() > MAX_PREVIEW_LENGTH) ? text.substring(0, MAX_PREVIEW_LENGTH - 1) + "…" : text;
-    }
-}
diff --git a/app/core/src/main/java/com/fsck/k9/message/extractors/PreviewTextExtractor.kt b/app/core/src/main/java/com/fsck/k9/message/extractors/PreviewTextExtractor.kt
new file mode 100644
index 000000000..19e7d71f3
--- /dev/null
+++ b/app/core/src/main/java/com/fsck/k9/message/extractors/PreviewTextExtractor.kt
@@ -0,0 +1,152 @@
+package com.fsck.k9.message.extractors
+
+import com.fsck.k9.mail.Part
+import com.fsck.k9.mail.internet.MessageExtractor
+import com.fsck.k9.mail.internet.MimeUtility.isSameMimeType
+import com.fsck.k9.message.html.EmailSection
+import com.fsck.k9.message.html.EmailSectionExtractor
+import com.fsck.k9.message.html.HtmlConverter
+
+/**
+ * Extracts a short, single-line preview from a text part.
+ *
+ * Signatures, quoted text and quote headers are dropped, URLs are replaced with "...", whitespace is collapsed and
+ * the result is limited to [MAX_PREVIEW_LENGTH] characters.
+ */
+internal class PreviewTextExtractor {
+    /**
+     * Returns the preview for [textPart]. Parts with MIME type `text/html` are converted to plain text first.
+     *
+     * @throws PreviewExtractionException if no text could be read from [textPart].
+     */
+    @Throws(PreviewExtractionException::class)
+    fun extractPreview(textPart: Part): String {
+        val text = MessageExtractor.getTextFromPart(textPart, MAX_CHARACTERS_CHECKED_FOR_PREVIEW)
+            ?: throw PreviewExtractionException("Couldn't get text from part")
+
+        val plainText = convertFromHtmlIfNecessary(textPart, text)
+        return stripTextForPreview(plainText)
+    }
+
+    private fun convertFromHtmlIfNecessary(textPart: Part, text: String): String {
+        return if (isSameMimeType(textPart.mimeType, "text/html")) {
+            HtmlConverter.htmlToText(text)
+        } else {
+            text
+        }
+    }
+
+    private fun stripTextForPreview(text: String): String {
+        var intermediateText = text
+
+        intermediateText = normalizeLineBreaks(intermediateText)
+        intermediateText = stripSignature(intermediateText)
+        intermediateText = extractUnquotedText(intermediateText)
+
+        // try to remove lines of dashes in the preview
+        intermediateText = intermediateText.replace("(?m)^----.*?$".toRegex(), "")
+        // Remove horizontal rules.
+        intermediateText = intermediateText.replace("\\s*([-=_]{30,}+)\\s*".toRegex(), " ")
+
+        // URLs in the preview should just be shown as "..." - They're not
+        // clickable and they usually overwhelm the preview
+        intermediateText = intermediateText.replace("https?://\\S+".toRegex(), "...")
+        // Don't show newlines in the preview
+        intermediateText = intermediateText.replace('\n', ' ')
+        // Collapse whitespace in the preview
+        intermediateText = intermediateText.replace("\\s+".toRegex(), " ")
+        // Remove any whitespace at the beginning and end of the string.
+        intermediateText = intermediateText.trim()
+
+        return if (intermediateText.length > MAX_PREVIEW_LENGTH) {
+            intermediateText.substring(0, MAX_PREVIEW_LENGTH - 1) + "…"
+        } else {
+            intermediateText
+        }
+    }
+
+    private fun normalizeLineBreaks(text: String) = text.replace(REGEX_CRLF, "\n")
+
+    /** Removes everything from the first signature delimiter line ("-- ") onwards. */
+    private fun stripSignature(text: String): String {
+        return if (text.startsWith("-- \n")) {
+            ""
+        } else {
+            text.substringBefore("\n-- \n")
+        }
+    }
+
+    /**
+     * Returns only the text at quote depth 0, without the quote header that ends the first section. Separate
+     * unquoted sections are joined with " […] ".
+     */
+    private fun extractUnquotedText(text: String): String {
+        val emailSections = EmailSectionExtractor.extract(text)
+        if (emailSections.isEmpty()) {
+            return ""
+        }
+
+        val firstEmailSection = emailSections.first()
+        val replySections = if (firstEmailSection.quoteDepth == 0) {
+            val replyEmailSections = emailSections.drop(1).filter { it.quoteDepth == 0 && it.isNotBlank() }
+            if (firstEmailSection.isQuoteHeaderOnly()) {
+                replyEmailSections
+            } else {
+                val firstSectionTextWithoutQuoteHeader = stripQuoteHeader(firstEmailSection)
+                listOf(firstSectionTextWithoutQuoteHeader) + replyEmailSections
+            }
+        } else {
+            emailSections.filter { it.quoteDepth == 0 && it.isNotBlank() }
+        }
+
+        return replySections.joinToString(separator = " […] ")
+    }
+
+    private fun stripQuoteHeader(emailSection: EmailSection): String {
+        val quoteHeaderIndex = emailSection.quoteHeaderIndex
+        if (quoteHeaderIndex == -1) return emailSection.toString()
+        return emailSection.substring(startIndex = 0, endIndex = quoteHeaderIndex)
+    }
+
+    private fun EmailSection.isQuoteHeaderOnly(): Boolean {
+        return quoteHeaderIndex == 0
+    }
+
+    /**
+     * End index (exclusive) of the text preceding a trailing quote header such as "On … wrote:".
+     *
+     * Returns `-1` if the section doesn't end with ':' (ignoring trailing line breaks) and `0` if no blank line
+     * separates the quote header from preceding text, i.e. the whole section is treated as quote header.
+     */
+    private val EmailSection.quoteHeaderIndex: Int
+        get() {
+            // An empty section has no last character to inspect; indexing it would throw.
+            if (isEmpty()) return -1
+
+            var quoteHeaderIndex = lastIndex
+            while (quoteHeaderIndex > 0 && this[quoteHeaderIndex] == '\n') {
+                quoteHeaderIndex--
+            }
+            if (this[quoteHeaderIndex] != ':') return -1
+
+            var newlineCount = 0
+            // Also inspect index 0 so that a single-character paragraph in front of the header is not lost.
+            while (quoteHeaderIndex >= 0) {
+                when {
+                    this[quoteHeaderIndex] == '\n' -> newlineCount++
+                    newlineCount > 1 -> return quoteHeaderIndex + 1
+                    else -> newlineCount = 0
+                }
+                quoteHeaderIndex--
+            }
+
+            return 0
+        }
+
+    companion object {
+        private const val MAX_PREVIEW_LENGTH = 512
+        private const val MAX_CHARACTERS_CHECKED_FOR_PREVIEW = 8192L
+
+        private val REGEX_CRLF = "(\\r\\n|\\r)".toRegex()
+    }
+}
diff --git a/app/core/src/test/java/com/fsck/k9/message/extractors/PreviewTextExtractorTest.java b/app/core/src/test/java/com/fsck/k9/message/extractors/PreviewTextExtractorTest.java
deleted file mode 100644
index b9b491375..000000000
--- a/app/core/src/test/java/com/fsck/k9/message/extractors/PreviewTextExtractorTest.java
+++ /dev/null
@@ -1,155 +0,0 @@
-package com.fsck.k9.message.extractors;
-
-
-import com.fsck.k9.RobolectricTest;
-import com.fsck.k9.mail.Part;
-import com.fsck.k9.mail.internet.MimeBodyPart;
-import org.junit.Before;
-import org.junit.Test;
-
-import static com.fsck.k9.message.MessageCreationHelper.createTextPart;
-import static org.junit.Assert.assertEquals;
-
-
-public class PreviewTextExtractorTest extends RobolectricTest {
-    private PreviewTextExtractor previewTextExtractor;
-
-
-    @Before
-    public void setUp() throws Exception {
-        previewTextExtractor = new PreviewTextExtractor();
-    }
-
-    @Test(expected = PreviewExtractionException.class)
-    public void extractPreview_withEmptyBody_shouldThrow() throws Exception {
-        Part part = new MimeBodyPart(null, "text/plain");
-
-        previewTextExtractor.extractPreview(part);
-    }
-
-    @Test
-    public void extractPreview_withSimpleTextPlain() throws Exception {
-        String text = "The quick brown fox jumps over the lazy dog";
-        Part part = createTextPart("text/plain", text);
-
-        String preview = previewTextExtractor.extractPreview(part);
-
-        assertEquals(text, preview);
-    }
-
-    @Test
-    public void extractPreview_withSimpleTextHtml() throws Exception {
-        String text = "The quick brown fox jumps over the lazy dog";
-        Part part = createTextPart("text/html", text);
-
-        String preview = previewTextExtractor.extractPreview(part);
-
-        assertEquals("The quick brown fox jumps over the lazy dog", preview);
-    }
-
-    @Test
-    public void extractPreview_withLongTextPlain() throws Exception {
-        String text = "" +
-                "10--------20--------30--------40--------50--------" +
-                "60--------70--------80--------90--------100-------" +
-                "110-------120-------130-------140-------150-------" +
-                "160-------170-------180-------190-------200-------" +
-                "210-------220-------230-------240-------250-------" +
-                "260-------270-------280-------290-------300-------" +
-                "310-------320-------330-------340-------350-------" +
-                "360-------370-------380-------390-------400-------" +
-                "410-------420-------430-------440-------450-------" +
-                "460-------470-------480-------490-------500-------" +
-                "510-------520-------";
-        Part part = createTextPart("text/plain", text);
-
-        String preview = previewTextExtractor.extractPreview(part);
-
-        assertEquals(text.substring(0, 511) + "…", preview);
-    }
-
-    @Test
-    public void extractPreview_shouldStripSignature() throws Exception {
-        String text = "" +
-                "Some text\r\n" +
-                "-- \r\n" +
-                "Signature";
-        Part part = createTextPart("text/plain", text);
-
-        String preview = previewTextExtractor.extractPreview(part);
-
-        assertEquals("Some text", preview);
-    }
-
-    @Test
-    public void extractPreview_shouldStripHorizontalLine() throws Exception {
-        String text = "" +
-                "line 1\r\n" +
-                "----\r\n" +
-                "line 2";
-        Part part = createTextPart("text/plain", text);
-
-        String preview = previewTextExtractor.extractPreview(part);
-
-        assertEquals("line 1 line 2", preview);
-    }
-
-    @Test
-    public void extractPreview_shouldStripQuoteHeaderAndQuotedText() throws Exception {
-        String text = "" +
-                "some text\r\n" +
-                "On 01/02/03 someone wrote\r\n" +
-                "> some quoted text\r\n" +
-                "# some other quoted text\r\n";
-        Part part = createTextPart("text/plain", text);
-
-        String preview = previewTextExtractor.extractPreview(part);
-
-        assertEquals("some text", preview);
-    }
-
-    @Test
-    public void extractPreview_shouldStripGenericQuoteHeader() throws Exception {
-        String text = "" +
-                "Am 13.12.2015 um 23:42 schrieb Hans:\r\n" +
-                "> hallo\r\n" +
-                "hi there\r\n";
-        Part part = createTextPart("text/plain", text);
-
-        String preview = previewTextExtractor.extractPreview(part);
-
-        assertEquals("hi there", preview);
-    }
-
-    @Test
-    public void extractPreview_shouldStripHorizontalRules() throws Exception {
-        String text = "line 1" +
-                "------------------------------\r\n" +
-                "line 2";
-        Part part = createTextPart("text/plain", text);
-
-        String preview = previewTextExtractor.extractPreview(part);
-
-        assertEquals("line 1 line 2", preview);
-    }
-
-    @Test
-    public void extractPreview_shouldReplaceUrl() throws Exception {
-        String text = "some url: https://k9mail.org/";
-        Part part = createTextPart("text/plain", text);
-
-        String preview = previewTextExtractor.extractPreview(part);
-
-        assertEquals("some url: ...", preview);
-    }
-
-    @Test
-    public void extractPreview_shouldCollapseAndTrimWhitespace() throws Exception {
-        String text = " whitespace is\t\tfun ";
-        Part part = createTextPart("text/plain", text);
-
-        String preview = previewTextExtractor.extractPreview(part);
-
-        assertEquals("whitespace is fun", preview);
-    }
-}
diff --git a/app/core/src/test/java/com/fsck/k9/message/extractors/PreviewTextExtractorTest.kt b/app/core/src/test/java/com/fsck/k9/message/extractors/PreviewTextExtractorTest.kt
new file mode 100644
index 000000000..e4cb35bd7
--- /dev/null
+++ b/app/core/src/test/java/com/fsck/k9/message/extractors/PreviewTextExtractorTest.kt
@@ -0,0 +1,215 @@
+package com.fsck.k9.message.extractors
+
+import com.fsck.k9.mail.internet.MimeBodyPart
+import com.fsck.k9.message.MessageCreationHelper
+import com.google.common.truth.Truth.assertThat
+import org.junit.Test
+
+class PreviewTextExtractorTest {
+    private val previewTextExtractor = PreviewTextExtractor()
+
+    @Test(expected = PreviewExtractionException::class)
+    fun extractPreview_withEmptyBody_shouldThrow() {
+        val part = MimeBodyPart(null, "text/plain")
+
+        previewTextExtractor.extractPreview(part)
+    }
+
+    @Test
+    fun extractPreview_withSimpleTextPlain() {
+        val text = "The quick brown fox jumps over the lazy dog"
+        val part = MessageCreationHelper.createTextPart("text/plain", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo(text)
+    }
+
+    @Test
+    fun extractPreview_withSimpleTextHtml() {
+        val text = "The quick brown fox jumps over the lazy dog"
+        val part = MessageCreationHelper.createTextPart("text/html", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo("The quick brown fox jumps over the lazy dog")
+    }
+
+    @Test
+    fun extractPreview_withLongTextPlain() {
+        val text = "" +
+            "10--------20--------30--------40--------50--------" +
+            "60--------70--------80--------90--------100-------" +
+            "110-------120-------130-------140-------150-------" +
+            "160-------170-------180-------190-------200-------" +
+            "210-------220-------230-------240-------250-------" +
+            "260-------270-------280-------290-------300-------" +
+            "310-------320-------330-------340-------350-------" +
+            "360-------370-------380-------390-------400-------" +
+            "410-------420-------430-------440-------450-------" +
+            "460-------470-------480-------490-------500-------" +
+            "510-------520-------"
+        val part = MessageCreationHelper.createTextPart("text/plain", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo(text.substring(0, 511) + "…")
+    }
+
+    @Test
+    fun extractPreview_shouldStripSignature() {
+        val text = """
+            Some text
+            -- 
+            Signature
+        """.trimIndent()
+        val part = MessageCreationHelper.createTextPart("text/plain", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo("Some text")
+    }
+
+    @Test
+    fun extractPreview_shouldStripHorizontalLine() {
+        val text = """
+            line 1
+            ----
+            line 2
+        """.trimIndent()
+        val part = MessageCreationHelper.createTextPart("text/plain", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo("line 1 line 2")
+    }
+
+    @Test
+    fun extractPreview_shouldStripQuoteHeaderAndQuotedText() {
+        val text = """
+            some text
+
+            On 01/02/03 someone wrote:
+            > some quoted text
+            > some other quoted text
+        """.trimIndent()
+        val part = MessageCreationHelper.createTextPart("text/plain", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo("some text")
+    }
+
+    @Test
+    fun extractPreview_shouldStripGenericQuoteHeader() {
+        val text = """
+            Am 13.12.2015 um 23:42 schrieb Hans:
+            > hallo
+            hi there
+
+        """.trimIndent()
+        val part = MessageCreationHelper.createTextPart("text/plain", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo("hi there")
+    }
+
+    @Test
+    fun extractPreview_shouldStripHorizontalRules() {
+        val text = """
+            line 1------------------------------
+            line 2
+        """.trimIndent()
+        val part = MessageCreationHelper.createTextPart("text/plain", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo("line 1 line 2")
+    }
+
+    @Test
+    fun extractPreview_shouldReplaceUrl() {
+        val text = "some url: https://k9mail.org/"
+        val part = MessageCreationHelper.createTextPart("text/plain", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo("some url: ...")
+    }
+
+    @Test
+    fun extractPreview_shouldCollapseAndTrimWhitespace() {
+        val text = " whitespace is\t\tfun "
+        val part = MessageCreationHelper.createTextPart("text/plain", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo("whitespace is fun")
+    }
+
+    @Test
+    fun extractPreview_lineEndingWithColon() {
+        val text = """
+            Here's a list:
+            - item 1
+            - item 2
+        """.trimIndent()
+        val part = MessageCreationHelper.createTextPart("text/plain", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo("Here's a list: - item 1 - item 2")
+    }
+
+    @Test
+    fun extractPreview_inlineReplies() {
+        val text = """
+            On 2020-09-30 at 03:12 Bob wrote:
+            > Hi Alice
+            Hi Bob
+
+            > How are you?
+            I'm fine. Thanks for asking.
+
+            > Bye
+            See you tomorrow
+        """.trimIndent()
+        val part = MessageCreationHelper.createTextPart("text/plain", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo("Hi Bob […] I'm fine. Thanks for asking. […] See you tomorrow")
+    }
+
+    @Test
+    fun extractPreview_quoteHeaderContainingLineBreak() {
+        val text = """
+            Reply text
+
+            On 2020-09-30 at 03:12
+            Bob wrote:
+            > Quoted text
+        """.trimIndent()
+        val part = MessageCreationHelper.createTextPart("text/plain", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo("Reply text")
+    }
+
+    @Test
+    fun extractPreview_singleCharacterReplyBeforeQuoteHeader() {
+        val text = """
+            k
+
+            On 2020-09-30 at 03:12 Bob wrote:
+            > Quoted text
+        """.trimIndent()
+        val part = MessageCreationHelper.createTextPart("text/plain", text)
+
+        val preview = previewTextExtractor.extractPreview(part)
+
+        assertThat(preview).isEqualTo("k")
+    }
+}