Merge pull request #5719 from k9mail/fix_html_to_text

Don't limit line length when converting from HTML to plain text
This commit is contained in:
cketti 2021-10-05 17:18:35 +02:00 committed by GitHub
commit e31c973fd3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 31 deletions

View file

@ -22,7 +22,6 @@ object HtmlToPlainText {
}
private class FormattingVisitor : NodeVisitor {
private var width = 0
private val output = StringBuilder()
private var collectLinkText = false
private var linkText = StringBuilder()
@ -73,36 +72,11 @@ private class FormattingVisitor : NodeVisitor {
}
/**
 * Appends [text] to the plain-text output exactly once, without inserting any line breaks.
 *
 * A lone space is dropped when the output is still empty or already ends in a space or a
 * newline, so a single-space separator never leads the output or follows existing whitespace.
 */
private fun append(text: String) {
    // Nothing to separate yet, or a separator is already in place.
    val isRedundantSpace = text == " " &&
        (output.isEmpty() || output.last() in listOf(' ', '\n'))
    if (isRedundantSpace) return

    // Long lines are intentionally left unwrapped; the text is written verbatim, one time.
    output.append(text)
}
private fun startNewLine() {
@ -134,8 +108,4 @@ private class FormattingVisitor : NodeVisitor {
return output.substring(0, lastIndex + 1)
}
companion object {
private const val MAX_WIDTH = 76
}
}

View file

@ -306,4 +306,24 @@ public class HtmlConverterTest {
assertEquals("https://domain.example/path/", result);
}
@Test
public void htmlToText_withLineBreaksInHtml() {
    // Raw LF and CRLF line breaks inside the markup are each expected to come out as one space.
    String html = "One\nTwo\r\nThree";

    assertEquals("One Two Three", HtmlConverter.htmlToText(html));
}
@Test
public void htmlToText_withLongTextLine_shouldNotAddLineBreaksToOutput() {
    // One long line of text without any line breaks; the converter must return it unchanged.
    String longLine = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam sit amet finibus felis, " +
            "viverra ullamcorper justo. Suspendisse potenti. Etiam erat sem, interdum a condimentum quis, " +
            "fringilla quis orci.";

    String converted = HtmlConverter.htmlToText(longLine);

    assertEquals(longLine, converted);
}
}