Add support for signature delimiters using non-breaking space in HTML parts

This commit is contained in:
cketti 2023-04-08 15:27:33 +02:00
parent 549d31ac62
commit e621d3f693
2 changed files with 19 additions and 1 deletion

View file

@ -130,7 +130,7 @@ class HtmlSignatureRemover {
}
companion object {
private val DASH_SIGNATURE_HTML = Pattern.compile("\\s*-- \\s*", Pattern.CASE_INSENSITIVE)
// Signature delimiter pattern: optional whitespace, two dashes, then either a regular
// space or a non-breaking space (U+00A0), followed by optional whitespace.
private val DASH_SIGNATURE_HTML = Pattern.compile("\\s*--[ \u00A0]\\s*")
private val BLOCKQUOTE = Tag.valueOf("blockquote")
private val BR = Tag.valueOf("br")

View file

@ -182,4 +182,22 @@ class HtmlSignatureRemoverTest {
""".trimIndent().removeNewlines(),
)
}
@Test
fun `signature delimiter with non-breaking space character entity`() {
    // The delimiter's trailing space is written as the `&nbsp;` character entity.
    val input = "Body text<br>--&nbsp;<br>Signature text"

    val remainingText = extractText(stripSignature(input))

    // Only the text in front of the delimiter is expected to be left.
    assertThat(remainingText).isEqualTo("Body text")
}
@Test
fun `signature delimiter with non-breaking space`() {
    // The delimiter's trailing space is a literal U+00A0 character in the markup.
    val input = "Body text<br>--\u00A0<br>Signature text"

    val remainingText = extractText(stripSignature(input))

    // Only the text in front of the delimiter is expected to be left.
    assertThat(remainingText).isEqualTo("Body text")
}
}