Merge pull request #2518 from k9mail/GH-2148_move_to_jsoup

Use jsoup to parse and clean HTML
This commit is contained in:
cketti 2017-05-25 00:21:12 +02:00 committed by GitHub
commit c816276a3f
15 changed files with 577 additions and 222 deletions

View file

@ -27,7 +27,7 @@ dependencies {
compile "com.squareup.okio:okio:${okioVersion}" compile "com.squareup.okio:okio:${okioVersion}"
compile 'commons-io:commons-io:2.4' compile 'commons-io:commons-io:2.4'
compile "com.android.support:support-v4:${androidSupportLibraryVersion}" compile "com.android.support:support-v4:${androidSupportLibraryVersion}"
compile 'net.sourceforge.htmlcleaner:htmlcleaner:2.18' compile 'org.jsoup:jsoup:1.10.2'
compile 'de.cketti.library.changelog:ckchangelog:1.2.1' compile 'de.cketti.library.changelog:ckchangelog:1.2.1'
compile 'com.github.bumptech.glide:glide:3.6.1' compile 'com.github.bumptech.glide:glide:3.6.1'
compile 'com.splitwise:tokenautocomplete:2.0.7' compile 'com.splitwise:tokenautocomplete:2.0.7'
@ -41,7 +41,7 @@ dependencies {
testCompile "org.robolectric:robolectric:${robolectricVersion}" testCompile "org.robolectric:robolectric:${robolectricVersion}"
testCompile "junit:junit:${junitVersion}" testCompile "junit:junit:${junitVersion}"
testCompile "org.mockito:mockito-core:${mockitoVersion}" testCompile "org.mockito:mockito-core:${mockitoVersion}"
testCompile 'org.jsoup:jsoup:1.10.2' testCompile "org.jdom:jdom2:2.0.6"
} }
android { android {
@ -96,6 +96,7 @@ android {
exclude 'META-INF/LICENSE.txt' exclude 'META-INF/LICENSE.txt'
exclude 'META-INF/NOTICE' exclude 'META-INF/NOTICE'
exclude 'META-INF/NOTICE.txt' exclude 'META-INF/NOTICE.txt'
exclude 'META-INF/README'
exclude 'LICENSE.txt' exclude 'LICENSE.txt'
} }

View file

@ -1273,14 +1273,20 @@ public class Accounts extends K9ListActivity implements OnItemClickListener {
} }
private static String[][] USED_LIBRARIES = new String[][] { private static String[][] USED_LIBRARIES = new String[][] {
new String[] {"Android Support Library", "https://developer.android.com/topic/libraries/support-library/index.html"},
new String[] {"ckChangeLog", "https://github.com/cketti/ckChangeLog"},
new String[] {"Commons IO", "http://commons.apache.org/io/"},
new String[] {"Glide", "https://github.com/bumptech/glide"},
new String[] {"HoloColorPicker", "https://github.com/LarsWerkman/HoloColorPicker"},
new String[] {"jsoup", "https://jsoup.org/"},
new String[] {"jutf7", "http://jutf7.sourceforge.net/"}, new String[] {"jutf7", "http://jutf7.sourceforge.net/"},
new String[] {"JZlib", "http://www.jcraft.com/jzlib/"}, new String[] {"JZlib", "http://www.jcraft.com/jzlib/"},
new String[] {"Commons IO", "http://commons.apache.org/io/"},
new String[] {"Mime4j", "http://james.apache.org/mime4j/"}, new String[] {"Mime4j", "http://james.apache.org/mime4j/"},
new String[] {"HtmlCleaner", "http://htmlcleaner.sourceforge.net/"}, new String[] {"Moshi", "https://github.com/square/moshi"},
new String[] {"ckChangeLog", "https://github.com/cketti/ckChangeLog"}, new String[] {"Okio", "https://github.com/square/okio"},
new String[] {"HoloColorPicker", "https://github.com/LarsWerkman/HoloColorPicker"}, new String[] {"SafeContentResolver", "https://github.com/cketti/SafeContentResolver"},
new String[] {"Glide", "https://github.com/bumptech/glide"}, new String[] {"ShowcaseView", "https://github.com/amlcurran/ShowcaseView"},
new String[] {"Timber", "https://github.com/JakeWharton/timber"},
new String[] {"TokenAutoComplete", "https://github.com/splitwise/TokenAutoComplete/"}, new String[] {"TokenAutoComplete", "https://github.com/splitwise/TokenAutoComplete/"},
}; };
@ -1326,8 +1332,7 @@ public class Accounts extends K9ListActivity implements OnItemClickListener {
"<div>TypePad \u7d75\u6587\u5b57\u30a2\u30a4\u30b3\u30f3\u753b\u50cf " + "<div>TypePad \u7d75\u6587\u5b57\u30a2\u30a4\u30b3\u30f3\u753b\u50cf " +
"(<a href=\"http://typepad.jp/\">Six Apart Ltd</a>) / " + "(<a href=\"http://typepad.jp/\">Six Apart Ltd</a>) / " +
"<a href=\"http://creativecommons.org/licenses/by/2.1/jp/\">CC BY 2.1</a></div>")) "<a href=\"http://creativecommons.org/licenses/by/2.1/jp/\">CC BY 2.1</a></div>"))
.append("</p><hr/><p>") .append("</p>");
.append(getString(R.string.app_htmlcleaner_license));
wv.loadDataWithBaseURL("file:///android_res/drawable/", html.toString(), "text/html", "utf-8", null); wv.loadDataWithBaseURL("file:///android_res/drawable/", html.toString(), "text/html", "utf-8", null);

View file

@ -0,0 +1,139 @@
/*
* The MIT License
*
* © 2009-2017, Jonathan Hedley <jonathan@hedley.net>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package com.fsck.k9.helper.jsoup;
import com.fsck.k9.helper.jsoup.NodeFilter.HeadFilterDecision;
import com.fsck.k9.helper.jsoup.NodeFilter.TailFilterDecision;
import org.jsoup.nodes.Node;
import org.jsoup.select.NodeTraversor;
/**
* Depth-first node traversor.
* <p>
* Based on {@link NodeTraversor}, but supports skipping sub trees, removing nodes, and stopping the traversal at any
* point.
* </p><p>
* This is an enhancement of the <a href="https://github.com/jhy/jsoup/pull/849">jsoup pull request 'Improved node
* traversal'</a> by <a href="https://github.com/kno10">Erich Schubert</a>.
* </p>
*/
public class AdvancedNodeTraversor {
    /**
     * Overall outcome of a call to {@link #filter(Node)}.
     */
    public enum FilterResult {
        /**
         * Processing the tree was completed.
         */
        ENDED,
        /**
         * Processing was stopped because the filter returned a {@code STOP} decision.
         */
        STOPPED,
        /**
         * Processing the tree was completed and the root node was removed.
         */
        ROOT_REMOVED
    }


    private final NodeFilter filter;


    /**
     * Create a new traversor.
     *
     * @param filter
     *         a class implementing the {@link NodeFilter} interface, to be called when visiting each node.
     */
    public AdvancedNodeTraversor(NodeFilter filter) {
        this.filter = filter;
    }

    /**
     * Start a depth-first filtering of the root and all of its descendants.
     * <p>
     * For every node {@link NodeFilter#head(Node, int)} is called first. Unless the head decision was
     * {@code SKIP_ENTIRELY} or {@code REMOVE}, {@link NodeFilter#tail(Node, int)} is called once the node's children
     * have been processed (or skipped). A node is removed from the tree when either callback asked for its removal.
     * </p>
     *
     * @param root
     *         the root node point to traverse.
     *
     * @return {@link FilterResult#STOPPED} if a callback returned {@code STOP}, {@link FilterResult#ROOT_REMOVED} if
     *         the root itself was removed, {@link FilterResult#ENDED} otherwise.
     */
    public FilterResult filter(Node root) {
        Node node = root;
        int depth = 0;

        while (node != null) {
            HeadFilterDecision headResult = filter.head(node, depth);
            if (headResult == HeadFilterDecision.STOP) {
                return FilterResult.STOPPED;
            }

            // Descend into the first child, if there is one and the filter wants us to.
            if (headResult == HeadFilterDecision.CONTINUE && node.childNodeSize() > 0) {
                node = node.childNode(0);
                ++depth;
                continue;
            }

            // No more siblings at this level: finish the current node and climb up until an ancestor with a
            // sibling is found or the root level is reached.
            TailFilterDecision tailResult = TailFilterDecision.CONTINUE;
            while (node.nextSibling() == null && depth > 0) {
                if (headResult == HeadFilterDecision.CONTINUE || headResult == HeadFilterDecision.SKIP_CHILDREN) {
                    tailResult = filter.tail(node, depth);
                    if (tailResult == TailFilterDecision.STOP) {
                        return FilterResult.STOPPED;
                    }
                }

                // Look up the parent BEFORE removing the node; removal detaches it from the tree.
                Node prev = node;
                node = node.parentNode();
                depth--;

                if (headResult == HeadFilterDecision.REMOVE || tailResult == TailFilterDecision.REMOVE) {
                    prev.remove();
                }

                // The parent was entered with CONTINUE, otherwise we would not have visited its children.
                headResult = HeadFilterDecision.CONTINUE;
                // Do not let the child's decision leak into the parent's handling.
                tailResult = TailFilterDecision.CONTINUE;
            }

            // Finish the current node, then proceed with its next sibling.
            if (headResult == HeadFilterDecision.CONTINUE || headResult == HeadFilterDecision.SKIP_CHILDREN) {
                tailResult = filter.tail(node, depth);
                if (tailResult == TailFilterDecision.STOP) {
                    return FilterResult.STOPPED;
                }
            }

            // A removal can be requested by head() or by tail(); both have to be honoured here, exactly like in
            // the climbing loop above.
            boolean removeNode = headResult == HeadFilterDecision.REMOVE || tailResult == TailFilterDecision.REMOVE;

            // Look up the sibling BEFORE removing the node; removal detaches it from the tree.
            Node prev = node;
            node = node.nextSibling();

            if (removeNode) {
                prev.remove();
            }

            if (prev == root) {
                return removeNode ? FilterResult.ROOT_REMOVED : FilterResult.ENDED;
            }
        }

        return FilterResult.ENDED;
    }
}

View file

@ -0,0 +1,111 @@
package com.fsck.k9.helper.jsoup;
import android.support.annotation.NonNull;
import org.jsoup.nodes.Node;
/**
* Node filter interface. Provide an implementing class to {@link AdvancedNodeTraversor} to iterate through
* nodes.
* <p>
* This interface provides two methods, {@code head} and {@code tail}. The head method is called when the node is first
* seen, and the tail method when all of the node's children have been visited. As an example, head can be used to
* create a start tag for a node, and tail to create the end tag.
* </p>
* <p>
* For every node, the filter has to decide in {@link NodeFilter#head(Node, int)} whether to
* <ul>
* <li>continue ({@link HeadFilterDecision#CONTINUE}),</li>
* <li>skip all children ({@link HeadFilterDecision#SKIP_CHILDREN}),</li>
* <li>skip node entirely ({@link HeadFilterDecision#SKIP_ENTIRELY}),</li>
* <li>remove the subtree ({@link HeadFilterDecision#REMOVE}),</li>
* <li>interrupt the iteration and return ({@link HeadFilterDecision#STOP}).</li>
* </ul>
* <p>
* The difference between {@link HeadFilterDecision#SKIP_CHILDREN} and {@link HeadFilterDecision#SKIP_ENTIRELY} is that
* the first will invoke {@link NodeFilter#tail(Node, int)} on the node, while the latter will not.
* </p>
* <p>
* When {@link NodeFilter#tail(Node, int)} is called the filter has to decide whether to
* <ul>
* <li>continue ({@link TailFilterDecision#CONTINUE}),</li>
* <li>remove the subtree ({@link TailFilterDecision#REMOVE}),</li>
* <li>interrupt the iteration and return ({@link TailFilterDecision#STOP}).</li>
* </ul>
* </p>
*/
public interface NodeFilter {
    /**
     * Invoked when the traversal reaches a node for the first time, before any of its children.
     *
     * @param node
     *         the node currently being visited.
     * @param depth
     *         distance of {@code node} from the traversal root: 0 for the root itself, 1 for its direct children,
     *         and so on.
     *
     * @return the decision on how the traversal should proceed with this node.
     */
    @NonNull
    HeadFilterDecision head(Node node, int depth);

    /**
     * Invoked when the traversal leaves a node, i.e. once all of its descendants have been visited.
     *
     * @param node
     *         the node currently being visited.
     * @param depth
     *         distance of {@code node} from the traversal root: 0 for the root itself, 1 for its direct children,
     *         and so on.
     *
     * @return the decision on how the traversal should proceed with this node.
     */
    @NonNull
    TailFilterDecision tail(Node node, int depth);


    /**
     * The possible answers of {@link NodeFilter#head(Node, int)}.
     */
    enum HeadFilterDecision {
        /** Keep going; children of the node are visited next. */
        CONTINUE,

        /** Do not visit the children, but still call {@link NodeFilter#tail(Node, int)} for the node. */
        SKIP_CHILDREN,

        /** Ignore the whole subtree; {@link NodeFilter#tail(Node, int)} is not called for the node. */
        SKIP_ENTIRELY,

        /** Delete the node together with its children; {@link NodeFilter#tail(Node, int)} is not called. */
        REMOVE,

        /** Abort the traversal. */
        STOP
    }

    /**
     * The possible answers of {@link NodeFilter#tail(Node, int)}.
     */
    enum TailFilterDecision {
        /** Keep going with the rest of the tree. */
        CONTINUE,

        /** Delete the node together with its children. */
        REMOVE,

        /** Abort the traversal. */
        STOP
    }
}

View file

@ -10,14 +10,9 @@ import android.content.Context;
import android.support.annotation.Nullable; import android.support.annotation.Nullable;
import android.support.annotation.VisibleForTesting; import android.support.annotation.VisibleForTesting;
import android.support.annotation.WorkerThread; import android.support.annotation.WorkerThread;
import timber.log.Timber;
import com.fsck.k9.Globals; import com.fsck.k9.Globals;
import com.fsck.k9.K9;
import com.fsck.k9.R; import com.fsck.k9.R;
import com.fsck.k9.mail.internet.MimeUtility;
import com.fsck.k9.message.html.HtmlConverter;
import com.fsck.k9.message.html.HtmlSanitizer;
import com.fsck.k9.mail.Address; import com.fsck.k9.mail.Address;
import com.fsck.k9.mail.Flag; import com.fsck.k9.mail.Flag;
import com.fsck.k9.mail.Message; import com.fsck.k9.mail.Message;
@ -28,9 +23,12 @@ import com.fsck.k9.mail.internet.Viewable;
import com.fsck.k9.mail.internet.Viewable.Flowed; import com.fsck.k9.mail.internet.Viewable.Flowed;
import com.fsck.k9.mailstore.util.FlowedMessageUtils; import com.fsck.k9.mailstore.util.FlowedMessageUtils;
import com.fsck.k9.message.extractors.AttachmentInfoExtractor; import com.fsck.k9.message.extractors.AttachmentInfoExtractor;
import com.fsck.k9.message.html.HtmlConverter;
import com.fsck.k9.message.html.HtmlProcessor;
import com.fsck.k9.ui.crypto.MessageCryptoAnnotations; import com.fsck.k9.ui.crypto.MessageCryptoAnnotations;
import com.fsck.k9.ui.crypto.MessageCryptoSplitter; import com.fsck.k9.ui.crypto.MessageCryptoSplitter;
import com.fsck.k9.ui.crypto.MessageCryptoSplitter.CryptoMessageParts; import com.fsck.k9.ui.crypto.MessageCryptoSplitter.CryptoMessageParts;
import timber.log.Timber;
import static com.fsck.k9.mail.internet.MimeUtility.getHeaderParameter; import static com.fsck.k9.mail.internet.MimeUtility.getHeaderParameter;
import static com.fsck.k9.mail.internet.Viewable.Alternative; import static com.fsck.k9.mail.internet.Viewable.Alternative;
@ -51,22 +49,22 @@ public class MessageViewInfoExtractor {
private final Context context; private final Context context;
private final AttachmentInfoExtractor attachmentInfoExtractor; private final AttachmentInfoExtractor attachmentInfoExtractor;
private final HtmlSanitizer htmlSanitizer; private final HtmlProcessor htmlProcessor;
public static MessageViewInfoExtractor getInstance() { public static MessageViewInfoExtractor getInstance() {
Context context = Globals.getContext(); Context context = Globals.getContext();
AttachmentInfoExtractor attachmentInfoExtractor = AttachmentInfoExtractor.getInstance(); AttachmentInfoExtractor attachmentInfoExtractor = AttachmentInfoExtractor.getInstance();
HtmlSanitizer htmlSanitizer = HtmlSanitizer.getInstance(); HtmlProcessor htmlProcessor = HtmlProcessor.newInstance();
return new MessageViewInfoExtractor(context, attachmentInfoExtractor, htmlSanitizer); return new MessageViewInfoExtractor(context, attachmentInfoExtractor, htmlProcessor);
} }
@VisibleForTesting @VisibleForTesting
MessageViewInfoExtractor(Context context, AttachmentInfoExtractor attachmentInfoExtractor, MessageViewInfoExtractor(Context context, AttachmentInfoExtractor attachmentInfoExtractor,
HtmlSanitizer htmlSanitizer) { HtmlProcessor htmlProcessor) {
this.context = context; this.context = context;
this.attachmentInfoExtractor = attachmentInfoExtractor; this.attachmentInfoExtractor = attachmentInfoExtractor;
this.htmlSanitizer = htmlSanitizer; this.htmlProcessor = htmlProcessor;
} }
@WorkerThread @WorkerThread
@ -199,8 +197,7 @@ public class MessageViewInfoExtractor {
} }
} }
String content = HtmlConverter.wrapMessageContent(html); String sanitizedHtml = htmlProcessor.processForDisplay(html.toString());
String sanitizedHtml = htmlSanitizer.sanitize(content);
return new ViewableExtractedText(text.toString(), sanitizedHtml); return new ViewableExtractedText(text.toString(), sanitizedHtml);
} catch (Exception e) { } catch (Exception e) {

View file

@ -0,0 +1,100 @@
package com.fsck.k9.message.html;
import java.util.List;
import java.util.Locale;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Tag;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import static java.util.Arrays.asList;
class HeadCleaner {
private static final List<String> ALLOWED_TAGS = asList("style", "meta");
public void clean(Document dirtyDocument, Document cleanedDocument) {
copySafeNodes(dirtyDocument.head(), cleanedDocument.head());
}
private void copySafeNodes(Element source, Element destination) {
CleaningVisitor cleaningVisitor = new CleaningVisitor(source, destination);
NodeTraversor traversor = new NodeTraversor(cleaningVisitor);
traversor.traverse(source);
}
static class CleaningVisitor implements NodeVisitor {
private final Element root;
private Element destination;
private boolean skipChildren = false;
CleaningVisitor(Element root, Element destination) {
this.root = root;
this.destination = destination;
}
public void head(Node source, int depth) {
if (skipChildren) {
return;
}
if (source instanceof Element) {
Element sourceElement = (Element) source;
if (isSafeTag(sourceElement)) {
String sourceTag = sourceElement.tagName();
Attributes destinationAttributes = sourceElement.attributes().clone();
Element destinationChild = new Element(Tag.valueOf(sourceTag), sourceElement.baseUri(), destinationAttributes);
destination.appendChild(destinationChild);
destination = destinationChild;
} else if (source != root) {
skipChildren = true;
}
} else if (source instanceof TextNode) {
TextNode sourceText = (TextNode) source;
TextNode destinationText = new TextNode(sourceText.getWholeText(), source.baseUri());
destination.appendChild(destinationText);
} else if (source instanceof DataNode && isSafeTag(source.parent())) {
DataNode sourceData = (DataNode) source;
DataNode destinationData = new DataNode(sourceData.getWholeData(), source.baseUri());
destination.appendChild(destinationData);
}
}
public void tail(Node source, int depth) {
if (source == destination) {
destination = destination.parent();
skipChildren = false;
}
}
private boolean isSafeTag(Node node) {
if (isMetaRefresh(node)) {
return false;
}
String tag = node.nodeName().toLowerCase(Locale.ROOT);
return ALLOWED_TAGS.contains(tag);
}
private boolean isMetaRefresh(Node node) {
if (!"meta".equalsIgnoreCase(node.nodeName())) {
return false;
}
String attributeValue = node.attributes().getIgnoreCase("http-equiv");
return "refresh".equalsIgnoreCase(attributeValue.trim());
}
}
}

View file

@ -1260,7 +1260,7 @@ public class HtmlConverter {
"</body></html>"; "</body></html>";
} }
private static String cssStyleTheme() { static String cssStyleTheme() {
if (K9.getK9MessageViewTheme() == K9.Theme.DARK) { if (K9.getK9MessageViewTheme() == K9.Theme.DARK) {
return "<style type=\"text/css\">" + return "<style type=\"text/css\">" +
"* { background: black ! important; color: #F3F3F3 !important }" + "* { background: black ! important; color: #F3F3F3 !important }" +
@ -1283,7 +1283,7 @@ public class HtmlConverter {
* A {@code <style>} element that can be dynamically included in the HTML * A {@code <style>} element that can be dynamically included in the HTML
* {@code <head>} element when messages are displayed. * {@code <head>} element when messages are displayed.
*/ */
private static String cssStylePre() { static String cssStylePre() {
final String font = K9.messageViewFixedWidthFont() final String font = K9.messageViewFixedWidthFont()
? "monospace" ? "monospace"
: "sans-serif"; : "sans-serif";

View file

@ -0,0 +1,40 @@
package com.fsck.k9.message.html;
import org.jsoup.nodes.Document;
public class HtmlProcessor {
private final HtmlSanitizer htmlSanitizer;
public static HtmlProcessor newInstance() {
HtmlSanitizer htmlSanitizer = new HtmlSanitizer();
return new HtmlProcessor(htmlSanitizer);
}
private HtmlProcessor(HtmlSanitizer htmlSanitizer) {
this.htmlSanitizer = htmlSanitizer;
}
public String processForDisplay(String html) {
Document document = htmlSanitizer.sanitize(html);
addCustomHeadContents(document);
return toCompactString(document);
}
private void addCustomHeadContents(Document document) {
document.head().append("<meta name=\"viewport\" content=\"width=device-width\"/>" +
HtmlConverter.cssStyleTheme() +
HtmlConverter.cssStylePre());
}
public static String toCompactString(Document document) {
document.outputSettings()
.prettyPrint(false)
.indentAmount(0);
return document.html();
}
}

View file

@ -1,63 +1,31 @@
package com.fsck.k9.message.html; package com.fsck.k9.message.html;
import android.support.annotation.VisibleForTesting; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.htmlcleaner.CleanerProperties; import org.jsoup.safety.Cleaner;
import org.htmlcleaner.HtmlCleaner; import org.jsoup.safety.Whitelist;
import org.htmlcleaner.HtmlSerializer;
import org.htmlcleaner.SimpleHtmlSerializer;
import org.htmlcleaner.TagNode;
public class HtmlSanitizer { public class HtmlSanitizer {
private static final HtmlCleaner HTML_CLEANER; private final HeadCleaner headCleaner;
private static final HtmlSerializer HTML_SERIALIZER; private final Cleaner cleaner;
static { HtmlSanitizer() {
CleanerProperties properties = createCleanerProperties(); Whitelist whitelist = Whitelist.relaxed()
HTML_CLEANER = new HtmlCleaner(properties); .addTags("font")
HTML_SERIALIZER = new SimpleHtmlSerializer(properties); .addAttributes("table", "align", "bgcolor", "border", "cellpadding", "cellspacing", "width")
.addAttributes(":all", "class", "style", "id")
.addProtocols("img", "src", "http", "https", "cid", "data");
cleaner = new Cleaner(whitelist);
headCleaner = new HeadCleaner();
} }
public Document sanitize(String html) {
public static HtmlSanitizer getInstance() { Document dirtyDocument = Jsoup.parse(html);
return new HtmlSanitizer(); Document cleanedDocument = cleaner.clean(dirtyDocument);
} headCleaner.clean(dirtyDocument, cleanedDocument);
return cleanedDocument;
@VisibleForTesting
HtmlSanitizer() {}
public String sanitize(String html) {
TagNode rootNode = HTML_CLEANER.clean(html);
removeMetaRefresh(rootNode);
return HTML_SERIALIZER.getAsString(rootNode, "UTF8");
}
private static CleanerProperties createCleanerProperties() {
CleanerProperties properties = new CleanerProperties();
// See http://htmlcleaner.sourceforge.net/parameters.php for descriptions
properties.setNamespacesAware(false);
properties.setAdvancedXmlEscape(false);
properties.setOmitXmlDeclaration(true);
properties.setOmitDoctypeDeclaration(false);
properties.setTranslateSpecialEntities(false);
properties.setRecognizeUnicodeChars(false);
properties.setIgnoreQuestAndExclam(false);
return properties;
}
private void removeMetaRefresh(TagNode rootNode) {
for (TagNode element : rootNode.getElementListByName("meta", true)) {
String httpEquiv = element.getAttributeByName("http-equiv");
if (httpEquiv != null && httpEquiv.trim().equalsIgnoreCase("refresh")) {
element.removeFromTree();
}
}
} }
} }

View file

@ -1,90 +1,100 @@
package com.fsck.k9.message.signature; package com.fsck.k9.message.signature;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import timber.log.Timber; import android.support.annotation.NonNull;
import com.fsck.k9.K9; import com.fsck.k9.helper.jsoup.AdvancedNodeTraversor;
import org.htmlcleaner.CleanerProperties; import com.fsck.k9.helper.jsoup.NodeFilter;
import org.htmlcleaner.HtmlCleaner; import com.fsck.k9.message.html.HtmlProcessor;
import org.htmlcleaner.SimpleHtmlSerializer; import org.jsoup.Jsoup;
import org.htmlcleaner.TagNode; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Tag;
public class HtmlSignatureRemover { public class HtmlSignatureRemover {
private static final Pattern DASH_SIGNATURE_HTML = Pattern.compile("(<br( /)?>|\r?\n)-- <br( /)?>", Pattern.CASE_INSENSITIVE);
private static final Pattern BLOCKQUOTE_START = Pattern.compile("<blockquote", Pattern.CASE_INSENSITIVE);
private static final Pattern BLOCKQUOTE_END = Pattern.compile("</blockquote>", Pattern.CASE_INSENSITIVE);
public static String stripSignature(String content) { public static String stripSignature(String content) {
Matcher dashSignatureHtml = DASH_SIGNATURE_HTML.matcher(content); return new HtmlSignatureRemover().stripSignatureInternal(content);
if (dashSignatureHtml.find()) { }
Matcher blockquoteStart = BLOCKQUOTE_START.matcher(content);
Matcher blockquoteEnd = BLOCKQUOTE_END.matcher(content);
List<Integer> start = new ArrayList<>();
List<Integer> end = new ArrayList<>();
while (blockquoteStart.find()) { private String stripSignatureInternal(String content) {
start.add(blockquoteStart.start()); Document document = Jsoup.parse(content);
AdvancedNodeTraversor nodeTraversor = new AdvancedNodeTraversor(new StripSignatureFilter());
nodeTraversor.filter(document.body());
return HtmlProcessor.toCompactString(document);
} }
while (blockquoteEnd.find()) {
end.add(blockquoteEnd.start());
static class StripSignatureFilter implements NodeFilter {
private static final Pattern DASH_SIGNATURE_HTML = Pattern.compile("\\s*-- \\s*", Pattern.CASE_INSENSITIVE);
private static final Tag BLOCKQUOTE = Tag.valueOf("blockquote");
private static final Tag BR = Tag.valueOf("br");
private static final Tag P = Tag.valueOf("p");
private boolean signatureFound = false;
private boolean lastElementCausedLineBreak = false;
private Element brElementPrecedingDashes;
@NonNull
@Override
public HeadFilterDecision head(Node node, int depth) {
if (signatureFound) {
return HeadFilterDecision.REMOVE;
} }
if (start.size() != end.size()) {
Timber.d("There are %d <blockquote> tags, but %d </blockquote> tags. Refusing to strip.", if (node instanceof Element) {
start.size(), end.size()); lastElementCausedLineBreak = false;
} else if (start.size() > 0) {
// Ignore quoted signatures in blockquotes. Element element = (Element) node;
dashSignatureHtml.region(0, start.get(0)); if (element.tag().equals(BLOCKQUOTE)) {
if (dashSignatureHtml.find()) { return HeadFilterDecision.SKIP_ENTIRELY;
// before first <blockquote>.
content = content.substring(0, dashSignatureHtml.start());
} else {
for (int i = 0; i < start.size() - 1; i++) {
// within blockquotes.
if (end.get(i) < start.get(i + 1)) {
dashSignatureHtml.region(end.get(i), start.get(i + 1));
if (dashSignatureHtml.find()) {
content = content.substring(0, dashSignatureHtml.start());
break;
} }
} else if (node instanceof TextNode) {
TextNode textNode = (TextNode) node;
if (lastElementCausedLineBreak && DASH_SIGNATURE_HTML.matcher(textNode.getWholeText()).matches()) {
Node nextNode = node.nextSibling();
if (nextNode instanceof Element && ((Element) nextNode).tag().equals(BR)) {
signatureFound = true;
if (brElementPrecedingDashes != null) {
brElementPrecedingDashes.remove();
brElementPrecedingDashes = null;
} }
return HeadFilterDecision.REMOVE;
} }
if (end.get(end.size() - 1) < content.length()) {
// after last </blockquote>.
dashSignatureHtml.region(end.get(end.size() - 1), content.length());
if (dashSignatureHtml.find()) {
content = content.substring(0, dashSignatureHtml.start());
}
}
}
} else {
// No blockquotes found.
content = content.substring(0, dashSignatureHtml.start());
} }
} }
// Fix the stripping off of closing tags if a signature was stripped, return HeadFilterDecision.CONTINUE;
// as well as clean up the HTML of the quoted message. }
HtmlCleaner cleaner = new HtmlCleaner();
CleanerProperties properties = cleaner.getProperties();
// see http://htmlcleaner.sourceforge.net/parameters.php for descriptions @NonNull
properties.setNamespacesAware(false); @Override
properties.setAdvancedXmlEscape(false); public TailFilterDecision tail(Node node, int depth) {
properties.setOmitXmlDeclaration(true); if (signatureFound) {
properties.setOmitDoctypeDeclaration(false); return TailFilterDecision.CONTINUE;
properties.setTranslateSpecialEntities(false); }
properties.setRecognizeUnicodeChars(false);
TagNode node = cleaner.clean(content); if (node instanceof Element) {
SimpleHtmlSerializer htmlSerialized = new SimpleHtmlSerializer(properties); Element element = (Element) node;
content = htmlSerialized.getAsString(node, "UTF8"); boolean elementIsBr = element.tag().equals(BR);
return content; if (elementIsBr || element.tag().equals(P)) {
lastElementCausedLineBreak = true;
brElementPrecedingDashes = elementIsBr ? element : null;
return TailFilterDecision.CONTINUE;
}
}
lastElementCausedLineBreak = false;
return TailFilterDecision.CONTINUE;
}
} }
} }

View file

@ -3,7 +3,4 @@
<string name="app_revision_url">https://github.com/k9mail/k-9/wiki/ReleaseNotes</string> <string name="app_revision_url">https://github.com/k9mail/k-9/wiki/ReleaseNotes</string>
<string name="app_webpage_url">https://k9mail.github.io/</string> <string name="app_webpage_url">https://k9mail.github.io/</string>
<string name="message_header_mua">K-9 Mail for Android</string> <string name="message_header_mua">K-9 Mail for Android</string>
<!-- Licenses - translating legal stuff should be left to experts -->
<string name="app_htmlcleaner_license">&lt;p>HtmlCleaner is distributed under BSD License. It gives the freedom for anyone to use, explore, modify, and distribute HtmlCleaner, but without any warranty.&lt;/p>&lt;p>Copyright © 20062011, HtmlCleaner team.&lt;br>All rights reserved.&lt;/p>&lt;p>Redistribution and use of this software in source and binary forms, with or without modification, are permitted provided that the following conditions are met:&lt;/p>&lt;p>&lt;ul>&lt;li>Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.&lt;/li>&lt;li>Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.&lt;/li>&lt;li>The name of HtmlCleaner may not be used to endorse or promote products derived from this software without specific prior written permission.&lt;/li>&lt;/ul>&lt;/p>&lt;p>THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.&lt;/p></string>
</resources> </resources>

View file

@ -12,8 +12,6 @@ import android.app.Application;
import com.fsck.k9.GlobalsHelper; import com.fsck.k9.GlobalsHelper;
import com.fsck.k9.K9RobolectricTestRunner; import com.fsck.k9.K9RobolectricTestRunner;
import com.fsck.k9.message.html.HtmlSanitizer;
import com.fsck.k9.message.html.HtmlSanitizerHelper;
import com.fsck.k9.mail.Address; import com.fsck.k9.mail.Address;
import com.fsck.k9.mail.Message.RecipientType; import com.fsck.k9.mail.Message.RecipientType;
import com.fsck.k9.mail.MessagingException; import com.fsck.k9.mail.MessagingException;
@ -28,14 +26,17 @@ import com.fsck.k9.mail.internet.TextBody;
import com.fsck.k9.mail.internet.Viewable; import com.fsck.k9.mail.internet.Viewable;
import com.fsck.k9.mail.internet.Viewable.MessageHeader; import com.fsck.k9.mail.internet.Viewable.MessageHeader;
import com.fsck.k9.mailstore.MessageViewInfoExtractor.ViewableExtractedText; import com.fsck.k9.mailstore.MessageViewInfoExtractor.ViewableExtractedText;
import com.fsck.k9.message.html.HtmlProcessor;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import org.robolectric.RuntimeEnvironment; import org.robolectric.RuntimeEnvironment;
import static junit.framework.Assert.assertEquals; import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertSame; import static junit.framework.Assert.assertSame;
import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
@ -57,10 +58,8 @@ public class MessageViewInfoExtractorTest {
GlobalsHelper.setContext(context); GlobalsHelper.setContext(context);
HtmlSanitizer dummyHtmlSanitizer = HtmlSanitizerHelper.getDummyHtmlSanitizer(); HtmlProcessor htmlProcessor = createFakeHtmlProcessor();
messageViewInfoExtractor = new MessageViewInfoExtractor(context,null, htmlProcessor);
messageViewInfoExtractor = new MessageViewInfoExtractor(context,
null, dummyHtmlSanitizer);
} }
@Test @Test
@ -74,11 +73,11 @@ public class MessageViewInfoExtractorTest {
message.setHeader(MimeHeader.HEADER_CONTENT_TYPE, "text/plain; format=flowed"); message.setHeader(MimeHeader.HEADER_CONTENT_TYPE, "text/plain; format=flowed");
// Prepare fixture // Prepare fixture
HtmlSanitizer htmlSanitizer = mock(HtmlSanitizer.class); HtmlProcessor htmlProcessor = mock(HtmlProcessor.class);
MessageViewInfoExtractor messageViewInfoExtractor = MessageViewInfoExtractor messageViewInfoExtractor =
new MessageViewInfoExtractor(context, null, htmlSanitizer); new MessageViewInfoExtractor(context, null, htmlProcessor);
String value = "--sanitized html--"; String value = "--sanitized html--";
when(htmlSanitizer.sanitize(any(String.class))).thenReturn(value); when(htmlProcessor.processForDisplay(anyString())).thenReturn(value);
// Extract text // Extract text
List<Part> outputNonViewableParts = new ArrayList<>(); List<Part> outputNonViewableParts = new ArrayList<>();
@ -113,7 +112,7 @@ public class MessageViewInfoExtractorTest {
"</pre>"; "</pre>";
assertEquals(expectedText, container.text); assertEquals(expectedText, container.text);
assertEquals(expectedHtml, getHtmlBodyText(container.html)); assertEquals(expectedHtml, container.html);
} }
@Test @Test
@ -140,7 +139,7 @@ public class MessageViewInfoExtractorTest {
"</pre>"; "</pre>";
assertEquals(expectedText, container.text); assertEquals(expectedText, container.text);
assertEquals(expectedHtml, getHtmlBodyText(container.html)); assertEquals(expectedHtml, container.html);
} }
@Test @Test
@ -166,7 +165,7 @@ public class MessageViewInfoExtractorTest {
bodyText; bodyText;
assertEquals(expectedText, container.text); assertEquals(expectedText, container.text);
assertEquals(expectedHtml, getHtmlBodyText(container.html)); assertEquals(expectedHtml, container.html);
} }
@Test @Test
@ -211,7 +210,7 @@ public class MessageViewInfoExtractorTest {
assertEquals(expectedText, container.text); assertEquals(expectedText, container.text);
assertEquals(expectedHtml, getHtmlBodyText(container.html)); assertEquals(expectedHtml, container.html);
} }
@Test @Test
@ -229,7 +228,7 @@ public class MessageViewInfoExtractorTest {
// Create message/rfc822 body // Create message/rfc822 body
MimeMessage innerMessage = new MimeMessage(); MimeMessage innerMessage = new MimeMessage();
innerMessage.addSentDate(new Date(112, 02, 17), false); innerMessage.addSentDate(new Date(112, 2, 17), false);
innerMessage.setRecipients(RecipientType.TO, new Address[] { new Address("to@example.com") }); innerMessage.setRecipients(RecipientType.TO, new Address[] { new Address("to@example.com") });
innerMessage.setSubject("Subject"); innerMessage.setSubject("Subject");
innerMessage.setFrom(new Address("from@example.com")); innerMessage.setFrom(new Address("from@example.com"));
@ -290,7 +289,7 @@ public class MessageViewInfoExtractorTest {
"</pre>"; "</pre>";
assertEquals(expectedText, container.text); assertEquals(expectedText, container.text);
assertEquals(expectedHtml, getHtmlBodyText(container.html)); assertEquals(expectedHtml, container.html);
} }
@Test @Test
@ -355,13 +354,19 @@ public class MessageViewInfoExtractorTest {
ViewableExtractedText firstMessageExtractedText = ViewableExtractedText firstMessageExtractedText =
messageViewInfoExtractor.extractTextFromViewables(outputViewableParts); messageViewInfoExtractor.extractTextFromViewables(outputViewableParts);
assertEquals(expectedExtractedText, firstMessageExtractedText.text); assertEquals(expectedExtractedText, firstMessageExtractedText.text);
assertEquals(expectedHtmlText, getHtmlBodyText(firstMessageExtractedText.html)); assertEquals(expectedHtmlText, firstMessageExtractedText.html);
} }
private static String getHtmlBodyText(String htmlText) { HtmlProcessor createFakeHtmlProcessor() {
htmlText = htmlText.substring(htmlText.indexOf("<body>") +6); HtmlProcessor htmlProcessor = mock(HtmlProcessor.class);
htmlText = htmlText.substring(0, htmlText.indexOf("</body>"));
return htmlText;
}
when(htmlProcessor.processForDisplay(anyString())).thenAnswer(new Answer<String>() {
@Override
public String answer(InvocationOnMock invocation) throws Throwable {
return (String) invocation.getArguments()[0];
}
});
return htmlProcessor;
}
} }

View file

@ -1,13 +0,0 @@
package com.fsck.k9.message.html;
/**
 * Supplies a pass-through {@link HtmlSanitizer} for callers that need a sanitizer instance
 * but do not want the HTML to be modified.
 */
public class HtmlSanitizerHelper {
    /**
     * Creates a sanitizer stub that performs no sanitization.
     *
     * @return an {@link HtmlSanitizer} whose {@code sanitize} hands back its argument untouched
     */
    public static HtmlSanitizer getDummyHtmlSanitizer() {
        HtmlSanitizer passThroughSanitizer = new HtmlSanitizer() {
            @Override
            public String sanitize(String unsanitizedHtml) {
                // Deliberately no cleaning: the caller gets back exactly what it passed in.
                return unsanitizedHtml;
            }
        };
        return passThroughSanitizer;
    }
}

View file

@ -1,19 +1,16 @@
package com.fsck.k9.message.html; package com.fsck.k9.message.html;
import org.junit.Before; import org.jsoup.nodes.Document;
import org.junit.Test; import org.junit.Test;
import static com.fsck.k9.message.html.HtmlProcessor.toCompactString;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
public class HtmlSanitizerTest { public class HtmlSanitizerTest {
private HtmlSanitizer htmlSanitizer; private HtmlSanitizer htmlSanitizer = new HtmlSanitizer();
@Before
public void setUp() throws Exception {
htmlSanitizer = HtmlSanitizer.getInstance();
}
@Test @Test
public void shouldRemoveMetaRefreshInHead() { public void shouldRemoveMetaRefreshInHead() {
@ -22,9 +19,9 @@ public class HtmlSanitizerTest {
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
String result = htmlSanitizer.sanitize(html); Document result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result); assertEquals("<html><head></head><body>Message</body></html>", toCompactString(result));
} }
@Test @Test
@ -34,9 +31,9 @@ public class HtmlSanitizerTest {
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
String result = htmlSanitizer.sanitize(html); Document result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result); assertEquals("<html><head></head><body>Message</body></html>", toCompactString(result));
} }
@Test @Test
@ -46,9 +43,9 @@ public class HtmlSanitizerTest {
"<body><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\">Message</body>" + "<body><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\">Message</body>" +
"</html>"; "</html>";
String result = htmlSanitizer.sanitize(html); Document result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result); assertEquals("<html><head></head><body>Message</body></html>", toCompactString(result));
} }
@Test @Test
@ -58,9 +55,9 @@ public class HtmlSanitizerTest {
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
String result = htmlSanitizer.sanitize(html); Document result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result); assertEquals("<html><head></head><body>Message</body></html>", toCompactString(result));
} }
@Test @Test
@ -70,9 +67,9 @@ public class HtmlSanitizerTest {
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
String result = htmlSanitizer.sanitize(html); Document result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result); assertEquals("<html><head></head><body>Message</body></html>", toCompactString(result));
} }
@Test @Test
@ -82,9 +79,9 @@ public class HtmlSanitizerTest {
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
String result = htmlSanitizer.sanitize(html); Document result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result); assertEquals("<html><head></head><body>Message</body></html>", toCompactString(result));
} }
@Test @Test
@ -94,9 +91,9 @@ public class HtmlSanitizerTest {
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
String result = htmlSanitizer.sanitize(html); Document result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result); assertEquals("<html><head></head><body>Message</body></html>", toCompactString(result));
} }
@Test @Test
@ -106,9 +103,9 @@ public class HtmlSanitizerTest {
"<body><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\">Message</body>" + "<body><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\">Message</body>" +
"</html>"; "</html>";
String result = htmlSanitizer.sanitize(html); Document result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body>Message</body></html>", result); assertEquals("<html><head></head><body>Message</body></html>", toCompactString(result));
} }
@Test @Test
@ -121,11 +118,11 @@ public class HtmlSanitizerTest {
"<body>Message</body>" + "<body>Message</body>" +
"</html>"; "</html>";
String result = htmlSanitizer.sanitize(html); Document result = htmlSanitizer.sanitize(html);
assertEquals("<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" /></head>" + assertEquals("<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\"></head>" +
"<body>Message</body></html>", result); "<body>Message</body></html>", toCompactString(result));
} }
@Test @Test
@ -133,20 +130,20 @@ public class HtmlSanitizerTest {
String html = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + String html = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
"<html><head></head><body></body></html>"; "<html><head></head><body></body></html>";
String result = htmlSanitizer.sanitize(html); Document result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body></body></html>", result); assertEquals("<html><head></head><body></body></html>", toCompactString(result));
} }
@Test @Test
public void shouldNormalizeTables() { public void shouldNormalizeTables() {
String html = "<html><head></head><body><table><tr><td></td><td></td></tr></table></body></html>"; String html = "<html><head></head><body><table><tr><td></td><td></td></tr></table></body></html>";
String result = htmlSanitizer.sanitize(html); Document result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body><table><tbody>" + assertEquals("<html><head></head><body><table><tbody>" +
"<tr><td></td><td></td></tr>" + "<tr><td></td><td></td></tr>" +
"</tbody></table></body></html>", result); "</tbody></table></body></html>", toCompactString(result));
} }
@Test @Test
@ -155,10 +152,10 @@ public class HtmlSanitizerTest {
"<tr><td><!==><!==>Hmailserver service shutdown:</td><td><!==><!==>Ok</td></tr>" + "<tr><td><!==><!==>Hmailserver service shutdown:</td><td><!==><!==>Ok</td></tr>" +
"</table></body></html>"; "</table></body></html>";
String result = htmlSanitizer.sanitize(html); Document result = htmlSanitizer.sanitize(html);
assertEquals("<html><head></head><body><table><tbody>" + assertEquals("<html><head></head><body><table><tbody>" +
"<tr><td>&lt;!==&gt;&lt;!==&gt;Hmailserver service shutdown:</td><td>&lt;!==&gt;&lt;!==&gt;Ok</td></tr>" + "<tr><td>Hmailserver service shutdown:</td><td>Ok</td></tr>" +
"</tbody></table></body></html>", result); "</tbody></table></body></html>", toCompactString(result));
} }
} }

View file

@ -3,7 +3,6 @@ package com.fsck.k9.message.signature;
import com.fsck.k9.K9RobolectricTestRunner; import com.fsck.k9.K9RobolectricTestRunner;
import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
import org.junit.runner.RunWith; import org.junit.runner.RunWith;
import org.robolectric.annotation.Config; import org.robolectric.annotation.Config;
@ -27,7 +26,6 @@ public class HtmlSignatureRemoverTest {
assertEquals("This is the body text", extractText(withoutSignature)); assertEquals("This is the body text", extractText(withoutSignature));
} }
@Ignore
@Test @Test
public void shouldStripSignatureFromThunderbirdStyleHtml() throws Exception { public void shouldStripSignatureFromThunderbirdStyleHtml() throws Exception {
String html = "<html>\r\n" + String html = "<html>\r\n" +
@ -88,8 +86,8 @@ public class HtmlSignatureRemoverTest {
assertEquals("<html><head></head><body>" + assertEquals("<html><head></head><body>" +
"<blockquote>" + "<blockquote>" +
"This is some quoted text" + "This is some quoted text" +
"<br />" + "<br>" +
"-- <br />" + "-- <br>" +
"Inner signature" + "Inner signature" +
"</blockquote>" + "</blockquote>" +
"<div>This is the body text</div>" + "<div>This is the body text</div>" +
@ -141,7 +139,7 @@ public class HtmlSignatureRemoverTest {
String withoutSignature = HtmlSignatureRemover.stripSignature(html); String withoutSignature = HtmlSignatureRemover.stripSignature(html);
assertEquals("<html><head></head><body>" + assertEquals("<html><head></head><body>" +
"This is the body text<br />" + "This is the body text<br>" +
"<blockquote>Some quote</blockquote>" + "<blockquote>Some quote</blockquote>" +
"</body></html>", "</body></html>",
withoutSignature); withoutSignature);