Improved html to text

pull/153/head
M66B 7 years ago
parent 12751b642b
commit a28cc1a0f9

@ -1667,7 +1667,11 @@ public class AdapterMessage extends RecyclerView.Adapter<AdapterMessage.ViewHold
final boolean show_quotes = properties.getValue("quotes", message.id); final boolean show_quotes = properties.getValue("quotes", message.id);
final boolean show_images = properties.getValue("images", message.id); final boolean show_images = properties.getValue("images", message.id);
return HtmlHelper.fromHtml(HtmlHelper.sanitize(context, body, show_quotes), new Html.ImageGetter() { String html = HtmlHelper.sanitize(context, body, show_quotes);
if (debug)
html += "<pre>" + Html.escapeHtml(html) + "</pre>";
return HtmlHelper.fromHtml(html, new Html.ImageGetter() {
@Override @Override
public Drawable getDrawable(String source) { public Drawable getDrawable(String source) {
Drawable image = HtmlHelper.decodeImage(source, context, message.id, show_images); Drawable image = HtmlHelper.decodeImage(source, context, message.id, show_images);

@ -38,6 +38,7 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node; import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode; import org.jsoup.nodes.TextNode;
import org.jsoup.safety.Cleaner;
import org.jsoup.safety.Whitelist; import org.jsoup.safety.Whitelist;
import org.jsoup.select.NodeTraversor; import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor; import org.jsoup.select.NodeVisitor;
@ -100,12 +101,16 @@ public class HtmlHelper {
} }
static String sanitize(Context context, String html, boolean showQuotes) { static String sanitize(Context context, String html, boolean showQuotes) {
final Document document = Jsoup.parse(Jsoup.clean(html, Whitelist Document parsed = Jsoup.parse(html);
.relaxed() Whitelist whitelist = Whitelist.relaxed()
.addTags("hr") .addTags("hr")
.removeTags("col", "colgroup", "thead", "tbody") .removeTags("col", "colgroup", "thead", "tbody")
.removeAttributes("table", "width")
.removeAttributes("td", "colspan", "rowspan", "width")
.removeAttributes("th", "colspan", "rowspan", "width")
.addProtocols("img", "src", "cid") .addProtocols("img", "src", "cid")
.addProtocols("img", "src", "data"))); .addProtocols("img", "src", "data");
final Document document = new Cleaner(whitelist).clean(parsed);
// Quotes // Quotes
if (!showQuotes) if (!showQuotes)
@ -115,12 +120,12 @@ public class HtmlHelper {
// Tables // Tables
for (Element col : document.select("th,td")) { for (Element col : document.select("th,td")) {
// prevent line breaks // prevent line breaks
col.select("br").tagName("span").html(" "); col.select("br").tagName("span").html("&nbsp;");
col.select("div").tagName("span"); col.select("div").tagName("span");
// separate columns by a space // separate columns by a space
if (col.nextElementSibling() != null) if (col.nextElementSibling() != null)
col.append(" "); col.append("&nbsp;");
if ("th".equals(col.tagName())) if ("th".equals(col.tagName()))
col.tagName("strong"); col.tagName("strong");
@ -180,6 +185,7 @@ public class HtmlHelper {
for (Element img : document.select("img")) { for (Element img : document.select("img")) {
String src = img.attr("src"); String src = img.attr("src");
String alt = img.attr("alt"); String alt = img.attr("alt");
String title = img.attr("title");
String height = img.attr("height").trim(); String height = img.attr("height").trim();
String width = img.attr("width").trim(); String width = img.attr("width").trim();
@ -210,6 +216,10 @@ public class HtmlHelper {
div.appendElement("br"); div.appendElement("br");
div.appendElement("em").text(alt); div.appendElement("em").text(alt);
} }
if (!TextUtils.isEmpty(title)) {
div.appendElement("br");
div.appendElement("em").text(title);
}
// Tracking image // Tracking image
if ("1".equals(height) && "1".equals(width) && !TextUtils.isEmpty(src)) { if ("1".equals(height) && "1".equals(width) && !TextUtils.isEmpty(src)) {
@ -226,11 +236,13 @@ public class HtmlHelper {
public void head(Node node, int depth) { public void head(Node node, int depth) {
if (node instanceof TextNode) { if (node instanceof TextNode) {
TextNode tnode = (TextNode) node; TextNode tnode = (TextNode) node;
Element span = document.createElement("span");
int pos = 0;
String text = tnode.text(); String text = tnode.text();
Matcher matcher = PatternsCompat.WEB_URL.matcher(text); Matcher matcher = PatternsCompat.WEB_URL.matcher(text);
if (matcher.matches()) {
Element span = document.createElement("span");
int pos = 0;
while (matcher.find()) { while (matcher.find()) {
boolean linked = false; boolean linked = false;
Node parent = tnode.parent(); Node parent = tnode.parent();
@ -266,11 +278,17 @@ public class HtmlHelper {
tnode.text(""); tnode.text("");
} }
} }
}
@Override @Override
public void tail(Node node, int depth) { public void tail(Node node, int depth) {
} }
}, document.body()); }, document);
// Remove block elements displaying nothing
for (Element e : document.select("*"))
if (e.isBlock() && !e.hasText() && e.select("img").size() == 0)
e.remove();
return document.body().html(); return document.body().html();
} }

Loading…
Cancel
Save