From 8ad66eb162117f0015031ae0b0cdcff7ac724f90 Mon Sep 17 00:00:00 2001 From: M66B Date: Sat, 5 Jan 2019 11:17:33 +0000 Subject: [PATCH] Improved html to text conversion --- .../eu/faircode/email/ActivityCompose.java | 14 ++++--- .../eu/faircode/email/AdapterMessage.java | 3 +- .../java/eu/faircode/email/HtmlHelper.java | 37 ++++++++++++++++++- .../java/eu/faircode/email/MessageHelper.java | 4 +- .../eu/faircode/email/ServiceSynchronize.java | 21 +++++------ 5 files changed, 55 insertions(+), 24 deletions(-) diff --git a/app/src/main/java/eu/faircode/email/ActivityCompose.java b/app/src/main/java/eu/faircode/email/ActivityCompose.java index 56def4886c..ce4a59a31f 100644 --- a/app/src/main/java/eu/faircode/email/ActivityCompose.java +++ b/app/src/main/java/eu/faircode/email/ActivityCompose.java @@ -25,8 +25,6 @@ import android.os.Bundle; import android.text.TextUtils; import android.view.MenuItem; -import org.jsoup.Jsoup; - import java.util.ArrayList; import javax.mail.internet.AddressException; @@ -113,13 +111,17 @@ public class ActivityCompose extends ActivityBilling implements FragmentManager. if (intent.hasExtra(Intent.EXTRA_SUBJECT)) { String subject = intent.getStringExtra(Intent.EXTRA_SUBJECT); if (subject != null) - args.putString("subject", Jsoup.parse(subject).text()); + args.putString("subject", subject); } - if (intent.hasExtra(Intent.EXTRA_TEXT)) { - String body = intent.getStringExtra(Intent.EXTRA_TEXT); // Intent.EXTRA_HTML_TEXT + if (intent.hasExtra(Intent.EXTRA_HTML_TEXT)) { + String html = intent.getStringExtra(Intent.EXTRA_HTML_TEXT); + if (html != null) + args.putString("body", HtmlHelper.getText(html)); + } else if (intent.hasExtra(Intent.EXTRA_TEXT)) { + String body = intent.getStringExtra(Intent.EXTRA_TEXT); if (body != null) - args.putString("body", Jsoup.parse(body).text()); + args.putString("body", body); } if (intent.hasExtra(Intent.EXTRA_STREAM)) diff --git a/app/src/main/java/eu/faircode/email/AdapterMessage.java b/app/src/main/java/eu/faircode/email/AdapterMessage.java index a2b0b63af5..9b3deb2c82 100644 --- a/app/src/main/java/eu/faircode/email/AdapterMessage.java +++ b/app/src/main/java/eu/faircode/email/AdapterMessage.java @@ -69,7 +69,6 @@ import android.widget.Toast; import com.google.android.material.bottomnavigation.BottomNavigationView; import com.google.android.material.snackbar.Snackbar; -import org.jsoup.Jsoup; import org.xml.sax.XMLReader; import java.io.IOException; @@ -1324,7 +1323,7 @@ public class AdapterMessage extends RecyclerView.Adapter heads = Arrays.asList("p", "h1", "h2", "h3", "h4", "h5", "tr"); + private static final List tails = Arrays.asList("br", "dd", "dt", "p", "h1", "h2", "h3", "h4", "h5"); static String getBody(String html) { return Jsoup.parse(html).body().html(); @@ -251,6 +256,36 @@ public class HtmlHelper { static String getPreview(String body) { String text = (body == null ? null : Jsoup.parse(body).text()); - return (text == null ? null : text.substring(0, Math.min(text.length(), 250))); + return (text == null ? null : text.substring(0, Math.min(text.length(), PREVIEW_SIZE))); + } + + static String getText(String html) { + final StringBuilder sb = new StringBuilder(); + + NodeTraversor.traverse(new NodeVisitor() { + public void head(Node node, int depth) { + if (node instanceof TextNode) + sb.append(((TextNode) node).text()); + else { + String name = node.nodeName(); + if (name.equals("li")) + sb.append("\n * "); + else if (name.equals("dt")) + sb.append(" "); + else if (heads.contains(name)) + sb.append("\n"); + } + } + + public void tail(Node node, int depth) { + String name = node.nodeName(); + if (tails.contains(name)) + sb.append("\n"); + else if (name.equals("a")) + sb.append(" <").append(node.absUrl("href")).append(">"); + } + }, Jsoup.parse(html)); + + return sb.toString(); } } diff --git a/app/src/main/java/eu/faircode/email/MessageHelper.java b/app/src/main/java/eu/faircode/email/MessageHelper.java index 4bfbcdb9ce..281f6a94eb 100644 --- a/app/src/main/java/eu/faircode/email/MessageHelper.java +++ b/app/src/main/java/eu/faircode/email/MessageHelper.java @@ -23,8 +23,6 @@ import android.content.Context; import android.text.TextUtils; import android.webkit.MimeTypeMap; -import org.jsoup.Jsoup; - import java.io.BufferedReader; import java.io.ByteArrayOutputStream; import java.io.File; @@ -290,7 +288,7 @@ public class MessageHelper { body.append(identity.signature); } - String plain = Jsoup.parse(body.toString()).text(); + String plain = HtmlHelper.getText(body.toString()); StringBuilder html = new StringBuilder(); html.append("").append("\n"); diff --git a/app/src/main/java/eu/faircode/email/ServiceSynchronize.java b/app/src/main/java/eu/faircode/email/ServiceSynchronize.java index cafa013cc2..b6339e70d2 100644 --- a/app/src/main/java/eu/faircode/email/ServiceSynchronize.java +++ b/app/src/main/java/eu/faircode/email/ServiceSynchronize.java @@ -65,7 +65,6 @@ import com.sun.mail.util.MailConnectException; import org.json.JSONArray; import org.json.JSONException; -import org.jsoup.Jsoup; import java.io.IOException; import java.io.InputStream; @@ -147,7 +146,6 @@ public class ServiceSynchronize extends LifecycleService { private static final int SYNC_BATCH_SIZE = 20; private static final int DOWNLOAD_BATCH_SIZE = 20; private static final long RECONNECT_BACKOFF = 90 * 1000L; // milliseconds - private static final int PREVIEW_SIZE = 250; private static final int ACCOUNT_ERROR_AFTER = 90; // minutes private static final int IDENTITY_ERROR_AFTER = 30; // minutes private static final long STOP_DELAY = 5000L; // milliseconds @@ -645,11 +643,11 @@ public class ServiceSynchronize extends LifecycleService { if (message.content) try { - String html = message.read(ServiceSynchronize.this); + String html = message.read(this); StringBuilder sb = new StringBuilder(); if (!TextUtils.isEmpty(message.subject)) sb.append(message.subject).append("
"); - sb.append(Jsoup.parse(html).text()); + sb.append(HtmlHelper.getPreview(html)); mbuilder.setStyle(new Notification.BigTextStyle().bigText(Html.fromHtml(sb.toString()))); } catch (IOException ex) { Log.e(ex); @@ -1277,7 +1275,7 @@ public class ServiceSynchronize extends LifecycleService { }; String id = BuildConfig.APPLICATION_ID + ".POLL." + account.id; - PendingIntent pi = PendingIntent.getBroadcast(ServiceSynchronize.this, 0, new Intent(id), 0); + PendingIntent pi = PendingIntent.getBroadcast(this, 0, new Intent(id), 0); registerReceiver(alarm, new IntentFilter(id)); // Keep alive @@ -1335,7 +1333,7 @@ public class ServiceSynchronize extends LifecycleService { Log.e(account.name, ex); reportError(account, null, ex); - EntityLog.log(ServiceSynchronize.this, account.name + " " + Helper.formatThrowable(ex)); + EntityLog.log(this, account.name + " " + Helper.formatThrowable(ex)); db.account().setAccountError(account.id, Helper.formatThrowable(ex)); } finally { // Stop watching for operations @@ -1351,9 +1349,9 @@ public class ServiceSynchronize extends LifecycleService { // Close store try { - EntityLog.log(ServiceSynchronize.this, account.name + " store closing"); + EntityLog.log(this, account.name + " store closing"); istore.close(); - EntityLog.log(ServiceSynchronize.this, account.name + " store closed"); + EntityLog.log(this, account.name + " store closed"); } catch (Throwable ex) { Log.w(account.name, ex); } finally { @@ -1389,7 +1387,7 @@ public class ServiceSynchronize extends LifecycleService { }; String id = BuildConfig.APPLICATION_ID + ".BACKOFF." + account.id; - PendingIntent pi = PendingIntent.getBroadcast(ServiceSynchronize.this, 0, new Intent(id), 0); + PendingIntent pi = PendingIntent.getBroadcast(this, 0, new Intent(id), 0); registerReceiver(alarm, new IntentFilter(id)); AlarmManager am = (AlarmManager) getSystemService(Context.ALARM_SERVICE); @@ -1817,7 +1815,7 @@ public class ServiceSynchronize extends LifecycleService { message.uid = null; db.message().updateMessage(message); Log.i("Appending sent msgid=" + message.msgid); - EntityOperation.queue(ServiceSynchronize.this, db, message, EntityOperation.ADD); // Could already exist + EntityOperation.queue(this, db, message, EntityOperation.ADD); // Could already exist } } @@ -1900,8 +1898,7 @@ public class ServiceSynchronize extends LifecycleService { MessageHelper helper = new MessageHelper((MimeMessage) imessage); String html = helper.getHtml(); - String text = (html == null ? null : Jsoup.parse(html).text()); - String preview = (text == null ? null : text.substring(0, Math.min(text.length(), PREVIEW_SIZE))); + String preview = HtmlHelper.getPreview(html); message.write(this, html); db.message().setMessageContent(message.id, true, preview); }