From a56005b95d6edf470397547c05be180be4c02904 Mon Sep 17 00:00:00 2001 From: M66B Date: Fri, 25 Jun 2021 09:09:12 +0200 Subject: [PATCH] EML: show detected charset --- .../java/eu/faircode/email/ActivityEML.java | 32 +++++++++++++++++++ .../java/eu/faircode/email/CharsetHelper.java | 4 ++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/app/src/main/java/eu/faircode/email/ActivityEML.java b/app/src/main/java/eu/faircode/email/ActivityEML.java index 553bf95c01..13257580ae 100644 --- a/app/src/main/java/eu/faircode/email/ActivityEML.java +++ b/app/src/main/java/eu/faircode/email/ActivityEML.java @@ -64,6 +64,8 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.InputStream; import java.io.OutputStream; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.text.DateFormat; import java.util.ArrayList; import java.util.Enumeration; @@ -77,6 +79,7 @@ import javax.mail.Message; import javax.mail.Multipart; import javax.mail.Part; import javax.mail.Session; +import javax.mail.internet.ContentType; import javax.mail.internet.MimeMessage; public class ActivityEML extends ActivityBase { @@ -422,6 +425,35 @@ public class ActivityEML extends ActivityBase { .append(size > 0 ? Helper.humanReadableByteCount(size) : "?") .append('\n'); + if (!part.isMimeType("multipart/*")) { + Object content = part.getContent(); + if (content instanceof String) { + String text = (String) content; + Charset detected = CharsetHelper.detect(text); + + String charset; + try { + ContentType ct = new ContentType(part.getContentType()); + charset = ct.getParameter("charset"); + } catch (Throwable ignored) { + charset = null; + } + if (charset == null) + charset = StandardCharsets.ISO_8859_1.name(); + + Charset cs = Charset.forName(charset); + boolean isUtf8 = CharsetHelper.isUTF8(text.getBytes(cs)); + + for (int i = 0; i < level; i++) + ssb.append(" "); + + ssb.append("Detected: ") + .append(detected == null ? "?" : detected.toString()) + .append(" isUTF8=").append(Boolean.toString(isUtf8)) + .append('\n'); + } + } + ssb.append('\n'); if (part.isMimeType("multipart/*")) { diff --git a/app/src/main/java/eu/faircode/email/CharsetHelper.java b/app/src/main/java/eu/faircode/email/CharsetHelper.java index 39dae563f3..4886b6dbae 100644 --- a/app/src/main/java/eu/faircode/email/CharsetHelper.java +++ b/app/src/main/java/eu/faircode/email/CharsetHelper.java @@ -48,11 +48,13 @@ public class CharsetHelper { static boolean isUTF8(String text) { // Get extended ASCII characters byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1); + return isUTF8(octets); + } + static boolean isUTF8(byte[] octets) { CharsetDecoder utf8Decoder = StandardCharsets.UTF_8.newDecoder() .onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); - try { utf8Decoder.decode(ByteBuffer.wrap(octets)); return true;