From c2552cd74453371e3d145fa040057de2e4b4ed96 Mon Sep 17 00:00:00 2001
From: M66B
Date: Fri, 10 Jun 2022 12:39:53 +0200
Subject: [PATCH] Fixed incorrect transform from UTF-16 to US-ASCII

---

Review notes (commentary only; text in this area is not part of the diff):

  What the patch does
  * CharsetHelper: adds isUTF16(byte[]), which returns true when a UTF-16
    decoder configured to REPORT malformed input and unmappable characters
    accepts the bytes; on CharacterCodingException it calls Log.w(ex) and
    returns false. Also adds a comment marking isUTF8Alt as unreliable.
  * ActivityEML: computes isUTF16 on text.getBytes(cs) and appends
    " isUTF16=<value>" to the "Detected:" line, between isUTF8 and isW1252.
  * MessageHelper: inside the CHARSET16 branch, the re-decode of the text
    (charset = null; result = new String(result.getBytes(cs), detected)) now
    happens only when the detected charset is UTF-8; the US-ASCII case was
    removed from the condition.

  NOTE(review): in isUTF16 the local is named utf8Decoder although it holds
  the StandardCharsets.UTF_16 decoder - looks like a leftover name.
  NOTE(review): per the java.nio.charset.Charset documentation, UTF-16
  decoding honours a leading byte-order mark and otherwise assumes
  big-endian; confirm that treating BOM-less input as big-endian is intended.
  NOTE(review): isUTF16 logs a warning for every negative result; confirm
  this is wanted at the ActivityEML call site.
  NOTE(review): line breaks and indentation in the hunks below were restored
  from a whitespace-collapsed copy; verify against the commit before applying.

 .../main/java/eu/faircode/email/ActivityEML.java   |  2 ++
 .../main/java/eu/faircode/email/CharsetHelper.java | 14 ++++++++++++++
 .../main/java/eu/faircode/email/MessageHelper.java |  4 ++--
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/app/src/main/java/eu/faircode/email/ActivityEML.java b/app/src/main/java/eu/faircode/email/ActivityEML.java
index 26fb8c4d7c..4575c390b0 100644
--- a/app/src/main/java/eu/faircode/email/ActivityEML.java
+++ b/app/src/main/java/eu/faircode/email/ActivityEML.java
@@ -472,6 +472,7 @@ public class ActivityEML extends ActivityBase {
                 Charset cs = Charset.forName(charset);
                 Charset detected = CharsetHelper.detect(text, cs);
                 boolean isUtf8 = CharsetHelper.isUTF8(text.getBytes(cs));
+                boolean isUtf16 = CharsetHelper.isUTF16(text.getBytes(cs));
                 boolean isW1252 = !Objects.equals(text, CharsetHelper.utf8toW1252(text));
 
                 for (int i = 0; i < level; i++)
@@ -480,6 +481,7 @@ public class ActivityEML extends ActivityBase {
                 ssb.append("Detected: ")
                         .append(detected == null ? "?" : detected.toString())
                         .append(" isUTF8=").append(Boolean.toString(isUtf8))
+                        .append(" isUTF16=").append(Boolean.toString(isUtf16))
                         .append(" isW1252=").append(Boolean.toString(isW1252))
                         .append('\n');
             }
diff --git a/app/src/main/java/eu/faircode/email/CharsetHelper.java b/app/src/main/java/eu/faircode/email/CharsetHelper.java
index 0d64cc43db..4cda2ea680 100644
--- a/app/src/main/java/eu/faircode/email/CharsetHelper.java
+++ b/app/src/main/java/eu/faircode/email/CharsetHelper.java
@@ -86,7 +86,21 @@ public class CharsetHelper {
         }
     }
 
+    static boolean isUTF16(byte[] octets) {
+        CharsetDecoder utf8Decoder = StandardCharsets.UTF_16.newDecoder()
+                .onMalformedInput(CodingErrorAction.REPORT)
+                .onUnmappableCharacter(CodingErrorAction.REPORT);
+        try {
+            utf8Decoder.decode(ByteBuffer.wrap(octets));
+            return true;
+        } catch (CharacterCodingException ex) {
+            Log.w(ex);
+            return false;
+        }
+    }
+
     static boolean isUTF8Alt(String text) {
+        // This doesn't check the characters and is therefore unreliable
         byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
 
         int bytes;
diff --git a/app/src/main/java/eu/faircode/email/MessageHelper.java b/app/src/main/java/eu/faircode/email/MessageHelper.java
index 3ff073ae90..e0516a9202 100644
--- a/app/src/main/java/eu/faircode/email/MessageHelper.java
+++ b/app/src/main/java/eu/faircode/email/MessageHelper.java
@@ -3229,10 +3229,10 @@ public class MessageHelper {
                 try {
                     if (CHARSET16.contains(cs)) {
                         Charset detected = CharsetHelper.detect(result, cs);
+                        // UTF-16 can be detected as US-ASCII
                         if (!CHARSET16.contains(detected))
                             Log.w(new Throwable("Charset=" + cs + " detected=" + detected));
-                        if (StandardCharsets.US_ASCII.equals(detected) ||
-                                StandardCharsets.UTF_8.equals(detected)) {
+                        if (StandardCharsets.UTF_8.equals(detected)) {
                             charset = null;
                             result = new String(result.getBytes(cs), detected);
                         }