From 7c30c8e7d866c77edc7fd4fd9bb6c5ef171413b9 Mon Sep 17 00:00:00 2001
From: M66B
Date: Sat, 10 Oct 2020 13:20:24 +0200
Subject: [PATCH] Improved charset detection

---
 .../java/eu/faircode/email/CharsetHelper.java | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/app/src/main/java/eu/faircode/email/CharsetHelper.java b/app/src/main/java/eu/faircode/email/CharsetHelper.java
index 69eb0e3ac2..8895e95458 100644
--- a/app/src/main/java/eu/faircode/email/CharsetHelper.java
+++ b/app/src/main/java/eu/faircode/email/CharsetHelper.java
@@ -26,9 +26,7 @@ import java.nio.charset.StandardCharsets;
 import java.nio.charset.UnsupportedCharsetException;
 
 class CharsetHelper {
-    private static UniversalDetector detector = new UniversalDetector();
-
-    private static final int SAMPLE_SIZE = 2 * 1024;
+    private static final int SAMPLE_SIZE = 1024;
 
     static boolean isUTF8(String text) {
         // Get extended ASCII characters
@@ -124,9 +122,14 @@ class CharsetHelper {
 
     static Charset detect(String text) {
         try {
-            byte[] sample = text.getBytes(StandardCharsets.ISO_8859_1);
-
-            detector.handleData(sample, 0, Math.min(SAMPLE_SIZE, sample.length));
+            byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
+
+            int offset = 0;
+            UniversalDetector detector = new UniversalDetector();
+            while (offset < octets.length && !detector.isDone()) {
+                detector.handleData(octets, offset, Math.min(SAMPLE_SIZE, octets.length - offset));
+                offset += SAMPLE_SIZE;
+            }
             detector.dataEnd();
 
             String detected = detector.getDetectedCharset();
@@ -137,8 +140,6 @@ class CharsetHelper {
         } catch (Throwable ex) {
             Log.w(ex);
             return null;
-        } finally {
-            detector.reset();
         }
     }
 }