Using juniversalchardet

pull/187/head
M66B 5 years ago
parent 5707f8b540
commit 615a006235

@ -27,3 +27,4 @@ FairEmail uses:
* [GPX file type icon](https://www.flaticon.com/free-icon/gpx-file-format-variant_29258) made by [Freepik](https://www.flaticon.com/authors/freepik) from [Flaticon](https://www.flaticon.com).
* [Disconnect's tracker protection lists](https://github.com/disconnectme/disconnect-tracking-protection). Copyright 2010-2020 Disconnect, Inc. [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International license](https://github.com/disconnectme/disconnect-tracking-protection/blob/master/LICENSE).
* [Over-Scroll Support For Android's RecyclerView, ListView, GridView, ScrollView ...](https://github.com/EverythingMe/overscroll-decor). Copyright (c) 2015, DoAT Media Ltd. [BSD-2-Clause License](https://github.com/EverythingMe/overscroll-decor/blob/master/LICENSE)
* [juniversalchardet](https://github.com/albfernandez/juniversalchardet). Copyright (C) 2001 the Initial Developer. All Rights Reserved. [GNU General Public License Version 2](https://github.com/albfernandez/juniversalchardet#license).

@ -235,6 +235,7 @@ dependencies {
def overscroll_version = "1.1.0"
def appauth_version = "0.7.1"
def jcharset_version = "2.1"
def jchardet_version = "2.3.2"
// https://developer.android.com/jetpack/androidx/releases/
@ -395,4 +396,8 @@ dependencies {
// http://www.freeutils.net/source/jcharset/
// https://mvnrepository.com/artifact/net.freeutils/jcharset
implementation "net.freeutils:jcharset:$jcharset_version"
// https://github.com/albfernandez/juniversalchardet
// https://mvnrepository.com/artifact/com.github.albfernandez/juniversalchardet
implementation "com.github.albfernandez:juniversalchardet:$jchardet_version"
}

@ -27,3 +27,4 @@ FairEmail uses:
* [GPX file type icon](https://www.flaticon.com/free-icon/gpx-file-format-variant_29258) made by [Freepik](https://www.flaticon.com/authors/freepik) from [Flaticon](https://www.flaticon.com).
* [Disconnect's tracker protection lists](https://github.com/disconnectme/disconnect-tracking-protection). Copyright 2010-2020 Disconnect, Inc. [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International license](https://github.com/disconnectme/disconnect-tracking-protection/blob/master/LICENSE).
* [Over-Scroll Support For Android's RecyclerView, ListView, GridView, ScrollView ...](https://github.com/EverythingMe/overscroll-decor). Copyright (c) 2015, DoAT Media Ltd. [BSD-2-Clause License](https://github.com/EverythingMe/overscroll-decor/blob/master/LICENSE)
* [juniversalchardet](https://github.com/albfernandez/juniversalchardet). Copyright (C) 2001 the Initial Developer. All Rights Reserved. [GNU General Public License Version 2](https://github.com/albfernandez/juniversalchardet#license).

@ -19,11 +19,17 @@ package eu.faircode.email;
Copyright 2018-2020 by Marcel Bokhorst (M66B)
*/
import org.mozilla.universalchardet.UniversalDetector;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
class CharsetHelper {
private static UniversalDetector detector = new UniversalDetector();
private static final int SAMPLE_SIZE = 2 * 1024;
static boolean isUTF8(String text) {
// Get extended ASCII characters
byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
@ -115,4 +121,24 @@ class CharsetHelper {
return false;
}
/**
 * Tries to detect the character set of the given text.
 * <p>
 * The text is converted back to octets using ISO-8859-1 and at most the first
 * {@code SAMPLE_SIZE} octets are fed to the detector.
 * <p>
 * Detection is best effort: any failure (including a {@code null} text or a
 * detected charset name that is not supported) is logged and results in {@code null}.
 *
 * @param text the text to examine
 * @return the detected charset, or {@code null} if none was detected or detection failed
 */
static Charset detect(String text) {
    try {
        // Use a detector per call: the detector accumulates state between
        // handleData() and dataEnd(), so a shared instance would let
        // concurrent calls mix their samples or reset each other
        UniversalDetector chardet = new UniversalDetector();

        byte[] sample = text.getBytes(StandardCharsets.ISO_8859_1);
        chardet.handleData(sample, 0, Math.min(SAMPLE_SIZE, sample.length));
        chardet.dataEnd();

        String detected = chardet.getDetectedCharset();
        if (detected == null)
            return null;

        return Charset.forName(detected);
    } catch (Throwable ex) {
        Log.w(ex);
        return null;
    }
}
}

@ -1729,22 +1729,28 @@ public class MessageHelper {
if (UnknownCharsetProvider.charsetForMime(charset) == null)
warnings.add(context.getString(R.string.title_no_charset, charset));
if ((TextUtils.isEmpty(charset) || charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name())))
charset = null;
if (part.isMimeType("text/plain")) {
if (TextUtils.isEmpty(charset) && CharsetHelper.isISO2022JP(result))
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), "ISO-2022-JP");
else if ((TextUtils.isEmpty(charset) || charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name())) &&
CharsetHelper.isUTF8(result)) {
Log.i("Charset plain=UTF8");
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
if (charset == null) {
Charset detected = CharsetHelper.detect(result);
if (detected == null) {
if (CharsetHelper.isUTF8(result)) {
Log.i("Charset plain=UTF8");
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
}
} else {
Log.i("Charset plain=" + detected.name());
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), detected);
}
}
if ("flowed".equalsIgnoreCase(ct.getParameter("format")))
result = HtmlHelper.flow(result);
result = "<div x-plain=\"true\">" + HtmlHelper.formatPre(result) + "</div>";
} else if (part.isMimeType("text/html")) {
if (TextUtils.isEmpty(charset) && CharsetHelper.isISO2022JP(result))
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), "ISO-2022-JP");
else if (TextUtils.isEmpty(charset)) {
if (charset == null) {
// <meta charset="utf-8" />
// <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
String excerpt = result.substring(0, Math.min(MAX_META_EXCERPT, result.length()));

Loading…
Cancel
Save