diff --git a/app/src/main/java/eu/faircode/email/CharsetHelper.java b/app/src/main/java/eu/faircode/email/CharsetHelper.java index 67e71388de..0e29f3d47a 100644 --- a/app/src/main/java/eu/faircode/email/CharsetHelper.java +++ b/app/src/main/java/eu/faircode/email/CharsetHelper.java @@ -23,17 +23,23 @@ import android.text.TextUtils; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; import java.util.Locale; class CharsetHelper { - private static String CHINESE = new Locale("zh").getLanguage(); private static final int MAX_SAMPLE_SIZE = 8192; + private static String CHINESE = new Locale("zh").getLanguage(); + private static final List<String> COMMON = Collections.unmodifiableList(Arrays.asList( + "US-ASCII", "ISO-8859-1", "ISO-8859-2", "windows-1250", "windows-1252", "windows-1257", "UTF-8" + )); static { System.loadLibrary("compact_enc_det"); } - private static native String jni_detect(byte[] chars); + private static native DetectResult jni_detect(byte[] octets); static boolean isUTF8(String text) { // Get extended ASCII characters @@ -80,30 +86,43 @@ class CharsetHelper { } Log.i("compact_enc_det sample=" + sample.length); - String detected = jni_detect(sample); - if ("US-ASCII".equals(detected) || - "ISO-8859-1".equals(detected) || - "ISO-8859-2".equals(detected) || - "windows-1250".equals(detected) || - "windows-1252".equals(detected) || - "windows-1257".equals(detected) || - "UTF-8".equals(detected)) + DetectResult detected = jni_detect(sample); + + if (TextUtils.isEmpty(detected.charset)) { + Log.e("compact_enc_det result=" + detected); + return null; + } else if (!BuildConfig.PLAY_STORE_RELEASE && + COMMON.contains(detected.charset)) Log.w("compact_enc_det result=" + detected); - else if ("GB18030".equals(detected) && + else if ("GB18030".equals(detected.charset) && !Locale.getDefault().getLanguage().equals(CHINESE)) { // 
https://github.com/google/compact_enc_det/issues/8 - Log.w("compact_enc_det result=" + detected); - return null; - } else // ISO-2022-JP, etc Log.e("compact_enc_det result=" + detected); - - if (TextUtils.isEmpty(detected)) return null; + } else + Log.e("compact_enc_det result=" + detected); - return Charset.forName(detected); + return Charset.forName(detected.charset); } catch (Throwable ex) { Log.w(ex); return null; } } + + private static class DetectResult { + String charset; + int bytes_consumed; + boolean is_reliable; + + DetectResult(String charset, int bytes_consumed, boolean is_reliable) { + this.charset = charset; + this.bytes_consumed = bytes_consumed; + this.is_reliable = is_reliable; + } + + @Override + public String toString() { + return charset + " c=" + bytes_consumed + " r=" + is_reliable; + } + } } diff --git a/app/src/main/jni/charset.cc b/app/src/main/jni/charset.cc index 9dc95ba322..04d0b2cb52 100644 --- a/app/src/main/jni/charset.cc +++ b/app/src/main/jni/charset.cc @@ -14,10 +14,10 @@ void log_android(int prio, const char *fmt, ...) 
{ } } -extern "C" JNIEXPORT jstring JNICALL -Java_eu_faircode_email_CharsetHelper_jni_1detect(JNIEnv *env, jclass type, jbyteArray _bytes) { - int len = env->GetArrayLength(_bytes); - jbyte *bytes = env->GetByteArrayElements(_bytes, nullptr); +extern "C" JNIEXPORT jobject JNICALL +Java_eu_faircode_email_CharsetHelper_jni_1detect(JNIEnv *env, jclass type, jbyteArray _octets) { + int len = env->GetArrayLength(_octets); + jbyte *octets = env->GetByteArrayElements(_octets, nullptr); // https://github.com/google/compact_enc_det @@ -25,7 +25,7 @@ Java_eu_faircode_email_CharsetHelper_jni_1detect(JNIEnv *env, jclass type, jbyte int bytes_consumed; Encoding encoding = CompactEncDet::DetectEncoding( - (const char *) bytes, len, + (const char *) octets, len, nullptr, nullptr, nullptr, UNKNOWN_ENCODING, UNKNOWN_LANGUAGE, @@ -39,7 +39,10 @@ Java_eu_faircode_email_CharsetHelper_jni_1detect(JNIEnv *env, jclass type, jbyte encoding, name, bytes_consumed, is_reliable); // https://developer.android.com/training/articles/perf-jni#primitive-arrays - env->ReleaseByteArrayElements(_bytes, bytes, JNI_ABORT); + env->ReleaseByteArrayElements(_octets, octets, JNI_ABORT); - return env->NewStringUTF(name); + jclass cls = env->FindClass("eu/faircode/email/CharsetHelper$DetectResult"); + jmethodID ctor = env->GetMethodID(cls, "<init>", "(Ljava/lang/String;IZ)V"); + jstring jname = env->NewStringUTF(name); + return env->NewObject(cls, ctor, jname, (jint) bytes_consumed, (jboolean) is_reliable); }