From 66293e7a74d39bbb1fcb649aaa072e1322ab4498 Mon Sep 17 00:00:00 2001 From: M66B Date: Sun, 5 Jun 2022 08:25:41 +0200 Subject: [PATCH] Less common charsets --- .../java/eu/faircode/email/CharsetHelper.java | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/app/src/main/java/eu/faircode/email/CharsetHelper.java b/app/src/main/java/eu/faircode/email/CharsetHelper.java index 1cc3b09163..c4e2b359f2 100644 --- a/app/src/main/java/eu/faircode/email/CharsetHelper.java +++ b/app/src/main/java/eu/faircode/email/CharsetHelper.java @@ -38,10 +38,18 @@ public class CharsetHelper { private static String CHINESE = new Locale("zh").getLanguage(); private static final List COMMON = Collections.unmodifiableList(Arrays.asList( "US-ASCII", - "ISO-8859-1", "ISO-8859-2", - "windows-1250", "windows-1252", "windows-1257", + "ISO-8859-1", "ISO-8859-2", "ISO-8859-3", "ISO-8859-7", + "windows-1250", "windows-1251", "windows-1252", "windows-1257", "UTF-7", "UTF-8" )); + private static final List LESS_COMMON = Collections.unmodifiableList(Arrays.asList( + "GBK", "GB2312", "HZ-GB-2312", + "EUC", "EUC-KR", + "Big5", "BIG5-CP950", + "ISO-2022-JP", "Shift_JIS", + "cp852", + "x-binaryenc" + )); private static final int MIN_W1252 = 10; private static final Pair[] sUtf8W1252 = new Pair[128]; @@ -182,7 +190,7 @@ public class CharsetHelper { if (TextUtils.isEmpty(detected.charset)) { Log.e("compact_enc_det result=" + detected); return null; - } else if (COMMON.contains(detected.charset)) + } else if (COMMON.contains(detected.charset) || LESS_COMMON.contains(detected.charset)) Log.w("compact_enc_det result=" + detected); else if ("GB18030".equals(detected.charset)) { boolean chinese = Locale.getDefault().getLanguage().equals(CHINESE); @@ -190,7 +198,7 @@ public class CharsetHelper { Log.e("compact_enc_det result=" + detected + " chinese=" + chinese); if (!chinese) return null; - } else // GBK, Big5, ISO-2022-JP, HZ-GB-2312, GB2312, Shift_JIS, x-binaryenc, EUC-KR + } else Log.e("compact_enc_det result=" + detected); return Charset.forName(detected.charset);