Charset: refactoring

pull/208/head
M66B 3 years ago
parent 546ff162fa
commit 14edad1e2e

@ -3109,21 +3109,35 @@ public class MessageHelper {
return null; return null;
} }
String result; // Check character set
String charset = h.contentType.getParameter("charset");
if (UnknownCharsetProvider.charsetForMime(charset) == null)
warnings.add(context.getString(R.string.title_no_charset, charset));
if (TextUtils.isEmpty(charset) ||
charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name()))
charset = null;
Charset cs = null;
if (charset != null)
try {
cs = Charset.forName(charset);
} catch (UnsupportedCharsetException ignored) {
cs = null;
}
String result;
try { try {
Object content; Object content;
// Check for UTF-16 LE without BOM // Check for UTF-16 LE without BOM
String pcharset = h.contentType.getParameter("charset"); if (StandardCharsets.UTF_16.equals(cs) && override == null) {
if ("utf-16".equalsIgnoreCase(pcharset) && override == null) {
String charset = pcharset;
BufferedInputStream bis = new BufferedInputStream(h.part.getDataHandler().getInputStream()); BufferedInputStream bis = new BufferedInputStream(h.part.getDataHandler().getInputStream());
if (Boolean.TRUE.equals(CharsetHelper.isUTF16LE(bis))) { if (Boolean.TRUE.equals(CharsetHelper.isUTF16LE(bis))) {
charset = StandardCharsets.UTF_16LE.name(); Log.e("Charset " + cs + " -> UTF16LE");
Log.e("Charset " + pcharset + " -> " + charset); cs = StandardCharsets.UTF_16LE;
} }
content = Helper.readStream(bis, Charset.forName(charset)); content = Helper.readStream(bis, cs);
} else } else
content = h.part.getContent(); content = h.part.getContent();
@ -3141,15 +3155,8 @@ public class MessageHelper {
// Typically com.sun.mail.util.QPDecoderStream // Typically com.sun.mail.util.QPDecoderStream
if (BuildConfig.DEBUG && false) if (BuildConfig.DEBUG && false)
warnings.add(content.getClass().getName()); warnings.add(content.getClass().getName());
Charset charset; result = Helper.readStream((InputStream) content,
try { cs == null ? StandardCharsets.ISO_8859_1 : cs);
String cs = h.contentType.getParameter("charset");
charset = (cs == null ? StandardCharsets.ISO_8859_1 : Charset.forName(cs));
} catch (Throwable ex) {
Log.w(ex);
charset = StandardCharsets.ISO_8859_1;
}
result = Helper.readStream((InputStream) content, charset);
} else { } else {
Log.e(content.getClass().getName()); Log.e(content.getClass().getName());
result = content.toString(); result = content.toString();
@ -3169,24 +3176,9 @@ public class MessageHelper {
return null; return null;
} }
// Check character set
String charset = h.contentType.getParameter("charset");
if (UnknownCharsetProvider.charsetForMime(charset) == null)
warnings.add(context.getString(R.string.title_no_charset, charset));
if ((TextUtils.isEmpty(charset) || charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name())))
charset = null;
Charset cs = null;
try {
if (charset != null)
cs = Charset.forName(charset);
} catch (UnsupportedCharsetException ignored) {
}
if (h.isPlainText()) { if (h.isPlainText()) {
if (override == null) { if (override == null) {
if (charset == null || StandardCharsets.ISO_8859_1.equals(cs)) { if (cs == null || StandardCharsets.ISO_8859_1.equals(cs)) {
if (StandardCharsets.ISO_8859_1.equals(cs) && CharsetHelper.isUTF8(result)) { if (StandardCharsets.ISO_8859_1.equals(cs) && CharsetHelper.isUTF8(result)) {
Log.i("Charset upgrade=UTF8"); Log.i("Charset upgrade=UTF8");
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8); result = new String(result.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
@ -3254,11 +3246,11 @@ public class MessageHelper {
try { try {
if (CHARSET16.contains(cs)) { if (CHARSET16.contains(cs)) {
Charset detected = CharsetHelper.detect(result, cs); Charset detected = CharsetHelper.detect(result, cs);
// UTF-16 can be detected as US-ASCII
if (!CHARSET16.contains(detected)) if (!CHARSET16.contains(detected))
Log.w(new Throwable("Charset=" + cs + " detected=" + detected)); Log.w(new Throwable("Charset=" + cs + " detected=" + detected));
// UTF-16 can be detected as US-ASCII
if (StandardCharsets.UTF_8.equals(detected)) { if (StandardCharsets.UTF_8.equals(detected)) {
charset = null; cs = null;
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), detected); result = new String(result.getBytes(StandardCharsets.ISO_8859_1), detected);
} }
} }
@ -3266,26 +3258,27 @@ public class MessageHelper {
Log.w(ex); Log.w(ex);
} }
if (charset == null) { if (cs == null) {
// <meta charset="utf-8" /> // <meta charset="utf-8" />
// <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> // <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
String excerpt = result.substring(0, Math.min(MAX_META_EXCERPT, result.length())); String excerpt = result.substring(0, Math.min(MAX_META_EXCERPT, result.length()));
Document d = JsoupEx.parse(excerpt); Document d = JsoupEx.parse(excerpt);
for (Element meta : d.select("meta")) { for (Element meta : d.select("meta")) {
String mcharset = null;
if ("Content-Type".equalsIgnoreCase(meta.attr("http-equiv"))) { if ("Content-Type".equalsIgnoreCase(meta.attr("http-equiv"))) {
try { try {
ContentType ct = new ContentType(meta.attr("content")); ContentType ct = new ContentType(meta.attr("content"));
charset = ct.getParameter("charset"); mcharset = ct.getParameter("charset");
} catch (ParseException ex) { } catch (ParseException ex) {
Log.w(ex); Log.w(ex);
} }
} else } else
charset = meta.attr("charset"); mcharset = meta.attr("charset");
if (!TextUtils.isEmpty(charset)) if (!TextUtils.isEmpty(mcharset))
try { try {
Log.i("Charset meta=" + meta); Log.i("Charset meta=" + meta);
Charset c = Charset.forName(charset); Charset c = Charset.forName(mcharset);
// US-ASCII is a subset of ISO8859-1 // US-ASCII is a subset of ISO8859-1
if (StandardCharsets.US_ASCII.equals(c)) if (StandardCharsets.US_ASCII.equals(c))

Loading…
Cancel
Save