Fixed incorrect transform from UTF-16 to US-ASCII

pull/208/head
M66B 2 years ago
parent da44c936be
commit c2552cd744

@ -472,6 +472,7 @@ public class ActivityEML extends ActivityBase {
Charset cs = Charset.forName(charset);
Charset detected = CharsetHelper.detect(text, cs);
boolean isUtf8 = CharsetHelper.isUTF8(text.getBytes(cs));
boolean isUtf16 = CharsetHelper.isUTF16(text.getBytes(cs));
boolean isW1252 = !Objects.equals(text, CharsetHelper.utf8toW1252(text));
for (int i = 0; i < level; i++)
@ -480,6 +481,7 @@ public class ActivityEML extends ActivityBase {
ssb.append("Detected: ")
.append(detected == null ? "?" : detected.toString())
.append(" isUTF8=").append(Boolean.toString(isUtf8))
.append(" isUTF16=").append(Boolean.toString(isUtf16))
.append(" isW1252=").append(Boolean.toString(isW1252))
.append('\n');
}

@ -86,7 +86,21 @@ public class CharsetHelper {
}
}
/**
 * Tests whether the given bytes can be decoded as UTF-16 without errors.
 * <p>
 * The decoder is configured to report (rather than replace or ignore)
 * malformed input and unmappable characters, so any such sequence makes
 * the check fail.
 *
 * @param octets the raw bytes to examine
 * @return {@code true} if decoding succeeded; {@code false} if a
 *         {@link CharacterCodingException} was raised (it is logged as a warning)
 */
static boolean isUTF16(byte[] octets) {
    CharsetDecoder strictDecoder = StandardCharsets.UTF_16.newDecoder();
    strictDecoder.onMalformedInput(CodingErrorAction.REPORT);
    strictDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);

    boolean decodable;
    try {
        // Only the success or failure of decoding matters; the decoded text is discarded
        strictDecoder.decode(ByteBuffer.wrap(octets));
        decodable = true;
    } catch (CharacterCodingException ex) {
        Log.w(ex);
        decodable = false;
    }
    return decodable;
}
static boolean isUTF8Alt(String text) {
// This doesn't check the characters and is therefore unreliable
byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
int bytes;

@ -3229,10 +3229,10 @@ public class MessageHelper {
try {
if (CHARSET16.contains(cs)) {
Charset detected = CharsetHelper.detect(result, cs);
// UTF-16 can be detected as US-ASCII
if (!CHARSET16.contains(detected))
Log.w(new Throwable("Charset=" + cs + " detected=" + detected));
if (StandardCharsets.US_ASCII.equals(detected) ||
StandardCharsets.UTF_8.equals(detected)) {
if (StandardCharsets.UTF_8.equals(detected)) {
charset = null;
result = new String(result.getBytes(cs), detected);
}

Loading…
Cancel
Save