Fixed incorrect transform from UTF-16 to US-ASCII

pull/208/head
M66B 2 years ago
parent da44c936be
commit c2552cd744

@ -472,6 +472,7 @@ public class ActivityEML extends ActivityBase {
Charset cs = Charset.forName(charset); Charset cs = Charset.forName(charset);
Charset detected = CharsetHelper.detect(text, cs); Charset detected = CharsetHelper.detect(text, cs);
boolean isUtf8 = CharsetHelper.isUTF8(text.getBytes(cs)); boolean isUtf8 = CharsetHelper.isUTF8(text.getBytes(cs));
boolean isUtf16 = CharsetHelper.isUTF16(text.getBytes(cs));
boolean isW1252 = !Objects.equals(text, CharsetHelper.utf8toW1252(text)); boolean isW1252 = !Objects.equals(text, CharsetHelper.utf8toW1252(text));
for (int i = 0; i < level; i++) for (int i = 0; i < level; i++)
@ -480,6 +481,7 @@ public class ActivityEML extends ActivityBase {
ssb.append("Detected: ") ssb.append("Detected: ")
.append(detected == null ? "?" : detected.toString()) .append(detected == null ? "?" : detected.toString())
.append(" isUTF8=").append(Boolean.toString(isUtf8)) .append(" isUTF8=").append(Boolean.toString(isUtf8))
.append(" isUTF16=").append(Boolean.toString(isUtf16))
.append(" isW1252=").append(Boolean.toString(isW1252)) .append(" isW1252=").append(Boolean.toString(isW1252))
.append('\n'); .append('\n');
} }

@ -86,7 +86,21 @@ public class CharsetHelper {
} }
} }
/**
 * Reports whether the given bytes can be decoded as UTF-16 without error.
 * <p>
 * A decoder obtained from {@link StandardCharsets#UTF_16} is configured to
 * report (rather than replace or ignore) malformed input and unmappable
 * characters, so any such problem surfaces as a
 * {@link CharacterCodingException}.
 *
 * @param octets the raw bytes to test
 * @return {@code true} if decoding completed without a coding error;
 *         {@code false} if a {@link CharacterCodingException} was raised
 *         (the exception is logged as a warning)
 */
static boolean isUTF16(byte[] octets) {
    // Strict decoder: fail on bad input instead of silently substituting
    CharsetDecoder strictDecoder = StandardCharsets.UTF_16.newDecoder();
    strictDecoder.onMalformedInput(CodingErrorAction.REPORT);
    strictDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);

    boolean decodable;
    try {
        // The decoded characters are not needed, only whether decoding succeeds
        strictDecoder.decode(ByteBuffer.wrap(octets));
        decodable = true;
    } catch (CharacterCodingException ex) {
        Log.w(ex);
        decodable = false;
    }
    return decodable;
}
static boolean isUTF8Alt(String text) { static boolean isUTF8Alt(String text) {
// This doesn't check the characters and is therefore unreliable
byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1); byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
int bytes; int bytes;

@ -3229,10 +3229,10 @@ public class MessageHelper {
try { try {
if (CHARSET16.contains(cs)) { if (CHARSET16.contains(cs)) {
Charset detected = CharsetHelper.detect(result, cs); Charset detected = CharsetHelper.detect(result, cs);
// UTF-16 can be detected as US-ASCII
if (!CHARSET16.contains(detected)) if (!CHARSET16.contains(detected))
Log.w(new Throwable("Charset=" + cs + " detected=" + detected)); Log.w(new Throwable("Charset=" + cs + " detected=" + detected));
if (StandardCharsets.US_ASCII.equals(detected) || if (StandardCharsets.UTF_8.equals(detected)) {
StandardCharsets.UTF_8.equals(detected)) {
charset = null; charset = null;
result = new String(result.getBytes(cs), detected); result = new String(result.getBytes(cs), detected);
} }

Loading…
Cancel
Save