From c3bbd714698ac9508e6ec43dd6abfb1f5e29ad8d Mon Sep 17 00:00:00 2001 From: M66B Date: Wed, 6 Jan 2021 21:43:26 +0100 Subject: [PATCH] Use ICU break iterator on recent Android versions --- .../eu/faircode/email/MessageClassifier.java | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/app/src/main/java/eu/faircode/email/MessageClassifier.java b/app/src/main/java/eu/faircode/email/MessageClassifier.java index 1a3dc0f0a1..aaedf12d69 100644 --- a/app/src/main/java/eu/faircode/email/MessageClassifier.java +++ b/app/src/main/java/eu/faircode/email/MessageClassifier.java @@ -21,6 +21,7 @@ package eu.faircode.email; import android.content.Context; import android.content.SharedPreferences; +import android.os.Build; import android.text.TextUtils; import androidx.preference.PreferenceManager; @@ -32,7 +33,6 @@ import org.json.JSONObject; import java.io.File; import java.io.IOException; -import java.text.BreakIterator; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -175,18 +175,35 @@ public class MessageClassifier { State state = new State(); state.words.add(null); - BreakIterator boundary = BreakIterator.getWordInstance(); // TODO ICU - boundary.setText(text); - int start = boundary.first(); - for (int end = boundary.next(); end != BreakIterator.DONE; end = boundary.next()) { - String word = text.substring(start, end).toLowerCase(); - if (word.length() > 1 && - !state.words.contains(word) && - !word.matches(".*\\d.*")) { - state.words.add(word); - process(account, currentClass, added, state); + if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) { + java.text.BreakIterator boundary = java.text.BreakIterator.getWordInstance(); + boundary.setText(text); + int start = boundary.first(); + for (int end = boundary.next(); end != java.text.BreakIterator.DONE; end = boundary.next()) { + String word = text.substring(start, end).toLowerCase(); + if (word.length() > 1 && + !state.words.contains(word) && + !word.matches(".*\\d.*")) { + state.words.add(word); + process(account, currentClass, added, state); + } + start = end; + } + } else { + // The ICU break iterator can properly handle Chinese texts + android.icu.text.BreakIterator boundary = android.icu.text.BreakIterator.getWordInstance(); + boundary.setText(text); + int start = boundary.first(); + for (int end = boundary.next(); end != android.icu.text.BreakIterator.DONE; end = boundary.next()) { + String word = text.substring(start, end).toLowerCase(); + if (word.length() > 1 && + !state.words.contains(word) && + !word.matches(".*\\d.*")) { + state.words.add(word); + process(account, currentClass, added, state); + } + start = end; } - start = end; } state.words.add(null);