Classifier simplification

pull/191/head
M66B 4 years ago
parent 0d6ba41aab
commit 7860a3986f

@ -167,41 +167,30 @@ public class MessageClassifier {
} }
State state = new State(); State state = new State();
state.words.add(null); process(account, currentClass, added, null, state);
if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) { if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
java.text.BreakIterator boundary = java.text.BreakIterator.getWordInstance(); java.text.BreakIterator boundary = java.text.BreakIterator.getWordInstance();
boundary.setText(text); boundary.setText(text);
int start = boundary.first(); int start = boundary.first();
for (int end = boundary.next(); end != java.text.BreakIterator.DONE; end = boundary.next()) { for (int end = boundary.next(); end != java.text.BreakIterator.DONE; end = boundary.next()) {
String word = text.substring(start, end).trim().toLowerCase(); String word = text.substring(start, end);
if (word.length() > 1 && process(account, currentClass, added, word, state);
!state.words.contains(word) &&
!word.matches(".*\\d.*")) {
state.words.add(word);
process(account, currentClass, added, state);
}
start = end; start = end;
} }
} else { } else {
// The ICU break iterator can properly handle Chinese texts // The ICU break iterator works better for Chinese texts
android.icu.text.BreakIterator boundary = android.icu.text.BreakIterator.getWordInstance(); android.icu.text.BreakIterator boundary = android.icu.text.BreakIterator.getWordInstance();
boundary.setText(text); boundary.setText(text);
int start = boundary.first(); int start = boundary.first();
for (int end = boundary.next(); end != android.icu.text.BreakIterator.DONE; end = boundary.next()) { for (int end = boundary.next(); end != android.icu.text.BreakIterator.DONE; end = boundary.next()) {
String word = text.substring(start, end).trim().toLowerCase(); String word = text.substring(start, end);
if (word.length() > 1 && process(account, currentClass, added, word, state);
!state.words.contains(word) &&
!word.matches(".*\\d.*")) {
state.words.add(word);
process(account, currentClass, added, state);
}
start = end; start = end;
} }
} }
state.words.add(null); process(account, currentClass, added, null, state);
process(account, currentClass, added, state);
if (!added) if (!added)
return null; return null;
@ -253,7 +242,18 @@ public class MessageClassifier {
return classification; return classification;
} }
private static void process(long account, String currentClass, boolean added, State state) { private static void process(long account, String currentClass, boolean added, String word, State state) {
if (word != null) {
word = word.trim().toLowerCase();
if (word.length() < 2 ||
state.words.contains(word) ||
word.matches(".*\\d.*"))
return;
}
state.words.add(word);
if (state.words.size() < 3) if (state.words.size() < 3)
return; return;

Loading…
Cancel
Save