From 3ba8e97d5607c30b355a51d5dd8ed05e3df5d949 Mon Sep 17 00:00:00 2001 From: M66B Date: Thu, 29 Apr 2021 19:11:11 +0200 Subject: [PATCH] Simplified normalizing text --- .../java/eu/faircode/email/HtmlHelper.java | 56 ++++++------------- 1 file changed, 16 insertions(+), 40 deletions(-) diff --git a/app/src/main/java/eu/faircode/email/HtmlHelper.java b/app/src/main/java/eu/faircode/email/HtmlHelper.java index f69257b616..a67243f3bd 100644 --- a/app/src/main/java/eu/faircode/email/HtmlHelper.java +++ b/app/src/main/java/eu/faircode/email/HtmlHelper.java @@ -134,8 +134,6 @@ public class HtmlHelper { private static final int SMALL_IMAGE_SIZE = 5; // pixels private static final int TRACKING_PIXEL_SURFACE = 25; // pixels private static final float[] HEADING_SIZES = {1.5f, 1.4f, 1.3f, 1.2f, 1.1f, 1f}; - private static String WHITESPACE = " \t\f"; - private static String WHITESPACE_NL = WHITESPACE + "\r\n"; private static final String LINE = "----------------------------------------"; private static final HashMap x11ColorMap = new HashMap<>(); @@ -2038,8 +2036,7 @@ public class HtmlHelper { private int plain = 0; private List block = new ArrayList<>(); - private Pattern TRIM_WHITESPACE_NL = - Pattern.compile("[" + WHITESPACE + "]*\\r?\\n[" + WHITESPACE + "]*"); + private final Pattern FOLD_WHITESPACE = Pattern.compile("[ \t\f\r\n]+"); // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements @@ -2079,37 +2076,27 @@ public class HtmlHelper { // https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Whitespace TextNode tnode; String text; - int index; for (int i = 0; i < block.size(); ) { tnode = block.get(i); text = tnode.getWholeText(); - // Remove whitespace before/after newlines - text = TRIM_WHITESPACE_NL.matcher(text).replaceAll(" "); - if ("-- ".equals(text)) { tnode.text(text); i++; continue; } - // Remove leading whitespace - if (i == 0 || endsWithWhitespace(block.get(i - 1).text())) { - index = 0; - while (isWhiteSpace(text, index)) 
- index++; - - if (index > 0) - text = text.substring(index); - } + // Fold white space + text = FOLD_WHITESPACE.matcher(text).replaceAll(" "); - // Remove multiple trailing whitespace - index = text.length() - 1; - while (isWhiteSpace(text, index) && - (isWhiteSpace(text, index - 1) || i == block.size() - 1)) - index--; + // Conditionally remove leading whitespace + if (isSpace(text, 0) && + (i == 0 || endsWithSpace(block.get(i - 1).text()))) + text = text.substring(1); - text = text.substring(0, index + 1); + // Conditionally remove trailing whitespace + if (i == block.size() - 1 && endsWithSpace(text)) + text = text.substring(0, text.length() - 1); tnode.text(text); @@ -2119,23 +2106,13 @@ public class HtmlHelper { i++; } - // Remove last trailing whitespace - if (block.size() > 0) { - tnode = block.get(block.size() - 1); - text = tnode.getWholeText(); - if (!"-- ".equals(text) && endsWithWhitespace(text)) { - text = text.substring(0, text.length() - 1); - tnode.text(text); - } - } - - // Remove blank blocks + // Remove all blank blocks boolean blank = true; for (int i = 0; i < block.size(); i++) { text = block.get(i).getWholeText(); for (int j = 0; j < text.length(); j++) { char kar = text.charAt(j); - if (WHITESPACE.indexOf(kar) < 0) { + if (kar != ' ') { blank = false; break; } @@ -2155,15 +2132,14 @@ public class HtmlHelper { } } - boolean isWhiteSpace(String text, int index) { + boolean isSpace(String text, int index) { if (index < 0 || index >= text.length()) return false; - char kar = text.charAt(index); - return (WHITESPACE_NL.indexOf(kar) >= 0); + return (text.charAt(index) == ' '); } - boolean endsWithWhitespace(String text) { - return isWhiteSpace(text, text.length() - 1); + boolean endsWithSpace(String text) { + return isSpace(text, text.length() - 1); } }, document.body());