Simplified normalizing text

pull/198/head
M66B 4 years ago
parent 4a216a719a
commit 3ba8e97d56

@ -134,8 +134,6 @@ public class HtmlHelper {
private static final int SMALL_IMAGE_SIZE = 5; // pixels private static final int SMALL_IMAGE_SIZE = 5; // pixels
private static final int TRACKING_PIXEL_SURFACE = 25; // pixels private static final int TRACKING_PIXEL_SURFACE = 25; // pixels
private static final float[] HEADING_SIZES = {1.5f, 1.4f, 1.3f, 1.2f, 1.1f, 1f}; private static final float[] HEADING_SIZES = {1.5f, 1.4f, 1.3f, 1.2f, 1.1f, 1f};
private static String WHITESPACE = " \t\f";
private static String WHITESPACE_NL = WHITESPACE + "\r\n";
private static final String LINE = "----------------------------------------"; private static final String LINE = "----------------------------------------";
private static final HashMap<String, Integer> x11ColorMap = new HashMap<>(); private static final HashMap<String, Integer> x11ColorMap = new HashMap<>();
@ -2038,8 +2036,7 @@ public class HtmlHelper {
private int plain = 0; private int plain = 0;
private List<TextNode> block = new ArrayList<>(); private List<TextNode> block = new ArrayList<>();
private Pattern TRIM_WHITESPACE_NL = private final Pattern FOLD_WHITESPACE = Pattern.compile("[ \t\f\r\n]+");
Pattern.compile("[" + WHITESPACE + "]*\\r?\\n[" + WHITESPACE + "]*");
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
@ -2079,37 +2076,27 @@ public class HtmlHelper {
// https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Whitespace // https://developer.mozilla.org/en-US/docs/Web/API/Document_Object_Model/Whitespace
TextNode tnode; TextNode tnode;
String text; String text;
int index;
for (int i = 0; i < block.size(); ) { for (int i = 0; i < block.size(); ) {
tnode = block.get(i); tnode = block.get(i);
text = tnode.getWholeText(); text = tnode.getWholeText();
// Remove whitespace before/after newlines
text = TRIM_WHITESPACE_NL.matcher(text).replaceAll(" ");
if ("-- ".equals(text)) { if ("-- ".equals(text)) {
tnode.text(text); tnode.text(text);
i++; i++;
continue; continue;
} }
// Remove leading whitespace // Fold white space
if (i == 0 || endsWithWhitespace(block.get(i - 1).text())) { text = FOLD_WHITESPACE.matcher(text).replaceAll(" ");
index = 0;
while (isWhiteSpace(text, index))
index++;
if (index > 0)
text = text.substring(index);
}
// Remove multiple trailing whitespace // Conditionally remove leading whitespace
index = text.length() - 1; if (isSpace(text, 0) &&
while (isWhiteSpace(text, index) && (i == 0 || endsWithSpace(block.get(i - 1).text())))
(isWhiteSpace(text, index - 1) || i == block.size() - 1)) text = text.substring(1);
index--;
text = text.substring(0, index + 1); // Conditionally remove trailing whitespace
if (i == block.size() - 1 && endsWithSpace(text))
text = text.substring(0, text.length() - 1);
tnode.text(text); tnode.text(text);
@ -2119,23 +2106,13 @@ public class HtmlHelper {
i++; i++;
} }
// Remove last trailing whitespace // Remove all blank blocks
if (block.size() > 0) {
tnode = block.get(block.size() - 1);
text = tnode.getWholeText();
if (!"-- ".equals(text) && endsWithWhitespace(text)) {
text = text.substring(0, text.length() - 1);
tnode.text(text);
}
}
// Remove blank blocks
// Side-by-side diff: left column = removed code, right column = added code.
// Scans every text node in the block; `blank` stays true only while no
// character satisfies the condition on the `if` line below.
// NOTE(review): the old (left) condition cleared `blank` on a character that is
// NOT in WHITESPACE, i.e. on real content. The new (right) condition `kar == ' '`
// clears it on a space instead, which looks inverted - presumably `kar != ' '`
// was intended now that whitespace runs are folded to a single ' '. Confirm.
boolean blank = true; boolean blank = true;
for (int i = 0; i < block.size(); i++) { for (int i = 0; i < block.size(); i++) {
text = block.get(i).getWholeText(); text = block.get(i).getWholeText();
for (int j = 0; j < text.length(); j++) { for (int j = 0; j < text.length(); j++) {
char kar = text.charAt(j); char kar = text.charAt(j);
if (WHITESPACE.indexOf(kar) < 0) { if (kar == ' ') {
blank = false; blank = false;
// Leaves only the inner character loop; the outer loop over nodes continues.
break; break;
} }
@ -2155,15 +2132,14 @@ public class HtmlHelper {
} }
} }
// Side-by-side diff: left column = removed isWhiteSpace(), right column = added isSpace().
// Both return false when `index` is outside [0, text.length()), so callers may
// pass -1 or text.length() without a bounds check.
// isWhiteSpace() accepted any character in WHITESPACE_NL (" \t\f" + "\r\n");
// isSpace() accepts only ' '.
boolean isWhiteSpace(String text, int index) { boolean isSpace(String text, int index) {
if (index < 0 || index >= text.length()) if (index < 0 || index >= text.length())
return false; return false;
char kar = text.charAt(index); return (text.charAt(index) == ' ');
return (WHITESPACE_NL.indexOf(kar) >= 0);
} }
// Side-by-side diff: left column = removed endsWithWhitespace(), right column = added endsWithSpace().
// Tests the last character of `text` via the index-checked helper; for an empty
// string the index is -1, which the helper rejects, so the result is false.
boolean endsWithWhitespace(String text) { boolean endsWithSpace(String text) {
return isWhiteSpace(text, text.length() - 1); return isSpace(text, text.length() - 1);
} }
}, document.body()); }, document.body());

Loading…
Cancel
Save