Markdown improvements

pull/214/head
M66B 2 years ago
parent c05491dd11
commit f370b741d8

@ -31,15 +31,21 @@ import org.commonmark.ext.task.list.items.TaskListItemsExtension;
import org.commonmark.node.Node; import org.commonmark.node.Node;
import org.commonmark.parser.Parser; import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.renderer.html.HtmlRenderer;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
public class Markdown { public class Markdown {
static String toHtml(String markdown) { static String toHtml(String markdown) {
// https://github.com/commonmark/commonmark-java#usage // https://github.com/commonmark/commonmark-java#usage
// https://github.com/commonmark/commonmark-java/issues/294 // https://github.com/commonmark/commonmark-java/issues/294
markdown = markdown.replace('\u00a0', ' '); markdown = markdown.replace('\u00a0', ' ');
List<Extension> extensions = Arrays.asList( List<Extension> extensions = Arrays.asList(
InsExtension.create(), InsExtension.create(),
TaskListItemsExtension.create(), TaskListItemsExtension.create(),
@ -55,23 +61,73 @@ public class Markdown {
String html = r.render(d); String html = r.render(d);
if (BuildConfig.DEBUG) { if (BuildConfig.DEBUG) {
Log.i("Markdown md=" + markdown.replace('\n', '|')); Log.i("Markdown md=" + markdown.replace('\n', '|'));
Log.i("Markdown html=" + markdown.replace('\n', '|')); Log.i("Markdown html=" + html.replace('\n', '|'));
} }
return html; return html;
} }
static String fromHtml(String html) { static String fromHtml(String html) {
// https://github.com/vsch/flexmark-java/wiki/Extensions#html-to-markdown // https://github.com/vsch/flexmark-java/wiki/Extensions#html-to-markdown
Map<String, String> specialCharsMap = new HashMap<>();
//specialCharsMap.put("“", "\"");
//specialCharsMap.put("”", "\"");
specialCharsMap.put("&ldquo;", "\"");
specialCharsMap.put("&rdquo;", "\"");
//specialCharsMap.put("‘", "'");
//specialCharsMap.put("’", "'");
specialCharsMap.put("&lsquo;", "'");
specialCharsMap.put("&rsquo;", "'");
specialCharsMap.put("&apos;", "'");
//specialCharsMap.put("«", "<<");
specialCharsMap.put("&laquo;", "<<");
//specialCharsMap.put("»", ">>");
specialCharsMap.put("&raquo;", ">>");
//specialCharsMap.put("…", "...");
specialCharsMap.put("&hellip;", "...");
//specialCharsMap.put("–", "--");
specialCharsMap.put("&endash;", "--");
//specialCharsMap.put("—", "---");
specialCharsMap.put("&emdash;", "---");
DataHolder options = new MutableDataSet() DataHolder options = new MutableDataSet()
.set(FlexmarkHtmlConverter.SETEXT_HEADINGS, false) .set(FlexmarkHtmlConverter.SETEXT_HEADINGS, false)
.set(FlexmarkHtmlConverter.OUTPUT_ATTRIBUTES_ID, false)
.set(FlexmarkHtmlConverter.TYPOGRAPHIC_REPLACEMENT_MAP, specialCharsMap)
.toImmutable(); .toImmutable();
// Remove nested/empty tables
Document doc = JsoupEx.parse(html);
for (Element table : doc.select("table")) {
boolean empty = false;
Elements children = table.children().select("table");
if (children.size() == 0)
for (Element tr : table.children()) {
if (tr.children().size() == 1) {
empty = true;
break;
}
if (empty)
break;
}
if (children.size() > 0 || empty) {
table.tagName("div");
for (Element child : table.children())
if ("tr".equals(child.tagName()))
child.tagName("div");
else if ("td".equals(child.tagName()))
child.tagName("span");
}
}
String markdown = FlexmarkHtmlConverter.builder(options) String markdown = FlexmarkHtmlConverter.builder(options)
.build() .build()
.convert(html); .convert(doc.html());
if (BuildConfig.DEBUG) { if (BuildConfig.DEBUG) {
Log.i("Markdown html=" + markdown.replace('\n', '|')); Log.i("Markdown html=" + html.replace('\n', '|'));
Log.i("Markdown md=" + markdown.replace('\n', '|')); Log.i("Markdown md=" + markdown.replace('\n', '|'));
} }
return markdown return markdown
.replaceAll("(?m)^( *)(\\d+)\\.( +)", "$1$2\\\\.$3") .replaceAll("(?m)^( *)(\\d+)\\.( +)", "$1$2\\\\.$3")
.replaceAll("<br />", "") .replaceAll("<br />", "")

Loading…
Cancel
Save