// wiki/libs/markdown.js

"use strict";

var Promise = require('bluebird'),
	md = require('markdown-it'),
	mdEmoji = require('markdown-it-emoji'),
	mdTaskLists = require('markdown-it-task-lists'),
	mdAbbr = require('markdown-it-abbr'),
	mdAnchor = require('markdown-it-anchor'),
	mdFootnote = require('markdown-it-footnote'),
	mdExternalLinks = require('markdown-it-external-links'),
	mdExpandTabs = require('markdown-it-expand-tabs'),
	mdAttrs = require('markdown-it-attrs'),
	hljs = require('highlight.js'),
	cheerio = require('cheerio'),
	_ = require('lodash'),
	mdRemove = require('remove-markdown');

// Load plugins

/**
 * Shared markdown-it renderer used to turn page content into HTML.
 *
 * Raw HTML and auto-linking are enabled, fenced code blocks are highlighted
 * through highlight.js, and the plugins below are registered in the order
 * listed.
 */
const mkdown = md({
		html: true,
		linkify: true,
		// NOTE(review): markdown-it documents this option as `typographer`;
		// `typography` is probably ignored. Confirm before renaming, because
		// enabling it changes the rendered output of existing content.
		typography: true,
		/**
		 * Render a fenced code block.
		 *
		 * @param      {String}  str     Raw source of the code block
		 * @param      {String}  lang    Language name given after the fence
		 * @return     {String}  Complete <pre><code> markup for the block
		 */
		highlight(str, lang) {
			if (lang && hljs.getLanguage(lang)) {
				try {
					return '<pre class="hljs"><code>' + hljs.highlight(lang, str, true).value + '</code></pre>';
				} catch (err) {
					// Highlighting failed: fall through to the plain rendering below.
				}
			}
			// markdown-it emits a string starting with "<pre" verbatim, so the
			// raw source has to be escaped here or "<", ">" and "&" inside code
			// blocks would be interpreted as markup.
			return '<pre><code>' + mkdown.utils.escapeHtml(str) + '</code></pre>';
		}
	})
	.use(mdEmoji)
	.use(mdTaskLists)
	.use(mdAbbr)
	.use(mdAnchor, {
		// parseTree() slugs its anchors with _.kebabCase as well.
		slugify: _.kebabCase,
		permalink: true,
		permalinkClass: 'toc-anchor',
		permalinkSymbol: '#',
		permalinkBefore: true
	})
	.use(mdFootnote)
	.use(mdExternalLinks, {
		externalClassName: 'external-link',
		internalClassName: 'internal-link'
	})
	.use(mdExpandTabs, {
		tabWidth: 4
	})
	.use(mdAttrs);

// Rendering rules

// Render an emoji token as an <i> element whose class is "twa-" followed by
// the token markup with every underscore turned into a dash.
mkdown.renderer.rules.emoji = (tokens, index) => {
	const cssName = _.replace(tokens[index].markup, /_/g, '-');
	return '<i class="twa twa-' + cssName + '"></i>';
};

/**
 * Parse markdown content and build the table-of-contents tree.
 *
 * Headings are extracted with a plain markdown-it parser (no plugins). Only
 * level 1 and level 2 headings are kept: level 1 headings become root
 * entries and each level 2 heading is nested under the closest preceding
 * level 1 heading. A level 2 heading that appears before any level 1
 * heading has no parent and is left out of the tree.
 *
 * @param      {String}  content  Markdown content
 * @return     {Array}   TOC tree; every node is { content, anchor, nodes }
 */
const parseTree = (content) => {

	const maxLevel = 2;
	const tokens = md().parse(content, {});
	const tocArray = [];

	//-> Extract headings and their respective levels

	for (let i = 0; i < tokens.length; i++) {
		const closeToken = tokens[i];
		if (closeToken.type !== 'heading_close') {
			continue;
		}

		// The inline token holding the heading text sits right before the close token.
		const inline = tokens[i - 1];
		if (!inline || inline.type !== 'inline') {
			continue;
		}

		//-> Exclude levels deeper than 2

		const level = Number(closeToken.tag.slice(1));
		if (level > maxLevel) {
			continue;
		}

		// An empty heading ("#" alone) produces an inline token without any
		// children, so never index into the list without checking its length.
		const children = inline.children || [];
		let title = '';
		let anchor = '';
		if (children.length > 1 && children[0].type === 'link_open') {
			// Heading starting with a link: use the token following link_open as title.
			title = children[1].content;
			anchor = _.kebabCase(title);
		} else {
			title = inline.content;
			anchor = _.kebabCase(children.reduce((acc, t) => acc + t.content, ''));
		}

		tocArray.push({
			content: title,
			anchor,
			level
		});
	}

	//-> Build tree from flat array

	return _.reduce(tocArray, (tree, entry) => {
		// Walk down through the most recently added node of each level until
		// the list that must receive this entry is reached.
		let siblings = tree;
		for (let depth = 1; depth < entry.level && siblings; depth++) {
			const parent = _.last(siblings);
			siblings = parent ? parent.nodes : null;
		}
		// No suitable parent exists (e.g. a level 2 heading before any level 1).
		if (siblings) {
			siblings.push({
				content: entry.content,
				anchor: entry.anchor,
				nodes: []
			});
		}
		return tree;
	}, []);

};

/**
 * Parse markdown content to HTML
 *
 * The content is rendered with the shared markdown-it instance and the
 * resulting markup is then post-processed with cheerio:
 *  - an empty leading paragraph is removed, a leading paragraph holding only
 *    an image gets the "is-gapless" class;
 *  - links inside h1/h2/h3 (other than the permalink anchors) are replaced
 *    by their text;
 *  - classes found on the last child of a top-level blockquote are moved to
 *    the blockquote itself;
 *  - content following each h2 / h3 is wrapped in an "indent-h2" /
 *    "indent-h3" container.
 *
 * @param      {String}    content  Markdown content
 * @return     {String}  HTML formatted content
 */
const parseContent = (content)  => {

	const cr = cheerio.load(mkdown.render(content));

	//-> Check for empty first element

	// Empty input renders to nothing, in which case there is no first element.
	const firstElm = cr.root().children().first()[0];
	if (firstElm && firstElm.type === 'tag' && firstElm.name === 'p') {
		const firstElmChildren = firstElm.children;
		if (firstElmChildren.length < 1) {
			// firstElm is a raw DOM node; removal has to go through the cheerio wrapper.
			cr(firstElm).remove();
		} else if (firstElmChildren.length === 1 && firstElmChildren[0].type === 'tag' && firstElmChildren[0].name === 'img') {
			cr(firstElm).addClass('is-gapless');
		}
	}

	//-> Remove links in headers

	cr('h1 > a:not(.toc-anchor), h2 > a:not(.toc-anchor), h3 > a:not(.toc-anchor)').each((i, elm) => {
		// replaceWith() parses strings as HTML, so the decoded link text is
		// escaped again to keep characters such as "<" as literal text.
		const txtLink = cr(elm).text();
		cr(elm).replaceWith(mkdown.utils.escapeHtml(txtLink));
	});

	//-> Re-attach blockquote styling classes to their parents

	cr.root().children('blockquote').each((i, elm) => {
		if (cr(elm).children().length > 0) {
			const bqLastChild = cr(elm).children().last()[0];
			const bqLastChildClasses = cr(bqLastChild).attr('class');
			if (bqLastChildClasses && bqLastChildClasses.length > 0) {
				cr(bqLastChild).removeAttr('class');
				cr(elm).addClass(bqLastChildClasses);
			}
		}
	});

	//-> Enclose content below headers

	// All h2 sections are wrapped first, then h3 sections; each section runs
	// until the next header of the same or a higher level.
	[
		{ tag: 'h2', stopAt: 'h1, h2' },
		{ tag: 'h3', stopAt: 'h1, h2, h3' }
	].forEach((section) => {
		const wrapperClass = 'indent-' + section.tag;
		cr(section.tag).each((i, elm) => {
			// Collect the siblings before inserting the wrapper so that the
			// wrapper itself is not part of the selection.
			const following = cr(elm).nextUntil(section.stopAt);
			cr(elm).after('<div class="' + wrapperClass + '"></div>');
			const container = cr(elm).next('.' + wrapperClass);
			_.forEach(following, (ch) => {
				container.append(ch);
			});
		});
	});

	return cr.html();

};

/**
 * Parse meta-data tags from content
 *
 * Meta-data is written as HTML comments of the form `<!-- KEY: value -->`,
 * where KEY consists of ASCII letters only. Keys are lower-cased and values
 * are trimmed; when a key appears more than once the last value wins.
 *
 * @param      {String}  content  Markdown content
 * @return     {Object}  Properties found in the content and their values
 */
const parseMeta = (content) => {

	// The value group is lazy so that a comment ends at the first "-->",
	// which keeps two comments on the same line from being merged into one.
	// The literal is evaluated on every call, so `lastIndex` never leaks
	// between calls.
	const commentMeta = /<!-- ?([a-zA-Z]+):(.*?)-->/g;
	const results = {};

	let match = commentMeta.exec(content);
	while (match !== null) {
		results[match[1].toLowerCase()] = match[2].trim();
		match = commentMeta.exec(content);
	}

	return results;

};

module.exports = {

	/**
	 * Parse content and return all data
	 *
	 * @param      {String}  content  Markdown-formatted content
	 * @return     {Object}  Object containing meta, html and tree data
	 */
	parse(content) {
		return {
			meta: parseMeta(content),
			html: parseContent(content),
			tree: parseTree(content)
		};
	},

	parseContent,
	parseMeta,
	parseTree,

	/**
	 * Strips non-text elements from Markdown content
	 *
	 * @param      {String}  content  Markdown-formatted content
	 * @return     {String}  Text-only version
	 */
	removeMarkdown(content) {
		return mdRemove(_.chain(content)
			.replace(/<!-- ?([a-zA-Z]+):(.*)-->/g, '')
			.replace(/```[^`]+```/g, '')
			.replace(/`[^`]+`/g, '')
			.replace(new RegExp('(?!mailto:)(?:(?:http|https|ftp)://)(?:\\S+(?::\\S*)?@)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?', 'g'), '')
			.replace(/\r?\n|\r/g, ' ')
			.deburr()
			.toLower()
			.replace(/(\b([^a-z]+)\b)/g, ' ')
			.replace(/[^a-z]+/g, ' ')
			.replace(/(\b(\w{1,2})\b(\W|$))/g, '')
			.replace(/\s\s+/g, ' ')
			.value()
		);
	}

};
Markdown parsing + tree parsing + content display 8 years ago			`"use strict";`

			`var Promise = require('bluebird'),`
			`md = require('markdown-it'),`
			`mdEmoji = require('markdown-it-emoji'),`
			`mdTaskLists = require('markdown-it-task-lists'),`
			`mdAbbr = require('markdown-it-abbr'),`
Fix for anchors to use lodash kebabcase function 8 years ago			`mdAnchor = require('markdown-it-anchor'),`
Markdown parsing + tree parsing + content display 8 years ago			`mdFootnote = require('markdown-it-footnote'),`
			`mdExternalLinks = require('markdown-it-external-links'),`
			`mdExpandTabs = require('markdown-it-expand-tabs'),`
syntax highlight + TOC scroll + other content parsing improvements 8 years ago			`mdAttrs = require('markdown-it-attrs'),`
			`hljs = require('highlight.js'),`
			`cheerio = require('cheerio'),`
Early work on background agent for search 8 years ago			`_ = require('lodash'),`
			`mdRemove = require('remove-markdown');`
Markdown parsing + tree parsing + content display 8 years ago
			`// Load plugins`

			`var mkdown = md({`
			`html: true,`
			`linkify: true,`
syntax highlight + TOC scroll + other content parsing improvements 8 years ago			`typography: true,`
Added alerts functionality + modal UI 8 years ago			`highlight(str, lang) {`
syntax highlight + TOC scroll + other content parsing improvements 8 years ago			`if (lang && hljs.getLanguage(lang)) {`
			`try {`
			`return '<pre class="hljs"><code>' + hljs.highlight(lang, str, true).value + '</code></pre>';`
Added alerts functionality + modal UI 8 years ago			`} catch (err) {`
			`return '<pre><code>' + str + '</code></pre>';`
			`}`
syntax highlight + TOC scroll + other content parsing improvements 8 years ago			`}`
Caching + Edit Mode UI 8 years ago			`return '<pre><code>' + str + '</code></pre>';`
syntax highlight + TOC scroll + other content parsing improvements 8 years ago			`}`
Markdown parsing + tree parsing + content display 8 years ago			`})`
			`.use(mdEmoji)`
			`.use(mdTaskLists)`
			`.use(mdAbbr)`
			`.use(mdAnchor, {`
Fix for anchors to use lodash kebabcase function 8 years ago			`slugify: _.kebabCase,`
			`permalink: true,`
			`permalinkClass: 'toc-anchor',`
			`permalinkSymbol: '#',`
			`permalinkBefore: true`
Markdown parsing + tree parsing + content display 8 years ago			`})`
			`.use(mdFootnote)`
			`.use(mdExternalLinks, {`
			`externalClassName: 'external-link',`
			`internalClassName: 'internal-link'`
			`})`
			`.use(mdExpandTabs, {`
			`tabWidth: 4`
syntax highlight + TOC scroll + other content parsing improvements 8 years ago			`})`
			`.use(mdAttrs);`
Markdown parsing + tree parsing + content display 8 years ago
			`// Rendering rules`

			`mkdown.renderer.rules.emoji = function(token, idx) {`
Markdown display fixes + error pages 8 years ago			`return '<i class="twa twa-' + _.replace(token[idx].markup, /_/g, '-') + '"></i>';`
Markdown parsing + tree parsing + content display 8 years ago			`};`

syntax highlight + TOC scroll + other content parsing improvements 8 years ago			`/**`
			`* Parse markdown content and build TOC tree`
			`*`
			`* @param {(Function\|string)} content Markdown content`
			`* @return {Array} TOC tree`
			`*/`
Markdown parsing + tree parsing + content display 8 years ago			`const parseTree = (content) => {`

			`let tokens = md().parse(content, {});`
			`let tocArray = [];`

syntax highlight + TOC scroll + other content parsing improvements 8 years ago			`//-> Extract headings and their respective levels`

Markdown parsing + tree parsing + content display 8 years ago			`for (let i = 0; i < tokens.length; i++) {`
			`if (tokens[i].type !== "heading_close") {`
Markdown metadata parser 8 years ago			`continue;`
Markdown parsing + tree parsing + content display 8 years ago			`}`

Added alerts functionality + modal UI 8 years ago			`const heading = tokens[i - 1];`
			`const heading_close = tokens[i];`
Markdown parsing + tree parsing + content display 8 years ago
			`if (heading.type === "inline") {`
Markdown metadata parser 8 years ago			`let content = "";`
			`let anchor = "";`
			`if (heading.children && heading.children[0].type === "link_open") {`
Added alerts functionality + modal UI 8 years ago			`content = heading.children[1].content;`
Create mode + Source view + UI enhancements 8 years ago			`anchor = _.kebabCase(content);`
Markdown metadata parser 8 years ago			`} else {`
Code style and performance fixes 8 years ago			`content = heading.content;`
Create mode + Source view + UI enhancements 8 years ago			`anchor = _.kebabCase(heading.children.reduce((acc, t) => acc + t.content, ""));`
Markdown metadata parser 8 years ago			`}`
Markdown parsing + tree parsing + content display 8 years ago
Markdown metadata parser 8 years ago			`tocArray.push({`
Markdown parsing + tree parsing + content display 8 years ago			`content,`
			`anchor,`
			`level: +heading_close.tag.substr(1, 1)`
Markdown metadata parser 8 years ago			`});`
Markdown parsing + tree parsing + content display 8 years ago			`}`
			`}`

syntax highlight + TOC scroll + other content parsing improvements 8 years ago			`//-> Exclude levels deeper than 2`

			`_.remove(tocArray, (n) => { return n.level > 2; });`

			`//-> Build tree from flat array`

Markdown parsing + tree parsing + content display 8 years ago			`return _.reduce(tocArray, (tree, v) => {`
			`let treeLength = tree.length - 1;`
			`if(v.level < 2) {`
			`tree.push({`
			`content: v.content,`
			`anchor: v.anchor,`
			`nodes: []`
			`});`
			`} else {`
			`let lastNodeLevel = 1;`
			`let GetNodePath = (startPos) => {`
			`lastNodeLevel++;`
			`if(_.isEmpty(startPos)) {`
			`startPos = 'nodes';`
			`}`
			`if(lastNodeLevel === v.level) {`
			`return startPos;`
			`} else {`
			`return GetNodePath(startPos + '[' + (_.at(tree[treeLength], startPos).length - 1) + '].nodes');`
			`}`
			`};`
			`let lastNodePath = GetNodePath();`
			`let lastNode = _.get(tree[treeLength], lastNodePath);`
syntax highlight + TOC scroll + other content parsing improvements 8 years ago			`if(lastNode) {`
			`lastNode.push({`
			`content: v.content,`
			`anchor: v.anchor,`
			`nodes: []`
			`});`
			`_.set(tree[treeLength], lastNodePath, lastNode);`
			`}`
Markdown parsing + tree parsing + content display 8 years ago			`}`
			`return tree;`
			`}, []);`

			`};`

syntax highlight + TOC scroll + other content parsing improvements 8 years ago			`/**`
			`* Parse markdown content to HTML`
			`*`
			`* @param {String} content Markdown content`
			`* @return {String} HTML formatted content`
			`*/`
			`const parseContent = (content) => {`

			`let output = mkdown.render(content);`
			`let cr = cheerio.load(output);`
Integration to Requarks Core 8 years ago
Modals styling + Fix for gapless first <p> 8 years ago			`//-> Check for empty first element`

			`let firstElm = cr.root().children().first()[0];`
			`if(firstElm.type === 'tag' && firstElm.name === 'p') {`
			`let firstElmChildren = firstElm.children;`
			`if(firstElmChildren.length < 1) {`
			`firstElm.remove();`
			`} else if(firstElmChildren.length === 1 && firstElmChildren[0].type === 'tag' && firstElmChildren[0].name === 'img') {`
			`cr(firstElm).addClass('is-gapless');`
			`}`
			`}`
Integration to Requarks Core 8 years ago
			`//-> Remove links in headers`

			`cr('h1 > a:not(.toc-anchor), h2 > a:not(.toc-anchor), h3 > a:not(.toc-anchor)').each((i, elm) => {`
			`let txtLink = cr(elm).text();`
			`cr(elm).replaceWith(txtLink);`
			`});`

Markdown display fixes + error pages 8 years ago			`//-> Re-attach blockquote styling classes to their parents`

			`cr.root().children('blockquote').each((i, elm) => {`
			`if(cr(elm).children().length > 0) {`
			`let bqLastChild = cr(elm).children().last()[0];`
			`let bqLastChildClasses = cr(bqLastChild).attr('class');`
Headers section styling 8 years ago			`if(bqLastChildClasses && bqLastChildClasses.length > 0) {`
Markdown display fixes + error pages 8 years ago			`cr(bqLastChild).removeAttr('class');`
			`cr(elm).addClass(bqLastChildClasses);`
			`}`
			`}`
			`});`

Headers section styling 8 years ago			`//-> Enclose content below headers`

			`cr('h2').each((i, elm) => {`
			`let subH2Content = cr(elm).nextUntil('h1, h2');`
			`cr(elm).after('<div class="indent-h2"></div>');`
			`let subH2Container = cr(elm).next('.indent-h2');`
			`_.forEach(subH2Content, (ch) => {`
			`cr(subH2Container).append(ch);`
			`});`
			`});`

			`cr('h3').each((i, elm) => {`
			`let subH3Content = cr(elm).nextUntil('h1, h2, h3');`
			`cr(elm).after('<div class="indent-h3"></div>');`
			`let subH3Container = cr(elm).next('.indent-h3');`
			`_.forEach(subH3Content, (ch) => {`
			`cr(subH3Container).append(ch);`
			`});`
			`});`

syntax highlight + TOC scroll + other content parsing improvements 8 years ago			`output = cr.html();`

			`return output;`

			`};`

Early work on background agent for search 8 years ago			`/**`
			`* Parse meta-data tags from content`
			`*`
			`* @param {String} content Markdown content`
			`* @return {Object} Properties found in the content and their values`
			`*/`
Markdown metadata parser 8 years ago			`const parseMeta = (content) => {`

			`let commentMeta = new RegExp('<!-- ?([a-zA-Z]+):(.*)-->','g');`
			`let results = {}, match;`
			`while(match = commentMeta.exec(content)) {`
			`results[_.toLower(match[1])] = _.trim(match[2]);`
			`}`

			`return results;`

			`};`

Markdown parsing + tree parsing + content display 8 years ago			`module.exports = {`

Early work on background agent for search 8 years ago			`/**`
			`* Parse content and return all data`
			`*`
			`* @param {String} content Markdown-formatted content`
			`* @return {Object} Object containing meta, html and tree data`
			`*/`
Markdown parsing + tree parsing + content display 8 years ago			`parse(content) {`
			`return {`
Markdown metadata parser 8 years ago			`meta: parseMeta(content),`
syntax highlight + TOC scroll + other content parsing improvements 8 years ago			`html: parseContent(content),`
Markdown parsing + tree parsing + content display 8 years ago			`tree: parseTree(content)`
			`};`
Caching + Edit Mode UI 8 years ago			`},`

			`parseContent,`
			`parseMeta,`
Early work on background agent for search 8 years ago			`parseTree,`

			`/**`
			`* Strips non-text elements from Markdown content`
			`*`
			`* @param {String} content Markdown-formatted content`
			`* @return {String} Text-only version`
			`*/`
			`removeMarkdown(content) {`
			`return mdRemove(_.chain(content)`
			`.replace(/<!-- ?([a-zA-Z]+):(.*)-->/g, '')`
			.replace(/```[^`]+```/g, '')
			.replace(/`[^`]+`/g, '')
			`.replace(new RegExp('(?!mailto:)(?:(?:http\|https\|ftp)://)(?:\\S+(?::\\S)?@)?(?:(?:(?:[1-9]\\d?\|1\\d\\d\|2[01]\\d\|22[0-3])(?:\\.(?:1?\\d{1,2}\|2[0-4]\\d\|25[0-5])){2}(?:\\.(?:[0-9]\\d?\|1\\d\\d\|2[0-4]\\d\|25[0-4]))\|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))\|localhost)(?::\\d{2,5})?(?:(/\|\\?\|#)[^\\s]*)?', 'g'), '')`
			`.replace(/\r?\n\|\r/g, ' ')`
			`.deburr()`
			`.toLower()`
			`.replace(/(\b([^a-z]+)\b)/g, ' ')`
			`.replace(/[^a-z]+/g, ' ')`
			`.replace(/(\b(\w{1,2})\b(\W\|$))/g, '')`
			`.replace(/\s\s+/g, ' ')`
			`.value()`
			`);`
			`}`
Markdown parsing + tree parsing + content display 8 years ago
			`};`