diff --git a/package.json b/package.json index 5c1807da..502076d6 100644 --- a/package.json +++ b/package.json @@ -76,6 +76,7 @@ "graphql-rate-limit-directive": "1.1.0", "graphql-subscriptions": "1.1.0", "graphql-tools": "4.0.5", + "he": "1.2.0", "highlight.js": "9.15.10", "i18next": "17.0.15", "i18next-express-middleware": "1.8.2", diff --git a/server/models/pages.js b/server/models/pages.js index 929589e7..f39cd94f 100644 --- a/server/models/pages.js +++ b/server/models/pages.js @@ -7,6 +7,7 @@ const fs = require('fs-extra') const yaml = require('js-yaml') const striptags = require('striptags') const emojiRegex = require('emoji-regex') +const he = require('he') /* global WIKI */ @@ -17,7 +18,7 @@ const frontmatterRegex = { } const punctuationRegex = /[!,:;/\\_+\-=()&#@<>$~%^*[\]{}"'|]+|(\.\s)|(\s\.)/ig -const htmlEntitiesRegex = /(&#[0-9]{3};)|(&#x[a-zA-Z0-9]{2};)/ig +// const htmlEntitiesRegex = /(&#[0-9]{3};)|(&#x[a-zA-Z0-9]{2};)/ig /** * Pages model @@ -663,9 +664,10 @@ module.exports = class Page extends Model { * @returns {string} Cleaned Content Text */ static cleanHTML(rawHTML = '') { - return striptags(rawHTML || '') + let data = striptags(rawHTML || '') .replace(emojiRegex(), '') - .replace(htmlEntitiesRegex, '') + // .replace(htmlEntitiesRegex, '') + return he.decode(data) .replace(punctuationRegex, ' ') .replace(/(\r\n|\n|\r)/gm, ' ') .replace(/\s\s+/g, ' ') diff --git a/yarn.lock b/yarn.lock index 8c1aecc1..8e785f50 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6215,7 +6215,7 @@ hash.js@^1.0.0, hash.js@^1.0.3, hash.js@^1.1.3: inherits "^2.0.3" minimalistic-assert "^1.0.1" -he@^1.1.0, he@^1.2.0: +he@1.2.0, he@^1.1.0, he@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f" integrity sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==