From 5807b1784bb06a790e1a3de8ae72f43056bcf620 Mon Sep 17 00:00:00 2001 From: Tayeb Chlyah Date: Sat, 7 Mar 2026 11:07:00 +0400 Subject: [PATCH] feat(client, server) #3: add snippets and highlight matches to search results --- client/components/common/search-results.vue | 132 +++++++++++++++++- .../common/common-pages-query-search.gql | 1 + server/graph/resolvers/page.js | 55 +++++++- server/graph/schemas/page.graphql | 1 + server/models/pages.js | 52 +++++++ server/modules/search/algolia/engine.js | 14 +- server/modules/search/aws/engine.js | 8 +- server/modules/search/azure/engine.js | 10 +- server/modules/search/elasticsearch/engine.js | 14 +- server/modules/search/postgres/engine.js | 14 +- 10 files changed, 263 insertions(+), 38 deletions(-) diff --git a/client/components/common/search-results.vue b/client/components/common/search-results.vue index ddc33208..7f1b891e 100644 --- a/client/components/common/search-results.vue +++ b/client/components/common/search-results.vue @@ -16,15 +16,15 @@ .subheading {{$t('common:header.searchNoResult')}} template(v-if='search && search.length >= 2 && results && results.length > 0') v-subheader.white--text {{$t('common:header.searchResultsCount', { total: response.totalHits })}} - v-list.search-results-items.radius-7.py-0(two-line, dense) + v-list.search-results-items.radius-7.py-0(three-line, dense) template(v-for='(item, idx) of results') v-list-item(@click='goToPage(item)', @click.middle="goToPageInNewTab(item)", :key='item.id', :class='idx === cursor ? `highlighted` : ``') v-list-item-avatar(tile) img(src='/_assets/svg/icon-selective-highlighting.svg') v-list-item-content - v-list-item-title(v-text='item.title') - v-list-item-subtitle.caption(v-text='item.description') - .caption.grey--text(v-text='item.path') + v-list-item-title.search-results-title(v-html='highlightMatch(item.title)') + .search-results-snippet(v-html='formatSnippet(item.snippet || buildSnippet(item))') + .caption.grey--text.search-results-path(v-html='highlightMatch(item.path)') v-list-item-action v-chip(label, outlined) {{item.locale.toUpperCase()}} v-divider(v-if='idx < results.length - 1') @@ -83,6 +83,21 @@ export default { searchIsLoading: sync('site/searchIsLoading'), searchRestrictLocale: sync('site/searchRestrictLocale'), searchRestrictPath: sync('site/searchRestrictPath'), + searchTerms() { + if (!this.search) { + return [] + } + + const terms = this.search.split(/\s+/).reduce((acc, term) => { + acc.push(term, term.replace(/^[*+"'():|&!<>\-]+|[*+"'():|&!<>\-]+$/g, '')) + return acc + }, []) + + return _.sortBy(_.uniq(terms + .map(term => _.trim(term)) + .filter(term => term.length > 1) + ), term => -term.length) + }, results() { const currentIndex = (this.pagination - 1) * this.perPage return this.response.results ? _.slice(this.response.results, currentIndex, currentIndex + this.perPage) : [] @@ -132,6 +147,69 @@ export default { }) }, methods: { + escapeHtml(value = '') { + return _.escape(value) + }, + findFirstMatchIndex(value = '') { + const text = _.toString(value) + const normalizedText = text.toLowerCase() + + for (const term of this.searchTerms) { + const idx = normalizedText.indexOf(term.toLowerCase()) + if (idx >= 0) { + return idx + } + } + + return -1 + }, + highlightMatch(value = '') { + const escapedValue = this.escapeHtml(value) + + if (_.isEmpty(this.searchTerms)) { + return escapedValue + } + + const pattern = this.searchTerms.map(term => _.escapeRegExp(term)).join('|') + if (!pattern) { + return escapedValue + } + + return escapedValue.replace(new RegExp(`(${pattern})`, 'gi'), '$1') + }, + formatSnippet(value = '') { + const highlightedValue = this.highlightMatch(value) + + return highlightedValue.replace(/^(\.{3})?([^:|]{2,72}:)(\s+)/, (match, prefix = '', heading, spacing) => { + return `${prefix}${heading}${spacing}` + }) + }, + buildSnippet(item) { + const source = _.trim(item.description || item.title || item.path || '') + + if (!source) { + return '' + } + + const snippetLength = 120 + const matchIndex = this.findFirstMatchIndex(source) + let start = 0 + + if (matchIndex >= 0) { + start = Math.max(0, matchIndex - 36) + } + + let snippet = source.slice(start, start + snippetLength).trim() + + if (start > 0) { + snippet = `...${snippet}` + } + if (start + snippetLength < source.length) { + snippet = `${snippet}...` + } + + return snippet + }, setSearchTerm(term) { this.search = term }, @@ -223,6 +301,16 @@ export default { &-items { text-align: left; + .v-list-item { + align-items: flex-start; + } + + .v-list-item__content { + overflow: hidden; + padding-top: 4px; + padding-bottom: 4px; + } + .highlighted { background: #FFF linear-gradient(to bottom, #FFF, mc('orange', '100')); @@ -232,6 +320,42 @@ export default { } } + &-title { + margin-bottom: 2px; + } + + &-snippet { + display: block; + font-size: .8rem; + white-space: normal; + line-height: 1.05rem; + overflow-wrap: anywhere; + color: rgba(0, 0, 0, .72); + margin-bottom: 2px; + } + + &-path { + display: block; + overflow-wrap: anywhere; + } + + mark { + background-color: #ffd54f; + color: inherit; + border-radius: 3px; + padding: 0 2px; + box-shadow: inset 0 -1px 0 rgba(0, 0, 0, .08); + } + + .theme--dark & mark { + background-color: #ef6c00; + color: #fff; + } + + .theme--dark &-snippet { + color: rgba(255, 255, 255, .72); + } + &-suggestions { .highlighted { background: transparent linear-gradient(to bottom, mc('blue', '500'), mc('blue', '700')); diff --git a/client/graph/common/common-pages-query-search.gql b/client/graph/common/common-pages-query-search.gql index e36445b3..27232ff9 100644 --- a/client/graph/common/common-pages-query-search.gql +++ b/client/graph/common/common-pages-query-search.gql @@ -5,6 +5,7 @@ query ($query: String!) { id title description + snippet path locale } diff --git a/server/graph/resolvers/page.js b/server/graph/resolvers/page.js index 9e99686a..c1041708 100644 --- a/server/graph/resolvers/page.js +++ b/server/graph/resolvers/page.js @@ -3,6 +3,39 @@ const graphHelper = require('../../helpers/graph') /* global WIKI */ +function getSearchTerms(query = '') { + return _.sortBy(_.uniq(query.split(/\s+/).reduce((acc, term) => { + acc.push(term, term.replace(/^[*+"'():|&!<>\-]+|[*+"'():|&!<>\-]+$/g, '')) + return acc + }, []).map(term => _.trim(term)).filter(term => term.length > 1)), term => -term.length) +} + +function buildSnippetExcerpt(source = '', query = '', snippetLength = 160) { + const text = _.trim(source) + if (!text) { + return '' + } + + const terms = getSearchTerms(query) + const normalizedText = text.toLowerCase() + const matchIndex = _.find(terms.map(term => normalizedText.indexOf(term.toLowerCase())), idx => idx >= 0) ?? -1 + const start = matchIndex >= 0 ? Math.max(0, matchIndex - 48) : 0 + let snippet = text.slice(start, start + snippetLength).trim() + + if (start > 0) { + snippet = `...${snippet}` + } + if (start + snippetLength < text.length) { + snippet = `${snippet}...` + } + + return snippet +} + +function buildSearchSnippet(source = '', query = '') { + return buildSnippetExcerpt(source, query) +} + module.exports = { Query: { async pages() { return {} } @@ -52,15 +85,23 @@ module.exports = { async search (obj, args, context) { if (WIKI.data.searchEngine) { const resp = await WIKI.data.searchEngine.query(args.query, args) + const filteredResults = _.filter(resp.results, r => { + return WIKI.auth.checkAccess(context.req.user, ['read:pages'], { + path: r.path, + locale: r.locale, + tags: r.tags // Tags are needed since access permissions can be limited by page tags too + }) + }) + return { ...resp, - results: _.filter(resp.results, r => { - return WIKI.auth.checkAccess(context.req.user, ['read:pages'], { - path: r.path, - locale: r.locale, - tags: r.tags // Tags are needed since access permissions can be limited by page tags too - }) - }) + results: filteredResults.map(r => { + return { + ...r, + snippet: buildSearchSnippet(r.snippet || r.content || r.description || r.title || r.path || '', args.query) + } + }), + totalHits: filteredResults.length } } else { return { diff --git a/server/graph/schemas/page.graphql b/server/graph/schemas/page.graphql index 552ad325..2bb2adf6 100644 --- a/server/graph/schemas/page.graphql +++ b/server/graph/schemas/page.graphql @@ -264,6 +264,7 @@ type PageSearchResult { id: String! title: String! description: String! + snippet: String! path: String! locale: String! } diff --git a/server/models/pages.js b/server/models/pages.js index bb5b6585..8dbdbed6 100644 --- a/server/models/pages.js +++ b/server/models/pages.js @@ -338,6 +338,7 @@ module.exports = class Page extends Model { // -> Add to Search Index const pageContents = await WIKI.models.pages.query().findById(page.id).select('render') page.safeContent = WIKI.models.pages.cleanHTML(pageContents.render) + page.searchContent = WIKI.models.pages.buildSearchContent(pageContents.render) await WIKI.data.searchEngine.created(page) // -> Add to Storage @@ -449,6 +450,7 @@ module.exports = class Page extends Model { // -> Update Search Index const pageContents = await WIKI.models.pages.query().findById(page.id).select('render') page.safeContent = WIKI.models.pages.cleanHTML(pageContents.render) + page.searchContent = WIKI.models.pages.buildSearchContent(pageContents.render) await WIKI.data.searchEngine.updated(page) // -> Update on Storage @@ -741,6 +743,7 @@ module.exports = class Page extends Model { // -> Rename in Search Index const pageContents = await WIKI.models.pages.query().findById(page.id).select('render') page.safeContent = WIKI.models.pages.cleanHTML(pageContents.render) + page.searchContent = WIKI.models.pages.buildSearchContent(pageContents.render) await WIKI.data.searchEngine.renamed({ ...page, destinationPath: opts.destinationPath, @@ -1160,6 +1163,55 @@ module.exports = class Page extends Model { .split(' ').filter(w => w.length > 1).join(' ').toLowerCase() } + static buildSearchContent(rawHTML = '') { + const $ = cheerio.load(rawHTML || '', { + decodeEntities: true + }) + + $('.toc-anchor').remove() + + const blocks = [] + $('h1,h2,h3,h4,h5,h6,p,li,td,th,blockquote,pre').each((idx, el) => { + const text = he.decode(striptags($(el).text(), [], ' ')) + .replace(/\s+/g, ' ') + .trim() + + if (!text) { + return + } + + if (_.last(blocks)?.text === text) { + return + } + + blocks.push({ + text, + isHeader: /^h[1-6]$/i.test(el.name) + }) + }) + + if (blocks.length < 1) { + return he.decode(striptags(rawHTML || '', [], ' ')) + .replace(/\s+/g, ' ') + .trim() + } + + const segments = [] + for (let i = 0; i < blocks.length; i++) { + const block = blocks[i] + const nextBlock = blocks[i + 1] + + if (block.isHeader && nextBlock && !nextBlock.isHeader) { + segments.push(`${block.text}: ${nextBlock.text}`) + i++ + } else { + segments.push(block.text) + } + } + + return segments.join(' | ') + } + /** * Subscribe to HA propagation events */ diff --git a/server/modules/search/algolia/engine.js b/server/modules/search/algolia/engine.js index 16380a54..957f0591 100644 --- a/server/modules/search/algolia/engine.js +++ b/server/modules/search/algolia/engine.js @@ -32,7 +32,8 @@ module.exports = { 'locale', 'path', 'title', - 'description' + 'description', + 'content' ], advancedSyntax: true }) @@ -55,7 +56,8 @@ module.exports = { locale: r.locale, path: r.path, title: r.title, - description: r.description + description: r.description, + content: r.content })), suggestions: [], totalHits: results.nbHits @@ -77,7 +79,7 @@ module.exports = { path: page.path, title: page.title, description: page.description, - content: page.safeContent + content: page.searchContent || page.safeContent }) }, /** @@ -90,7 +92,7 @@ module.exports = { objectID: page.hash, title: page.title, description: page.description, - content: page.safeContent + content: page.searchContent || page.safeContent }) }, /** @@ -114,7 +116,7 @@ module.exports = { path: page.destinationPath, title: page.title, description: page.description, - content: page.safeContent + content: page.searchContent || page.safeContent }) }, /** @@ -176,7 +178,7 @@ module.exports = { path: doc.path, title: doc.title, description: doc.description, - content: WIKI.models.pages.cleanHTML(doc.render) + content: WIKI.models.pages.buildSearchContent(doc.render) })) ) } catch (err) { diff --git a/server/modules/search/aws/engine.js b/server/modules/search/aws/engine.js index 86263704..debc830f 100644 --- a/server/modules/search/aws/engine.js +++ b/server/modules/search/aws/engine.js @@ -198,7 +198,7 @@ module.exports = { path: page.path, title: page.title, description: page.description, - content: page.safeContent + content: page.searchContent || page.safeContent } } ]) @@ -221,7 +221,7 @@ module.exports = { path: page.path, title: page.title, description: page.description, - content: page.safeContent + content: page.searchContent || page.safeContent } } ]) @@ -269,7 +269,7 @@ module.exports = { path: page.destinationPath, title: page.title, description: page.description, - content: page.safeContent + content: page.searchContent || page.safeContent } } ]) @@ -336,7 +336,7 @@ module.exports = { path: doc.path, title: doc.title, description: doc.description, - content: WIKI.models.pages.cleanHTML(doc.render) + content: WIKI.models.pages.buildSearchContent(doc.render) } }))) }).promise() diff --git a/server/modules/search/azure/engine.js b/server/modules/search/azure/engine.js index b5e9b16c..f303c8e2 100644 --- a/server/modules/search/azure/engine.js +++ b/server/modules/search/azure/engine.js @@ -95,7 +95,7 @@ module.exports = { count: true, scoringProfile: 'fieldWeights', search: q, - select: 'id, locale, path, title, description', + select: 'id, locale, path, title, description, content', queryType: QueryType.simple, top: 50 }) @@ -147,7 +147,7 @@ module.exports = { path: page.path, title: page.title, description: page.description, - content: page.safeContent + content: page.searchContent || page.safeContent } ]) }, @@ -164,7 +164,7 @@ module.exports = { path: page.path, title: page.title, description: page.description, - content: page.safeContent + content: page.searchContent || page.safeContent } ]) }, @@ -200,7 +200,7 @@ module.exports = { path: page.destinationPath, title: page.title, description: page.description, - content: page.safeContent + content: page.searchContent || page.safeContent } ]) }, @@ -223,7 +223,7 @@ module.exports = { locale: chunk.locale, title: chunk.title, description: chunk.description, - content: WIKI.models.pages.cleanHTML(chunk.render) + content: WIKI.models.pages.buildSearchContent(chunk.render) }) } }), diff --git a/server/modules/search/elasticsearch/engine.js b/server/modules/search/elasticsearch/engine.js index 5ad710b0..56f4d6dc 100644 --- a/server/modules/search/elasticsearch/engine.js +++ b/server/modules/search/elasticsearch/engine.js @@ -160,7 +160,7 @@ module.exports = { }, from: 0, size: 50, - _source: ['title', 'description', 'path', 'locale'], + _source: ['title', 'description', 'path', 'locale', 'content'], suggest: { suggestions: { text: q, @@ -180,7 +180,8 @@ module.exports = { locale: r._source.locale, path: r._source.path, title: r._source.title, - description: r._source.description + description: r._source.description, + content: r._source.content })), suggestions: _.reject(_.get(results, 'suggest.suggestions', []).map(s => _.get(s, 'options[0].text', false)), s => !s), totalHits: _.get(results, this.config.apiVersion === '8.x' ? 'hits.total.value' : 'body.hits.total.value', _.get(results, this.config.apiVersion === '8.x' ? 'hits.total' : 'body.hits.total', 0)) @@ -236,7 +237,7 @@ module.exports = { path: page.path, title: page.title, description: page.description, - content: page.safeContent, + content: page.searchContent || page.safeContent, tags: await this.buildTags(page.id) }, refresh: true @@ -258,7 +259,7 @@ module.exports = { path: page.path, title: page.title, description: page.description, - content: page.safeContent, + content: page.searchContent || page.safeContent, tags: await this.buildTags(page.id) }, refresh: true @@ -299,7 +300,7 @@ module.exports = { path: page.destinationPath, title: page.title, description: page.description, - content: page.safeContent, + content: page.searchContent || page.safeContent, tags: await this.buildTags(page.id) }, refresh: true @@ -365,6 +366,7 @@ module.exports = { } }) doc.safeContent = WIKI.models.pages.cleanHTML(doc.render) + doc.searchContent = WIKI.models.pages.buildSearchContent(doc.render) result.push({ suggest: this.buildSuggest(doc), tags: doc.tags, @@ -372,7 +374,7 @@ module.exports = { path: doc.path, title: doc.title, description: doc.description, - content: doc.safeContent + content: doc.searchContent || doc.safeContent }) return result }, []), diff --git a/server/modules/search/postgres/engine.js b/server/modules/search/postgres/engine.js index cfbf927d..ffff750e 100644 --- a/server/modules/search/postgres/engine.js +++ b/server/modules/search/postgres/engine.js @@ -62,7 +62,7 @@ module.exports = { try { let suggestions = [] let qry = ` - SELECT id, path, locale, title, description + SELECT id, path, locale, title, description, content FROM "pagesVector", to_tsquery(?,?) query WHERE (query @@ "tokens" OR path ILIKE ?) ` @@ -106,10 +106,10 @@ module.exports = { */ async created(page) { await WIKI.models.knex.raw(` - INSERT INTO "pagesVector" (path, locale, title, description, "tokens") VALUES ( - ?, ?, ?, ?, (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C')) + INSERT INTO "pagesVector" (path, locale, title, description, "tokens", content) VALUES ( + ?, ?, ?, ?, (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C')), ? ) - `, [page.path, page.localeCode, page.title, page.description, page.title, page.description, page.safeContent]) + `, [page.path, page.localeCode, page.title, page.description, page.title, page.description, page.safeContent, page.searchContent || page.safeContent]) }, /** * UPDATE @@ -121,11 +121,12 @@ module.exports = { UPDATE "pagesVector" SET title = ?, description = ?, + content = ?, tokens = (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C')) WHERE path = ? AND locale = ? - `, [page.title, page.description, page.title, page.description, page.safeContent, page.path, page.localeCode]) + `, [page.title, page.description, page.searchContent || page.safeContent, page.title, page.description, page.safeContent, page.path, page.localeCode]) }, /** * DELETE @@ -169,11 +170,12 @@ module.exports = { objectMode: true, transform: async (page, enc, cb) => { const content = WIKI.models.pages.cleanHTML(page.render) + const searchContent = WIKI.models.pages.buildSearchContent(page.render) await WIKI.models.knex.raw(` INSERT INTO "pagesVector" (path, locale, title, description, "tokens", content) VALUES ( ?, ?, ?, ?, (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C')), ? ) - `, [page.path, page.localeCode, page.title, page.description, page.title, page.description, content, content]) + `, [page.path, page.localeCode, page.title, page.description, page.title, page.description, content, searchContent || content]) cb() } })