feat(client, server) #3: add snippets and highlight matches to search results

pull/7968/head
Tayeb Chlyah 3 months ago
parent d6e677ee90
commit 5807b1784b

@ -16,15 +16,15 @@
.subheading {{$t('common:header.searchNoResult')}}
template(v-if='search && search.length >= 2 && results && results.length > 0')
v-subheader.white--text {{$t('common:header.searchResultsCount', { total: response.totalHits })}}
v-list.search-results-items.radius-7.py-0(two-line, dense)
v-list.search-results-items.radius-7.py-0(three-line, dense)
template(v-for='(item, idx) of results')
v-list-item(@click='goToPage(item)', @click.middle="goToPageInNewTab(item)", :key='item.id', :class='idx === cursor ? `highlighted` : ``')
v-list-item-avatar(tile)
img(src='/_assets/svg/icon-selective-highlighting.svg')
v-list-item-content
v-list-item-title(v-text='item.title')
v-list-item-subtitle.caption(v-text='item.description')
.caption.grey--text(v-text='item.path')
v-list-item-title.search-results-title(v-html='highlightMatch(item.title)')
.search-results-snippet(v-html='formatSnippet(item.snippet || buildSnippet(item))')
.caption.grey--text.search-results-path(v-html='highlightMatch(item.path)')
v-list-item-action
v-chip(label, outlined) {{item.locale.toUpperCase()}}
v-divider(v-if='idx < results.length - 1')
@ -83,6 +83,21 @@ export default {
searchIsLoading: sync('site/searchIsLoading'),
searchRestrictLocale: sync('site/searchRestrictLocale'),
searchRestrictPath: sync('site/searchRestrictPath'),
searchTerms() {
if (!this.search) {
return []
}
const terms = this.search.split(/\s+/).reduce((acc, term) => {
acc.push(term, term.replace(/^[*+"'():|&!<>\-]+|[*+"'():|&!<>\-]+$/g, ''))
return acc
}, [])
return _.sortBy(_.uniq(terms
.map(term => _.trim(term))
.filter(term => term.length > 1)
), term => -term.length)
},
results() {
const currentIndex = (this.pagination - 1) * this.perPage
return this.response.results ? _.slice(this.response.results, currentIndex, currentIndex + this.perPage) : []
@ -132,6 +147,69 @@ export default {
})
},
methods: {
escapeHtml(value = '') {
return _.escape(value)
},
findFirstMatchIndex(value = '') {
const text = _.toString(value)
const normalizedText = text.toLowerCase()
for (const term of this.searchTerms) {
const idx = normalizedText.indexOf(term.toLowerCase())
if (idx >= 0) {
return idx
}
}
return -1
},
highlightMatch(value = '') {
const escapedValue = this.escapeHtml(value)
if (_.isEmpty(this.searchTerms)) {
return escapedValue
}
const pattern = this.searchTerms.map(term => _.escapeRegExp(term)).join('|')
if (!pattern) {
return escapedValue
}
return escapedValue.replace(new RegExp(`(${pattern})`, 'gi'), '<mark>$1</mark>')
},
formatSnippet(value = '') {
const highlightedValue = this.highlightMatch(value)
return highlightedValue.replace(/^(\.{3})?([^:|]{2,72}:)(\s+)/, (match, prefix = '', heading, spacing) => {
return `${prefix}<strong>${heading}</strong>${spacing}`
})
},
buildSnippet(item) {
const source = _.trim(item.description || item.title || item.path || '')
if (!source) {
return ''
}
const snippetLength = 120
const matchIndex = this.findFirstMatchIndex(source)
let start = 0
if (matchIndex >= 0) {
start = Math.max(0, matchIndex - 36)
}
let snippet = source.slice(start, start + snippetLength).trim()
if (start > 0) {
snippet = `...${snippet}`
}
if (start + snippetLength < source.length) {
snippet = `${snippet}...`
}
return snippet
},
/**
 * Assign the given term to `this.search`.
 *
 * @param {string} term New search query.
 */
setSearchTerm(term) {
this.search = term
},
@ -223,6 +301,16 @@ export default {
&-items {
text-align: left;
.v-list-item {
align-items: flex-start;
}
.v-list-item__content {
overflow: hidden;
padding-top: 4px;
padding-bottom: 4px;
}
.highlighted {
background: #FFF linear-gradient(to bottom, #FFF, mc('orange', '100'));
@ -232,6 +320,42 @@ export default {
}
}
&-title {
margin-bottom: 2px;
}
&-snippet {
display: block;
font-size: .8rem;
white-space: normal;
line-height: 1.05rem;
overflow-wrap: anywhere;
color: rgba(0, 0, 0, .72);
margin-bottom: 2px;
}
&-path {
display: block;
overflow-wrap: anywhere;
}
mark {
background-color: #ffd54f;
color: inherit;
border-radius: 3px;
padding: 0 2px;
box-shadow: inset 0 -1px 0 rgba(0, 0, 0, .08);
}
.theme--dark & mark {
background-color: #ef6c00;
color: #fff;
}
.theme--dark &-snippet {
color: rgba(255, 255, 255, .72);
}
&-suggestions {
.highlighted {
background: transparent linear-gradient(to bottom, mc('blue', '500'), mc('blue', '700'));

@ -5,6 +5,7 @@ query ($query: String!) {
id
title
description
snippet
path
locale
}

@ -3,6 +3,39 @@ const graphHelper = require('../../helpers/graph')
/* global WIKI */
/**
 * Split a search query into terms used to position snippet excerpts.
 *
 * Every whitespace-separated token contributes the token as typed and the
 * token with leading/trailing query-operator characters removed. Candidates
 * are trimmed, those shorter than two characters are dropped, duplicates are
 * removed, and the result is ordered longest first.
 *
 * @param {string} [query=''] Raw search query.
 * @returns {string[]} Terms, longest first.
 */
function getSearchTerms(query = '') {
  const operatorEdges = /^[*+"'():|&!<>\-]+|[*+"'():|&!<>\-]+$/g
  const candidates = []
  for (const token of query.split(/\s+/)) {
    candidates.push(_.trim(token), _.trim(token.replace(operatorEdges, '')))
  }
  const usable = _.uniq(candidates.filter(candidate => candidate.length > 1))
  return _.sortBy(usable, candidate => -candidate.length)
}
/**
 * Cut an excerpt out of a text, positioned around a search term when found.
 *
 * The text is trimmed first. Terms from `getSearchTerms(query)` are tried in
 * order; for the first one that occurs (case-insensitive), the window starts
 * up to 48 characters before the match. Otherwise it starts at the
 * beginning. "..." is added on each side where text was cut off.
 *
 * @param {string} [source=''] Text to excerpt.
 * @param {string} [query=''] Raw search query.
 * @param {number} [snippetLength=160] Maximum number of characters taken from the text.
 * @returns {string} The excerpt, or '' when the text is empty.
 */
function buildSnippetExcerpt(source = '', query = '', snippetLength = 160) {
  const text = _.trim(source)
  if (!text) {
    return ''
  }
  const haystack = text.toLowerCase()
  const positions = getSearchTerms(query).map(term => haystack.indexOf(term.toLowerCase()))
  const hitAt = _.find(positions, position => position >= 0) ?? -1
  const from = hitAt >= 0 ? Math.max(0, hitAt - 48) : 0
  const to = from + snippetLength
  const head = from > 0 ? '...' : ''
  const tail = to < text.length ? '...' : ''
  return `${head}${text.slice(from, to).trim()}${tail}`
}
/**
 * Build the snippet for a search result by delegating to
 * `buildSnippetExcerpt` with its default excerpt length.
 *
 * @param {string} [source=''] Text to excerpt.
 * @param {string} [query=''] Raw search query.
 * @returns {string} The excerpt.
 */
function buildSearchSnippet(source = '', query = '') {
  const excerpt = buildSnippetExcerpt(source, query)
  return excerpt
}
module.exports = {
Query: {
async pages() { return {} }
@ -52,15 +85,23 @@ module.exports = {
async search (obj, args, context) {
if (WIKI.data.searchEngine) {
const resp = await WIKI.data.searchEngine.query(args.query, args)
const filteredResults = _.filter(resp.results, r => {
return WIKI.auth.checkAccess(context.req.user, ['read:pages'], {
path: r.path,
locale: r.locale,
tags: r.tags // Tags are needed since access permissions can be limited by page tags too
})
})
return {
...resp,
results: _.filter(resp.results, r => {
return WIKI.auth.checkAccess(context.req.user, ['read:pages'], {
path: r.path,
locale: r.locale,
tags: r.tags // Tags are needed since access permissions can be limited by page tags too
})
})
results: filteredResults.map(r => {
return {
...r,
snippet: buildSearchSnippet(r.snippet || r.content || r.description || r.title || r.path || '', args.query)
}
}),
totalHits: filteredResults.length
}
} else {
return {

@ -264,6 +264,7 @@ type PageSearchResult {
id: String!
title: String!
description: String!
snippet: String!
path: String!
locale: String!
}

@ -338,6 +338,7 @@ module.exports = class Page extends Model {
// -> Add to Search Index
const pageContents = await WIKI.models.pages.query().findById(page.id).select('render')
page.safeContent = WIKI.models.pages.cleanHTML(pageContents.render)
page.searchContent = WIKI.models.pages.buildSearchContent(pageContents.render)
await WIKI.data.searchEngine.created(page)
// -> Add to Storage
@ -449,6 +450,7 @@ module.exports = class Page extends Model {
// -> Update Search Index
const pageContents = await WIKI.models.pages.query().findById(page.id).select('render')
page.safeContent = WIKI.models.pages.cleanHTML(pageContents.render)
page.searchContent = WIKI.models.pages.buildSearchContent(pageContents.render)
await WIKI.data.searchEngine.updated(page)
// -> Update on Storage
@ -741,6 +743,7 @@ module.exports = class Page extends Model {
// -> Rename in Search Index
const pageContents = await WIKI.models.pages.query().findById(page.id).select('render')
page.safeContent = WIKI.models.pages.cleanHTML(pageContents.render)
page.searchContent = WIKI.models.pages.buildSearchContent(pageContents.render)
await WIKI.data.searchEngine.renamed({
...page,
destinationPath: opts.destinationPath,
@ -1160,6 +1163,55 @@ module.exports = class Page extends Model {
.split(' ').filter(w => w.length > 1).join(' ').toLowerCase()
}
static buildSearchContent(rawHTML = '') {
const $ = cheerio.load(rawHTML || '', {
decodeEntities: true
})
$('.toc-anchor').remove()
const blocks = []
$('h1,h2,h3,h4,h5,h6,p,li,td,th,blockquote,pre').each((idx, el) => {
const text = he.decode(striptags($(el).text(), [], ' '))
.replace(/\s+/g, ' ')
.trim()
if (!text) {
return
}
if (_.last(blocks)?.text === text) {
return
}
blocks.push({
text,
isHeader: /^h[1-6]$/i.test(el.name)
})
})
if (blocks.length < 1) {
return he.decode(striptags(rawHTML || '', [], ' '))
.replace(/\s+/g, ' ')
.trim()
}
const segments = []
for (let i = 0; i < blocks.length; i++) {
const block = blocks[i]
const nextBlock = blocks[i + 1]
if (block.isHeader && nextBlock && !nextBlock.isHeader) {
segments.push(`${block.text}: ${nextBlock.text}`)
i++
} else {
segments.push(block.text)
}
}
return segments.join(' | ')
}
/**
* Subscribe to HA propagation events
*/

@ -32,7 +32,8 @@ module.exports = {
'locale',
'path',
'title',
'description'
'description',
'content'
],
advancedSyntax: true
})
@ -55,7 +56,8 @@ module.exports = {
locale: r.locale,
path: r.path,
title: r.title,
description: r.description
description: r.description,
content: r.content
})),
suggestions: [],
totalHits: results.nbHits
@ -77,7 +79,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.safeContent
content: page.searchContent || page.safeContent
})
},
/**
@ -90,7 +92,7 @@ module.exports = {
objectID: page.hash,
title: page.title,
description: page.description,
content: page.safeContent
content: page.searchContent || page.safeContent
})
},
/**
@ -114,7 +116,7 @@ module.exports = {
path: page.destinationPath,
title: page.title,
description: page.description,
content: page.safeContent
content: page.searchContent || page.safeContent
})
},
/**
@ -176,7 +178,7 @@ module.exports = {
path: doc.path,
title: doc.title,
description: doc.description,
content: WIKI.models.pages.cleanHTML(doc.render)
content: WIKI.models.pages.buildSearchContent(doc.render)
}))
)
} catch (err) {

@ -198,7 +198,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.safeContent
content: page.searchContent || page.safeContent
}
}
])
@ -221,7 +221,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.safeContent
content: page.searchContent || page.safeContent
}
}
])
@ -269,7 +269,7 @@ module.exports = {
path: page.destinationPath,
title: page.title,
description: page.description,
content: page.safeContent
content: page.searchContent || page.safeContent
}
}
])
@ -336,7 +336,7 @@ module.exports = {
path: doc.path,
title: doc.title,
description: doc.description,
content: WIKI.models.pages.cleanHTML(doc.render)
content: WIKI.models.pages.buildSearchContent(doc.render)
}
})))
}).promise()

@ -95,7 +95,7 @@ module.exports = {
count: true,
scoringProfile: 'fieldWeights',
search: q,
select: 'id, locale, path, title, description',
select: 'id, locale, path, title, description, content',
queryType: QueryType.simple,
top: 50
})
@ -147,7 +147,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.safeContent
content: page.searchContent || page.safeContent
}
])
},
@ -164,7 +164,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.safeContent
content: page.searchContent || page.safeContent
}
])
},
@ -200,7 +200,7 @@ module.exports = {
path: page.destinationPath,
title: page.title,
description: page.description,
content: page.safeContent
content: page.searchContent || page.safeContent
}
])
},
@ -223,7 +223,7 @@ module.exports = {
locale: chunk.locale,
title: chunk.title,
description: chunk.description,
content: WIKI.models.pages.cleanHTML(chunk.render)
content: WIKI.models.pages.buildSearchContent(chunk.render)
})
}
}),

@ -160,7 +160,7 @@ module.exports = {
},
from: 0,
size: 50,
_source: ['title', 'description', 'path', 'locale'],
_source: ['title', 'description', 'path', 'locale', 'content'],
suggest: {
suggestions: {
text: q,
@ -180,7 +180,8 @@ module.exports = {
locale: r._source.locale,
path: r._source.path,
title: r._source.title,
description: r._source.description
description: r._source.description,
content: r._source.content
})),
suggestions: _.reject(_.get(results, 'suggest.suggestions', []).map(s => _.get(s, 'options[0].text', false)), s => !s),
totalHits: _.get(results, this.config.apiVersion === '8.x' ? 'hits.total.value' : 'body.hits.total.value', _.get(results, this.config.apiVersion === '8.x' ? 'hits.total' : 'body.hits.total', 0))
@ -236,7 +237,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.safeContent,
content: page.searchContent || page.safeContent,
tags: await this.buildTags(page.id)
},
refresh: true
@ -258,7 +259,7 @@ module.exports = {
path: page.path,
title: page.title,
description: page.description,
content: page.safeContent,
content: page.searchContent || page.safeContent,
tags: await this.buildTags(page.id)
},
refresh: true
@ -299,7 +300,7 @@ module.exports = {
path: page.destinationPath,
title: page.title,
description: page.description,
content: page.safeContent,
content: page.searchContent || page.safeContent,
tags: await this.buildTags(page.id)
},
refresh: true
@ -365,6 +366,7 @@ module.exports = {
}
})
doc.safeContent = WIKI.models.pages.cleanHTML(doc.render)
doc.searchContent = WIKI.models.pages.buildSearchContent(doc.render)
result.push({
suggest: this.buildSuggest(doc),
tags: doc.tags,
@ -372,7 +374,7 @@ module.exports = {
path: doc.path,
title: doc.title,
description: doc.description,
content: doc.safeContent
content: doc.searchContent || doc.safeContent
})
return result
}, []),

@ -62,7 +62,7 @@ module.exports = {
try {
let suggestions = []
let qry = `
SELECT id, path, locale, title, description
SELECT id, path, locale, title, description, content
FROM "pagesVector", to_tsquery(?,?) query
WHERE (query @@ "tokens" OR path ILIKE ?)
`
@ -106,10 +106,10 @@ module.exports = {
*/
async created(page) {
await WIKI.models.knex.raw(`
INSERT INTO "pagesVector" (path, locale, title, description, "tokens") VALUES (
?, ?, ?, ?, (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C'))
INSERT INTO "pagesVector" (path, locale, title, description, "tokens", content) VALUES (
?, ?, ?, ?, (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C')), ?
)
`, [page.path, page.localeCode, page.title, page.description, page.title, page.description, page.safeContent])
`, [page.path, page.localeCode, page.title, page.description, page.title, page.description, page.safeContent, page.searchContent || page.safeContent])
},
/**
* UPDATE
@ -121,11 +121,12 @@ module.exports = {
UPDATE "pagesVector" SET
title = ?,
description = ?,
content = ?,
tokens = (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') ||
setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') ||
setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C'))
WHERE path = ? AND locale = ?
`, [page.title, page.description, page.title, page.description, page.safeContent, page.path, page.localeCode])
`, [page.title, page.description, page.searchContent || page.safeContent, page.title, page.description, page.safeContent, page.path, page.localeCode])
},
/**
* DELETE
@ -169,11 +170,12 @@ module.exports = {
objectMode: true,
transform: async (page, enc, cb) => {
const content = WIKI.models.pages.cleanHTML(page.render)
const searchContent = WIKI.models.pages.buildSearchContent(page.render)
await WIKI.models.knex.raw(`
INSERT INTO "pagesVector" (path, locale, title, description, "tokens", content) VALUES (
?, ?, ?, ?, (setweight(to_tsvector('${this.config.dictLanguage}', ?), 'A') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'B') || setweight(to_tsvector('${this.config.dictLanguage}', ?), 'C')), ?
)
`, [page.path, page.localeCode, page.title, page.description, page.title, page.description, content, content])
`, [page.path, page.localeCode, page.title, page.description, page.title, page.description, content, searchContent || content])
cb()
}
})

Loading…
Cancel
Save