feat: add sqlite query preprocessor

pull/6038/head
dzruyk 3 years ago
parent fcbb8ecb30
commit eea6ee229a

@ -3,6 +3,8 @@ const Promise = require('bluebird')
const pipeline = Promise.promisify(stream.pipeline)
const _ = require('lodash')
const matchquery = require('./match-query')
/* global WIKI */
module.exports = {
@ -44,13 +46,20 @@ module.exports = {
try {
let qry = `
SELECT rowid as id, path, locale, title, description
FROM "fts5_pages_vector"
WHERE content MATCH ?
ORDER BY rank
`
let qryParams = [ q ]
FROM "fts5_pages_vector"`
let qryEnd = 'ORDER BY rank'
let qryWhere = 'WHERE fts5_pages_vector MATCH ?'
let o = matchquery.parse(q)
if (o.negated)
qryWhere = 'WHERE rowid not in (select rowid from fts5_pages_vector where fts5_pages_vector MATCH ?)'
let qryParams = [ o.str ]
const results = await WIKI.models.knex.raw(qry, qryParams)
const results = await WIKI.models.knex.raw(`
${qry}
${qryWhere}
${qryEnd}
` , qryParams)
return {
results,
suggestions: [],
@ -119,24 +128,10 @@ module.exports = {
WIKI.logger.info(`(SEARCH/SQLITE3) Rebuilding Index...`)
await WIKI.models.knex('fts5_pages_vector').truncate()
await pipeline(
WIKI.models.knex.column('path', 'localeCode', 'title', 'description', 'render').select().from('pages').where({
isPublished: true,
isPrivate: false
}).stream(),
new stream.Transform({
objectMode: true,
transform: async (page, enc, cb) => {
const content = WIKI.models.pages.cleanHTML(page.render)
await WIKI.models.knex.raw(`
INSERT INTO "fts5_pages_vector" (path, locale, title, description, content) VALUES (
?, ?, ?, ?, ?
)
`, [page.path, page.localeCode, page.title, page.description, content])
cb()
}
})
)
WIKI.logger.info(`(SEARCH/SQLITE3) Index rebuilt successfully.`)
await WIKI.models.knex.raw(`
INSERT INTO "fts5_pages_vector" (path, locale, title, description, content)
SELECT path, localeCode, title, description, content from pages`)
}
}

@ -0,0 +1,349 @@
const _ = require('lodash')
/*
* Full text query preprocessor for sqlite3 FTS similar to pg-tsquery.
* Converts input string into internal sqlite match query
* FTS info: https://www.sqlite.org/fts5.html#full_text_query_syntax
*/
/*
| input | output |
| --- | --- |
| `foo bar` | `foo bar` |
| `foo -bar`, `foo !bar`, `foo + !bar` | `foo NOT bar` |
| `foo bar,bip`, `foo+bar | bip` | `(foo bar) OR bip` |
| `foo (bar,bip)`, `foo+(bar|bip)` | `foo (bar OR bip)` |
| `foo*,bar* bana*` | `(foo *) or (bar * bana*)` |
*/
module.exports = {
parse(input) {
let p = new MatchQueryParser()
let v = p.parse(input)
let negated = v.negated
/*
* Since sqlite does not support top level negated MATCH queries
* calling function need to create negated sql query like
* select * not in (select ... match)
*/
if (negated)
v.negated = false
return {
negated,
str: v.toString()
}
}
}
class Token
{
constructor(type, value) {
this.type = type
this.value = value
}
}
class Node
{
constructor({type, value, negated = false, args, parNode = undefined, star = false}) {
this.type = type
this.value = value
this.negated = negated
this.star = star
this.args = args
if (this.args) {
this.args.forEach(item => { item.parNode = this })
}
this.parNode = parNode
}
toString() {
let s = ''
if (this.type == 'id') {
s = `"${this.value}"`
if (this.star)
s += '*'
} else {
let separator = ''
if (this.type == 'and') {
separator = ' AND '
} else if (this.type == 'or') {
separator = ' OR '
} else {
throw new Error('should not reach')
}
if (this.args && this.args.length > 0)
this.args.forEach(item => {
if (s != '') {
if (item.negated && this.type == 'and')
s += ' '
else
s += separator
}
s += item
})
if (this.parNode !== undefined || this.negated)
s = `(${s})`
}
if (this.negated)
s = 'NOT ' + s
return s
}
}
function negateNodeType(node)
{
if (node.type == 'or')
return 'and'
else if (node.type == 'and')
return 'or'
else
throw new Error('should not reach')
}
function negateNodes(lst)
{
lst.forEach(item => {
if (!(item instanceof Node))
throw new Error('should not reach')
item.negated = !item.negated
})
}
class MatchQueryParser
{
constructor() {
this.tokenRegex = /^([",!*\(\)-])/
this.phraseSeparator = ' '
this.terms = /[ \t,!*\(\)-]/
this.knownLexemes = {
'-' : 'not', '!' : 'not', 'not' : 'not',
'&' : 'and', 'and' : 'and',
',' : 'or', 'or' : 'or', '|' : 'or',
}
}
asKeywordToken(s) {
const k = s.toLowerCase()
if (!this.knownLexemes.hasOwnProperty(k))
return undefined
return new Token(this.knownLexemes[k], s)
}
intNextToken() {
let tail = this.input.substring(this.idx).trimStart()
if (!tail)
return undefined
tail = tail.trimStart()
this.idx = this.input.length - tail.length
let m = tail.match(this.tokenRegex)
if (m) {
if (m[0] == '"') {
let idx = tail.indexOf('"', 1)
if (idx == -1) {
tail = tail.substring(1)
this.idx = this.input.length
} else {
tail = tail.substring(1, idx)
this.idx += idx + 1
}
return new Token('id', tail)
}
this.idx += m[0].length
let keyword = this.asKeywordToken(m[0])
return keyword || new Token(m[0], m[0])
}
// this is literal string, find next valid token start
let idx = tail.search(this.terms)
if (idx > 0)
tail = tail.substring(0, idx)
this.idx += tail.length
let keyword = this.asKeywordToken(tail)
return keyword || new Token('id', tail)
}
nextToken() {
this.tok = this.intNextToken()
return this.tok
}
match(v) {
if (this.tok === undefined)
return false
return this.tok.type == v
}
eat(v) {
if (!this.match(v))
return false
this.nextToken()
return true
}
setParent(node, par) {
if (node === undefined)
return undefined
node.parNode = par
if (!node.args)
return
node.args.forEach(item => {
if (item instanceof Node)
this.setParent(item, node)
})
}
/*
* Sqlite3 `NOT` operator is binary but our input search string
* have unary not ('!', '-') operators so we need to preprocess request
* and rearange some items to generate valid queries
*/
preprocess(node) {
if (node === undefined || node.args === undefined)
return node
node.args.forEach(item => {
if (item instanceof Node)
this.preprocess(item)
})
//try to rearrange items
let l = [], nl = []
node.args.forEach(item => {
if (item.negated)
nl.push(item)
else
l.push(item)
})
if (l.length == 0 && nl.length > 1) {
/* invert node type if all children are negated */
node.negated = !node.negated
node.type = negateNodeType(node)
negateNodes(node.args)
return node
} else if (nl.length > 1) {
// merge multiple negated nodes into one, since NOT(A & B) = NOT(A) | NOT(B)
negateNodes(nl)
nl = [
new Node({
type: negateNodeType(node),
parNode: node,
negated: true,
args: nl
})
]
}
node.args = l.concat(nl)
return node
}
parse(str) {
this.input = str
this.tok = undefined
this.idx = 0
this.nextToken()
let o = this.parseOr()
this.setParent(o, undefined)
return this.preprocess(o)
}
parseOr() {
let o = this.parseAnd()
if (!o)
return undefined
if (!this.match('or'))
return o
let l = [o]
while (this.eat('or')) {
o = this.parseAnd()
if (!o)
break
l.push(o)
}
return new Node({
type: 'or',
args: l
})
}
parseAnd() {
let o = this.parseLit()
if (!o)
return undefined
let l = [o]
while (true) {
this.eat('and') //optional
o = this.parseLit()
if (!o)
break
l.push(o)
}
if (l.length == 1)
return l[0]
return new Node({
type: 'and',
args: l,
})
}
parseLit() {
let o = this.tok
let negated = false
let star = false
if (o == undefined)
return o
if (this.eat('not')) {
if (this.tok == undefined) {
return new Node({
type: 'id',
negated: false,
value: o.value,
})
}
negated = true
o = this.tok
}
if (this.eat('(')) {
let tail = this.input
let n = this.parseOr()
if (!this.eat(')') || n === undefined)
return undefined
n.negated = negated
return n
}
if (['and', 'or', '(', ')'].indexOf(o.type) >= 0) {
return undefined
}
this.nextToken()
if (this.eat('*'))
star = true
return new Node({
type: 'id',
negated,
star,
value: o.value,
})
}
}
Loading…
Cancel
Save