You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
wiki/server/modules/search/sqlite3/match-query.js

370 lines
7.4 KiB

const _ = require('lodash')
/*
* Full text query preprocessor for sqlite3 FTS similar to pg-tsquery.
* Converts input string into internal sqlite match query
* FTS info: https://www.sqlite.org/fts5.html#full_text_query_syntax
*/
/*
| input | output |
| --- | --- |
| `foo bar` | `foo bar` |
| `foo -bar`, `foo !bar`, `foo + !bar` | `foo NOT bar` |
| `foo bar,bip`, `foo+bar | bip` | `(foo bar) OR bip` |
| `foo (bar,bip)`, `foo+(bar|bip)` | `foo (bar OR bip)` |
| `foo*,bar* bana*` | `foo* OR (bar* bana*)` |
*/
module.exports = {
parse(input) {
const p = new MatchQueryParser()
const v = p.parse(input)
const negated = v.negated
/*
* Since sqlite does not support top level negated MATCH queries
* calling function need to create negated sql query like
* select * not in (select ... match)
*/
if (negated) {
v.negated = false
}
return {
negated,
str: v.toString()
}
}
}
/*
 * Lexical token produced by the tokenizer: `type` is the token category,
 * `value` is the text it was built from.
 */
class Token {
  constructor(type, value) {
    Object.assign(this, { type, value })
  }
}
/**
 * Node of the parsed query tree.
 *
 * `type` is 'id' for a search term (text in `value`, prefix match when `star`
 * is set) or 'and' / 'or' for an operator whose operands are in `args`.
 * `negated` marks the node as being under a NOT; `parNode` is the enclosing
 * node, or undefined for the root.
 */
class Node {
  constructor({ type, value, negated = false, args, parNode = undefined, star = false }) {
    this.type = type
    this.value = value
    this.negated = negated
    this.star = star
    this.args = args
    if (this.args) {
      // adopt the operands
      this.args.forEach(item => { item.parNode = this })
    }
    this.parNode = parNode
  }

  /**
   * Render this subtree as an FTS5 query string.
   *
   * @returns {string}
   * @throws {Error} when the node type is not 'id', 'and' or 'or'
   */
  toString() {
    let s = ''
    if (this.type === 'id') {
      // Terms are always emitted as quoted FTS5 strings; a double quote inside
      // such a string has to be doubled or it would terminate the string early.
      s = `"${String(this.value).replace(/"/g, '""')}"`
      if (this.star) {
        s += '*'
      }
    } else {
      let separator = ''
      if (this.type === 'and') {
        separator = ' AND '
      } else if (this.type === 'or') {
        separator = ' OR '
      } else {
        throw new Error('should not reach')
      }
      if (this.args && this.args.length > 0) {
        this.args.forEach(item => {
          if (s !== '') {
            if (item.negated && this.type === 'and') {
              // the operand renders its own 'NOT ' prefix, which serves as the
              // binary operator here: `a NOT b` rather than `a AND NOT b`
              s += ' '
            } else {
              s += separator
            }
          }
          s += item.toString()
        })
      }
      // group when nested inside another node, or when a NOT prefix follows
      if (this.parNode !== undefined || this.negated) {
        s = `(${s})`
      }
    }
    if (this.negated) {
      s = 'NOT ' + s
    }
    return s
  }
}
/*
 * Return the operator type that is dual to the given node's type:
 * 'or' for an 'and' node and 'and' for an 'or' node.
 * Throws for any other node type.
 */
function negateNodeType(node) {
  switch (node.type) {
    case 'or':
      return 'and'
    case 'and':
      return 'or'
    default:
      throw new Error('should not reach')
  }
}
/*
 * Flip the `negated` flag of every node in the list, in place.
 * Throws when an entry is not a Node instance.
 */
function negateNodes(lst) {
  const flip = entry => {
    if (entry instanceof Node) {
      entry.negated = !entry.negated
    } else {
      throw new Error('should not reach')
    }
  }
  lst.forEach(entry => flip(entry))
}
/**
 * Tokenizer and recursive-descent parser that turns a user search string into
 * a tree of `Node` objects.
 *
 * Grammar, informally:
 *   or  := and ( OR and )*
 *   and := lit ( [AND] lit )*
 *   lit := [NOT] ( '(' or ')' | term ['*'] )
 */
class MatchQueryParser {
  constructor() {
    // single-character tokens recognised at the start of the remaining input;
    // every operator symbol listed in knownLexemes has to appear here
    this.tokenRegex = /^([",!*()&|-])/
    // not read anywhere in this class
    this.phraseSeparator = ' '
    // characters that end a bare (unquoted) word
    this.terms = /[ \t,!*()&|-]/
    // operator spellings (lower case) mapped to token types
    this.knownLexemes = {
      '-': 'not', '!': 'not', 'not': 'not',
      '&': 'and', 'and': 'and',
      ',': 'or', 'or': 'or', '|': 'or'
    }
  }

  /**
   * @param {string} s candidate lexeme
   * @returns {Token|undefined} operator token when `s` is a known operator
   *   spelling (case-insensitive), otherwise undefined
   */
  asKeywordToken(s) {
    const k = s.toLowerCase()
    // The key is passed as a one-element path so that lodash looks it up
    // verbatim instead of splitting user text such as `not.length` into a
    // property path.
    if (!_.has(this.knownLexemes, [k])) {
      return undefined
    }
    return new Token(this.knownLexemes[k], s)
  }

  /**
   * Read the next token starting at `this.idx` and advance `this.idx` past it.
   *
   * @returns {Token|undefined} undefined when only whitespace is left
   */
  intNextToken() {
    let tail = this.input.substring(this.idx).trimStart()
    if (!tail) {
      return undefined
    }
    // position of the first non-whitespace character
    this.idx = this.input.length - tail.length
    const m = tail.match(this.tokenRegex)
    if (m) {
      if (m[0] === '"') {
        // quoted phrase: everything up to the closing quote is one term
        const idx = tail.indexOf('"', 1)
        if (idx === -1) {
          // no closing quote: the rest of the input is the phrase
          tail = tail.substring(1)
          this.idx = this.input.length
        } else {
          tail = tail.substring(1, idx)
          this.idx += idx + 1
        }
        return new Token('id', tail)
      }
      this.idx += m[0].length
      const keyword = this.asKeywordToken(m[0])
      // symbols that are not operators ('*', '(', ')') use themselves as type
      return keyword || new Token(m[0], m[0])
    }
    // this is literal string, find next valid token start
    const idx = tail.search(this.terms)
    if (idx > 0) {
      tail = tail.substring(0, idx)
    }
    this.idx += tail.length
    const keyword = this.asKeywordToken(tail)
    return keyword || new Token('id', tail)
  }

  /** Advance to the next token, store it in `this.tok` and return it. */
  nextToken() {
    this.tok = this.intNextToken()
    return this.tok
  }

  /** @returns {boolean} true when the current token has type `v` */
  match(v) {
    if (this.tok === undefined) {
      return false
    }
    return this.tok.type === v
  }

  /**
   * Consume the current token when it has type `v`.
   * @returns {boolean} true when a token was consumed
   */
  eat(v) {
    if (!this.match(v)) {
      return false
    }
    this.nextToken()
    return true
  }

  /** Recursively set `parNode` of `node` to `par` and of each descendant to its parent. */
  setParent(node, par) {
    if (node === undefined) {
      return undefined
    }
    node.parNode = par
    if (!node.args) {
      return
    }
    node.args.forEach(item => {
      if (item instanceof Node) {
        this.setParent(item, node)
      }
    })
  }

  /*
   * Sqlite3 `NOT` operator is binary but our input search string
   * has unary not ('!', '-') operators, so we need to preprocess the request
   * and rearrange some items to generate valid queries
   */
  preprocess(node) {
    if (node === undefined || node.args === undefined) {
      return node
    }
    // children first
    node.args.forEach(item => {
      if (item instanceof Node) {
        this.preprocess(item)
      }
    })
    // try to rearrange items: positive operands first, negated ones last
    const l = []
    let nl = []
    node.args.forEach(item => {
      if (item.negated) {
        nl.push(item)
      } else {
        l.push(item)
      }
    })
    if (l.length === 0 && nl.length > 1) {
      /*
       * all children are negated: apply De Morgan to the whole node, i.e.
       * flip its own negation, switch and <-> or, and un-negate the children
       */
      node.negated = !node.negated
      node.type = negateNodeType(node)
      negateNodes(node.args)
      return node
    } else if (nl.length > 1) {
      // merge multiple negated nodes into one negated group of the dual type,
      // e.g. NOT(A) & NOT(B) = NOT(A | B)
      negateNodes(nl)
      nl = [
        new Node({
          type: negateNodeType(node),
          parNode: node,
          negated: true,
          args: nl
        })
      ]
    }
    node.args = l.concat(nl)
    return node
  }

  /**
   * Parse a search string.
   *
   * @param {string} str raw search string
   * @returns {Node|undefined} root of the query tree, or undefined when the
   *   input holds nothing that can be parsed as a term
   */
  parse(str) {
    this.input = str
    this.tok = undefined
    this.idx = 0
    this.nextToken()
    const o = this.parseOr()
    this.setParent(o, undefined)
    return this.preprocess(o)
  }

  /** or := and ( OR and )* */
  parseOr() {
    let o = this.parseAnd()
    if (!o) {
      return undefined
    } else if (!this.match('or')) {
      return o
    }
    const l = [o]
    while (this.eat('or')) {
      o = this.parseAnd()
      if (!o) {
        break
      }
      l.push(o)
    }
    if (l.length === 1) {
      // dangling separator (`a,`): return the operand itself so that its
      // negation flag stays visible to the enclosing node / the caller
      return l[0]
    }
    return new Node({
      type: 'or',
      args: l
    })
  }

  /** and := lit ( [AND] lit )* */
  parseAnd() {
    let o = this.parseLit()
    if (!o) {
      return undefined
    }
    const l = [o]
    while (true) {
      this.eat('and') // optional 'and' keyword
      o = this.parseLit()
      if (!o) {
        break
      }
      l.push(o)
    }
    if (l.length === 1) {
      return l[0]
    }
    return new Node({
      type: 'and',
      args: l
    })
  }

  /**
   * lit := [NOT] ( '(' or ')' | term ['*'] )
   *
   * @returns {Node|undefined} undefined when the current token cannot start a
   *   literal, or when a parenthesised group is empty or unclosed
   */
  parseLit() {
    let o = this.tok
    let negated = false
    let star = false
    if (o === undefined) {
      return o
    }
    if (this.eat('not')) {
      if (this.tok === undefined) {
        // trailing NOT with nothing to negate: use the operator text as a term
        return new Node({
          type: 'id',
          negated: false,
          value: o.value
        })
      }
      negated = true
      o = this.tok
    }
    if (this.eat('(')) {
      const n = this.parseOr()
      if (!this.eat(')') || n === undefined) {
        return undefined
      }
      // A group holding a single item is that item itself and may already be
      // negated, e.g. `(-foo)`: toggle the flag rather than overwrite it.
      if (negated) {
        n.negated = !n.negated
      }
      return n
    }
    if (['and', 'or', '(', ')'].includes(o.type)) {
      return undefined
    }
    this.nextToken()
    if (this.eat('*')) {
      star = true
    }
    return new Node({
      type: 'id',
      negated,
      star,
      value: o.value
    })
  }
}