diff --git a/server/modules/search/sqlite3/definition.yml b/server/modules/search/sqlite3/definition.yml
new file mode 100644
index 00000000..810f6e81
--- /dev/null
+++ b/server/modules/search/sqlite3/definition.yml
@@ -0,0 +1,8 @@
+key: sqlite3
+title: Database - sqlite3
+description: Advanced Sqlite3-based search engine.
+author: dzruyk
+logo: https://static.requarks.io/logo/database.svg
+website: https://www.requarks.io/
+isAvailable: true
+props: {}
diff --git a/server/modules/search/sqlite3/engine.js b/server/modules/search/sqlite3/engine.js
new file mode 100644
index 00000000..34bce709
--- /dev/null
+++ b/server/modules/search/sqlite3/engine.js
@@ -0,0 +1,162 @@
+const stream = require('stream')
+const Promise = require('bluebird')
+const _ = require('lodash')
+
+const matchquery = require('./match-query')
+
+/* global WIKI */
+
+module.exports = {
+  /**
+   * ACTIVATE
+   *
+   * Refuses activation unless the wiki runs on SQLite and the SQLite build
+   * lists the FTS5 module among its compile options.
+   */
+  async activate() {
+    if (WIKI.config.db.type !== 'sqlite') {
+      throw new WIKI.Error.SearchActivationFailed('Must use Sqlite3 database to activate this engine!')
+    }
+    const opts = await WIKI.models.knex.schema.raw('PRAGMA compile_options')
+    if (!_.find(opts, { compile_options: 'ENABLE_FTS5' })) {
+      throw new WIKI.Error.SearchActivationFailed('Sqlite3 must have FTS5 module!')
+    }
+  },
+  /**
+   * DEACTIVATE
+   *
+   * Drops the index table. dropTableIfExists is used so that deactivating an
+   * engine whose index table is missing does not throw.
+   */
+  async deactivate() {
+    WIKI.logger.info(`(SEARCH/SQLITE3) Dropping index tables...`)
+    await WIKI.models.knex.schema.dropTableIfExists('fts5_pages_vector')
+    WIKI.logger.info(`(SEARCH/SQLITE3) Index tables have been dropped.`)
+  },
+  /**
+   * INIT
+   *
+   * Creates the FTS5 index table when it does not exist yet.
+   */
+  async init() {
+    WIKI.logger.info(`(SEARCH/SQLITE3) Initializing...`)
+
+    // -> Create Search Index
+    const indexExists = await WIKI.models.knex.schema.hasTable('fts5_pages_vector')
+    if (!indexExists) {
+      WIKI.logger.info(`(SEARCH/SQLITE3) Creating Pages Vector table...`)
+      await WIKI.models.knex.raw('CREATE VIRTUAL TABLE fts5_pages_vector USING fts5(tokenize=unicode61, path, locale, title, description, content)')
+    }
+    WIKI.logger.info(`(SEARCH/SQLITE3) Initialization completed.`)
+  },
+  /**
+   * QUERY
+   *
+   * @param {String} q Query
+   * @param {Object} opts Additional options (not used by this engine)
+   * @returns {Promise} resolves to { results, suggestions, totalHits }; an empty
+   * result set when the query holds no searchable term or the search fails
+   */
+  async query(q, opts) {
+    const noHits = () => ({ results: [], suggestions: [], totalHits: 0 })
+    try {
+      const o = matchquery.parse(q)
+      if (!o.str) {
+        // Nothing searchable was entered: do not run an empty MATCH
+        return noHits()
+      }
+
+      const qry = `
+        SELECT rowid AS id, path, locale, title, description
+        FROM "fts5_pages_vector"`
+      const qryEnd = 'ORDER BY rank'
+      // A fully negated query is answered by excluding the rows that match
+      // the positive form of the expression (see match-query.js parse())
+      const qryWhere = o.negated
+        ? 'WHERE rowid NOT IN (SELECT rowid FROM fts5_pages_vector WHERE fts5_pages_vector MATCH ?)'
+        : 'WHERE fts5_pages_vector MATCH ?'
+
+      const results = await WIKI.models.knex.raw(`
+        ${qry}
+        ${qryWhere}
+        ${qryEnd}
+      `, [o.str])
+      return {
+        results,
+        suggestions: [],
+        totalHits: results.length
+      }
+    } catch (err) {
+      WIKI.logger.warn('Search Engine Error:')
+      WIKI.logger.warn(err)
+      // Keep the response shape stable instead of resolving to undefined
+      return noHits()
+    }
+  },
+  /**
+   * CREATE
+   *
+   * @param {Object} page Page to create
+   */
+  async created(page) {
+    await WIKI.models.knex.raw(`
+      INSERT INTO "fts5_pages_vector" (path, locale, title, description, content) VALUES (
+        ?, ?, ?, ?, ?
+      )
+    `, [page.path, page.localeCode, page.title, page.description, page.safeContent])
+  },
+  /**
+   * UPDATE
+   *
+   * @param {Object} page Page to update
+   */
+  async updated(page) {
+    await WIKI.models.knex.raw(`
+      UPDATE "fts5_pages_vector" SET
+        title = ?,
+        description = ?,
+        content = ?
+      WHERE path = ? AND locale = ?
+    `, [page.title, page.description, page.safeContent, page.path, page.localeCode])
+  },
+  /**
+   * DELETE
+   *
+   * @param {Object} page Page to delete
+   */
+  async deleted(page) {
+    await WIKI.models.knex('fts5_pages_vector').where({
+      locale: page.localeCode,
+      path: page.path
+    }).del().limit(1)
+  },
+  /**
+   * RENAME
+   *
+   * @param {Object} page Page to rename
+   */
+  async renamed(page) {
+    await WIKI.models.knex('fts5_pages_vector').where({
+      locale: page.localeCode,
+      path: page.path
+    }).update({
+      locale: page.destinationLocaleCode,
+      path: page.destinationPath
+    })
+  },
+  /**
+   * REBUILD INDEX
+   */
+  async rebuild() {
+    WIKI.logger.info(`(SEARCH/SQLITE3) Rebuilding Index...`)
+    await WIKI.models.knex('fts5_pages_vector').truncate()
+
+    // NOTE(review): created()/updated() index page.safeContent while this copies
+    // the raw pages.content column - confirm the difference is intended
+    await WIKI.models.knex.raw(`
+      INSERT INTO "fts5_pages_vector" (path, locale, title, description, content)
+        SELECT path, localeCode, title, description, content FROM pages`
+    )
+    WIKI.logger.info(`(SEARCH/SQLITE3) Index rebuilt successfully.`)
+  }
+}
diff --git a/server/modules/search/sqlite3/match-query.js b/server/modules/search/sqlite3/match-query.js
new file mode 100644
index 00000000..84685b05
--- /dev/null
+++ b/server/modules/search/sqlite3/match-query.js
@@ -0,0 +1,448 @@
+const _ = require('lodash')
+/*
+ * Full text query preprocessor for sqlite3 FTS similar to pg-tsquery.
+ * Converts input string into internal sqlite match query
+ * FTS info: https://www.sqlite.org/fts5.html#full_text_query_syntax
+ */
+
+/*
+| input | output |
+| --- | --- |
+| `foo bar` | `"foo" AND "bar"` |
+| `foo -bar`, `foo !bar`, `foo + !bar` | `"foo" NOT "bar"` |
+| `foo bar,bip`, `foo+bar | bip` | `("foo" AND "bar") OR "bip"` |
+| `foo (bar,bip)`, `foo+(bar|bip)` | `"foo" AND ("bar" OR "bip")` |
+| `foo*,bar* bana*` | `"foo"* OR ("bar"* AND "bana"*)` |
+*/
+
+module.exports = {
+  /**
+   * Convert a user supplied search string into an FTS5 MATCH expression.
+   *
+   * @param {String} input Raw search string
+   * @returns {{negated: Boolean, str: String}} `str` is the MATCH expression
+   * (empty when nothing could be parsed); `negated` tells the caller that
+   * the whole expression has to be inverted
+   */
+  parse(input) {
+    const p = new MatchQueryParser()
+    const v = p.parse(input)
+    if (v === undefined) {
+      // Blank or unparsable input: there is no tree to serialize
+      return { negated: false, str: '' }
+    }
+
+    const negated = v.negated
+    /*
+     * Since sqlite does not support top level negated MATCH queries
+     * the calling function needs to create a negated sql query like
+     * select * not in (select ... match)
+     */
+    if (negated) {
+      v.negated = false
+    }
+    return {
+      negated,
+      str: v.toString()
+    }
+  }
+}
+
+class Token {
+  /**
+   * @param {String} type Lexeme class: 'id', 'not', 'and', 'or', '(', ')' or '*'
+   * @param {String} value Text the token was read from
+   */
+  constructor(type, value) {
+    this.type = type
+    this.value = value
+  }
+}
+
+class Node {
+  /**
+   * @param {Object} def
+   * @param {String} def.type 'id' for a term, 'and' / 'or' for an operator
+   * @param {String} [def.value] Term text (id nodes)
+   * @param {Boolean} [def.negated] True when the node is prefixed by NOT
+   * @param {Node[]} [def.args] Operands (operator nodes)
+   * @param {Node} [def.parNode] Parent node, undefined for the root
+   * @param {Boolean} [def.star] True for a prefix search (`term*`)
+   */
+  constructor({ type, value, negated = false, args, parNode = undefined, star = false }) {
+    this.type = type
+    this.value = value
+    this.negated = negated
+    this.star = star
+    this.args = args
+    if (this.args) {
+      this.args.forEach(item => { item.parNode = this })
+    }
+    this.parNode = parNode
+  }
+
+  /**
+   * Serialize the subtree into FTS5 query syntax.
+   *
+   * @returns {String}
+   */
+  toString() {
+    let s = ''
+    if (this.type === 'id') {
+      // FTS5 strings escape an embedded double quote by doubling it
+      s = `"${String(this.value).replace(/"/g, '""')}"`
+      if (this.star) {
+        s += '*'
+      }
+    } else {
+      let separator = ''
+
+      if (this.type === 'and') {
+        separator = ' AND '
+      } else if (this.type === 'or') {
+        separator = ' OR '
+      } else {
+        throw new Error('should not reach')
+      }
+
+      if (this.args && this.args.length > 0) {
+        this.args.forEach(item => {
+          if (s !== '') {
+            if (item.negated && this.type === 'and') {
+              // `a NOT b` is the FTS5 spelling of "a and not b"
+              s += ' '
+            } else {
+              // NOTE(review): a negated operand of an OR is emitted as
+              // `a OR NOT b` - confirm FTS5 accepts NOT in that position
+              s += separator
+            }
+          }
+          s += item
+        })
+      }
+      if (this.parNode !== undefined || this.negated) {
+        s = `(${s})`
+      }
+    }
+    if (this.negated) {
+      s = 'NOT ' + s
+    }
+    return s
+  }
+}
+
+/**
+ * De Morgan counterpart of an operator node type.
+ *
+ * @param {Node} node Operator node
+ * @returns {String} 'and' for an or-node, 'or' for an and-node
+ */
+function negateNodeType(node) {
+  if (node.type === 'or') {
+    return 'and'
+  } else if (node.type === 'and') {
+    return 'or'
+  } else {
+    throw new Error('should not reach')
+  }
+}
+
+/**
+ * Flip the `negated` flag of every node in the list (in place).
+ *
+ * @param {Node[]} lst
+ */
+function negateNodes(lst) {
+  lst.forEach(item => {
+    if (!(item instanceof Node)) {
+      throw new Error('should not reach')
+    }
+    item.negated = !item.negated
+  })
+}
+
+class MatchQueryParser {
+  constructor() {
+    // Single character tokens; a double quote opens a quoted phrase
+    this.tokenRegex = /^([",!*()|&+-])/
+    this.phraseSeparator = ' '
+    // Characters that terminate a bare word
+    this.terms = /[ \t,!*()|&+-]/
+    this.knownLexemes = {
+      '-': 'not', '!': 'not', 'not': 'not',
+      '&': 'and', '+': 'and', 'and': 'and',
+      ',': 'or', 'or': 'or', '|': 'or'
+    }
+  }
+
+  /**
+   * @param {String} s Candidate lexeme
+   * @returns {Token|undefined} Operator token when `s` is an operator
+   * spelling (case-insensitive), undefined otherwise
+   */
+  asKeywordToken(s) {
+    const k = s.toLowerCase()
+    if (!_.has(this.knownLexemes, k)) {
+      return undefined
+    }
+    return new Token(this.knownLexemes[k], s)
+  }
+
+  /**
+   * Read the token that starts at `this.idx` and advance past it.
+   *
+   * @returns {Token|undefined} undefined once the input is exhausted
+   */
+  intNextToken() {
+    let tail = this.input.substring(this.idx).trimStart()
+    if (!tail) {
+      return undefined
+    }
+
+    // Account for the whitespace that was just trimmed off
+    this.idx = this.input.length - tail.length
+
+    const m = tail.match(this.tokenRegex)
+    if (m) {
+      if (m[0] === '"') {
+        // Quoted phrase: runs up to the closing quote, or to the end of
+        // the input when the quote is never closed
+        const idx = tail.indexOf('"', 1)
+        if (idx === -1) {
+          tail = tail.substring(1)
+          this.idx = this.input.length
+        } else {
+          tail = tail.substring(1, idx)
+          this.idx += idx + 1
+        }
+        return new Token('id', tail)
+      }
+      this.idx += m[0].length
+      const keyword = this.asKeywordToken(m[0])
+      return keyword || new Token(m[0], m[0])
+    }
+
+    // this is literal string, find next valid token start
+    const idx = tail.search(this.terms)
+    if (idx > 0) {
+      tail = tail.substring(0, idx)
+    }
+    this.idx += tail.length
+
+    const keyword = this.asKeywordToken(tail)
+    return keyword || new Token('id', tail)
+  }
+
+  /** Advance to the next token and return it (undefined at end of input). */
+  nextToken() {
+    this.tok = this.intNextToken()
+    return this.tok
+  }
+
+  /** True when the current token exists and has type `v`. */
+  match(v) {
+    if (this.tok === undefined) {
+      return false
+    }
+    return this.tok.type === v
+  }
+
+  /** Consume the current token when it has type `v`; report whether it did. */
+  eat(v) {
+    if (!this.match(v)) {
+      return false
+    }
+    this.nextToken()
+    return true
+  }
+
+  /** Recursively set `parNode` on `node` and on every Node below it. */
+  setParent(node, par) {
+    if (node === undefined) {
+      return undefined
+    }
+
+    node.parNode = par
+    if (!node.args) {
+      return
+    }
+
+    node.args.forEach(item => {
+      if (item instanceof Node) {
+        this.setParent(item, node)
+      }
+    })
+  }
+
+  /*
+   * Sqlite3 `NOT` operator is binary but our input search string
+   * has unary not ('!', '-') operators so we need to preprocess the request
+   * and rearrange some items to generate valid queries
+   */
+  preprocess(node) {
+    if (node === undefined || node.args === undefined) {
+      return node
+    }
+
+    node.args.forEach(item => {
+      if (item instanceof Node) {
+        this.preprocess(item)
+      }
+    })
+
+    // try to rearrange items
+    const l = []
+    let nl = []
+    node.args.forEach(item => {
+      if (item.negated) {
+        nl.push(item)
+      } else {
+        l.push(item)
+      }
+    })
+
+    if (l.length === 0 && nl.length > 1) {
+      /* invert node type if all children are negated */
+      node.negated = !node.negated
+      node.type = negateNodeType(node)
+      negateNodes(node.args)
+      return node
+    } else if (nl.length > 1) {
+      // merge multiple negated nodes into one, since NOT(A & B) = NOT(A) | NOT(B)
+      negateNodes(nl)
+      nl = [
+        new Node({
+          type: negateNodeType(node),
+          parNode: node,
+          negated: true,
+          args: nl
+        })
+      ]
+    }
+    node.args = l.concat(nl)
+
+    return node
+  }
+
+  /**
+   * Parse a search string into a query tree.
+   *
+   * @param {String} str Raw search string (null / undefined count as empty)
+   * @returns {Node|undefined} Root node, undefined when nothing could be parsed
+   */
+  parse(str) {
+    this.input = _.toString(str)
+    this.tok = undefined
+    this.idx = 0
+
+    this.nextToken()
+    const o = this.parseOr()
+    this.setParent(o, undefined)
+    return this.preprocess(o)
+  }
+
+  /** or-expression := and-expression { 'or' and-expression } */
+  parseOr() {
+    let o = this.parseAnd()
+    if (!o) {
+      return undefined
+    } else if (!this.match('or')) {
+      return o
+    }
+
+    const l = [o]
+
+    while (this.eat('or')) {
+      o = this.parseAnd()
+      if (!o) {
+        break
+      }
+      l.push(o)
+    }
+    if (l.length === 1) {
+      // dangling `or` without a right operand: keep the bare operand
+      return l[0]
+    }
+    return new Node({
+      type: 'or',
+      args: l
+    })
+  }
+
+  /** and-expression := literal { [ 'and' ] literal } */
+  parseAnd() {
+    let o = this.parseLit()
+    if (!o) {
+      return undefined
+    }
+
+    const l = [o]
+    while (true) {
+      this.eat('and') // optional 'and' keyword
+      o = this.parseLit()
+      if (!o) {
+        break
+      }
+      l.push(o)
+    }
+    if (l.length === 1) {
+      return l[0]
+    }
+
+    return new Node({
+      type: 'and',
+      args: l
+    })
+  }
+
+  /** literal := [ 'not' ] ( '(' or-expression ')' | term [ '*' ] ) */
+  parseLit() {
+    let o = this.tok
+    let negated = false
+    let star = false
+
+    if (o === undefined) {
+      return o
+    }
+
+    if (this.eat('not')) {
+      if (this.tok === undefined) {
+        // a trailing operator has nothing to negate: treat it as a plain term
+        return new Node({
+          type: 'id',
+          negated: false,
+          value: o.value
+        })
+      }
+      negated = true
+      o = this.tok
+    }
+
+    if (this.eat('(')) {
+      const n = this.parseOr()
+      if (!this.eat(')') || n === undefined) {
+        return undefined
+      }
+      if (negated) {
+        // toggle, so a negation written inside the parentheses survives
+        n.negated = !n.negated
+      }
+      return n
+    }
+    if (['and', 'or', '(', ')'].indexOf(o.type) >= 0) {
+      return undefined
+    }
+
+    this.nextToken()
+    if (this.eat('*')) {
+      star = true
+    }
+
+    return new Node({
+      type: 'id',
+      negated,
+      star,
+      value: o.value
+    })
+  }
+}