mirror of https://github.com/requarks/wiki
parent
55a63a3c5a
commit
6c5599b29c
@ -0,0 +1,41 @@
|
||||
#!/bin/bash
#
# Build the wiki Docker image for the requested platform and push it to the
# Azure Container Registry under a timestamp-based tag.
#
# Usage:   <script> <platform>
# Example: <script> linux/amd64

# Stop at the first failing command, unset variable or failed pipeline stage,
# so a failed registry login or image build never falls through to the push.
set -euo pipefail

# Registry and repository used for login, build and push.
REGISTRY_NAME="acruicwiki"
IMAGE_REPOSITORY="${REGISTRY_NAME}.azurecr.io/uic-wiki"

# Check if platform argument is provided (diagnostics go to stderr)
if [ -z "${1:-}" ]; then
  echo "Error: Platform argument is required." >&2
  echo "Usage: $0 <platform>" >&2
  echo "Example: $0 linux/amd64" >&2
  exit 1
fi

# Assign the first argument to PLATFORM
PLATFORM=$1

# Exit with an error unless `docker info` succeeds, i.e. the Docker CLI is
# installed and can reach a running daemon.
check_docker_running() {
  if ! docker info >/dev/null 2>&1; then
    echo "Error: Docker is not running. Please make sure Docker is installed and running before running this script." >&2
    exit 1
  fi
}

# Call the function to check Docker status
check_docker_running

# Generate a tag from the current date and time (e.g., 20230901_134502)
TAG=$(date +"%Y%m%d_%H%M%S")
IMAGE="${IMAGE_REPOSITORY}:${TAG}"

# Login to the Azure Container registry
az acr login --name "$REGISTRY_NAME"

# Build the Docker image with the specified platform
docker build --platform "$PLATFORM" -t "$IMAGE" -f dev/build/Dockerfile .

# Push the Docker image to the registry
docker push "$IMAGE"

# Update the container app with the new image (currently disabled)
# az containerapp update \
#   --name ca-uic-wiki \
#   --resource-group rg-uic-wiki \
#   --image acruicwiki.azurecr.io/uic-wiki:$TAG
|
||||
@ -0,0 +1,45 @@
|
||||
# Search engine definition: Azure Search with embedding-based vector fields.
key: azure-search-similarity
title: Azure Search with Similarity Search
description: AI-Powered cloud search service with vectorisation for embedding.
author: UIC Digital
logo: https://static.requarks.io/logo/azure.svg
website: https://azure.microsoft.com/services/search/
isAvailable: true
props:
  endpoint:
    type: String
    title: Azure Search Endpoint
    hint: The endpoint of the Azure Search Service. Found under Properties.
    order: 1
  adminKey:
    type: String
    title: Admin API Key
    hint: Either the primary or secondary admin key. Found under Keys.
    order: 2
  indexName:
    type: String
    title: Index Name
    hint: 'Name to use when creating the index. (default: wiki)'
    default: wiki
    order: 3
  embeddingModelAPIVersion:
    type: String
    title: Embedding Model API Version
    hint: 'API version of the embedding model to use for vectorisation'
    # Quoted on purpose: an unquoted 2024-02-01 is resolved as a date by
    # YAML 1.1 loaders, but this value must stay a plain string.
    default: '2024-02-01'
    order: 4
  embeddingModelKey:
    type: String
    title: Embedding Model Key
    hint: 'Key of the embedding model to use for vectorisation'
    order: 5
  embeddingModelEndpoint:
    type: String
    title: Embedding Model Endpoint
    hint: 'Endpoint of the embedding model to use for vectorisation'
    order: 6
  embeddingModelDeploymentName:
    type: String
    title: Embedding Model Deployment Name
    hint: 'Deployment name of the embedding model to use for vectorisation'
    order: 7
|
||||
@ -0,0 +1,300 @@
|
||||
const _ = require('lodash')
|
||||
const { SearchService, QueryType } = require('azure-search-client')
|
||||
const {
|
||||
AzureKeyCredential,
|
||||
SearchIndexClient,
|
||||
IndexDocumentsResult,
|
||||
SearchClient,
|
||||
SearchDocumentsResult,
|
||||
SearchFieldArray,
|
||||
SelectArray,
|
||||
SelectFields,
|
||||
} = require('@azure/search-documents');
|
||||
const request = require('request-promise')
|
||||
const stream = require('stream')
|
||||
const Promise = require('bluebird')
|
||||
const pipeline = Promise.promisify(stream.pipeline)
|
||||
|
||||
/* global WIKI */
|
||||
|
||||
module.exports = {
|
||||
async activate() {
|
||||
// not used
|
||||
},
|
||||
async deactivate() {
|
||||
// not used
|
||||
},
|
||||
/**
|
||||
* INIT
|
||||
*/
|
||||
async init() {
|
||||
WIKI.logger.info(`(SEARCH/AZURE) Initializing...`)
|
||||
|
||||
this.client = new SearchIndexClient(
|
||||
this.config.endpoint,
|
||||
new AzureKeyCredential(this.config.adminKey)
|
||||
);
|
||||
// -> Create Search Index
|
||||
WIKI.logger.info(`(SEARCH/AZURE) Creating index...`)
|
||||
await this.client.createOrUpdateIndex({
|
||||
name: this.config.indexName,
|
||||
fields: [
|
||||
{
|
||||
name: 'id',
|
||||
type: 'Edm.String',
|
||||
key: true,
|
||||
searchable: false
|
||||
},
|
||||
{
|
||||
name: 'locale',
|
||||
type: 'Edm.String',
|
||||
searchable: false
|
||||
},
|
||||
{
|
||||
name: 'path',
|
||||
type: 'Edm.String',
|
||||
searchable: false
|
||||
},
|
||||
{
|
||||
name: 'title',
|
||||
type: 'Edm.String',
|
||||
searchable: true
|
||||
},
|
||||
{
|
||||
name: 'titleVector',
|
||||
type: 'Edm.Collection(Edm.Single)',
|
||||
searchable: true,
|
||||
vectorSearchDimensions: 1536,
|
||||
vectorSearchProfileName: 'vector-profile',
|
||||
},
|
||||
{
|
||||
name: 'description',
|
||||
type: 'Edm.String',
|
||||
searchable: true
|
||||
},
|
||||
{
|
||||
name: 'descriptionVector',
|
||||
type: 'Edm.Collection(Edm.Single)',
|
||||
searchable: true,
|
||||
vectorSearchDimensions: 1536,
|
||||
vectorSearchProfileName: 'vector-profile',
|
||||
},
|
||||
{
|
||||
name: 'content',
|
||||
type: 'Edm.String',
|
||||
searchable: true
|
||||
},
|
||||
{
|
||||
name: 'contentVector',
|
||||
type: 'Edm.Collection(Edm.Single)',
|
||||
searchable: true,
|
||||
vectorSearchDimensions: 1536,
|
||||
vectorSearchProfileName: 'vector-profile',
|
||||
},
|
||||
],
|
||||
scoringProfiles: [
|
||||
{
|
||||
name: 'fieldWeights',
|
||||
text: {
|
||||
weights: {
|
||||
title: 4,
|
||||
description: 3,
|
||||
content: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
corsOptions: {
|
||||
allowedOrigins: ['*'],
|
||||
},
|
||||
vectorSearch: {
|
||||
algorithms: [{ name: 'vector-search-algorithm', kind: 'hnsw' }],
|
||||
profiles: [
|
||||
{
|
||||
name: 'vector-profile',
|
||||
algorithmConfigurationName: 'vector-search-algorithm',
|
||||
},
|
||||
],
|
||||
},
|
||||
suggesters: [
|
||||
{
|
||||
name: 'suggestions',
|
||||
searchMode: 'analyzingInfixMatching',
|
||||
sourceFields: ['title', 'description', 'content']
|
||||
}
|
||||
]
|
||||
})
|
||||
this.searchClient = new SearchClient(
|
||||
this.config.endpoint,
|
||||
this.config.indexName,
|
||||
new AzureKeyCredential(this.config.adminKey)
|
||||
);
|
||||
WIKI.logger.info(`(SEARCH/AZURE) Initialization completed.`)
|
||||
},
|
||||
/**
|
||||
* QUERY
|
||||
*
|
||||
* @param {String} q Query
|
||||
* @param {Object} opts Additional options
|
||||
*/
|
||||
async query(q, opts) {
|
||||
try {
|
||||
const results = await this.searchClient.search(q, {
|
||||
select: ['id', 'locale', 'path', 'title', 'description'],
|
||||
searchFields: ['title', 'description', 'content'],
|
||||
queryType: 'full',
|
||||
top: 50,
|
||||
includeTotalCount: true,
|
||||
});
|
||||
const searchResults = [];
|
||||
for await (const result of results.results) {
|
||||
searchResults.push(result.document);
|
||||
}
|
||||
WIKI.logger.info(`(SEARCH/AZURE) Search: ${JSON.stringify(searchResults)}.`)
|
||||
return {
|
||||
results: searchResults,
|
||||
suggestions: [],
|
||||
totalHits: results.count
|
||||
}
|
||||
} catch (err) {
|
||||
WIKI.logger.warn('Search Engine Error:')
|
||||
WIKI.logger.warn(err)
|
||||
}
|
||||
},
|
||||
/**
|
||||
* CREATE
|
||||
*
|
||||
* @param {Object} page Page to create
|
||||
*/
|
||||
async created(page) {
|
||||
const doc = {
|
||||
id: page.hash,
|
||||
locale: page.localeCode,
|
||||
path: page.path,
|
||||
title: page.title,
|
||||
description: page.description,
|
||||
content: page.safeContent
|
||||
}
|
||||
await this.updateDocument(doc)
|
||||
},
|
||||
/**
|
||||
* UPDATE
|
||||
*
|
||||
* @param {Object} page Page to update
|
||||
*/
|
||||
async updated(page) {
|
||||
const doc = {
|
||||
id: page.hash,
|
||||
locale: page.localeCode,
|
||||
path: page.path,
|
||||
title: page.title,
|
||||
description: page.description,
|
||||
content: page.safeContent
|
||||
}
|
||||
await this.updateDocument(doc)
|
||||
},
|
||||
/**
|
||||
* DELETE
|
||||
*
|
||||
* @param {Object} page Page to delete
|
||||
*/
|
||||
async deleted(page) {
|
||||
await this.searchClient.deleteDocuments([page.hash])
|
||||
},
|
||||
/**
|
||||
* RENAME
|
||||
*
|
||||
* @param {Object} page Page to rename
|
||||
*/
|
||||
async renamed(page) {
|
||||
const doc = {
|
||||
id: page.destinationHash,
|
||||
locale: page.destinationLocaleCode,
|
||||
path: page.destinationPath,
|
||||
title: page.title,
|
||||
description: page.description,
|
||||
content: page.safeContent
|
||||
}
|
||||
await this.updateDocument(doc)
|
||||
},
|
||||
/**
|
||||
* REBUILD INDEX
|
||||
*/
|
||||
async rebuild() {
|
||||
WIKI.logger.info(`(SEARCH/AZURE) Rebuilding Index...`)
|
||||
await pipeline(
|
||||
WIKI.models.knex.column({ id: 'hash' }, 'path', { locale: 'localeCode' }, 'title', 'description', 'render').select().from('pages').where({
|
||||
isPublished: true,
|
||||
isPrivate: false
|
||||
}).stream(),
|
||||
new stream.Transform({
|
||||
objectMode: true,
|
||||
transform: async (page, enc, cb) => {
|
||||
await this.rebuildPage(page)
|
||||
cb()
|
||||
}
|
||||
}),
|
||||
)
|
||||
WIKI.logger.info(`(SEARCH/AZURE) Index rebuilt successfully.`)
|
||||
},
|
||||
|
||||
async updateDocument(doc) {
|
||||
const [titleVector, descriptionVector, contentVector] = await Promise.all([
|
||||
this.generateEmbedding(doc.title),
|
||||
this.generateEmbedding(doc.description),
|
||||
this.generateEmbedding(doc.content)
|
||||
])
|
||||
|
||||
WIKI.logger.info(`(SEARCH/AZURE) Generated embeddings for ${doc.id}.`)
|
||||
|
||||
doc.titleVector = titleVector;
|
||||
doc.descriptionVector = descriptionVector;
|
||||
doc.contentVector = contentVector;
|
||||
await this.searchClient.mergeOrUploadDocuments([doc])
|
||||
},
|
||||
|
||||
async rebuildPage(page) {
|
||||
const doc = {
|
||||
id: page.id,
|
||||
locale: page.locale,
|
||||
path: page.path,
|
||||
title: page.title,
|
||||
description: page.description,
|
||||
content: WIKI.models.pages.cleanHTML(page.render)
|
||||
}
|
||||
|
||||
await this.updateDocument(doc)
|
||||
// sleep for 1 second to avoid rate limiting
|
||||
await new Promise(resolve => setTimeout(resolve, 1000))
|
||||
},
|
||||
|
||||
async generateEmbedding(str) {
|
||||
const apiKey = this.config.embeddingModelKey;
|
||||
const apiBase = this.config.embeddingModelEndpoint;
|
||||
const deploymentName = this.config.embeddingModelDeploymentName;
|
||||
const apiVersion = this.config.embeddingModelAPIVersion;
|
||||
|
||||
const url = `${apiBase}/openai/deployments/${deploymentName}/embeddings?api-version=${apiVersion}`;
|
||||
|
||||
const body = {
|
||||
input: str,
|
||||
};
|
||||
|
||||
try {
|
||||
const response = await request({
|
||||
uri: url,
|
||||
method: 'post',
|
||||
headers: {
|
||||
'api-key': apiKey,
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
json: true,
|
||||
body,
|
||||
})
|
||||
|
||||
return response.data[0].embedding;
|
||||
} catch (error) {
|
||||
WIKI.logger.info(`(SEARCH/AZURE) Error generating embedding. ${error}`)
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in new issue