mirror of https://github.com/sveltejs/svelte
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
235 lines
5.4 KiB
235 lines
5.4 KiB
// adapted from https://github.com/Glench/fuzzyset.js/blob/master/lib/fuzzyset.js
|
|
// BSD Licensed
|
|
|
|
const GRAM_SIZE_LOWER = 2;
|
|
const GRAM_SIZE_UPPER = 3;
|
|
|
|
// return an edit distance from 0 to 1
|
|
function _distance(str1: string, str2: string) {
|
|
if (str1 === null && str2 === null)
|
|
throw 'Trying to compare two null values';
|
|
if (str1 === null || str2 === null) return 0;
|
|
str1 = String(str1);
|
|
str2 = String(str2);
|
|
|
|
const distance = levenshtein(str1, str2);
|
|
if (str1.length > str2.length) {
|
|
return 1 - distance / str1.length;
|
|
} else {
|
|
return 1 - distance / str2.length;
|
|
}
|
|
}
|
|
|
|
// helper functions
|
|
function levenshtein(str1: string, str2: string) {
|
|
const current: number[] = [];
|
|
let prev;
|
|
let value;
|
|
|
|
for (let i = 0; i <= str2.length; i++) {
|
|
for (let j = 0; j <= str1.length; j++) {
|
|
if (i && j) {
|
|
if (str1.charAt(j - 1) === str2.charAt(i - 1)) {
|
|
value = prev;
|
|
} else {
|
|
value = Math.min(current[j], current[j - 1], prev) + 1;
|
|
}
|
|
} else {
|
|
value = i + j;
|
|
}
|
|
|
|
prev = current[j];
|
|
current[j] = value;
|
|
}
|
|
}
|
|
|
|
return current.pop();
|
|
}
|
|
|
|
const _nonWordRe = /[^\w, ]+/;
|
|
|
|
function _iterateGrams(value: string, gramSize: number) {
|
|
gramSize = gramSize || 2;
|
|
const simplified = '-' + value.toLowerCase().replace(_nonWordRe, '') + '-';
|
|
const lenDiff = gramSize - simplified.length;
|
|
const results = [];
|
|
|
|
if (lenDiff > 0) {
|
|
for (let i = 0; i < lenDiff; ++i) {
|
|
value += '-';
|
|
}
|
|
}
|
|
for (let i = 0; i < simplified.length - gramSize + 1; ++i) {
|
|
results.push(simplified.slice(i, i + gramSize));
|
|
}
|
|
return results;
|
|
}
|
|
|
|
function _gramCounter(value: string, gramSize: number) {
|
|
// return an object where key=gram, value=number of occurrences
|
|
gramSize = gramSize || 2;
|
|
const result = {};
|
|
const grams = _iterateGrams(value, gramSize);
|
|
let i = 0;
|
|
|
|
for (i; i < grams.length; ++i) {
|
|
if (grams[i] in result) {
|
|
result[grams[i]] += 1;
|
|
} else {
|
|
result[grams[i]] = 1;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
function sortDescending(a, b) {
|
|
return b[0] - a[0];
|
|
}
|
|
|
|
export default class FuzzySet {
|
|
exactSet: object;
|
|
matchDict: object;
|
|
items: object;
|
|
|
|
constructor(arr: string[]) {
|
|
// define all the object functions and attributes
|
|
this.exactSet = {};
|
|
this.matchDict = {};
|
|
this.items = {};
|
|
|
|
// initialization
|
|
for (let i = GRAM_SIZE_LOWER; i < GRAM_SIZE_UPPER + 1; ++i) {
|
|
this.items[i] = [];
|
|
}
|
|
// add all the items to the set
|
|
for (let i = 0; i < arr.length; ++i) {
|
|
this.add(arr[i]);
|
|
}
|
|
}
|
|
|
|
add(value: string) {
|
|
const normalizedValue = value.toLowerCase();
|
|
if (normalizedValue in this.exactSet) {
|
|
return false;
|
|
}
|
|
|
|
let i = GRAM_SIZE_LOWER;
|
|
for (i; i < GRAM_SIZE_UPPER + 1; ++i) {
|
|
this._add(value, i);
|
|
}
|
|
}
|
|
|
|
_add(value: string, gramSize: number) {
|
|
const normalizedValue = value.toLowerCase();
|
|
const items = this.items[gramSize] || [];
|
|
const index = items.length;
|
|
|
|
items.push(0);
|
|
const gramCounts = _gramCounter(normalizedValue, gramSize);
|
|
let sumOfSquareGramCounts = 0;
|
|
let gram;
|
|
let gramCount;
|
|
|
|
for (gram in gramCounts) {
|
|
gramCount = gramCounts[gram];
|
|
sumOfSquareGramCounts += Math.pow(gramCount, 2);
|
|
if (gram in this.matchDict) {
|
|
this.matchDict[gram].push([index, gramCount]);
|
|
} else {
|
|
this.matchDict[gram] = [[index, gramCount]];
|
|
}
|
|
}
|
|
const vectorNormal = Math.sqrt(sumOfSquareGramCounts);
|
|
items[index] = [vectorNormal, normalizedValue];
|
|
this.items[gramSize] = items;
|
|
this.exactSet[normalizedValue] = value;
|
|
};
|
|
|
|
get(value: string) {
|
|
const normalizedValue = value.toLowerCase();
|
|
const result = this.exactSet[normalizedValue];
|
|
|
|
if (result) {
|
|
return [[1, result]];
|
|
}
|
|
|
|
let results = [];
|
|
// start with high gram size and if there are no results, go to lower gram sizes
|
|
for (
|
|
let gramSize = GRAM_SIZE_UPPER;
|
|
gramSize >= GRAM_SIZE_LOWER;
|
|
--gramSize
|
|
) {
|
|
results = this.__get(value, gramSize);
|
|
if (results) {
|
|
return results;
|
|
}
|
|
}
|
|
return null;
|
|
}
|
|
|
|
__get(value: string, gramSize: number) {
|
|
const normalizedValue = value.toLowerCase();
|
|
const matches = {};
|
|
const gramCounts = _gramCounter(normalizedValue, gramSize);
|
|
const items = this.items[gramSize];
|
|
let sumOfSquareGramCounts = 0;
|
|
let gram;
|
|
let gramCount;
|
|
let i;
|
|
let index;
|
|
let otherGramCount;
|
|
|
|
for (gram in gramCounts) {
|
|
gramCount = gramCounts[gram];
|
|
sumOfSquareGramCounts += Math.pow(gramCount, 2);
|
|
if (gram in this.matchDict) {
|
|
for (i = 0; i < this.matchDict[gram].length; ++i) {
|
|
index = this.matchDict[gram][i][0];
|
|
otherGramCount = this.matchDict[gram][i][1];
|
|
if (index in matches) {
|
|
matches[index] += gramCount * otherGramCount;
|
|
} else {
|
|
matches[index] = gramCount * otherGramCount;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
const vectorNormal = Math.sqrt(sumOfSquareGramCounts);
|
|
let results = [];
|
|
let matchScore;
|
|
|
|
// build a results list of [score, str]
|
|
for (const matchIndex in matches) {
|
|
matchScore = matches[matchIndex];
|
|
results.push([
|
|
matchScore / (vectorNormal * items[matchIndex][0]),
|
|
items[matchIndex][1],
|
|
]);
|
|
}
|
|
|
|
results.sort(sortDescending);
|
|
|
|
let newResults = [];
|
|
const endIndex = Math.min(50, results.length);
|
|
// truncate somewhat arbitrarily to 50
|
|
for (let i = 0; i < endIndex; ++i) {
|
|
newResults.push([
|
|
_distance(results[i][1], normalizedValue),
|
|
results[i][1],
|
|
]);
|
|
}
|
|
results = newResults;
|
|
results.sort(sortDescending);
|
|
|
|
newResults = [];
|
|
for (let i = 0; i < results.length; ++i) {
|
|
if (results[i][0] == results[0][0]) {
|
|
newResults.push([results[i][0], this.exactSet[results[i][1]]]);
|
|
}
|
|
}
|
|
|
|
return newResults;
|
|
};
|
|
} |