You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

113 lines
3.2 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

package class_2022_03_2_week;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.TreeSet;
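// From the ByteDance Feishu (Lark) team.
// Phrase auto-completion, e.g. "as soon as possible":
// once we have recognized "as soon as", we can be fairly sure the user is about to type "possible".
// Design a word-frequency model to support this feature,
// built from (prefix, next word) entries.
// For example, the sentence "as soon as possible" above
// produces the entries (as, soon, 1), (as soon, as, 1), (as soon as, possible, 1),
// meaning that this one sentence contributes the following statistics:
// with prefix "as", the next word "soon" gains 1 expectation point;
// with prefix "as soon", the next word "as" gains 1 expectation point;
// with prefix "as soon as", the next word "possible" gains 1 expectation point.
// Given many sentences, many expectation points are produced; points for the same next word under the same prefix accumulate.
// You are given n sentences from which to build the statistics,
// then m sentences to use as queries,
// and finally k: for each query sentence used as a prefix, report the k most frequent next words.
// Return m results, each containing at most k words.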
/**
 * Next-word suggestion model built on a word-level trie.
 *
 * <p>Every training sentence is split into words and inserted as a path from
 * the root. Each node counts how many times its word followed the prefix
 * leading to it, and every node keeps its children ranked by that count, so
 * the most frequent continuations of a prefix can be read off directly.
 */
public class Code03_AiFill {

    /**
     * A single word reached through a specific prefix.
     */
    public static class TrieNode {
        /** The word this node stands for (empty string for the root). */
        public String word;
        /** How many times this word has been seen after the prefix; starts at 1 on creation. */
        public int times;
        /** Child nodes keyed by their word. */
        public HashMap<String, TrieNode> nextNodes;
        /** The same children, ordered by descending {@code times}, then ascending {@code word}. */
        public TreeSet<TrieNode> nextRanks;

        /**
         * Creates a node for {@code w} with an initial count of 1 and no children.
         *
         * @param w the word this node represents
         */
        public TrieNode(String w) {
            word = w;
            times = 1;
            nextNodes = new HashMap<>();
            // Integer.compare instead of subtraction: same ordering, no int overflow.
            nextRanks = new TreeSet<>((a, b) ->
                    a.times != b.times ? Integer.compare(b.times, a.times) : a.word.compareTo(b.word));
        }
    }

    /**
     * The suggestion engine: accumulates sentences and answers top-k next-word queries.
     */
    public static class AI {
        /** Root of the trie; its children are the first words of the sentences seen so far. */
        public TrieNode root;
        /** Maximum number of suggestions returned by {@link #suggest(String)}. */
        public int topk;

        /**
         * Builds the model from an initial set of sentences.
         *
         * @param sentences training sentences; {@code null} is treated as an empty list
         * @param k         maximum number of suggestions per query; values {@code <= 0}
         *                  make every query return an empty list
         */
        public AI(List<String> sentences, int k) {
            root = new TrieNode("");
            topk = k;
            if (sentences != null) {
                for (String sentence : sentences) {
                    fill(sentence);
                }
            }
        }

        /**
         * Adds one sentence to the statistics, incrementing the count of every
         * (prefix, next word) pair along its path.
         *
         * <p>Words are separated by any run of whitespace; leading and trailing
         * whitespace is ignored. A {@code null} or blank sentence adds nothing.
         *
         * @param sentence the sentence to learn from
         */
        public void fill(String sentence) {
            TrieNode cur = root;
            for (String word : tokenize(sentence)) {
                TrieNode next = cur.nextNodes.get(word);
                if (next == null) {
                    next = new TrieNode(word);
                    cur.nextNodes.put(word, next);
                } else {
                    // The count is part of the sort key, so the node has to leave the
                    // ranked set before the count changes and be re-inserted afterwards.
                    cur.nextRanks.remove(next);
                    next.times++;
                }
                cur.nextRanks.add(next);
                cur = next;
            }
        }

        /**
         * Returns the most frequent next words after the given prefix.
         *
         * <p>The prefix is tokenized the same way as in {@link #fill(String)}.
         * A {@code null} or blank prefix yields the most frequent sentence-opening words.
         *
         * @param sentence the prefix typed so far
         * @return at most {@code topk} words, most frequent first (ties broken
         *         alphabetically); empty if the prefix was never seen or {@code topk <= 0}
         */
        public List<String> suggest(String sentence) {
            List<String> ans = new ArrayList<>();
            if (topk <= 0) {
                return ans;
            }
            TrieNode cur = root;
            for (String word : tokenize(sentence)) {
                cur = cur.nextNodes.get(word);
                if (cur == null) {
                    return ans;
                }
            }
            for (TrieNode candidate : cur.nextRanks) {
                ans.add(candidate.word);
                if (ans.size() >= topk) {
                    break;
                }
            }
            return ans;
        }

        /** Splits a sentence into words on whitespace runs; null or blank input gives no words. */
        private static String[] tokenize(String sentence) {
            if (sentence == null) {
                return new String[0];
            }
            String trimmed = sentence.trim();
            return trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+");
        }
    }

    /**
     * Small demo: trains on a few sentences, prints the top suggestions for a
     * prefix, adds more sentences, and prints the suggestions again.
     */
    public static void main(String[] args) {
        ArrayList<String> sentences = new ArrayList<>();
        sentences.add("i think you are good");
        sentences.add("i think you are fine");
        sentences.add("i think you are good man");
        int k = 2;
        AI ai = new AI(sentences, k);
        for (String ans : ai.suggest("i think you are")) {
            System.out.println(ans);
        }
        System.out.println("=====");
        ai.fill("i think you are fucking good");
        ai.fill("i think you are fucking great");
        ai.fill("i think you are fucking genius");
        for (String ans : ai.suggest("i think you are")) {
            System.out.println(ans);
        }
        System.out.println("=====");
    }
}