You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

215 lines
6.1 KiB

2 years ago
package class46;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.PriorityQueue;
// 本文件不牵扯任何byte类型的转化
// 怎么转byte自己来我只负责huffman算法本身的正确实现
// 字符串为空的时候,自己处理边界吧
// 实现的代码通过了大样本随机测试的对数器
// 可以从main函数的内容开始看起
public class Code05_HuffmanTree {
// 根据文章str, 生成词频统计表
public static HashMap<Character, Integer> countMap(String str) {
HashMap<Character, Integer> ans = new HashMap<>();
char[] s = str.toCharArray();
for (char cha : s) {
if (!ans.containsKey(cha)) {
ans.put(cha, 1);
} else {
ans.put(cha, ans.get(cha) + 1);
}
}
return ans;
}
public static class Node {
public int count;
public Node left;
public Node right;
public Node(int c) {
count = c;
}
}
public static class NodeComp implements Comparator<Node> {
@Override
public int compare(Node o1, Node o2) {
return o1.count - o2.count;
}
}
// 根据由文章生成词频表countMap生成哈夫曼编码表
// key : 字符
// value: 该字符编码后的二进制形式
// 比如,频率表 A60, B:45, C:13 D:69 E:14 F:5 G:3
// A 10
// B 01
// C 0011
// D 11
// E 000
// F 00101
// G 00100
public static HashMap<Character, String> huffmanForm(HashMap<Character, Integer> countMap) {
HashMap<Character, String> ans = new HashMap<>();
if (countMap.size() == 1) {
for (char key : countMap.keySet()) {
ans.put(key, "0");
}
return ans;
}
HashMap<Node, Character> nodes = new HashMap<>();
PriorityQueue<Node> heap = new PriorityQueue<>(new NodeComp());
for (Entry<Character, Integer> entry : countMap.entrySet()) {
Node cur = new Node(entry.getValue());
char cha = entry.getKey();
nodes.put(cur, cha);
heap.add(cur);
}
while (heap.size() != 1) {
Node a = heap.poll();
Node b = heap.poll();
Node h = new Node(a.count + b.count);
h.left = a;
h.right = b;
heap.add(h);
}
Node head = heap.poll();
fillForm(head, "", nodes, ans);
return ans;
}
public static void fillForm(Node head, String pre, HashMap<Node, Character> nodes, HashMap<Character, String> ans) {
if (nodes.containsKey(head)) {
ans.put(nodes.get(head), pre);
} else {
fillForm(head.left, pre + "0", nodes, ans);
fillForm(head.right, pre + "1", nodes, ans);
}
}
// 原始字符串str根据哈夫曼编码表转译成哈夫曼编码返回
public static String huffmanEncode(String str, HashMap<Character, String> huffmanForm) {
char[] s = str.toCharArray();
StringBuilder builder = new StringBuilder();
for (char cha : s) {
builder.append(huffmanForm.get(cha));
}
return builder.toString();
}
// 原始字符串的哈夫曼编码huffmanEncode根据哈夫曼编码表还原成原始字符串
public static String huffmanDecode(String huffmanEncode, HashMap<Character, String> huffmanForm) {
TrieNode root = createTrie(huffmanForm);
TrieNode cur = root;
char[] encode = huffmanEncode.toCharArray();
StringBuilder builder = new StringBuilder();
for (int i = 0; i < encode.length; i++) {
int index = encode[i] == '0' ? 0 : 1;
cur = cur.nexts[index];
if (cur.nexts[0] == null && cur.nexts[1] == null) {
builder.append(cur.value);
cur = root;
}
}
return builder.toString();
}
public static TrieNode createTrie(HashMap<Character, String> huffmanForm) {
TrieNode root = new TrieNode();
for (char key : huffmanForm.keySet()) {
char[] path = huffmanForm.get(key).toCharArray();
TrieNode cur = root;
for (int i = 0; i < path.length; i++) {
int index = path[i] == '0' ? 0 : 1;
if (cur.nexts[index] == null) {
cur.nexts[index] = new TrieNode();
}
cur = cur.nexts[index];
}
cur.value = key;
}
return root;
}
public static class TrieNode {
public char value;
public TrieNode[] nexts;
public TrieNode() {
value = 0;
nexts = new TrieNode[2];
}
}
// 为了测试
public static String randomNumberString(int len, int range) {
char[] str = new char[len];
for (int i = 0; i < len; i++) {
str[i] = (char) ((int) (Math.random() * range) + 'a');
}
return String.valueOf(str);
}
// 为了测试
public static void main(String[] args) {
// 根据词频表生成哈夫曼编码表
HashMap<Character, Integer> map = new HashMap<>();
map.put('A', 60);
map.put('B', 45);
map.put('C', 13);
map.put('D', 69);
map.put('E', 14);
map.put('F', 5);
map.put('G', 3);
HashMap<Character, String> huffmanForm = huffmanForm(map);
for (Entry<Character, String> entry : huffmanForm.entrySet()) {
System.out.println(entry.getKey() + " : " + entry.getValue());
}
System.out.println("====================");
// str是原始字符串
String str = "CBBBAABBACAABDDEFBA";
System.out.println(str);
// countMap是根据str建立的词频表
HashMap<Character, Integer> countMap = countMap(str);
// hf是根据countMap生成的哈夫曼编码表
HashMap<Character, String> hf = huffmanForm(countMap);
// huffmanEncode是原始字符串转译后的哈夫曼编码
String huffmanEncode = huffmanEncode(str, hf);
System.out.println(huffmanEncode);
// huffmanDecode是哈夫曼编码还原成的原始字符串
String huffmanDecode = huffmanDecode(huffmanEncode, hf);
System.out.println(huffmanDecode);
System.out.println("====================");
System.out.println("大样本随机测试开始");
// 字符串最大长度
int len = 500;
// 所含字符种类
int range = 26;
// 随机测试进行的次数
int testTime = 100000;
for (int i = 0; i < testTime; i++) {
int N = (int) (Math.random() * len) + 1;
String test = randomNumberString(N, range);
HashMap<Character, Integer> counts = countMap(test);
HashMap<Character, String> form = huffmanForm(counts);
String encode = huffmanEncode(test, form);
String decode = huffmanDecode(encode, form);
if (!test.equals(decode)) {
System.out.println(test);
System.out.println(encode);
System.out.println(decode);
System.out.println("出错了!");
}
}
System.out.println("大样本随机测试结束");
}
}