You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

215 lines
6.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

package class46;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.PriorityQueue;
/**
 * Huffman coding over the characters of a {@code String}.
 *
 * <p>This class deals only with the Huffman algorithm itself: codes are produced and
 * consumed as strings of {@code '0'} / {@code '1'} characters. Packing those bits into
 * bytes is intentionally out of scope and left to the caller.
 *
 * <p>Empty input is supported: an empty text yields an empty frequency table, an empty
 * code table, an empty encoding, and decodes back to the empty string.
 *
 * <p>The implementation is checked by the randomized round-trip test in {@link #main};
 * {@code main} is also the best place to start reading.
 */
public class Code05_HuffmanTree {

    /**
     * Builds the frequency table of a text.
     *
     * @param str the text to analyse
     * @return a map from each character occurring in {@code str} to its number of occurrences
     */
    public static HashMap<Character, Integer> countMap(String str) {
        HashMap<Character, Integer> ans = new HashMap<>();
        for (char cha : str.toCharArray()) {
            // First occurrence stores 1, later occurrences add 1 to the stored count.
            ans.merge(cha, 1, Integer::sum);
        }
        return ans;
    }

    /** A node of the Huffman tree; leaves are the nodes registered in the node-to-character map. */
    public static class Node {
        /** Frequency of the leaf's character, or the sum of both children for an inner node. */
        public int count;
        public Node left;
        public Node right;

        public Node(int c) {
            count = c;
        }
    }

    /** Orders nodes by ascending {@link Node#count}, so the heap yields the rarest nodes first. */
    public static class NodeComp implements Comparator<Node> {
        @Override
        public int compare(Node o1, Node o2) {
            // Integer.compare avoids the overflow hazard of "o1.count - o2.count".
            return Integer.compare(o1.count, o2.count);
        }
    }

    /**
     * Builds the Huffman code table from a frequency table.
     *
     * <p>Example: for the frequencies A:60, B:45, C:13, D:69, E:14, F:5, G:3 the result is
     * <pre>
     * A 10
     * B 01
     * C 0011
     * D 11
     * E 000
     * F 00101
     * G 00100
     * </pre>
     *
     * @param countMap character -> frequency, e.g. as produced by {@link #countMap(String)}
     * @return character -> its code as a string of '0'/'1'; empty when {@code countMap} is
     *         empty; the single character is mapped to {@code "0"} when there is only one
     */
    public static HashMap<Character, String> huffmanForm(HashMap<Character, Integer> countMap) {
        HashMap<Character, String> ans = new HashMap<>();
        if (countMap.isEmpty()) {
            // Nothing to encode: without this guard the merge loop below would poll an
            // empty heap and dereference null.
            return ans;
        }
        if (countMap.size() == 1) {
            // A one-leaf tree would give the only character an empty code, so assign "0".
            for (char key : countMap.keySet()) {
                ans.put(key, "0");
            }
            return ans;
        }
        // Remembers which tree nodes are leaves and which character each leaf stands for.
        HashMap<Node, Character> nodes = new HashMap<>();
        PriorityQueue<Node> heap = new PriorityQueue<>(new NodeComp());
        for (Entry<Character, Integer> entry : countMap.entrySet()) {
            Node cur = new Node(entry.getValue());
            char cha = entry.getKey();
            nodes.put(cur, cha);
            heap.add(cur);
        }
        // Repeatedly merge the two least frequent nodes until one root remains.
        while (heap.size() > 1) {
            Node a = heap.poll();
            Node b = heap.poll();
            Node h = new Node(a.count + b.count);
            h.left = a;
            h.right = b;
            heap.add(h);
        }
        Node head = heap.poll();
        fillForm(head, "", nodes, ans);
        return ans;
    }

    /**
     * Walks the Huffman tree and records the root-to-leaf path of every leaf as its code
     * ('0' for a left edge, '1' for a right edge).
     *
     * @param head  root of the (sub)tree to walk
     * @param pre   path from the overall root down to {@code head}
     * @param nodes leaf node -> character; a node is treated as a leaf iff it is a key here
     * @param ans   output table, character -> code
     */
    public static void fillForm(Node head, String pre, HashMap<Node, Character> nodes, HashMap<Character, String> ans) {
        if (nodes.containsKey(head)) {
            ans.put(nodes.get(head), pre);
        } else {
            fillForm(head.left, pre + "0", nodes, ans);
            fillForm(head.right, pre + "1", nodes, ans);
        }
    }

    /**
     * Encodes a text with the given code table.
     *
     * @param str         the original text
     * @param huffmanForm character -> code table
     * @return the concatenation of the codes of all characters of {@code str}
     * @throws IllegalArgumentException if {@code str} contains a character without a code
     */
    public static String huffmanEncode(String str, HashMap<Character, String> huffmanForm) {
        StringBuilder builder = new StringBuilder();
        for (char cha : str.toCharArray()) {
            String code = huffmanForm.get(cha);
            if (code == null) {
                // Appending null would silently write the text "null" into the bit string.
                throw new IllegalArgumentException("no Huffman code for character '" + cha + "'");
            }
            builder.append(code);
        }
        return builder.toString();
    }

    /**
     * Decodes a bit string produced by {@link #huffmanEncode} back into the original text.
     *
     * <p>Any character other than {@code '0'} is read as a 1 bit. Bits after the last
     * complete code (a trailing partial code) are ignored.
     *
     * @param huffmanEncode the encoded bit string
     * @param huffmanForm   the character -> code table used for encoding
     * @return the decoded text
     * @throws IllegalArgumentException if the bits follow a path that is not in the table
     */
    public static String huffmanDecode(String huffmanEncode, HashMap<Character, String> huffmanForm) {
        TrieNode root = createTrie(huffmanForm);
        TrieNode cur = root;
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < huffmanEncode.length(); i++) {
            int index = huffmanEncode.charAt(i) == '0' ? 0 : 1;
            cur = cur.nexts[index];
            if (cur == null) {
                throw new IllegalArgumentException(
                        "bit at index " + i + " does not match any code in the table");
            }
            // A node without children terminates a code: emit its character and restart.
            if (cur.nexts[0] == null && cur.nexts[1] == null) {
                builder.append(cur.value);
                cur = root;
            }
        }
        return builder.toString();
    }

    /**
     * Builds a binary trie from the code table; the node reached by a character's code
     * stores that character.
     *
     * @param huffmanForm character -> code table
     * @return the root of the trie
     */
    public static TrieNode createTrie(HashMap<Character, String> huffmanForm) {
        TrieNode root = new TrieNode();
        for (Entry<Character, String> entry : huffmanForm.entrySet()) {
            TrieNode cur = root;
            for (char bit : entry.getValue().toCharArray()) {
                int index = bit == '0' ? 0 : 1;
                if (cur.nexts[index] == null) {
                    cur.nexts[index] = new TrieNode();
                }
                cur = cur.nexts[index];
            }
            cur.value = entry.getKey();
        }
        return root;
    }

    /** Node of the decoding trie: {@code nexts[0]} is the '0' branch, {@code nexts[1]} the '1' branch. */
    public static class TrieNode {
        /** The decoded character; only meaningful on nodes that end a code. */
        public char value;
        public TrieNode[] nexts;

        public TrieNode() {
            value = 0;
            nexts = new TrieNode[2];
        }
    }

    /**
     * Test helper: builds a random string of {@code len} characters drawn from the
     * {@code range} consecutive characters starting at {@code 'a'}.
     */
    public static String randomNumberString(int len, int range) {
        char[] str = new char[len];
        for (int i = 0; i < len; i++) {
            str[i] = (char) ((int) (Math.random() * range) + 'a');
        }
        return String.valueOf(str);
    }

    /** Demo and randomized round-trip test. */
    public static void main(String[] args) {
        // Build a code table directly from a hand-written frequency table.
        HashMap<Character, Integer> map = new HashMap<>();
        map.put('A', 60);
        map.put('B', 45);
        map.put('C', 13);
        map.put('D', 69);
        map.put('E', 14);
        map.put('F', 5);
        map.put('G', 3);
        HashMap<Character, String> huffmanForm = huffmanForm(map);
        for (Entry<Character, String> entry : huffmanForm.entrySet()) {
            System.out.println(entry.getKey() + " : " + entry.getValue());
        }
        System.out.println("====================");
        // str is the original text.
        String str = "CBBBAABBACAABDDEFBA";
        System.out.println(str);
        // countMap is the frequency table derived from str.
        HashMap<Character, Integer> countMap = countMap(str);
        // hf is the Huffman code table derived from countMap.
        HashMap<Character, String> hf = huffmanForm(countMap);
        // huffmanEncode is the Huffman encoding of the original text.
        String huffmanEncode = huffmanEncode(str, hf);
        System.out.println(huffmanEncode);
        // huffmanDecode is the text recovered from the encoding.
        String huffmanDecode = huffmanDecode(huffmanEncode, hf);
        System.out.println(huffmanDecode);
        System.out.println("====================");
        System.out.println("大样本随机测试开始");
        // Maximum length of a test string.
        int len = 500;
        // Number of distinct characters that may appear.
        int range = 26;
        // Number of random rounds.
        int testTime = 100000;
        for (int i = 0; i < testTime; i++) {
            int N = (int) (Math.random() * len) + 1;
            String test = randomNumberString(N, range);
            HashMap<Character, Integer> counts = countMap(test);
            HashMap<Character, String> form = huffmanForm(counts);
            String encode = huffmanEncode(test, form);
            String decode = huffmanDecode(encode, form);
            if (!test.equals(decode)) {
                System.out.println(test);
                System.out.println(encode);
                System.out.println(decode);
                System.out.println("出错了!");
            }
        }
        System.out.println("大样本随机测试结束");
    }
}