package class46; import java.util.Comparator; import java.util.HashMap; import java.util.Map.Entry; import java.util.PriorityQueue; // 本文件不牵扯任何byte类型的转化 // 怎么转byte自己来，我只负责huffman算法本身的正确实现 // 字符串为空的时候，自己处理边界吧 // 实现的代码通过了大样本随机测试的对数器 // 可以从main函数的内容开始看起 public class Code05_HuffmanTree { // 根据文章str, 生成词频统计表 public static HashMap countMap(String str) { HashMap ans = new HashMap<>(); char[] s = str.toCharArray(); for (char cha : s) { if (!ans.containsKey(cha)) { ans.put(cha, 1); } else { ans.put(cha, ans.get(cha) + 1); } } return ans; } public static class Node { public int count; public Node left; public Node right; public Node(int c) { count = c; } } public static class NodeComp implements Comparator { @Override public int compare(Node o1, Node o2) { return o1.count - o2.count; } } // 根据由文章生成词频表countMap，生成哈夫曼编码表 // key : 字符 // value: 该字符编码后的二进制形式 // 比如，频率表 A：60, B:45, C:13 D:69 E:14 F:5 G:3 // A 10 // B 01 // C 0011 // D 11 // E 000 // F 00101 // G 00100 public static HashMap huffmanForm(HashMap countMap) { HashMap ans = new HashMap<>(); if (countMap.size() == 1) { for (char key : countMap.keySet()) { ans.put(key, "0"); } return ans; } HashMap nodes = new HashMap<>(); PriorityQueue heap = new PriorityQueue<>(new NodeComp()); for (Entry entry : countMap.entrySet()) { Node cur = new Node(entry.getValue()); char cha = entry.getKey(); nodes.put(cur, cha); heap.add(cur); } while (heap.size() != 1) { Node a = heap.poll(); Node b = heap.poll(); Node h = new Node(a.count + b.count); h.left = a; h.right = b; heap.add(h); } Node head = heap.poll(); fillForm(head, "", nodes, ans); return ans; } public static void fillForm(Node head, String pre, HashMap nodes, HashMap ans) { if (nodes.containsKey(head)) { ans.put(nodes.get(head), pre); } else { fillForm(head.left, pre + "0", nodes, ans); fillForm(head.right, pre + "1", nodes, ans); } } // 原始字符串str，根据哈夫曼编码表，转译成哈夫曼编码返回 public static String huffmanEncode(String str, HashMap huffmanForm) { char[] s = str.toCharArray(); StringBuilder builder = new StringBuilder(); for (char cha : s) { builder.append(huffmanForm.get(cha)); } return builder.toString(); } // 原始字符串的哈夫曼编码huffmanEncode，根据哈夫曼编码表，还原成原始字符串 public static String huffmanDecode(String huffmanEncode, HashMap huffmanForm) { TrieNode root = createTrie(huffmanForm); TrieNode cur = root; char[] encode = huffmanEncode.toCharArray(); StringBuilder builder = new StringBuilder(); for (int i = 0; i < encode.length; i++) { int index = encode[i] == '0' ? 0 : 1; cur = cur.nexts[index]; if (cur.nexts[0] == null && cur.nexts[1] == null) { builder.append(cur.value); cur = root; } } return builder.toString(); } public static TrieNode createTrie(HashMap huffmanForm) { TrieNode root = new TrieNode(); for (char key : huffmanForm.keySet()) { char[] path = huffmanForm.get(key).toCharArray(); TrieNode cur = root; for (int i = 0; i < path.length; i++) { int index = path[i] == '0' ? 0 : 1; if (cur.nexts[index] == null) { cur.nexts[index] = new TrieNode(); } cur = cur.nexts[index]; } cur.value = key; } return root; } public static class TrieNode { public char value; public TrieNode[] nexts; public TrieNode() { value = 0; nexts = new TrieNode[2]; } } // 为了测试 public static String randomNumberString(int len, int range) { char[] str = new char[len]; for (int i = 0; i < len; i++) { str[i] = (char) ((int) (Math.random() * range) + 'a'); } return String.valueOf(str); } // 为了测试 public static void main(String[] args) { // 根据词频表生成哈夫曼编码表 HashMap map = new HashMap<>(); map.put('A', 60); map.put('B', 45); map.put('C', 13); map.put('D', 69); map.put('E', 14); map.put('F', 5); map.put('G', 3); HashMap huffmanForm = huffmanForm(map); for (Entry entry : huffmanForm.entrySet()) { System.out.println(entry.getKey() + " : " + entry.getValue()); } System.out.println("===================="); // str是原始字符串 String str = "CBBBAABBACAABDDEFBA"; System.out.println(str); // countMap是根据str建立的词频表 HashMap countMap = countMap(str); // hf是根据countMap生成的哈夫曼编码表 HashMap hf = huffmanForm(countMap); // huffmanEncode是原始字符串转译后的哈夫曼编码 String huffmanEncode = huffmanEncode(str, hf); System.out.println(huffmanEncode); // huffmanDecode是哈夫曼编码还原成的原始字符串 String huffmanDecode = huffmanDecode(huffmanEncode, hf); System.out.println(huffmanDecode); System.out.println("===================="); System.out.println("大样本随机测试开始"); // 字符串最大长度 int len = 500; // 所含字符种类 int range = 26; // 随机测试进行的次数 int testTime = 100000; for (int i = 0; i < testTime; i++) { int N = (int) (Math.random() * len) + 1; String test = randomNumberString(N, range); HashMap counts = countMap(test); HashMap form = huffmanForm(counts); String encode = huffmanEncode(test, form); String decode = huffmanDecode(encode, form); if (!test.equals(decode)) { System.out.println(test); System.out.println(encode); System.out.println(decode); System.out.println("出错了!"); } } System.out.println("大样本随机测试结束"); } }