本文不描述Huffman编码的算法和理论知识。Huffman编码算法的描述请看文章《Huffman压缩算法》。本文只考虑Huffman编码的Java实现,代码中有些地方没有做性能优化。
PriorityQueue是一个排序的队列,是构建Huffman树的基础。
- package art.programming.huffman;
-
- //Frequency用来抽象每个字符出现频率
- public class Frequency implements Comparable<Frequency>{
- private int frequency;
- private Character value;
- private TreeNode treeNodeRef;
-
- public Frequency(Character value, int frequency){
- this.value = value;
- this.frequency = frequency;
- }
-
- public int getFrequency() {
- return frequency;
- }
-
- public Character getValue() {
- return value;
- }
-
- public void increase(){
- ++frequency;
- }
-
- @Override
- public int compareTo(Frequency another) {
- if (frequency != another.getFrequency()){
- return frequency - another.getFrequency();
- }
- if (value == null && another.getValue()!=null){
- return -1;
- }
-
- if (value != null && another.getValue()==null){
- return 1;
- }
-
- if (value == null & another.getValue() ==null){
- int i=0,j=0;
- TreeNode treeNode = treeNodeRef;
- TreeNode anotherTreeNode = another.getTreeNodeRef();
- while(treeNode.getLeft()!=null){
- i ++;
- treeNode = treeNode.getLeft();
- }
- while(anotherTreeNode.getLeft()!=null){
- j ++;
- anotherTreeNode = anotherTreeNode.getLeft();
- }
- return i - j;
- }
- return frequency - another.getFrequency();
- }
-
- public TreeNode getTreeNodeRef() {
- return treeNodeRef;
- }
-
- public void setTreeNodeRef(TreeNode treeNodeRef) {
- this.treeNodeRef = treeNodeRef;
- }
-
-
- public String toString(){
- return value + " : " + frequency;
- }
-
- }
-
计算字符出现的频率放到PriorityQueue中
- package art.programming.huffman;
-
- import java.util.HashMap;
- import java.util.Map;
- import java.util.PriorityQueue;
-
- public class FrequencyCalculator {
- private Map<Character, Frequency> frequencyMap;
-
- public FrequencyCalculator(){
- frequencyMap = new HashMap<Character, Frequency>();
- }
-
- public PriorityQueue<Frequency> calculate(String c){
- int len = c.length();
- for (int i=0; i<len; i++){
- Character character = c.charAt(i);
- Frequency frequency = frequencyMap.get(character);
- if (frequency == null){
- frequency = new Frequency(character, 1);
- frequencyMap.put(character, frequency);
- }else{
- frequency.increase();
- }
- }
- PriorityQueue<Frequency> priorityQueue = new PriorityQueue<Frequency>();
- for (Frequency frequency : frequencyMap.values()){
- priorityQueue.add(frequency);
- }
- return priorityQueue;
- }
- }
-
- package art.programming.huffman;
-
- import java.util.Collection;
- import java.util.HashMap;
- import java.util.Map;
- import java.util.PriorityQueue;
-
- public class HuffmanTree {
-
- private TreeNode root;
-
- private Map<Character, Encoding> encodings = new HashMap<Character, Encoding>();
-
-
- public void build(PriorityQueue<Frequency> prioQueue) {
- while (prioQueue.size() >= 2) {
- //Poll the first element, then build the left node
- Frequency first = prioQueue.poll();
- TreeNode leftNode = buildNode(first);
-
- //Poll the second element, then build the right node
- Frequency second = prioQueue.poll();
- TreeNode rightNode = buildNode(second);
-
- //Build the sub-tree
- TreeNode node = new TreeNode();
- node.setLeft(leftNode);
- node.setRight(rightNode);
- node.setValue(null);
-
- //Put the calculated frequency back to the priority queue
- int freq = first.getFrequency() + second.getFrequency();
- Frequency frequency = new Frequency(null, freq);
- frequency.setTreeNodeRef(node);
- prioQueue.add(frequency);
-
- if (prioQueue.size()==1){
- //byte rootscore = 0xf;
- //rootscore = (byte) (rootscore << 1);
-
- root = node;
- //root.setScore(rootscore);
- }
- }
- }
-
- public Map<Character, Encoding> getEncodingTable(){
- traverse();
- Collection<Encoding> coll = encodings.values();
- for (Encoding encoding : coll){
- System.out.println(encoding.getBits() + " " + encoding.getValue());
- }
- return encodings;
- }
-
- private void traverse(TreeNode node){
- if (node != null){
- if (node.getLeft()!=null){
- byte i = node.getScore();
- i = (byte) (i << 1);
- node.getLeft().setScore(i);
- }
- if (node.getRight()!=null){
- byte i = node.getScore();
- i = (byte) ((i << 1) + 1);
- node.getRight().setScore(i);
- }
-
- traverse(node.getLeft());
-
- if (node.getValue() != null){
- System.out.println(node.getValue());
- System.out.println(node.getScore());
- Encoding encoding = new Encoding();
- encoding.setBits(node.getScore());
- encoding.setValue(node.getValue());
- encodings.put(node.getValue(), encoding);
-
- }
- traverse(node.getRight());
- }
- }
-
-
- private void traverse(){
- traverse(this.root);
- }
-
-
-
- private TreeNode buildNode(Frequency freq){
- if (freq.getTreeNodeRef() != null){
- return freq.getTreeNodeRef();
- }
- TreeNode node = new TreeNode();
- node.setValue(freq.getValue());
- return node;
- }
-
- public TreeNode getRoot(){
- return root;
- }
- }
-
Huffman树节点结构
- package art.programming.huffman;
-
- public class TreeNode {
-
- private TreeNode left;
-
- private TreeNode right;
-
- private Character value;
-
- private byte score = 0xf;
-
- public void setValue(Character value){
- this.value = value;
- }
-
- public TreeNode getLeft() {
- return left;
- }
-
- public void setLeft(TreeNode left) {
- this.left = left;
- }
-
- public TreeNode getRight() {
- return right;
- }
-
- public void setRight(TreeNode right) {
- this.right = right;
- }
-
- public Character getValue() {
- return value;
- }
-
- public byte getScore() {
- return score;
- }
-
- public void setScore(byte score) {
- this.score = score;
- }
-
- public String toString(){
- return value + " : " + score;
- }
- }
-
Huffman编码字段结构
- package art.programming.huffman;
-
- public class Encoding {
- private Character value;
- private byte bits;
-
- public Character getValue() {
- return value;
- }
-
- public void setValue(Character value) {
- this.value = value;
- }
-
- public byte getBits() {
- return bits;
- }
-
- public void setBits(byte bits) {
- this.bits = bits;
- }
- }
-
- package art.programming.huffman;
-
- import java.util.Collection;
- import java.util.PriorityQueue;
-
- public class HuffmanEncoding {
-
- HuffmanTree huffmanTree = new HuffmanTree();
-
- public byte[] encode(String toEncode){
- //Calculate the frequency
- PriorityQueue<Frequency> frequencyPrioQueue
- = new FrequencyCalculator().calculate(toEncode);
- huffmanTree.build(frequencyPrioQueue);
- byte[] bytes = new byte[toEncode.length()];
-
- for (int i=0; i<toEncode.length(); i++){
- Encoding encoding = huffmanTree.getEncodingTable().get(toEncode.charAt(i));
- bytes[i] = encoding.getBits();
- }
- return bytes;
- }
-
- public String decode(byte[] bytes){
- Collection<Encoding> encodings = huffmanTree.getEncodingTable().values();
- char[] chars = new char[bytes.length];
- for (int i = 0; i<bytes.length;i ++){
- byte b = bytes[i];
- for (Encoding encoding : encodings){
- if (encoding.getBits() == b){
- chars[i] = encoding.getValue().charValue();
- }
- }
- }
- return new String(chars);
- }
- }
-
- package art.programming.huffman;
-
- import java.io.File;
- import java.io.FileOutputStream;
- import java.io.IOException;
-
- import org.junit.Test;
-
- public class HuffmanEncodingTest {
-
- @Test
- public void encodeAndDecode() throws IOException{
- HuffmanEncoding encoding = new HuffmanEncoding();
- //byte[] bytes = encoding.encode("beep boop beer!");
- String toEncode = "歌舞青春 舞蹈 舞出我人生 好电影啊, 哈哈,我爱看电影";
- byte[] bytes = encoding.encode(toEncode);
- FileOutputStream fileOutputStream = new FileOutputStream(new File("/home/alex/odbytes"));
- FileOutputStream fileOutputStreamOrigin = new FileOutputStream(new File("/home/alex/original"));
- try {
- fileOutputStream.write(bytes);
- fileOutputStreamOrigin.write(toEncode.getBytes());
- } catch (IOException e) {
- e.printStackTrace();
- }finally{
- fileOutputStream.close();
- fileOutputStreamOrigin.close();
- }
-
- String original = encoding.decode(bytes);
- System.out.println(original);
- }
-
- }
-
压缩前
- alex@ubuntu:~$ od original
- 0000000 126746 164214 117210 116751 163222 122630 164040 117210
- 0000020 134750 020210 104350 162636 135207 104346 162221 135272
- 0000040 112347 020237 122745 163675 132624 136745 162661 105225
- 0000060 136357 020214 111745 162610 104223 136357 163214 110610
- 0000100 104347 163661 105634 112347 162665 130675
- 0000114
-
压缩后
- alex@ubuntu:~$ od odbytes
- 0000000 075373 173346 075177 077764 175172 171366 077747 173765
- 0000020 174170 077770 174771 173370 174767 074367
- 0000034