编程练习——Huffman编码
这个类的名字叫huffman Tree,但是不仅仅是生成huffman Tree,因为Huffman树的生成要使用优先队列,也就是堆。stl中其实有这个的实
现。但是我机器里的vs2005这个堆的使用有点小bug(估计是stl中使用的就是数组存储,而且数组一旦固定大小,就再也无法改变,其实这样实现失去了堆的某些特
性。 _ 所以我还是自己实现了一下 _
。顺便练习下。)。刚开始的时候
就采用int作为编码的存储单元。后来发现huffman是可变长编码。所以使用int作为编码单元就失去了huffman的特性。于是寻找bit数组。找到了bit
set这个stl类,却发现这个是不可变的。无语。只得自己实现一个可变大小的 _ bit vector _
。然后近乎重写了这个Huffman的编码和解码。不过也不是很难。在这之间又陆陆续续的发现了一系列的bug和编译错误。对以后的编程大有启发。
- /* created by chico chen
- * date 2008/10/25
- */
-
-
- #ifndef HUFFMAN_TREE
-
- #define HUFFMAN_TREE
- #include “Heap.h”
- #include “TreeNode.h”
- #include “BinaryTree.h”
- #include
- #include “…/…/ArithmeticTest/ArithmeticTest/BITVector.h”
- using namespace std;
-
- // this class is used for the combin function in huffmanTree class.
- class IntCombin
- {
- public :
- static int Add( int i, int j)
- {
- return i+j;
- }
- };
-
- // this class for compare tree node
- template < class T>
- class TreeNodeCompare
- {
- public :
- static bool Up(TreeNode
* d1, TreeNode * d2) - {
- return Lt(d1->getData(),d2->getData());
- }
-
- static bool Eq(T& d1, T& d2)
- {
- if (d1 == d2)
- return true ;
- return false ;
- }
-
- static bool Gt(T& d1, T& d2)
- {
- if (d1 > d2)
- return true ;
- return false ;
- }
-
- static bool Lt(T& d1, T& d2)
- {
- if (d1 < d2)
- return true ;
- return false ;
- }
- };
-
- // this class is for storing huffman code.
- // bits and the length of bits
- class HuffmanCode
- {
- public :
- BITVector bits;
- int len;
- HuffmanCode()
- {
- len = 0;
- }
-
- HuffmanCode( const HuffmanCode & codes):bits(codes.bits)
- {
- len = codes.len;
-
- }
-
- const HuffmanCode& operator=( const HuffmanCode & codes)
- {
- if ( this != &codes)
- {
- this ->len = codes.len;
- this ->bits = codes.bits;
- }
- return * this ;
- }
-
- ~HuffmanCode()
- {
-
- }
- };
-
- template < class T, class Cmp, class Combin>
- class HuffmanTree: public BinaryTree
- {
- private :
- // store the data, and sort it, then for building a huffman tree
- Heap<TreeNode
* ,Cmp> huffmanQueue; -
- const unsigned int mask;
- // the max number can be shifted, if it is unsigned int then it will be 32.
- const int maxShiftNum;
-
-
- TreeNode
* initTree()
- TreeNode
-
- // init the huffman tree
- {
- TreeNode
* combinNode = NULL; - T tempData;
- while (! this ->huffmanQueue.IsEmpty())
- {
- // fetch two small data and generate a large data
- TreeNode
* node1 = this ->huffmanQueue.Top(); - combinNode = node1;
- this ->huffmanQueue.RemoveTop();
- if (! this ->huffmanQueue.IsEmpty())
- {
- TreeNode
* node2 = this ->huffmanQueue.Top(); - this ->huffmanQueue.RemoveTop();
- tempData = Combin::Add(node1->getData(),node2->getData());
-
-
- if (Cmp::Lt(node1->getData(),node2->getData()))
-
- {
- // here is comparing the data1 of node1 and the data2 of node2
- // Cmp:Lt means ‘<’ and Cmp:Gt means ‘>’
-
- combinNode = new TreeNode
(tempData,node1,node2);
- combinNode = new TreeNode
- }
- else
- {
- combinNode = new TreeNode
(tempData,node2,node1); - }
- this ->huffmanQueue.Insert(combinNode);
- }
- else
- {
- break ;
- }
-
- }
- return combinNode;
- }
-
-
-
- // the final huffman code
-
-
- map<T,HuffmanCode> huffmanTable;
-
-
- void SelfEncode(TreeNode
* subRoot,unsigned int bitcode, int num,map<T,HuffmanCode > &ht)
- void SelfEncode(TreeNode
-
- // encoding the data from a huffman tree.
- // The left branch is 0, and the right branch is 1
- {
-
- if (subRoot != NULL)
- {
-
- if (subRoot->left == NULL && subRoot->right==NULL)
- {
- HuffmanCode hc;
- hc.bits.SetInt(bitcode,0,maxShiftNum-num);
- hc.len = maxShiftNum-num;
- ht[subRoot->getData()] = hc;
- return ;
- }
- else
- {
- if (num<=0)
- {
- throw “the huffman is too deep!” ;
- }
-
-
- SelfEncode(subRoot->left,bitcode,num-1,ht);
-
- bitcode = bitcode | (0x80000000 >> (maxShiftNum-num));
- SelfEncode(subRoot->right,bitcode,num-1,ht);
- }
- }
-
-
- }
-
-
-
- void DecodeFromTree(TreeNode
* subRoot,BITVector& bits, int index, const int & len,vector & decode)
- void DecodeFromTree(TreeNode
-
- // decoding the code. use the code to search the data from the huffmanTree.
- // here you can create you own map<code,data> deHuffmanTable.
- // but I can not analyze the space the table will use,
- // so I find the data by searching huffman tree
- {
- if (index == len)
- {
- if (subRoot->getLeft()==NULL && subRoot->getRight() == NULL)
- {
-
- decode.push_back(subRoot->getData());
- return ;
- }
- throw “code length is not match the bits length. Huffman Tree construct error.” ;
- }
- if (subRoot == NULL)
- {
- throw “decode error!” ;
-
- }
- else if (subRoot->getLeft() == NULL && subRoot->getRight() == NULL)
- {
-
- decode.push_back(subRoot->getData());
- return DecodeFromTree( this ->root,bits,index,len,decode);
- }
-
- if (bits.Get(index) == 0)
- {
- return DecodeFromTree(subRoot->getLeft(),bits,index+1,len,decode);
- }
- else if (bits.Get(index)==1)
- {
-
- return DecodeFromTree(subRoot->getRight(),bits,index+1,len,decode);
- }
- else
- {
- throw “code error!” ;
- }
-
-
- }
-
-
-
- unsigned int FindCodeFromTable(T& data)
-
- // find the data code from huffman table
- // may be not efficient.
- {
- return this ->huffmanTable[data];
- }
-
- public :
-
- HuffmanTree(T t[], int n):
- BinaryTree
(),mask(0x80000000),maxShiftNum( sizeof (unsigned int )*8) - // the array t type is T, and the number is n
- {
- if (n == 0)
- return ;
- TreeNode
* node; - for ( int i = 0; i < n; i++)
- {
- node = new TreeNode
(t[i]); - this ->huffmanQueue.Insert(node);
- }
- this ->root = initTree();
- }
-
- ~HuffmanTree()
- {
- // destroy
-
- }
-
-
- void SelfEncode()
-
- // convert the huffmanTree into huffmanTable
- // unsigned int code is 32 bits, so the huffman tree has only less than 33 layer.
- {
- string s= “” ;
-
- SelfEncode( this ->root,0,maxShiftNum, this ->huffmanTable);
-
- }
-
-
-
- void Decode(HuffmanCode& huffmanCode,vector
& decode)
- void Decode(HuffmanCode& huffmanCode,vector
-
-
- // use bit to find the data node.
- {
-
- return DecodeFromTree( this ->root,huffmanCode.bits,0,huffmanCode.len,decode);
-
- }
-
- HuffmanCode Encode(T info[], int n)
- // n is size
- {
- HuffmanCode hc;
-
- for ( int i = 0; i < n; i++)
- {
-
- hc.bits.SetBitVector(((HuffmanCode) this ->huffmanTable[info[i]]).bits,hc.len,((HuffmanCode) this ->huffmanTable[info[i]]).len);
- hc.len +=((HuffmanCode) this ->huffmanTable[info[i]]).len;
- }
-
- return hc;
- }
-
-
- void PrintHuffmanTable()
-
- // print the huffman table
- // print the pair data<->code
- {
- int len = this ->huffmanTable.size();
- cout << “i/tdata/tcode/n” ;
- int count = 0;
- map<T,HuffmanCode>::iterator i= this ->huffmanTable.begin();
- for (; i != this ->huffmanTable.end(); i++)
- {
- cout << count++<< “/t” <<(*i).first<< “/t” ;
- (*i).second.bits.PrintfZeroOne(0,(*i).second.len);
- cout<<endl;
- }
-
- }
-
- };
-
-
- #endif
-
这个类设计方面还有些不足之处。比如encode方法
测试一下:
- int a[10] = {12,224,33,32,1,91,35,34,36,291};
- HuffmanTree< int ,TreeNodeCompare< int >,IntCombin> ht(a,10);
- ht.SelfEncode();
- ht.PrintHuffmanTable();
- ht.printTree();
- cout << endl;
- int info[] = {33,34,33};
- HuffmanCode hc = ht.Encode(info,3);
- hc.bits.PrintfZeroOne(0,hc.len);
- vector< int > code;
- ht.Decode(hc,code);
- ** for ** ( int i = 0; i != code.size();i++)
- {
-
- cout<<code[i]<<endl;
- }
-
- ** return ** 0;