文件壓縮

 Heap.h:
#pragma once
#include <assert.h>
#include <utility>
#include <vector>

//仿函數
/// Comparison functor that reports whether the left operand is smaller than
/// the right one (via T's operator<). Used as Heap's default ordering, which
/// keeps the smallest element on top (min-heap).
template<class T>
struct Less
{
    bool operator()(const T& l, const T& r)
    {
        const bool leftIsSmaller = l < r;
        return leftIsSmaller;
    }
};

/// Comparison functor that reports whether the left operand is larger than
/// the right one (via T's operator>). Passing it to Heap keeps the largest
/// element on top (max-heap).
template<class T>
struct Greater
{
    bool operator()(const T& l, const T& r)
    {
        const bool leftIsLarger = l > r;
        return leftIsLarger;
    }
};


template<class T, class Compare = Less <T>>
class Heap
{
public:
          Heap()
          {}

          Heap( const T * a, size_t size)
          {
                   _a.resize( size);
                    for (int i = 0; i < size; ++i)
                   {
                             _a.push_back( a[i]);
                   }
                    //建堆
                    for (int i = (_a.size() - 2) / 2; i >= 0; i--)
                   {
                             _AdjustDown(i, size);
                   }
          }

           void Push(const T& x)
          {
                   _a.push_back( x);
                   _AdjustUp(_a.size()-1);
          }

           void Pop()
          {
                    assert(_a.size()> 0);
                   swap(_a[0], _a[_a.size() - 1]);
                   _a.pop_back();
                   _AdjustDown(0, _a.size());
          }

           T& Top()
          {
                    assert(_a.size()>0);
                    return _a[0];
          }

           int Size()
          {
                    return _a.size();
          }

           bool Empty()
          {
                    return _a.empty();
          }

           //向下調整
           void _AdjustDown(size_t parent, size_t size )
          {
                    size_t child = 2 * parent + 1;//子節點
                    while (child < size )
                   {
                              Compare _com;
                              if (child + 1 < size
                                      && _com(_a[child + 1], _a[child]))//右子樹存在且大於左子樹
                             {
                                      ++child;
                             }
                              //大堆:父節點小於子節點,則交換
                              //小堆:父節點大於子節點,則交換
                              if (_com(_a[child], _a[parent ]))
                             {
                                      swap(_a[child], _a[ parent]);
                                       parent = child;//向下調整
                                      child = 2 * parent + 1;
                             }
                              else//已有序
                             {
                                       break;
                             }
                   }
          }

           //向上調整
           void _AdjustUp(int child)
          {
                    size_t parent = (child - 1) / 2;//父節點
                    while (child > 0)
                   {
                              Compare _com;
                              //大堆:父節點小於子節點,則交換
                              //小堆:父節點大於子節點,則交換
                              if (_com(_a[child ], _a[parent]))
                             {
                                      swap(_a[ child], _a[parent]);
                                       child = parent;//向上調整
                                      parent = ( child - 1) / 2;
                             }
                              else
                             {
                                       break;
                             }
                   }
          }

           vector<T > _a;
};
#####################################################################################
Huffman.h:
#pragma once
#include<assert.h>
#include"Heap.h"

/// One node of a Huffman tree: a weight plus links to the two children.
/// A freshly constructed node has no children.
template<class T>
struct HuffmanNode
{
    HuffmanNode<T>* _left;   // left child, null when absent
    HuffmanNode<T>* _right;  // right child, null when absent
    T _weight;               // weight carried by this node

    /// Creates a childless node holding a copy of `w`.
    HuffmanNode(const T& w)
        : _left(nullptr), _right(nullptr), _weight(w)
    {
    }
};

template<class T>
class HuffmanTree
{
           typedef HuffmanNode <T> Node;
public:
          HuffmanTree()
                   :_root( NULL)
          {}

          HuffmanTree( const T * a, size_t size,const T& invalid)
          {
                   _root = CreateTree( a, size , invalid);
          }


           Node* CreateTree(const T* a, size_t size,const T& invalid)
          {
                    //仿函數
                    struct Compare
                   {
                              bool operator()(const Node* l,const Node* r)
                             {
                                       return l ->_weight < r->_weight;
                             }
                   };

                    assert(a);
                    //小堆
                    Heap<Node *, Compare>  minHeap;
                    for (size_t i = 0; i < size; ++i)
                   {
                              if (a [i] != invalid)
                             {
                                      minHeap.Push( new Node (a[i]));
                             }
                   }
                    //生成HuffmanTree
                    while (minHeap.Size()>1)
                   {
                              //選出當前最小的兩個節點
                              Node* left = minHeap.Top();
                             minHeap.Pop();
                              Node* right = minHeap.Top();
                             minHeap.Pop();

                              //權值相加,生成父節點
                              Node* parent = new Node(left->_weight + right->_weight);
                             parent->_left = left;
                             parent->_right = right;
                             minHeap.Push(parent);
                   }
                    return minHeap.Top();
          }

           //獲得根節點
           Node* GetRootNode()
          {
                    return _root;
          }
protected:
           Node* _root;
};
#########################################################################################
FileCompress.h:
#include<string>
#include"Huffman.h"
#include<assert.h>

typedef unsigned long long LongType;

/// Per-byte statistics used while building the Huffman tree: the byte value,
/// how often it occurs, and the bit string assigned to it.
/// All comparisons and the addition look only at `_count`.
struct CharInfo
{
    unsigned char _ch;   // byte value this entry describes
    LongType _count;     // number of occurrences
    std::string _code;   // Huffman code as a string of '0'/'1' characters

    /// Creates an entry with the given count, byte value 0 and an empty code.
    CharInfo(const LongType count = 0)
        : _ch(0)
        , _count(count)
    {}

    /// Returns a new entry whose count is the sum of both counts; the byte
    /// value and code of the result are left at their defaults.
    CharInfo operator+(const CharInfo& fi) const
    {
        return CharInfo(_count + fi._count);
    }

    /// True when the two counts differ.
    bool operator!=(const CharInfo& fi) const
    {
        return _count != fi._count;
    }

    /// True when this entry's count is smaller than `fi`'s.
    bool operator<(const CharInfo& fi) const
    {
        return _count < fi._count;
    }
};


template<class T>
class FileCompress
{
public:
          FileCompress()
          {
                    for (int i = 0; i < 256; i++)
                   {
                             _infos[i]._ch = i;
                             _infos[i]._count = 0;
                   }
          }
public:
           //壓縮
           void Compress(const char* filename)
          {
                    //統計字符個數
                    assert(filename);
                    FILE* fOut = fopen(filename , "rb");
                    assert(fOut);//判斷是否讀取成功

                    char ch = fgetc(fOut);//unsigned char ch=fgetc(fOut);不可行
                    while (!feof(fOut))//EOF值爲-1,與無符號char比較會造成死循環
                   {
                             _infos[( unsigned char )ch]._count++;
                             ch = fgetc(fOut);
                   }

                    //構建HuffmanTree
                    CharInfo invalid(0);
                    HuffmanTree<CharInfo > tree(_infos, 256, invalid);

                    //生成Huffman code
                    string code;//編碼
                   GenerateHuffmanCode(tree.GetRootNode(), code);

                    //讀取源文件,壓縮
                    string compress = filename ;
                   compress += ".compress";//加上壓縮文件後綴
                    FILE* fIn = fopen(compress.c_str(), "wb" );
                    assert(fIn);

                   fseek(fOut, 0, SEEK_SET);//文件指針
                    char value = 0;
                    int pos = 0;
                   ch = fgetc(fOut); //讀取字符
                    while (!feof(fOut))
                   {
                              //取出Huffman code
                              string& code = _infos[(unsigned char)ch]._code;
                              for (size_t i = 0; i < code.size(); ++i)
                             {
                                      value <<= 1;
                                       //將字符串轉化成對應的碼,存入fIn壓縮文件中
                                       if (code[i] == '1' )
                                      {
                                                value |= 1;
                                      }
                                       //8個字節
                                       if (++pos == 8)
                                      {
                                                fputc(value, fIn);
                                                value = 0;
                                                pos = 0;
                                      }
                             }
                             ch = fgetc(fOut);
                   }
                    if (pos)//不足8爲,後補0
                   {
                             value <<= (8 - pos);
                             fputc(value, fIn);
                   }

                    //配置文件
                    string config = filename ;
                   config += ".config";//配置文件後綴
                    FILE* fConfig = fopen(config.c_str(), "wb");
                    assert(fConfig);

                    char countStr[128];//存儲轉化後的編碼
                    string str;
                    for (size_t i = 0; i < 256; ++i)
                   {
                              //記錄字符出現次數
                              if (_infos[i]._count>0)
                             {
                                      str += _infos[i]._ch;
                                      str += ',';
                                       //將字符記錄轉換爲10進制,存在countStr中
                                      _itoa(_infos[i]._count, countStr, 10);
                                      str += countStr;
                                      str += '\n';
                             }
                              //將字符串寫入配置文件
                             fputs(str.c_str(), fConfig);
                             str.clear();
                   }
                    //關閉文件
                   fclose(fOut);
                   fclose(fIn);
                   fclose(fConfig);
          }

           //解壓縮
           void UnCompress(const char* filename)
          {
                    //配置文件
                    string config = filename ;
                   config += ".config";
                    FILE* fConfig = fopen(config.c_str(), "rb");
                    assert(fConfig);

                    string tmp;
                    while (ReadLine(fConfig, tmp))
                   {
                              if (!tmp.empty())
                             {
                                       //收集字符
                                      _infos[( unsigned char )tmp[0]]._count = atoi(tmp.substr(2).c_str());
                                      tmp.clear();
                             }
                              else
                             {
                                      tmp += '\n';
                             }
                   }

                    //重建Huffman樹
                    CharInfo invalid(0);
                    HuffmanTree<CharInfo >ht(_infos, 256, invalid);

                    //讀壓縮文件
                    string compress = filename ;
                   compress += ".compress";
                    FILE* fOut = fopen(compress.c_str(), "rb");
                    assert(fOut);

                    //生成解壓文件
                    string UnCompress = filename ;
                   UnCompress += ".uncompress";
                    FILE* fIn = fopen(UnCompress.c_str(), "wb");
                    assert(fIn);

                    unsigned char ch = fgetc(fOut);
                    HuffmanNode<CharInfo >* root = ht.GetRootNode();
                    HuffmanNode<CharInfo >* cur = root;
                    int pos = 8;

                    LongType charCount = root->_weight._count;
                    while (!feof(fOut))
                   {
                              if (ch & 1 << (pos-1))
                             {
                                      cur = cur->_right;
                             }
                              else
                             {
                                      cur = cur->_left;
                             }
                              if (cur->_left == NULL && cur->_right == NULL)
                             {
                                      fputc(cur->_weight._ch, fIn);
                                      cur = root;
                                       if (--charCount == 0)
                                      {
                                                 break;
                                      }
                             }
                             --pos;
                              if ( pos == 0)
                             {
                                      pos = 8;
                                      ch = fgetc(fOut);
                             }
                   }
                   fclose(fConfig);
                   fclose(fIn);
                   fclose(fOut);
          }

           //生成Huffman編碼
           void  GenerateHuffmanCode(HuffmanNode <CharInfo>* root,string code)
          {
                    if (root == NULL)
                   {
                              return;
                   }
                    //遞歸左右節點,生成Huffman Code
                    if (root ->_left)
                   {
                             GenerateHuffmanCode( root->_left, code + '0');
                   }
                    if (root ->_right)
                   {
                             GenerateHuffmanCode( root->_right, code + '1');
                   }
                    if (root ->_left == NULL&& root->_right == NULL )
                   {
                              //將得到的葉結點編碼存入數組中節點位置的Huffman code中
                             _infos[ root->_weight._ch]._code = code ;
                   }
                    //cout << _infos[root->_weight._ch]._ch << code << endl;
          }

           //按行讀
           bool ReadLine(FILE * fConfig, string& tmp )
          {
                    assert(fConfig);
                    char ch = fgetc(fConfig );
                    if (feof(fConfig ))
                   {
                              return false ;
                   }
                    while (ch != '\n' )
                   {
                              tmp += ch;
                             ch = fgetc( fConfig);
                   }
                    return true ;
          }
          
protected:
           CharInfo _infos[256];
};
  
######################################################################################
test.cpp:
#include<iostream>
using namespace std;

#include"FileCompress.h"
#include"Heap.h"

void test()
{
           /*FileCompress<CharInfo> fc;
          fc.Compress("input.txt");
          cout << "壓縮成功" << endl;

          fc.UnCompress("input.txt");
          cout << "解壓成功" << endl;
*/
           FileCompress<CharInfo > fcb;
          fcb.Compress( "Input.BIG");
          cout << "壓縮成功" << endl;

          fcb.UnCompress( "Input.BIG");
          cout << "解壓成功" << endl;
}

int main()
{
          test();
           //Heap<int> hp;
           //hp.Push(20);
           //hp.Push(30);
           //hp.Push(10);
           //hp.Push(50);
          system( "pause");
           return 0;
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章