基於Huffman編碼的C++文件壓縮與解壓縮程序

//*******************************controll.h**********************
//*****************壓縮命令和解壓命令**********

#ifndef CONTROLL_H
#define CONTROLL_H

class Controll //:public HuffmanTree
{
public:
    void compress_file(const char *,const char *,const char *);
    void uncompress_file(const char *,const char *,const char *);
    void freFromFile(const char *,char **,char *,int *);
    int powmy(int,int);
};

#endif //controll.h end


//*******************************huffman.h**********************
//*****************葉子結點爲n的哈夫曼樹共有2n-1個結點**********

#ifndef HUFFMAN_H
#define HUFFMAN_H

class HuffmanNode {
public:
	char info;                  //結點信息
	double weight;              //結點權值
	int parent, lchild, rchild; //父親結點,左右孩子結點
	HuffmanNode() {
		parent=lchild=rchild=-1;
	}
	HuffmanNode(const char &data, const double &wt, const int &pa=-1, const int &lch=-1, const int &rch=-1) {
		info=data;
		weight=wt;
		parent=pa;
		lchild=lch;
		rchild=rch;
	}
}; //class HuffmanNode end


/*現在我把它調出來爲的就是在controlll類當中使用這個類*/
class Code { //HuffmanTree的私有類,編碼類
	public:
		Code():length(10) { ptr=new char[length]; }
		~Code() { delete[] ptr; }
		char *ptr;
		const int length;
};

class HuffmanTree {
public:
	HuffmanTree(const int &s=100) {
		maxSize=(s>100?s:100);
		arrayTree=new HuffmanNode[maxSize];
		currentSize=0;
		codeArray=0;
	}
	~HuffmanTree() {
		delete[] arrayTree;
		if (codeArray!=0)
			delete[] codeArray;
	}
	void run(const char*, const char*);
    int getSumBytes();//定義待壓縮文件的總bytes數
    int currentSize; //當前數組大小
    HuffmanNode *arrayTree; //哈夫曼結點數組
    Code *codeArray; //數組大小爲currentSize
    int sum_bits;//定義壓縮後文件的總bit數
private:

	int maxSize; //數組最大值

                    //int sum_bytes;
	void insert(const char&, const double&); //插入結點
	void createHuffmanTree(); //創建哈夫曼樹
	void createHuffmanCode(); //創建哈夫曼編碼
	void writeCodeToFile(const char *);//將Huffman編碼寫入到詞頻表文件當中
	int findPosition(const char &) const; //返回字符在arrayTree[]中的位置
	int isEqual(const char *s) const; //判斷s是否存在於編碼系統中,若存在則返回s在編碼系統中的位置,否則返回-1
	void reverse(char arr[]);
}; //class HuffmanTree end

#endif //huffman.h end

//**************************controll.cpp**********************

#include <iostream>
#include <limits>
#include <cstring>
#include<cstdlib>
#include <cstdio>
#include "controll.h"
#include "huffman.h"
using namespace std;

int Controll::powmy(int a,int b)
{
    if(b==0) return 1;
    int i = 0;
    int result = 1;
    for(;i<b;i++)
    {
        result *=a;
    }
    return result;
}
void Controll::freFromFile(const char *codeFilename,char **HC,char value[],int res[])
{
    FILE *fe = fopen(codeFilename,"rb");
    if(fe == NULL)
    {
        printf("打開文件失敗!");
        return;
    }
    int num[10];
    int m,i;
    char * cd = (char *)malloc((100+1)*sizeof(char));  //臨時保存的編碼文件名
    char  c;
    for(i=1;;i++)
    {
        c = fgetc(fe);
        if(c=='#') break;
        int j = 0;
        while(c!=' ')
        {
            cd[j++] = c;
            c = fgetc(fe);
        }
        cd[j] = '\0';
        HC[i] = (char *)malloc((j+1)*sizeof(char));
        strcpy(HC[i],&cd[0]);
        /*以上完成了huffman編碼的讀出  下面將Huffman編碼對應的字符寫進codeArray*/
        c = fgetc(fe);
        value[i] = c;
        fgetc(fe);
    }

    int k;
    c = fgetc(fe);  //頭一個讀取#,後一個纔開始讀取數據
    k = 0;
    while(c!='#')
    {
        num[k++] = c-'0';
        c = fgetc(fe);
    }

    m = 0;
    res[0] = 0;
    for(k=k-1;k>=0;k--)
    {
        //printf("powmy(10,m)=%d ",powmy(10,m));
        res[0]+=(num[k]*powmy(10,m));
        //printf("%d sum_bit=%d  m=%d ",num[k],sum_bit,m);
        m = m + 1;
    }
    printf("sum_bits=%d  ",res[0]);


    c = fgetc(fe);  c = fgetc(fe);//頭一個讀取#,後一個纔開始讀取數據
    k = 0;
    while(c!='#')
    {
        num[k++] = c-'0';
        c = fgetc(fe);
    }

    m = 0;  res[1] = 0;
    for(k=k-1;k>=0;k--)
    {
        res[1]+=num[k]*powmy(10,m);
        m++;
    }
    printf("currentSize =%d\n",res[1]);

    fclose(fe);
    //果然是這個問題的原因導致的。 沒有關閉文件會導致出錯
   /* fclose(fe);
    FILE *fww = fopen("C:\\out4.txt","wb");//打開詞頻表文件
    for(i=0;i<currentSize;i++)
    {
        fprintf(fww,"%s %c\n",HC[i],value[i]);
    }
    printf("hehe");
    fprintf(fww,"#%d#",sum_bit);
    fprintf(fww,"#%d#",currentSize);*/
}
void Controll::compress_file(const char *sourceFilename,const char *codeFilename,const char *geneFilename)
{
     HuffmanTree tree(620);
     tree.run(sourceFilename,codeFilename);
     FILE *fo = fopen(sourceFilename,"rb");
     FILE *fw = fopen(geneFilename,"wb");
     if(fo == NULL || fw == NULL)
     {
         printf("文件打開失敗!");
         return;
     }
     int aa = 0;
     int sum_bytes = tree.getSumBytes();       //得到文件的總字節數,用於計算壓縮百分比
     int sum = 0;                                  //用於計算八位的值,從而寫進壓縮文件當中
     int i,flag = 0,j,k=0;
     int temp[1000];
     memset(temp,0,sizeof(temp));
     printf("before compress sumbytes=%d  after compress sumytes=%d\n",sum_bytes,tree.sum_bits/8);
     printf("The compress efficiency is %4.2f%%\n",(double)tree.sum_bits/8*1.0/(sum_bytes*1.0)*100);
     while(!feof(fo))
     {
         sum = 0;
         char one_byte = fgetc(fo);
         aa++;
         for(i=0;i<tree.currentSize;i++)
         {
             if(one_byte == tree.arrayTree[i].info)
             {
                 flag +=strlen(tree.codeArray[i].ptr);
                 int len = strlen(tree.codeArray[i].ptr);
                 if(flag<8)
                 {
                    for(j=0;j<len;j++)
                       temp[k++] = tree.codeArray[i].ptr[j] - '0';
                 }
                else if(flag>=8)
                {

                    for(j=0;k<8;j++)
                        temp[k++] =  tree.codeArray[i].ptr[j] - '0';
                     for(;j<len;j++)
                        temp[k++] = tree.codeArray[i].ptr[j] - '0';

                    sum+=temp[0]*128+temp[1]*64+temp[2]*32+temp[3]*16+temp[4]*8
                        +temp[5]*4+temp[6]*2+temp[7]*1;

                    for(j=0;j<8;j++)
                       temp[j] = 0;

                    for(j=8;j<k;j++)
                      temp[j-8] = temp[j];
                    k = flag = j-8;

                    char c = sum;
                    fputc(c,fw);

                    if(aa%1000==0)
                    {
                        printf("\r%4.2f%%",(double)aa/sum_bytes*100.0);
                    }
                    fflush(fw);
                    i = tree.currentSize+1;

                }
            }
        }
    }
    aa = sum_bytes;
    printf("\r%4.2f%%",(double)aa/sum_bytes*100.0);
    //printf("壓縮成功!");
    /*考慮到最後可能沒有湊夠八位的情況*/
    if(flag)
    {
        sum+=temp[0]*128+temp[1]*64+temp[2]*32+temp[3]*16+temp[4]*8
                        +temp[5]*4+temp[6]*2+temp[7]*1;
        char c = sum;
        fputc(c,fw);
        fflush(fw);
    }
    fclose(fw);
    fclose(fo);
}

void Controll::uncompress_file(const char *geneFilename,const char *codeFilename,const char *backFilename)
{
   char **HC = (char**)malloc(260*sizeof(char*));//用於保存從文件當中讀取的huffman編碼
   char value[270];
   int res[2];
   //果然,還是數組地址符比較好的  關於整形的值如何變化,我記得我也是實驗過的,是可以的
   freFromFile(codeFilename,HC,value,res);
   int sum_bits = res[0];
   int currentSize = res[1];

   FILE *fo = fopen(geneFilename,"rb");
   FILE *fw = fopen(backFilename,"wb");
   if(fo==NULL || fw==NULL)
   {
        printf("文件打開失敗!");
        return;
   }
   char str[1000];
   int i,j,k,temp = 0;
   int index;
   int sum_bit2 = sum_bits;
   int num[10];
   while(!feof(fo))
    {
       if(sum_bit2<0) break;
       sum_bit2 -=8;
       int data = fgetc(fo);
       if(data == -1) break;
       if(sum_bit2<0)
       {
            index = 0-sum_bit2;
       }
       else
       {
            index = 0;
       }
       memset(num,0,sizeof(num));
       /*這是可以綜合出一個函數的*/
       i = 0;
       while(data)
       {
        num[i++] = data%2;
        data = data/2;
       }

       i = temp;
       for(k=7;k>=index;i++,k--)
       {
           if(num[k])
              str[i] = '1';
            else
              str[i] = '0';

           str[i+1] ='\0';

           for(j=1;j<=currentSize;j++)
           {
               if(strcmp(str,HC[j])==0)
               {
                    fputc(value[j],fw);
                     if((sum_bits-sum_bit2)%8000==0)
                    {
                        //cout<<'\r'<<(double)(sum_bits-sum_bit2)/sum_bits*100.0<<'%%';
                        printf("\r%4.2f%%",(double)(sum_bits-sum_bit2)/sum_bits*100.0);
                    }
                    fflush(fw);
                    j = currentSize+1;
                    i = -1;
               }
           }
       }
       if(i)
       {
            temp = i;
       }
       else
       {
            temp = 0;
       }
    }
    sum_bit2 = 0;
    printf("\r%4.2f%%",(double)(sum_bits-sum_bit2)/sum_bits*100.0);
    fclose(fw);
    fclose(fo);
}

//**************************huffman.cpp**********************

#include <iostream>
#include <fstream> //for ofstream ifstream
#include <limits>  //for numeric_limits<double>::max()
#include <cstdlib> //for exit()
#include <cstring> //for strlen(), strcpy(), strcmp()
#include <cstdio>
#include "huffman.h"
using namespace std;

void HuffmanTree::insert(const char &data, const double &wt) { //插入結點
	if (2*currentSize-1 >= maxSize) //葉子結點爲n的哈夫曼樹共有2n-1個結點
		return;

	arrayTree[currentSize].info=data;
	arrayTree[currentSize].weight=wt;
	currentSize++;
}
/*將詞頻表存入相應的文件當中*/
void HuffmanTree::writeCodeToFile(const char *outFilename)
{
    int i;
    FILE *fw = fopen(outFilename,"wb");//打開詞頻表文件
    for(i=0;i<currentSize;i++)
    {
        fprintf(fw,"%s %c\n",codeArray[i].ptr,arrayTree[i].info);
    }
    sum_bits = 0;
    for(i=0;i<currentSize;i++)
    {
        sum_bits += arrayTree[i].weight*strlen(codeArray[i].ptr);
    }
    fprintf(fw,"#%d#",sum_bits);
    fprintf(fw,"#%d#",currentSize);

    fclose(fw);
}

int HuffmanTree::getSumBytes()
{
    int sum_bytes = 0;
    int i = 0;
    for(i=0;i<currentSize;i++)
    {
        sum_bytes+=arrayTree[i].weight;
    }
    return sum_bytes;
}

void HuffmanTree::reverse(char arr[]) { //反轉字符串
	const int len=strlen(arr);
	char *p;
	p=new char[len+1];
	strcpy(p, arr);
	p[len]='\0';
	int k=0;
	for (int i=len-1; i>=0; i--)
		arr[i]=p[k++];
	arr[len]='\0';
	delete[] p;
}

int HuffmanTree::findPosition(const char &ch) const { //返回字符ch在arrayTree[]中的位置
	for (int i=0; i<currentSize; i++)
		if (arrayTree[i].info == ch)
			return i;
	return -1;
}


int HuffmanTree::isEqual(const char *s) const { //判斷s的編碼是否存在,若存在返回編碼在數組codeArray[]中的位置,否則返回-1
	for (int i=0; i<currentSize; i++)
		if (strlen(s) == strlen(codeArray[i].ptr)) //可以去掉此行
			if (strcmp(s, codeArray[i].ptr) == 0)
				return i;
	return -1;
}

void HuffmanTree::createHuffmanTree() { //構造huffmanTree
	int i=currentSize;
	int k;
	double wt1, wt2;
	int lnode = 0, rnode = 0;
	while (i < 2*currentSize-1) {
		wt1=wt2=numeric_limits<double>::max();
		k=0;
		while (k < i) {
			if (arrayTree[k].parent==-1) {
				if (arrayTree[k].weight<wt1) {
					wt2=wt1;
					rnode=lnode;
					wt1=arrayTree[k].weight;
					lnode=k;
				}
				else if (arrayTree[k].weight<wt2) {
					wt2=arrayTree[k].weight;
					rnode=k;
				}
			}
			k++;
		}
		arrayTree[i].weight = arrayTree[lnode].weight+arrayTree[rnode].weight;
		arrayTree[i].lchild=lnode;
		arrayTree[i].rchild=rnode;
		arrayTree[lnode].parent=arrayTree[rnode].parent=i;
		i++;
	}
}

void HuffmanTree::createHuffmanCode() { //構造huffmanCode,即哈夫曼編碼
	codeArray=new Code[currentSize];
	int i=0;
	int k, n, m;
	while (i < currentSize) {
		k = arrayTree[i].parent;
		n=0;
		m=i;
		while (k!=-1 && k<currentSize*2-1) {
			if (arrayTree[k].lchild==m)
				codeArray[i].ptr[n++]='0';
			else if (arrayTree[k].rchild==m)
				codeArray[i].ptr[n++]='1';
			m=k;
			k=arrayTree[m].parent;
		}
		codeArray[i].ptr[n]='\0';
		reverse(codeArray[i].ptr); //反轉字符串,使之變成正確的編碼
		i++;
	}
}


void HuffmanTree::run(const char *inFilename,const char *outFilename) { //run函數的實現
	FILE *fo = fopen(inFilename,"rb");//讀入待壓縮文件
	if(fo==NULL)
	{
	    cerr<<"\""<<inFilename<<"\" could not open."<<endl;
	    exit(1);
	}
	char ch;
	int pos;
	// 從文件當中讀入字符,並且統計字符個數
	ch = fgetc(fo);
	while(!feof(fo))
	{
	    //printf("hehe  ");
	    pos = findPosition(ch);
	    if (pos != -1)
			arrayTree[pos].weight++;
		else
			insert(ch, 1);
        ch = fgetc(fo);
	}
	cout<<endl;
	createHuffmanTree(); //構造huffman樹
	createHuffmanCode(); //對統計字符進行編碼
	writeCodeToFile(outFilename);   //將編碼表存入文件
	fclose(fo);
}

//huffman.cpp end

//*****************************main.cpp*************************

#include "huffman.h"
#include "controll.h"
#include <cstdio>
#include <cstring>
int main(int argc,char **argv) {
     const char *codeFileName   ="C:\\out1.txt";   //詞頻表文件名
     Controll controller;
    //執行壓縮命令
    if(strcmp("-c",argv[1])==0)
    {
        controller.compress_file(argv[2],codeFileName,argv[3]);
    }
    //執行解壓命令
    else if(strcmp("-u",argv[1])==0)
    {
        controller.uncompress_file(argv[2],codeFileName,argv[3]);
    }
	return 0;
} //main.cpp end


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章