//*******************************controll.h**********************
//*****************compress and decompress commands**********
#ifndef CONTROLL_H
#define CONTROLL_H
/**
 * Command front-end of the Huffman tool: exposes the compress and
 * uncompress operations plus the helpers they rely on.
 */
class Controll //:public HuffmanTree
{
public:
    /// Compresses `sourceFilename` into `geneFilename`; the code table is
    /// written to `codeFilename`.
    void compress_file(const char *sourceFilename,
                       const char *codeFilename,
                       const char *geneFilename);

    /// Restores `backFilename` from the compressed `geneFilename`, using the
    /// code table stored in `codeFilename`.
    void uncompress_file(const char *geneFilename,
                         const char *codeFilename,
                         const char *backFilename);

    /// Loads the code table file: fills `HC` (code strings) and `value`
    /// (the matching bytes) starting at index 1, and stores the two counters
    /// found in the file trailer in `res[0]` and `res[1]`.
    void freFromFile(const char *codeFilename,
                     char **HC,
                     char *value,
                     int *res);

    /// Integer power: returns `a` multiplied by itself `b` times.
    int powmy(int a, int b);
};
#endif //controll.h end
//*******************************huffman.h**********************
//*****************a Huffman tree with n leaf nodes has 2n-1 nodes in total**********
#ifndef HUFFMAN_H
#define HUFFMAN_H
/**
 * One node of the array-based Huffman tree.
 *
 * Links are indices into the owning node array; -1 means "no such node".
 */
class HuffmanNode {
public:
    char info;                  // payload byte stored in the node
    double weight;              // node weight
    int parent, lchild, rchild; // indices of the parent / left child / right child, -1 if absent

    /// Creates a detached node. Every member is given a defined value so that
    /// default-constructed array elements never hold indeterminate data.
    HuffmanNode()
        : info('\0'), weight(0.0), parent(-1), lchild(-1), rchild(-1) {
    }

    /// Creates a node with the given payload, weight and (optional) links.
    HuffmanNode(const char &data, const double &wt, const int &pa=-1, const int &lch=-1, const int &rch=-1)
        : info(data), weight(wt), parent(pa), lchild(lch), rchild(rch) {
    }
}; //class HuffmanNode end
/* Code was moved out of HuffmanTree to namespace scope so that the Controll class can use it. */
/**
 * Buffer holding the code of one symbol as a NUL-terminated string of
 * '0'/'1' characters (formerly a private class of HuffmanTree).
 *
 * The buffer has `length` bytes. With at most 256 distinct byte values a
 * Huffman code is at most 255 bits long, so 256 bytes always suffice; the
 * previous capacity of 10 was overrun by any code longer than 9 bits.
 */
class Code {
public:
    Code():length(kCapacity) {
        ptr=new char[length];
        ptr[0]='\0'; // start out as a valid empty string
    }
    ~Code() { delete[] ptr; }
    char *ptr;        // owned buffer of `length` bytes
    const int length; // capacity of `ptr`, including the terminating NUL
private:
    enum { kCapacity = 256 };
    // The object owns `ptr`; a member-wise copy would delete the buffer twice,
    // so copying is disabled (declared, intentionally not defined).
    Code(const Code &);
    Code &operator=(const Code &);
};
class HuffmanTree {
public:
HuffmanTree(const int &s=100) {
maxSize=(s>100?s:100);
arrayTree=new HuffmanNode[maxSize];
currentSize=0;
codeArray=0;
}
~HuffmanTree() {
delete[] arrayTree;
if (codeArray!=0)
delete[] codeArray;
}
void run(const char*, const char*);
int getSumBytes();//定義待壓縮文件的總bytes數
int currentSize; //當前數組大小
HuffmanNode *arrayTree; //哈夫曼結點數組
Code *codeArray; //數組大小爲currentSize
int sum_bits;//定義壓縮後文件的總bit數
private:
int maxSize; //數組最大值
//int sum_bytes;
void insert(const char&, const double&); //插入結點
void createHuffmanTree(); //創建哈夫曼樹
void createHuffmanCode(); //創建哈夫曼編碼
void writeCodeToFile(const char *);//將Huffman編碼寫入到詞頻表文件當中
int findPosition(const char &) const; //返回字符在arrayTree[]中的位置
int isEqual(const char *s) const; //判斷s是否存在於編碼系統中,若存在則返回s在編碼系統中的位置,否則返回-1
void reverse(char arr[]);
}; //class HuffmanTree end
#endif //huffman.h end
//**************************controll.cpp**********************
#include <iostream>
#include <limits>
#include <cstring>
#include<cstdlib>
#include <cstdio>
#include "controll.h"
#include "huffman.h"
using namespace std;
/**
 * Integer exponentiation by repeated multiplication.
 *
 * @param a base
 * @param b exponent; zero or a negative value yields 1
 * @return a multiplied by itself b times
 */
int Controll::powmy(int a,int b)
{
    int product = 1;
    for (int remaining = b; remaining > 0; --remaining)
    {
        product *= a;
    }
    return product;
}
/**
 * Reads one decimal field of the trailer: consumes characters up to and
 * including the next '#' (or end of file) and returns the number formed by
 * the digits seen. Non-digit characters are skipped.
 */
static int readTrailerNumber(FILE *fe)
{
    int total = 0;
    int ch = fgetc(fe);
    while (ch != EOF && ch != '#')
    {
        if (ch >= '0' && ch <= '9')
            total = total * 10 + (ch - '0');
        ch = fgetc(fe);
    }
    return total;
}

/**
 * Loads the code table file.
 *
 * Expected layout: one record "<code> <byte>\n" per symbol, followed by the
 * trailer "#<sum_bits>##<currentSize>#".
 *
 * @param codeFilename path of the code table file
 * @param HC     out: HC[i], i starting at 1, receives a malloc'ed
 *               NUL-terminated code string (owned by the caller). At most
 *               256 entries are written, so indices 1..256 must be valid.
 * @param value  out: value[i] is the byte that HC[i] encodes
 * @param res    out: res[0] = total number of compressed bits,
 *               res[1] = number of table entries. Both are 0 when the file
 *               cannot be opened or is malformed; res[1] never exceeds the
 *               number of entries actually stored in HC.
 */
void Controll::freFromFile(const char *codeFilename,char **HC,char value[],int res[])
{
    // Give the outputs defined values on every path, including failure.
    res[0] = 0;
    res[1] = 0;

    FILE *fe = fopen(codeFilename,"rb");
    if(fe == NULL)
    {
        printf("打開文件失敗!");
        return;
    }

    const int kMaxSymbols = 256; // one entry per possible byte value
    const int kMaxCodeLen = 256; // longer codes cannot occur for 256 symbols
    char code[kMaxCodeLen + 1];  // scratch buffer for the code being read
    int loaded = 0;

    // fgetc() results are kept in an int so EOF is distinguishable from a
    // data byte; a truncated file ends the loop instead of spinning forever.
    int ch = fgetc(fe);
    while (ch != EOF && ch != '#' && loaded < kMaxSymbols)
    {
        int len = 0;
        while (ch != EOF && ch != ' ')
        {
            if (len < kMaxCodeLen)
                code[len++] = static_cast<char>(ch);
            ch = fgetc(fe);
        }
        if (ch == EOF)
            break; // record cut short
        code[len] = '\0';

        // The byte right after the separating space is the encoded symbol;
        // it is taken verbatim (it may itself be ' ', '#' or '\n').
        const int symbol = fgetc(fe);
        if (symbol == EOF)
            break;

        char *copy = static_cast<char *>(malloc((len + 1) * sizeof(char)));
        if (copy == NULL)
            break;
        strcpy(copy, code);

        ++loaded;
        HC[loaded] = copy;
        value[loaded] = static_cast<char>(symbol);

        fgetc(fe);      // skip the '\n' that ends the record
        ch = fgetc(fe); // first character of the next record, or '#'
    }

    if (ch == '#')
    {
        // Trailer: "#<sum_bits>#" immediately followed by "#<currentSize>#".
        res[0] = readTrailerNumber(fe);
        fgetc(fe); // opening '#' of the second field
        res[1] = readTrailerNumber(fe);
    }
    if (res[1] > loaded)
        res[1] = loaded; // never report more entries than were stored

    printf("sum_bits=%d ",res[0]);
    printf("currentSize =%d\n",res[1]);
    fclose(fe);
}
/**
 * Compresses a file with Huffman coding.
 *
 * HuffmanTree::run() makes a first pass over the source to build the codes
 * and write the code table; this function then makes a second pass and
 * writes the codes bit-packed, most significant bit first. The last byte is
 * padded with zero bits.
 *
 * @param sourceFilename file to compress
 * @param codeFilename   file that receives the code table
 * @param geneFilename   file that receives the compressed data
 */
void Controll::compress_file(const char *sourceFilename,const char *codeFilename,const char *geneFilename)
{
    HuffmanTree tree(620);
    tree.run(sourceFilename,codeFilename);

    FILE *fo = fopen(sourceFilename,"rb");
    FILE *fw = fopen(geneFilename,"wb");
    if(fo == NULL || fw == NULL)
    {
        printf("文件打開失敗!");
        // Close whichever handle did open so nothing leaks.
        if(fo != NULL) fclose(fo);
        if(fw != NULL) fclose(fw);
        return;
    }

    // Total input size, used for the statistics and the progress display.
    const int sum_bytes = tree.getSumBytes();
    const double ratio = sum_bytes > 0
        ? static_cast<double>(tree.sum_bits) / 8.0 / static_cast<double>(sum_bytes) * 100.0
        : 0.0;
    printf("before compress sumbytes=%d after compress sumytes=%d\n",sum_bytes,(tree.sum_bits+7)/8);
    printf("The compress efficiency is %4.2f%%\n",ratio);

    // Direct byte -> code lookup, built once instead of scanning per byte.
    const char *codeOf[256];
    for (int b = 0; b < 256; ++b)
        codeOf[b] = NULL;
    for (int i = 0; i < tree.currentSize; ++i)
    {
        const unsigned char symbol = static_cast<unsigned char>(tree.arrayTree[i].info);
        if (codeOf[symbol] == NULL)
            codeOf[symbol] = tree.codeArray[i].ptr;
    }

    unsigned int pending = 0; // bits collected for the next output byte
    int pendingBits = 0;      // how many bits `pending` holds (0..7)
    long processed = 0;       // input bytes consumed so far

    // The fgetc() result is tested against EOF directly: a `while(!feof())`
    // loop would process the EOF sentinel as one extra 0xFF input byte.
    int ch;
    while ((ch = fgetc(fo)) != EOF)
    {
        ++processed;
        const char *code = codeOf[ch];
        if (code != NULL)
        {
            // Emit every completed byte immediately, so no bits are left
            // behind however long an individual code is.
            for (const char *bit = code; *bit != '\0'; ++bit)
            {
                pending = (pending << 1) | (*bit == '1' ? 1u : 0u);
                if (++pendingBits == 8)
                {
                    fputc(static_cast<int>(pending), fw);
                    pending = 0;
                    pendingBits = 0;
                }
            }
        }
        if (processed % 1000 == 0 && sum_bytes > 0)
        {
            printf("\r%4.2f%%",static_cast<double>(processed)/sum_bytes*100.0);
        }
    }
    printf("\r%4.2f%%",100.0);

    // The last byte may be incomplete: left-align the bits, pad with zeros.
    if (pendingBits > 0)
    {
        pending <<= (8 - pendingBits);
        fputc(static_cast<int>(pending), fw);
    }
    fclose(fw);
    fclose(fo);
}
/** Frees every code string stored in the table and then the table itself. */
static void releaseCodeTable(char **table, int slots)
{
    for (int i = 0; i < slots; ++i)
        free(table[i]); // free(NULL) is a no-op for unused slots
    free(table);
}

/**
 * Restores a file produced by compress_file().
 *
 * The code table is loaded with freFromFile(); the compressed bytes are then
 * read most significant bit first and, each time the collected bits equal a
 * table entry, the corresponding byte is written. Decoding stops after
 * exactly sum_bits bits, so the padding bits of the last byte are ignored.
 *
 * @param geneFilename compressed input file
 * @param codeFilename code table file
 * @param backFilename file that receives the restored data
 */
void Controll::uncompress_file(const char *geneFilename,const char *codeFilename,const char *backFilename)
{
    const int kTableSlots = 260;
    // Zero-initialised so that unused slots are NULL and can be freed safely.
    char **HC = static_cast<char **>(calloc(kTableSlots, sizeof(char *)));
    if (HC == NULL)
    {
        fprintf(stderr, "uncompress_file: out of memory\n");
        return;
    }
    char value[270];
    int res[2] = {0, 0}; // stays {0,0} if the table cannot be loaded
    freFromFile(codeFilename,HC,value,res);
    const int sum_bits = res[0];
    int currentSize = res[1];
    if (currentSize > kTableSlots - 1)
        currentSize = kTableSlots - 1; // entries are stored from index 1 on

    FILE *fo = fopen(geneFilename,"rb");
    FILE *fw = fopen(backFilename,"wb");
    if(fo==NULL || fw==NULL)
    {
        printf("文件打開失敗!");
        if(fo != NULL) fclose(fo);
        if(fw != NULL) fclose(fw);
        releaseCodeTable(HC, kTableSlots);
        return;
    }

    char str[1000];          // bits collected since the last decoded symbol
    int pending = 0;         // number of characters currently in str
    int bitsLeft = sum_bits; // payload bits still to decode
    bool corrupt = false;
    int data = 0;

    while (!corrupt && bitsLeft > 0 && (data = fgetc(fo)) != EOF)
    {
        for (int k = 7; k >= 0 && bitsLeft > 0; --k)
        {
            --bitsLeft;
            str[pending++] = ((data >> k) & 1) ? '1' : '0';
            str[pending] = '\0';

            int j = 1;
            while (j <= currentSize && !(HC[j] != NULL && strcmp(str, HC[j]) == 0))
                ++j;

            if (j <= currentSize)
            {
                fputc(value[j],fw);
                pending = 0; // start collecting the next code
            }
            else if (pending >= static_cast<int>(sizeof(str)) - 1)
            {
                // No table entry is this long: the input does not match the
                // table. Stop instead of writing past the buffer.
                corrupt = true;
                break;
            }
        }
        if (sum_bits > 0 && (sum_bits - bitsLeft) % 8000 == 0)
        {
            printf("\r%4.2f%%",static_cast<double>(sum_bits-bitsLeft)/sum_bits*100.0);
        }
    }

    if (corrupt)
        fprintf(stderr, "\nuncompress_file: compressed data does not match the code table\n");
    else
        printf("\r%4.2f%%",100.0);

    fclose(fw);
    fclose(fo);
    releaseCodeTable(HC, kTableSlots);
}
//**************************huffman.cpp**********************
#include <iostream>
#include <fstream> //for ofstream ifstream
#include <limits> //for numeric_limits<double>::max()
#include <cstdlib> //for exit()
#include <cstring> //for strlen(), strcpy(), strcmp()
#include <cstdio>
#include "huffman.h"
using namespace std;
void HuffmanTree::insert(const char &data, const double &wt) { //插入結點
if (2*currentSize-1 >= maxSize) //葉子結點爲n的哈夫曼樹共有2n-1個結點
return;
arrayTree[currentSize].info=data;
arrayTree[currentSize].weight=wt;
currentSize++;
}
/* Store the code (frequency) table into the corresponding file. */
/**
 * Writes the code table to `outFilename` and computes sum_bits.
 *
 * Layout: one record "<code> <byte>\n" per leaf, followed by the trailer
 * "#<sum_bits>##<currentSize>#", where sum_bits is the sum over all leaves
 * of weight * code length.
 *
 * Terminates the program with exit(1) if the file cannot be opened, in the
 * same way run() handles an unreadable input file; previously a failed
 * fopen() led to writing through a null FILE pointer.
 */
void HuffmanTree::writeCodeToFile(const char *outFilename)
{
    FILE *fw = fopen(outFilename,"wb"); // code table file
    if (fw == NULL)
    {
        std::cerr<<"\""<<outFilename<<"\" could not open."<<std::endl;
        exit(1);
    }
    sum_bits = 0;
    for (int i = 0; i < currentSize; i++)
    {
        const char *code = codeArray[i].ptr;
        fprintf(fw,"%s %c\n",code,arrayTree[i].info);
        sum_bits += static_cast<int>(arrayTree[i].weight*strlen(code));
    }
    fprintf(fw,"#%d#",sum_bits);
    fprintf(fw,"#%d#",currentSize);
    fclose(fw);
}
int HuffmanTree::getSumBytes()
{
int sum_bytes = 0;
int i = 0;
for(i=0;i<currentSize;i++)
{
sum_bytes+=arrayTree[i].weight;
}
return sum_bytes;
}
/**
 * Reverses the NUL-terminated string `arr` in place.
 */
void HuffmanTree::reverse(char arr[]) {
    int front = 0;
    int back = static_cast<int>(strlen(arr)) - 1;
    // Swap the outermost pair and walk inwards until the indices meet.
    while (front < back) {
        const char held = arr[front];
        arr[front] = arr[back];
        arr[back] = held;
        ++front;
        --back;
    }
}
/**
 * Looks up `ch` among the leaves.
 *
 * @return index of the first leaf in arrayTree[] whose info equals `ch`,
 *         or -1 if there is none
 */
int HuffmanTree::findPosition(const char &ch) const {
    int idx = 0;
    while (idx < currentSize) {
        if (arrayTree[idx].info == ch)
            return idx;
        ++idx;
    }
    return -1;
}
/**
 * Looks up the code string `s` in the code table.
 *
 * @return index of the first entry of codeArray[] equal to `s`,
 *         or -1 if there is none
 */
int HuffmanTree::isEqual(const char *s) const {
    int idx = 0;
    while (idx < currentSize) {
        // strcmp() only reports equality for strings of equal length, so no
        // separate length comparison is needed.
        if (strcmp(s, codeArray[idx].ptr) == 0)
            return idx;
        ++idx;
    }
    return -1;
}
void HuffmanTree::createHuffmanTree() { //構造huffmanTree
int i=currentSize;
int k;
double wt1, wt2;
int lnode = 0, rnode = 0;
while (i < 2*currentSize-1) {
wt1=wt2=numeric_limits<double>::max();
k=0;
while (k < i) {
if (arrayTree[k].parent==-1) {
if (arrayTree[k].weight<wt1) {
wt2=wt1;
rnode=lnode;
wt1=arrayTree[k].weight;
lnode=k;
}
else if (arrayTree[k].weight<wt2) {
wt2=arrayTree[k].weight;
rnode=k;
}
}
k++;
}
arrayTree[i].weight = arrayTree[lnode].weight+arrayTree[rnode].weight;
arrayTree[i].lchild=lnode;
arrayTree[i].rchild=rnode;
arrayTree[lnode].parent=arrayTree[rnode].parent=i;
i++;
}
}
/**
 * Derives the code of every leaf from the finished tree.
 *
 * For each leaf the path to the root is followed, recording '0' when the
 * node is its parent's left child and '1' when it is the right child; the
 * recorded string is then reversed so that it reads from the root down.
 *
 * Changes compared with the previous version:
 *  - writes are checked against Code::length; a code that does not fit ends
 *    the program with an error instead of overrunning the buffer;
 *  - a tree with a single leaf (which has no parent) gets the code "0"
 *    instead of an empty code, so that such a file carries bits to decode;
 *  - a table left over from an earlier call is released first.
 */
void HuffmanTree::createHuffmanCode() {
    delete[] codeArray; // null on the first call; delete[] of null is a no-op
    codeArray=new Code[currentSize];

    for (int leaf = 0; leaf < currentSize; ++leaf) {
        char *code = codeArray[leaf].ptr;
        const int capacity = codeArray[leaf].length;
        int n = 0;          // number of bits recorded so far
        int child = leaf;
        int up = arrayTree[leaf].parent;

        while (up!=-1 && up<currentSize*2-1) {
            if (n >= capacity-1) {
                std::cerr<<"Huffman code longer than "<<(capacity-1)
                         <<" bits does not fit the code buffer."<<std::endl;
                exit(1);
            }
            if (arrayTree[up].lchild==child)
                code[n++]='0';
            else if (arrayTree[up].rchild==child)
                code[n++]='1';
            child=up;
            up=arrayTree[child].parent;
        }

        // Only the lone leaf of a one-symbol tree has no parent at all.
        if (n == 0 && capacity > 1)
            code[n++]='0';

        code[n]='\0';
        reverse(code); // recorded leaf-to-root; the code reads root-to-leaf
    }
}
/**
 * Counts the bytes of `inFilename`, builds the Huffman tree and codes from
 * the counts and writes the code table to `outFilename`.
 *
 * Terminates the program with exit(1) if the input file cannot be opened.
 */
void HuffmanTree::run(const char *inFilename,const char *outFilename) {
    FILE *fo = fopen(inFilename,"rb"); // file to compress
    if(fo==NULL)
    {
        cerr<<"\""<<inFilename<<"\" could not open."<<endl;
        exit(1);
    }

    // Count how often each byte occurs. The fgetc() result is compared with
    // EOF itself: feof() stays false after a read error, so a loop that only
    // tests feof() would never end in that case.
    int ch;
    while ((ch = fgetc(fo)) != EOF)
    {
        const char symbol = static_cast<char>(ch);
        const int pos = findPosition(symbol);
        if (pos != -1)
            arrayTree[pos].weight++;
        else
            insert(symbol, 1);
    }
    fclose(fo); // the input is not needed while building the tree

    cout<<endl;
    createHuffmanTree();          // build the tree
    createHuffmanCode();          // derive the code of every counted byte
    writeCodeToFile(outFilename); // store the code table
}
//huffman.cpp end
//*****************************main.cpp*************************
#include "huffman.h"
#include "controll.h"
#include <cstdio>
#include<cstring>
/**
 * Command-line entry point.
 *
 *   <program> -c <source> <compressed>     compress <source>
 *   <program> -u <compressed> <restored>   restore <restored>
 *
 * The code table is always stored in / read from codeFileName.
 *
 * @return 0 after running a command, 1 when the arguments are missing or
 *         the option is not recognised
 */
int main(int argc,char **argv) {
    const char *codeFileName ="C:\\out1.txt"; // code table file name
    const char *program = (argc > 0 && argv[0] != 0) ? argv[0] : "huffman";

    // argv[1..3] are all required; without this check they were dereferenced
    // even when absent.
    if (argc < 4)
    {
        fprintf(stderr, "Usage: %s -c <source> <compressed> | -u <compressed> <restored>\n", program);
        return 1;
    }

    Controll controller;
    // compress command
    if(strcmp("-c",argv[1])==0)
    {
        controller.compress_file(argv[2],codeFileName,argv[3]);
    }
    // uncompress command
    else if(strcmp("-u",argv[1])==0)
    {
        controller.uncompress_file(argv[2],codeFileName,argv[3]);
    }
    else
    {
        fprintf(stderr, "Unknown option \"%s\"\n", argv[1]);
        fprintf(stderr, "Usage: %s -c <source> <compressed> | -u <compressed> <restored>\n", program);
        return 1;
    }
    return 0;
} //main.cpp end