网站建设活动海报,wordpress 商场模板,外贸推广引流,用动物做网站名称本文首先简要阐述哈夫曼算法的基本思想#xff0c;然后介绍了使用哈夫曼算法进行文件压缩和解压缩的处理步骤#xff0c;最后给出了C语言实现的文件压缩和解压缩的源代码。哈夫曼算法的主要思想是#xff1a;①首先遍历要处理的字符串#xff0c;得到每个字符的出现的次数然后介绍了使用哈夫曼算法进行文件压缩和解压缩的处理步骤最后给出了C语言实现的文件压缩和解压缩的源代码。哈夫曼算法的主要思想是①首先遍历要处理的字符串得到每个字符的出现的次数②将每个字符(以其出现次数为权值)分别构造为二叉树(注意此时的二叉树只有一个节点)③取所有二叉树种种字符出现次数最小的二叉树合并为一颗新的二叉树新二叉树根节点的权值等于两个子节点的权值之和新节点中的字符忽略④重复过程③直到所有树被合并为同一棵二叉树⑤遍历最后得到的二叉树自顶向下按路径编号指向左节点的边编号0指向右节点的边编号1从根到叶节点的所有边上的0和1链接起来就是叶子节点中字符的哈夫曼编码。下图展示了哈夫曼编码的基本思想。 基于哈夫曼算法的文件压缩和解压缩过程分别说明如下一、文件压缩①统计词频读取文件的每个字节使用整数数组int statistic[MAX_CHARS]统计每个字符出现的次数由于一个字节最多表示2^8-1个字符所以MAX_CHARS256就足够了。在统计字符数的时候对于每一个byte, 有statistic[(unsigned char)byte]。②构造哈夫曼树根据statistic数组基于哈夫曼树算法造哈夫曼树由于构造的过程中每次都要取最小权值的字符所以需要用优先队列来维护每棵树的根节点。③生成编码深度优先遍历哈弗曼树得到每个叶子节点中的字符的编码并存入字符串数组char *dictionary[MAX_CHARS];④存储词频新建存储压缩数据的文件首先写入不同字符的个数然后将每个字符及其对应的词频写入文件。⑤存储压缩数据再次读取待压缩文件的每个字节byte由dictionary[(unsigned int)byte]得到对应的编码(注意每个字符编码的长度不一)使用位运算一次将编码中的每个位(BIT)设置到一个char类型的位缓冲中可能多个编码才能填满一个位缓冲每填满一次将位缓冲区以单个字节的形式写入文件。当文件遍历完成的时候文件的压缩也就完成了。二、文件解压①读取词频读取压缩文件将每个字符的出现次数存入数组statistic②构造哈夫曼编码树根据statistic数组构造哈夫曼编码树③继续读取压缩文件对于每个字节使用位运算得到每个位(BIT)。对于每个BIT根据BIT从根开始遍历哈夫曼树如果BIT是0就走左分支如果BIT是1就走有分支走到叶子节点的时候输出对应的字符。走到叶子节点后重新从哈夫曼树根节点开始匹配每个位。当整个压缩文件读取完毕时文件解压缩也完成了。上文介绍了基于哈夫曼算法的文件压缩和解压缩下面给出基于上述思想的C语言源代码一共有5个文件其中pq.h和pq.c是优先队列compress.h和compress.c是压缩和解压缩的实现main.c是测试文件。pq.h和pq.c请参见另外一篇文章《优先队列(priority_queue)的C语言实现》。另外三个文件内容如下/** File: compress.h* Purpose: To compress file using the Haffman algorithm* Author: puresky* Date: 2011/05/01*/#ifndef _FILE_COMPRESSION_H#define _FILE_COMPRESSION_H//Haffuman Tree Nodetypedef struct HaffumanTreeNode HTN;struct HaffumanTreeNode{char _ch; //characterint _count; //frequencystruct HaffumanTreeNode *_left; //left childstruct HaffumanTreeNode *_right;//rigth child};//FileCompress Struct#define BITS_PER_CHAR 8 //the number of bits in a char#define MAX_CHARS 256 //the max number of chars#define FILE_BUF_SIZE 8192 //the size of Buffer for FILE I/Otypedef struct FileCompressStruct FCS;struct FileCompressStruct{HTN *_haffuman; //A pointer to the root of hafumman treeunsigned int _charsCount; //To store the number of charsunsigned int _total; //Total bytes in a file.char *_dictionary[MAX_CHARS]; //to store the encoding of each characterint _statistic[MAX_CHARS]; //To store the number of each character};FCS *fcs_new();void fcs_compress(FCS *fcs, const char *inFileName, const char *outFileName);void fcs_decompress(FCS *fcs, const char *inFileName, const char *outFileName);void fcs_free(FCS *fcs);#endif/** File: compress.c* Purpose: To compress file using the Haffman algorithm* Author: puresky* Date: 2011/05/01*/#include #include #include #include compress.h#include pq.hstatic const unsigned char mask[8] {0x80, /* 10000000 */0x40, /* 01000000 */0x20, /* 00100000 */0x10, /* 00010000 */0x08, /* 00001000 */0x04, /* 00000100 */0x02, /* 00000010 */0x01 /* 00000001 */};//static functions of HTNstatic HTN *htn_new(char ch, int count){HTN *htn (HTN *)malloc(sizeof(HTN));htn-_left NULL;htn-_right NULL;htn-_ch ch;htn-_count count;return htn;}static void htn_print_recursive(HTN *htn, int depth){int i;if(htn){for(i 0; i depth; i)printf( );printf(%d:%d\n, htn-_ch, htn-_count);htn_print_recursive(htn-_left, depth 1);htn_print_recursive(htn-_right, depth 1);}}static void htn_print(HTN *htn){htn_print_recursive(htn, 0);}static void htn_free(HTN *htn){if(htn){htn_free(htn-_left);htn_free(htn-_right);free(htn);}}//static functions of FCSstatic void fcs_generate_statistic(FCS *fcs, const char *inFileName){int ret, i;unsigned char buf[FILE_BUF_SIZE];FILE *pf fopen(inFileName, rb);if(!pf){fprintf(stderr, cant open file:%s\n, inFileName);return;}while((ret fread(buf, 1, FILE_BUF_SIZE, pf)) 0){fcs-_total ret;for(i 0; i ret; i){if(fcs-_statistic[buf[i]] 0)fcs-_charsCount;fcs-_statistic[buf[i]];}}fclose(pf);}static void fcs_create_haffuman_tree(FCS *fcs){int i, count;HTN *htn, *parent, *left, *right;KeyValue *kv, *kv1, *kv2;PriorityQueue *pq;pq priority_queue_new(PRIORITY_MIN);for(i 0; i MAX_CHARS; i){if(fcs-_statistic[i]){htn htn_new((char)i, fcs-_statistic[i]);kv key_value_new(fcs-_statistic[i], htn);priority_queue_enqueue(pq, kv);}}//fprintf(stdout, the number of haffuman leaf is %d\n, priority_queue_size(pq));while(!priority_queue_empty(pq)){//fprintf(stdout, priority queue size:%d\n, priority_queue_size(pq));kv1 priority_queue_dequeue(pq);kv2 priority_queue_dequeue(pq);if(kv2 NULL){fcs-_haffuman kv1-_value;key_value_free(kv1, NULL);}else{left (HTN *)kv1-_value;right (HTN *)kv2-_value;count left-_count right-_count;key_value_free(kv1, NULL);key_value_free(kv2, NULL);parent htn_new(0, count);parent-_left left;parent-_right right;kv key_value_new(count, parent);priority_queue_enqueue(pq, kv);}}priority_queue_free(pq, NULL);//htn_print(fcs-_haffuman);}static void fcs_generate_dictionary_recursively(HTN *htn, char *dictionary[], char path[], int depth){char *code NULL;if(htn){if(htn-_left NULL htn-_right NULL){code (char *)malloc(sizeof(char) * (depth 1));memset(code, 0, sizeof(char) * (depth 1));memcpy(code, path, depth);dictionary[(unsigned char)htn-_ch] code;}if(htn-_left){path[depth] 0;fcs_generate_dictionary_recursively(htn-_left, dictionary, path, depth 1);}if(htn-_right){path[depth] 1;fcs_generate_dictionary_recursively(htn-_right, dictionary, path, depth 1);}}}static void fcs_generate_dictionary(FCS *fcs){char path[32];fcs_generate_dictionary_recursively(fcs-_haffuman, fcs-_dictionary, path, 0);//fcs_print_dictionary(fcs);}static void fcs_print_dictionary(FCS *fcs){int i;for(i 0; i MAX_CHARS; i)if(fcs-_dictionary[i] ! NULL)fprintf(stdout, %d:%s\n, i, fcs-_dictionary[i]);}static void fcs_write_statistic(FCS *fcs, FILE *pf){int i;fprintf(pf, %d\n, fcs-_charsCount);for(i 0; i MAX_CHARS; i)if(fcs-_statistic[i] ! 0)fprintf(pf, %d %d\n, i, fcs-_statistic[i]);}static void fcs_do_compress(FCS *fcs, const char *inFileName, const char* outFileName){int i, j, ret;char *dictEntry, len;unsigned int bytes;char bitBuf;int bitPos;unsigned char inBuf[FILE_BUF_SIZE];FILE *pfIn, *pfOut;pfIn fopen(inFileName, rb);if(!pfIn){fprintf(stderr, cant open file:%s\n, inFileName);return;}pfOut fopen(outFileName, wb);if(!pfOut){fclose(pfIn);fprintf(stderr, cant open file:%s\n, outFileName);return;}fcs_write_statistic(fcs, pfOut);bitBuf 0x00;bitPos 0;bytes 0;while((ret fread(inBuf, 1, FILE_BUF_SIZE, pfIn)) 0){for(i 0; i ret; i){len strlen(fcs-_dictionary[inBuf[i]]);dictEntry fcs-_dictionary[inBuf[i]];//printf(%s\n, dictEntry);for(j 0; j len; j){if(dictEntry[j] 1){bitBuf | mask[bitPos];}else{bitPos;}if(bitPos BITS_PER_CHAR){fwrite(bitBuf, 1, sizeof(bitBuf), pfOut);bitBuf 0x00;bitPos 0;bytes;}}}}if(bitPos ! 0){fwrite(bitBuf, 1, sizeof(bitBuf), pfOut);bytes;}fclose(pfIn);fclose(pfOut);printf(The compression ratio is:%f%%\n,(fcs-_total - bytes) * 100.0 / fcs-_total);}static void fcs_read_statistic(FCS *fcs, FILE *pf){int i, charsCount 0;int ch;int num;fscanf(pf, %d\n, charsCount);fcs-_charsCount charsCount;for(i 0; i charsCount; i){fscanf(pf, %d %d\n, ch, num);fcs-_statistic[(unsigned int)ch] num;fcs-_total num;}}static void fcs_do_decompress(FCS *fcs, FILE *pfIn, const char *outFileName){int i, j, ret;unsigned char ch;HTN *htn;unsigned char buf[FILE_BUF_SIZE];unsigned char bitCode;int bitPos;FILE *pfOut;pfOut fopen(outFileName, wb);if(!pfOut){fprintf(stderr, cant open file:%s\n, outFileName);return;}htn fcs-_haffuman;bitCode 0x00;bitPos 0;while((ret fread(buf, 1, FILE_BUF_SIZE, pfIn)) 0){for(i 0; i ret; i){ch buf[i];for(j 0; j BITS_PER_CHAR; j){if(ch mask[j]){htn htn-_right;}else{htn htn-_left;}if(htn-_left NULL htn-_right NULL) //leaf{if(fcs-_total 0){fwrite(htn-_ch, 1, sizeof(char), pfOut);fcs-_total--;}htn fcs-_haffuman;}}}}fclose(pfOut);}//FCS functionsFCS *fcs_new(){FCS *fcs (FCS *)malloc(sizeof(FCS));fcs-_charsCount 0;fcs-_total 0;memset(fcs-_statistic, 0, sizeof(fcs-_statistic));memset(fcs-_dictionary, 0, sizeof(fcs-_dictionary));fcs-_haffuman NULL;return fcs;}void fcs_free(FCS *fcs){int i;if(fcs){if(fcs-_haffuman)htn_free(fcs-_haffuman);for(i 0; i MAX_CHARS; i)free(fcs-_dictionary[i]);free(fcs);}}void fcs_compress(FCS *fcs, const char *inFileName, const char *outFileName){fprintf(stdout, To compress file: %s ...\n, inFileName);fcs_generate_statistic(fcs, inFileName);fcs_create_haffuman_tree(fcs);fcs_generate_dictionary(fcs);fcs_do_compress(fcs, inFileName, outFileName);fprintf(stdout, The compressed data of file: %s stored at %s!\n,inFileName, outFileName);}void fcs_decompress(FCS *fcs, const char *inFileName, const char *outFileName){FILE *pfIn;fprintf(stdout, To decompress file: %s ...\n, inFileName);pfIn fopen(inFileName, rb);if(!pfIn){fprintf(stderr, cant open file: %s\n, inFileName);return ;}fcs_read_statistic(fcs, pfIn);fcs_create_haffuman_tree(fcs);fcs_generate_dictionary(fcs);fcs_do_decompress(fcs, pfIn, outFileName);fclose(pfIn);fprintf(stdout, The decompressed data of file: %s stored at %s\n,inFileName, outFileName);}/** File: main.c* Purpose: testing File Compression* Author:puresky* Date: 2011/05/01*/#include #include compress.hconst int DO_COMPRESS 1;const int DO_DECOMPRESS 1;const char *InFile data.txt; //The file to compress.const char *CompressedFile data.hfm; //Compressed data of the file.const char *OutFile data2.txt; //The decompressed file of the data.int main(int argc, char **argv){//1. compress fileif(DO_COMPRESS){FCS *fcs1;fcs1 fcs_new();fcs_compress(fcs1, InFile, CompressedFile);fcs_free(fcs1);}//2. decompress fileif(DO_DECOMPRESS){FCS *fcs2;fcs2 fcs_new();fcs_decompress(fcs2, CompressedFile, OutFile);fcs_free(fcs2);}system(pause);return 0;}