Huffman编码
本帖最后由 eeeeelin 于 2013-8-9 20:40 编辑仿照小甲鱼的数据结构与算法课程,自己写了一个Huffman编码!不过就是有个BUG,就是当输入的字符串太长的时候,程序就会崩毁的,如果有解决办法,请留言告诉我哈,谢谢了!!!!
#pragma once
#ifndef HUFFMAN_H
#define HUFFMAN_H
#define UINT unsigned int
#define MAXSIZE100
#define EXTENDSIZE 4
#define OK 0
#define ERROR -1
#define TRUE 1
#define FALSE 0
typedef int Status;
/*
*=============================================================================
*huffman树节点
*=============================================================================
*/
typedef struct _huffmanNode
{
UINT weight; //权重
char symbol; //符号
char code; //编码
struct _huffmanNode *left;//左子树
struct _huffmanNode *right; //右子树
}huffmanNode; //树节点的具体内容
/*
*=============================================================================
*链队列,用于存放huffman树节点
*=============================================================================
*/
typedef huffmanNode ElementType;
typedef struct _queueNode
{
ElementType data;//具有树节点的元素
struct _queueNode *next;
}queueNode; //队列节点的具体内容
typedef struct
{
queueNode *front; //队头
queueNode *rear; //队尾
int queueSize; //当前队列的大小
}linkQueue;//链队列
/*
*============================================================================
*这个节点用于存放huffman树编码
*============================================================================
*/
typedef struct
{
char symbol; //存放符号
UINT weight; //权重
char *code; //编码
}storeCode;
/*===========================================================================*/
Status selectWeight(linkQueue *Q, char *inputString);//对权重进行统计,就是对输入的字符出现次数统计,将节点按权值从小到大放入队列
huffmanNode* createHuffmanTree(linkQueue *Q); //创建haffman树
Status huffmanCode(huffmanNode *root, char *stringToCode, char *inputString);//huffman编码
Status huffmandeCode(huffmanNode *T, char *stringToDecode); //huffman解码
Status huffmanShow(huffmanNode *T);//递归遍历huffman树
Status queueInit(linkQueue *Q); //初始化队列
Status enQueue(linkQueue *Q, ElementType *X);//入队
huffmanNode* deQueue(linkQueue *Q);//出队
Status isEmpty(linkQueue *Q);//判断队列是否为空
Status queueShow( linkQueue *Q ); //遍历队列
void queueDestroy(linkQueue *Q);//销毁队列
Status orderEnQueue(linkQueue *Q, ElementType *X); //由小到大排序入队
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "huffman.h"
/*
*=============================================================================
*对权重进行统计,就是对输入的字符出现次数统计,将节点按权值从小到大放入队列
*=============================================================================
*/
Status selectWeight(linkQueue *Q, char *inputString)
{
int *probability; //用来统计字符串出现的次数
int i, j;//循环变量
huffmanNode *tmp;//具有树结构的临时变量
probability = (int *)malloc(sizeof(int)*256 ); //分配内存
if(NULL == probability)
return ERROR;
memset(probability, 0, sizeof(int)*256); //将problilty的元素重置为0
for(i = 0; inputString != '\0'; i++) //统计输入字符串的字符出现的次数
{//比如说:I在字符串中出现了,那么probability++,就在相应的ACSII上I的位置+1
probability[(unsigned char)inputString]++;
}
for(j = 0; j < 256; j++) //对出现的字符分配内存
{
if(probability != 0)
{
tmp = (huffmanNode *)malloc(sizeof(huffmanNode));
if(NULL == tmp)
return ERROR;
tmp->symbol = (char)j; //将字符j放到tmp->symbol
tmp->weight = probability;//权重为probability
tmp->left = NULL;
tmp->right = NULL;
orderEnQueue(Q, tmp);//入队
free(tmp); //释放内存
tmp = NULL;
}
}
free(probability); //释放内存
probability = NULL;
return OK;
}
/*
*=============================================================================
*创建huffman树
*=============================================================================
*/
huffmanNode* createHuffmanTree(linkQueue *Q)
{
huffmanNode *left, *right, *current;
huffmanNode *huffmantree;
while(Q->queueSize != 1) //不等于1是因为在队列要存放一个树根,到时候函数返回
{
left = deQueue(Q);
right = deQueue(Q);
current = (huffmanNode *)malloc(sizeof(huffmanNode));
if(NULL == current)
exit(-1);
current->weight = left->weight + right->weight;//新节点的权重等于左右子树权重相加
current->left = left;
current->right = right;
strcpy(current->code, ""); //warning:用strcpy对数组进行初始化,输出的编码记得先初始化,不然后面的编码就会有乱码
orderEnQueue(Q, current);
free(current);
current = NULL;
}
huffmantree = deQueue(Q);//这时候队列就只有一个节点了,返回给huffman树
return huffmantree;
}
/*
*=============================================================================
*huffman编码
*=============================================================================
*/
Status huffmanCode(huffmanNode *root, char *stringToCode, char *inputString)
{
FILE *fp1, *fp2;
int i = 0, j; //专用循环变量
huffmanNode *current = NULL;
linkQueue Q;
storeCode *array; //用于存放huffman树编码的
fp1 = fopen("huffmanCode1.txt","w");
if(NULL == fp1)
perror("huffmanCode1.txt");
fp2 = fopen("huffmanCode2.txt","w");
if(NULL == fp2)
perror("huffmanCode2.txt");
array = (storeCode *)malloc(sizeof(storeCode)*256); //分配内存
if(NULL == array)
return ERROR;
queueInit(&Q);//初始化huffman树队列
enQueue(&Q, root); //把树根入队
while(isEmpty(&Q)) //判断队列是否为空
{
current = deQueue(&Q);
if(current->left == NULL && current->right == NULL) //如果树的左子树和右子树都为空,就说明到了树的叶子了
{
fprintf(fp1, "符号:%c,权重:%d,编码:%s\n", current->symbol,current->weight, current->code);
array.symbol = current->symbol;//将输出的字符放到数组里面
array.weight = current->weight;//将输出的权重放到数组里面
array.code = current->code; //将输出的编码放到数组里面,后面要用的
i++;
}
if(current->left != NULL)
{
strcpy(current->left->code, current->code); //把父节点的编码复制到子节点,这样才可以把父节点的编码继承下去
strcat(current->left->code, "0"); //因为节点在左边,所以把'0'放到节点上
enQueue(&Q, current->left);
}
if(current->right != NULL)
{
strcpy(current->right->code, current->code);//把父节点的编码复制到子节点,这样才可以把父节点的编码继承下去
strcat(current->right->code, "1"); //因为节点在右边,所以把'1'放到节点上
enQueue(&Q, current->right);
}
}
for(i = 0, j = 0; ; i++)
{
if(array.symbol == inputString)
{
strcat(stringToCode, array.code); //将编好的码用strcat连接起来
if(strlen(stringToCode) >= MAXSIZE) //防止stringToCode不够大
{
stringToCode = realloc(stringToCode, (MAXSIZE + EXTENDSIZE)*sizeof(char));
if(NULL == stringToCode)
returnERROR;
}
i = -1; //-1是因为for那里会i++,变成0
j++;
if(inputString == '\0')//到了输入字符串的尾就要结束了,不然进行下去就会有乱码的
{
break;
}
}
}
free(array);
array = NULL;
for(i = 0; stringToCode != '\0'; i++) //输出huffman编码
{
fprintf(fp2, "%c", stringToCode);
}
puts("");
fclose(fp1);
fclose(fp2);
return OK;
}
/*
*=============================================================================
*huffman解码
*原理:当遇到0时向左走,遇到1时向右走,直到遇到叶子节点就读出相应的符号
*=============================================================================
*/
Status huffmandeCode(huffmanNode *T, char *stringToDecode)
{
FILE *fp;
int i; //循环变量
huffmanNode *tmp;//临时变量
tmp = T;
fp = fopen("huffmandeCode.txt","w");
if(NULL == fp)
perror("huffmandeCode.txt");
for(i = 0; stringToDecode != '\0'; i++)
{
if(stringToDecode == '0')//向左走
{
tmp = tmp->left;
}
if(stringToDecode == '1')//向右走
{
tmp = tmp->right;
}
if(NULL == tmp->left && NULL == tmp->right)//到了叶子节点,输出相应的字符
{
fprintf(fp, "%c", tmp->symbol);
tmp = T;
}
if(stringToDecode != '0' || stringToDecode != '1')//防止编码错误
{
continue;
}
}
if(NULL == tmp->left && NULL == tmp->right) //到了叶子节点并且输出完字符后就要回到树根再继续
{
fprintf(fp, "%c", tmp->symbol);
tmp = T;
}
return OK;
}
/*
*=============================================================================
*递归遍历huffman树(test用的)
*=============================================================================
*/
Status huffmanShow(huffmanNode *T)
{
if(T != NULL)
{
huffmanShow(T->left); //遍历左子树
printf("%c\t", T->symbol);//打印权重
huffmanShow(T->right);//遍历右子树
}
return OK;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "huffman.h"
/*
*============================================================================
*初始化huffman树队列
*============================================================================
*/
Status queueInit(linkQueue *Q)
{
Q->front = (queueNode *)malloc(sizeof(queueNode));
if(NULL == Q->front)
return ERROR;
Q->rear = Q->front;
Q->rear->next = NULL;
Q->queueSize = 0;
return OK;
}
/*
*============================================================================
*入队列
*============================================================================
*/
Status enQueue(linkQueue *Q, ElementType *X)
{
queueNode *tmp;
tmp = (queueNode *)malloc(sizeof(queueNode));
if(NULL == tmp)
return ERROR;
tmp->data = *X;
tmp->next = NULL;
Q->rear->next = tmp;
Q->rear = tmp;
Q->queueSize++;
return OK;
}
/*
*============================================================================
*出队列
*============================================================================
*/
huffmanNode* deQueue(linkQueue *Q)
{
queueNode *tmp;
huffmanNode *val;
if(Q->front == Q->rear)
{
printf("队列为空,出队失败\n");
exit(-1);
}
val = (huffmanNode *)malloc(sizeof(huffmanNode));
if(NULL == val)
exit(-1);
tmp = Q->front->next;
*val = tmp->data;
Q->front->next = tmp->next;
if(Q->rear == tmp) //如果只有一个元素
Q->rear = Q->front; //重置尾指针
Q->queueSize--;
free(tmp);
tmp = NULL;
return val;
}
/*
*============================================================================
*判断队列是否为空
*============================================================================
*/
Status isEmpty(linkQueue *Q)
{
if(Q->front == Q->rear)
return FALSE;
else
return TRUE;
}
/*
*============================================================================
*遍历队列
*============================================================================
*/
Status queueShow( linkQueue *Q )
{
queueNode *p;
if( Q->front == Q->rear )
return ERROR;
p = Q->front->next;
while( p != NULL)
{
printf("%c", p->data.symbol );
p = p->next ;
}
puts("");
return OK;
}
/*
*============================================================================
*销毁队列
*============================================================================
*/
void queueDestroy(linkQueue *Q)
{
queueNode *tmp;
tmp = Q->front->next;
Q->front->next = NULL;
while(tmp != NULL)
{
Q->rear = tmp->next;
free(tmp);
tmp = NULL;
tmp = Q->rear;
}
Q->queueSize = 0;
}
/*
*============================================================================
*由小到大排序的入队列(画图出来很好理解的)
*============================================================================
*/
Status orderEnQueue(linkQueue *Q, ElementType *X)
{
queueNode *tmp;
queueNode *iterator;
tmp = (queueNode *)malloc(sizeof(queueNode));//分配内存
if(NULL == tmp)
return ERROR;
tmp->data = *X;
if(NULL == Q->front->next || 0 == Q->queueSize) //第一个元素入队列,直接插入就可以了
{
tmp->next = NULL;
Q->rear->next = tmp;
Q->rear = tmp;
Q->queueSize++;
}
else
{
if(tmp->data.weight <= Q->front->next->data.weight) //已经有了第一个元素的情况,在头节点front和front后一个
{ ////元素直接插入tmp
tmp->next = Q->front->next; //这两句千万记得
Q->front->next = tmp; //不要搞反了,会死循环的!!!!!!!!!!
Q->queueSize++;
}
else
{
iterator = Q->front->next;
while(iterator->next != NULL)
{
if(tmp->data.weight <= iterator->next->data.weight)//比较中间的元素的权重大小
{
tmp->next = iterator->next;
iterator->next = tmp;
Q->queueSize++;
return OK;
}
iterator = iterator->next;
}
if(NULL == iterator->next)//在队尾直接插入
{
iterator->next = tmp;
tmp->next = NULL;
Q->rear = tmp;
Q->queueSize++;
}
}
}
return OK;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "huffman.h"
int main(int argc, char *argv[])
{
int fn = 0, fileSize = 0; //文件指针的底层流式文件号,文件的大小
FILE *fp; //定义一个文件指针
char string; //要编码的字符串
char *stringToCode = NULL;//用来存放编码的
linkQueue Q; //定义一个huffman树队列
huffmanNode *huffmantree = NULL; //定义一个huffman树节点
/*=========================文件操作==============================================*/
fp = fopen("huffman.txt","r"); //只读文件
if(NULL == fp)
perror("huffman.txt");
fn = fileno(fp);//取得文件指针的底层流式文件号
fileSize = filelength(fn); //取文件长度字节数
printf("文件的大小为%d\n", fileSize);
if(fileSize > 4096)
{
printf("请定义文件大小<=4096字节\n");
exit(-1);
}
fgets(string, fileSize+1, fp); //将文本文件里的内容输出到数组string
/*===============================================================================*/
huffmantree = (huffmanNode *)malloc(sizeof(huffmanNode)); //初始化
if(NULL == huffmantree)
exit(-1);
stringToCode = (char *)malloc(MAXSIZE*sizeof(char));//分配内存大小
if(NULL == stringToCode)
exit(-1);
memset(stringToCode, 0, sizeof(char)*MAXSIZE); //将字符数组置为空
queueInit(&Q); //初始化huffman树队列
selectWeight(&Q, string);//对权重进行统计,就是对输入的字符出现次数统计,将节点按权值从小到大放入队列
huffmantree = createHuffmanTree(&Q);//创建huffman树
// printf("huffman编码为\n");
huffmanCode(huffmantree, stringToCode, string); //huffman编码
// printf("huffman解码为\n");
huffmandeCode(huffmantree, stringToCode); //huffman解码
/*============释放内存================*/
free(huffmantree);
huffmantree = NULL;
free(stringToCode);
stringToCode = NULL;
fclose(fp);
return 0;
}
沙发自己的!! 。。。。 强
页:
[1]