|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 andalousie 于 2014-3-25 22:51 编辑
原来写的《两种风格的Huffman树》增加了一个二进制流bitstream,用于一位位的读写,自己写的~
原理如图
#if !defined BITSTREAM
#define BITSTREAM
//Alexander Misel 2014, Oschina
//Open Source, free to copy
#include <iostream>
#include <fstream>
#include <string>
#include <cstdio>
static const int READ_MODE = std::ios::in | std::ios::binary;
static const int WRITE_MODE = std::ios::out | std::ios::binary;
class bitostream
{
struct bit{
bit() : _next(0) {};
bit & operator=(unsigned char v)
{
_bit = v;
return *this;
}
unsigned char _bit;
bit * _next;
};
public:
bitostream(std::string const & nameFile)
: _stream(nameFile.c_str(), WRITE_MODE),
_head(0), _byte(0), buffer(0), N(-1)
{}
//OUTPUT
void allocate();
void writeChar(unsigned char v);
void writeBool(bool b);
void bitout();
private:
std::ofstream _stream;
bit * _head;
bit * _byte;
unsigned char buffer;
int N;
};
inline void bitostream::allocate()
{
if(!_head)
{
bit * newByte = new bit;
_head = newByte;
_byte = newByte;
* _byte = 0;
buffer = 0;
}
else
{
_byte->_next = new bit;
_byte = _byte->_next;
* _byte = 0;
buffer = 0;
}
}
inline void bitostream::writeChar(unsigned char v)
{
if(N == -1 || N == 8)
{ allocate(); N=0; }
if(!N)
{
N = 8;
* _byte = v;
return;
}
unsigned char tmp = v >> N;
unsigned char pmt = v << (8-N);
int save = N;
buffer += tmp;
N = 0;
writeChar(buffer);
N = -1;
writeChar(pmt);
buffer = pmt;
N = save;
}
inline void bitostream::writeBool(bool b)
{
if(N == -1 || N == 8)
{ allocate(); N=0; }
unsigned char tmp = b ? 1 : 0;
tmp = tmp << (7-N);
buffer += tmp;
if(N == 7)
{
int save = N;
N = 0;
writeChar(buffer);
N = save;
}
N++;
}
inline void bitostream::bitout()
{
bit * outByte = _head;
while(outByte)
{
_stream.write(reinterpret_cast<char *>(outByte), 1);
outByte = outByte->_next;
}
_head = 0;
_byte = 0;
buffer = 0;
_stream.flush();
}
class bitistream
{
public:
bitistream(std::string const & nameFile)
: _stream(nameFile.c_str(), READ_MODE),
inbuf(0), inN(0)
{
if(!_stream.is_open())
throw "couldn't open file";
pbuf = _stream.rdbuf();
}
//INPUT
void get_in();
bool isEmpty()
{
return (pbuf->sgetc() == EOF) && (inN == 8 || inN == 0);
}
unsigned char readChar();
bool readBool();
public:
std::ifstream _stream;
std::streambuf * pbuf;
unsigned char inbuf;
int inN;
};
inline void bitistream::get_in()
{
if(pbuf->sgetc() != EOF)
inbuf = pbuf->sbumpc();
}
inline unsigned char bitistream::readChar()
{
if(!inN || inN == 8)
{
get_in();
inN = 0;
return inbuf;
}
unsigned char tmp = inbuf << inN;
inbuf = 0;
get_in();
unsigned char pmt = inbuf >> (8-inN);
tmp += pmt;
return tmp;
}
inline bool bitistream::readBool()
{
if(!inN || inN == 8)
{
get_in();
inN = 0;
}
unsigned char tmp = inbuf << inN;
inN++;
return (tmp & 0x80)>0;
}
#endif
改代码花去了好多时间。尤其是C++的char型默认是有符号的,大家知道,有符号的位运算有诸多的不方便。还有,关于流缓冲的内容,大家也可以参考一下rdbuf的相关用法。
huffman.cpp#include <iostream>
#include <string>
#include "GPQ.h"
#include "bitstream.h"
const int R = 256;
class Huffman
{
class Node
{
public:
const unsigned char _ch;
const int _freq;
Node *_left, *_right;
Node(unsigned char ch, int freq, Node* left, Node* right)
: _ch(ch), _freq(freq), _left(left), _right(right)
{}
bool isLeaf() { return !_left && !_right; }
bool compareTo(Node *that) { return _freq > that->_freq; }
};
struct buf
{
buf(): _id(-1) {}
int index() { _id++; return _id; }
int _id;
}id;
public:
void expand(bitistream& trie, std::string& s, int N)
{
id._id = -1;
Node *root = readTrie(trie);
id._id = -1;
for (int i = 0; i < N; ++i)
{
Node *x = root;
while (!(x->isLeaf()))
{
if(s[id.index()] == '0')
x = x->_left;
else x = x->_right;
}
std::cout << x->_ch;
}
}
void writeTrie(Node* x, bitostream& b)
{
if(x->isLeaf())
{
b.writeBool(true);
b.writeChar(x->_ch);
return;
}
b.writeBool(false);
writeTrie(x->_left, b);
writeTrie(x->_right, b);
}
Node* readTrie(bitistream& trie)
{
if(trie.readBool())
{
unsigned char c = trie.readChar();
Node *fresh = new Node(c, 0, 0, 0);
return fresh;
}
Node *x = readTrie(trie);
Node *y = readTrie(trie);
Node *fresh = new Node('\0', 0, x, y);
return fresh;
}
void compress(std::string& s, bitostream& b)
{
// Tabulate frequency counts.
int freq[R] = {0};
for (int i = 0; i < s.size(); ++i)
freq[s[i]]++;
// Build Huffman code trie.
Node *root = buildTrie(freq);
std::string *st = new std::string [R];
buildCode(st, root, "");
// Print trie for decoder (recursive).
writeTrie(root, b);
// Use Huffman code to encode input.
for (int id = 0; id < s.length(); id++)
{
std::string code = st[s[id]];
std::cout << s[id] << " ";
std::cout << code << std::endl;
}
}
private:
Node* buildTrie(int *freq)
{
GenericPQ<Node*> pq(R);
for (int i = 0; i < R; i++)
if (freq[i] > 0)
pq.insert(new Node(i, freq[i], 0, 0));
while (pq.size() > 1)
{
Node *x = pq.pop();
Node *y = pq.pop();
Node *parent = new Node('\0', x->_freq + y->_freq, x, y);
pq.insert(parent);
}
return pq.pop();
}
std::string* buildCode(Node* root)
{
std::string *st = new std::string[R];
buildCode(st, root, "");
return st;
}
void buildCode(std::string *st, Node *x, std::string s)
{
if(x->isLeaf()) { st[x->_ch] = s; return; }
buildCode(st, x->_left, s + '0');
buildCode(st, x->_right, s + '1');
}
};
int main()
{
Huffman code;
std::string sample = "ABRACADABRA!";
std::string coder = "0111110010110100011111001010";
bitostream bin("huff.trie");
code.compress(sample, bin);
bin.bitout();
bitistream by("huff.trie");
code.expand(by, coder, 12);
return 0;
}
我们输出的结果自然和从前一样,但是树结构就输出到了huff.trie文件里面。其实bitstream我做过好多次调试。0,1,010000|00,0,0,1,010|00000,0,1,0|0100001,0|01000011|01,010100|00,1,01000|010.00000 wrong
0,1,010000|01,0,0,1,010|00100,0,1,0|0100001,0|01000011|01,010100|10,1,01000|010.00000 almost
0,1,010000|01,0,0,1,010|00100,0,1,0|0100001,1|01000011|01,010100|10,1,01000|010.00000 right
因为需要一位一位的读写,所以就会面临各种问题。由于时间紧,没有来及写注释,改天会补上的。大家一定要支持,一定要支持哦~ 我写的bitstream算是开源的,大家用得着可以拿去使,:ton:。
|
|