再看Huffman树,以及游程编码
本帖最后由 andalousie 于 2014-3-25 22:51 编辑原来写的《两种风格的Huffman树》增加了一个二进制流bitstream,用于一位位的读写,自己写的~
原理如图
#if !defined BITSTREAM
#define BITSTREAM
//Alexander Misel 2014, Oschina
//Open Source, free to copy
#include <iostream>
#include <fstream>
#include <string>
#include <cstdio>
static const int READ_MODE = std::ios::in | std::ios::binary;
static const int WRITE_MODE = std::ios::out | std::ios::binary;
class bitostream
{
struct bit{
bit() : _next(0) {};
bit & operator=(unsigned char v)
{
_bit = v;
return *this;
}
unsigned char _bit;
bit * _next;
};
public:
bitostream(std::string const & nameFile)
: _stream(nameFile.c_str(), WRITE_MODE),
_head(0), _byte(0), buffer(0), N(-1)
{}
//OUTPUT
void allocate();
void writeChar(unsigned char v);
void writeBool(bool b);
void bitout();
private:
std::ofstream _stream;
bit * _head;
bit * _byte;
unsigned char buffer;
int N;
};
inline void bitostream::allocate()
{
if(!_head)
{
bit * newByte = new bit;
_head = newByte;
_byte = newByte;
* _byte = 0;
buffer = 0;
}
else
{
_byte->_next = new bit;
_byte = _byte->_next;
* _byte = 0;
buffer = 0;
}
}
inline void bitostream::writeChar(unsigned char v)
{
if(N == -1 || N == 8)
{ allocate(); N=0; }
if(!N)
{
N = 8;
* _byte = v;
return;
}
unsigned char tmp = v >> N;
unsigned char pmt = v << (8-N);
int save = N;
buffer += tmp;
N = 0;
writeChar(buffer);
N = -1;
writeChar(pmt);
buffer = pmt;
N = save;
}
inline void bitostream::writeBool(bool b)
{
if(N == -1 || N == 8)
{ allocate(); N=0; }
unsigned char tmp = b ? 1 : 0;
tmp = tmp << (7-N);
buffer += tmp;
if(N == 7)
{
int save = N;
N = 0;
writeChar(buffer);
N = save;
}
N++;
}
inline void bitostream::bitout()
{
bit * outByte = _head;
while(outByte)
{
_stream.write(reinterpret_cast<char *>(outByte), 1);
outByte = outByte->_next;
}
_head = 0;
_byte = 0;
buffer = 0;
_stream.flush();
}
class bitistream
{
public:
bitistream(std::string const & nameFile)
: _stream(nameFile.c_str(), READ_MODE),
inbuf(0), inN(0)
{
if(!_stream.is_open())
throw "couldn't open file";
pbuf = _stream.rdbuf();
}
//INPUT
void get_in();
bool isEmpty()
{
return (pbuf->sgetc() == EOF) && (inN == 8 || inN == 0);
}
unsigned char readChar();
bool readBool();
public:
std::ifstream _stream;
std::streambuf * pbuf;
unsigned charinbuf;
int inN;
};
inline void bitistream::get_in()
{
if(pbuf->sgetc() != EOF)
inbuf = pbuf->sbumpc();
}
inline unsigned char bitistream::readChar()
{
if(!inN || inN == 8)
{
get_in();
inN = 0;
return inbuf;
}
unsigned char tmp = inbuf << inN;
inbuf = 0;
get_in();
unsigned char pmt = inbuf >> (8-inN);
tmp += pmt;
return tmp;
}
inline bool bitistream::readBool()
{
if(!inN || inN == 8)
{
get_in();
inN = 0;
}
unsigned char tmp = inbuf << inN;
inN++;
return (tmp & 0x80)>0;
}
#endif改代码花去了好多时间。尤其是C++的char型默认是有符号的,大家知道,有符号的位运算有诸多的不方便。还有,关于流缓冲的内容,大家也可以参考一下rdbuf的相关用法。
huffman.cpp#include <iostream>
#include <string>
#include "GPQ.h"
#include "bitstream.h"
const int R = 256;
class Huffman
{
class Node
{
public:
const unsigned char _ch;
const int _freq;
Node *_left, *_right;
Node(unsigned char ch, int freq, Node* left, Node* right)
: _ch(ch), _freq(freq), _left(left), _right(right)
{}
bool isLeaf() { return !_left && !_right; }
bool compareTo(Node *that){ return _freq > that->_freq; }
};
struct buf
{
buf(): _id(-1) {}
int index() { _id++; return _id; }
int _id;
}id;
public:
void expand(bitistream& trie, std::string& s, int N)
{
id._id = -1;
Node *root = readTrie(trie);
id._id = -1;
for (int i = 0; i < N; ++i)
{
Node *x = root;
while (!(x->isLeaf()))
{
if(s == '0')
x = x->_left;
elsex = x->_right;
}
std::cout << x->_ch;
}
}
void writeTrie(Node* x, bitostream& b)
{
if(x->isLeaf())
{
b.writeBool(true);
b.writeChar(x->_ch);
return;
}
b.writeBool(false);
writeTrie(x->_left, b);
writeTrie(x->_right, b);
}
Node* readTrie(bitistream& trie)
{
if(trie.readBool())
{
unsigned char c = trie.readChar();
Node *fresh = new Node(c, 0, 0, 0);
return fresh;
}
Node *x = readTrie(trie);
Node *y = readTrie(trie);
Node *fresh = new Node('\0', 0, x, y);
return fresh;
}
void compress(std::string& s, bitostream& b)
{
// Tabulate frequency counts.
int freq = {0};
for (int i = 0; i < s.size(); ++i)
freq]++;
// Build Huffman code trie.
Node *root = buildTrie(freq);
std::string *st = new std::string ;
buildCode(st, root, "");
// Print trie for decoder (recursive).
writeTrie(root, b);
// Use Huffman code to encode input.
for (int id = 0; id < s.length(); id++)
{
std::string code = st];
std::cout << s << " ";
std::cout << code << std::endl;
}
}
private:
Node* buildTrie(int *freq)
{
GenericPQ<Node*> pq(R);
for (int i = 0; i < R; i++)
if (freq > 0)
pq.insert(new Node(i, freq, 0, 0));
while (pq.size() > 1)
{
Node *x = pq.pop();
Node *y = pq.pop();
Node *parent = new Node('\0', x->_freq + y->_freq, x, y);
pq.insert(parent);
}
return pq.pop();
}
std::string* buildCode(Node* root)
{
std::string *st = new std::string;
buildCode(st, root, "");
return st;
}
void buildCode(std::string *st, Node *x, std::string s)
{
if(x->isLeaf()) { st = s; return; }
buildCode(st, x->_left, s + '0');
buildCode(st, x->_right, s + '1');
}
};
int main()
{
Huffman code;
std::string sample = "ABRACADABRA!";
std::string coder= "0111110010110100011111001010";
bitostream bin("huff.trie");
code.compress(sample, bin);
bin.bitout();
bitistream by("huff.trie");
code.expand(by, coder, 12);
return 0;
}我们输出的结果自然和从前一样,但是树结构就输出到了huff.trie文件里面。其实bitstream我做过好多次调试。0,1,010000|00,0,0,1,010|00000,0,1,0|0100001,0|01000011|01,010100|00,1,01000|010.00000 wrong
0,1,010000|01,0,0,1,010|00100,0,1,0|0100001,0|01000011|01,010100|10,1,01000|010.00000 almost
0,1,010000|01,0,0,1,010|00100,0,1,0|0100001,1|01000011|01,010100|10,1,01000|010.00000 right因为需要一位一位的读写,所以就会面临各种问题。由于时间紧,没有来及写注释,改天会补上的。大家一定要支持,一定要支持哦~ 我写的bitstream算是开源的,大家用得着可以拿去使,:ton:。
本帖最后由 andalousie 于 2014-3-6 18:32 编辑
下午急匆匆的发表了,但是前面的解说不知道怎么没了。是在抱歉。大家来看看吧~ 有问题可以回帖。GPQ.h可以看以前的。 同样,根据这个bitstream,对于成块的0或1比较普遍的数据,我们可以写出一个RunLength游程编码,如下:#include <iostream>
#include <string>
#include "bitstream.h"
const int R = 256;
class RunLength {
public:
static void expand(std::string const & in, std::string const & out) {
bitistream BinaryStdIn(in);
bitostream BinaryStdOut(out);
bool b = false;
while (!BinaryStdIn.isEmpty()) {
int run = BinaryStdIn.readChar();
for (int i = 0; i < run; i++)
BinaryStdOut.writeBool(b);
b = !b;
}
BinaryStdOut.bitout();
}
static void compress(std::string const & in, std::string const & out) {
bitistream BinaryStdIn(in);
bitostream BinaryStdOut(out);
unsigned char run = 0;
bool old = false;
while (!BinaryStdIn.isEmpty()) {
bool b = BinaryStdIn.readBool();
if (b != old) {
BinaryStdOut.writeChar(run);
run = 1;
old = !old;
}
else {
if (run == R-1) {
BinaryStdOut.writeChar(run);
run = 0;
BinaryStdOut.writeChar(run);
}
run++;
}
}
BinaryStdOut.writeChar(run);
BinaryStdOut.bitout();
}
};
int main()
{
RunLength::compress("huff.trie", "huff.trie.rle");
RunLength::expand("huff.trie.rle", "huff2.trie");
}
页:
[1]