/*
								+----------------------------------+
								|                                  |
								|  *** Basic compression algs ***  |
								|                                  |
								|   Copyright  -tHE SWINe- 2008   |
								|                                  |
								|           Compress.inl           |
								|                                  |
								+----------------------------------+
*/

#pragma once
#ifndef __SIMPLE_COMPRESSION_INLINES_INCLUDED
#define __SIMPLE_COMPRESSION_INLINES_INCLUDED

/*
 *								=== CBitCoder ===
 */

/**
 *	@brief stateless helpers for reading and writing bit fields in a byte stream
 *
 *	All functions are static; the caller owns the current byte, the bit counter
 *	and the stream pointers and passes them in by reference. With the enabled
 *	"from left" variant, bits are taken from / filled into each byte starting
 *	at its most significant bit.
 *
 *	@tparam CSymbolType is the integer type holding the values being coded
 */
template <class CSymbolType>
class CBitCoder {
public:
	/**
	 *	@brief reads a value of a given bit width from the input stream
	 *
	 *	@param[out] r_n_value is filled with the decoded value
	 *		(it holds only the bits read so far if the function fails)
	 *	@param[in] n_symbol_bit_num is number of bits to read
	 *	@param[in,out] r_n_byte is the input byte currently being consumed
	 *	@param[in,out] r_n_bit_num is number of unread bits left in r_n_byte
	 *	@param[in,out] r_p_input is pointer to the next unread input byte
	 *	@param[in] p_end is pointer one past the last input byte
	 *
	 *	@return Returns true on success, false if the input ran out of data.
	 */
	static inline bool Decode_Symbol(CSymbolType &r_n_value, int n_symbol_bit_num,
		uint8_t &r_n_byte, int &r_n_bit_num, const uint8_t *&r_p_input, const uint8_t *p_end) // unfortunately, msvc 6.0 requires the implementation here
	{
		r_n_value = 0;
		for(int n_remaining = n_symbol_bit_num; n_remaining != 0;) {
			if(r_n_bit_num == 0) {
				if(r_p_input >= p_end)
					return false;
				// no more input

				r_n_byte = *r_p_input ++;
				r_n_bit_num = 8;
				// fetch the next byte, all eight bits are unread
			}

			const int n_read = min(r_n_bit_num, n_remaining);
			// take as many bits as both the current byte and the symbol allow

			r_n_value <<= n_read;
#if 1 // read from left
			r_n_value |= r_n_byte >> (8 - n_read);
			r_n_byte <<= n_read;
#else // read from right
			r_n_value |= r_n_byte & ((1 << n_read) - 1);
			r_n_byte >>= n_read;
#endif
			// append the bits to the value and drop them from the byte

			n_remaining -= n_read;
			r_n_bit_num -= n_read;
		}
		return true;
	}

	/**
	 *	@brief writes the low n_bit_num bits of a value to the output stream
	 *
	 *	Completed bytes are stored at r_p_output; when the output pointer has
	 *	reached r_p_out_end, the buffer is grown by one byte and both pointers
	 *	are refreshed from the buffer.
	 *
	 *	@param[in] n_value is the value to write
	 *	@param[in] n_bit_num is number of bits of n_value to write
	 *	@param[in,out] r_n_byte is the output byte currently being filled
	 *	@param[in,out] r_n_bit_num_left is number of free bits left in r_n_byte
	 *	@param[in,out] r_p_output is pointer to where the next byte is stored
	 *	@param[in,out] r_p_out_end is pointer one past the end of the output buffer
	 *	@param[in,out] r_t_out_buffer is the output buffer (grown on demand)
	 *
	 *	@return Returns true on success, false if growing the buffer failed.
	 */
	static inline bool Encode_Symbol(CSymbolType n_value, int n_bit_num,
		uint8_t &r_n_byte, int &r_n_bit_num_left, uint8_t *&r_p_output,
		uint8_t *&r_p_out_end, TBuffer &r_t_out_buffer)
	{
		int n_remaining = n_bit_num;
		while(n_remaining != 0) {
			const int n_write = min(r_n_bit_num_left, n_remaining);
			n_remaining -= n_write;
			// number of bits that fit in the current byte; n_remaining is now
			// also the position of the lowest of these bits inside n_value

#if 1 // fill from left // note that this order ever so slightly decreases compression ratio of SIF-based codecs but must be used for compatibility with older Huffman-encoded data (and Jpeg)
			r_n_byte <<= n_write;
			r_n_byte |= ((n_value >> n_remaining) & ((1 << n_write) - 1));
#else // fill from right
			r_n_byte |= ((n_value >> n_remaining) & ((1 << n_write) - 1)) <<
				(8 - r_n_bit_num_left);
#endif

			r_n_bit_num_left -= n_write;
			if(r_n_bit_num_left != 0)
				continue;
			// the byte is not complete yet

			_ASSERTE(r_p_output <= r_p_out_end);
			if(r_p_output == r_p_out_end) {
				if(!r_t_out_buffer.Grow(1))
					return false;
				r_p_out_end = r_t_out_buffer.p_Data() + r_t_out_buffer.n_Size();
				r_p_output = r_p_out_end - 1;
			}
			// make space for one more byte if the buffer is full

			*r_p_output = r_n_byte;
			++ r_p_output;
			r_n_bit_num_left = 8;
			r_n_byte = 0; // !!
			// store the byte and start a new, empty one
		}
		return true;
	}

	/**
	 *	@brief writes out the partially filled byte, if there is one
	 *
	 *	Does nothing if r_n_bit_num_left is 8 or more. Otherwise the pending
	 *	bits are shifted to the top of the byte, the byte is stored (growing
	 *	the buffer if needed) and the state is reset to an empty byte.
	 *
	 *	@param[in,out] r_n_byte is the output byte currently being filled
	 *	@param[in,out] r_n_bit_num_left is number of free bits left in r_n_byte
	 *	@param[in,out] r_p_output is pointer to where the next byte is stored
	 *	@param[in,out] r_p_out_end is pointer one past the end of the output buffer
	 *	@param[in,out] r_t_out_buffer is the output buffer (grown on demand)
	 *
	 *	@return Returns true on success, false if growing the buffer failed.
	 */
	static inline bool Flush(uint8_t &r_n_byte, int &r_n_bit_num_left,
		uint8_t *&r_p_output, uint8_t *&r_p_out_end, TBuffer &r_t_out_buffer)
	{
		if(r_n_bit_num_left >= 8)
			return true;
		// nothing is pending

		_ASSERTE(r_p_output <= r_p_out_end);
		if(r_p_output == r_p_out_end) {
			if(!r_t_out_buffer.Grow(1))
				return false;
			r_p_out_end = r_t_out_buffer.p_Data() + r_t_out_buffer.n_Size();
			r_p_output = r_p_out_end - 1;
		}
		// make space for one more byte if the buffer is full

#if 1 // fill from left
		*r_p_output ++ = r_n_byte << r_n_bit_num_left;
#else // fill from right
		*r_p_output ++ = r_n_byte;
#endif
		r_n_bit_num_left = 8;
		r_n_byte = 0; // !!
		// store the byte and start a new, empty one

		return true;
	}
};

/*
 *								=== ~CBitCoder ===
 */

/*
 *								=== CBitDecoder ===
 */

/**
 *	@brief constructor; sets the decoder to read the whole contents of a buffer
 *
 *	The decoder starts with no buffered bits (bit count zero).
 *
 *	@param[in] r_t_in_buffer is the input buffer (only pointers to its data are kept)
 */
inline CBitDecoder::CBitDecoder(const TBuffer &r_t_in_buffer)
	:m_n_byte(0),
	m_n_bit_num(0),
	m_p_input(r_t_in_buffer.p_Data()),
	m_p_end(r_t_in_buffer.p_Data() + r_t_in_buffer.n_Size())
{}

/**
 *	@brief constructor; sets the decoder to read a range of bytes
 *
 *	The decoder starts with no buffered bits (bit count zero).
 *
 *	@param[in] p_src is pointer to the first input byte
 *	@param[in] p_end is pointer one past the last input byte
 */
inline CBitDecoder::CBitDecoder(const uint8_t *p_src, const uint8_t *p_end)
	:m_n_byte(0),
	m_n_bit_num(0),
	m_p_input(p_src),
	m_p_end(p_end)
{}

/*template <class CSymbolType>
inline bool CBitDecoder::Decode_Symbol(CSymbolType &r_n_value, int n_symbol_bit_num)
{
	return Decode_Symbol(r_n_value, n_symbol_bit_num,
		m_n_byte, m_n_bit_num, m_p_input, m_p_end);
}*/

/**
 *	@brief determines whether the read pointer reached the end of the input
 *	@return Returns true if the input pointer equals the end pointer, otherwise false.
 *	@note This compares byte pointers only; bits still buffered in the
 *		current byte are not taken into account.
 */
inline bool CBitDecoder::b_Finished() const
{
	const bool b_at_end = (m_p_end == m_p_input);
	return b_at_end;
}

/**
 *	@brief gets the current read position
 *	@return Returns pointer to the next input byte that has not been fetched yet.
 */
inline const uint8_t *CBitDecoder::p_Pointer() const
{
	const uint8_t *p_next_byte = m_p_input;
	return p_next_byte;
}

/*template <class CSymbolType>
inline bool CBitDecoder::Decode_Symbol(CSymbolType &r_n_value, int n_symbol_bit_num,
	uint8_t &r_n_byte, int &r_n_bit_num, const uint8_t *&r_p_input, const uint8_t *p_end)
{
	r_n_value = 0;
	while(n_symbol_bit_num) {
		if(!r_n_bit_num) {
			if(r_p_input >= p_end)
				return false;
			r_n_byte = *r_p_input;
			++ r_p_input;
			r_n_bit_num = 8;
		}
		int n_read = min(r_n_bit_num, n_symbol_bit_num);
		r_n_value <<= n_read;
#if 1
		r_n_value |= r_n_byte >> (8 - n_read);//r_n_byte & ((1 << n_read) - 1); // read from left
		r_n_byte <<= n_read;//r_n_byte >>= n_read;
#else
		r_n_value |= r_n_byte & ((1 << n_read) - 1); // read from right
		r_n_byte >>= n_read;
#endif
		n_symbol_bit_num -= n_read;
		r_n_bit_num -= n_read;
	}
	return true;
}*/

/*
 *								=== ~CBitDecoder ===
 */

/*
 *								=== CBitEncoder ===
 */

/**
 *	@brief constructor; sets the encoder to append to the end of a buffer
 *
 *	Both the output pointer and the end pointer start at the current end of
 *	the buffer data. The encoder starts with an empty byte (8 free bits).
 *
 *	@param[in,out] r_t_out_buffer is the output buffer (a reference to it is kept)
 */
inline CBitEncoder::CBitEncoder(TBuffer &r_t_out_buffer)
	:m_n_byte(0),
	m_n_bit_num_left(8),
	m_p_output(r_t_out_buffer.p_Data() + r_t_out_buffer.n_Size()),
	m_p_out_end(r_t_out_buffer.p_Data() + r_t_out_buffer.n_Size()),
	m_r_t_out_buffer(r_t_out_buffer)
{}

/**
 *	@brief constructor; sets the encoder to write at a given position in a buffer
 *
 *	The encoder starts with an empty byte (8 free bits).
 *
 *	@param[in] p_output is pointer to where the first byte is to be written
 *		(must point inside r_t_out_buffer or at its end)
 *	@param[in] p_out_end is pointer one past the end of the buffer data
 *		(must equal the actual end of r_t_out_buffer)
 *	@param[in,out] r_t_out_buffer is the output buffer (a reference to it is kept)
 */
inline CBitEncoder::CBitEncoder(uint8_t *p_output, uint8_t *p_out_end, TBuffer &r_t_out_buffer)
	:m_n_byte(0),
	m_n_bit_num_left(8),
	m_p_output(p_output),
	m_p_out_end(p_out_end),
	m_r_t_out_buffer(r_t_out_buffer)
{
	_ASSERTE(p_out_end == r_t_out_buffer.p_Data() + r_t_out_buffer.n_Size());
	// otherwise reallocation is going to happen sooner than needed, or never

	_ASSERTE(p_output >= r_t_out_buffer.p_Data() && p_output <= p_out_end);
	// make sure it points to the same buffer and is before end
}

/**
 *	@brief constructor; sets the encoder to write at a given position in a buffer
 *
 *	The end pointer is taken from the buffer itself. The encoder starts with
 *	an empty byte (8 free bits).
 *
 *	@param[in] p_output is pointer to where the first byte is to be written
 *		(must point inside r_t_out_buffer or at its end)
 *	@param[in,out] r_t_out_buffer is the output buffer (a reference to it is kept)
 */
inline CBitEncoder::CBitEncoder(uint8_t *p_output, TBuffer &r_t_out_buffer)
	:m_n_byte(0),
	m_n_bit_num_left(8),
	m_p_output(p_output),
	m_p_out_end(r_t_out_buffer.p_Data() + r_t_out_buffer.n_Size()),
	m_r_t_out_buffer(r_t_out_buffer)
{
	_ASSERTE(p_output >= r_t_out_buffer.p_Data() && p_output <= m_p_out_end);
	// make sure it points to the same buffer and is before end
}

/**
 *	@brief constructor; resumes encoding with a partially filled byte
 *
 *	@param[in] n_byte is the partially filled output byte
 *	@param[in] n_bit_num_left is number of free bits in n_byte (asserted to be 0 to 7)
 *	@param[in] p_output is pointer to where the next byte is to be written
 *		(must point inside r_t_out_buffer or at its end)
 *	@param[in] p_out_end is pointer one past the end of the buffer data
 *		(must equal the actual end of r_t_out_buffer)
 *	@param[in,out] r_t_out_buffer is the output buffer (a reference to it is kept)
 */
inline CBitEncoder::CBitEncoder(uint8_t n_byte, int n_bit_num_left,
	uint8_t *p_output, uint8_t *p_out_end, TBuffer &r_t_out_buffer)
	:m_n_byte(n_byte),
	m_n_bit_num_left(n_bit_num_left),
	m_p_output(p_output),
	m_p_out_end(p_out_end),
	m_r_t_out_buffer(r_t_out_buffer)
{
	_ASSERTE(n_bit_num_left >= 0 && n_bit_num_left < 8);

	_ASSERTE(p_out_end == r_t_out_buffer.p_Data() + r_t_out_buffer.n_Size());
	// otherwise reallocation is going to happen sooner than needed, or never

	_ASSERTE(p_output >= r_t_out_buffer.p_Data() && p_output <= p_out_end);
	// make sure it points to the same buffer and is before end
}

/*template <class CSymbolType>
inline bool CBitEncoder::Encode_Symbol(CSymbolType n_value, int n_bit_num)
{
	return Encode_Symbol(n_value, n_bit_num, m_n_byte,
		m_n_bit_num_left, m_p_output, m_p_out_end, m_r_t_out_buffer);
}

template <class CSymbolType>
inline bool CBitEncoder::Encode_Symbol(CSymbolType n_value)
{
	int n_bit_num = n_Bit_Width(n_value); // calculate automatically
	return Encode_Symbol(n_value, n_bit_num, m_n_byte,
		m_n_bit_num_left, m_p_output, m_p_out_end, m_r_t_out_buffer);
}*/

inline bool CBitEncoder::Flush()
{
	return CBitCoder<uint8_t>::Flush(m_n_byte, m_n_bit_num_left,
		m_p_output, m_p_out_end, m_r_t_out_buffer);
}

/**
 *	@brief gets the current write position
 *	@return Returns pointer to where the next completed byte will be stored.
 */
inline uint8_t *CBitEncoder::p_Pointer()
{
	uint8_t *p_next_byte = m_p_output;
	return p_next_byte;
}

/**
 *	@brief gets the current write position (const version)
 *	@return Returns const pointer to where the next completed byte will be stored.
 */
inline const uint8_t *CBitEncoder::p_Pointer() const
{
	const uint8_t *p_next_byte = m_p_output;
	return p_next_byte;
}

/*template <class CSymbolType>
inline bool CBitEncoder::Encode_Symbol(CSymbolType n_value, int n_bit_num,
	uint8_t &r_n_byte, int &r_n_bit_num_left, uint8_t *&r_p_output,
	uint8_t *&r_p_out_end, TBuffer &r_t_out_buffer)
{
	for(int i = n_bit_num; i;) {
		int n_write = min(r_n_bit_num_left, i);
#if 1 // note that this order ever so slightly decreases compression ratio of SIF-based codecs but must be used for compatibility with older Huffman-encoded data (and Jpeg)
		r_n_byte <<= n_write; // fill from left
		r_n_byte |= ((n_value >> (i -= n_write)) & ((1 << n_write) - 1));// << (8 - r_n_bit_num_left);
#else
		r_n_byte |= ((n_value >> (i -= n_write)) & ((1 << n_write) - 1)) <<
			(8 - r_n_bit_num_left); // fill from right
#endif
		if(!(r_n_bit_num_left -= n_write)) {
			_ASSERTE(r_p_output <= r_p_out_end);
			if(r_p_output == r_p_out_end) {
				if(!r_t_out_buffer.Grow(1))
					return false;
				r_p_out_end = r_t_out_buffer.p_Data() + r_t_out_buffer.n_Size();
				r_p_output = r_p_out_end - 1;
			}
			*r_p_output ++ = r_n_byte;
			r_n_bit_num_left = 8;
			r_n_byte = 0; // !!
		}
	}
	return true;
}

inline bool CBitEncoder::Flush(uint8_t &r_n_byte, int &r_n_bit_num_left,
	uint8_t *&r_p_output, uint8_t *&r_p_out_end, TBuffer &r_t_out_buffer)
{
	if(r_n_bit_num_left < 8) {
		_ASSERTE(r_p_output <= r_p_out_end);
		if(r_p_output == r_p_out_end) {
			if(!r_t_out_buffer.Grow(1))
				return false;
			r_p_out_end = r_t_out_buffer.p_Data() + r_t_out_buffer.n_Size();
			r_p_output = r_p_out_end - 1;
		}
#if 1
		*r_p_output ++ = r_n_byte << r_n_bit_num_left; // fill from left
#else
		*r_p_output ++ = r_n_byte; // fill from right
#endif
		r_n_bit_num_left = 8;
		r_n_byte = 0; // !!
	}
	return true;
}*/

/*
 *								=== ~CBitEncoder ===
 */

/*
 *								=== CEmitVarLength ===
 */

/**
 *	@brief constructor
 *	@param[in,out] r_t_out_buffer is the buffer the encoded values are appended to
 *		(a reference to it is kept)
 */
inline CEmitVarLength::CEmitVarLength(TBuffer &r_t_out_buffer)
	:m_r_dest(r_t_out_buffer)
{
}

/**
 *	@brief appends a value to the buffer using 1 to 5 bytes
 *
 *	The value is split to groups of 7 bits, stored most significant group
 *	first. All bytes except the last one have the top bit (0x80) set.
 *
 *	@param[in] n_value is the value to be written
 *
 *	@return Returns true on success, false if growing the buffer failed.
 */
inline bool CEmitVarLength::operator ()(uint32_t n_value)
{
	size_t n_byte_num;
	if(n_value <= 0x7f) // 7 bits
		n_byte_num = 1;
	else if(n_value <= 0x3fff) // 14 bits
		n_byte_num = 2;
	else if(n_value <= 0x1fffff) // 21 bits
		n_byte_num = 3;
	else if(n_value <= 0xfffffff) // 28 bits
		n_byte_num = 4;
	else // 32 bits
		n_byte_num = 5;
	// decide how many bytes are needed

	if(!m_r_dest.Grow(n_byte_num))
		return false;
	uint8_t *p_dest = m_r_dest.p_Data() + m_r_dest.n_Size() - n_byte_num;
	// resize output, the new bytes are at the end

	unsigned int n_shift = 7 * (unsigned int)(n_byte_num - 1);
	for(size_t i = 0; i < n_byte_num; ++ i, n_shift -= 7) {
		const uint32_t n_group = (n_value >> n_shift) & 0x7f;
		p_dest[i] = (i + 1 < n_byte_num)? (n_group | 0x80) : n_group;
	}
	// write the 7-bit groups, flag all but the last one as "more bytes follow"

	return true;
}

/*
 *								=== ~CEmitVarLength ===
 */

/*
 *								=== CDecodeVarLength ===
 */

/**
 *	@brief constructor
 *
 *	@param[in] p_src is pointer to the first input byte
 *	@param[in] p_end is pointer one past the last input byte
 *		(the range is asserted to be non-empty)
 */
inline CDecodeVarLength::CDecodeVarLength(const uint8_t *p_src, const uint8_t *p_end)
	:m_p_src(p_src),
	m_p_end(p_end)
{
	_ASSERTE(p_src < p_end);
}

/**
 *	@brief reads one variable-length value from the input
 *
 *	The value is stored as groups of 7 bits, most significant group first;
 *	every byte except the last one has the top bit (0x80) set.
 *
 *	@param[out] r_n_value is filled with the decoded value
 *		(left untouched if the function fails)
 *
 *	@return Returns true on success, false if the input ended before the
 *		value was complete, or if the value takes more than 5 bytes.
 *
 *	@note The read pointer is advanced past all the bytes that were consumed,
 *		even on failure.
 */
inline bool CDecodeVarLength::operator ()(uint32_t &r_n_value)
{
	const int n_max_char_num = 5;
	// CEmitVarLength::operator ()() writes at most 5 bytes per value; a sixth
	// byte would shift the high bits out of the 32-bit accumulator

	uint32_t n_value = 0;
	for(int n_char_size = 0;; ++ n_char_size) {
		if(m_p_src == m_p_end || n_char_size >= n_max_char_num)
			return false;
		// out of data, or a malformed (too long) sequence

		uint8_t n_char = *m_p_src ++;
		n_value |= n_char & 0x7f;
		if(!(n_char & 0x80))
			break;
		// the last byte has the top bit cleared

		n_value <<= 7;
		// make space for the next group
	}
	// read escaped value

	r_n_value = n_value;

	return true;
}

/**
 *	@brief gets the current read position
 *	@return Returns pointer to the next unread input byte.
 */
inline const uint8_t *CDecodeVarLength::p_Pointer() const
{
	const uint8_t *p_next_byte = m_p_src;
	return p_next_byte;
}

/**
 *	@brief determines whether all the input was consumed
 *	@return Returns true if the read pointer equals the end pointer, otherwise false.
 */
inline bool CDecodeVarLength::b_Finished() const
{
	const bool b_at_end = (m_p_end == m_p_src);
	return b_at_end;
}

/*
 *								=== ~CDecodeVarLength ===
 */

/*
 *								=== CEmitInt ===
 */

/**
 *	@brief constructor
 *	@param[in,out] r_t_out_buffer is the buffer the values are appended to
 *		(a reference to it is kept)
 */
template <class CIntType>
inline CEmitInt<CIntType>::CEmitInt(TBuffer &r_t_out_buffer)
	:m_r_dest(r_t_out_buffer)
{
}

/**
 *	@brief appends a fixed-size integer to the buffer
 *
 *	The value is stored as its sizeof(CIntType) in-memory bytes
 *	(i.e. in the byte order of the machine that runs the code).
 *
 *	@param[in] n_value is the value to be written
 *
 *	@return Returns true on success, false if growing the buffer failed.
 */
template <class CIntType>
inline bool CEmitInt<CIntType>::operator ()(CIntType n_value)
{
	TBuffer &r_t_out_buffer = m_r_dest; // rename
	if(!r_t_out_buffer.Grow(sizeof(CIntType)))
		return false;
	uint8_t *p_dest = r_t_out_buffer.p_Data() +
		r_t_out_buffer.n_Size() - sizeof(CIntType);
	// resize output

	memcpy(p_dest, &n_value, sizeof(CIntType));
	// write the value; the destination is at an arbitrary byte offset so it can
	// not be written through a CIntType pointer (would be a misaligned access)

	return true;
}

/*
 *								=== ~CEmitInt ===
 */

/*
 *								=== CDecodeInt ===
 */

/**
 *	@brief constructor
 *
 *	@param[in] p_src is pointer to the first input byte
 *	@param[in] p_end is pointer one past the last input byte
 *		(the range is asserted to be non-empty)
 */
template <class CIntType>
inline CDecodeInt<CIntType>::CDecodeInt(const uint8_t *p_src, const uint8_t *p_end)
	:m_p_src(p_src),
	m_p_end(p_end)
{
	_ASSERTE(p_src < p_end);
}

/**
 *	@brief reads one fixed-size integer from the input
 *
 *	The value is read as sizeof(CIntType) raw bytes
 *	(i.e. in the byte order of the machine that runs the code).
 *
 *	@param[out] r_n_value is filled with the value
 *		(left untouched if the function fails)
 *
 *	@return Returns true on success, false if there are fewer than
 *		sizeof(CIntType) bytes of input left.
 */
template <class CIntType>
inline bool CDecodeInt<CIntType>::operator ()(CIntType &r_n_value)
{
	if(m_p_src > m_p_end || size_t(m_p_end - m_p_src) < sizeof(CIntType))
		return false;
	// compare sizes rather than pointers; adding to the pointer first
	// could form an address past the end of the input

	memcpy(&r_n_value, m_p_src, sizeof(CIntType));
	m_p_src += sizeof(CIntType);
	// read the value; the source is at an arbitrary byte offset so it can
	// not be read through a CIntType pointer (would be a misaligned access)

	return true;
}

/**
 *	@brief gets the current read position
 *	@return Returns pointer to the next unread input byte.
 */
template <class CIntType>
inline const uint8_t *CDecodeInt<CIntType>::p_Pointer() const
{
	const uint8_t *p_next_byte = m_p_src;
	return p_next_byte;
}

/**
 *	@brief determines whether all the input was consumed
 *	@return Returns true if the read pointer equals the end pointer, otherwise false.
 */
template <class CIntType>
inline bool CDecodeInt<CIntType>::b_Finished() const
{
	const bool b_at_end = (m_p_end == m_p_src);
	return b_at_end;
}

/*
 *								=== ~CDecodeInt ===
 */

/*
 *								=== CHuffmanTree ===
 */

/**
 *	@brief constructor; sets the maximum code length
 *
 *	@param[in] n_max_code_bit_num is the maximum length of a code word, in bits
 *		(asserted to be nonzero and not greater than max_CodeBitNum)
 *
 *	@note The template parameter is spelled _n_max_code_bit_num here (as in
 *		the definition of the static Assign_CodeWords()), because a function
 *		parameter must not reuse the name of a template parameter.
 */
template <class CSymbol, const int _n_max_code_bit_num>
inline CHuffmanTree<CSymbol, _n_max_code_bit_num>::CHuffmanTree(unsigned int n_max_code_bit_num)
	:m_n_max_code_bit_num(n_max_code_bit_num)
{
	_ASSERTE(n_max_code_bit_num > 0 && n_max_code_bit_num <= max_CodeBitNum);
}

/**
 *	@brief counts occurrences of symbols in a sequence and stores them in the frequency list
 *
 *	For one-byte symbol types the list gets one entry per possible symbol
 *	value (including symbols that do not occur). For larger symbol types
 *	the symbols are counted in a std::map and the list only contains
 *	the symbols that were seen, ordered by symbol value.
 *
 *	@param[in] p_begin is pointer to the first symbol
 *	@param[in] p_end is pointer one past the last symbol
 *
 *	@return Returns true on success, false if there was not enough memory.
 */
template <class CSymbol, const int n_max_code_bit_num>
bool CHuffmanTree<CSymbol, n_max_code_bit_num>::CalculateSymbolFrequencies(const _TySymbol *p_begin,
	const _TySymbol *p_end)
{
	if(sizeof(_TySymbol) == 1) {
		_ASSERTE(sizeof(size_t) >= sizeof(_TySymbol));
		_ASSERTE(SIZE_MAX >= ((uint64_t(1) << (sizeof(_TySymbol) * 8 - 1)) |
			((uint64_t(1) << (sizeof(_TySymbol) * 8 - 1)) - 1)));
		// make sure n_max_symbol_value will not overflow

		size_t n_max_symbol_value_1 = n_Mask(sizeof(_TySymbol) * 8); // would warn about overflow in shift otherwise
		m_freq_list.clear();
		// start from an empty list, same as the std::map branch below does; presumably
		// Resize_To_N() only initializes the elements it adds (as std::vector::resize()
		// does), which would let counts from a previous use leak into the new ones
		if(!stl_ut::Resize_To_N(m_freq_list, n_max_symbol_value_1 + 1, TFrequency(_TySymbol(0))))
			return false;
		for(size_t i = 1; i < n_max_symbol_value_1 + 1; ++ i)
			m_freq_list[i].n_symbol = _TySymbol(i);
		// alloc/clear frequency list

		_ASSERTE(p_begin <= p_end);
		for(; p_begin != p_end; ++ p_begin) {
			const size_t n_index = (unsigned char)*p_begin;
			// index by the unsigned byte value; a signed symbol type would
			// otherwise convert negative symbols to huge indices

			_ASSERTE(m_freq_list[n_index].n_frequency < SIZE_MAX);
			// overflow shouldn't occur since sum of all frequencies
			// is buffer length which is size_t as well

			++ m_freq_list[n_index].n_frequency;
			// calculate frequency
		}
		// accumulate symbol frequencies
	} else {
		try {
			std::map<_TySymbol, size_t> symbol_map;

			_ASSERTE(p_begin <= p_end);
			for(; p_begin != p_end; ++ p_begin) {
				size_t &r_n_count = symbol_map[*p_begin];
				// a single lookup (inserts zero for a new symbol)

				_ASSERTE(r_n_count < SIZE_MAX);
				// overflow shouldn't occur since sum of all frequencies
				// is buffer length which is size_t as well

				++ r_n_count;
				// calculate frequency
			}
			// accumulate symbol frequencies

			m_freq_list.clear(); // avoid copying if already allocated
			m_freq_list.resize(symbol_map.size(), TFrequency(_TySymbol(0))); // we're in try-catch
			typename std::vector<TFrequency>::iterator p_freq_it = m_freq_list.begin();
			for(typename std::map<_TySymbol, size_t>::const_iterator p_sym_it = symbol_map.begin(),
			   p_end_it = symbol_map.end(); p_sym_it != p_end_it; ++ p_sym_it, ++ p_freq_it)
				*p_freq_it = TFrequency((*p_sym_it).first, (*p_sym_it).second);
			// copy the map to std::vector (the iterator types depend on
			// the template arguments, hence the typename)
		} catch(std::bad_alloc&) { // using std::map, need to catch
			return false;
		}
	}
	// handle large tables

	return true;
}

/**
 *	@brief uses a precomputed list of symbol frequencies
 *
 *	@param[in,out] r_freq_list is the list of frequencies to use; if swapping
 *		is allowed, it receives the previous contents of the internal list
 *	@param[in] b_allow_swap selects between swapping the lists (no allocation)
 *		and copying r_freq_list
 *
 *	@return Returns true on success, false if copying the list ran out of memory.
 */
template <class CSymbol, const int n_max_code_bit_num>
bool CHuffmanTree<CSymbol, n_max_code_bit_num>::Use_SymbolFrequencies(std::vector<TFrequency>
	&r_freq_list, bool b_allow_swap)
{
	if(b_allow_swap) {
		m_freq_list.swap(r_freq_list);
		return true;
	}
	// the cheap way, the caller gives the list up

	try {
		m_freq_list = r_freq_list;
	} catch(std::bad_alloc&) {
		return false;
	}
	// need to make a copy

	return true;
}

/**
 *	@brief copies the symbols of the frequency list to an array, in list order
 *
 *	@param[out] p_dest is the destination array, must have space for
 *		n_SymbolTable_Size() symbols
 *	@param[in] n_space_bytes is size of the destination array in bytes
 *		(only used in the assertion)
 */
template <class CSymbol, const int n_max_code_bit_num>
void CHuffmanTree<CSymbol, n_max_code_bit_num>::Get_SymbolTable(_TySymbol *p_dest,
	size_t UNUSED(n_space_bytes)) const
{
	_ASSERTE(n_space_bytes >= n_SymbolTable_Size() * sizeof(_TySymbol));

	const size_t n_symbol_num = m_freq_list.size();
	for(size_t i = 0; i < n_symbol_num; ++ i)
		p_dest[i] = m_freq_list[i].n_symbol;
	// copy the symbols only, not the frequencies or code words
}

/**
 *	@brief looks up the frequency list entry of a symbol
 *
 *	Forwards to the static r_LookupSymbol(), which performs a binary search;
 *	the list therefore needs to be ordered accordingly (presumably by
 *	SortFrequencies_BySymbol() - confirm against the callers).
 *
 *	@param[in] t_sym is the symbol to look up (must be present in the list)
 *
 *	@return Returns const reference to the entry found by the search.
 */
template <class CSymbol, const int n_max_code_bit_num>
const typename CHuffmanTree<CSymbol, n_max_code_bit_num>::TFrequency
	&CHuffmanTree<CSymbol, n_max_code_bit_num>::r_LookupSymbol(const _TySymbol t_sym) const
{
#if defined(_MSC_VER) && !defined(__MWERKS__) && _MSC_VER > 1200
	_ASSERTE(std::lower_bound(m_freq_list.begin(),
		m_freq_list.end(), t_sym, b_LowerSymbol) != m_freq_list.end());
	// make sure the symbol exists
#endif // _MSC_VER && !__MWERKS__ && _MSC_VER > 1200

	return r_LookupSymbol(t_sym, m_freq_list);
	// calling lower_bound() directly here gives duplicate comdat in MSVC 6.0
}

/**
 *	@brief builds a length-limited Huffman code for a list of symbol frequencies
 *
 *	The list is sorted using TFrequency::operator <() and everything from the
 *	first zero-frequency entry to the end is erased. A Huffman tree is built
 *	from the remaining entries using two queues, the numbers of leaves on the
 *	individual tree levels are counted to p_code_num and code words are then
 *	assigned to the list entries in order, shortest first. If the tree is
 *	deeper than the limit, the leaf frequencies are flattened
 *	(f = (f / 2) | 1) and the tree is built again.
 *
 *	@param[in,out] r_freq_list is the list of symbol frequencies; on success
 *		it only contains the symbols with nonzero frequency, with
 *		n_code_word and n_code_length filled in
 *	@param[out] p_code_num is array of n_max_code_bit_num counters; element j
 *		receives the number of code words of length j + 1
 *	@param[in] n_max_code_bit_num is the maximum permitted code length in bits
 *
 *	@return Returns true on success, false if there are too many symbols for
 *		the given code length or if there is not enough memory.
 */
template <class CSymbol, const int _n_max_code_bit_num>
bool CHuffmanTree<CSymbol, _n_max_code_bit_num>::Assign_CodeWords(std::vector<TFrequency> &r_freq_list,
	uint32_t *p_code_num, unsigned int n_max_code_bit_num)
{
	std::sort(r_freq_list.begin(), r_freq_list.end());
	// sort by symbol frequencies (presumably in descending order; the code below
	// expects the zero frequencies at the end and the smallest ones at the back)

	r_freq_list.erase(std::find_if(r_freq_list.begin(),
		r_freq_list.end(), FindZeroFreq), r_freq_list.end());
	// erase any symbols with zero frequencies

	if(r_freq_list.size() >= n_Mask(uint64_t(n_max_code_bit_num)))
		return false;
	// too many symbols

	if(r_freq_list.empty()) {
		memset(p_code_num, 0, sizeof(uint32_t) * n_max_code_bit_num);
		return true;
	}
	// no symbols

	std::vector<TNode> node_list;
	if(!stl_ut::Resize_To_N(node_list, r_freq_list.size() * 2 - 1))
		return false;
	// create list of all nodes in the tree (including non-leaf nodes)

	for(size_t i = 0, n = r_freq_list.size(); i < n; ++ i)
		node_list[i] = TNode(r_freq_list[i].n_frequency);
	// add nodes holding original symbol frequencies

	_ASSERTE(node_list.size() == r_freq_list.size() * 2 - 1);
	// the rest of the nodes will be used as non-leaf nodes

	std::vector<const TNode*> q1, q2;
	// have two queues

	if(!stl_ut::Reserve_N(q1, r_freq_list.size()) ||
	   !stl_ut::Reserve_N(q2, /*(*/r_freq_list.size()/* + 1) / 2*/)) // needs more space for tree traversal
		return false;
	// make sure there's plenty of space in both of them

	for(bool b_restart = false;;) {
		_ASSERTE(q1.empty() && q1.capacity() >= r_freq_list.size());
		q1.resize(r_freq_list.size());
		std::transform(node_list.begin(), node_list.begin() +
			r_freq_list.size(), q1.begin(), CMakePtr());
		// add all original nodes to the first queue

		size_t n_free_node = r_freq_list.size();
		const size_t n_end_node = node_list.size();
		// index to the first free node and one node past the end of the list

		_ASSERTE(q2.empty());
		if(q1.size() == 1) // handle only a single symbol "tree"
			q1.swap(q2); // the result is expected in q2
		else {
			while(q1.size() + q2.size() > 1) {
				_ASSERTE(CFindUnsorted::b_IsSorted(q1));
				// make sure q1 is sorted

				const TNode *p_node[2];
				for(int i = 0; i < 2; ++ i) {
					if(q2.empty() || (!q1.empty() && q1.back()->n_frequency < q2.back()->n_frequency)) {
						p_node[i] = q1.back();
						q1.erase(q1.end() - 1);
						// node in the first queue has smaller freq or is the only left
					} else {
						p_node[i] = q2.back();
						q2.erase(q2.end() - 1);
						// node in second queue has smaller freq or is the only left
					}
				}
				// choose two nodes to merge

				_ASSERTE(n_free_node != n_end_node);
				TNode *p_new = &node_list[n_free_node ++];
				// get some unused node

				_ASSERTE(p_node[0]->n_frequency <= SIZE_MAX - p_node[1]->n_frequency);
				*p_new = TNode(p_node[0]->n_frequency + p_node[1]->n_frequency, p_node[0], p_node[1]);
				// assign frequency and children

				_ASSERTE(q2.capacity() > q2.size());
				q2.insert(q2.begin(), p_new);
				_ASSERTE(q2.size() <= (r_freq_list.size() + 1) / 2);
				// put it to the front of the second queue (the merged nodes are
				// created in the order of nondecreasing frequency, so the smallest
				// one stays at the back)

				//std::sort(q2.begin(), q2.end(), TNode::CompareFreq); // not needed
				_ASSERTE(CFindUnsorted::b_IsSorted(q2));
				// make sure the nodes in q2 are sorted
			}
		}
		// create Huffman tree (O(n) method, described in wiki)

		_ASSERTE(n_free_node == n_end_node); // used all the nodes in the tree
		_ASSERTE(q1.empty() && q2.size() == 1); // root node is in q2

		memset(p_code_num, 0, n_max_code_bit_num * sizeof(uint32_t));
		// clear the code counts array

		if(q2.front()->b_Leaf())
			p_code_num[0] = 1; // single leaf node (extreme case)
		else {
			// NOTE(review): the first pass runs with n_level == -1 and takes the odd
			// branch below, so p_code_num[n_level] names the element before the
			// array. Presumably CGetChildren only increments the counter for leaf
			// nodes and the root is not a leaf here, so nothing gets written
			// there - confirm against CGetChildren.
			for(int n_level = -1; !q1.empty() || !q2.empty(); ++ n_level) {
				_ASSERTE(n_max_code_bit_num < INT_MAX);
				if(n_level >= int(n_max_code_bit_num)) {
					b_restart = true;
					break;
				}
				// the tree is deeper than permitted (compare only if there are more nodes)

				if(n_level & 1) {
					_ASSERTE(!q2.empty() && q1.empty());
					std::for_each(q2.begin(), q2.end(), CGetChildren(q1, p_code_num[n_level]));
					q2.clear();
				} else {
					_ASSERTE(!q1.empty() && q2.empty());
					std::for_each(q1.begin(), q1.end(), CGetChildren(q2, p_code_num[n_level]));
					q1.clear();
				}
				// alternately move whole tree levels between q1 and q2
			}
		}
		// determine number of symbols with distinct lengths (note this kind of
		// non-recursive traversal destroys the tree, but needs no additional memory)

		if(!b_restart) {
			_TyCodeWord n_code_word = 0;
			for(size_t i = 0, j = 0, n_symbol_num = r_freq_list.size();; ++ j) {
				unsigned int n_length = j + 1;
				_ASSERTE(n_length <= n_max_code_bit_num);
				_ASSERTE(p_code_num[j] < UINT32_MAX); // otherwise the below loop gets infinite
				for(uint32_t k = 0; k < p_code_num[j]; ++ k, ++ i) {
					_ASSERTE(n_code_word < UINT32_MAX && !(n_code_word >> n_length));
					TFrequency &r_freq = r_freq_list[i];
					r_freq.n_code_word = n_code_word ++;
					r_freq.n_code_length = n_length;
				}
				// assign code words (consecutive values for the codes of the same length)

				if(!(n_symbol_num -= p_code_num[j]))
					break;
				// we've processed this many symbols, stop when none are left

				_ASSERTE(n_code_word == ((n_code_word << 1) >> 1));
				n_code_word <<= 1;
				// shift code word (increase length)
			}
			// assign code words, fill-in code lengths

			break;
		} else {
			//printf("Huffman restarts ...\r");
			// we need to modify the frequencies so that a shallower tree gets built

			for(size_t i = 0, n = r_freq_list.size(); i < n; ++ i) {
				_TyFrequency &r_n_freq = node_list[i].n_frequency;
				r_n_freq = (r_n_freq / 2) | 1;
			}
			// change symbol probability distribution (halve the frequencies
			// of the leaf nodes but keep them nonzero)

			std::sort(node_list.begin(), node_list.begin() + r_freq_list.size());
			// sort leaf nodes by new frequencies

			q1.clear();
			q2.clear();
			b_restart = false;
			// clear for the next pass
		}
	}

	return true;
}

/**
 *	@brief assigns code words to the symbols in the internal frequency list
 *	@return Returns the result of the static Assign_CodeWords() applied to
 *		the internal frequency list, code count table and maximum code length.
 */
template <class CSymbol, const int n_max_code_bit_num>
inline bool CHuffmanTree<CSymbol, n_max_code_bit_num>::Assign_CodeWords()
{
	const bool b_result = Assign_CodeWords(m_freq_list,
		m_p_code_num, m_n_max_code_bit_num);
	return b_result;
}

/**
 *	@brief gets the size of the code count table
 *	@return Returns the number of elements of the table returned by
 *		p_CodeTable(), which equals the maximum code length set in the constructor.
 */
template <class CSymbol, const int n_max_code_bit_num>
inline unsigned int CHuffmanTree<CSymbol, n_max_code_bit_num>::n_CodeTable_Size() const
{
	const unsigned int n_size = m_n_max_code_bit_num;
	return n_size;
}

/**
 *	@brief gets the code count table
 *	@return Returns pointer to the table of numbers of code words
 *		of the individual lengths (filled by Assign_CodeWords()).
 */
template <class CSymbol, const int n_max_code_bit_num>
inline const uint32_t *CHuffmanTree<CSymbol, n_max_code_bit_num>::p_CodeTable() const
{
	const uint32_t *p_table = m_p_code_num;
	return p_table;
}

/**
 *	@brief gets the number of symbols
 *	@return Returns the number of entries in the internal frequency list.
 */
template <class CSymbol, const int n_max_code_bit_num>
inline size_t CHuffmanTree<CSymbol, n_max_code_bit_num>::n_SymbolTable_Size() const
{
	const size_t n_symbol_num = m_freq_list.size();
	return n_symbol_num;
}

/**
 *	@brief sorts the internal frequency list by symbol value (ascending)
 */
template <class CSymbol, const int n_max_code_bit_num>
inline void CHuffmanTree<CSymbol, n_max_code_bit_num>::SortFrequencies_BySymbol()
{
	std::vector<TFrequency> &r_freq_list = m_freq_list;
	SortFrequencies_BySymbol(r_freq_list);
}

/**
 *	@brief sorts a frequency list by symbol value (ascending, using CompareSymbol())
 *	@param[in,out] freq_list is the list to be sorted
 */
template <class CSymbol, const int n_max_code_bit_num>
inline void CHuffmanTree<CSymbol, n_max_code_bit_num>::SortFrequencies_BySymbol(std::vector<TFrequency> &freq_list)
{
	std::sort(freq_list.begin(),
		freq_list.end(), CompareSymbol);
}

/**
 *	@brief looks up the entry of a symbol in a frequency list using binary search
 *
 *	@param[in] t_sym is the symbol to look up
 *	@param[in] freq_list is the list to search in
 *
 *	@return Returns const reference to the element std::lower_bound() points to.
 *
 *	@note The result of the search is dereferenced without checking it against
 *		the end of the list.
 *	@note NOTE(review): std::lower_bound() calls its predicate as (element, value)
 *		while b_LowerSymbol() is declared as (symbol, element) - confirm that the
 *		conversions between _TySymbol and TFrequency make this behave as intended.
 */
template <class CSymbol, const int n_max_code_bit_num>
inline const typename CHuffmanTree<CSymbol, n_max_code_bit_num>::TFrequency &CHuffmanTree<CSymbol,
	n_max_code_bit_num>::r_LookupSymbol(const _TySymbol t_sym, const std::vector<TFrequency> &freq_list)
{
	return *std::lower_bound(freq_list.begin(),
		freq_list.end(), t_sym, b_LowerSymbol); // t_odo - see if this compiles with msvc 60
}

/**
 *	@brief less-than comparison of a symbol and a frequency list entry,
 *		used as the predicate in r_LookupSymbol()
 *
 *	@param[in] n_sym is the symbol (left side of the comparison)
 *	@param[in] r_freq is the list entry (right side of the comparison)
 *
 *	@return Returns true if n_sym is less than the symbol of r_freq, otherwise false.
 */
template <class CSymbol, const int n_max_code_bit_num>
bool CHuffmanTree<CSymbol, n_max_code_bit_num>::b_LowerSymbol(_TySymbol n_sym, const TFrequency &r_freq)
{
	const bool b_lower = n_sym < r_freq.n_symbol;
	return b_lower;
}

/**
 *	@brief predicate for finding symbols that do not occur
 *	@param[in] r_t_freq is the frequency list entry to test
 *	@return Returns true if the frequency of the entry is zero, otherwise false.
 */
template <class CSymbol, const int n_max_code_bit_num>
inline bool CHuffmanTree<CSymbol, n_max_code_bit_num>::FindZeroFreq(const TFrequency &r_t_freq)
{
	const bool b_zero = !r_t_freq.n_frequency;
	return b_zero;
}

/**
 *	@brief less-than comparison of two frequency list entries by their symbols
 *
 *	@param[in] r_freq_a is the left side of the comparison
 *	@param[in] r_freq_b is the right side of the comparison
 *
 *	@return Returns true if the symbol of r_freq_a is less than
 *		the symbol of r_freq_b, otherwise false.
 */
template <class CSymbol, const int n_max_code_bit_num>
inline bool CHuffmanTree<CSymbol, n_max_code_bit_num>::CompareSymbol(const TFrequency &r_freq_a,
	const TFrequency &r_freq_b)
{
	const bool b_lower = r_freq_a.n_symbol < r_freq_b.n_symbol;
	return b_lower;
}

/*
 *								=== ~CHuffmanTree ===
 */

/*
 *								=== CHuffmanUtil_DecodeTable ===
 */

/**
 *	@brief constructor; only remembers the input range, Initialize() reads the tables
 *
 *	@param[in] p_input is pointer to the first byte of the stored Huffman table
 *	@param[in] p_end is pointer one past the last input byte
 *	@param[in] n_tree_size is number of entries of the stored code count table
 *		(asserted not to exceed n_max_tree_size)
 */
template <class CSymbolType, int n_max_tree_size>
inline CHuffmanUtil_DecodeTable<CSymbolType, n_max_tree_size>::CHuffmanUtil_DecodeTable(const uint8_t *p_input,
	const uint8_t *p_end, unsigned int n_tree_size /*= n_max_tree_size*/)
	:m_n_bit_num(0),
	m_p_input(p_input),
	m_p_end(p_end),
	m_p_symbol(0),
	m_n_symbol_num(0),
	m_p_code_num(0),
	m_n_code_bit_num(n_tree_size)
{
	_ASSERTE(n_tree_size <= n_max_tree_size);
	_ASSERTE(p_input <= p_end);
}

/**
 *	@brief reads the Huffman table from the input and prepares the decoding tables
 *
 *	The input is expected to contain one 32-bit count of code words per code
 *	length, followed by the symbols in the order of their code words. Both
 *	tables are referenced in place, the input must stay allocated while
 *	the decoder is in use.
 *
 *	@return Returns true on success, false if the input is too short
 *		to contain the tables.
 *
 *	@note This must only be called once (asserted).
 *	@note If this fails on the symbol table, the input pointer has already
 *		been advanced past the code count table.
 */
template <class CSymbolType, int n_max_tree_size>
bool CHuffmanUtil_DecodeTable<CSymbolType, n_max_tree_size>::Initialize()
{
	_ASSERTE(!m_p_symbol); // make sure this was not called before
	_ASSERTE(m_p_input <= m_p_end);

	const size_t n_code_table_bytes = sizeof(int32_t) * m_n_code_bit_num;
	if(size_t(m_p_end - m_p_input) < n_code_table_bytes)
		return false;
	// least possible size of input (compare sizes rather than pointers;
	// adding to the pointer first could form an address past the end)

	const uint32_t *p_code_num = (const uint32_t*)m_p_input; // numbers of codes of different lengths
	m_p_code_num = p_code_num;
	m_p_input += n_code_table_bytes;
	// get numbers of codes

	size_t n_symbol_num = 0;
	for(uint32_t i = 0, n_cw = 0; i < m_n_code_bit_num; ++ i, n_cw <<= 1) {
		//m_p_min_code[i] = n_cw;
		m_p_table_off[i] = n_symbol_num - n_cw;
		m_p_max_code[i] = (n_cw += p_code_num[i]);
		n_symbol_num += p_code_num[i];
	}
	m_p_max_code[m_n_code_bit_num] = UINT32_MAX; // serves as a sentinel
	m_n_symbol_num = n_symbol_num;
	// calculate number of symbols, table indices and min / max code values

	if(n_symbol_num > size_t(m_p_end - m_p_input) / sizeof(CSymbolType))
		return false;
	// check size of input; the counts come from the input, so divide the
	// remaining size instead of multiplying the count (which could overflow
	// and let a too short input pass)

	m_p_symbol = (const CSymbolType*)m_p_input;
	m_p_input += n_symbol_num * sizeof(CSymbolType);
	// reference the symbols in the input

	return true;
}

/**
 *	@brief gets the number of code lengths in the table
 *	@return Returns the tree size given to the constructor.
 */
template <class CSymbolType, int n_max_tree_size>
inline int CHuffmanUtil_DecodeTable<CSymbolType, n_max_tree_size>::n_Code_Bit_Num() const
{
	const int n_bit_num = m_n_code_bit_num;
	return n_bit_num;
}

/**
 *	@brief gets the upper bound of the code words of a given length
 *
 *	@param[in] n_code_bit_num is zero-based index of the code length
 *		(asserted to be less than the number of code lengths)
 *
 *	@return Returns the value stored by Initialize() for this length; code
 *		words of this length are below it (see Decode_Symbol()).
 */
template <class CSymbolType, int n_max_tree_size>
inline uint32_t CHuffmanUtil_DecodeTable<CSymbolType, n_max_tree_size>::n_Max_Code(int n_code_bit_num) const
{
	_ASSERTE(n_code_bit_num >= 0 && unsigned(n_code_bit_num) < m_n_code_bit_num);

	const uint32_t n_max_code = m_p_max_code[n_code_bit_num];
	return n_max_code;
}

/**
 *	@brief gets the smallest code word of a given length
 *
 *	@param[in] n_code_bit_num is zero-based index of the code length
 *		(asserted to be less than the number of code lengths)
 *
 *	@return Returns the upper bound for this length minus the number
 *		of code words of this length.
 */
template <class CSymbolType, int n_max_tree_size>
inline uint32_t CHuffmanUtil_DecodeTable<CSymbolType, n_max_tree_size>::n_Min_Code(int n_code_bit_num) const
{
	_ASSERTE(n_code_bit_num >= 0 && unsigned(n_code_bit_num) < m_n_code_bit_num);

	const uint32_t n_min_code = m_p_max_code[n_code_bit_num] - m_p_code_num[n_code_bit_num];
	return n_min_code;
}

/**
 *	@brief finds the smallest symbol in the symbol table
 *	@return Returns the smallest symbol, or CSymbolType(0) if the table is empty.
 *	@note Initialize() must have been called (asserted).
 */
template <class CSymbolType, int n_max_tree_size>
CSymbolType CHuffmanUtil_DecodeTable<CSymbolType, n_max_tree_size>::n_Min_Symbol() const
{
	_ASSERTE(m_p_symbol); // make sure Initialize() was called

	if(m_n_symbol_num == 0)
		return CSymbolType(0);
	// no symbols at all

	const CSymbolType *p_sym = m_p_symbol;
	const CSymbolType *const p_sym_end = m_p_symbol + m_n_symbol_num;
	CSymbolType n_smallest = *p_sym;
	for(++ p_sym; p_sym != p_sym_end; ++ p_sym) {
		if(n_smallest > *p_sym)
			n_smallest = *p_sym;
	}
	// linear search, the table is not sorted by symbol value

	return n_smallest;
}

/**
 *	@brief finds the largest symbol in the symbol table
 *	@return Returns the largest symbol, or CSymbolType(0) if the table is empty.
 *	@note Initialize() must have been called (asserted).
 */
template <class CSymbolType, int n_max_tree_size>
CSymbolType CHuffmanUtil_DecodeTable<CSymbolType, n_max_tree_size>::n_Max_Symbol() const
{
	_ASSERTE(m_p_symbol); // make sure Initialize() was called

	if(m_n_symbol_num == 0)
		return CSymbolType(0);
	// no symbols at all

	const CSymbolType *p_sym = m_p_symbol;
	const CSymbolType *const p_sym_end = m_p_symbol + m_n_symbol_num;
	CSymbolType n_largest = *p_sym;
	for(++ p_sym; p_sym != p_sym_end; ++ p_sym) {
		if(n_largest < *p_sym)
			n_largest = *p_sym;
	}
	// linear search, the table is not sorted by symbol value

	return n_largest;
}

/**
 *	@brief decodes a single Huffman-coded symbol from a bit stream
 *
 *	Bits are read one at a time, starting with the most significant bit of
 *	each byte, and appended to the code until it falls below the upper bound
 *	of the code words of the current length.
 *
 *	@param[out] r_n_value is filled with the decoded symbol
 *		(left untouched if the function fails)
 *	@param[in,out] r_n_byte is the input byte currently being consumed
 *	@param[in,out] r_n_bit_num is number of unread bits left in r_n_byte
 *	@param[in,out] r_p_input is pointer to the next unread input byte
 *	@param[in] p_end is pointer one past the last input byte
 *
 *	@return Returns true on success, false if the input ran out of data or
 *		if the bits read do not form a valid code word.
 *
 *	@note Initialize() must have been called (asserted).
 */
template <class CSymbolType, int n_max_tree_size>
inline bool CHuffmanUtil_DecodeTable<CSymbolType, n_max_tree_size>::Decode_Symbol(CSymbolType &r_n_value,
	uint8_t &r_n_byte, int &r_n_bit_num, const uint8_t *&r_p_input, const uint8_t *p_end) const
{
	_ASSERTE(m_p_symbol); // make sure Initialize() was called

	uint32_t n_code = 0;
	for(unsigned int i = 0; i <= m_n_code_bit_num; ++ i) {
		if(!r_n_bit_num) {
			if(r_p_input == p_end)
				return false;
			// not enough data to decompress

			r_n_byte = *r_p_input;
			++ r_p_input;
			r_n_bit_num = 7;
		} else
			-- r_n_bit_num;
		n_code = (n_code << 1) | (r_n_byte >> 7);
		r_n_byte <<= 1;
		// get a single bit from input stream, add bit to code

		if(n_code < m_p_max_code[i]) {
			if(i == m_n_code_bit_num)
				return false; // sentinel was decoded, the maximum length of symbol reached

			_ASSERTE(n_code >= n_Min_Code(i)); // always
			r_n_value = m_p_symbol[n_code + m_p_table_off[i]]; // add "- m_p_min_code[i]" to use with dumb version
			return true;
		}
		// see if it's valid code for this bit length
	}
	// the last m_p_max_code is UINT32_MAX and stops nearly every invalid code
	// (the maximum permitted Huffman tree depth is 31, but mostly only 16)

	return false;
	// the sentinel does not stop a code equal to UINT32_MAX (32 one bits);
	// the loop is bounded so that it can not read past the end of m_p_max_code
}

/**
 *	@brief decodes a single Huffman-coded symbol from the decoder's own input
 *
 *	@param[out] r_n_value is filled with the decoded symbol
 *
 *	@return Returns the result of the stateless Decode_Symbol() applied to
 *		the bit reader state stored in this object.
 */
template <class CSymbolType, int n_max_tree_size>
inline bool CHuffmanUtil_DecodeTable<CSymbolType, n_max_tree_size>::Decode_Symbol(CSymbolType &r_n_value)
{
	const bool b_result = Decode_Symbol(r_n_value, m_n_byte,
		m_n_bit_num, m_p_input, m_p_end);
	return b_result;
}

/**
 *	@brief gets the current read position
 *	@return Returns pointer to the next input byte that has not been fetched yet.
 */
template <class CSymbolType, int n_max_tree_size>
inline const uint8_t *CHuffmanUtil_DecodeTable<CSymbolType, n_max_tree_size>::p_Pointer() const
{
	const uint8_t *p_next_byte = m_p_input;
	return p_next_byte;
}

/**
 *	@brief determines whether the read pointer reached the end of the input
 *	@return Returns true if the input pointer equals the end pointer, otherwise false.
 *	@note This compares byte pointers only; bits still buffered in the
 *		current byte are not taken into account.
 */
template <class CSymbolType, int n_max_tree_size>
inline bool CHuffmanUtil_DecodeTable<CSymbolType, n_max_tree_size>::b_Finished() const
{
	const bool b_at_end = (m_p_end == m_p_input);
	return b_at_end;
}

/*
 *								=== ~CHuffmanUtil_DecodeTable ===
 */

/*
 *								=== CHuffmanUtil_EncodeTable ===
 */

/**
 *	@brief constructor
 *	@param[in] r_huffman_tree is the Huffman tree whose tables are to be
 *		written (a reference to it is kept)
 */
template <class CSymbolType, int n_max_tree_size>
inline CHuffmanUtil_EncodeTable<CSymbolType, n_max_tree_size>::CHuffmanUtil_EncodeTable(const CHuff &r_huffman_tree)
	:m_r_huffman_tree(r_huffman_tree)
{
}

/**
 *	@brief calculates the size of the stored tables of the referenced tree
 *	@return Returns the size of the code count table and the symbol table, in bytes.
 */
template <class CSymbolType, int n_max_tree_size>
inline size_t CHuffmanUtil_EncodeTable<CSymbolType, n_max_tree_size>::n_Table_Size() const
{
	const size_t n_size_bytes = n_Table_Size(m_r_huffman_tree);
	return n_size_bytes;
}

/**
 *	@brief appends the tables of the referenced tree to the end of a buffer
 *
 *	@param[in,out] r_t_buffer is the output buffer (grown as needed)
 *
 *	@return Returns true on success, false if growing the buffer failed.
 */
template <class CSymbolType, int n_max_tree_size>
inline bool CHuffmanUtil_EncodeTable<CSymbolType, n_max_tree_size>::Write_Table(TBuffer &r_t_buffer) const
{
	uint8_t *p_buffer_end = r_t_buffer.p_Data() + r_t_buffer.n_Size();
	// start writing at the end of the data, the static function makes the space

	return Write_Table(p_buffer_end, r_t_buffer, m_r_huffman_tree);
}

/**
 *	@brief writes the tables of the referenced tree at a given position in a buffer
 *
 *	@param[in,out] r_p_output is pointer to where the tables are to be written
 *		(inside r_t_buffer); it is moved past the tables
 *	@param[in,out] r_t_buffer is the output buffer (grown as needed)
 *
 *	@return Returns true on success, false if growing the buffer failed.
 */
template <class CSymbolType, int n_max_tree_size>
inline bool CHuffmanUtil_EncodeTable<CSymbolType, n_max_tree_size>::Write_Table(uint8_t *&r_p_output,
	TBuffer &r_t_buffer) const
{
	const bool b_result = Write_Table(r_p_output, r_t_buffer, m_r_huffman_tree);
	return b_result;
}

/**
 *	@brief calculates the size of the stored tables of a Huffman tree
 *
 *	@param[in] r_huffman_tree is the tree
 *
 *	@return Returns the size of the code count table (32 bits per entry)
 *		plus the size of the symbol table, in bytes.
 */
template <class CSymbolType, int n_max_tree_size>
size_t CHuffmanUtil_EncodeTable<CSymbolType, n_max_tree_size>::n_Table_Size(const CHuff &r_huffman_tree)
{
	const size_t n_code_table_bytes = sizeof(uint32_t) * r_huffman_tree.n_CodeTable_Size();
	const size_t n_symbol_table_bytes = sizeof(CSymbolType) * r_huffman_tree.n_SymbolTable_Size();
	return n_code_table_bytes + n_symbol_table_bytes;
}

/**
 *	@brief writes the tables of a Huffman tree to preallocated memory
 *
 *	The code count table is written first, followed by the symbol table.
 *
 *	@param[in,out] r_p_output is pointer to where the tables are to be
 *		written; it is moved past the tables
 *	@param[in] p_end is pointer one past the end of the available space
 *		(only used in the assertions)
 *	@param[in] r_huffman_tree is the tree
 *
 *	@note There must be at least n_Table_Size(r_huffman_tree) bytes of space;
 *		this is asserted but not checked in release builds.
 */
template <class CSymbolType, int n_max_tree_size>
void CHuffmanUtil_EncodeTable<CSymbolType, n_max_tree_size>::Write_Table(uint8_t *&r_p_output,
	const uint8_t *p_end, const CHuff &r_huffman_tree)
{
	const size_t n_code_table_bytes = sizeof(uint32_t) * r_huffman_tree.n_CodeTable_Size();
	const size_t n_symbol_table_bytes = sizeof(CSymbolType) * r_huffman_tree.n_SymbolTable_Size();
	// sizes of the two parts of the stored Huffman tree
	// (numbers of codes with distinct lengths and symbols)

	_ASSERTE(r_p_output + (n_code_table_bytes + n_symbol_table_bytes) <= p_end);
	// make sure there is space

	memcpy(r_p_output, r_huffman_tree.p_CodeTable(), n_code_table_bytes);
	r_p_output += n_code_table_bytes;
	// write numbers of codes of different lengths

	CSymbolType *p_symbol_dest = (CSymbolType*)r_p_output;
	r_huffman_tree.Get_SymbolTable(p_symbol_dest, n_symbol_table_bytes);
	r_p_output += n_symbol_table_bytes;
	// write the associated symbols

	_ASSERTE(r_p_output <= p_end);
}

/**
 *	@brief writes the table of a Huffman tree into a buffer, growing the buffer if needed
 *
 *	@param[in,out] r_p_output is the write position inside r_t_buffer; on success
 *		it points right past the written table (it is re-anchored if the buffer was grown)
 *	@param[in,out] r_t_buffer is the buffer that r_p_output points into
 *	@param[in] r_huffman_tree is the tree whose table is written
 *
 *	@return Returns true on success, false if the buffer could not be grown
 *		(in that case nothing is written and r_p_output is left unchanged).
 */
template <class CSymbolType, int n_max_tree_size>
bool CHuffmanUtil_EncodeTable<CSymbolType, n_max_tree_size>::Write_Table(uint8_t *&r_p_output, TBuffer &r_t_buffer, const CHuff &r_huffman_tree)
{
	_ASSERTE(r_p_output >= r_t_buffer.p_Data() && r_p_output <=
		r_t_buffer.p_Data() + r_t_buffer.n_Size());
	// the write position must lie inside the buffer (or at its end)

	const size_t n_space_left = r_t_buffer.n_Size() - (r_p_output - r_t_buffer.p_Data());
	const size_t n_space_needed = n_Table_Size(r_huffman_tree);
	// see how much room there is and how much the table takes

	if(n_space_left < n_space_needed) {
		const size_t n_missing = n_space_needed - n_space_left;
		if(!r_t_buffer.Grow(n_missing))
			return false;
		// enlarge the buffer by exactly the missing amount

		r_p_output = r_t_buffer.p_Data() + (r_t_buffer.n_Size() - n_space_needed);
		// the storage might have moved; the table will occupy the tail of the buffer
	}

	const uint8_t *p_buffer_end = r_t_buffer.p_Data() + r_t_buffer.n_Size();
	Write_Table(r_p_output, p_buffer_end, r_huffman_tree);
	// emit the table; this advances r_p_output past it

	return true;
}

/*
 *								=== ~CHuffmanUtil_EncodeTable ===
 */

/*
 *								=== CHuffmanUtil ===
 */

/**
 *	@brief decodes a Huffman-compressed stream
 *
 *	The input starts with a 32-bit size of the unpacked data (in bytes), followed
 *	by the Huffman table (read by CDecodeTable) and by the encoded symbols.
 *
 *	@param[in] r_t_in_buffer is the compressed input
 *	@param[out] p_max_symbol is an optional pointer (can be null) that
 *		receives the value reported by CDecodeTable::n_Max_Symbol()
 *	@param[out] r_t_out_buffer is resized to the unpacked size and filled with the decoded symbols
 *	@param[in] n_max_code_bit_num is the maximum code length, in bits (in the live
 *		code path it is only used to calculate the least plausible input size)
 *
 *	@return Returns pointer to where the decoder stopped reading the input (there may be
 *		other payload following), or null on failure (input too short, unpacked size not
 *		a multiple of the symbol size, invalid table or code, not enough memory).
 *
 *	@note The "#if 0" branches hold an older inline implementation kept for reference;
 *		they would not compile if enabled as the code around them depends on decode_table.
 */
template <class CSymbolType, int n_max_tree_size>
const uint8_t *CHuffmanUtil<CSymbolType, n_max_tree_size>::p_Decode(const TBuffer &r_t_in_buffer,
	CSymbolType *p_max_symbol, TBuffer &r_t_out_buffer,
	unsigned int n_max_code_bit_num = n_max_tree_size) // returns pointer to where it stopped reading or null on failure
{
	if(r_t_in_buffer.n_Size() < sizeof(int32_t) * n_max_code_bit_num + sizeof(uint32_t))
		return 0;
	// least possible size of input

	const uint8_t *p_input = r_t_in_buffer.p_Data();
	// input buffer pointer

	uint32_t n_unpack_length;
	memcpy(&n_unpack_length, p_input, sizeof(uint32_t));
	p_input += sizeof(uint32_t);
	// get size of uncompressed data (use memcpy, the input is not guaranteed to be aligned)

	if(n_unpack_length % sizeof(CSymbolType))
		return 0;
	// the output must consist of entire symbols, otherwise the decoding
	// loop below would step over the end of the output buffer

#if 0
	int32_t *p_code_num = (int32_t*)p_input; // numbers of codes of different lengths
	p_input += sizeof(int32_t) * n_max_code_bit_num;
	// get numbers of codes

	//uint32_t p_min_code[n_max_tree_size]; // not needed
	uint32_t p_max_code[n_max_tree_size];
	int32_t p_table_off[n_max_tree_size];
	// minimal / maximal codes for given lengths and offsets to symbol table

	/*for(int i = 0, n_code_word = 0, n_prev_code_length = 0; i < n_max_code_bit_num; ++ i) {
		int n_code_length = i + 1; // used as difference value, don't need + 1
		if(n_symbol_num && n_code_length != n_prev_code_length)
			n_code_word <<= n_code_length - n_prev_code_length;
		p_min_code[i] = n_code_word;
		n_code_word += p_code_num[i];
		p_max_code[i] = n_code_word;
		p_table_offset[i] = n_symbol_num;
		n_symbol_num += p_code_num[i];
		if(p_code_num[i])
			n_prev_code_length = n_code_length;
	}*/
	// dumb version of below

	int n_symbol_num = 0;
	for(uint32_t i = 0, n_cw = 0; i < n_max_code_bit_num; ++ i, n_cw <<= 1) {
		//p_min_code[i] = n_cw;
		p_table_off[i] = n_symbol_num - n_cw;
		p_max_code[i] = (n_cw += p_code_num[i]);
		n_symbol_num += p_code_num[i];
	}
	// calculate number of symbols, table indices and min / max code values

	if(r_t_in_buffer.n_Size() < sizeof(CSymbolType) * n_symbol_num)
		return 0;
	// check size of input

	const CSymbolType *p_symbol = (const CSymbolType*)p_input;
	p_input += n_symbol_num * sizeof(CSymbolType);
	// copy symbols
#else // 0
	CDecodeTable decode_table(p_input, r_t_in_buffer.p_Data() + r_t_in_buffer.n_Size());
	if(!decode_table.Initialize())
		return 0; // a null pointer, not "false" (which is not a valid pointer value since C++11)
	// read Huffman table
#endif // 0

	if(p_max_symbol) {
		*p_max_symbol = decode_table.n_Max_Symbol();
		/*if(n_symbol_num) {
			CSymbolType n_max_symbol = *p_symbol;
			for(int i = 1; i < n_symbol_num; ++ i) {
				if(n_max_symbol < p_symbol[i])
					n_max_symbol = p_symbol[i];
			}
			*p_max_symbol = n_max_symbol;
		} else
			*p_max_symbol = 0;*/
	}
	// find maximum symbol value

	if(!r_t_out_buffer.Resize(n_unpack_length, false))
		return 0;
	// alloc output buffer

	uint8_t n_byte = 0;
	int n_bit_num = 0;
	for(CSymbolType *p_dest = (CSymbolType*)r_t_out_buffer.p_Data(), *p_end =
	   (CSymbolType*)(r_t_out_buffer.p_Data() + r_t_out_buffer.n_Size()); p_dest != p_end; ++ p_dest) {
#if 0
		for(uint32_t i = 0, n_code = 0;;) {
			if(!n_bit_num) {
				if(p_input == r_t_in_buffer.p_Data() + r_t_in_buffer.n_Size())
					return 0;
				// not enough data to decompress

				n_byte = *p_input ++;
				n_bit_num = 7;
			} else
				-- n_bit_num;
			n_code = (n_code << 1) | (n_byte >> 7);
			n_byte <<= 1;
			// get a single bit from input stream, add bit to code

			if(/*n_code >= p_min_code[i] &&*/ n_code < p_max_code[i]) {
				_ASSERTE(n_code >= /*p_min_code[i]*/p_max_code[i] - p_code_num[i]); // always
				*p_dest = p_symbol[n_code + p_table_off[i]]; // add "- p_min_code[i]" to use with dumb version
				break;
			}
			// see if it's valid code for this bit length

			if(++ i == n_max_code_bit_num)
				return 0; // invalid code
		}
#else
		if(!decode_table.Decode_Symbol(*p_dest))
			return 0; // a null pointer, not "false"
#endif
		// todo - implement lookahead table to guess the length of the symbol based on the first byte
	}
	// decode data

	p_input = decode_table.p_Pointer();
	// see where the decoder stopped reading

	//_ASSERTE(decode_table.b_Finished());
	//_ASSERTE(p_input == r_t_in_buffer.p_Data() + r_t_in_buffer.n_Size());
	// don't make sure we've read the whole input buffer, there might be some other payload following

	return p_input;
}

/**
 *	@brief compresses a buffer of symbols using Huffman coding
 *
 *	The output consists of a 32-bit size of the input (in bytes), the Huffman
 *	table (written by CEncodeTable) and the bit-packed code words of the symbols.
 *
 *	@param[in] r_t_in_buffer is the input; its size must be a multiple of sizeof(CSymbolType)
 *		and must fit in 32 bits (it is stored in a 32-bit header field)
 *	@param[out] r_t_out_buffer is the output (the original contents are discarded)
 *	@param[in] n_max_code_bit_num is the maximum code length, in bits (passed to the Huffman tree)
 *
 *	@return Returns true on success, false on failure (bad input size,
 *		failure to build the tree, or failure to grow the output).
 */
template <class CSymbolType, int n_max_tree_size>
bool CHuffmanUtil<CSymbolType, n_max_tree_size>::Encode(const TBuffer &r_t_in_buffer,
	TBuffer &r_t_out_buffer, unsigned int n_max_code_bit_num = n_max_tree_size)
{
	typedef CHuffmanTree<CSymbolType, n_max_tree_size> CHuff; // Huffman tree
	CHuff huff_tree(n_max_code_bit_num);

	_ASSERTE(!(r_t_in_buffer.n_Size() % sizeof(CSymbolType))); // likely a programmer's fault
	if(r_t_in_buffer.n_Size() % sizeof(CSymbolType))
		return false;
	// must be a buffer of entire symbols

	if(r_t_in_buffer.n_Size() > UINT32_MAX)
		return false;
	// the size is stored as a 32-bit number, a larger input would be
	// silently truncated in the header and could not be decoded

	{
		const CSymbolType *p_src = (const CSymbolType*)r_t_in_buffer.p_Data(),
			*p_end = (const CSymbolType*)(r_t_in_buffer.p_Data() + r_t_in_buffer.n_Size());
		if(!huff_tree.CalculateSymbolFrequencies(p_src, p_end))
			return false;
	}
	// calculate symbol frequencies

	if(!huff_tree.Assign_CodeWords())
		return false;
	// create Huffman tree

	r_t_out_buffer.Resize(0);
	// start with an empty output, the writers below grow it as needed

	if(!CEmitInt<uint32_t>(r_t_out_buffer)(uint32_t(r_t_in_buffer.n_Size())))
		return false;
	// write size of decompressed stream so decompressor can work in a single pass

	uint8_t *p_output = r_t_out_buffer.p_Data() + sizeof(uint32_t);
	// output buffer (size will change dynamically)

	CEncodeTable encode_table(huff_tree);
	if(!encode_table.Write_Table(p_output, r_t_out_buffer))
		return false;
	// write Huffman table

	huff_tree.SortFrequencies_BySymbol();
	// sort by symbols so we can search using lower_bound (binary search)
	// note that freq_list has symbols with zero frequency removed
	// in CreateHuffmanTree(), searching is therefore inevitable (could
	// build lookup table though)

	{
		CBitEncoder bit_coder(p_output, r_t_out_buffer);
		// bit writer

		const CSymbolType *p_src = (const CSymbolType*)r_t_in_buffer.p_Data(),
			*p_end2 = (const CSymbolType*)(r_t_in_buffer.p_Data() + r_t_in_buffer.n_Size());
		for(; p_src != p_end2; ++ p_src) {
			const typename CHuff::TFrequency &r_freq =
				huff_tree.r_LookupSymbol(*p_src); // CHuff is a dependent type, typename is required here
			_ASSERTE(r_freq.n_symbol == *p_src);
			// find symbol in Huffman tree

			if(!bit_coder.Encode_Symbol(r_freq.n_code_word, r_freq.n_code_length))
				return false;
			// write symbol as series of bits
		}

		if(!bit_coder.Flush())
			return false;
		// output any leftover bits

		p_output = bit_coder.p_Pointer();
		// restore pointer
	}
	// compress

	_ASSERTE(p_output == r_t_out_buffer.p_Data() + r_t_out_buffer.n_Size()); // no need to resize

	return true;
}

/*
 *								=== ~CHuffmanUtil ===
 */

/*
 *								=== CBlockyHuffmanCodec ===
 */

/**
 *	@brief decodes a sequence of independently compressed blocks
 *
 *	The input is a sequence of slices, each consisting of a 32-bit size of the
 *	compressed data followed by the data itself. Each slice is decoded using
 *	CBlockCodec::Decode() and the results are concatenated in the output.
 *
 *	@param[in] r_t_in_buffer is the compressed input (must not be the same object as the output)
 *	@param[out] r_t_out_buffer is the output (cleared first; holds
 *		the blocks decoded so far if the function fails)
 *
 *	@return Returns true on success, false on failure (truncated or
 *		malformed input, block decoder failure, not enough memory).
 */
template <class _TyHuffmanCodec, const size_t n_block_size>
bool CBlockyHuffmanCodec<_TyHuffmanCodec, n_block_size>::Decode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer)
{
	_ASSERTE(&r_t_in_buffer != &r_t_out_buffer); // input and output buffer must not be the same

	r_t_out_buffer.Clear(); // !!

	//FILE *p_fw = fopen("blocky_dec.txt", "w"); // debug

	TBuffer t_block_buffer, t_tmp;
	const uint8_t *p_src = r_t_in_buffer.p_Data(),
		*p_end = r_t_in_buffer.p_Data() + r_t_in_buffer.n_Size();
	while(p_src != p_end) {
		if(size_t(p_end - p_src) < sizeof(uint32_t))
			return false; // not enough data
		// compare sizes rather than pointers, adding to p_src could go past the end of the input

		uint32_t n_stored_slice_size;
		memcpy(&n_stored_slice_size, p_src, sizeof(uint32_t));
		p_src += sizeof(uint32_t);
		const size_t n_huffman_slice_size = n_stored_slice_size;
		// get size of input slice (use memcpy, the slices are not guaranteed to be aligned)

		if(n_huffman_slice_size > size_t(p_end - p_src))
			return false; // not enough data
		// the size comes from the stream and can not be trusted;
		// make sure that the slice is entirely inside the input

		if(!t_block_buffer.Resize(n_huffman_slice_size, false))
			return false;
		memcpy(t_block_buffer.p_Data(), p_src, n_huffman_slice_size);
		p_src += n_huffman_slice_size;
		// prepare the input buffer

		if(!CBlockCodec::Decode(t_block_buffer, t_tmp))
			return false;
		// decode using Huffman

		//fprintf(p_fw, "block %d -> %d\n", t_tmp.n_Size(), t_block_buffer.n_Size()); // debug

		size_t n_output_offset = r_t_out_buffer.n_Size();
		if(!r_t_out_buffer.Grow(t_tmp.n_Size()))
			return false;
		memcpy(r_t_out_buffer.p_Data() + n_output_offset,
			t_tmp.p_Data(), t_tmp.n_Size());
		// append the decoded block to the output
	}

	//fclose(p_fw); // debug

	_ASSERTE(p_src == p_end); // make sure the entire sequence was processed

	return true;
}

/**
 *	@brief compresses the input in independent blocks of (at most) n_block_size bytes
 *
 *	Each block is compressed using CBlockCodec::Encode() and stored as a 32-bit
 *	size of the compressed data followed by the data itself.
 *
 *	@param[in] r_t_in_buffer is the input (must not be the same object as the output)
 *	@param[out] r_t_out_buffer is the output (cleared first; holds
 *		the blocks encoded so far if the function fails)
 *
 *	@return Returns true on success, false on failure (block encoder failure, not
 *		enough memory, or a compressed block too large for the 32-bit size field).
 */
template <class _TyHuffmanCodec, const size_t n_block_size>
bool CBlockyHuffmanCodec<_TyHuffmanCodec, n_block_size>::Encode(const TBuffer &r_t_in_buffer, TBuffer &r_t_out_buffer)
{
	_ASSERTE(&r_t_in_buffer != &r_t_out_buffer); // input and output buffer must not be the same

	r_t_out_buffer.Clear(); // !!

	//FILE *p_fw = fopen("blocky_enc.txt", "w"); // debug

	TBuffer t_block_buffer, t_tmp;
	for(size_t n_offset = 0; n_offset < r_t_in_buffer.n_Size(); n_offset += n_block_size) {
		size_t n_size = min(size_t(n_block_size), size_t(r_t_in_buffer.n_Size() - n_offset));
		if(!t_block_buffer.Resize(n_size, false))
			return false;
		memcpy(t_block_buffer.p_Data(), r_t_in_buffer.p_Data() + n_offset, n_size);
		// prepare the input buffer

		if(!CBlockCodec::Encode(t_block_buffer, t_tmp))
			return false;
		// encode using Huffman

		//fprintf(p_fw, "block %d -> %d\n", t_block_buffer.n_Size(), t_tmp.n_Size()); // debug

		if(t_tmp.n_Size() > UINT32_MAX)
			return false;
		const uint32_t n_stored_slice_size = uint32_t(t_tmp.n_Size());
		// the size of the slice is stored as a 32-bit number; fail rather than
		// write a truncated size (checked at runtime, not only in debug builds)

		size_t n_output_offset = r_t_out_buffer.n_Size();
		if(!r_t_out_buffer.Grow(sizeof(uint32_t) + t_tmp.n_Size()))
			return false;
		memcpy(r_t_out_buffer.p_Data() + n_output_offset,
			&n_stored_slice_size, sizeof(uint32_t)); // use memcpy, the slices are not guaranteed to be aligned
		n_output_offset += sizeof(uint32_t);
		memcpy(r_t_out_buffer.p_Data() + n_output_offset, t_tmp.p_Data(), t_tmp.n_Size());
		// append output buffer with the length of compressed Huffman data and the data
	}
	// perform the compression on blocks

	//fclose(p_fw); // debug

	return true;
}

/*
 *								=== ~CBlockyHuffmanCodec ===
 */

#endif // !__SIMPLE_COMPRESSION_INLINES_INCLUDED
