ALib C++ Library
Library Version: 2402 R1
Documentation generated by doxygen
Loading...
Searching...
No Matches
huffman.hpp
Go to the documentation of this file.
1/** ************************************************************************************************
2 * \file
3 * This header file is part of module \alib_bitbuffer of the \aliblong.
4 *
5 * \emoji :copyright: 2013-2024 A-Worx GmbH, Germany.
6 * Published under \ref mainpage_license "Boost Software License".
7 **************************************************************************************************/
8#ifndef HPP_ALIB_BITBUFFER_AC_V1_HUFFMAN
9#define HPP_ALIB_BITBUFFER_AC_V1_HUFFMAN 1
10
11#if !defined (HPP_AWORX_ALIB_BITBUFFER)
13#endif
14
15namespace alib { namespace bitbuffer { namespace ac_v1 {
16
17/**
18 * This class, together with sibling \alib{bitbuffer::ac_v1;HuffmanDecoder} implements the well known
19 * \https{Huffman compression,en.wikipedia.org/wiki/Huffman_coding}.
20 * The class uses type \alib{bitbuffer;BitWriter} to write the bit stream into an underlying
21 * \alib{bitbuffer;BitBuffer}.
22 *
23 * The use of this class is quite straight forward:
24 * - Create an instance of this class
25 * - Feed the data to be compressed once to the algorithm using method
26 * \alib{bitbuffer::ac_v1::HuffmanEncoder;CountSymbol}
27 * - Invoke method \alib{bitbuffer::ac_v1::HuffmanEncoder;Generate}
28 * - Feed all data a second time using method
29 * \alib{bitbuffer::ac_v1::HuffmanEncoder;Write}.
30 */
32{
33 public:
34 static constexpr int WORD_SIZE = 32; ///< Codes are stored in two words of size 32
35 ///< (this constant)
36 static constexpr int MAX_WORDS = 2; ///< Codes are stored in two (this constant) words
37 ///< of size 32.
38
39 /**
40 * The maximum length of a huffman code.
41 * In theory, with 256 symbols, a maximum code length of 255 could occur. For this, each
42 * next symbol needed to have the double frequency than the previous. Hence, setting the
43 * value to 64, limits the encoded data size to 2^64 symbols, which should be more than enaugh
44 * for the next decades.
45 */
46 static constexpr int MAX_CODE_LENGTH = 64;
47
48 /**
49 * Information about the encoding of symbols. The symbol's value (between \c 0 and \c 255)
50 * is not included, but deduced from the objects' position in the symbol array found in
51 * field \alib{bitbuffer::ac_v1::HuffmanEncoder;symbols}.
52 */
53 struct Symbol
54 {
55 std::size_t frequency = 0; ///< The number of occurrences of the symbol.
56 alib::ShiftOpRHS wordLength = 0; ///< 0: symbol not used, otherwise between 1 and 255.
57 uint32_t words[MAX_WORDS] = {0,0}; ///< The bitcode of the symbol.
58 };
59
60 protected:
61
62 BitWriter& bw; ///< The bit writer to use for encoding the data.
63 Symbol symbols[256]; ///< The symbol table.
64
65ALIB_DBG( bool dbgAllValuesAreSame; )
66
67 public:
68 /**
69 * Constructor.
70 * @param bitWriter The bit writer to write the encoding information as well as the encoded
71 * symbols to. (Stored in field #bw.)
72 */
74 : bw(bitWriter)
75 { }
76
77 /**
78 * Counts a symbol.
79 * This method has to be invoked for each byte to compress later.
80 * @param symbol The symbol to count.
81 */
89
90 /**
91 * Generates the huffman encoding table and writes this information to the bit writer.
92 */
94 void Generate();
95
96 /**
97 * Writes the given \p symbol to the bit stream.
98 * @param symbol The symbol to write.
99 */
100 void Write(uint8_t symbol)
101 {
103 auto& rec = symbols[symbol];
104 int bitsLeft= rec.wordLength;
105 #if ALIB_STRINGS
106 ALIB_ASSERT_ERROR( (bitsLeft != 0) | dbgAllValuesAreSame, "BITBUFFER/AC/HFMN",
107 NString256("Try to write symbol unknown to Huffman table: ")
108 << symbol << ", " << bitsLeft )
109 #else
110 ALIB_ASSERT_ERROR( bitsLeft | dbgAllValuesAreSame, "BITBUFFER/AC/HFMN",
111 "Try to write symbol unknown to Huffman table: ", symbol )
112 #endif
113
114
115 auto* word= rec.words;
116 if( bitsLeft > 32)
117 {
118 bw.Write<32>( *word );
119 word++;
120 bitsLeft-= 32;
121 }
123
124 bw.Write(bitsLeft, *word );
125 }
126
127}; // HuffmanEncoder
128
129/**
130 * This class, together with sibling \alib{bitbuffer::ac_v1;HuffmanEncoder} implements the well known
131 * \https{Huffman compression,en.wikipedia.org/wiki/Huffman_coding}.
132 * The class uses type \alib{bitbuffer;BitReader} to read the bit stream from an underlying
133 * \alib{bitbuffer;BitBuffer}.
134 *
135 * The use of this class is quite straight forward:
136 * - Prepare a \b BitReader to point to the beginning of the bit stream generated with
137 * class \b HuffmanEncoder.
138 * - Invoke method \alib{bitbuffer::ac_v1::HuffmanDecoder;ReadTree} once.
139 * - For each encoded byte, invoke \alib{bitbuffer::ac_v1::HuffmanDecoder;Read}.
140 *
141 * Note, that the length of the stream (the number of bytes to be decompressed) have to be
142 * known by the using software. This class is not responsible for storing this piece of information.
143 */
145{
146 protected:
147 /** Internal struct representing nodes of the huffman code tree. */
148 struct Node
149 {
150 Node* left; ///< The left child node.
151 Node* right; ///< The right child node.
152 uint8_t symbol; ///< If this is a leaf node (neither #left nor #right are set, then
153 ///< then this is the symbol found.
154
155 /** Constructor. */
157 : left (nullptr)
158 , right (nullptr)
159 {}
160 };
161
162 static constexpr int MAX_NODES= 511; ///< The maximum number of nodes in the tree.
163
164 BitReader& br; ///< The bit reader given in the constructor.
165 Node tree; ///< The root node of the symbol tree.
166 Node nodePool[MAX_NODES]; ///< Pre-allocated node objects.
167 int npNext= 0; ///< The next node in #nodePool to use.
168
169 public:
170 /**
171 * Constructor.
172 * @param bitReader The bit reader to read the huffman encoding table and then the encoded
173 * symbols from. (Stored in field #br.)
174 */
176 :br(bitReader)
177 {}
178
179 /** Reads the information to decode the data from the beginning of the bit stream.
180 * This method has to be invoked once prior to reading the symbols with method #Read. */
182 void ReadTree();
183
184 /** Reads a symbol from the bit stream.
185 * This method has to be invoked for every symbol stored, after once reading the encoding
186 * information with method #ReadTree.
187 * @return The symbol read.
188 */
189 uint8_t Read()
190 {
191 Node* node= &tree;
192 while(true)
193 {
194 if( node->left == nullptr) // (could also test on right)
195 return node->symbol;
196
197 auto v= br.Read<1>();
198 node= v ? node->right
199 : node->left;
200 }
201 }
202
203}; // HuffmanDecoder
204
205}}} // namespace [alib::bitbuffer::ac_v1]
206
207#endif // HPP_ALIB_BITBUFFER_AC_V1_HUFFMAN
void Write(TIntegral value)
int npNext
The next node in nodePool to use.
Definition huffman.hpp:167
BitReader & br
The bit reader given in the constructor.
Definition huffman.hpp:164
static constexpr int MAX_NODES
The maximum number of nodes in the tree.
Definition huffman.hpp:162
Node tree
The root node of the symbol tree.
Definition huffman.hpp:165
Node nodePool[MAX_NODES]
Pre-allocated node objects.
Definition huffman.hpp:166
HuffmanDecoder(BitReader &bitReader)
Definition huffman.hpp:175
Symbol symbols[256]
The symbol table.
Definition huffman.hpp:63
static constexpr int MAX_CODE_LENGTH
Definition huffman.hpp:46
ALIB_FORCE_INLINE void CountSymbol(uint8_t symbol)
Definition huffman.hpp:83
BitWriter & bw
The bit writer to use for encoding the data.
Definition huffman.hpp:62
#define ALIB_WARNINGS_RESTORE
Definition alib.hpp:715
#define ALIB_FORCE_INLINE
Definition alib.hpp:549
#define ALIB_API
Definition alib.hpp:538
#define ALIB_ASSERT_ERROR(cond,...)
Definition alib.hpp:984
#define ALIB_WARNINGS_ALLOW_UNSAFE_BUFFER_USAGE
Definition alib.hpp:644
#define ALIB_DBG(...)
Definition alib.hpp:457
Definition alib.cpp:57
NLocalString< 256 > NString256
Type alias name for TLocalString<nchar,256> .
int ShiftOpRHS
Type alias in namespace alib.
Node * right
The right child node.
Definition huffman.hpp:151
uint32_t words[MAX_WORDS]
The bitcode of the symbol.
Definition huffman.hpp:57
alib::ShiftOpRHS wordLength
0: symbol not used, otherwise between 1 and 255.
Definition huffman.hpp:56
std::size_t frequency
The number of occurrences of the symbol.
Definition huffman.hpp:55