ALib C++ Library
Library Version: 2510 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
huffman.inl
Go to the documentation of this file.
1//==================================================================================================
2/// \file
3/// This header-file is part of module \alib_bitbuffer of the \aliblong.
4///
5/// \emoji :copyright: 2013-2025 A-Worx GmbH, Germany.
6/// Published under \ref mainpage_license "Boost Software License".
7//==================================================================================================
8ALIB_EXPORT namespace alib { namespace bitbuffer { namespace ac_v1 {
9
10/// This class, together with sibling \alib{bitbuffer::ac_v1;HuffmanDecoder} implements the well known
11/// \https{Huffman compression,en.wikipedia.org/wiki/Huffman_coding}.
12/// The class uses type \alib{bitbuffer;BitWriter} to write the bit stream into an underlying
13/// \alib{bitbuffer;BitBuffer}.
14///
15/// The use of this class is quite straight forward:
16/// - Create an instance of this class
17/// - Feed the data to be compressed once to the algorithm using method
18/// \alib{bitbuffer::ac_v1::HuffmanEncoder;CountSymbol}
19/// - Invoke method \alib{bitbuffer::ac_v1::HuffmanEncoder;Generate}
20/// - Feed all data a second time using method
21/// \alib{bitbuffer::ac_v1::HuffmanEncoder;Write}.
23{
24 public:
25 static constexpr int WORD_SIZE = 32; ///< Codes are stored in two words of size 32
26 ///< (this constant)
27 static constexpr int MAX_WORDS = 2; ///< Codes are stored in two (this constant) words
28 ///< of size 32.
29
30 /// The maximum length of a huffman code.
31 /// In theory, with 256 symbols, a maximum code length of 255 could occur. For this, each
32 /// next symbol needed to have the double frequency than the previous. Hence, setting the
33 /// value to 64, limits the encoded data size to 2^64 symbols, which should be more than enaugh
34 /// for the next decades.
35 static constexpr int MAX_CODE_LENGTH = 64;
36
37 /// Information about the encoding of symbols. The symbol's value (between \c 0 and \c 255)
38 /// is not included, but deduced from the objects' position in the symbol array found in
39 /// field \alib{bitbuffer::ac_v1::HuffmanEncoder;symbols}.
40 struct Symbol
41 {
42 std::size_t frequency = 0; ///< The number of occurrences of the symbol.
43 alib::ShiftOpRHS wordLength = 0; ///< 0: symbol not used, otherwise between 1 and 255.
44 uint32_t words[MAX_WORDS] = {0,0}; ///< The bitcode of the symbol.
45 };
46
47 protected:
48
49 BitWriter& bw; ///< The bit writer to use for encoding the data.
50 Symbol symbols[256]; ///< The symbol table.
51
52ALIB_DBG( bool dbgAllValuesAreSame; )
53
54 public:
55 /// Constructor.
56 /// @param bitWriter The bit writer to write the encoding information as well as the encoded
57 /// symbols to. (Stored in field #bw.)
59 : bw(bitWriter)
60 { }
61
62 /// Counts a symbol.
63 /// This method has to be invoked for each byte to compress later.
64 /// @param symbol The symbol to count.
65 void CountSymbol(uint8_t symbol)
66 {
67 symbols[symbol].frequency++;
68 }
69
70 /// Generates the huffman encoding table and writes this information to the bit writer.
72 void Generate();
73
74 /// Writes the given \p symbol to the bit stream.
75 /// @param symbol The symbol to write.
76 void Write(uint8_t symbol)
77 {
78 auto& rec = symbols[symbol];
79 int bitsLeft= rec.wordLength;
80 #if ALIB_STRINGS
81 ALIB_ASSERT_ERROR( (bitsLeft != 0) | dbgAllValuesAreSame, "BITBUFFER/AC/HFMN",
82 "Try to write symbol unknown to Huffman table: {}, ", symbol, bitsLeft )
83 #else
84 ALIB_ASSERT_ERROR( bitsLeft | dbgAllValuesAreSame, "BITBUFFER/AC/HFMN",
85 "Try to write symbol unknown to Huffman table: ", symbol )
86 #endif
87
88
89 auto* word= rec.words;
90 if( bitsLeft > 32)
91 {
92 bw.Write<32>( *word );
93 word++;
94 bitsLeft-= 32;
95 }
96
97 bw.Write(bitsLeft, *word );
98 }
99
100}; // HuffmanEncoder
101
102/// This class, together with sibling \alib{bitbuffer::ac_v1;HuffmanEncoder} implements the well known
103/// \https{Huffman compression,en.wikipedia.org/wiki/Huffman_coding}.
104/// The class uses type \alib{bitbuffer;BitReader} to read the bit stream from an underlying
105/// \alib{bitbuffer;BitBuffer}.
106///
107/// The use of this class is quite straight forward:
108/// - Prepare a \b BitReader to point to the beginning of the bit stream generated with
109/// class \b HuffmanEncoder.
110/// - Invoke method \alib{bitbuffer::ac_v1::HuffmanDecoder;ReadTree} once.
111/// - For each encoded byte, invoke \alib{bitbuffer::ac_v1::HuffmanDecoder;Read}.
112///
113/// Note, that the length of the stream (the number of bytes to be decompressed) have to be
114/// known by the using software. This class is not responsible for storing this piece of information.
116{
117 protected:
118 /// Internal struct representing nodes of the huffman code tree.
119 struct Node
120 {
121 Node* left; ///< The left child node.
122 Node* right; ///< The right child node.
123 uint8_t symbol; ///< If this is a leaf node (neither #left nor #right are set, then
124 ///< then this is the symbol found.
125
126 /// Constructor.
128 : left (nullptr)
129 , right (nullptr)
130 {}
131 };
132
133 static constexpr int MAX_NODES= 511; ///< The maximum number of nodes in the tree.
134
135 BitReader& br; ///< The bit reader given in the constructor.
136 Node tree; ///< The root node of the symbol tree.
137 Node nodePool[MAX_NODES]; ///< Pre-allocated node objects.
138 int npNext= 0; ///< The next node in #nodePool to use.
139
140 public:
141 /// Constructor.
142 /// @param bitReader The bit reader to read the huffman encoding table and then the encoded
143 /// symbols from. (Stored in field #br.)
145 :br(bitReader)
146 {}
147
148 /// Reads the information to decode the data from the beginning of the bit stream.
149 /// This method has to be invoked once before reading the symbols with method #Read.
151 void ReadTree();
152
153 /// Reads a symbol from the bit stream.
154 /// This method has to be invoked for every symbol stored, after once reading the encoding
155 /// information with method #ReadTree.
156 /// @return The symbol read.
157 uint8_t Read()
158 {
159 Node* node= &tree;
160 while(true)
161 {
162 if( node->left == nullptr) // (could also test on right)
163 return node->symbol;
164
165 auto v= br.Read<1>();
166 node= v ? node->right
167 : node->left;
168 }
169 }
170
171}; // HuffmanDecoder
172
173}}} // namespace [alib::bitbuffer::ac_v1]
174
Reads bits from a BitBufferBase.
Writes bits into a BitBufferBase.
int npNext
The next node in nodePool to use.
Definition huffman.inl:138
static constexpr int MAX_NODES
The maximum number of nodes in the tree.
Definition huffman.inl:133
HuffmanDecoder(BitReader &bitReader)
Definition huffman.inl:144
Node tree
The root node of the symbol tree.
Definition huffman.inl:136
Node nodePool[MAX_NODES]
Pre-allocated node objects.
Definition huffman.inl:137
BitReader & br
The bit reader given in the constructor.
Definition huffman.inl:135
BitWriter & bw
The bit writer to use for encoding the data.
Definition huffman.inl:49
HuffmanEncoder(BitWriter &bitWriter)
Definition huffman.inl:58
static constexpr int MAX_CODE_LENGTH
Definition huffman.inl:35
Symbol symbols[256]
The symbol table.
Definition huffman.inl:50
ALIB_DLL void Generate()
Generates the huffman encoding table and writes this information to the bit writer.
Definition huffman.cpp:107
#define ALIB_DLL
Definition alib.inl:496
#define ALIB_EXPORT
Definition alib.inl:488
#define ALIB_DBG(...)
Definition alib.inl:836
#define ALIB_ASSERT_ERROR(cond, domain,...)
Definition alib.inl:1049
int ShiftOpRHS
Type alias in namespace alib.
Internal struct representing nodes of the huffman code tree.
Definition huffman.inl:120
Node * right
The right child node.
Definition huffman.inl:122
std::size_t frequency
The number of occurrences of the symbol.
Definition huffman.inl:42
uint32_t words[MAX_WORDS]
The bitcode of the symbol.
Definition huffman.inl:44
alib::ShiftOpRHS wordLength
0: symbol not used, otherwise between 1 and 255.
Definition huffman.inl:43