ALib C++ Library
Library Version: 2511 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
huffman.cpp
1//##################################################################################################
2// ALib C++ Library
3//
4// Copyright 2013-2025 A-Worx GmbH, Germany
5// Published under 'Boost Software License' (a free software license, see LICENSE.txt)
6//##################################################################################################
7#include "alib_precompile.hpp"
8#if !defined(ALIB_C20_MODULES) || ((ALIB_C20_MODULES != 0) && (ALIB_C20_MODULES != 1))
9# error "Symbol ALIB_C20_MODULES has to be given to the compiler as either 0 or 1"
10#endif
11#if ALIB_C20_MODULES
12 module;
13#endif
14//========================================= Global Fragment ========================================
16
17//============================================== Module ============================================
18#if ALIB_C20_MODULES
19 module ALib.BitBuffer;
20 import ALib.Containers.FixedCapacityVector;
21#else
23# include "ALib.BitBuffer.H"
24#endif
25
26namespace alib { namespace bitbuffer { namespace ac_v1 {
27
28#if !DOXYGEN
29//#define TEMP_PT(...) __VA_ARGS__
30# define TEMP_PT(...)
31#endif
32
33#if !DOXYGEN
34namespace
35{
36/// Internal struct representing nodes of the huffman code tree.
37class Node
38{
39 using Symbol= HuffmanEncoder::Symbol;
40
41 protected:
42 /// Either a pointer to a left subtree or to a symbol.
43 /// The latter makes the node a leaf.
44 union LeftPointer
45 {
46 Symbol* symbol; ///< The symbol represented by this node (if not \c nullptr).
47 Node* left; ///< The left subtree.
48
49 LeftPointer() = default; ///< Non-initializing default constructor.
50
51 /// Constructs this union with a symbol pointer.
52 /// @param s The symbol to set.
53 LeftPointer( Symbol* s )
54 : symbol( s ) {}
55
56 /// Constructs this union with a cursor.
57 /// @param l The node to set.
58 LeftPointer( Node* l )
59 : left ( l ) {}
60 };
61
62 LeftPointer left; ///< If #right set, then this is a pointer to the left subtree,
63 ///< otherwise a pointer to a symbol.
64 Node* right; ///< The right subtree.
65
66
67 public:
68 size_t frequency; ///< The frequency of the symbol or the sum of the two subtrees.
69
70 /// Default constructor used when defining arrays of nodes on stack memory.
71 Node() =default;
72
73 /// Constructs a node representing a symbol (leaf).
74 /// Left and right pointers are set to nullptr
75 /// @param s Pointer to the symbol in #symbols, that this node represents.
76 Node(Symbol* s)
77 : left (s)
78 , right (nullptr)
79 , frequency (s->frequency) {}
80
81 /// Constructs a node as an internal non-leaf node.
82 /// Left and right pointers are set to as given.
83 /// @param l Pointer to the left node.
84 /// @param r Pointer to the right node.
85 Node(Node* l, Node* r)
86 : left (l)
87 , right (r)
88 , frequency (l->frequency + r->frequency) {}
89
90 /// Determines if this is a leaf node.
91 /// @returns \c true if this is a leaf node.
92 bool isLeaf() const { return right == nullptr; }
93
94 /// Must be called only for leaf nodes.
95 /// @returns The pointer to the symbol.
96 Symbol* getSymbol() const { return left.symbol; }
97
98 /// Must be called only for non-leaf nodes.
99 /// @returns The left node.
100 Node* getLeft() const { return left.left; }
101
102 /// @returns The right node.
103 Node* getRight() const { return right; }
104}; //struct Node
105} // anonymous namespace
106
107#endif
108
109
111 constexpr int maxNodesNeeded= 256 + 255;
112 Node nodePool[maxNodesNeeded];
113
114 // buildTree
115 Node* tree;
116 {
117 struct cmpHN { bool operator()(Node* l, Node* r) { return (l->frequency > r->frequency); } };
118
119 // create a list of nodes, sorted by frequency (least in front)
120 // Note: for this, we first create fixed array on the stack, where we fill the
121 // nodes unsorted. Then we move this into the priority queue, what sorts them
122 // once. (unfortunately the move copies the array, but still it is faster than
123 // if the heap was sorted with each insert)
124 int npNext= 0;
125 FixedCapacityVector<Node*, 256> underlyingNodeVector;
126 for (std::size_t i = 0; i < 256 ; ++i)
127 if( (symbols + i)->frequency > 0 )
128 underlyingNodeVector.push_back( new (nodePool + npNext++) Node(symbols + i) );
129 FixedSizePriorityQueue<Node*, 256, cmpHN> sortedNodes(cmpHN(), std::move(underlyingNodeVector));
130
131ALIB_DBG( dbgAllValuesAreSame= (sortedNodes.size() == 1); )
132 // Merge two front nodes into one, until one node remains
133 while (sortedNodes.size() > 1) {
134 // Extract two least freq nodes
135 Node* left = sortedNodes.top(); sortedNodes.pop();
136 Node* right= sortedNodes.top(); sortedNodes.pop();
137
138 // New internal node with frequency equal to the sum of the two nodes frequencies.
139 // The two extracted nodes become left and right children
140 sortedNodes.push(new ( nodePool + npNext++ ) Node(left, right) );
141 }
142
143 tree= sortedNodes.top();
144 ALIB_ASSERT_ERROR( npNext <= maxNodesNeeded , "BITBUFFER/AC/HFMN", "This can never happen" )
145 }
146
147 // generate codes and write tree information to bit buffer
148 {
149 struct Stack
150 {
151 Node* node;
152 int walked;
153 };
154
155 int depth = 0;
156 Stack stack[MAX_CODE_LENGTH];
157 stack[0] = Stack{ tree, 0 };
158 uint32_t words[MAX_WORDS] = {};
159
160TEMP_PT(Log_Warning("------ Huffman Encoding Table ----------") )
161
162 // 'recursion loop'
163 while(depth>=0) {
164 auto* node= stack[depth].node;
165
166 auto wordNo= depth / WORD_SIZE;
167 auto bitNo = depth % WORD_SIZE;
168
169 // leaf?
170 if( node->isLeaf() ) {
171 // write '1' for leaf and symbol value
172 bw.Write<9>( 1 | static_cast<unsigned>(node->getSymbol() - symbols) << 1 );
173
174 // store word length and words to symbol's data
175 node->getSymbol()->wordLength= depth;
176 for( int i= 0 ; i <= wordNo ; ++i )
177 node->getSymbol()->words[i]= words[i];
178
179TEMP_PT( NString512 bits; bits << Bin(node->symbol->words[0], node->symbol->wordLength);
180 bits.Reverse();
181 Lox_Warning("HM I: {:3}: {:<15} (len={!ATAB:2}, freq={:>5})",
182 (node->symbol - symbols),
183 bits,
184 node->symbol->wordLength,
185 node->symbol->frequency ) )
186
187 // clear last bit and step up
188 words[wordNo]&= ~(uint32_t(1) << ( bitNo ) );
189 --depth;
190 continue;
191 }
192
193 // write '0' for not being a leave
194 bw.Write<1>( 0u );
195
196 // process left child
197 if( stack[depth].walked == 0) {
198 stack[depth].walked++;
199 stack[depth+1]= Stack{ node->getLeft() , 0};
200 depth++;
201 continue;
202 }
203
204 // process right child
205 if( stack[depth].walked == 1) {
206 stack[depth].walked++;
207 words[wordNo] |= (1 << ( bitNo ) );
208 stack[depth+1]= Stack{ node->getRight() , 0};
209 depth++;
210 continue;
211 }
212
213 // clear last bit and step up
214 words[wordNo]&= ~(uint32_t(1) << ( bitNo ) );
215 --depth;
216 }
217TEMP_PT( Log_Warning("------End of Huffman Encoding Table ----------") )
218} }
219
221TEMP_PT( Log_Warning("------ Huffman Decoding Table ----------") )
222TEMP_PT( String512 dbgWord; )
223 struct Stack
224 {
225 Node* node;
226 int read; // 0: none, 1: left, 2: both
227 };
228
229 int depth;
231 stack[depth= 0]= Stack{ &tree, 0 };
232 while(depth>=0) {
233 auto* node= stack[depth].node;
234 // leaf?
235 if( br.Read<1>() ) {
236 node->symbol= uint8_t(br.Read<8>());
237TEMP_PT( Log_Warning("HM D: {:3}: {:5} (len={})",
238 (node->symbol),
239 dbgWord,
240 dbgWord.Length() ) )
241
242 --depth;
243TEMP_PT( dbgWord.DeleteEnd(1); )
244 continue;
245 }
246
247 // left 'recursion'
248 if( stack[depth].read == 0) {
249 ++stack[depth].read;
250 node->left= new ( nodePool + npNext++ ) Node();
251TEMP_PT( dbgWord << '0'; )
252 stack[depth+1]= Stack{ node->left , 0 };
253 ++depth;
254 continue;
255 }
256
257 // right 'recursion'
258 if( stack[depth].read == 1) {
259 ++stack[depth].read;
260TEMP_PT( dbgWord << '1'; )
261 node->right= new ( nodePool + npNext++ ) Node();
262 stack[depth+1]= Stack{ node->right , 0 };
263 ++depth;
264 continue;
265 }
266
267TEMP_PT( dbgWord.DeleteEnd(1); )
268 --depth;
269 }
270
271ALIB_ASSERT_ERROR( npNext <= MAX_NODES, "BITBUFFER/AC/HFMN", "This can never happen" )
272
273TEMP_PT( Log_Warning("------End of Huffman Decoding Table ----------") )
274}
275
276
277
278
279#undef TEMP_PT
280
281}}} // namespace [alib::bitbuffer::ac_v1]
int npNext
The next node in nodePool to use.
Definition huffman.inl:131
static constexpr int MAX_NODES
The maximum number of nodes in the tree.
Definition huffman.inl:126
Node tree
The root node of the symbol tree.
Definition huffman.inl:129
Node nodePool[MAX_NODES]
Pre-allocated node objects.
Definition huffman.inl:130
BitReader & br
The bit reader given in the constructor.
Definition huffman.inl:128
BitWriter & bw
The bit writer to use for encoding the data.
Definition huffman.inl:49
static constexpr int MAX_CODE_LENGTH
Definition huffman.inl:35
Symbol symbols[256]
The symbol table.
Definition huffman.inl:50
ALIB_DLL void Generate()
Generates the huffman encoding table and writes this information to the bit writer.
Definition huffman.cpp:110
#define Log_Warning(...)
#define ALIB_DBG(...)
Definition alib.inl:853
#define ALIB_ASSERT_ERROR(cond, domain,...)
Definition alib.inl:1066
#define Lox_Warning(...)
std::priority_queue< T, FixedCapacityVector< T, TSize >, TCompare > FixedSizePriorityQueue
LocalString< 512 > String512
Type alias name for TLocalString<character,512>.
containers::FixedCapacityVector< T, TSize > FixedCapacityVector
Type alias in namespace alib.
NLocalString< 512 > NString512
Type alias name for TLocalString<nchar,512>.
strings::TBin< character > Bin
Type alias in namespace alib.
Definition format.inl:568
Internal struct representing nodes of the huffman code tree.
Definition huffman.inl:114