ALib C++ Library
Library Version: 2412 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
huffman.hpp
Go to the documentation of this file.
1//==================================================================================================
2/// \file
3/// This header file is part of module \alib_bitbuffer of the \aliblong.
4///
5/// \emoji :copyright: 2013-2024 A-Worx GmbH, Germany.
6/// Published under \ref mainpage_license "Boost Software License".
7//==================================================================================================
8#ifndef HPP_ALIB_BITBUFFER_AC_V1_HUFFMAN
9#define HPP_ALIB_BITBUFFER_AC_V1_HUFFMAN 1
10#pragma once
13
14namespace alib { namespace bitbuffer { namespace ac_v1 {
15
16/// This class, together with sibling \alib{bitbuffer::ac_v1;HuffmanDecoder} implements the well known
17/// \https{Huffman compression,en.wikipedia.org/wiki/Huffman_coding}.
18/// The class uses type \alib{bitbuffer;BitWriter} to write the bit stream into an underlying
19/// \alib{bitbuffer;BitBuffer}.
20///
21/// The use of this class is quite straight forward:
22/// - Create an instance of this class
23/// - Feed the data to be compressed once to the algorithm using method
24/// \alib{bitbuffer::ac_v1::HuffmanEncoder;CountSymbol}
25/// - Invoke method \alib{bitbuffer::ac_v1::HuffmanEncoder;Generate}
26/// - Feed all data a second time using method
27/// \alib{bitbuffer::ac_v1::HuffmanEncoder;Write}.
29{
30 public:
31 static constexpr int WORD_SIZE = 32; ///< Codes are stored in two words of size 32
32 ///< (this constant)
33 static constexpr int MAX_WORDS = 2; ///< Codes are stored in two (this constant) words
34 ///< of size 32.
35
36 /// The maximum length of a huffman code.
37 /// In theory, with 256 symbols, a maximum code length of 255 could occur. For this, each
38 /// next symbol needed to have the double frequency than the previous. Hence, setting the
39 /// value to 64, limits the encoded data size to 2^64 symbols, which should be more than enaugh
40 /// for the next decades.
41 static constexpr int MAX_CODE_LENGTH = 64;
42
43 /// Information about the encoding of symbols. The symbol's value (between \c 0 and \c 255)
44 /// is not included, but deduced from the objects' position in the symbol array found in
45 /// field \alib{bitbuffer::ac_v1::HuffmanEncoder;symbols}.
46 struct Symbol
47 {
48 std::size_t frequency = 0; ///< The number of occurrences of the symbol.
49 alib::ShiftOpRHS wordLength = 0; ///< 0: symbol not used, otherwise between 1 and 255.
50 uint32_t words[MAX_WORDS] = {0,0}; ///< The bitcode of the symbol.
51 };
52
53 protected:
54
55 BitWriter& bw; ///< The bit writer to use for encoding the data.
56 Symbol symbols[256]; ///< The symbol table.
57
58ALIB_DBG( bool dbgAllValuesAreSame; )
59
60 public:
61 /// Constructor.
62 /// @param bitWriter The bit writer to write the encoding information as well as the encoded
63 /// symbols to. (Stored in field #bw.)
65 : bw(bitWriter)
66 { }
67
68 /// Counts a symbol.
69 /// This method has to be invoked for each byte to compress later.
70 /// @param symbol The symbol to count.
78
79 /// Generates the huffman encoding table and writes this information to the bit writer.
81 void Generate();
82
83 /// Writes the given \p symbol to the bit stream.
84 /// @param symbol The symbol to write.
85 void Write(uint8_t symbol)
86 {
88 auto& rec = symbols[symbol];
89 int bitsLeft= rec.wordLength;
90 #if ALIB_STRINGS
91 ALIB_ASSERT_ERROR( (bitsLeft != 0) | dbgAllValuesAreSame, "BITBUFFER/AC/HFMN",
92 NString256("Try to write symbol unknown to Huffman table: ")
93 << symbol << ", " << bitsLeft )
94 #else
95 ALIB_ASSERT_ERROR( bitsLeft | dbgAllValuesAreSame, "BITBUFFER/AC/HFMN",
96 "Try to write symbol unknown to Huffman table: ", symbol )
97 #endif
98
99
100 auto* word= rec.words;
101 if( bitsLeft > 32)
102 {
103 bw.Write<32>( *word );
104 word++;
105 bitsLeft-= 32;
106 }
108
109 bw.Write(bitsLeft, *word );
110 }
111
112}; // HuffmanEncoder
113
114/// This class, together with sibling \alib{bitbuffer::ac_v1;HuffmanEncoder} implements the well known
115/// \https{Huffman compression,en.wikipedia.org/wiki/Huffman_coding}.
116/// The class uses type \alib{bitbuffer;BitReader} to read the bit stream from an underlying
117/// \alib{bitbuffer;BitBuffer}.
118///
119/// The use of this class is quite straight forward:
120/// - Prepare a \b BitReader to point to the beginning of the bit stream generated with
121/// class \b HuffmanEncoder.
122/// - Invoke method \alib{bitbuffer::ac_v1::HuffmanDecoder;ReadTree} once.
123/// - For each encoded byte, invoke \alib{bitbuffer::ac_v1::HuffmanDecoder;Read}.
124///
125/// Note, that the length of the stream (the number of bytes to be decompressed) have to be
126/// known by the using software. This class is not responsible for storing this piece of information.
128{
129 protected:
130 /// Internal struct representing nodes of the huffman code tree.
131 struct Node
132 {
133 Node* left; ///< The left child node.
134 Node* right; ///< The right child node.
135 uint8_t symbol; ///< If this is a leaf node (neither #left nor #right are set, then
136 ///< then this is the symbol found.
137
138 /// Constructor.
140 : left (nullptr)
141 , right (nullptr)
142 {}
143 };
144
145 static constexpr int MAX_NODES= 511; ///< The maximum number of nodes in the tree.
146
147 BitReader& br; ///< The bit reader given in the constructor.
148 Node tree; ///< The root node of the symbol tree.
149 Node nodePool[MAX_NODES]; ///< Pre-allocated node objects.
150 int npNext= 0; ///< The next node in #nodePool to use.
151
152 public:
153 /// Constructor.
154 /// @param bitReader The bit reader to read the huffman encoding table and then the encoded
155 /// symbols from. (Stored in field #br.)
157 :br(bitReader)
158 {}
159
160 /// Reads the information to decode the data from the beginning of the bit stream.
161 /// This method has to be invoked once before reading the symbols with method #Read.
163 void ReadTree();
164
165 /// Reads a symbol from the bit stream.
166 /// This method has to be invoked for every symbol stored, after once reading the encoding
167 /// information with method #ReadTree.
168 /// @return The symbol read.
169 uint8_t Read()
170 {
171 Node* node= &tree;
172 while(true)
173 {
174 if( node->left == nullptr) // (could also test on right)
175 return node->symbol;
176
177 auto v= br.Read<1>();
178 node= v ? node->right
179 : node->left;
180 }
181 }
182
183}; // HuffmanDecoder
184
185}}} // namespace [alib::bitbuffer::ac_v1]
186
187#endif // HPP_ALIB_BITBUFFER_AC_V1_HUFFMAN
188
Reads bits from a BitBufferBase.
Writes bits into a BitBufferBase.
void Write(TIntegral value)
int npNext
The next node in nodePool to use.
Definition huffman.hpp:150
BitReader & br
The bit reader given in the constructor.
Definition huffman.hpp:147
static constexpr int MAX_NODES
The maximum number of nodes in the tree.
Definition huffman.hpp:145
Node tree
The root node of the symbol tree.
Definition huffman.hpp:148
Node nodePool[MAX_NODES]
Pre-allocated node objects.
Definition huffman.hpp:149
HuffmanDecoder(BitReader &bitReader)
Definition huffman.hpp:156
ALIB_API void Generate()
Generates the huffman encoding table and writes this information to the bit writer.
Definition huffman.cpp:96
Symbol symbols[256]
The symbol table.
Definition huffman.hpp:56
static constexpr int MAX_CODE_LENGTH
Definition huffman.hpp:41
ALIB_FORCE_INLINE void CountSymbol(uint8_t symbol)
Definition huffman.hpp:72
BitWriter & bw
The bit writer to use for encoding the data.
Definition huffman.hpp:55
#define ALIB_WARNINGS_RESTORE
Definition alib.hpp:849
#define ALIB_FORCE_INLINE
Definition alib.hpp:650
#define ALIB_API
Definition alib.hpp:639
#define ALIB_ASSERT_ERROR(cond,...)
Definition alib.hpp:1271
#define ALIB_WARNINGS_ALLOW_UNSAFE_BUFFER_USAGE
Definition alib.hpp:760
#define ALIB_DBG(...)
Definition alib.hpp:390
Definition alib.cpp:69
NLocalString< 256 > NString256
Type alias name for TLocalString<nchar,256>.
int ShiftOpRHS
Type alias in namespace alib.
Internal struct representing nodes of the huffman code tree.
Definition huffman.hpp:132
Node * right
The right child node.
Definition huffman.hpp:134
uint32_t words[MAX_WORDS]
The bitcode of the symbol.
Definition huffman.hpp:50
alib::ShiftOpRHS wordLength
0: symbol not used, otherwise between 1 and 255.
Definition huffman.hpp:49
std::size_t frequency
The number of occurrences of the symbol.
Definition huffman.hpp:48