mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-06-10 18:10:56 +09:00
LibCompress: Remove unused Lzma compression and decompression
This commit is contained in:
parent
f30fd8af4c
commit
0c882c441e
Notes:
github-actions[bot]
2025-02-10 16:23:35 +00:00
Author: https://github.com/cqundefine
Commit: 0c882c441e
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/3510
Reviewed-by: https://github.com/AtkinsSJ ✅
12 changed files with 0 additions and 1938 deletions
|
@ -150,10 +150,6 @@
|
|||
# cmakedefine01 LINE_EDITOR_DEBUG
|
||||
#endif
|
||||
|
||||
#ifndef LZMA_DEBUG
|
||||
# cmakedefine01 LZMA_DEBUG
|
||||
#endif
|
||||
|
||||
#ifndef LZW_DEBUG
|
||||
# cmakedefine01 LZW_DEBUG
|
||||
#endif
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
set(SOURCES
|
||||
Deflate.cpp
|
||||
Lzma.cpp
|
||||
PackBitsDecoder.cpp
|
||||
Zlib.cpp
|
||||
Gzip.cpp
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,276 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Tim Schumacher <timschumi@gmx.de>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/CircularBuffer.h>
|
||||
#include <AK/FixedArray.h>
|
||||
#include <AK/MaybeOwned.h>
|
||||
#include <AK/NonnullOwnPtr.h>
|
||||
#include <AK/Stream.h>
|
||||
|
||||
namespace Compress {
|
||||
|
||||
// This implementation is mostly based on the LZMA specification contained in the 7-Zip SDK, which has been placed in the public domain.
|
||||
// LZMA Specification Draft (2015): https://www.7-zip.org/a/lzma-specification.7z
|
||||
|
||||
struct LzmaModelProperties {
|
||||
u8 literal_context_bits;
|
||||
u8 literal_position_bits;
|
||||
u8 position_bits;
|
||||
};
|
||||
|
||||
struct LzmaDecompressorOptions {
|
||||
u8 literal_context_bits { 0 };
|
||||
u8 literal_position_bits { 0 };
|
||||
u8 position_bits { 0 };
|
||||
u32 dictionary_size { 0 };
|
||||
Optional<u64> uncompressed_size;
|
||||
bool reject_end_of_stream_marker { false };
|
||||
};
|
||||
|
||||
struct LzmaCompressorOptions {
|
||||
// Note: The default settings have been chosen based on the default settings of other LZMA compressors.
|
||||
u8 literal_context_bits { 3 };
|
||||
u8 literal_position_bits { 0 };
|
||||
u8 position_bits { 2 };
|
||||
u32 dictionary_size { 8 * MiB };
|
||||
Optional<u64> uncompressed_size {};
|
||||
};
|
||||
|
||||
// Described in section "lzma file format".
|
||||
struct [[gnu::packed]] LzmaHeader {
|
||||
u32 dictionary_size() const;
|
||||
Optional<u64> uncompressed_size() const;
|
||||
|
||||
ErrorOr<LzmaDecompressorOptions> as_decompressor_options() const;
|
||||
static ErrorOr<LzmaHeader> from_compressor_options(LzmaCompressorOptions const&);
|
||||
|
||||
static ErrorOr<LzmaModelProperties> decode_model_properties(u8 input_bits);
|
||||
static ErrorOr<u8> encode_model_properties(LzmaModelProperties const&);
|
||||
|
||||
u8 encoded_model_properties;
|
||||
u32 unchecked_dictionary_size;
|
||||
u64 encoded_uncompressed_size;
|
||||
|
||||
static constexpr u64 placeholder_for_unknown_uncompressed_size = UINT64_MAX;
|
||||
};
|
||||
static_assert(sizeof(LzmaHeader) == 13);
|
||||
|
||||
class LzmaState {
|
||||
protected:
|
||||
// LZMA uses 11-bit probability counters, but they are usually stored in 16-bit variables.
|
||||
// Therefore, we can model probabilities with a resolution of up to 1 / 2^11 (which is equal to 1 / 2048).
|
||||
// The default probability for most counters is 0.5.
|
||||
using Probability = u16;
|
||||
static constexpr size_t probability_bit_count = 11;
|
||||
static constexpr Probability default_probability = (1 << probability_bit_count) / 2;
|
||||
static void initialize_to_default_probability(Span<Probability>);
|
||||
|
||||
// The significance of the shift width is not explained and appears to be a magic constant.
|
||||
static constexpr size_t probability_shift_width = 5;
|
||||
|
||||
// "The value of the "Range" variable before each bit decoding can not be smaller than ((UInt32)1 << 24)."
|
||||
static constexpr u32 minimum_range_value = 1 << 24;
|
||||
|
||||
LzmaState(FixedArray<Probability> literal_probabilities);
|
||||
|
||||
u64 m_total_processed_bytes { 0 };
|
||||
|
||||
static constexpr size_t literal_probability_table_size = 0x300;
|
||||
FixedArray<Probability> m_literal_probabilities;
|
||||
|
||||
struct LzmaLengthCoderState {
|
||||
public:
|
||||
LzmaLengthCoderState();
|
||||
|
||||
Probability m_first_choice_probability { default_probability };
|
||||
Probability m_second_choice_probability { default_probability };
|
||||
|
||||
static constexpr size_t maximum_number_of_position_bits = 4;
|
||||
Array<Array<Probability, (1 << 3)>, (1 << maximum_number_of_position_bits)> m_low_length_probabilities;
|
||||
Array<Array<Probability, (1 << 3)>, (1 << maximum_number_of_position_bits)> m_medium_length_probabilities;
|
||||
Array<Probability, (1 << 8)> m_high_length_probabilities;
|
||||
};
|
||||
|
||||
LzmaLengthCoderState m_length_coder;
|
||||
LzmaLengthCoderState m_rep_length_coder;
|
||||
|
||||
static constexpr u16 normalized_to_real_match_length_offset = 2;
|
||||
static constexpr u32 normalized_to_real_match_distance_offset = 1;
|
||||
|
||||
// According to the specification, the largest possible normalized match length is provided by the high coder,
|
||||
// which processes 8 bits (0 to 255) and adds a displacement of 16 on top.
|
||||
// This is the minimum size that our input buffer has to have to not miss any possible repetitions while encoding.
|
||||
static constexpr u16 largest_real_match_length = 255 + 16 + normalized_to_real_match_length_offset;
|
||||
|
||||
static constexpr u32 end_of_stream_marker = 0xFFFFFFFF;
|
||||
|
||||
static constexpr size_t number_of_length_to_position_states = 4;
|
||||
Array<Array<Probability, (1 << 6)>, number_of_length_to_position_states> m_length_to_position_states;
|
||||
|
||||
static constexpr size_t first_position_slot_with_binary_tree_bits = 4;
|
||||
static constexpr size_t first_position_slot_with_direct_encoded_bits = 14;
|
||||
|
||||
// This is a bit wasteful on memory and not in the specification, but it makes the math easier.
|
||||
static constexpr size_t number_of_binary_tree_distance_slots = first_position_slot_with_direct_encoded_bits - first_position_slot_with_binary_tree_bits;
|
||||
static constexpr size_t largest_number_of_binary_tree_distance_bits = 5;
|
||||
Array<Array<Probability, (1 << largest_number_of_binary_tree_distance_bits)>, number_of_binary_tree_distance_slots> m_binary_tree_distance_probabilities;
|
||||
|
||||
static constexpr size_t number_of_alignment_bits = 4;
|
||||
Array<Probability, (1 << number_of_alignment_bits)> m_alignment_bit_probabilities;
|
||||
|
||||
// LZ state tracking.
|
||||
u16 m_state { 0 };
|
||||
u32 m_rep0 { 0 };
|
||||
u32 m_rep1 { 0 };
|
||||
u32 m_rep2 { 0 };
|
||||
u32 m_rep3 { 0 };
|
||||
u32 current_repetition_offset() const;
|
||||
|
||||
void update_state_after_literal();
|
||||
void update_state_after_match();
|
||||
void update_state_after_rep();
|
||||
void update_state_after_short_rep();
|
||||
|
||||
static constexpr size_t maximum_number_of_position_bits = 4;
|
||||
static constexpr size_t number_of_states = 12;
|
||||
Array<Probability, (number_of_states << maximum_number_of_position_bits)> m_is_match_probabilities;
|
||||
Array<Probability, number_of_states> m_is_rep_probabilities;
|
||||
Array<Probability, number_of_states> m_is_rep_g0_probabilities;
|
||||
Array<Probability, number_of_states> m_is_rep_g1_probabilities;
|
||||
Array<Probability, number_of_states> m_is_rep_g2_probabilities;
|
||||
Array<Probability, (number_of_states << maximum_number_of_position_bits)> m_is_rep0_long_probabilities;
|
||||
|
||||
enum MatchType {
|
||||
Literal,
|
||||
SimpleMatch,
|
||||
RepMatch0,
|
||||
ShortRepMatch,
|
||||
RepMatch1,
|
||||
RepMatch2,
|
||||
RepMatch3,
|
||||
};
|
||||
};
|
||||
|
||||
class LzmaDecompressor : public Stream
|
||||
, LzmaState {
|
||||
public:
|
||||
/// Creates a decompressor from a standalone LZMA container (.lzma file extension, occasionally known as an LZMA 'archive').
|
||||
static ErrorOr<NonnullOwnPtr<LzmaDecompressor>> create_from_container(MaybeOwned<Stream>, Optional<MaybeOwned<CircularBuffer>> dictionary = {});
|
||||
|
||||
/// Creates a decompressor from a raw stream of LZMA-compressed data (found inside an LZMA container or embedded in other file formats).
|
||||
static ErrorOr<NonnullOwnPtr<LzmaDecompressor>> create_from_raw_stream(MaybeOwned<Stream>, LzmaDecompressorOptions const&, Optional<MaybeOwned<CircularBuffer>> dictionary = {});
|
||||
|
||||
ErrorOr<void> append_input_stream(MaybeOwned<Stream>, Optional<u64> uncompressed_size);
|
||||
|
||||
virtual ErrorOr<Bytes> read_some(Bytes) override;
|
||||
virtual ErrorOr<size_t> write_some(ReadonlyBytes) override;
|
||||
virtual bool is_eof() const override;
|
||||
virtual bool is_open() const override;
|
||||
virtual void close() override;
|
||||
|
||||
private:
|
||||
LzmaDecompressor(MaybeOwned<Stream>, LzmaDecompressorOptions, MaybeOwned<CircularBuffer>, FixedArray<Probability> literal_probabilities);
|
||||
|
||||
MaybeOwned<Stream> m_stream;
|
||||
LzmaDecompressorOptions m_options;
|
||||
|
||||
// This doubles as an output buffer, since we have to write all of our results into this anyways.
|
||||
MaybeOwned<CircularBuffer> m_dictionary;
|
||||
bool m_found_end_of_stream_marker { false };
|
||||
bool is_range_decoder_in_clean_state() const;
|
||||
bool has_reached_expected_data_size() const;
|
||||
Optional<u16> m_leftover_match_length;
|
||||
|
||||
// Range decoder state (initialized with stream data in LzmaDecompressor::create).
|
||||
u32 m_range_decoder_range { 0xFFFFFFFF };
|
||||
u32 m_range_decoder_code { 0 };
|
||||
|
||||
ErrorOr<void> initialize_range_decoder();
|
||||
ErrorOr<void> normalize_range_decoder();
|
||||
ErrorOr<u8> decode_direct_bit();
|
||||
ErrorOr<u8> decode_bit_with_probability(Probability& probability);
|
||||
|
||||
ErrorOr<MatchType> decode_match_type();
|
||||
|
||||
// Decodes a multi-bit symbol using a given probability tree (either in normal or in reverse order).
|
||||
// The specification states that "unsigned" is at least 16 bits in size, our implementation assumes this as the maximum symbol size.
|
||||
ErrorOr<u16> decode_symbol_using_bit_tree(size_t bit_count, Span<Probability> probability_tree);
|
||||
ErrorOr<u16> decode_symbol_using_reverse_bit_tree(size_t bit_count, Span<Probability> probability_tree);
|
||||
|
||||
ErrorOr<void> decode_literal_to_output_buffer();
|
||||
|
||||
ErrorOr<u16> decode_normalized_match_length(LzmaLengthCoderState&);
|
||||
|
||||
// This deviates from the specification, which states that "unsigned" is at least 16-bit.
|
||||
// However, the match distance needs to be at least 32-bit, at the very least to hold the 0xFFFFFFFF end marker value.
|
||||
ErrorOr<u32> decode_normalized_match_distance(u16 normalized_match_length);
|
||||
};
|
||||
|
||||
class LzmaCompressor : public Stream
|
||||
, LzmaState {
|
||||
public:
|
||||
/// Creates a compressor for a standalone LZMA container (.lzma file extension, occasionally known as an LZMA 'archive').
|
||||
static ErrorOr<NonnullOwnPtr<LzmaCompressor>> create_container(MaybeOwned<Stream>, LzmaCompressorOptions const&);
|
||||
|
||||
/// Finishes the archive by writing out the remaining data from the range coder.
|
||||
ErrorOr<void> flush();
|
||||
|
||||
virtual ErrorOr<Bytes> read_some(Bytes) override;
|
||||
virtual ErrorOr<size_t> write_some(ReadonlyBytes) override;
|
||||
virtual bool is_eof() const override;
|
||||
virtual bool is_open() const override;
|
||||
virtual void close() override;
|
||||
|
||||
virtual ~LzmaCompressor();
|
||||
|
||||
private:
|
||||
LzmaCompressor(MaybeOwned<Stream>, LzmaCompressorOptions, MaybeOwned<SearchableCircularBuffer>, FixedArray<Probability> literal_probabilities);
|
||||
|
||||
ErrorOr<void> shift_range_encoder();
|
||||
ErrorOr<void> normalize_range_encoder();
|
||||
ErrorOr<void> encode_direct_bit(u8 value);
|
||||
ErrorOr<void> encode_bit_with_probability(Probability&, u8 value);
|
||||
ErrorOr<void> encode_symbol_using_bit_tree(size_t bit_count, Span<Probability> probability_tree, u16 value);
|
||||
ErrorOr<void> encode_symbol_using_reverse_bit_tree(size_t bit_count, Span<Probability> probability_tree, u16 value);
|
||||
ErrorOr<void> encode_normalized_match_length(LzmaLengthCoderState&, u16 normalized_length);
|
||||
ErrorOr<void> encode_normalized_match_distance(u16 normalized_match_length, u32 normalized_match_distance);
|
||||
|
||||
ErrorOr<void> encode_match_type(MatchType);
|
||||
ErrorOr<void> encode_literal(u8 literal);
|
||||
ErrorOr<void> encode_existing_match(size_t real_distance, size_t real_length);
|
||||
ErrorOr<void> encode_new_match(size_t real_distance, size_t real_length);
|
||||
ErrorOr<void> encode_normalized_simple_match(u32 normalized_distance, u16 normalized_length);
|
||||
|
||||
ErrorOr<void> encode_once();
|
||||
|
||||
bool m_has_flushed_data { false };
|
||||
|
||||
MaybeOwned<Stream> m_stream;
|
||||
LzmaCompressorOptions m_options;
|
||||
|
||||
// This doubles as an input buffer, which is appended at the very front of the buffer.
|
||||
// Therefore, the size of this should at least be the dictionary size + the largest possible repetition length.
|
||||
MaybeOwned<SearchableCircularBuffer> m_dictionary;
|
||||
|
||||
// Range encoder state.
|
||||
u32 m_range_encoder_range { 0xFFFFFFFF };
|
||||
u64 m_range_encoder_code { 0 };
|
||||
|
||||
// Since the range is only 32-bits, we can overflow at most +1 into the next byte beyond the usual 32-bit code.
|
||||
// Therefore, it is sufficient to store the highest byte (which may still change due to that +1 overflow) and
|
||||
// the length of the chain of 0xFF bytes that may end up propagating that change.
|
||||
u8 m_range_encoder_cached_byte { 0x00 };
|
||||
size_t m_range_encoder_ff_chain_length { 0 };
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
template<>
|
||||
struct AK::Traits<Compress::LzmaHeader> : public AK::DefaultTraits<Compress::LzmaHeader> {
|
||||
static constexpr bool is_trivially_serializable() { return true; }
|
||||
};
|
|
@ -33,7 +33,6 @@ set(LIBWEB_CSS_ANIMATION_DEBUG ON)
|
|||
set(LIBWEB_CSS_DEBUG ON)
|
||||
set(LIBWEB_WASM_DEBUG ON)
|
||||
set(LINE_EDITOR_DEBUG ON)
|
||||
set(LZMA_DEBUG ON)
|
||||
set(LZW_DEBUG ON)
|
||||
set(MACH_PORT_DEBUG ON)
|
||||
set(MATROSKA_DEBUG ON)
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Tim Schumacher <timschumi@gmx.de>.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/MemoryStream.h>
|
||||
#include <LibCompress/Lzma.h>
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
|
||||
{
|
||||
AK::set_debug_enabled(false);
|
||||
|
||||
// LibFuzzer has a default memory limit of 2048 MB, so limit the dictionary size to a
|
||||
// reasonable number to make sure that we don't actually run into it by allocating a
|
||||
// huge dictionary. The chosen value is double of what the largest dictionary in the
|
||||
// specifications test files is, so it should be more than enough for fuzzing everything
|
||||
// that we would want to fuzz.
|
||||
constexpr size_t largest_reasonable_dictionary_size = 16 * MiB;
|
||||
|
||||
if (size >= sizeof(Compress::LzmaHeader)) {
|
||||
auto const* header = reinterpret_cast<Compress::LzmaHeader const*>(data);
|
||||
if (header->dictionary_size() > largest_reasonable_dictionary_size)
|
||||
return -1;
|
||||
}
|
||||
|
||||
auto stream = make<FixedMemoryStream>(ReadonlyBytes { data, size });
|
||||
auto decompressor_or_error = Compress::LzmaDecompressor::create_from_container(move(stream));
|
||||
if (decompressor_or_error.is_error())
|
||||
return 0;
|
||||
auto decompressor = decompressor_or_error.release_value();
|
||||
while (!decompressor->is_eof()) {
|
||||
auto maybe_error = decompressor->discard(4096);
|
||||
if (maybe_error.is_error())
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Tim Schumacher <timschumi@gmx.de>.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/MemoryStream.h>
|
||||
#include <LibCompress/Lzma.h>
|
||||
|
||||
// Fuzzer entry point: compresses the input into an in-memory LZMA container, decompresses it again
// and verifies that the result equals the input. Any failure along the way aborts the process.
extern "C" int LLVMFuzzerTestOneInput(uint8_t const* data, size_t size)
{
    AK::set_debug_enabled(false);

    ReadonlyBytes const input { data, size };

    // Both the compressor and the decompressor borrow this stream.
    AllocatingMemoryStream container_stream {};

    auto compressor = MUST(Compress::LzmaCompressor::create_container(MaybeOwned<Stream> { container_stream }, {}));
    MUST(compressor->write_until_depleted(input));
    MUST(compressor->flush());

    auto decompressor = MUST(Compress::LzmaDecompressor::create_from_container(MaybeOwned<Stream> { container_stream }));
    auto roundtripped = MUST(decompressor->read_until_eof());

    VERIFY(input == roundtripped.span());

    return 0;
}
|
|
@ -12,8 +12,6 @@ set(FUZZER_TARGETS
|
|||
JPEGLoader
|
||||
Js
|
||||
JsonParser
|
||||
LzmaDecompression
|
||||
LzmaRoundtrip
|
||||
MatroskaReader
|
||||
MD5
|
||||
PEM
|
||||
|
@ -56,8 +54,6 @@ set(FUZZER_DEPENDENCIES_GzipRoundtrip LibCompress)
|
|||
set(FUZZER_DEPENDENCIES_ICOLoader LibGfx)
|
||||
set(FUZZER_DEPENDENCIES_JPEGLoader LibGfx)
|
||||
set(FUZZER_DEPENDENCIES_Js LibJS LibGC)
|
||||
set(FUZZER_DEPENDENCIES_LzmaDecompression LibCompress)
|
||||
set(FUZZER_DEPENDENCIES_LzmaRoundtrip LibCompress)
|
||||
set(FUZZER_DEPENDENCIES_MatroskaReader LibMedia)
|
||||
set(FUZZER_DEPENDENCIES_MD5 LibCrypto)
|
||||
set(FUZZER_DEPENDENCIES_PEM LibCrypto)
|
||||
|
|
|
@ -254,7 +254,6 @@ write_cmake_config("ak_debug_gen") {
|
|||
"LIBWEB_CSS_DEBUG=",
|
||||
"LIBWEB_WASM_DEBUG=",
|
||||
"LINE_EDITOR_DEBUG=",
|
||||
"LZMA_DEBUG=",
|
||||
"LZW_DEBUG=",
|
||||
"MACH_PORT_DEBUG=",
|
||||
"MATROSKA_DEBUG=",
|
||||
|
|
|
@ -4,7 +4,6 @@ shared_library("LibCompress") {
|
|||
sources = [
|
||||
"Deflate.cpp",
|
||||
"Gzip.cpp",
|
||||
"Lzma.cpp",
|
||||
"PackBitsDecoder.cpp",
|
||||
"Zlib.cpp",
|
||||
]
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
set(TEST_SOURCES
|
||||
TestDeflate.cpp
|
||||
TestGzip.cpp
|
||||
TestLzma.cpp
|
||||
TestLzw.cpp
|
||||
TestPackBits.cpp
|
||||
TestZlib.cpp
|
||||
|
|
|
@ -1,257 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Tim Schumacher <timschumi@gmx.de>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibTest/TestCase.h>
|
||||
|
||||
#include <AK/MemoryStream.h>
|
||||
#include <LibCompress/Lzma.h>
|
||||
|
||||
TEST_CASE(repetition_length_beyond_distance)
{
    // A repetition may be longer than its distance, in which case part of the repeated data has
    // only been written to the dictionary earlier during that very same repetition.
    // This input is too small to tell how well that case is optimized, but it still catches
    // implementations that handle it incorrectly as a whole.

    Array<u8, 21> const container {
        0x5D,                                           // Model properties (lc = 3, lp = 0, pb = 2)
        0x00, 0x10, 0x00, 0x00,                         // Dictionary size (4 KB)
        0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Uncompressed size (7)

        // Encode a literal 'A' and a literal 'B', followed by a repetition from (real) distance 2 with a (real) length of 5.
        0x00, 0x20, 0x90, 0x9F, 0x04, 0x00, 0x00, 0x00
    };

    auto input = MUST(try_make<FixedMemoryStream>(container));
    auto lzma = MUST(Compress::LzmaDecompressor::create_from_container(move(input)));
    auto decoded = TRY_OR_FAIL(lzma->read_until_eof(PAGE_SIZE));

    EXPECT_EQ("ABABABA"sv.bytes(), decoded.span());
}
|
||||
|
||||
TEST_CASE(compress_decompress_roundtrip_with_known_size)
{
    auto const plaintext = "Well hello friends, this is a simple text file :)"sv.bytes();

    // The uncompressed size is announced to the compressor up front.
    Compress::LzmaCompressorOptions const options {
        .literal_context_bits = 3,
        .literal_position_bits = 0,
        .position_bits = 2,
        .dictionary_size = 4 * KiB,
        .uncompressed_size = plaintext.size(),
    };

    // The compressor and the decompressor share this stream.
    auto container = MUST(try_make<AllocatingMemoryStream>());

    // Compress. No explicit flush() is issued before reading the data back.
    auto compressor = TRY_OR_FAIL(Compress::LzmaCompressor::create_container(MaybeOwned<Stream> { *container }, options));
    TRY_OR_FAIL(compressor->write_until_depleted(plaintext));

    // Decompress and compare against what went in.
    auto decompressor = TRY_OR_FAIL(Compress::LzmaDecompressor::create_from_container(MaybeOwned<Stream> { *container }));
    auto roundtripped = TRY_OR_FAIL(decompressor->read_until_eof());

    EXPECT_EQ(plaintext, roundtripped.span());
}
|
||||
|
||||
TEST_CASE(compress_decompress_roundtrip_with_unknown_size)
{
    auto const plaintext = "Well hello friends, this is a simple text file :)"sv.bytes();

    // `uncompressed_size` is deliberately left out, so the compressor does not know the input size.
    Compress::LzmaCompressorOptions const options {
        .literal_context_bits = 3,
        .literal_position_bits = 0,
        .position_bits = 2,
        .dictionary_size = 4 * KiB,
    };

    // The compressor and the decompressor share this stream.
    auto container = MUST(try_make<AllocatingMemoryStream>());

    // Compress and finish the archive.
    auto compressor = TRY_OR_FAIL(Compress::LzmaCompressor::create_container(MaybeOwned<Stream> { *container }, options));
    TRY_OR_FAIL(compressor->write_until_depleted(plaintext));
    TRY_OR_FAIL(compressor->flush());

    // Decompress and compare against what went in.
    auto decompressor = TRY_OR_FAIL(Compress::LzmaDecompressor::create_from_container(MaybeOwned<Stream> { *container }));
    auto roundtripped = TRY_OR_FAIL(decompressor->read_until_eof());

    EXPECT_EQ(plaintext, roundtripped.span());
}
|
||||
|
||||
TEST_CASE(compress_long_overflow_chain)
{
    // A single 0xFF followed by the end-of-stream marker encodes to a chain of bytes that doesn't fit into 64 bits.
    // Naive implementations of "hold back the byte until it no longer changes" break on this input.

    Array<u8, 1> const plaintext { 0xFF };

    // The compressor and the decompressor share this stream.
    auto container = MUST(try_make<AllocatingMemoryStream>());

    // Compress with default options and finish the archive.
    auto compressor = TRY_OR_FAIL(Compress::LzmaCompressor::create_container(MaybeOwned<Stream> { *container }, {}));
    TRY_OR_FAIL(compressor->write_until_depleted(plaintext));
    TRY_OR_FAIL(compressor->flush());

    // Decompress and compare against what went in.
    auto decompressor = TRY_OR_FAIL(Compress::LzmaDecompressor::create_from_container(MaybeOwned<Stream> { *container }));
    auto roundtripped = TRY_OR_FAIL(decompressor->read_until_eof());

    EXPECT_EQ(plaintext, roundtripped.span());
}
|
||||
|
||||
// The following tests are based on test files from the LZMA specification, which has been placed in the public domain.
|
||||
// LZMA Specification Draft (2015): https://www.7-zip.org/a/lzma-specification.7z
|
||||
|
||||
// Expected plaintext for the specification_a_* test cases below: 327 bytes of ASCII text with
// CRLF line endings, starting with the line "LZMA decoder test example".
Array<u8, 327> const specification_a_txt {
    0x4C, 0x5A, 0x4D, 0x41, 0x20, 0x64, 0x65, 0x63, 0x6F, 0x64, 0x65, 0x72, 0x20, 0x74, 0x65, 0x73,
    0x74, 0x20, 0x65, 0x78, 0x61, 0x6D, 0x70, 0x6C, 0x65, 0x0D, 0x0A, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D,
    0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D,
    0x3D, 0x3D, 0x3D, 0x3D, 0x0D, 0x0A, 0x21, 0x20, 0x4C, 0x5A, 0x4D, 0x41, 0x20, 0x21, 0x20, 0x44,
    0x65, 0x63, 0x6F, 0x64, 0x65, 0x72, 0x20, 0x21, 0x20, 0x54, 0x45, 0x53, 0x54, 0x20, 0x21, 0x0D,
    0x0A, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D,
    0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x0D, 0x0A, 0x21, 0x20, 0x54, 0x45,
    0x53, 0x54, 0x20, 0x21, 0x20, 0x4C, 0x5A, 0x4D, 0x41, 0x20, 0x21, 0x20, 0x44, 0x65, 0x63, 0x6F,
    0x64, 0x65, 0x72, 0x20, 0x21, 0x0D, 0x0A, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D,
    0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D,
    0x0D, 0x0A, 0x2D, 0x2D, 0x2D, 0x2D, 0x20, 0x54, 0x65, 0x73, 0x74, 0x20, 0x4C, 0x69, 0x6E, 0x65,
    0x20, 0x31, 0x20, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x20, 0x0D, 0x0A, 0x3D, 0x3D,
    0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D,
    0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x0D, 0x0A, 0x2D, 0x2D, 0x2D, 0x2D, 0x20, 0x54, 0x65,
    0x73, 0x74, 0x20, 0x4C, 0x69, 0x6E, 0x65, 0x20, 0x32, 0x20, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D, 0x2D,
    0x2D, 0x2D, 0x20, 0x0D, 0x0A, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D,
    0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x0D, 0x0A,
    0x3D, 0x3D, 0x3D, 0x20, 0x45, 0x6E, 0x64, 0x20, 0x6F, 0x66, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20,
    0x66, 0x69, 0x6C, 0x65, 0x20, 0x3D, 0x3D, 0x3D, 0x3D, 0x20, 0x0D, 0x0A, 0x3D, 0x3D, 0x3D, 0x3D,
    0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D,
    0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x0D, 0x0A
};
|
||||
|
||||
TEST_CASE(specification_a_lzma_decompress)
{
    // The 13-byte header (first 13 bytes below) carries an uncompressed size of 0x147 (327),
    // which is the size of specification_a_txt.
    Array<u8, 117> const container {
        0x5D, 0x00, 0x00, 0x80, 0x00, 0x47, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0x16,
        0x85, 0xBC, 0x45, 0xF0, 0xDF, 0xFF, 0xD2, 0xE8, 0x41, 0xF5, 0xCE, 0xE5, 0x90, 0xE1, 0xC8, 0x20,
        0xEA, 0xC6, 0x37, 0xBE, 0x2B, 0xD1, 0xF4, 0xC3, 0x34, 0x6F, 0x2F, 0x83, 0xC2, 0xA6, 0x7C, 0x6F,
        0x3D, 0x88, 0xA0, 0x58, 0x22, 0x1F, 0x3A, 0xBA, 0x7B, 0xC6, 0xDD, 0x66, 0xFE, 0xF8, 0x92, 0xE4,
        0xCB, 0x1C, 0xC4, 0x19, 0x0A, 0x0C, 0x8B, 0x2E, 0x39, 0xB8, 0xB8, 0x03, 0xCD, 0x5A, 0x9E, 0x10,
        0x3A, 0x4F, 0x65, 0xFA, 0x41, 0xCB, 0xF2, 0x79, 0x65, 0xD7, 0xF1, 0x9F, 0xAB, 0x70, 0x1D, 0x6F,
        0xF7, 0xB6, 0x79, 0xCC, 0x8A, 0x7D, 0xCE, 0xDB, 0xF8, 0xF6, 0x9E, 0xC9, 0x12, 0x9F, 0xAA, 0xBF,
        0x89, 0xFE, 0x05, 0x36, 0x80
    };

    auto input = MUST(try_make<FixedMemoryStream>(container));
    auto lzma = MUST(Compress::LzmaDecompressor::create_from_container(move(input)));
    auto decoded = TRY_OR_FAIL(lzma->read_until_eof(PAGE_SIZE));

    EXPECT_EQ(specification_a_txt, decoded.span());
}
|
||||
|
||||
TEST_CASE(specification_a_eos_lzma_decompress)
{
    // The uncompressed size field of the header is all 0xFF bytes here, i.e. the placeholder for an unknown size.
    Array<u8, 122> const container {
        0x5D, 0x00, 0x00, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x26, 0x16,
        0x85, 0xBC, 0x45, 0xF0, 0xDF, 0xFF, 0xD2, 0xE8, 0x41, 0xF5, 0xCE, 0xE5, 0x90, 0xE1, 0xC8, 0x20,
        0xEA, 0xC6, 0x37, 0xBE, 0x2B, 0xD1, 0xF4, 0xC3, 0x34, 0x6F, 0x2F, 0x83, 0xC2, 0xA6, 0x7C, 0x6F,
        0x3D, 0x88, 0xA0, 0x58, 0x22, 0x1F, 0x3A, 0xBA, 0x7B, 0xC6, 0xDD, 0x66, 0xFE, 0xF8, 0x92, 0xE4,
        0xCB, 0x1C, 0xC4, 0x19, 0x0A, 0x0C, 0x8B, 0x2E, 0x39, 0xB8, 0xB8, 0x03, 0xCD, 0x5A, 0x9E, 0x10,
        0x3A, 0x4F, 0x65, 0xFA, 0x41, 0xCB, 0xF2, 0x79, 0x65, 0xD7, 0xF1, 0x9F, 0xAB, 0x70, 0x1D, 0x6F,
        0xF7, 0xB6, 0x79, 0xCC, 0x8A, 0x7D, 0xCE, 0xDB, 0xF8, 0xF6, 0x9E, 0xC9, 0x12, 0x9F, 0xAA, 0xBF,
        0x8A, 0x08, 0xF5, 0x99, 0x8D, 0x7F, 0xFA, 0x18, 0x0A, 0x52
    };

    auto input = MUST(try_make<FixedMemoryStream>(container));
    auto lzma = MUST(Compress::LzmaDecompressor::create_from_container(move(input)));
    auto decoded = TRY_OR_FAIL(lzma->read_until_eof(PAGE_SIZE));

    EXPECT_EQ(specification_a_txt, decoded.span());
}
|
||||
|
||||
TEST_CASE(specification_a_eos_and_size_lzma_decompress)
{
    // Same payload as the end-of-stream variant, but the header additionally
    // carries the uncompressed size 0x147 (327).
    Array<u8, 122> const container {
        0x5D, 0x00, 0x00, 0x01, 0x00, 0x47, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0x16,
        0x85, 0xBC, 0x45, 0xF0, 0xDF, 0xFF, 0xD2, 0xE8, 0x41, 0xF5, 0xCE, 0xE5, 0x90, 0xE1, 0xC8, 0x20,
        0xEA, 0xC6, 0x37, 0xBE, 0x2B, 0xD1, 0xF4, 0xC3, 0x34, 0x6F, 0x2F, 0x83, 0xC2, 0xA6, 0x7C, 0x6F,
        0x3D, 0x88, 0xA0, 0x58, 0x22, 0x1F, 0x3A, 0xBA, 0x7B, 0xC6, 0xDD, 0x66, 0xFE, 0xF8, 0x92, 0xE4,
        0xCB, 0x1C, 0xC4, 0x19, 0x0A, 0x0C, 0x8B, 0x2E, 0x39, 0xB8, 0xB8, 0x03, 0xCD, 0x5A, 0x9E, 0x10,
        0x3A, 0x4F, 0x65, 0xFA, 0x41, 0xCB, 0xF2, 0x79, 0x65, 0xD7, 0xF1, 0x9F, 0xAB, 0x70, 0x1D, 0x6F,
        0xF7, 0xB6, 0x79, 0xCC, 0x8A, 0x7D, 0xCE, 0xDB, 0xF8, 0xF6, 0x9E, 0xC9, 0x12, 0x9F, 0xAA, 0xBF,
        0x8A, 0x08, 0xF5, 0x99, 0x8D, 0x7F, 0xFA, 0x18, 0x0A, 0x52
    };

    auto input = MUST(try_make<FixedMemoryStream>(container));
    auto lzma = MUST(Compress::LzmaDecompressor::create_from_container(move(input)));
    auto decoded = TRY_OR_FAIL(lzma->read_until_eof(PAGE_SIZE));

    EXPECT_EQ(specification_a_txt, decoded.span());
}
|
||||
|
||||
TEST_CASE(specification_a_lp1_lc2_pb1_lzma_decompress)
{
    // Note: Both the name of this test file and the accompanying info.txt are wrong.
    //       The file is encoded with lc = 1, not with lc = 2.
    Array<u8, 117> const container {
        0x37, 0x00, 0x00, 0x01, 0x00, 0x47, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0x16,
        0x86, 0x23, 0xBC, 0x5C, 0xC9, 0x40, 0x2B, 0x6B, 0x91, 0x5B, 0xCD, 0x90, 0x40, 0xCB, 0x9A, 0x71,
        0x5B, 0x84, 0x68, 0xE0, 0x5A, 0xAB, 0xA3, 0xE9, 0x04, 0xF7, 0xA3, 0xA6, 0x8E, 0x5F, 0xAA, 0x24,
        0x8B, 0xFC, 0x20, 0x38, 0xA6, 0xB7, 0x2A, 0x47, 0xAF, 0x07, 0xF7, 0x14, 0xAC, 0xE8, 0xB4, 0xD9,
        0x96, 0x27, 0xE0, 0xF4, 0x47, 0x8D, 0xE9, 0xDD, 0x05, 0x28, 0x1A, 0xDF, 0xB1, 0xED, 0x1A, 0xDC,
        0x0B, 0x55, 0xB2, 0xBD, 0x55, 0x69, 0x6C, 0xD9, 0xFC, 0x70, 0x43, 0xA7, 0x16, 0x58, 0x99, 0xFE,
        0x97, 0x04, 0x11, 0x27, 0x56, 0x5E, 0xC6, 0xB0, 0x4E, 0x31, 0xA0, 0xCB, 0x17, 0x27, 0xEC, 0x72,
        0x36, 0x0E, 0x9A, 0xAD, 0x00
    };

    auto input = MUST(try_make<FixedMemoryStream>(container));
    auto lzma = MUST(Compress::LzmaDecompressor::create_from_container(move(input)));
    auto decoded = TRY_OR_FAIL(lzma->read_until_eof(PAGE_SIZE));

    EXPECT_EQ(specification_a_txt, decoded.span());
}
|
||||
|
||||
TEST_CASE(specification_bad_corrupted_lzma_decompress)
{
    // Three consecutive payload bytes in the sixth row are 0xFF, so reading the stream has to fail.
    Array<u8, 117> const container {
        0x5D, 0x00, 0x00, 0x80, 0x00, 0x47, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0x16,
        0x85, 0xBC, 0x45, 0xF0, 0xDF, 0xFF, 0xD2, 0xE8, 0x41, 0xF5, 0xCE, 0xE5, 0x90, 0xE1, 0xC8, 0x20,
        0xEA, 0xC6, 0x37, 0xBE, 0x2B, 0xD1, 0xF4, 0xC3, 0x34, 0x6F, 0x2F, 0x83, 0xC2, 0xA6, 0x7C, 0x6F,
        0x3D, 0x88, 0xA0, 0x58, 0x22, 0x1F, 0x3A, 0xBA, 0x7B, 0xC6, 0xDD, 0x66, 0xFE, 0xF8, 0x92, 0xE4,
        0xCB, 0x1C, 0xC4, 0x19, 0x0A, 0x0C, 0x8B, 0x2E, 0x39, 0xB8, 0xB8, 0x03, 0xCD, 0x5A, 0x9E, 0x10,
        0x3A, 0x4F, 0x65, 0xFA, 0x41, 0xCB, 0xF2, 0x79, 0x65, 0xD7, 0xF1, 0xFF, 0xFF, 0xFF, 0x1D, 0x6F,
        0xF7, 0xB6, 0x79, 0xCC, 0x8A, 0x7D, 0xCE, 0xDB, 0xF8, 0xF6, 0x9E, 0xC9, 0x12, 0x9F, 0xAA, 0xBF,
        0x89, 0xFE, 0x05, 0x36, 0x80
    };

    auto input = MUST(try_make<FixedMemoryStream>(container));
    auto lzma = MUST(Compress::LzmaDecompressor::create_from_container(move(input)));

    EXPECT(lzma->read_until_eof(PAGE_SIZE).is_error());
}
|
||||
|
||||
TEST_CASE(specification_bad_eos_incorrect_size_lzma_decompress)
{
    // The header claims an uncompressed size of 0x148 (328), one byte more than the 327 bytes
    // of specification_a_txt, so reading the stream has to fail.
    Array<u8, 122> const container {
        0x5D, 0x00, 0x00, 0x01, 0x00, 0x48, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0x16,
        0x85, 0xBC, 0x45, 0xF0, 0xDF, 0xFF, 0xD2, 0xE8, 0x41, 0xF5, 0xCE, 0xE5, 0x90, 0xE1, 0xC8, 0x20,
        0xEA, 0xC6, 0x37, 0xBE, 0x2B, 0xD1, 0xF4, 0xC3, 0x34, 0x6F, 0x2F, 0x83, 0xC2, 0xA6, 0x7C, 0x6F,
        0x3D, 0x88, 0xA0, 0x58, 0x22, 0x1F, 0x3A, 0xBA, 0x7B, 0xC6, 0xDD, 0x66, 0xFE, 0xF8, 0x92, 0xE4,
        0xCB, 0x1C, 0xC4, 0x19, 0x0A, 0x0C, 0x8B, 0x2E, 0x39, 0xB8, 0xB8, 0x03, 0xCD, 0x5A, 0x9E, 0x10,
        0x3A, 0x4F, 0x65, 0xFA, 0x41, 0xCB, 0xF2, 0x79, 0x65, 0xD7, 0xF1, 0x9F, 0xAB, 0x70, 0x1D, 0x6F,
        0xF7, 0xB6, 0x79, 0xCC, 0x8A, 0x7D, 0xCE, 0xDB, 0xF8, 0xF6, 0x9E, 0xC9, 0x12, 0x9F, 0xAA, 0xBF,
        0x8A, 0x08, 0xF5, 0x99, 0x8D, 0x7F, 0xFA, 0x18, 0x0A, 0x52
    };

    auto input = MUST(try_make<FixedMemoryStream>(container));
    auto lzma = MUST(Compress::LzmaDecompressor::create_from_container(move(input)));

    EXPECT(lzma->read_until_eof(PAGE_SIZE).is_error());
}
|
||||
|
||||
TEST_CASE(specification_bad_incorrect_size_lzma_decompress)
{
    // The header claims an uncompressed size of 0x122 (290), less than the 327 bytes
    // of specification_a_txt, so reading the stream has to fail.
    Array<u8, 117> const container {
        0x5D, 0x00, 0x00, 0x80, 0x00, 0x22, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0x16,
        0x85, 0xBC, 0x45, 0xF0, 0xDF, 0xFF, 0xD2, 0xE8, 0x41, 0xF5, 0xCE, 0xE5, 0x90, 0xE1, 0xC8, 0x20,
        0xEA, 0xC6, 0x37, 0xBE, 0x2B, 0xD1, 0xF4, 0xC3, 0x34, 0x6F, 0x2F, 0x83, 0xC2, 0xA6, 0x7C, 0x6F,
        0x3D, 0x88, 0xA0, 0x58, 0x22, 0x1F, 0x3A, 0xBA, 0x7B, 0xC6, 0xDD, 0x66, 0xFE, 0xF8, 0x92, 0xE4,
        0xCB, 0x1C, 0xC4, 0x19, 0x0A, 0x0C, 0x8B, 0x2E, 0x39, 0xB8, 0xB8, 0x03, 0xCD, 0x5A, 0x9E, 0x10,
        0x3A, 0x4F, 0x65, 0xFA, 0x41, 0xCB, 0xF2, 0x79, 0x65, 0xD7, 0xF1, 0x9F, 0xAB, 0x70, 0x1D, 0x6F,
        0xF7, 0xB6, 0x79, 0xCC, 0x8A, 0x7D, 0xCE, 0xDB, 0xF8, 0xF6, 0x9E, 0xC9, 0x12, 0x9F, 0xAA, 0xBF,
        0x89, 0xFE, 0x05, 0x36, 0x80
    };

    auto input = MUST(try_make<FixedMemoryStream>(container));
    auto lzma = MUST(Compress::LzmaDecompressor::create_from_container(move(input)));

    EXPECT(lzma->read_until_eof(PAGE_SIZE).is_error());
}
|
Loading…
Add table
Add a link
Reference in a new issue