mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-06-08 05:27:14 +09:00

This is preparation for removing the endianness override, since it was only used by a single client: LibTextCodec. While here, add helpers and make use of simdutf for fast conversion.
50 lines
1.4 KiB
C++
50 lines
1.4 KiB
C++
/*
|
|
* Copyright (c) 2022, the SerenityOS developers.
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <AK/String.h>
|
|
#include <AK/Vector.h>
|
|
#include <LibTest/TestCase.h>
|
|
#include <LibTextCodec/Decoder.h>
|
|
|
|
TEST_CASE(test_utf8_decode)
|
|
{
|
|
auto decoder = TextCodec::UTF8Decoder();
|
|
// Bytes for U+1F600 GRINNING FACE
|
|
auto test_string = "\xf0\x9f\x98\x80"sv;
|
|
|
|
EXPECT(decoder.validate(test_string));
|
|
|
|
Vector<u32> processed_code_points;
|
|
MUST(decoder.process(test_string, [&](u32 code_point) {
|
|
return processed_code_points.try_append(code_point);
|
|
}));
|
|
EXPECT(processed_code_points.size() == 1);
|
|
EXPECT(processed_code_points[0] == 0x1F600);
|
|
|
|
EXPECT(MUST(decoder.to_utf8(test_string)) == test_string);
|
|
}
|
|
|
|
TEST_CASE(test_utf16be_decode)
|
|
{
|
|
auto decoder = TextCodec::UTF16BEDecoder();
|
|
// This is the output of `python3 -c "print('säk😀'.encode('utf-16be'))"`.
|
|
auto test_string = "\x00s\x00\xe4\x00k\xd8=\xde\x00"sv;
|
|
|
|
EXPECT(decoder.validate(test_string));
|
|
auto utf8 = MUST(decoder.to_utf8(test_string));
|
|
EXPECT_EQ(utf8, "säk😀"sv);
|
|
}
|
|
|
|
TEST_CASE(test_utf16le_decode)
|
|
{
|
|
auto decoder = TextCodec::UTF16LEDecoder();
|
|
// This is the output of `python3 -c "print('säk😀'.encode('utf-16le'))"`.
|
|
auto test_string = "s\x00\xe4\x00k\x00=\xd8\x00\xde"sv;
|
|
|
|
EXPECT(decoder.validate(test_string));
|
|
auto utf8 = MUST(decoder.to_utf8(test_string));
|
|
EXPECT_EQ(utf8, "säk😀"sv);
|
|
}
|