diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp
index 0826eb0503f..8d6074aa9fe 100644
--- a/AK/Utf8View.cpp
+++ b/AK/Utf8View.cpp
@@ -240,4 +240,21 @@ u32 Utf8CodepointIterator::operator*() const
     return code_point_value_so_far;
 }
 
+// Returns the code point `offset` code points ahead of the current position
+// without advancing this iterator, or {} if that position is at or past the end.
+Optional<u32> Utf8CodepointIterator::peek(size_t offset) const
+{
+    // Walk a copy so that the caller's iterator is left untouched.
+    auto new_iterator = *this;
+    for (size_t index = 0; index < offset; ++index) {
+        // Never advance an iterator that is already done.
+        if (new_iterator.done())
+            return {};
+        ++new_iterator;
+    }
+    if (new_iterator.done())
+        return {};
+    return *new_iterator;
+}
+
 }
diff --git a/AK/Utf8View.h b/AK/Utf8View.h
index 7324bd29847..2acf627d299 100644
--- a/AK/Utf8View.h
+++ b/AK/Utf8View.h
@@ -25,6 +25,8 @@ public:
     bool operator!=(const Utf8CodepointIterator&) const;
     Utf8CodepointIterator& operator++();
     u32 operator*() const;
+    // NOTE: This returns {} if the peek is at or past EOF.
+    Optional<u32> peek(size_t offset = 0) const;
 
     ssize_t operator-(const Utf8CodepointIterator& other) const
     {
diff --git a/Tests/AK/TestUtf8.cpp b/Tests/AK/TestUtf8.cpp
index 4a3f4e80294..522de3dcc9c 100644
--- a/Tests/AK/TestUtf8.cpp
+++ b/Tests/AK/TestUtf8.cpp
@@ -67,3 +67,40 @@ TEST_CASE(validate_invalid_ut8)
     EXPECT(!utf8_4.validate(valid_bytes));
     EXPECT(valid_bytes == 0);
 }
+
+TEST_CASE(iterate_utf8)
+{
+    Utf8View view("Some weird characters \u00A9\u266A\uA755");
+    Utf8CodepointIterator iterator = view.begin();
+
+    EXPECT(*iterator == 'S');
+    EXPECT(iterator.peek().has_value() && iterator.peek().value() == 'S');
+    EXPECT(iterator.peek(0).has_value() && iterator.peek(0).value() == 'S');
+    EXPECT(iterator.peek(1).has_value() && iterator.peek(1).value() == 'o');
+    EXPECT(iterator.peek(22).has_value() && iterator.peek(22).value() == 0x00A9);
+    EXPECT(iterator.peek(24).has_value() && iterator.peek(24).value() == 0xA755);
+    EXPECT(!iterator.peek(25).has_value());
+
+    ++iterator;
+
+    EXPECT(*iterator == 'o');
+    EXPECT(iterator.peek(23).has_value() && iterator.peek(23).value() == 0xA755);
+
+    for (size_t i = 0; i < 23; ++i)
+        ++iterator;
+
+    EXPECT(!iterator.done());
+    EXPECT(*iterator == 0xA755);
+    EXPECT(iterator.peek().has_value() && iterator.peek().value() == 0xA755);
+    EXPECT(!iterator.peek(1).has_value());
+
+    ++iterator;
+
+    EXPECT(iterator.done());
+    EXPECT(!iterator.peek(0).has_value());
+    EXPECT(!iterator.peek(1).has_value());
+    EXPECT_CRASH("Dereferencing Utf8CodepointIterator which is already done.", [&iterator] {
+        *iterator;
+        return Test::Crash::Failure::DidNotCrash;
+    });
+}