1
0
Fork 0
mirror of https://github.com/LadybirdBrowser/ladybird.git synced 2025-06-08 05:27:14 +09:00

LibRegex: Use an interned string table for capture group names

This avoids juggling unsafe raw string pointers and removes the last
user of DeprecatedFlyString that could not be ported to FlyString.
This commit is contained in:
Ali Mohammad Pur 2025-04-01 16:49:07 +02:00 committed by Andreas Kling
parent 6bb0d585e3
commit 4136d8d13e
Notes: github-actions[bot] 2025-04-02 09:44:16 +00:00
6 changed files with 103 additions and 24 deletions

View file

@@ -11,7 +11,6 @@
#include <AK/ByteString.h>
#include <AK/CharacterTypes.h>
#include <AK/Debug.h>
#include <AK/DeprecatedFlyString.h>
#include <AK/GenericLexer.h>
#include <AK/ScopeGuard.h>
#include <AK/StringBuilder.h>
@@ -813,7 +812,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
NegativeLookbehind,
} group_mode { Normal };
consume();
Optional<StringView> capture_group_name;
Optional<FlyString> capture_group_name;
bool prevent_capture_group = false;
if (match(TokenType::Questionmark)) {
consume();
@@ -836,7 +835,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
++capture_group_name_length;
last_token = consume();
}
capture_group_name = StringView(start_token.value().characters_without_null_termination(), capture_group_name_length);
capture_group_name = MUST(FlyString::from_utf8(m_parser_state.lexer.input().substring_view_starting_from_substring(start_token.value()).substring_view(0, capture_group_name_length)));
++m_parser_state.named_capture_groups_count;
} else if (match(TokenType::EqualSign)) { // positive lookahead
@@ -982,7 +981,7 @@ bool ECMA262Parser::parse_pattern(ByteCode& stack, size_t& match_length_minimum,
return parse_disjunction(stack, match_length_minimum, flags);
}
bool ECMA262Parser::has_duplicate_in_current_alternative(DeprecatedFlyString const& name)
bool ECMA262Parser::has_duplicate_in_current_alternative(FlyString const& name)
{
auto it = m_parser_state.named_capture_groups.find(name);
if (it == m_parser_state.named_capture_groups.end())
@@ -2503,7 +2502,7 @@ bool ECMA262Parser::parse_unicode_property_escape(PropertyEscape& property, bool
[](Empty&) -> bool { VERIFY_NOT_REACHED(); });
}
DeprecatedFlyString ECMA262Parser::read_capture_group_specifier(bool take_starting_angle_bracket)
FlyString ECMA262Parser::read_capture_group_specifier(bool take_starting_angle_bracket)
{
static constexpr u32 const REPLACEMENT_CHARACTER = 0xFFFD;
constexpr u32 const ZERO_WIDTH_NON_JOINER { 0x200C };
@@ -2604,7 +2603,7 @@ DeprecatedFlyString ECMA262Parser::read_capture_group_specifier(bool take_starti
builder.append_code_point(code_point);
}
DeprecatedFlyString name = builder.to_byte_string();
auto name = MUST(builder.to_fly_string());
if (!hit_end || name.is_empty())
set_error(Error::InvalidNameForCaptureGroup);
@@ -2720,7 +2719,7 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
stack.insert_bytecode_group_capture_left(group_index);
stack.extend(move(capture_group_bytecode));
stack.insert_bytecode_group_capture_right(group_index, name.view());
stack.insert_bytecode_group_capture_right(group_index, name);
match_length_minimum += length;