1
0
Fork 0
mirror of https://github.com/LadybirdBrowser/ladybird.git synced 2025-06-09 17:44:56 +09:00

LibRegex: Ensure nested capture groups have non-conflicting names

Take record of the named capture group prior to parsing the group's
body. This requires removing the minimum length recorded directly on the
named capture group; it now needs to be looked up via the capture group
minimum lengths table.
This commit is contained in:
Marc Jessome 2024-11-23 18:38:57 -05:00 committed by Ali Mohammad Pur
parent e37c9eaeff
commit efcaf991e6
Notes: github-actions[bot] 2024-11-24 09:27:05 +00:00
3 changed files with 12 additions and 10 deletions

View file

@ -1627,9 +1627,14 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
set_error(Error::InvalidNameForCaptureGroup); set_error(Error::InvalidNameForCaptureGroup);
return false; return false;
} }
match_length_minimum += maybe_capture_group->minimum_length; auto maybe_length = m_parser_state.capture_group_minimum_lengths.get(maybe_capture_group.value());
if (!maybe_length.has_value()) {
set_error(Error::InvalidNameForCaptureGroup);
return false;
}
match_length_minimum += maybe_length.value();
stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)maybe_capture_group->group_index } }); stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)maybe_capture_group.value() } });
return true; return true;
} }
@ -2674,6 +2679,8 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
return false; return false;
} }
m_parser_state.named_capture_groups.set(name, group_index);
ByteCode capture_group_bytecode; ByteCode capture_group_bytecode;
size_t length = 0; size_t length = 0;
enter_capture_group_scope(); enter_capture_group_scope();
@ -2693,7 +2700,6 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
match_length_minimum += length; match_length_minimum += length;
m_parser_state.capture_group_minimum_lengths.set(group_index, length); m_parser_state.capture_group_minimum_lengths.set(group_index, length);
m_parser_state.named_capture_groups.set(name, { group_index, length });
return true; return true;
} }

View file

@ -98,11 +98,6 @@ protected:
size_t tell() const { return m_parser_state.current_token.position(); } size_t tell() const { return m_parser_state.current_token.position(); }
struct NamedCaptureGroup {
size_t group_index { 0 };
size_t minimum_length { 0 };
};
struct ParserState { struct ParserState {
Lexer& lexer; Lexer& lexer;
Token current_token; Token current_token;
@ -114,8 +109,8 @@ protected:
size_t match_length_minimum { 0 }; size_t match_length_minimum { 0 };
size_t repetition_mark_count { 0 }; size_t repetition_mark_count { 0 };
AllOptions regex_options; AllOptions regex_options;
HashMap<int, size_t> capture_group_minimum_lengths; HashMap<size_t, size_t> capture_group_minimum_lengths;
HashMap<DeprecatedFlyString, NamedCaptureGroup> named_capture_groups; HashMap<DeprecatedFlyString, size_t> named_capture_groups;
explicit ParserState(Lexer& lexer) explicit ParserState(Lexer& lexer)
: lexer(lexer) : lexer(lexer)

View file

@ -597,6 +597,7 @@ TEST_CASE(ECMA262_parse)
{ "a{9007199254740992,9007199254740992}"sv, regex::Error::InvalidBraceContent }, { "a{9007199254740992,9007199254740992}"sv, regex::Error::InvalidBraceContent },
{ "(?<a>a)(?<a>b)"sv, regex::Error::DuplicateNamedCapture }, { "(?<a>a)(?<a>b)"sv, regex::Error::DuplicateNamedCapture },
{ "(?<a>a)(?<b>b)(?<a>c)"sv, regex::Error::DuplicateNamedCapture }, { "(?<a>a)(?<b>b)(?<a>c)"sv, regex::Error::DuplicateNamedCapture },
{ "(?<a>(?<a>a))"sv, regex::Error::DuplicateNamedCapture },
{ "(?<1a>a)"sv, regex::Error::InvalidNameForCaptureGroup }, { "(?<1a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
{ "(?<\\a>a)"sv, regex::Error::InvalidNameForCaptureGroup }, { "(?<\\a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
{ "(?<\ta>a)"sv, regex::Error::InvalidNameForCaptureGroup }, { "(?<\ta>a)"sv, regex::Error::InvalidNameForCaptureGroup },