From 76f5dce3db9d63c7209db37a4d734b9e9b27602f Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Tue, 15 Apr 2025 15:31:08 +0200 Subject: [PATCH] LibRegex: Flatten capture group list in MatchState This makes copying the capture group COWVector significantly cheaper, as we no longer have to run any constructors for it - just memcpy. --- AK/COWVector.h | 7 +++ Libraries/LibJS/Runtime/RegExpObject.h | 1 - Libraries/LibJS/Runtime/RegExpPrototype.cpp | 2 +- Libraries/LibRegex/RegexByteCode.cpp | 49 ++++++++------------- Libraries/LibRegex/RegexDebug.h | 2 +- Libraries/LibRegex/RegexDefs.h | 13 +++--- Libraries/LibRegex/RegexMatch.h | 31 ++++++++++++- Libraries/LibRegex/RegexMatcher.cpp | 47 ++++++++------------ Libraries/LibRegex/RegexMatcher.h | 4 +- Libraries/LibRegex/RegexOptimizer.cpp | 16 +++---- Libraries/LibRegex/RegexOptions.h | 2 - Libraries/LibRegex/RegexParser.cpp | 6 +-- Libraries/LibURL/Pattern/Component.cpp | 3 +- Tests/LibRegex/Regex.cpp | 2 +- 14 files changed, 98 insertions(+), 87 deletions(-) diff --git a/AK/COWVector.h b/AK/COWVector.h index 96a4103c6fe..e7deb2cb036 100644 --- a/AK/COWVector.h +++ b/AK/COWVector.h @@ -127,6 +127,13 @@ public: return m_detail->m_members[index]; } + Span span() const { return m_detail->m_members; } + Span mutable_span() + { + copy(); + return m_detail->m_members; + } + size_t capacity() const { return m_detail->m_members.capacity(); diff --git a/Libraries/LibJS/Runtime/RegExpObject.h b/Libraries/LibJS/Runtime/RegExpObject.h index b7ebe63facc..397a0d53b22 100644 --- a/Libraries/LibJS/Runtime/RegExpObject.h +++ b/Libraries/LibJS/Runtime/RegExpObject.h @@ -35,7 +35,6 @@ public: static constexpr regex::RegexOptions default_flags { (regex::ECMAScriptFlags)regex::AllFlags::SingleMatch | (regex::ECMAScriptFlags)regex::AllFlags::Global - | (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches | regex::ECMAScriptFlags::BrowserExtended }; diff --git a/Libraries/LibJS/Runtime/RegExpPrototype.cpp b/Libraries/LibJS/Runtime/RegExpPrototype.cpp index 5ab059e1ce9..ae40469c791 100644 --- a/Libraries/LibJS/Runtime/RegExpPrototype.cpp +++ b/Libraries/LibJS/Runtime/RegExpPrototype.cpp @@ -294,7 +294,7 @@ static ThrowCompletionOr regexp_builtin_exec(VM& vm, RegExpObject& regexp // 33. For each integer i such that i ≥ 1 and i ≤ n, in ascending order, do for (size_t i = 1; i <= result.n_capture_groups; ++i) { // a. Let captureI be ith element of r's captures List. - auto& capture = result.capture_group_matches[0][i]; + auto& capture = result.capture_group_matches[0][i - 1]; Value captured_value; diff --git a/Libraries/LibRegex/RegexByteCode.cpp b/Libraries/LibRegex/RegexByteCode.cpp index 6d190836a83..689b707035b 100644 --- a/Libraries/LibRegex/RegexByteCode.cpp +++ b/Libraries/LibRegex/RegexByteCode.cpp @@ -341,40 +341,29 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input, ALWAYS_INLINE ExecutionResult OpCode_ClearCaptureGroup::execute(MatchInput const& input, MatchState& state) const { - if (input.match_index < state.capture_group_matches.size()) { - auto& group = state.capture_group_matches.mutable_at(input.match_index); - auto group_id = id(); - if (group_id >= group.size()) - group.resize(group_id + 1); - - group[group_id].reset(); + if (input.match_index < state.capture_group_matches_size()) { + auto group = state.mutable_capture_group_matches(input.match_index); + group[id() - 1].reset(); } return ExecutionResult::Continue; } ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(MatchInput const& input, MatchState& state) const { - if (input.match_index >= state.capture_group_matches.size()) { - state.capture_group_matches.ensure_capacity(input.match_index); - auto capacity = state.capture_group_matches.capacity(); - for (size_t i = state.capture_group_matches.size(); i <= capacity; ++i) - state.capture_group_matches.empend(); + if (input.match_index >= state.capture_group_matches_size()) { + state.flat_capture_group_matches.ensure_capacity((input.match_index + 1) * state.capture_group_count); + for (size_t i = state.capture_group_matches_size(); i <= input.match_index; ++i) + for (size_t j = 0; j < state.capture_group_count; ++j) + state.flat_capture_group_matches.append({}); } - if (id() >= state.capture_group_matches.at(input.match_index).size()) { - state.capture_group_matches.mutable_at(input.match_index).ensure_capacity(id()); - auto capacity = state.capture_group_matches.at(input.match_index).capacity(); - for (size_t i = state.capture_group_matches.at(input.match_index).size(); i <= capacity; ++i) - state.capture_group_matches.mutable_at(input.match_index).empend(); - } - - state.capture_group_matches.mutable_at(input.match_index).at(id()).left_column = state.string_position; + state.mutable_capture_group_matches(input.match_index).at(id() - 1).left_column = state.string_position; return ExecutionResult::Continue; } ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput const& input, MatchState& state) const { - auto& match = state.capture_group_matches.mutable_at(input.match_index).at(id()); + auto& match = state.capture_group_matches(input.match_index).at(id() - 1); auto start_position = match.left_column; if (state.string_position < start_position) { dbgln("Right capture group {} is before left capture group {}!", state.string_position, start_position); @@ -388,14 +377,14 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput c VERIFY(start_position + length <= input.view.length()); - match = { input.view.substring_view(start_position, length), input.line, start_position, input.global_offset + start_position }; + state.mutable_capture_group_matches(input.match_index).at(id() - 1) = { input.view.substring_view(start_position, length), input.line, start_position, input.global_offset + start_position }; return ExecutionResult::Continue; } ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchInput const& input, MatchState& state) const { - auto& match = state.capture_group_matches.mutable_at(input.match_index).at(id()); + auto& match = state.capture_group_matches(input.match_index).at(id() - 1); auto start_position = match.left_column; if (state.string_position < start_position) return ExecutionResult::Failed_ExecuteLowPrioForks; @@ -409,7 +398,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchIn auto view = input.view.substring_view(start_position, length); - match = { view, name_string_table_index(), input.line, start_position, input.global_offset + start_position }; + state.mutable_capture_group_matches(input.match_index).at(id() - 1) = { view, name_string_table_index(), input.line, start_position, input.global_offset + start_position }; return ExecutionResult::Continue; } @@ -584,11 +573,11 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M break; } case CharacterCompareType::Reference: { - auto reference_number = (size_t)m_bytecode->at(offset++); - if (input.match_index >= state.capture_group_matches.size()) + auto reference_number = ((size_t)m_bytecode->at(offset++)) - 1; + if (input.match_index >= state.capture_group_matches_size()) return ExecutionResult::Failed_ExecuteLowPrioForks; - auto& groups = state.capture_group_matches.at(input.match_index); + auto groups = state.capture_group_matches(input.match_index); if (groups.size() <= reference_number) return ExecutionResult::Failed_ExecuteLowPrioForks; @@ -988,8 +977,8 @@ Vector OpCode_Compare::variable_arguments_to_byte_string(Optionalat(offset++); result.empend(ByteString::formatted(" number={}", ref)); if (input.has_value()) { - if (state().capture_group_matches.size() > input->match_index) { - auto& match = state().capture_group_matches[input->match_index]; + if (state().capture_group_matches_size() > input->match_index) { + auto match = state().capture_group_matches(input->match_index); if (match.size() > ref) { auto& group = match[ref]; result.empend(ByteString::formatted(" left={}", group.left_column)); @@ -999,7 +988,7 @@ Vector OpCode_Compare::variable_arguments_to_byte_string(Optionalmatch_index, state().capture_group_matches.size() - 1)); + result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches_size() - 1)); } } } else if (compare_type == CharacterCompareType::String) { diff --git a/Libraries/LibRegex/RegexDebug.h b/Libraries/LibRegex/RegexDebug.h index f8494263716..8414ca0d579 100644 --- a/Libraries/LibRegex/RegexDebug.h +++ b/Libraries/LibRegex/RegexDebug.h @@ -39,7 +39,7 @@ public: void print_bytecode(ByteCode const& bytecode) const { - MatchState state; + auto state = MatchState::only_for_enumeration(); for (;;) { auto& opcode = bytecode.get_opcode(state); print_opcode("PrintBytecode", opcode, state); diff --git a/Libraries/LibRegex/RegexDefs.h b/Libraries/LibRegex/RegexDefs.h index 9cc8ad8fdd6..6a9f60fa97f 100644 --- a/Libraries/LibRegex/RegexDefs.h +++ b/Libraries/LibRegex/RegexDefs.h @@ -43,12 +43,11 @@ enum __RegexAllFlags { __Regex_SingleLine = __Regex_Global << 10, // Dot matches newline characters __Regex_Sticky = __Regex_Global << 11, // Force the pattern to only match consecutive matches from where the previous match ended. __Regex_Multiline = __Regex_Global << 12, // Handle newline characters. Match each line, one by one. - __Regex_SkipTrimEmptyMatches = __Regex_Global << 13, // Do not remove empty capture group results. - __Regex_SingleMatch = __Regex_Global << 14, // Stop after acquiring a single match. - __Regex_UnicodeSets = __Regex_Global << 15, // ECMA262 Parser specific: Allow set operations in char classes. - __Regex_Internal_Stateful = __Regex_Global << 16, // Internal flag; enables stateful matches. - __Regex_Internal_BrowserExtended = __Regex_Global << 17, // Internal flag; enable browser-specific ECMA262 extensions. - __Regex_Internal_ConsiderNewline = __Regex_Global << 18, // Internal flag; allow matchers to consider newlines as line separators. - __Regex_Internal_ECMA262DotSemantics = __Regex_Global << 19, // Internal flag; use ECMA262 semantics for dot ('.') - disallow CR/LF/LS/PS instead of just CR. + __Regex_SingleMatch = __Regex_Global << 13, // Stop after acquiring a single match. + __Regex_UnicodeSets = __Regex_Global << 14, // ECMA262 Parser specific: Allow set operations in char classes. + __Regex_Internal_Stateful = __Regex_Global << 15, // Internal flag; enables stateful matches. + __Regex_Internal_BrowserExtended = __Regex_Global << 16, // Internal flag; enable browser-specific ECMA262 extensions. + __Regex_Internal_ConsiderNewline = __Regex_Global << 17, // Internal flag; allow matchers to consider newlines as line separators. + __Regex_Internal_ECMA262DotSemantics = __Regex_Global << 18, // Internal flag; use ECMA262 semantics for dot ('.') - disallow CR/LF/LS/PS instead of just CR. __Regex_Last = __Regex_Internal_ECMA262DotSemantics, }; diff --git a/Libraries/LibRegex/RegexMatch.h b/Libraries/LibRegex/RegexMatch.h index 320a999edae..7a81f61834d 100644 --- a/Libraries/LibRegex/RegexMatch.h +++ b/Libraries/LibRegex/RegexMatch.h @@ -369,6 +369,7 @@ struct MatchInput { }; struct MatchState { + size_t capture_group_count; size_t string_position_before_match { 0 }; size_t string_position { 0 }; size_t string_position_in_code_units { 0 }; @@ -377,10 +378,38 @@ struct MatchState { size_t forks_since_last_save { 0 }; Optional initiating_fork; COWVector matches; - COWVector> capture_group_matches; + COWVector flat_capture_group_matches; // Vector> indexed by match index, then by capture group id; flattened for performance COWVector repetition_marks; Vector checkpoints; + explicit MatchState(size_t capture_group_count) + : capture_group_count(capture_group_count) + { + } + + MatchState(MatchState const&) = default; + MatchState(MatchState&&) = default; + + MatchState& operator=(MatchState const&) = default; + MatchState& operator=(MatchState&&) = default; + + static MatchState only_for_enumeration() { return MatchState { 0 }; } + + size_t capture_group_matches_size() const + { + return flat_capture_group_matches.size() / capture_group_count; + } + + Span capture_group_matches(size_t match_index) const + { + return flat_capture_group_matches.span().slice(match_index * capture_group_count, capture_group_count); + } + + Span mutable_capture_group_matches(size_t match_index) + { + return flat_capture_group_matches.mutable_span().slice(match_index * capture_group_count, capture_group_count); + } + // For size_t in {0..100}, ips in {0..500} and repetitions in {0..30}, there are zero collisions. // For the full range, zero collisions were found in 8 million random samples. u64 u64_hash() const diff --git a/Libraries/LibRegex/RegexMatcher.cpp b/Libraries/LibRegex/RegexMatcher.cpp index afcf7d9c13b..fed306ce706 100644 --- a/Libraries/LibRegex/RegexMatcher.cpp +++ b/Libraries/LibRegex/RegexMatcher.cpp @@ -164,7 +164,7 @@ RegexResult Matcher::match(Vector const& views, Optiona size_t match_count { 0 }; MatchInput input; - MatchState state; + MatchState state { m_pattern->parser_result.capture_groups_count }; size_t operations = 0; input.regex_options = m_regex_options | regex_options.value_or({}).value(); @@ -189,20 +189,6 @@ RegexResult Matcher::match(Vector const& views, Optiona } } - if (c_match_preallocation_count) { - state.matches.ensure_capacity(c_match_preallocation_count); - state.capture_group_matches.ensure_capacity(c_match_preallocation_count); - auto& capture_groups_count = m_pattern->parser_result.capture_groups_count; - - for (size_t j = 0; j < c_match_preallocation_count; ++j) { - state.matches.empend(); - state.capture_group_matches.empend(); - state.capture_group_matches.mutable_at(j).ensure_capacity(capture_groups_count); - for (size_t k = 0; k < capture_groups_count; ++k) - state.capture_group_matches.mutable_at(j).unchecked_append({}); - } - } - auto append_match = [](auto& input, auto& state, auto& start_position) { if (state.matches.size() == input.match_index) state.matches.empend(); @@ -343,29 +329,34 @@ RegexResult Matcher::match(Vector const& views, Optiona break; } + auto flat_capture_group_matches = move(state.flat_capture_group_matches).release(); + if (flat_capture_group_matches.size() < state.capture_group_count * match_count) { + flat_capture_group_matches.ensure_capacity(match_count * state.capture_group_count); + for (size_t i = flat_capture_group_matches.size(); i < match_count * state.capture_group_count; ++i) + flat_capture_group_matches.empend(); + } + + Vector> capture_group_matches; + for (size_t i = 0; i < match_count; ++i) { + auto span = flat_capture_group_matches.span().slice(state.capture_group_count * i, state.capture_group_count); + capture_group_matches.append(span); + } + RegexResult result { match_count != 0, match_count, move(state.matches).release(), - move(state.capture_group_matches).release(), + move(flat_capture_group_matches), + move(capture_group_matches), operations, m_pattern->parser_result.capture_groups_count, m_pattern->parser_result.named_capture_groups_count, }; - if (match_count) { - // Make sure there are as many capture matches as there are actual matches. - if (result.capture_group_matches.size() < match_count) - result.capture_group_matches.resize(match_count); - for (auto& matches : result.capture_group_matches) - matches.resize(m_pattern->parser_result.capture_groups_count + 1); - if (!input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) { - for (auto& matches : result.capture_group_matches) - matches.remove_all_matching([](auto& match) { return match.view.is_null(); }); - } - } else { + if (match_count > 0) + VERIFY(result.capture_group_matches.size() >= match_count); + else result.capture_group_matches.clear_with_capacity(); - } return result; } diff --git a/Libraries/LibRegex/RegexMatcher.h b/Libraries/LibRegex/RegexMatcher.h index 7dd3509d5ad..4332be0b4bb 100644 --- a/Libraries/LibRegex/RegexMatcher.h +++ b/Libraries/LibRegex/RegexMatcher.h @@ -31,13 +31,13 @@ struct Block { } static constexpr size_t const c_max_recursion = 5000; -static constexpr size_t const c_match_preallocation_count = 0; struct RegexResult final { bool success { false }; size_t count { 0 }; Vector matches; - Vector> capture_group_matches; + Vector flat_capture_group_matches; + Vector> capture_group_matches; size_t n_operations { 0 }; size_t n_capture_groups { 0 }; size_t n_named_capture_groups { 0 }; diff --git a/Libraries/LibRegex/RegexOptimizer.cpp b/Libraries/LibRegex/RegexOptimizer.cpp index a519c616bac..b84462e71b0 100644 --- a/Libraries/LibRegex/RegexOptimizer.cpp +++ b/Libraries/LibRegex/RegexOptimizer.cpp @@ -37,7 +37,7 @@ void Regex::run_optimization_passes() attempt_rewrite_loops_as_atomic_groups(blocks); // FIXME: "There are a few more conditions this can be true in (e.g. within an arbitrarily nested capture group)" - MatchState state; + auto state = MatchState::only_for_enumeration(); auto& opcode = parser_result.bytecode.get_opcode(state); if (opcode.opcode_id() == OpCodeId::CheckBegin) parser_result.optimization_data.only_start_of_line = true; @@ -53,7 +53,7 @@ typename Regex::BasicBlockList Regex::split_basic_blocks(ByteCod auto bytecode_size = bytecode.size(); - MatchState state; + auto state = MatchState::only_for_enumeration(); state.instruction_position = 0; auto check_jump = [&](OpCode const& opcode) { auto& op = static_cast(opcode); @@ -512,7 +512,7 @@ enum class AtomicRewritePreconditionResult { static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block repeated_block, Block following_block, auto const& all_blocks) { Vector> repeated_values; - MatchState state; + auto state = MatchState::only_for_enumeration(); auto has_seen_actionable_opcode = false; for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) { auto& opcode = bytecode.get_opcode(state); @@ -680,7 +680,7 @@ bool Regex::attempt_rewrite_entire_match_as_substring_search(BasicBlockL // We have a single basic block, let's see if it's a series of character or string compares. StringBuilder final_string; - MatchState state; + auto state = MatchState::only_for_enumeration(); while (state.instruction_position < bytecode.size()) { auto& opcode = bytecode.get_opcode(state); switch (opcode.opcode_id()) { @@ -796,7 +796,7 @@ void Regex::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const& Optional fork_fallback_block; if (i + 1 < basic_blocks.size()) fork_fallback_block = basic_blocks[i + 1]; - MatchState state; + auto state = MatchState::only_for_enumeration(); // Check if the last instruction in this block is a jump to the block itself: { state.instruction_position = forking_block.end; @@ -913,7 +913,7 @@ void Regex::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const& } if (!needed_patches.is_empty()) { - MatchState state; + auto state = MatchState::only_for_enumeration(); auto bytecode_size = bytecode.size(); state.instruction_position = 0; struct Patch { @@ -1039,7 +1039,7 @@ void Optimizer::append_alternation(ByteCode& target, Span alternatives auto has_any_backwards_jump = false; - MatchState state; + auto state = MatchState::only_for_enumeration(); for (size_t i = 0; i < alternatives.size(); ++i) { auto& alternative = alternatives[i]; @@ -1144,7 +1144,7 @@ void Optimizer::append_alternation(ByteCode& target, Span alternatives node.metadata_value().size(), node.metadata_value().size() == 1 ? "" : "s"); - MatchState state; + auto state = MatchState::only_for_enumeration(); state.instruction_position = node.metadata_value().first().instruction_position; auto& opcode = alternatives[node.metadata_value().first().alternative_index].get_opcode(state); insn = ByteString::formatted("{} {}", opcode.to_byte_string(), opcode.arguments_string()); diff --git a/Libraries/LibRegex/RegexOptions.h b/Libraries/LibRegex/RegexOptions.h index bddcb1ba181..ae7e71839ba 100644 --- a/Libraries/LibRegex/RegexOptions.h +++ b/Libraries/LibRegex/RegexOptions.h @@ -28,7 +28,6 @@ enum class AllFlags { SingleLine = __Regex_SingleLine, // Dot matches newline characters Sticky = __Regex_Sticky, // Force the pattern to only match consecutive matches from where the previous match ended. Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one. - SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches, // Do not remove empty capture group results. SingleMatch = __Regex_SingleMatch, // Stop after acquiring a single match. UnicodeSets = __Regex_UnicodeSets, // Only for ECMA262, Allow set operations in character classes. Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off. @@ -49,7 +48,6 @@ enum class PosixFlags : FlagsUnderlyingType { MatchNotBeginOfLine = (FlagsUnderlyingType)AllFlags::MatchNotBeginOfLine, MatchNotEndOfLine = (FlagsUnderlyingType)AllFlags::MatchNotEndOfLine, SkipSubExprResults = (FlagsUnderlyingType)AllFlags::SkipSubExprResults, - SkipTrimEmptyMatches = (FlagsUnderlyingType)AllFlags::SkipTrimEmptyMatches, Multiline = (FlagsUnderlyingType)AllFlags::Multiline, SingleMatch = (FlagsUnderlyingType)AllFlags::SingleMatch, }; diff --git a/Libraries/LibRegex/RegexParser.cpp b/Libraries/LibRegex/RegexParser.cpp index fc6156fcc34..c7d47ab38f7 100644 --- a/Libraries/LibRegex/RegexParser.cpp +++ b/Libraries/LibRegex/RegexParser.cpp @@ -857,7 +857,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si auto current_capture_group = m_parser_state.capture_groups_count; if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) { - bytecode.insert_bytecode_group_capture_left(current_capture_group); + bytecode.insert_bytecode_group_capture_left(current_capture_group + 1); m_parser_state.capture_groups_count++; } @@ -888,9 +888,9 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) { if (capture_group_name.has_value()) - bytecode.insert_bytecode_group_capture_right(current_capture_group, capture_group_name.value()); + bytecode.insert_bytecode_group_capture_right(current_capture_group + 1, capture_group_name.value()); else - bytecode.insert_bytecode_group_capture_right(current_capture_group); + bytecode.insert_bytecode_group_capture_right(current_capture_group + 1); } should_parse_repetition_symbol = true; break; diff --git a/Libraries/LibURL/Pattern/Component.cpp b/Libraries/LibURL/Pattern/Component.cpp index 2c857975d74..d5f78908fc1 100644 --- a/Libraries/LibURL/Pattern/Component.cpp +++ b/Libraries/LibURL/Pattern/Component.cpp @@ -228,7 +228,6 @@ PatternErrorOr Component::compile(Utf8View const& input, PatternParse auto flags = regex::RegexOptions { (regex::ECMAScriptFlags)regex::AllFlags::SingleMatch | (regex::ECMAScriptFlags)regex::AllFlags::Global - | (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches | regex::ECMAScriptFlags::BrowserExtended }; @@ -288,7 +287,7 @@ Component::Result Component::create_match_result(String const& input, regex::Reg // 4. Let index be 1. // 5. While index is less than Get(execResult, "length"): for (size_t index = 1; index <= exec_result.n_capture_groups; ++index) { - auto const& capture = exec_result.capture_group_matches[0][index]; + auto const& capture = exec_result.capture_group_matches[0][index - 1]; // 1. Let name be component’s group name list[index − 1]. auto name = group_name_list[index - 1]; diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index efa93e29ef7..d0cbc73790a 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -373,7 +373,7 @@ TEST_CASE(ini_file_entries) } EXPECT_EQ(result.matches.at(0).view, "[Window]"); - EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "Window"); + EXPECT_EQ(result.capture_group_matches.at(0).at(1).view, "Window"); EXPECT_EQ(result.matches.at(1).view, "Opacity=255"); EXPECT_EQ(result.matches.at(1).line, 1u); EXPECT_EQ(result.matches.at(1).column, 0u);