From 76f5dce3db9d63c7209db37a4d734b9e9b27602f Mon Sep 17 00:00:00 2001
From: Ali Mohammad Pur <ali.mpfard@gmail.com>
Date: Tue, 15 Apr 2025 15:31:08 +0200
Subject: [PATCH] LibRegex: Flatten capture group list in MatchState

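This makes copying the capture group COWVector significantly cheaper,
as we no longer have to run any constructors for it - just memcpy.

As part of this, capture groups are now stored at index (group id - 1),
so callers index capture_group_matches[match][id - 1], and the
SkipTrimEmptyMatches flag is removed: unmatched groups are always kept
in the result as empty entries instead of being trimmed.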
---
 AK/COWVector.h                              |  7 +++
 Libraries/LibJS/Runtime/RegExpObject.h      |  1 -
 Libraries/LibJS/Runtime/RegExpPrototype.cpp |  2 +-
 Libraries/LibRegex/RegexByteCode.cpp        | 49 ++++++++-------------
 Libraries/LibRegex/RegexDebug.h             |  2 +-
 Libraries/LibRegex/RegexDefs.h              | 13 +++---
 Libraries/LibRegex/RegexMatch.h             | 31 ++++++++++++-
 Libraries/LibRegex/RegexMatcher.cpp         | 47 ++++++++------------
 Libraries/LibRegex/RegexMatcher.h           |  4 +-
 Libraries/LibRegex/RegexOptimizer.cpp       | 16 +++----
 Libraries/LibRegex/RegexOptions.h           |  2 -
 Libraries/LibRegex/RegexParser.cpp          |  6 +--
 Libraries/LibURL/Pattern/Component.cpp      |  3 +-
 Tests/LibRegex/Regex.cpp                    |  2 +-
 14 files changed, 98 insertions(+), 87 deletions(-)
diff --git a/AK/COWVector.h b/AK/COWVector.h
index 96a4103c6fe..e7deb2cb036 100644
--- a/AK/COWVector.h
+++ b/AK/COWVector.h
@@ -127,6 +127,13 @@ public:
         return m_detail->m_members[index];
     }
 
+    Span<T const> span() const { return m_detail->m_members; } // Read-only view of the stored elements; does not call copy().
+    Span<T> mutable_span() // Writable view of the stored elements; calls copy() before exposing them.
+    {
+        copy(); // NOTE(review): presumably detaches storage shared with other COWVector copies - confirm against copy().
+        return m_detail->m_members;
+    }
+
     size_t capacity() const
     {
         return m_detail->m_members.capacity();
diff --git a/Libraries/LibJS/Runtime/RegExpObject.h b/Libraries/LibJS/Runtime/RegExpObject.h
index b7ebe63facc..397a0d53b22 100644
--- a/Libraries/LibJS/Runtime/RegExpObject.h
+++ b/Libraries/LibJS/Runtime/RegExpObject.h
@@ -35,7 +35,6 @@ public:
     static constexpr regex::RegexOptions<ECMAScriptFlags> default_flags {
         (regex::ECMAScriptFlags)regex::AllFlags::SingleMatch
         | (regex::ECMAScriptFlags)regex::AllFlags::Global
-        | (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches
         | regex::ECMAScriptFlags::BrowserExtended
     };
 
diff --git a/Libraries/LibJS/Runtime/RegExpPrototype.cpp b/Libraries/LibJS/Runtime/RegExpPrototype.cpp
index 5ab059e1ce9..ae40469c791 100644
--- a/Libraries/LibJS/Runtime/RegExpPrototype.cpp
+++ b/Libraries/LibJS/Runtime/RegExpPrototype.cpp
@@ -294,7 +294,7 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
     // 33. For each integer i such that i ≥ 1 and i ≤ n, in ascending order, do
     for (size_t i = 1; i <= result.n_capture_groups; ++i) {
         // a. Let captureI be ith element of r's captures List.
-        auto& capture = result.capture_group_matches[0][i];
+        auto& capture = result.capture_group_matches[0][i - 1];
 
         Value captured_value;
 
diff --git a/Libraries/LibRegex/RegexByteCode.cpp b/Libraries/LibRegex/RegexByteCode.cpp
index 6d190836a83..689b707035b 100644
--- a/Libraries/LibRegex/RegexByteCode.cpp
+++ b/Libraries/LibRegex/RegexByteCode.cpp
@@ -341,40 +341,29 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input,
 
 ALWAYS_INLINE ExecutionResult OpCode_ClearCaptureGroup::execute(MatchInput const& input, MatchState& state) const
 {
-    if (input.match_index < state.capture_group_matches.size()) {
-        auto& group = state.capture_group_matches.mutable_at(input.match_index);
-        auto group_id = id();
-        if (group_id >= group.size())
-            group.resize(group_id + 1);
-
-        group[group_id].reset();
+    if (input.match_index < state.capture_group_matches_size()) {
+        auto group = state.mutable_capture_group_matches(input.match_index);
+        group[id() - 1].reset();
     }
     return ExecutionResult::Continue;
 }
 
 ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(MatchInput const& input, MatchState& state) const
 {
-    if (input.match_index >= state.capture_group_matches.size()) {
-        state.capture_group_matches.ensure_capacity(input.match_index);
-        auto capacity = state.capture_group_matches.capacity();
-        for (size_t i = state.capture_group_matches.size(); i <= capacity; ++i)
-            state.capture_group_matches.empend();
+    if (input.match_index >= state.capture_group_matches_size()) {
+        state.flat_capture_group_matches.ensure_capacity((input.match_index + 1) * state.capture_group_count);
+        for (size_t i = state.capture_group_matches_size(); i <= input.match_index; ++i)
+            for (size_t j = 0; j < state.capture_group_count; ++j)
+                state.flat_capture_group_matches.append({});
     }
 
-    if (id() >= state.capture_group_matches.at(input.match_index).size()) {
-        state.capture_group_matches.mutable_at(input.match_index).ensure_capacity(id());
-        auto capacity = state.capture_group_matches.at(input.match_index).capacity();
-        for (size_t i = state.capture_group_matches.at(input.match_index).size(); i <= capacity; ++i)
-            state.capture_group_matches.mutable_at(input.match_index).empend();
-    }
-
-    state.capture_group_matches.mutable_at(input.match_index).at(id()).left_column = state.string_position;
+    state.mutable_capture_group_matches(input.match_index).at(id() - 1).left_column = state.string_position;
     return ExecutionResult::Continue;
 }
 
 ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput const& input, MatchState& state) const
 {
-    auto& match = state.capture_group_matches.mutable_at(input.match_index).at(id());
+    auto& match = state.capture_group_matches(input.match_index).at(id() - 1);
     auto start_position = match.left_column;
     if (state.string_position < start_position) {
         dbgln("Right capture group {} is before left capture group {}!", state.string_position, start_position);
@@ -388,14 +377,14 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput c
 
     VERIFY(start_position + length <= input.view.length());
 
-    match = { input.view.substring_view(start_position, length), input.line, start_position, input.global_offset + start_position };
+    state.mutable_capture_group_matches(input.match_index).at(id() - 1) = { input.view.substring_view(start_position, length), input.line, start_position, input.global_offset + start_position };
 
     return ExecutionResult::Continue;
 }
 
 ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchInput const& input, MatchState& state) const
 {
-    auto& match = state.capture_group_matches.mutable_at(input.match_index).at(id());
+    auto& match = state.capture_group_matches(input.match_index).at(id() - 1);
     auto start_position = match.left_column;
     if (state.string_position < start_position)
         return ExecutionResult::Failed_ExecuteLowPrioForks;
@@ -409,7 +398,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchIn
 
     auto view = input.view.substring_view(start_position, length);
 
-    match = { view, name_string_table_index(), input.line, start_position, input.global_offset + start_position };
+    state.mutable_capture_group_matches(input.match_index).at(id() - 1) = { view, name_string_table_index(), input.line, start_position, input.global_offset + start_position };
 
     return ExecutionResult::Continue;
 }
@@ -584,11 +573,11 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
             break;
         }
         case CharacterCompareType::Reference: {
-            auto reference_number = (size_t)m_bytecode->at(offset++);
-            if (input.match_index >= state.capture_group_matches.size())
+            auto reference_number = ((size_t)m_bytecode->at(offset++)) - 1;
+            if (input.match_index >= state.capture_group_matches_size())
                 return ExecutionResult::Failed_ExecuteLowPrioForks;
 
-            auto& groups = state.capture_group_matches.at(input.match_index);
+            auto groups = state.capture_group_matches(input.match_index);
             if (groups.size() <= reference_number)
                 return ExecutionResult::Failed_ExecuteLowPrioForks;
 
@@ -988,8 +977,8 @@ Vector<ByteString> OpCode_Compare::variable_arguments_to_byte_string(Optional<Ma
             auto ref = m_bytecode->at(offset++);
             result.empend(ByteString::formatted(" number={}", ref));
             if (input.has_value()) {
-                if (state().capture_group_matches.size() > input->match_index) {
-                    auto& match = state().capture_group_matches[input->match_index];
+                if (state().capture_group_matches_size() > input->match_index) {
+                    auto match = state().capture_group_matches(input->match_index);
                     if (match.size() > ref) {
                         auto& group = match[ref];
                         result.empend(ByteString::formatted(" left={}", group.left_column));
@@ -999,7 +988,7 @@ Vector<ByteString> OpCode_Compare::variable_arguments_to_byte_string(Optional<Ma
                         result.empend(ByteString::formatted(" (invalid ref, max={})", match.size() - 1));
                     }
                 } else {
-                    result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches.size() - 1));
+                    result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches_size() - 1));
                 }
             }
         } else if (compare_type == CharacterCompareType::String) {
diff --git a/Libraries/LibRegex/RegexDebug.h b/Libraries/LibRegex/RegexDebug.h
index f8494263716..8414ca0d579 100644
--- a/Libraries/LibRegex/RegexDebug.h
+++ b/Libraries/LibRegex/RegexDebug.h
@@ -39,7 +39,7 @@ public:
 
     void print_bytecode(ByteCode const& bytecode) const
     {
-        MatchState state;
+        auto state = MatchState::only_for_enumeration();
         for (;;) {
             auto& opcode = bytecode.get_opcode(state);
             print_opcode("PrintBytecode", opcode, state);
diff --git a/Libraries/LibRegex/RegexDefs.h b/Libraries/LibRegex/RegexDefs.h
index 9cc8ad8fdd6..6a9f60fa97f 100644
--- a/Libraries/LibRegex/RegexDefs.h
+++ b/Libraries/LibRegex/RegexDefs.h
@@ -43,12 +43,11 @@ enum __RegexAllFlags {
     __Regex_SingleLine = __Regex_Global << 10,                   // Dot matches newline characters
     __Regex_Sticky = __Regex_Global << 11,                       // Force the pattern to only match consecutive matches from where the previous match ended.
     __Regex_Multiline = __Regex_Global << 12,                    // Handle newline characters. Match each line, one by one.
-    __Regex_SkipTrimEmptyMatches = __Regex_Global << 13,         // Do not remove empty capture group results.
-    __Regex_SingleMatch = __Regex_Global << 14,                  // Stop after acquiring a single match.
-    __Regex_UnicodeSets = __Regex_Global << 15,                  // ECMA262 Parser specific: Allow set operations in char classes.
-    __Regex_Internal_Stateful = __Regex_Global << 16,            // Internal flag; enables stateful matches.
-    __Regex_Internal_BrowserExtended = __Regex_Global << 17,     // Internal flag; enable browser-specific ECMA262 extensions.
-    __Regex_Internal_ConsiderNewline = __Regex_Global << 18,     // Internal flag; allow matchers to consider newlines as line separators.
-    __Regex_Internal_ECMA262DotSemantics = __Regex_Global << 19, // Internal flag; use ECMA262 semantics for dot ('.') - disallow CR/LF/LS/PS instead of just CR.
+    __Regex_SingleMatch = __Regex_Global << 13,                  // Stop after acquiring a single match.
+    __Regex_UnicodeSets = __Regex_Global << 14,                  // ECMA262 Parser specific: Allow set operations in char classes.
+    __Regex_Internal_Stateful = __Regex_Global << 15,            // Internal flag; enables stateful matches.
+    __Regex_Internal_BrowserExtended = __Regex_Global << 16,     // Internal flag; enable browser-specific ECMA262 extensions.
+    __Regex_Internal_ConsiderNewline = __Regex_Global << 17,     // Internal flag; allow matchers to consider newlines as line separators.
+    __Regex_Internal_ECMA262DotSemantics = __Regex_Global << 18, // Internal flag; use ECMA262 semantics for dot ('.') - disallow CR/LF/LS/PS instead of just CR.
     __Regex_Last = __Regex_Internal_ECMA262DotSemantics,
 };
diff --git a/Libraries/LibRegex/RegexMatch.h b/Libraries/LibRegex/RegexMatch.h
index 320a999edae..7a81f61834d 100644
--- a/Libraries/LibRegex/RegexMatch.h
+++ b/Libraries/LibRegex/RegexMatch.h
@@ -369,6 +369,7 @@ struct MatchInput {
 };
 
 struct MatchState {
+    size_t capture_group_count;
     size_t string_position_before_match { 0 };
     size_t string_position { 0 };
     size_t string_position_in_code_units { 0 };
@@ -377,10 +378,38 @@ struct MatchState {
     size_t forks_since_last_save { 0 };
     Optional<size_t> initiating_fork;
     COWVector<Match> matches;
-    COWVector<Vector<Match>> capture_group_matches;
+    COWVector<Match> flat_capture_group_matches; // Vector<Vector<Match>> indexed by match index, then by capture group id; flattened for performance
     COWVector<u64> repetition_marks;
     Vector<u64, 64> checkpoints;
 
+    explicit MatchState(size_t capture_group_count)
+        : capture_group_count(capture_group_count)
+    {
+    }
+
+    MatchState(MatchState const&) = default;
+    MatchState(MatchState&&) = default;
+
+    MatchState& operator=(MatchState const&) = default;
+    MatchState& operator=(MatchState&&) = default;
+
+    static MatchState only_for_enumeration() { return MatchState { 0 }; }
+
+    // Number of per-match rows stored in flat_capture_group_matches.
+    // A state with no capture groups (e.g. only_for_enumeration()) has no rows;
+    // checking for that first also avoids dividing by zero.
+    size_t capture_group_matches_size() const { return capture_group_count == 0 ? 0 : flat_capture_group_matches.size() / capture_group_count; }
+
+    Span<Match const> capture_group_matches(size_t match_index) const // Read-only row for match_index: capture_group_count entries starting at match_index * capture_group_count.
+    {
+        return flat_capture_group_matches.span().slice(match_index * capture_group_count, capture_group_count);
+    }
+
+    Span<Match> mutable_capture_group_matches(size_t match_index) // Writable row for match_index, sliced from mutable_span() with the same offset and length as the const accessor.
+    {
+        return flat_capture_group_matches.mutable_span().slice(match_index * capture_group_count, capture_group_count);
+    }
+
     // For size_t in {0..100}, ips in {0..500} and repetitions in {0..30}, there are zero collisions.
     // For the full range, zero collisions were found in 8 million random samples.
     u64 u64_hash() const
diff --git a/Libraries/LibRegex/RegexMatcher.cpp b/Libraries/LibRegex/RegexMatcher.cpp
index afcf7d9c13b..fed306ce706 100644
--- a/Libraries/LibRegex/RegexMatcher.cpp
+++ b/Libraries/LibRegex/RegexMatcher.cpp
@@ -164,7 +164,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
     size_t match_count { 0 };
 
     MatchInput input;
-    MatchState state;
+    MatchState state { m_pattern->parser_result.capture_groups_count };
     size_t operations = 0;
 
     input.regex_options = m_regex_options | regex_options.value_or({}).value();
@@ -189,20 +189,6 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
         }
     }
 
-    if (c_match_preallocation_count) {
-        state.matches.ensure_capacity(c_match_preallocation_count);
-        state.capture_group_matches.ensure_capacity(c_match_preallocation_count);
-        auto& capture_groups_count = m_pattern->parser_result.capture_groups_count;
-
-        for (size_t j = 0; j < c_match_preallocation_count; ++j) {
-            state.matches.empend();
-            state.capture_group_matches.empend();
-            state.capture_group_matches.mutable_at(j).ensure_capacity(capture_groups_count);
-            for (size_t k = 0; k < capture_groups_count; ++k)
-                state.capture_group_matches.mutable_at(j).unchecked_append({});
-        }
-    }
-
     auto append_match = [](auto& input, auto& state, auto& start_position) {
         if (state.matches.size() == input.match_index)
             state.matches.empend();
@@ -343,29 +329,34 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
             break;
     }
 
+    auto flat_capture_group_matches = move(state.flat_capture_group_matches).release();
+    if (flat_capture_group_matches.size() < state.capture_group_count * match_count) {
+        flat_capture_group_matches.ensure_capacity(match_count * state.capture_group_count);
+        for (size_t i = flat_capture_group_matches.size(); i < match_count * state.capture_group_count; ++i)
+            flat_capture_group_matches.empend();
+    }
+
+    Vector<Span<Match>> capture_group_matches;
+    for (size_t i = 0; i < match_count; ++i) {
+        auto span = flat_capture_group_matches.span().slice(state.capture_group_count * i, state.capture_group_count);
+        capture_group_matches.append(span);
+    }
+
     RegexResult result {
         match_count != 0,
         match_count,
         move(state.matches).release(),
-        move(state.capture_group_matches).release(),
+        move(flat_capture_group_matches),
+        move(capture_group_matches),
         operations,
         m_pattern->parser_result.capture_groups_count,
         m_pattern->parser_result.named_capture_groups_count,
     };
 
-    if (match_count) {
-        // Make sure there are as many capture matches as there are actual matches.
-        if (result.capture_group_matches.size() < match_count)
-            result.capture_group_matches.resize(match_count);
-        for (auto& matches : result.capture_group_matches)
-            matches.resize(m_pattern->parser_result.capture_groups_count + 1);
-        if (!input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) {
-            for (auto& matches : result.capture_group_matches)
-                matches.remove_all_matching([](auto& match) { return match.view.is_null(); });
-        }
-    } else {
+    if (match_count > 0)
+        VERIFY(result.capture_group_matches.size() >= match_count);
+    else
         result.capture_group_matches.clear_with_capacity();
-    }
 
     return result;
 }
diff --git a/Libraries/LibRegex/RegexMatcher.h b/Libraries/LibRegex/RegexMatcher.h
index 7dd3509d5ad..4332be0b4bb 100644
--- a/Libraries/LibRegex/RegexMatcher.h
+++ b/Libraries/LibRegex/RegexMatcher.h
@@ -31,13 +31,13 @@ struct Block {
 }
 
 static constexpr size_t const c_max_recursion = 5000;
-static constexpr size_t const c_match_preallocation_count = 0;
 
 struct RegexResult final {
     bool success { false };
     size_t count { 0 };
     Vector<Match> matches;
-    Vector<Vector<Match>> capture_group_matches;
+    Vector<Match> flat_capture_group_matches;
+    Vector<Span<Match>> capture_group_matches;
     size_t n_operations { 0 };
     size_t n_capture_groups { 0 };
     size_t n_named_capture_groups { 0 };
diff --git a/Libraries/LibRegex/RegexOptimizer.cpp b/Libraries/LibRegex/RegexOptimizer.cpp
index a519c616bac..b84462e71b0 100644
--- a/Libraries/LibRegex/RegexOptimizer.cpp
+++ b/Libraries/LibRegex/RegexOptimizer.cpp
@@ -37,7 +37,7 @@ void Regex<Parser>::run_optimization_passes()
     attempt_rewrite_loops_as_atomic_groups(blocks);
 
     // FIXME: "There are a few more conditions this can be true in (e.g. within an arbitrarily nested capture group)"
-    MatchState state;
+    auto state = MatchState::only_for_enumeration();
     auto& opcode = parser_result.bytecode.get_opcode(state);
     if (opcode.opcode_id() == OpCodeId::CheckBegin)
         parser_result.optimization_data.only_start_of_line = true;
@@ -53,7 +53,7 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCod
 
     auto bytecode_size = bytecode.size();
 
-    MatchState state;
+    auto state = MatchState::only_for_enumeration();
     state.instruction_position = 0;
     auto check_jump = [&]<typename T>(OpCode const& opcode) {
         auto& op = static_cast<T const&>(opcode);
@@ -512,7 +512,7 @@ enum class AtomicRewritePreconditionResult {
 static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block repeated_block, Block following_block, auto const& all_blocks)
 {
     Vector<Vector<CompareTypeAndValuePair>> repeated_values;
-    MatchState state;
+    auto state = MatchState::only_for_enumeration();
     auto has_seen_actionable_opcode = false;
     for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) {
         auto& opcode = bytecode.get_opcode(state);
@@ -680,7 +680,7 @@ bool Regex<Parser>::attempt_rewrite_entire_match_as_substring_search(BasicBlockL
 
     // We have a single basic block, let's see if it's a series of character or string compares.
     StringBuilder final_string;
-    MatchState state;
+    auto state = MatchState::only_for_enumeration();
     while (state.instruction_position < bytecode.size()) {
         auto& opcode = bytecode.get_opcode(state);
         switch (opcode.opcode_id()) {
@@ -796,7 +796,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
         Optional<Block> fork_fallback_block;
         if (i + 1 < basic_blocks.size())
             fork_fallback_block = basic_blocks[i + 1];
-        MatchState state;
+        auto state = MatchState::only_for_enumeration();
         // Check if the last instruction in this block is a jump to the block itself:
         {
             state.instruction_position = forking_block.end;
@@ -913,7 +913,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
     }
 
     if (!needed_patches.is_empty()) {
-        MatchState state;
+        auto state = MatchState::only_for_enumeration();
         auto bytecode_size = bytecode.size();
         state.instruction_position = 0;
         struct Patch {
@@ -1039,7 +1039,7 @@ void Optimizer::append_alternation(ByteCode& target, Span<ByteCode> alternatives
 
     auto has_any_backwards_jump = false;
 
-    MatchState state;
+    auto state = MatchState::only_for_enumeration();
 
     for (size_t i = 0; i < alternatives.size(); ++i) {
         auto& alternative = alternatives[i];
@@ -1144,7 +1144,7 @@ void Optimizer::append_alternation(ByteCode& target, Span<ByteCode> alternatives
                     node.metadata_value().size(),
                     node.metadata_value().size() == 1 ? "" : "s");
 
-                MatchState state;
+                auto state = MatchState::only_for_enumeration();
                 state.instruction_position = node.metadata_value().first().instruction_position;
                 auto& opcode = alternatives[node.metadata_value().first().alternative_index].get_opcode(state);
                 insn = ByteString::formatted("{} {}", opcode.to_byte_string(), opcode.arguments_string());
diff --git a/Libraries/LibRegex/RegexOptions.h b/Libraries/LibRegex/RegexOptions.h
index bddcb1ba181..ae7e71839ba 100644
--- a/Libraries/LibRegex/RegexOptions.h
+++ b/Libraries/LibRegex/RegexOptions.h
@@ -28,7 +28,6 @@ enum class AllFlags {
     SingleLine = __Regex_SingleLine,                                     // Dot matches newline characters
     Sticky = __Regex_Sticky,                                             // Force the pattern to only match consecutive matches from where the previous match ended.
     Multiline = __Regex_Multiline,                                       // Handle newline characters. Match each line, one by one.
-    SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches,                 // Do not remove empty capture group results.
     SingleMatch = __Regex_SingleMatch,                                   // Stop after acquiring a single match.
     UnicodeSets = __Regex_UnicodeSets,                                   // Only for ECMA262, Allow set operations in character classes.
     Internal_Stateful = __Regex_Internal_Stateful,                       // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
@@ -49,7 +48,6 @@ enum class PosixFlags : FlagsUnderlyingType {
     MatchNotBeginOfLine = (FlagsUnderlyingType)AllFlags::MatchNotBeginOfLine,
     MatchNotEndOfLine = (FlagsUnderlyingType)AllFlags::MatchNotEndOfLine,
     SkipSubExprResults = (FlagsUnderlyingType)AllFlags::SkipSubExprResults,
-    SkipTrimEmptyMatches = (FlagsUnderlyingType)AllFlags::SkipTrimEmptyMatches,
     Multiline = (FlagsUnderlyingType)AllFlags::Multiline,
     SingleMatch = (FlagsUnderlyingType)AllFlags::SingleMatch,
 };
diff --git a/Libraries/LibRegex/RegexParser.cpp b/Libraries/LibRegex/RegexParser.cpp
index fc6156fcc34..c7d47ab38f7 100644
--- a/Libraries/LibRegex/RegexParser.cpp
+++ b/Libraries/LibRegex/RegexParser.cpp
@@ -857,7 +857,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
 
             auto current_capture_group = m_parser_state.capture_groups_count;
             if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
-                bytecode.insert_bytecode_group_capture_left(current_capture_group);
+                bytecode.insert_bytecode_group_capture_left(current_capture_group + 1);
                 m_parser_state.capture_groups_count++;
             }
 
@@ -888,9 +888,9 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
 
             if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
                 if (capture_group_name.has_value())
-                    bytecode.insert_bytecode_group_capture_right(current_capture_group, capture_group_name.value());
+                    bytecode.insert_bytecode_group_capture_right(current_capture_group + 1, capture_group_name.value());
                 else
-                    bytecode.insert_bytecode_group_capture_right(current_capture_group);
+                    bytecode.insert_bytecode_group_capture_right(current_capture_group + 1);
             }
             should_parse_repetition_symbol = true;
             break;
diff --git a/Libraries/LibURL/Pattern/Component.cpp b/Libraries/LibURL/Pattern/Component.cpp
index 2c857975d74..d5f78908fc1 100644
--- a/Libraries/LibURL/Pattern/Component.cpp
+++ b/Libraries/LibURL/Pattern/Component.cpp
@@ -228,7 +228,6 @@ PatternErrorOr<Component> Component::compile(Utf8View const& input, PatternParse
     auto flags = regex::RegexOptions<ECMAScriptFlags> {
         (regex::ECMAScriptFlags)regex::AllFlags::SingleMatch
         | (regex::ECMAScriptFlags)regex::AllFlags::Global
-        | (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches
         | regex::ECMAScriptFlags::BrowserExtended
     };
 
@@ -288,7 +287,7 @@ Component::Result Component::create_match_result(String const& input, regex::Reg
     // 4. Let index be 1.
     // 5. While index is less than Get(execResult, "length"):
     for (size_t index = 1; index <= exec_result.n_capture_groups; ++index) {
-        auto const& capture = exec_result.capture_group_matches[0][index];
+        auto const& capture = exec_result.capture_group_matches[0][index - 1];
 
         // 1. Let name be component’s group name list[index − 1].
         auto name = group_name_list[index - 1];
diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index efa93e29ef7..d0cbc73790a 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -373,7 +373,7 @@ TEST_CASE(ini_file_entries)
     }
 
     EXPECT_EQ(result.matches.at(0).view, "[Window]");
-    EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "Window");
+    EXPECT_EQ(result.capture_group_matches.at(0).at(1).view, "Window");
     EXPECT_EQ(result.matches.at(1).view, "Opacity=255");
     EXPECT_EQ(result.matches.at(1).line, 1u);
     EXPECT_EQ(result.matches.at(1).column, 0u);