mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-06-08 05:27:14 +09:00
LibRegex: Flatten capture group list in MatchState
This makes copying the capture group COWVector significantly cheaper, as we no longer have to run any constructors for it - just memcpy.
This commit is contained in:
parent
bbef0e8375
commit
76f5dce3db
Notes:
github-actions[bot]
2025-04-18 15:10:37 +00:00
Author: https://github.com/alimpfard
Commit: 76f5dce3db
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/4375
14 changed files with 98 additions and 87 deletions
|
@ -127,6 +127,13 @@ public:
|
||||||
return m_detail->m_members[index];
|
return m_detail->m_members[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Span<T const> span() const { return m_detail->m_members; }
|
||||||
|
Span<T> mutable_span()
|
||||||
|
{
|
||||||
|
copy();
|
||||||
|
return m_detail->m_members;
|
||||||
|
}
|
||||||
|
|
||||||
size_t capacity() const
|
size_t capacity() const
|
||||||
{
|
{
|
||||||
return m_detail->m_members.capacity();
|
return m_detail->m_members.capacity();
|
||||||
|
|
|
@ -35,7 +35,6 @@ public:
|
||||||
static constexpr regex::RegexOptions<ECMAScriptFlags> default_flags {
|
static constexpr regex::RegexOptions<ECMAScriptFlags> default_flags {
|
||||||
(regex::ECMAScriptFlags)regex::AllFlags::SingleMatch
|
(regex::ECMAScriptFlags)regex::AllFlags::SingleMatch
|
||||||
| (regex::ECMAScriptFlags)regex::AllFlags::Global
|
| (regex::ECMAScriptFlags)regex::AllFlags::Global
|
||||||
| (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches
|
|
||||||
| regex::ECMAScriptFlags::BrowserExtended
|
| regex::ECMAScriptFlags::BrowserExtended
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -294,7 +294,7 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
|
||||||
// 33. For each integer i such that i ≥ 1 and i ≤ n, in ascending order, do
|
// 33. For each integer i such that i ≥ 1 and i ≤ n, in ascending order, do
|
||||||
for (size_t i = 1; i <= result.n_capture_groups; ++i) {
|
for (size_t i = 1; i <= result.n_capture_groups; ++i) {
|
||||||
// a. Let captureI be ith element of r's captures List.
|
// a. Let captureI be ith element of r's captures List.
|
||||||
auto& capture = result.capture_group_matches[0][i];
|
auto& capture = result.capture_group_matches[0][i - 1];
|
||||||
|
|
||||||
Value captured_value;
|
Value captured_value;
|
||||||
|
|
||||||
|
|
|
@ -341,40 +341,29 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input,
|
||||||
|
|
||||||
ALWAYS_INLINE ExecutionResult OpCode_ClearCaptureGroup::execute(MatchInput const& input, MatchState& state) const
|
ALWAYS_INLINE ExecutionResult OpCode_ClearCaptureGroup::execute(MatchInput const& input, MatchState& state) const
|
||||||
{
|
{
|
||||||
if (input.match_index < state.capture_group_matches.size()) {
|
if (input.match_index < state.capture_group_matches_size()) {
|
||||||
auto& group = state.capture_group_matches.mutable_at(input.match_index);
|
auto group = state.mutable_capture_group_matches(input.match_index);
|
||||||
auto group_id = id();
|
group[id() - 1].reset();
|
||||||
if (group_id >= group.size())
|
|
||||||
group.resize(group_id + 1);
|
|
||||||
|
|
||||||
group[group_id].reset();
|
|
||||||
}
|
}
|
||||||
return ExecutionResult::Continue;
|
return ExecutionResult::Continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(MatchInput const& input, MatchState& state) const
|
ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(MatchInput const& input, MatchState& state) const
|
||||||
{
|
{
|
||||||
if (input.match_index >= state.capture_group_matches.size()) {
|
if (input.match_index >= state.capture_group_matches_size()) {
|
||||||
state.capture_group_matches.ensure_capacity(input.match_index);
|
state.flat_capture_group_matches.ensure_capacity((input.match_index + 1) * state.capture_group_count);
|
||||||
auto capacity = state.capture_group_matches.capacity();
|
for (size_t i = state.capture_group_matches_size(); i <= input.match_index; ++i)
|
||||||
for (size_t i = state.capture_group_matches.size(); i <= capacity; ++i)
|
for (size_t j = 0; j < state.capture_group_count; ++j)
|
||||||
state.capture_group_matches.empend();
|
state.flat_capture_group_matches.append({});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (id() >= state.capture_group_matches.at(input.match_index).size()) {
|
state.mutable_capture_group_matches(input.match_index).at(id() - 1).left_column = state.string_position;
|
||||||
state.capture_group_matches.mutable_at(input.match_index).ensure_capacity(id());
|
|
||||||
auto capacity = state.capture_group_matches.at(input.match_index).capacity();
|
|
||||||
for (size_t i = state.capture_group_matches.at(input.match_index).size(); i <= capacity; ++i)
|
|
||||||
state.capture_group_matches.mutable_at(input.match_index).empend();
|
|
||||||
}
|
|
||||||
|
|
||||||
state.capture_group_matches.mutable_at(input.match_index).at(id()).left_column = state.string_position;
|
|
||||||
return ExecutionResult::Continue;
|
return ExecutionResult::Continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput const& input, MatchState& state) const
|
ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput const& input, MatchState& state) const
|
||||||
{
|
{
|
||||||
auto& match = state.capture_group_matches.mutable_at(input.match_index).at(id());
|
auto& match = state.capture_group_matches(input.match_index).at(id() - 1);
|
||||||
auto start_position = match.left_column;
|
auto start_position = match.left_column;
|
||||||
if (state.string_position < start_position) {
|
if (state.string_position < start_position) {
|
||||||
dbgln("Right capture group {} is before left capture group {}!", state.string_position, start_position);
|
dbgln("Right capture group {} is before left capture group {}!", state.string_position, start_position);
|
||||||
|
@ -388,14 +377,14 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput c
|
||||||
|
|
||||||
VERIFY(start_position + length <= input.view.length());
|
VERIFY(start_position + length <= input.view.length());
|
||||||
|
|
||||||
match = { input.view.substring_view(start_position, length), input.line, start_position, input.global_offset + start_position };
|
state.mutable_capture_group_matches(input.match_index).at(id() - 1) = { input.view.substring_view(start_position, length), input.line, start_position, input.global_offset + start_position };
|
||||||
|
|
||||||
return ExecutionResult::Continue;
|
return ExecutionResult::Continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchInput const& input, MatchState& state) const
|
ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchInput const& input, MatchState& state) const
|
||||||
{
|
{
|
||||||
auto& match = state.capture_group_matches.mutable_at(input.match_index).at(id());
|
auto& match = state.capture_group_matches(input.match_index).at(id() - 1);
|
||||||
auto start_position = match.left_column;
|
auto start_position = match.left_column;
|
||||||
if (state.string_position < start_position)
|
if (state.string_position < start_position)
|
||||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||||
|
@ -409,7 +398,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchIn
|
||||||
|
|
||||||
auto view = input.view.substring_view(start_position, length);
|
auto view = input.view.substring_view(start_position, length);
|
||||||
|
|
||||||
match = { view, name_string_table_index(), input.line, start_position, input.global_offset + start_position };
|
state.mutable_capture_group_matches(input.match_index).at(id() - 1) = { view, name_string_table_index(), input.line, start_position, input.global_offset + start_position };
|
||||||
|
|
||||||
return ExecutionResult::Continue;
|
return ExecutionResult::Continue;
|
||||||
}
|
}
|
||||||
|
@ -584,11 +573,11 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CharacterCompareType::Reference: {
|
case CharacterCompareType::Reference: {
|
||||||
auto reference_number = (size_t)m_bytecode->at(offset++);
|
auto reference_number = ((size_t)m_bytecode->at(offset++)) - 1;
|
||||||
if (input.match_index >= state.capture_group_matches.size())
|
if (input.match_index >= state.capture_group_matches_size())
|
||||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||||
|
|
||||||
auto& groups = state.capture_group_matches.at(input.match_index);
|
auto groups = state.capture_group_matches(input.match_index);
|
||||||
if (groups.size() <= reference_number)
|
if (groups.size() <= reference_number)
|
||||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||||
|
|
||||||
|
@ -988,8 +977,8 @@ Vector<ByteString> OpCode_Compare::variable_arguments_to_byte_string(Optional<Ma
|
||||||
auto ref = m_bytecode->at(offset++);
|
auto ref = m_bytecode->at(offset++);
|
||||||
result.empend(ByteString::formatted(" number={}", ref));
|
result.empend(ByteString::formatted(" number={}", ref));
|
||||||
if (input.has_value()) {
|
if (input.has_value()) {
|
||||||
if (state().capture_group_matches.size() > input->match_index) {
|
if (state().capture_group_matches_size() > input->match_index) {
|
||||||
auto& match = state().capture_group_matches[input->match_index];
|
auto match = state().capture_group_matches(input->match_index);
|
||||||
if (match.size() > ref) {
|
if (match.size() > ref) {
|
||||||
auto& group = match[ref];
|
auto& group = match[ref];
|
||||||
result.empend(ByteString::formatted(" left={}", group.left_column));
|
result.empend(ByteString::formatted(" left={}", group.left_column));
|
||||||
|
@ -999,7 +988,7 @@ Vector<ByteString> OpCode_Compare::variable_arguments_to_byte_string(Optional<Ma
|
||||||
result.empend(ByteString::formatted(" (invalid ref, max={})", match.size() - 1));
|
result.empend(ByteString::formatted(" (invalid ref, max={})", match.size() - 1));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches.size() - 1));
|
result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches_size() - 1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (compare_type == CharacterCompareType::String) {
|
} else if (compare_type == CharacterCompareType::String) {
|
||||||
|
|
|
@ -39,7 +39,7 @@ public:
|
||||||
|
|
||||||
void print_bytecode(ByteCode const& bytecode) const
|
void print_bytecode(ByteCode const& bytecode) const
|
||||||
{
|
{
|
||||||
MatchState state;
|
auto state = MatchState::only_for_enumeration();
|
||||||
for (;;) {
|
for (;;) {
|
||||||
auto& opcode = bytecode.get_opcode(state);
|
auto& opcode = bytecode.get_opcode(state);
|
||||||
print_opcode("PrintBytecode", opcode, state);
|
print_opcode("PrintBytecode", opcode, state);
|
||||||
|
|
|
@ -43,12 +43,11 @@ enum __RegexAllFlags {
|
||||||
__Regex_SingleLine = __Regex_Global << 10, // Dot matches newline characters
|
__Regex_SingleLine = __Regex_Global << 10, // Dot matches newline characters
|
||||||
__Regex_Sticky = __Regex_Global << 11, // Force the pattern to only match consecutive matches from where the previous match ended.
|
__Regex_Sticky = __Regex_Global << 11, // Force the pattern to only match consecutive matches from where the previous match ended.
|
||||||
__Regex_Multiline = __Regex_Global << 12, // Handle newline characters. Match each line, one by one.
|
__Regex_Multiline = __Regex_Global << 12, // Handle newline characters. Match each line, one by one.
|
||||||
__Regex_SkipTrimEmptyMatches = __Regex_Global << 13, // Do not remove empty capture group results.
|
__Regex_SingleMatch = __Regex_Global << 13, // Stop after acquiring a single match.
|
||||||
__Regex_SingleMatch = __Regex_Global << 14, // Stop after acquiring a single match.
|
__Regex_UnicodeSets = __Regex_Global << 14, // ECMA262 Parser specific: Allow set operations in char classes.
|
||||||
__Regex_UnicodeSets = __Regex_Global << 15, // ECMA262 Parser specific: Allow set operations in char classes.
|
__Regex_Internal_Stateful = __Regex_Global << 15, // Internal flag; enables stateful matches.
|
||||||
__Regex_Internal_Stateful = __Regex_Global << 16, // Internal flag; enables stateful matches.
|
__Regex_Internal_BrowserExtended = __Regex_Global << 16, // Internal flag; enable browser-specific ECMA262 extensions.
|
||||||
__Regex_Internal_BrowserExtended = __Regex_Global << 17, // Internal flag; enable browser-specific ECMA262 extensions.
|
__Regex_Internal_ConsiderNewline = __Regex_Global << 17, // Internal flag; allow matchers to consider newlines as line separators.
|
||||||
__Regex_Internal_ConsiderNewline = __Regex_Global << 18, // Internal flag; allow matchers to consider newlines as line separators.
|
__Regex_Internal_ECMA262DotSemantics = __Regex_Global << 18, // Internal flag; use ECMA262 semantics for dot ('.') - disallow CR/LF/LS/PS instead of just CR.
|
||||||
__Regex_Internal_ECMA262DotSemantics = __Regex_Global << 19, // Internal flag; use ECMA262 semantics for dot ('.') - disallow CR/LF/LS/PS instead of just CR.
|
|
||||||
__Regex_Last = __Regex_Internal_ECMA262DotSemantics,
|
__Regex_Last = __Regex_Internal_ECMA262DotSemantics,
|
||||||
};
|
};
|
||||||
|
|
|
@ -369,6 +369,7 @@ struct MatchInput {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct MatchState {
|
struct MatchState {
|
||||||
|
size_t capture_group_count;
|
||||||
size_t string_position_before_match { 0 };
|
size_t string_position_before_match { 0 };
|
||||||
size_t string_position { 0 };
|
size_t string_position { 0 };
|
||||||
size_t string_position_in_code_units { 0 };
|
size_t string_position_in_code_units { 0 };
|
||||||
|
@ -377,10 +378,38 @@ struct MatchState {
|
||||||
size_t forks_since_last_save { 0 };
|
size_t forks_since_last_save { 0 };
|
||||||
Optional<size_t> initiating_fork;
|
Optional<size_t> initiating_fork;
|
||||||
COWVector<Match> matches;
|
COWVector<Match> matches;
|
||||||
COWVector<Vector<Match>> capture_group_matches;
|
COWVector<Match> flat_capture_group_matches; // Vector<Vector<Match>> indexed by match index, then by capture group id; flattened for performance
|
||||||
COWVector<u64> repetition_marks;
|
COWVector<u64> repetition_marks;
|
||||||
Vector<u64, 64> checkpoints;
|
Vector<u64, 64> checkpoints;
|
||||||
|
|
||||||
|
explicit MatchState(size_t capture_group_count)
|
||||||
|
: capture_group_count(capture_group_count)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
MatchState(MatchState const&) = default;
|
||||||
|
MatchState(MatchState&&) = default;
|
||||||
|
|
||||||
|
MatchState& operator=(MatchState const&) = default;
|
||||||
|
MatchState& operator=(MatchState&&) = default;
|
||||||
|
|
||||||
|
static MatchState only_for_enumeration() { return MatchState { 0 }; }
|
||||||
|
|
||||||
|
size_t capture_group_matches_size() const
|
||||||
|
{
|
||||||
|
return flat_capture_group_matches.size() / capture_group_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
Span<Match const> capture_group_matches(size_t match_index) const
|
||||||
|
{
|
||||||
|
return flat_capture_group_matches.span().slice(match_index * capture_group_count, capture_group_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
Span<Match> mutable_capture_group_matches(size_t match_index)
|
||||||
|
{
|
||||||
|
return flat_capture_group_matches.mutable_span().slice(match_index * capture_group_count, capture_group_count);
|
||||||
|
}
|
||||||
|
|
||||||
// For size_t in {0..100}, ips in {0..500} and repetitions in {0..30}, there are zero collisions.
|
// For size_t in {0..100}, ips in {0..500} and repetitions in {0..30}, there are zero collisions.
|
||||||
// For the full range, zero collisions were found in 8 million random samples.
|
// For the full range, zero collisions were found in 8 million random samples.
|
||||||
u64 u64_hash() const
|
u64 u64_hash() const
|
||||||
|
|
|
@ -164,7 +164,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
||||||
size_t match_count { 0 };
|
size_t match_count { 0 };
|
||||||
|
|
||||||
MatchInput input;
|
MatchInput input;
|
||||||
MatchState state;
|
MatchState state { m_pattern->parser_result.capture_groups_count };
|
||||||
size_t operations = 0;
|
size_t operations = 0;
|
||||||
|
|
||||||
input.regex_options = m_regex_options | regex_options.value_or({}).value();
|
input.regex_options = m_regex_options | regex_options.value_or({}).value();
|
||||||
|
@ -189,20 +189,6 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c_match_preallocation_count) {
|
|
||||||
state.matches.ensure_capacity(c_match_preallocation_count);
|
|
||||||
state.capture_group_matches.ensure_capacity(c_match_preallocation_count);
|
|
||||||
auto& capture_groups_count = m_pattern->parser_result.capture_groups_count;
|
|
||||||
|
|
||||||
for (size_t j = 0; j < c_match_preallocation_count; ++j) {
|
|
||||||
state.matches.empend();
|
|
||||||
state.capture_group_matches.empend();
|
|
||||||
state.capture_group_matches.mutable_at(j).ensure_capacity(capture_groups_count);
|
|
||||||
for (size_t k = 0; k < capture_groups_count; ++k)
|
|
||||||
state.capture_group_matches.mutable_at(j).unchecked_append({});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
auto append_match = [](auto& input, auto& state, auto& start_position) {
|
auto append_match = [](auto& input, auto& state, auto& start_position) {
|
||||||
if (state.matches.size() == input.match_index)
|
if (state.matches.size() == input.match_index)
|
||||||
state.matches.empend();
|
state.matches.empend();
|
||||||
|
@ -343,29 +329,34 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto flat_capture_group_matches = move(state.flat_capture_group_matches).release();
|
||||||
|
if (flat_capture_group_matches.size() < state.capture_group_count * match_count) {
|
||||||
|
flat_capture_group_matches.ensure_capacity(match_count * state.capture_group_count);
|
||||||
|
for (size_t i = flat_capture_group_matches.size(); i < match_count * state.capture_group_count; ++i)
|
||||||
|
flat_capture_group_matches.empend();
|
||||||
|
}
|
||||||
|
|
||||||
|
Vector<Span<Match>> capture_group_matches;
|
||||||
|
for (size_t i = 0; i < match_count; ++i) {
|
||||||
|
auto span = flat_capture_group_matches.span().slice(state.capture_group_count * i, state.capture_group_count);
|
||||||
|
capture_group_matches.append(span);
|
||||||
|
}
|
||||||
|
|
||||||
RegexResult result {
|
RegexResult result {
|
||||||
match_count != 0,
|
match_count != 0,
|
||||||
match_count,
|
match_count,
|
||||||
move(state.matches).release(),
|
move(state.matches).release(),
|
||||||
move(state.capture_group_matches).release(),
|
move(flat_capture_group_matches),
|
||||||
|
move(capture_group_matches),
|
||||||
operations,
|
operations,
|
||||||
m_pattern->parser_result.capture_groups_count,
|
m_pattern->parser_result.capture_groups_count,
|
||||||
m_pattern->parser_result.named_capture_groups_count,
|
m_pattern->parser_result.named_capture_groups_count,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (match_count) {
|
if (match_count > 0)
|
||||||
// Make sure there are as many capture matches as there are actual matches.
|
VERIFY(result.capture_group_matches.size() >= match_count);
|
||||||
if (result.capture_group_matches.size() < match_count)
|
else
|
||||||
result.capture_group_matches.resize(match_count);
|
|
||||||
for (auto& matches : result.capture_group_matches)
|
|
||||||
matches.resize(m_pattern->parser_result.capture_groups_count + 1);
|
|
||||||
if (!input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) {
|
|
||||||
for (auto& matches : result.capture_group_matches)
|
|
||||||
matches.remove_all_matching([](auto& match) { return match.view.is_null(); });
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
result.capture_group_matches.clear_with_capacity();
|
result.capture_group_matches.clear_with_capacity();
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,13 +31,13 @@ struct Block {
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr size_t const c_max_recursion = 5000;
|
static constexpr size_t const c_max_recursion = 5000;
|
||||||
static constexpr size_t const c_match_preallocation_count = 0;
|
|
||||||
|
|
||||||
struct RegexResult final {
|
struct RegexResult final {
|
||||||
bool success { false };
|
bool success { false };
|
||||||
size_t count { 0 };
|
size_t count { 0 };
|
||||||
Vector<Match> matches;
|
Vector<Match> matches;
|
||||||
Vector<Vector<Match>> capture_group_matches;
|
Vector<Match> flat_capture_group_matches;
|
||||||
|
Vector<Span<Match>> capture_group_matches;
|
||||||
size_t n_operations { 0 };
|
size_t n_operations { 0 };
|
||||||
size_t n_capture_groups { 0 };
|
size_t n_capture_groups { 0 };
|
||||||
size_t n_named_capture_groups { 0 };
|
size_t n_named_capture_groups { 0 };
|
||||||
|
|
|
@ -37,7 +37,7 @@ void Regex<Parser>::run_optimization_passes()
|
||||||
attempt_rewrite_loops_as_atomic_groups(blocks);
|
attempt_rewrite_loops_as_atomic_groups(blocks);
|
||||||
|
|
||||||
// FIXME: "There are a few more conditions this can be true in (e.g. within an arbitrarily nested capture group)"
|
// FIXME: "There are a few more conditions this can be true in (e.g. within an arbitrarily nested capture group)"
|
||||||
MatchState state;
|
auto state = MatchState::only_for_enumeration();
|
||||||
auto& opcode = parser_result.bytecode.get_opcode(state);
|
auto& opcode = parser_result.bytecode.get_opcode(state);
|
||||||
if (opcode.opcode_id() == OpCodeId::CheckBegin)
|
if (opcode.opcode_id() == OpCodeId::CheckBegin)
|
||||||
parser_result.optimization_data.only_start_of_line = true;
|
parser_result.optimization_data.only_start_of_line = true;
|
||||||
|
@ -53,7 +53,7 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCod
|
||||||
|
|
||||||
auto bytecode_size = bytecode.size();
|
auto bytecode_size = bytecode.size();
|
||||||
|
|
||||||
MatchState state;
|
auto state = MatchState::only_for_enumeration();
|
||||||
state.instruction_position = 0;
|
state.instruction_position = 0;
|
||||||
auto check_jump = [&]<typename T>(OpCode const& opcode) {
|
auto check_jump = [&]<typename T>(OpCode const& opcode) {
|
||||||
auto& op = static_cast<T const&>(opcode);
|
auto& op = static_cast<T const&>(opcode);
|
||||||
|
@ -512,7 +512,7 @@ enum class AtomicRewritePreconditionResult {
|
||||||
static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block repeated_block, Block following_block, auto const& all_blocks)
|
static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block repeated_block, Block following_block, auto const& all_blocks)
|
||||||
{
|
{
|
||||||
Vector<Vector<CompareTypeAndValuePair>> repeated_values;
|
Vector<Vector<CompareTypeAndValuePair>> repeated_values;
|
||||||
MatchState state;
|
auto state = MatchState::only_for_enumeration();
|
||||||
auto has_seen_actionable_opcode = false;
|
auto has_seen_actionable_opcode = false;
|
||||||
for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) {
|
for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) {
|
||||||
auto& opcode = bytecode.get_opcode(state);
|
auto& opcode = bytecode.get_opcode(state);
|
||||||
|
@ -680,7 +680,7 @@ bool Regex<Parser>::attempt_rewrite_entire_match_as_substring_search(BasicBlockL
|
||||||
|
|
||||||
// We have a single basic block, let's see if it's a series of character or string compares.
|
// We have a single basic block, let's see if it's a series of character or string compares.
|
||||||
StringBuilder final_string;
|
StringBuilder final_string;
|
||||||
MatchState state;
|
auto state = MatchState::only_for_enumeration();
|
||||||
while (state.instruction_position < bytecode.size()) {
|
while (state.instruction_position < bytecode.size()) {
|
||||||
auto& opcode = bytecode.get_opcode(state);
|
auto& opcode = bytecode.get_opcode(state);
|
||||||
switch (opcode.opcode_id()) {
|
switch (opcode.opcode_id()) {
|
||||||
|
@ -796,7 +796,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
||||||
Optional<Block> fork_fallback_block;
|
Optional<Block> fork_fallback_block;
|
||||||
if (i + 1 < basic_blocks.size())
|
if (i + 1 < basic_blocks.size())
|
||||||
fork_fallback_block = basic_blocks[i + 1];
|
fork_fallback_block = basic_blocks[i + 1];
|
||||||
MatchState state;
|
auto state = MatchState::only_for_enumeration();
|
||||||
// Check if the last instruction in this block is a jump to the block itself:
|
// Check if the last instruction in this block is a jump to the block itself:
|
||||||
{
|
{
|
||||||
state.instruction_position = forking_block.end;
|
state.instruction_position = forking_block.end;
|
||||||
|
@ -913,7 +913,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!needed_patches.is_empty()) {
|
if (!needed_patches.is_empty()) {
|
||||||
MatchState state;
|
auto state = MatchState::only_for_enumeration();
|
||||||
auto bytecode_size = bytecode.size();
|
auto bytecode_size = bytecode.size();
|
||||||
state.instruction_position = 0;
|
state.instruction_position = 0;
|
||||||
struct Patch {
|
struct Patch {
|
||||||
|
@ -1039,7 +1039,7 @@ void Optimizer::append_alternation(ByteCode& target, Span<ByteCode> alternatives
|
||||||
|
|
||||||
auto has_any_backwards_jump = false;
|
auto has_any_backwards_jump = false;
|
||||||
|
|
||||||
MatchState state;
|
auto state = MatchState::only_for_enumeration();
|
||||||
|
|
||||||
for (size_t i = 0; i < alternatives.size(); ++i) {
|
for (size_t i = 0; i < alternatives.size(); ++i) {
|
||||||
auto& alternative = alternatives[i];
|
auto& alternative = alternatives[i];
|
||||||
|
@ -1144,7 +1144,7 @@ void Optimizer::append_alternation(ByteCode& target, Span<ByteCode> alternatives
|
||||||
node.metadata_value().size(),
|
node.metadata_value().size(),
|
||||||
node.metadata_value().size() == 1 ? "" : "s");
|
node.metadata_value().size() == 1 ? "" : "s");
|
||||||
|
|
||||||
MatchState state;
|
auto state = MatchState::only_for_enumeration();
|
||||||
state.instruction_position = node.metadata_value().first().instruction_position;
|
state.instruction_position = node.metadata_value().first().instruction_position;
|
||||||
auto& opcode = alternatives[node.metadata_value().first().alternative_index].get_opcode(state);
|
auto& opcode = alternatives[node.metadata_value().first().alternative_index].get_opcode(state);
|
||||||
insn = ByteString::formatted("{} {}", opcode.to_byte_string(), opcode.arguments_string());
|
insn = ByteString::formatted("{} {}", opcode.to_byte_string(), opcode.arguments_string());
|
||||||
|
|
|
@ -28,7 +28,6 @@ enum class AllFlags {
|
||||||
SingleLine = __Regex_SingleLine, // Dot matches newline characters
|
SingleLine = __Regex_SingleLine, // Dot matches newline characters
|
||||||
Sticky = __Regex_Sticky, // Force the pattern to only match consecutive matches from where the previous match ended.
|
Sticky = __Regex_Sticky, // Force the pattern to only match consecutive matches from where the previous match ended.
|
||||||
Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one.
|
Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one.
|
||||||
SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches, // Do not remove empty capture group results.
|
|
||||||
SingleMatch = __Regex_SingleMatch, // Stop after acquiring a single match.
|
SingleMatch = __Regex_SingleMatch, // Stop after acquiring a single match.
|
||||||
UnicodeSets = __Regex_UnicodeSets, // Only for ECMA262, Allow set operations in character classes.
|
UnicodeSets = __Regex_UnicodeSets, // Only for ECMA262, Allow set operations in character classes.
|
||||||
Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
|
Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
|
||||||
|
@ -49,7 +48,6 @@ enum class PosixFlags : FlagsUnderlyingType {
|
||||||
MatchNotBeginOfLine = (FlagsUnderlyingType)AllFlags::MatchNotBeginOfLine,
|
MatchNotBeginOfLine = (FlagsUnderlyingType)AllFlags::MatchNotBeginOfLine,
|
||||||
MatchNotEndOfLine = (FlagsUnderlyingType)AllFlags::MatchNotEndOfLine,
|
MatchNotEndOfLine = (FlagsUnderlyingType)AllFlags::MatchNotEndOfLine,
|
||||||
SkipSubExprResults = (FlagsUnderlyingType)AllFlags::SkipSubExprResults,
|
SkipSubExprResults = (FlagsUnderlyingType)AllFlags::SkipSubExprResults,
|
||||||
SkipTrimEmptyMatches = (FlagsUnderlyingType)AllFlags::SkipTrimEmptyMatches,
|
|
||||||
Multiline = (FlagsUnderlyingType)AllFlags::Multiline,
|
Multiline = (FlagsUnderlyingType)AllFlags::Multiline,
|
||||||
SingleMatch = (FlagsUnderlyingType)AllFlags::SingleMatch,
|
SingleMatch = (FlagsUnderlyingType)AllFlags::SingleMatch,
|
||||||
};
|
};
|
||||||
|
|
|
@ -857,7 +857,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
|
||||||
|
|
||||||
auto current_capture_group = m_parser_state.capture_groups_count;
|
auto current_capture_group = m_parser_state.capture_groups_count;
|
||||||
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
|
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
|
||||||
bytecode.insert_bytecode_group_capture_left(current_capture_group);
|
bytecode.insert_bytecode_group_capture_left(current_capture_group + 1);
|
||||||
m_parser_state.capture_groups_count++;
|
m_parser_state.capture_groups_count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -888,9 +888,9 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
|
||||||
|
|
||||||
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
|
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
|
||||||
if (capture_group_name.has_value())
|
if (capture_group_name.has_value())
|
||||||
bytecode.insert_bytecode_group_capture_right(current_capture_group, capture_group_name.value());
|
bytecode.insert_bytecode_group_capture_right(current_capture_group + 1, capture_group_name.value());
|
||||||
else
|
else
|
||||||
bytecode.insert_bytecode_group_capture_right(current_capture_group);
|
bytecode.insert_bytecode_group_capture_right(current_capture_group + 1);
|
||||||
}
|
}
|
||||||
should_parse_repetition_symbol = true;
|
should_parse_repetition_symbol = true;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -228,7 +228,6 @@ PatternErrorOr<Component> Component::compile(Utf8View const& input, PatternParse
|
||||||
auto flags = regex::RegexOptions<ECMAScriptFlags> {
|
auto flags = regex::RegexOptions<ECMAScriptFlags> {
|
||||||
(regex::ECMAScriptFlags)regex::AllFlags::SingleMatch
|
(regex::ECMAScriptFlags)regex::AllFlags::SingleMatch
|
||||||
| (regex::ECMAScriptFlags)regex::AllFlags::Global
|
| (regex::ECMAScriptFlags)regex::AllFlags::Global
|
||||||
| (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches
|
|
||||||
| regex::ECMAScriptFlags::BrowserExtended
|
| regex::ECMAScriptFlags::BrowserExtended
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -288,7 +287,7 @@ Component::Result Component::create_match_result(String const& input, regex::Reg
|
||||||
// 4. Let index be 1.
|
// 4. Let index be 1.
|
||||||
// 5. While index is less than Get(execResult, "length"):
|
// 5. While index is less than Get(execResult, "length"):
|
||||||
for (size_t index = 1; index <= exec_result.n_capture_groups; ++index) {
|
for (size_t index = 1; index <= exec_result.n_capture_groups; ++index) {
|
||||||
auto const& capture = exec_result.capture_group_matches[0][index];
|
auto const& capture = exec_result.capture_group_matches[0][index - 1];
|
||||||
|
|
||||||
// 1. Let name be component’s group name list[index − 1].
|
// 1. Let name be component’s group name list[index − 1].
|
||||||
auto name = group_name_list[index - 1];
|
auto name = group_name_list[index - 1];
|
||||||
|
|
|
@ -373,7 +373,7 @@ TEST_CASE(ini_file_entries)
|
||||||
}
|
}
|
||||||
|
|
||||||
EXPECT_EQ(result.matches.at(0).view, "[Window]");
|
EXPECT_EQ(result.matches.at(0).view, "[Window]");
|
||||||
EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "Window");
|
EXPECT_EQ(result.capture_group_matches.at(0).at(1).view, "Window");
|
||||||
EXPECT_EQ(result.matches.at(1).view, "Opacity=255");
|
EXPECT_EQ(result.matches.at(1).view, "Opacity=255");
|
||||||
EXPECT_EQ(result.matches.at(1).line, 1u);
|
EXPECT_EQ(result.matches.at(1).line, 1u);
|
||||||
EXPECT_EQ(result.matches.at(1).column, 0u);
|
EXPECT_EQ(result.matches.at(1).column, 0u);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue