mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-06-08 05:27:14 +09:00
LibURL/Pattern: Implement ability to compile a component
This provides the infrastructure for taking the part list produced by the pattern parser and generating the actual regexp object that is used to match URLs against the pattern.
This commit is contained in:
parent
934f1ec30d
commit
c9e6ad562c
Notes:
github-actions[bot]
2025-04-06 12:27:29 +00:00
Author: https://github.com/shannonbooth
Commit: c9e6ad562c
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/3847
Reviewed-by: https://github.com/trflynn89
3 changed files with 261 additions and 0 deletions
|
@ -8,6 +8,7 @@ set(SOURCES
|
||||||
URL.cpp
|
URL.cpp
|
||||||
${PUBLIC_SUFFIX_SOURCES}
|
${PUBLIC_SUFFIX_SOURCES}
|
||||||
Pattern/Canonicalization.cpp
|
Pattern/Canonicalization.cpp
|
||||||
|
Pattern/Component.cpp
|
||||||
Pattern/ConstructorStringParser.cpp
|
Pattern/ConstructorStringParser.cpp
|
||||||
Pattern/Init.cpp
|
Pattern/Init.cpp
|
||||||
Pattern/Options.cpp
|
Pattern/Options.cpp
|
||||||
|
|
257
Libraries/LibURL/Pattern/Component.cpp
Normal file
257
Libraries/LibURL/Pattern/Component.cpp
Normal file
|
@ -0,0 +1,257 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2025, Shannon Booth <shannon@serenityos.org>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <LibRegex/Regex.h>
|
||||||
|
#include <LibURL/Pattern/Component.h>
|
||||||
|
#include <LibURL/Pattern/PatternParser.h>
|
||||||
|
#include <LibURL/Pattern/String.h>
|
||||||
|
|
||||||
|
namespace URL::Pattern {
|
||||||
|
|
||||||
|
// https://urlpattern.spec.whatwg.org/#generate-a-regular-expression-and-name-list
|
||||||
|
// Result pair of generate_a_regular_expression_and_name_list() below.
// Member order matters: that function brace-initializes this struct positionally and
// Component::compile() unpacks it with a structured binding.
struct RegularExpressionAndNameList {
|
||||||
|
// Regular expression source text built from the part list; it always starts with "^" and ends with "$".
String regular_expression;
|
||||||
|
// Names of every part that is not fixed text, in the order the parts appear in the part list.
Vector<String> name_list;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Builds the anchored regular expression source ("^" ... "$") for a parsed part list, together with the
// names of all non-fixed-text parts in the order they occur.
//
// The numbered comments refer to the steps of the spec algorithm linked above; the order in which pieces
// are appended to the builder is significant and follows those steps exactly.
static RegularExpressionAndNameList generate_a_regular_expression_and_name_list(Vector<Part> const& part_list, Options const& options)
{
    // Steps 1-2: result starts out as "^", and the name list starts out empty.
    StringBuilder builder;
    builder.append('^');
    Vector<String> group_names;

    // Helpers so that each branch below reads as the sequence of pieces it emits.
    auto emit_escaped = [&](auto const& text) {
        builder.append(escape_a_regexp_string(text));
    };
    auto emit_modifier = [&](auto modifier) {
        builder.append(Part::convert_modifier_to_string(modifier));
    };

    // Step 3: translate every part in turn.
    for (auto const& part : part_list) {
        // Step 3.1: "fixed-text" parts are emitted as escaped literals.
        if (part.type == Part::Type::FixedText) {
            // 3.1.1: Without a modifier the escaped value is appended as-is.
            if (part.modifier == Part::Modifier::None) {
                emit_escaped(part.value);
                continue;
            }

            // 3.1.2: Otherwise the escaped value is wrapped in a non-capturing group that carries the modifier.
            builder.append("(?:"sv);
            emit_escaped(part.value);
            builder.append(')');
            emit_modifier(part.modifier);

            // 3.1.3: Continue.
            continue;
        }

        // Steps 3.2-3.3: every other part must be named, and its name is recorded.
        VERIFY(!part.name.is_empty());
        group_names.append(part.name);

        // Steps 3.4-3.6: start from the part's own value, and substitute the canned expression for wildcards.
        auto group_pattern = part.value;
        if (part.type == Part::Type::FullWildcard)
            group_pattern = MUST(String::from_utf8(full_wildcard_regexp_value));
        else if (part.type == Part::Type::SegmentWildcard)
            group_pattern = generate_a_segment_wildcard_regexp(options);

        bool const matches_at_most_once = part.modifier == Part::Modifier::None || part.modifier == Part::Modifier::Optional;
        bool const has_prefix_or_suffix = !part.prefix.is_empty() || !part.suffix.is_empty();

        // Step 3.7: neither prefix nor suffix.
        if (!has_prefix_or_suffix) {
            // 3.7.1: "none"/"optional"   -> "(" value ")" modifier
            // 3.7.2: any other modifier  -> "((?:" value ")" modifier ")"
            builder.append(matches_at_most_once ? "("sv : "((?:"sv);
            builder.append(group_pattern);
            builder.append(')');
            emit_modifier(part.modifier);
            if (!matches_at_most_once)
                builder.append(')');

            // 3.7.3: Continue.
            continue;
        }

        // Step 3.8: prefix and/or suffix with a "none"/"optional" modifier:
        //           "(?:" prefix "(" value ")" suffix ")" modifier
        if (matches_at_most_once) {
            builder.append("(?:"sv);
            emit_escaped(part.prefix);
            builder.append('(');
            builder.append(group_pattern);
            builder.append(')');
            emit_escaped(part.suffix);
            builder.append(')');
            emit_modifier(part.modifier);

            // 3.8.9: Continue.
            continue;
        }

        // Steps 3.9-3.10: only repeating parts that carry a prefix and/or suffix reach this point.
        VERIFY(part.modifier == Part::Modifier::ZeroOrMore || part.modifier == Part::Modifier::OneOrMore);
        VERIFY(has_prefix_or_suffix);

        // Steps 3.11-3.22:
        //   "(?:" prefix "((?:" value ")(?:" suffix prefix "(?:" value "))*)" suffix ")"
        builder.append("(?:"sv);
        emit_escaped(part.prefix);
        builder.append("((?:"sv);
        builder.append(group_pattern);
        builder.append(")(?:"sv);
        emit_escaped(part.suffix);
        emit_escaped(part.prefix);
        builder.append("(?:"sv);
        builder.append(group_pattern);
        builder.append("))*)"sv);
        emit_escaped(part.suffix);
        builder.append(')');

        // Step 3.23: "zero-or-more" additionally makes the whole group optional.
        if (part.modifier == Part::Modifier::ZeroOrMore)
            builder.append('?');
    }

    // Step 4: close the expression with the "$" anchor.
    builder.append('$');

    // Step 5: Return (result, name list).
    return { builder.to_string_without_validation(), move(group_names) };
}
|
||||||
|
|
||||||
|
// https://urlpattern.spec.whatwg.org/#compile-a-component
|
||||||
|
// Compiles a pattern string into a Component: parses it into a part list, generates and compiles the
// matching regular expression, and records the canonical pattern string, the group names and whether any
// part is a custom regexp group.
//
// Returns an error if parsing fails (propagated from PatternParser::parse) or if the generated regular
// expression does not compile.
PatternErrorOr<Component> Component::compile(Utf8View const& input, PatternParser::EncodingCallback encoding_callback, Options const& options)
{
    // 1. Let part list be the result of running parse a pattern string given input, options, and encoding callback.
    auto parts = TRY(PatternParser::parse(input, options, move(encoding_callback)));

    // 2. Let (regular expression string, name list) be the result of running generate a regular expression and
    //    name list given part list and options.
    auto [regexp_source, group_names] = generate_a_regular_expression_and_name_list(parts, options);

    // 3. Let flags be an empty string.
    // NOTE: This is the flag set the LibJS RegExp implementation uses for an empty flags string.
    auto regexp_flags = regex::RegexOptions<ECMAScriptFlags> {
        static_cast<regex::ECMAScriptFlags>(regex::AllFlags::SingleMatch)
        | static_cast<regex::ECMAScriptFlags>(regex::AllFlags::Global)
        | static_cast<regex::ECMAScriptFlags>(regex::AllFlags::SkipTrimEmptyMatches)
        | regex::ECMAScriptFlags::BrowserExtended
    };

    // 4. If options's ignore case is true then set flags to "vi".
    // 5. Otherwise set flags to "v".
    // Both outcomes include "v", so only the "i" flag depends on the option.
    regexp_flags |= regex::ECMAScriptFlags::UnicodeSets;
    if (options.ignore_case)
        regexp_flags |= regex::ECMAScriptFlags::Insensitive;

    // 6. Let regular expression be RegExpCreate(regular expression string, flags). If this throws an exception,
    //    catch it, and throw a TypeError.
    auto compiled_regexp = make<Regex<ECMA262>>(regexp_source.to_byte_string(), regexp_flags);
    bool const regexp_failed_to_compile = compiled_regexp->parser_result.error != regex::Error::NoError;
    if (regexp_failed_to_compile)
        return ErrorInfo { MUST(String::formatted("RegExp compile error: {}", compiled_regexp->error_string())) };

    // 7. Let pattern string be the result of running generate a pattern string given part list and options.
    auto canonical_pattern = generate_a_pattern_string(parts, options);

    // 8. Let has regexp groups be false.
    // 9. For each part of part list: if part's type is "regexp", then set has regexp groups to true.
    // The scan stops at the first "regexp" part, since further parts cannot change the answer.
    bool contains_regexp_part = false;
    for (auto const& part : parts) {
        if (part.type != Part::Type::Regexp)
            continue;
        contains_regexp_part = true;
        break;
    }

    // 10. Return a new component whose pattern string is pattern string, regular expression is regular
    //     expression, group name list is name list, and has regexp groups is has regexp groups.
    return Component {
        .pattern_string = move(canonical_pattern),
        .regular_expression = move(compiled_regexp),
        .group_name_list = move(group_names),
        .has_regexp_groups = contains_regexp_part,
    };
}
|
||||||
|
|
||||||
|
}
|
|
@ -9,6 +9,7 @@
|
||||||
#include <AK/OwnPtr.h>
|
#include <AK/OwnPtr.h>
|
||||||
#include <AK/String.h>
|
#include <AK/String.h>
|
||||||
#include <LibRegex/Regex.h>
|
#include <LibRegex/Regex.h>
|
||||||
|
#include <LibURL/Pattern/PatternParser.h>
|
||||||
|
|
||||||
namespace URL::Pattern {
|
namespace URL::Pattern {
|
||||||
|
|
||||||
|
@ -29,6 +30,8 @@ struct Component {
|
||||||
// https://urlpattern.spec.whatwg.org/#component-has-regexp-groups
|
// https://urlpattern.spec.whatwg.org/#component-has-regexp-groups
|
||||||
// has regexp groups, a boolean
|
// has regexp groups, a boolean
|
||||||
bool has_regexp_groups {};
|
bool has_regexp_groups {};
|
||||||
|
|
||||||
|
static PatternErrorOr<Component> compile(Utf8View const& input, PatternParser::EncodingCallback, Options const&);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue