Mirror of https://github.com/LadybirdBrowser/ladybird.git (synced 2025-06-12 10:40:39 +09:00)
LibXML: Include line and column numbers in parse error messages
This commit is contained in:
Parent: 82ed253477
Commit: c0f30f31d1
Notes (github-actions[bot], 2024-08-03 16:24:28 +00:00):
Author: https://github.com/tcl3
Commit: c0f30f31d1
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/943
Reviewed-by: https://github.com/alimpfard ✅
4 changed files with 30 additions and 29 deletions
|
@ -1 +1,2 @@
|
||||||
<this-will-not-parse
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<this-will-not-parse</this-will-not-parse>
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
Got load event
|
Got load event
|
||||||
[object HTMLDocument]
|
[object HTMLDocument]
|
||||||
Failed to parse XML document: Expected '>' at offset 21
|
Failed to parse XML document: Expected '>' at line: 1, col: 20 (offset 59)
|
||||||
|
|
|
@ -195,7 +195,7 @@ ErrorOr<void, ParseError> Parser::skip_whitespace(Required required)
|
||||||
// S ::= (#x20 | #x9 | #xD | #xA)+
|
// S ::= (#x20 | #x9 | #xD | #xA)+
|
||||||
auto matched = m_lexer.consume_while(is_any_of("\x20\x09\x0d\x0a"sv));
|
auto matched = m_lexer.consume_while(is_any_of("\x20\x09\x0d\x0a"sv));
|
||||||
if (required == Required::Yes && matched.is_empty())
|
if (required == Required::Yes && matched.is_empty())
|
||||||
return parse_error(m_lexer.tell(), "Expected whitespace");
|
return parse_error(m_lexer.current_position(), "Expected whitespace");
|
||||||
|
|
||||||
rollback.disarm();
|
rollback.disarm();
|
||||||
return {};
|
return {};
|
||||||
|
@ -220,12 +220,12 @@ ErrorOr<void, ParseError> Parser::parse_internal()
|
||||||
auto matched_source = m_source.substring_view(0, m_lexer.tell());
|
auto matched_source = m_source.substring_view(0, m_lexer.tell());
|
||||||
if (auto it = find_if(matched_source.begin(), matched_source.end(), s_restricted_characters); !it.is_end()) {
|
if (auto it = find_if(matched_source.begin(), matched_source.end(), s_restricted_characters); !it.is_end()) {
|
||||||
return parse_error(
|
return parse_error(
|
||||||
it.index(),
|
m_lexer.position_for(it.index()),
|
||||||
ByteString::formatted("Invalid character #{:x} used in document", *it));
|
ByteString::formatted("Invalid character #{:x} used in document", *it));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!m_lexer.is_eof())
|
if (!m_lexer.is_eof())
|
||||||
return parse_error(m_lexer.tell(), "Garbage after document");
|
return parse_error(m_lexer.current_position(), "Garbage after document");
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
@ -236,7 +236,7 @@ ErrorOr<void, ParseError> Parser::expect(StringView expected)
|
||||||
|
|
||||||
if (!m_lexer.consume_specific(expected)) {
|
if (!m_lexer.consume_specific(expected)) {
|
||||||
if (m_options.treat_errors_as_fatal)
|
if (m_options.treat_errors_as_fatal)
|
||||||
return parse_error(m_lexer.tell(), ByteString::formatted("Expected '{}'", expected));
|
return parse_error(m_lexer.current_position(), ByteString::formatted("Expected '{}'", expected));
|
||||||
}
|
}
|
||||||
|
|
||||||
rollback.disarm();
|
rollback.disarm();
|
||||||
|
@ -250,7 +250,7 @@ requires(IsCallableWithArguments<Pred, bool, char>) ErrorOr<StringView, ParseErr
|
||||||
auto start = m_lexer.tell();
|
auto start = m_lexer.tell();
|
||||||
if (!m_lexer.next_is(predicate)) {
|
if (!m_lexer.next_is(predicate)) {
|
||||||
if (m_options.treat_errors_as_fatal)
|
if (m_options.treat_errors_as_fatal)
|
||||||
return parse_error(m_lexer.tell(), ByteString::formatted("Expected {}", description));
|
return parse_error(m_lexer.current_position(), ByteString::formatted("Expected {}", description));
|
||||||
}
|
}
|
||||||
|
|
||||||
m_lexer.ignore();
|
m_lexer.ignore();
|
||||||
|
@ -271,7 +271,7 @@ requires(IsCallableWithArguments<Pred, bool, char>) ErrorOr<StringView, ParseErr
|
||||||
|
|
||||||
if (m_lexer.tell() == start) {
|
if (m_lexer.tell() == start) {
|
||||||
if (m_options.treat_errors_as_fatal) {
|
if (m_options.treat_errors_as_fatal) {
|
||||||
return parse_error(m_lexer.tell(), ByteString::formatted("Expected {}", description));
|
return parse_error(m_lexer.current_position(), ByteString::formatted("Expected {}", description));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -353,7 +353,7 @@ ErrorOr<void, ParseError> Parser::parse_version_info()
|
||||||
m_in_compatibility_mode = true;
|
m_in_compatibility_mode = true;
|
||||||
} else {
|
} else {
|
||||||
if (version_string != "1.1" && m_options.treat_errors_as_fatal)
|
if (version_string != "1.1" && m_options.treat_errors_as_fatal)
|
||||||
return parse_error(m_lexer.tell(), ByteString::formatted("Expected '1.1', found '{}'", version_string));
|
return parse_error(m_lexer.current_position(), ByteString::formatted("Expected '1.1', found '{}'", version_string));
|
||||||
}
|
}
|
||||||
|
|
||||||
m_version = Version::Version11;
|
m_version = Version::Version11;
|
||||||
|
@ -415,7 +415,7 @@ ErrorOr<void, ParseError> Parser::parse_standalone_document_decl()
|
||||||
|
|
||||||
auto value = m_lexer.consume_quoted_string();
|
auto value = m_lexer.consume_quoted_string();
|
||||||
if (!value.is_one_of("yes", "no"))
|
if (!value.is_one_of("yes", "no"))
|
||||||
return parse_error(m_lexer.tell() - value.length(), "Expected one of 'yes' or 'no'");
|
return parse_error(m_lexer.position_for(m_lexer.tell() - value.length()), "Expected one of 'yes' or 'no'");
|
||||||
|
|
||||||
m_standalone = value == "yes";
|
m_standalone = value == "yes";
|
||||||
|
|
||||||
|
@ -445,7 +445,7 @@ ErrorOr<void, ParseError> Parser::parse_misc()
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
return parse_error(m_lexer.tell(), "Expected a match for 'Misc', but found none");
|
return parse_error(m_lexer.current_position(), "Expected a match for 'Misc', but found none");
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2.5.15 Comment, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Comment
|
// 2.5.15 Comment, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Comment
|
||||||
|
@ -521,7 +521,7 @@ ErrorOr<Name, ParseError> Parser::parse_processing_instruction_target()
|
||||||
|
|
||||||
if (target.equals_ignoring_ascii_case("xml"sv) && m_options.treat_errors_as_fatal) {
|
if (target.equals_ignoring_ascii_case("xml"sv) && m_options.treat_errors_as_fatal) {
|
||||||
return parse_error(
|
return parse_error(
|
||||||
m_lexer.tell() - target.length(),
|
m_lexer.position_for(m_lexer.tell() - target.length()),
|
||||||
"Use of the reserved 'xml' name for processing instruction target name is disallowed");
|
"Use of the reserved 'xml' name for processing instruction target name is disallowed");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -579,7 +579,7 @@ ErrorOr<void, ParseError> Parser::parse_doctype_decl()
|
||||||
auto declarations = TRY(parse_external_subset());
|
auto declarations = TRY(parse_external_subset());
|
||||||
if (!m_lexer.is_eof()) {
|
if (!m_lexer.is_eof()) {
|
||||||
return parse_error(
|
return parse_error(
|
||||||
m_lexer.tell(),
|
m_lexer.current_position(),
|
||||||
ByteString::formatted("Failed to resolve external subset '{}': garbage after declarations", doctype.external_id->system_id.system_literal));
|
ByteString::formatted("Failed to resolve external subset '{}': garbage after declarations", doctype.external_id->system_id.system_literal));
|
||||||
}
|
}
|
||||||
doctype.markup_declarations.extend(move(declarations));
|
doctype.markup_declarations.extend(move(declarations));
|
||||||
|
@ -634,7 +634,7 @@ ErrorOr<void, ParseError> Parser::parse_element()
|
||||||
|
|
||||||
// Well-formedness constraint: The Name in an element's end-tag MUST match the element type in the start-tag.
|
// Well-formedness constraint: The Name in an element's end-tag MUST match the element type in the start-tag.
|
||||||
if (m_options.treat_errors_as_fatal && closing_name != tag.name)
|
if (m_options.treat_errors_as_fatal && closing_name != tag.name)
|
||||||
return parse_error(tag_location, "Invalid closing tag");
|
return parse_error(m_lexer.position_for(tag_location), "Invalid closing tag");
|
||||||
|
|
||||||
rollback.disarm();
|
rollback.disarm();
|
||||||
return {};
|
return {};
|
||||||
|
@ -720,7 +720,7 @@ ErrorOr<ByteString, ParseError> Parser::parse_attribute_value_inner(StringView d
|
||||||
|
|
||||||
if (m_lexer.next_is('<')) {
|
if (m_lexer.next_is('<')) {
|
||||||
// Not allowed, return a nice error to make it easier to debug.
|
// Not allowed, return a nice error to make it easier to debug.
|
||||||
return parse_error(m_lexer.tell(), "Unescaped '<' not allowed in attribute values");
|
return parse_error(m_lexer.current_position(), "Unescaped '<' not allowed in attribute values");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_lexer.next_is('&')) {
|
if (m_lexer.next_is('&')) {
|
||||||
|
@ -774,7 +774,7 @@ ErrorOr<Variant<Parser::EntityReference, ByteString>, ParseError> Parser::parse_
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!code_point.has_value() || !s_characters.contains(*code_point))
|
if (!code_point.has_value() || !s_characters.contains(*code_point))
|
||||||
return parse_error(reference_start, "Invalid character reference");
|
return parse_error(m_lexer.position_for(reference_start), "Invalid character reference");
|
||||||
|
|
||||||
TRY(expect(";"sv));
|
TRY(expect(";"sv));
|
||||||
|
|
||||||
|
@ -995,7 +995,7 @@ ErrorOr<Optional<MarkupDeclaration>, ParseError> Parser::parse_markup_declaratio
|
||||||
return Optional<MarkupDeclaration> {};
|
return Optional<MarkupDeclaration> {};
|
||||||
}
|
}
|
||||||
|
|
||||||
return parse_error(m_lexer.tell(), "Expected one of elementdecl, attlistdecl, entitydecl, notationdecl, PI or comment");
|
return parse_error(m_lexer.current_position(), "Expected one of elementdecl, attlistdecl, entitydecl, notationdecl, PI or comment");
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2.8.28a DeclSep, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-DeclSep
|
// 2.8.28a DeclSep, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-DeclSep
|
||||||
|
@ -1016,7 +1016,7 @@ ErrorOr<Optional<ByteString>, ParseError> Parser::parse_declaration_separator()
|
||||||
return Optional<ByteString> {};
|
return Optional<ByteString> {};
|
||||||
}
|
}
|
||||||
|
|
||||||
return parse_error(m_lexer.tell(), "Expected either whitespace, or a PEReference");
|
return parse_error(m_lexer.current_position(), "Expected either whitespace, or a PEReference");
|
||||||
}
|
}
|
||||||
|
|
||||||
// 4.1.69 PEReference, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PEReference
|
// 4.1.69 PEReference, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PEReference
|
||||||
|
@ -1269,7 +1269,7 @@ ErrorOr<ElementDeclaration::ContentSpec, ParseError> Parser::parse_content_spec(
|
||||||
if (auto result = parse_name(); !result.is_error())
|
if (auto result = parse_name(); !result.is_error())
|
||||||
names.set(result.release_value());
|
names.set(result.release_value());
|
||||||
else
|
else
|
||||||
return parse_error(m_lexer.tell(), "Expected a Name");
|
return parse_error(m_lexer.current_position(), "Expected a Name");
|
||||||
}
|
}
|
||||||
TRY(skip_whitespace());
|
TRY(skip_whitespace());
|
||||||
TRY(expect(")*"sv));
|
TRY(expect(")*"sv));
|
||||||
|
@ -1331,7 +1331,7 @@ ErrorOr<ElementDeclaration::ContentSpec, ParseError> Parser::parse_content_spec(
|
||||||
TRY(expect(")"sv));
|
TRY(expect(")"sv));
|
||||||
|
|
||||||
if (choices.size() < 2)
|
if (choices.size() < 2)
|
||||||
return parse_error(m_lexer.tell(), "Expected more than one choice");
|
return parse_error(m_lexer.current_position(), "Expected more than one choice");
|
||||||
|
|
||||||
TRY(skip_whitespace());
|
TRY(skip_whitespace());
|
||||||
auto qualifier = parse_qualifier();
|
auto qualifier = parse_qualifier();
|
||||||
|
@ -1709,7 +1709,7 @@ ErrorOr<ByteString, ParseError> Parser::resolve_reference(EntityReference const&
|
||||||
{
|
{
|
||||||
static HashTable<Name> reference_lookup {};
|
static HashTable<Name> reference_lookup {};
|
||||||
if (reference_lookup.contains(reference.name))
|
if (reference_lookup.contains(reference.name))
|
||||||
return parse_error(m_lexer.tell(), ByteString::formatted("Invalid recursive definition for '{}'", reference.name));
|
return parse_error(m_lexer.current_position(), ByteString::formatted("Invalid recursive definition for '{}'", reference.name));
|
||||||
|
|
||||||
reference_lookup.set(reference.name);
|
reference_lookup.set(reference.name);
|
||||||
ScopeGuard remove_lookup {
|
ScopeGuard remove_lookup {
|
||||||
|
@ -1737,17 +1737,17 @@ ErrorOr<ByteString, ParseError> Parser::resolve_reference(EntityReference const&
|
||||||
},
|
},
|
||||||
[&](EntityDefinition const& definition) -> ErrorOr<void, ParseError> {
|
[&](EntityDefinition const& definition) -> ErrorOr<void, ParseError> {
|
||||||
if (placement == ReferencePlacement::AttributeValue)
|
if (placement == ReferencePlacement::AttributeValue)
|
||||||
return parse_error(m_lexer.tell(), ByteString::formatted("Attribute references external entity '{}'", reference.name));
|
return parse_error(m_lexer.current_position(), ByteString::formatted("Attribute references external entity '{}'", reference.name));
|
||||||
|
|
||||||
if (definition.notation.has_value())
|
if (definition.notation.has_value())
|
||||||
return parse_error(0u, ByteString::formatted("Entity reference to unparsed entity '{}'", reference.name));
|
return parse_error(m_lexer.position_for(0), ByteString::formatted("Entity reference to unparsed entity '{}'", reference.name));
|
||||||
|
|
||||||
if (!m_options.resolve_external_resource)
|
if (!m_options.resolve_external_resource)
|
||||||
return parse_error(0u, ByteString::formatted("Failed to resolve external entity '{}'", reference.name));
|
return parse_error(m_lexer.position_for(0), ByteString::formatted("Failed to resolve external entity '{}'", reference.name));
|
||||||
|
|
||||||
auto result = m_options.resolve_external_resource(definition.id.system_id, definition.id.public_id);
|
auto result = m_options.resolve_external_resource(definition.id.system_id, definition.id.public_id);
|
||||||
if (result.is_error())
|
if (result.is_error())
|
||||||
return parse_error(0u, ByteString::formatted("Failed to resolve external entity '{}': {}", reference.name, result.error()));
|
return parse_error(m_lexer.position_for(0), ByteString::formatted("Failed to resolve external entity '{}': {}", reference.name, result.error()));
|
||||||
|
|
||||||
resolved = result.release_value();
|
resolved = result.release_value();
|
||||||
return {};
|
return {};
|
||||||
|
@ -1767,7 +1767,7 @@ ErrorOr<ByteString, ParseError> Parser::resolve_reference(EntityReference const&
|
||||||
return "'";
|
return "'";
|
||||||
if (reference.name == "quot")
|
if (reference.name == "quot")
|
||||||
return "\"";
|
return "\"";
|
||||||
return parse_error(0u, ByteString::formatted("Reference to undeclared entity '{}'", reference.name));
|
return parse_error(m_lexer.position_for(0), ByteString::formatted("Reference to undeclared entity '{}'", reference.name));
|
||||||
}
|
}
|
||||||
|
|
||||||
StringView resolved_source = *resolved;
|
StringView resolved_source = *resolved;
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
namespace XML {
|
namespace XML {
|
||||||
|
|
||||||
struct ParseError {
|
struct ParseError {
|
||||||
size_t offset;
|
LineTrackingLexer::Position position {};
|
||||||
ByteString error;
|
ByteString error;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -185,7 +185,7 @@ private:
|
||||||
if (rule_name.starts_with("parse_"sv))
|
if (rule_name.starts_with("parse_"sv))
|
||||||
rule_name = rule_name.substring_view(6);
|
rule_name = rule_name.substring_view(6);
|
||||||
m_parse_errors.append({
|
m_parse_errors.append({
|
||||||
error.offset,
|
error.position,
|
||||||
ByteString::formatted("{}: {}", rule_name, error.error),
|
ByteString::formatted("{}: {}", rule_name, error.error),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -219,6 +219,6 @@ template<>
|
||||||
struct AK::Formatter<XML::ParseError> : public AK::Formatter<FormatString> {
|
struct AK::Formatter<XML::ParseError> : public AK::Formatter<FormatString> {
|
||||||
ErrorOr<void> format(FormatBuilder& builder, XML::ParseError const& error)
|
ErrorOr<void> format(FormatBuilder& builder, XML::ParseError const& error)
|
||||||
{
|
{
|
||||||
return Formatter<FormatString>::format(builder, "{} at offset {}"sv, error.error, error.offset);
|
return Formatter<FormatString>::format(builder, "{} at line: {}, col: {} (offset {})"sv, error.error, error.position.line, error.position.column, error.position.offset);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue