1
0
Fork 0
mirror of https://github.com/LadybirdBrowser/ladybird.git synced 2025-06-11 02:13:56 +09:00

LibWeb: Consider content-type charset when determining XML encoding

This commit is contained in:
Idan Horowitz 2023-11-17 15:12:03 +02:00 committed by Andreas Kling
parent 07ea3ab306
commit 278e8afb42
Notes: sideshowbarker 2024-07-17 04:01:41 +09:00
3 changed files with 24 additions and 15 deletions

View file

@ -147,10 +147,17 @@ static bool build_gemini_document(DOM::Document& document, ByteBuffer const& dat
return true;
}
bool build_xml_document(DOM::Document& document, ByteBuffer const& data)
bool build_xml_document(DOM::Document& document, ByteBuffer const& data, Optional<String> content_encoding)
{
auto encoding = HTML::run_encoding_sniffing_algorithm(document, data);
auto decoder = TextCodec::decoder_for(encoding);
Optional<TextCodec::Decoder&> decoder;
// The actual HTTP headers and other metadata, not the headers as mutated or implied by the algorithms given in this specification,
// are the ones that must be used when determining the character encoding according to the rules given in the above specifications.
if (content_encoding.has_value())
decoder = TextCodec::decoder_for(*content_encoding);
if (!decoder.has_value()) {
auto encoding = HTML::run_encoding_sniffing_algorithm(document, data);
decoder = TextCodec::decoder_for(encoding);
}
VERIFY(decoder.has_value());
auto source = decoder->to_utf8(data).release_value_but_fixme_should_propagate_errors();
XML::Parser parser(source, { .resolve_external_resource = resolve_xml_resource });
@ -199,7 +206,7 @@ static bool build_audio_document(DOM::Document& document)
return true;
}
bool parse_document(DOM::Document& document, ByteBuffer const& data)
bool parse_document(DOM::Document& document, ByteBuffer const& data, Optional<String> content_encoding)
{
auto& mime_type = document.content_type();
if (mime_type == "text/html") {
@ -208,7 +215,7 @@ bool parse_document(DOM::Document& document, ByteBuffer const& data)
return true;
}
if (mime_type.ends_with_bytes("+xml"sv) || mime_type.is_one_of("text/xml", "application/xml"))
return build_xml_document(document, data);
return build_xml_document(document, data, move(content_encoding));
if (mime_type.starts_with_bytes("image/"sv))
return build_image_document(document, data);
if (mime_type.starts_with_bytes("video/"sv))
@ -252,20 +259,22 @@ JS::GCPtr<DOM::Document> load_document(Optional<HTML::NavigationParams> navigati
VERIFY(navigation_params.has_value());
auto extracted_mime_type = navigation_params->response->header_list()->extract_mime_type().release_value_but_fixme_should_propagate_errors();
auto mime_type = extracted_mime_type.has_value() ? extracted_mime_type.value().essence().bytes_as_string_view() : StringView {};
if (!is_supported_document_mime_type(mime_type)) {
if (!extracted_mime_type.has_value())
return nullptr;
auto mime_type = extracted_mime_type.release_value();
if (!is_supported_document_mime_type(mime_type.essence()))
return nullptr;
}
auto document = DOM::Document::create_and_initialize(DOM::Document::Type::HTML, "text/html", *navigation_params).release_value_but_fixme_should_propagate_errors();
document->set_content_type(String::from_utf8(mime_type).release_value_but_fixme_should_propagate_errors());
document->set_content_type(mime_type.essence());
auto& realm = document->realm();
if (navigation_params->response->body()) {
auto process_body = [document, url = navigation_params->response->url().value()](ByteBuffer bytes) {
if (parse_document(*document, bytes))
Optional<String> content_encoding = mime_type.parameters().get("charset"sv);
auto process_body = [document, url = navigation_params->response->url().value(), encoding = move(content_encoding)](ByteBuffer bytes) {
if (parse_document(*document, bytes, move(encoding)))
return;
document->remove_all_children(true);
auto error_html = load_error_page(url).release_value_but_fixme_should_propagate_errors();

View file

@ -11,8 +11,8 @@
namespace Web {
bool build_xml_document(DOM::Document& document, ByteBuffer const& data);
bool parse_document(DOM::Document& document, ByteBuffer const& data);
bool build_xml_document(DOM::Document& document, ByteBuffer const& data, Optional<String> content_encoding);
bool parse_document(DOM::Document& document, ByteBuffer const& data, Optional<String> content_encoding);
JS::GCPtr<DOM::Document> load_document(Optional<HTML::NavigationParams> navigation_params);
JS::GCPtr<DOM::Document> create_document_for_inline_content(JS::GCPtr<HTML::Navigable> navigable, Optional<String> navigation_id, StringView content_html);

View file

@ -319,7 +319,7 @@ void XMLHttpRequest::set_document_response()
// 6. Otherwise, let document be a document that represents the result of running the XML parser with XML scripting support disabled on xhr's received bytes. If that fails (unsupported character encoding, namespace well-formedness error, etc.), then return null.
else {
document = DOM::XMLDocument::create(realm(), m_response->url().value_or({}));
if (!Web::build_xml_document(*document, m_received_bytes)) {
if (!Web::build_xml_document(*document, m_received_bytes, {})) {
m_response_object = Empty {};
return;
}