From 6147557999bea085ff7084617600435dc61b3d38 Mon Sep 17 00:00:00 2001 From: Sam Atkins Date: Wed, 8 Jan 2025 13:47:24 +0000 Subject: [PATCH] LibWebView: Normalize source-code text before highlighting it The previous code to determine the SourceDocument's lines was too naive: the source text can contain other newline characters and sequences, and the HTML/CSS/JS syntax highlighters would take those into account when determining what line a token is on. This disagreement would cause incorrect highlighting, or even crashes, if the source didn't solely use `\n` for its newlines. In order to have everyone agree on what a line is, this patch first processes the source to replace all newlines with `\n`. The need to copy the source like this is unfortunate, but viewing the source is a rare enough action that this should not cause any noticeable performance problems. As the callers have a String, and we want a String, this also changes the function parameters to keep the source as a String instead of converting it to StringView and back. Fixes https://github.com/LadybirdBrowser/ladybird/issues/3169 --- Libraries/LibWebView/SourceHighlighter.cpp | 47 ++++++++++++++++++---- Libraries/LibWebView/SourceHighlighter.h | 10 ++--- 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/Libraries/LibWebView/SourceHighlighter.cpp b/Libraries/LibWebView/SourceHighlighter.cpp index 038d97d6e48..8a632d75843 100644 --- a/Libraries/LibWebView/SourceHighlighter.cpp +++ b/Libraries/LibWebView/SourceHighlighter.cpp @@ -17,12 +17,45 @@ namespace WebView { -SourceDocument::SourceDocument(StringView source) - : m_source(source) +SourceDocument::SourceDocument(String const& source) { - m_source.for_each_split_view('\n', AK::SplitBehavior::KeepEmpty, [&](auto line) { - m_lines.append(Syntax::TextDocumentLine { *this, line }); - }); + // HTML, CSS and JS differ slightly on what they consider a newline to be. + // In order to make them get along in documents that include a mix of the three, process the source to make the + // newlines consistent before doing any highlighting. + + // Optimization: If all the newlines are \n, just use the input string. + if (!source.code_points().contains_any_of(Array { '\r', 0x2028, 0x2029 })) { + m_source = source; + } else { + StringBuilder builder { source.byte_count() }; + // Convert any '\r\n', \r, or to \n + bool previous_was_cr = false; + for (u32 code_point : source.code_points()) { + if (previous_was_cr && code_point != '\n') + builder.append('\n'); + previous_was_cr = false; + + switch (code_point) { + case '\r': + previous_was_cr = true; + break; + case JS::LINE_SEPARATOR: + case JS::PARAGRAPH_SEPARATOR: + builder.append('\n'); + break; + default: + builder.append_code_point(code_point); + } + } + m_source = builder.to_string_without_validation(); + } + + m_source.code_points().for_each_split_view( + [](u32 it) { return it == '\n'; }, + SplitBehavior::KeepEmpty, + [&](auto line) { + m_lines.append(Syntax::TextDocumentLine { *this, line.as_string() }); + }); } Syntax::TextDocumentLine& SourceDocument::line(size_t line_index) @@ -35,7 +68,7 @@ Syntax::TextDocumentLine const& SourceDocument::line(size_t line_index) const return m_lines[line_index]; } -SourceHighlighterClient::SourceHighlighterClient(StringView source, Syntax::Language language) +SourceHighlighterClient::SourceHighlighterClient(String const& source, Syntax::Language language) : m_document(SourceDocument::create(source)) { // HACK: Syntax highlighters require a palette, but we don't actually care about the output styling, only the type of token for each span. @@ -114,7 +147,7 @@ void SourceHighlighterClient::highlighter_did_set_folding_regions(Vector create(StringView source) + static NonnullRefPtr create(String const& source) { return adopt_ref(*new (nothrow) SourceDocument(source)); } @@ -38,18 +38,18 @@ public: virtual Syntax::TextDocumentLine& line(size_t line_index) override; private: - SourceDocument(StringView source); + SourceDocument(String const& source); // ^ Syntax::Document virtual void update_views(Badge) override { } - StringView m_source; + String m_source; Vector m_lines; }; class SourceHighlighterClient final : public Syntax::HighlighterClient { public: - SourceHighlighterClient(StringView source, Syntax::Language); + SourceHighlighterClient(String const& source, Syntax::Language); virtual ~SourceHighlighterClient() = default; String to_html_string(URL::URL const& url, URL::URL const& base_url, HighlightOutputMode) const; @@ -75,7 +75,7 @@ private: OwnPtr m_highlighter; }; -String highlight_source(URL::URL const& url, URL::URL const& base_url, StringView, Syntax::Language, HighlightOutputMode); +String highlight_source(URL::URL const& url, URL::URL const& base_url, String const& source, Syntax::Language, HighlightOutputMode); constexpr inline StringView HTML_HIGHLIGHTER_STYLE = R"~~~( @media (prefers-color-scheme: dark) {