1
0
Fork 0
mirror of https://github.com/LadybirdBrowser/ladybird.git synced 2025-06-08 13:37:10 +09:00
ladybird/Tests/LibWebView/TestWebViewURL.cpp
Timothy Flynn dbf4b189a4 LibWebView: Do not use AK::format to format search engine URLs
This is to prepare for custom search engines. If we use AK::format, it
would be trivial for a user (or bad actor) to come up with a template
search engine URL that ultimately crashes the browser due to internal
assertions in AK::format. For example:

    https://example.com/crash={1}

Rather than coming up with a complicated pre-format validator, let's
just not use AK::format. Custom URLs will signify their template query
parameters with "%s". So we can do the same with our built-in engines.
When it comes time to format the URL, we will do a simple string
replacement.
2025-04-06 13:45:10 +02:00

200 lines
9.9 KiB
C++

/*
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
* Copyright (c) 2025, Manuel Zahariev <manuel@duck.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTest/TestCase.h>
#include <LibWebView/SearchEngine.h>
#include <LibWebView/URL.h>
// Search engine fixture passed to WebView::sanitize_url by the helpers in this file.
// The "%s" in query_url is the placeholder where the search query is substituted.
static WebView::SearchEngine s_test_engine {
.name = "Test"_string,
.query_url = "https://ecosia.org/search?q=%s"_string
};
// Breaks `url` into parts with WebView::break_url_into_parts and checks each of the three
// components against `expected`. The split itself is asserted with VERIFY, so callers must only
// pass URLs that are expected to be splittable.
static void compare_url_parts(StringView url, WebView::URLParts const& expected)
{
    auto parts = WebView::break_url_into_parts(url);
    VERIFY(parts.has_value());

    auto const& actual = *parts;
    EXPECT_EQ(actual.scheme_and_subdomain, expected.scheme_and_subdomain);
    EXPECT_EQ(actual.effective_tld_plus_one, expected.effective_tld_plus_one);
    EXPECT_EQ(actual.remainder, expected.remainder);
}
// Returns true only when WebView::sanitize_url yields a URL for `url` and that URL serializes
// back to exactly the same text as the input.
static bool is_sanitized_url_the_same(StringView url)
{
    auto sanitized = WebView::sanitize_url(url);
    return sanitized.has_value() && sanitized->to_string() == url;
}
// Sanitizes `url` (with the test search engine and the given `append_tld` mode) and expects the
// result to serialize to `test_url`.
//
// test_url:   the expected serialized URL.
// url:        the raw location text, as a user would type it.
// append_tld: forwarded to WebView::sanitize_url; defaults to AppendTLD::No.
static void expect_url_equals_sanitized_url(StringView test_url, StringView url, WebView::AppendTLD append_tld = WebView::AppendTLD::No)
{
    auto sanitized_url = WebView::sanitize_url(url, s_test_engine, append_tld);
    EXPECT(sanitized_url.has_value());

    // EXPECT records the failure but does not stop execution; bail out here so we never
    // dereference an empty Optional (which would abort the whole test binary).
    if (!sanitized_url.has_value())
        return;

    EXPECT_EQ(sanitized_url->to_string(), test_url);
}
// Expects that sanitizing `url` falls back to a search: the sanitized URL must equal the search
// URL the test engine produces for the same text.
static void expect_search_url_equals_sanitized_url(StringView url)
{
    auto search_url = s_test_engine.format_search_query_for_navigation(url);
    auto sanitized_url = WebView::sanitize_url(url, s_test_engine);
    EXPECT(sanitized_url.has_value());

    // EXPECT records the failure but does not stop execution; bail out here so we never
    // dereference an empty Optional (which would abort the whole test binary).
    if (!sanitized_url.has_value())
        return;

    EXPECT_EQ(sanitized_url->to_string(), search_url);
}
// Inputs for which break_url_into_parts must return no value: the empty string, bare
// separators, and scheme names that are truncated or lack their full delimiter.
TEST_CASE(invalid_url)
{
// Empty input and separator-only / path-only fragments.
EXPECT(!WebView::break_url_into_parts(""sv).has_value());
EXPECT(!WebView::break_url_into_parts(":"sv).has_value());
EXPECT(!WebView::break_url_into_parts(":/"sv).has_value());
EXPECT(!WebView::break_url_into_parts("://"sv).has_value());
EXPECT(!WebView::break_url_into_parts("/"sv).has_value());
EXPECT(!WebView::break_url_into_parts("//"sv).has_value());
EXPECT(!WebView::break_url_into_parts("/h"sv).has_value());
// Every prefix of "file://" short of the full "file://" delimiter.
EXPECT(!WebView::break_url_into_parts("f"sv).has_value());
EXPECT(!WebView::break_url_into_parts("fi"sv).has_value());
EXPECT(!WebView::break_url_into_parts("fil"sv).has_value());
EXPECT(!WebView::break_url_into_parts("file"sv).has_value());
EXPECT(!WebView::break_url_into_parts("file:"sv).has_value());
EXPECT(!WebView::break_url_into_parts("file:/"sv).has_value());
// Every prefix of "http://", including the bare scheme with nothing after the delimiter.
EXPECT(!WebView::break_url_into_parts("h"sv).has_value());
EXPECT(!WebView::break_url_into_parts("ht"sv).has_value());
EXPECT(!WebView::break_url_into_parts("htt"sv).has_value());
EXPECT(!WebView::break_url_into_parts("http"sv).has_value());
EXPECT(!WebView::break_url_into_parts("http:"sv).has_value());
EXPECT(!WebView::break_url_into_parts("http:/"sv).has_value());
EXPECT(!WebView::break_url_into_parts("http://"sv).has_value());
// The same for "https://".
EXPECT(!WebView::break_url_into_parts("https"sv).has_value());
EXPECT(!WebView::break_url_into_parts("https:"sv).has_value());
EXPECT(!WebView::break_url_into_parts("https:/"sv).has_value());
EXPECT(!WebView::break_url_into_parts("https://"sv).has_value());
// Prefixes of "about" without the trailing colon.
EXPECT(!WebView::break_url_into_parts("a"sv).has_value());
EXPECT(!WebView::break_url_into_parts("ab"sv).has_value());
EXPECT(!WebView::break_url_into_parts("abo"sv).has_value());
EXPECT(!WebView::break_url_into_parts("abou"sv).has_value());
EXPECT(!WebView::break_url_into_parts("about"sv).has_value());
// Prefixes of "data" without the trailing colon.
EXPECT(!WebView::break_url_into_parts("d"sv).has_value());
EXPECT(!WebView::break_url_into_parts("da"sv).has_value());
EXPECT(!WebView::break_url_into_parts("dat"sv).has_value());
EXPECT(!WebView::break_url_into_parts("data"sv).has_value());
}
// file:// URLs: "file://" is always the first expected part and everything after it is the
// second; the third part is left empty.
// NOTE(review): the brace-initialized URLParts are presumably in the order scheme_and_subdomain,
// effective_tld_plus_one, remainder -- confirm against the declaration in LibWebView/URL.h.
TEST_CASE(file_url)
{
compare_url_parts("file://"sv, { "file://"sv, ""sv, {} });
compare_url_parts("file://a"sv, { "file://"sv, "a"sv, {} });
compare_url_parts("file:///a"sv, { "file://"sv, "/a"sv, {} });
compare_url_parts("file:///abc"sv, { "file://"sv, "/abc"sv, {} });
}
// http:// URLs: checks how the host is divided between the first two expected parts and that
// any path, fragment or query ends up in the third.
// NOTE(review): the brace-initialized URLParts are presumably in the order scheme_and_subdomain,
// effective_tld_plus_one, remainder -- confirm against the declaration in LibWebView/URL.h.
TEST_CASE(http_url)
{
    // Hosts with at most two labels (optionally with a trailing dot) stay whole in the second part.
    compare_url_parts("http://a"sv, { "http://"sv, "a"sv, {} });
    compare_url_parts("http://abc"sv, { "http://"sv, "abc"sv, {} });
    compare_url_parts("http://com"sv, { "http://"sv, "com"sv, {} });
    compare_url_parts("http://abc."sv, { "http://"sv, "abc."sv, {} });
    compare_url_parts("http://abc.c"sv, { "http://"sv, "abc.c"sv, {} });
    compare_url_parts("http://abc.com"sv, { "http://"sv, "abc.com"sv, {} });
    compare_url_parts("http://abc.com."sv, { "http://"sv, "abc.com."sv, {} });

    // With more labels, the leading ones move into the first part next to the scheme.
    compare_url_parts("http://abc.com.org"sv, { "http://abc."sv, "com.org"sv, {} });
    compare_url_parts("http://abc.com.org.gov"sv, { "http://abc.com."sv, "org.gov"sv, {} });

    // Path, fragment and query on a single-label host.
    compare_url_parts("http://abc/path"sv, { "http://"sv, "abc"sv, "/path"sv });
    compare_url_parts("http://abc#anchor"sv, { "http://"sv, "abc"sv, "#anchor"sv });
    compare_url_parts("http://abc?query"sv, { "http://"sv, "abc"sv, "?query"sv });

    // Path, fragment and query on a host with a subdomain.
    compare_url_parts("http://abc.def.com"sv, { "http://abc."sv, "def.com"sv, {} });
    compare_url_parts("http://abc.def.com/path"sv, { "http://abc."sv, "def.com"sv, "/path"sv });
    compare_url_parts("http://abc.def.com#anchor"sv, { "http://abc."sv, "def.com"sv, "#anchor"sv });
    compare_url_parts("http://abc.def.com?query"sv, { "http://abc."sv, "def.com"sv, "?query"sv });
}
// about: URLs: "about:" is the first expected part and everything after the colon (slashes
// included) is the second. Also checks which about-like inputs survive sanitize_url unchanged.
TEST_CASE(about_url)
{
compare_url_parts("about:"sv, { "about:"sv, {}, {} });
compare_url_parts("about:a"sv, { "about:"sv, "a"sv, {} });
compare_url_parts("about:ab"sv, { "about:"sv, "ab"sv, {} });
compare_url_parts("about:abc"sv, { "about:"sv, "abc"sv, {} });
compare_url_parts("about:abc/def"sv, { "about:"sv, "abc/def"sv, {} });
// "about" without a colon, or surrounded by other words, must not round-trip unchanged.
EXPECT(!is_sanitized_url_the_same("about"sv));
EXPECT(!is_sanitized_url_the_same("about blabla:"sv));
EXPECT(!is_sanitized_url_the_same("blabla about:"sv));
// Well-formed about: URLs must come back from sanitize_url exactly as typed.
EXPECT(is_sanitized_url_the_same("about:about"sv));
EXPECT(is_sanitized_url_the_same("about:version"sv));
}
// data: URLs: "data:" is the first expected part and everything after the colon (slashes
// included) is the second. Also checks which data-like inputs survive sanitize_url unchanged.
TEST_CASE(data_url)
{
compare_url_parts("data:"sv, { "data:"sv, {}, {} });
compare_url_parts("data:a"sv, { "data:"sv, "a"sv, {} });
compare_url_parts("data:ab"sv, { "data:"sv, "ab"sv, {} });
compare_url_parts("data:abc"sv, { "data:"sv, "abc"sv, {} });
compare_url_parts("data:abc/def"sv, { "data:"sv, "abc/def"sv, {} });
// A well-formed data: URL round-trips; "data" used as a plain word does not.
EXPECT(is_sanitized_url_the_same("data:text/html"sv));
EXPECT(!is_sanitized_url_the_same("data text/html"sv));
EXPECT(!is_sanitized_url_the_same("text/html data:"sv));
}
// Exercises WebView::sanitize_url's decision between "navigate to this URL" and "search for
// this text". expect_search_url_equals_sanitized_url asserts the input becomes a search with
// the test engine; expect_url_equals_sanitized_url(expected, input[, append_tld]) asserts the
// input becomes the given URL.
TEST_CASE(location_to_search_or_url)
{
// Free text, quoted text, and URL-looking text mixed with other words are all searches.
expect_search_url_equals_sanitized_url("hello"sv); // Search.
expect_search_url_equals_sanitized_url("hello world"sv);
expect_search_url_equals_sanitized_url("\"example.org\""sv);
expect_search_url_equals_sanitized_url("\"example.org"sv);
expect_search_url_equals_sanitized_url("\"http://example.org\""sv);
expect_search_url_equals_sanitized_url("example.org hello"sv);
expect_search_url_equals_sanitized_url("http://example.org and example sites"sv);
expect_search_url_equals_sanitized_url("ftp://example.org"sv); // ftp:// is not in SUPPORTED_SCHEMES
expect_search_url_equals_sanitized_url("https://exa\"mple.com/what"sv);
// If it can feed create_with_url_or_path -- it is a url.
// Spaces and quotes in the path are expected to come back percent-encoded.
expect_url_equals_sanitized_url("https://example.com/%20some%20cool%20page"sv, "https://example.com/ some cool page"sv);
expect_url_equals_sanitized_url("https://example.com/some%20cool%20page"sv, "https://example.com/some cool page"sv);
expect_url_equals_sanitized_url("https://example.com/%22what%22"sv, "https://example.com/\"what\""sv);
// Scheme-less input with a recognized or reserved TLD is expected to gain an https:// scheme.
expect_url_equals_sanitized_url("https://example.org/"sv, "example.org"sv); // Valid domain.
expect_url_equals_sanitized_url("https://example.abc/"sv, "example.abc"sv); // .abc is a recognized TLD.
expect_url_equals_sanitized_url("https://example.test/path"sv, "example.test/path"sv); // Reserved TLDs.
expect_url_equals_sanitized_url("https://example.example/path"sv, "example.example/path"sv);
expect_url_equals_sanitized_url("https://example.invalid/path"sv, "example.invalid/path"sv);
expect_url_equals_sanitized_url("https://example.localhost/path"sv, "example.localhost/path"sv);
expect_search_url_equals_sanitized_url("example.def"sv); // Invalid domain but no scheme: search (Like Firefox or Chrome).
expect_url_equals_sanitized_url("https://example.org/"sv, "https://example.org"sv); // Scheme.
// Respect the user if the url has a valid scheme but not a public suffix (.def is not a recognized TLD).
expect_url_equals_sanitized_url("https://example.def/"sv, "https://example.def"sv);
expect_url_equals_sanitized_url("https://localhost/"sv, "localhost"sv); // Respect localhost.
expect_url_equals_sanitized_url("https://localhost/hello"sv, "localhost/hello"sv);
expect_url_equals_sanitized_url("https://localhost/hello.world"sv, "localhost/hello.world"sv);
expect_url_equals_sanitized_url("https://localhost/hello.world?query=123"sv, "localhost/hello.world?query=123"sv);
// With AppendTLD::Yes, ".com" is expected to be appended to the host before any path.
expect_url_equals_sanitized_url("https://example.com/"sv, "example"sv, WebView::AppendTLD::Yes); // User holds down the Ctrl key.
expect_url_equals_sanitized_url("https://example.def.com/"sv, "example.def"sv, WebView::AppendTLD::Yes);
expect_url_equals_sanitized_url("https://com.com/"sv, "com"sv, WebView::AppendTLD::Yes);
expect_url_equals_sanitized_url("https://example.com/index.html"sv, "example/index.html"sv, WebView::AppendTLD::Yes);
expect_search_url_equals_sanitized_url("whatever:example.com"sv); // Invalid scheme.
expect_search_url_equals_sanitized_url("mailto:hello@example.com"sv); // For now, unsupported scheme.
// FIXME: Add support for opening mailto: scheme (below). Firefox opens mailto: locations
// expect_url_equals_sanitized_url("mailto:hello@example.com"sv, "mailto:hello@example.com"sv);
}