1
0
Fork 0
mirror of https://github.com/LadybirdBrowser/ladybird.git synced 2025-06-10 01:51:03 +09:00

AK: Construct Strings from StringBuilder without re-allocating the data

Currently, invoking StringBuilder::to_string will re-allocate the string
data to construct the String. This is wasteful both in terms of memory
and speed.

The goal here is to simply hand the string buffer over to String, and
let String take ownership of that buffer. To do this, StringBuilder must
have the same memory layout as Detail::StringData. This layout is just
the members of the StringData class followed by the string itself.

So when a StringBuilder is created, we reserve sizeof(StringData) bytes
at the front of the buffer. StringData can then construct itself into
the buffer with placement new.

Things to note:
* StringData must now be aware of the actual capacity of its buffer, as
  that can be larger than the string size.
* We must take care not to pass ownership of inlined string buffers, as
  these live on the stack.
This commit is contained in:
Timothy Flynn 2024-07-19 15:38:41 -04:00 committed by Andreas Kling
parent 77eef8a8f6
commit 29879a69a4
Notes: github-actions[bot] 2024-07-20 07:31:38 +00:00
9 changed files with 139 additions and 28 deletions

View file

@ -8,6 +8,7 @@
#pragma once #pragma once
#include <AK/Assertions.h> #include <AK/Assertions.h>
#include <AK/Badge.h>
#include <AK/Error.h> #include <AK/Error.h>
#include <AK/Span.h> #include <AK/Span.h>
#include <AK/Types.h> #include <AK/Types.h>
@ -301,6 +302,23 @@ public:
operator ReadonlyBytes() const { return bytes(); } operator ReadonlyBytes() const { return bytes(); }
ALWAYS_INLINE size_t capacity() const { return m_inline ? inline_capacity : m_outline_capacity; } ALWAYS_INLINE size_t capacity() const { return m_inline ? inline_capacity : m_outline_capacity; }
// Reports the m_inline flag, i.e. whether this buffer is currently in its inline state
// (the state in which capacity() reports inline_capacity) rather than using an outline buffer.
ALWAYS_INLINE bool is_inline() const { return m_inline; }
// Result of leak_outline_buffer(): describes an outline buffer this ByteBuffer has let go of.
struct OutlineBuffer {
// The span that bytes() returned at the moment the buffer was leaked.
Bytes buffer;
// The capacity reported alongside the leaked span; defaults to 0.
size_t capacity { 0 };
};
// Gives up the outline buffer, if there is one, and puts this ByteBuffer back into the empty inline state.
//
// Only StringBuilder may call this (enforced by the Badge parameter).
//
// Returns an empty Optional when the buffer is inline, as there is no separate buffer to hand out in
// that case. Otherwise returns the span previously reported by bytes() together with the capacity of
// the outline buffer.
Optional<OutlineBuffer> leak_outline_buffer(Badge<StringBuilder>)
{
    if (m_inline)
        return {};

    // Both values must be read while the buffer is still marked as outline: capacity() reports
    // inline_capacity as soon as m_inline is set, which would misreport the size of the leaked buffer.
    auto buffer = bytes();
    auto outline_capacity = capacity();

    m_inline = true;
    m_size = 0;

    return OutlineBuffer { buffer, outline_capacity };
}
private: private:
void move_from(ByteBuffer&& other) void move_from(ByteBuffer&& other)

View file

@ -16,6 +16,8 @@ namespace AK {
namespace Detail { namespace Detail {
template<size_t inline_capacity> template<size_t inline_capacity>
class ByteBuffer; class ByteBuffer;
class StringData;
} }
enum class TrailingCodePointTransformation : u8; enum class TrailingCodePointTransformation : u8;

View file

@ -96,6 +96,23 @@ ErrorOr<String> String::from_stream(Stream& stream, size_t byte_count)
return result; return result;
} }
// Builds a String out of the given StringBuilder's contents.
// Fails with an error if the builder's current contents are not valid UTF-8; in that case the
// builder is not passed on to replace_with_string_builder().
ErrorOr<String> String::from_string_builder(Badge<StringBuilder>, StringBuilder& builder)
{
    Utf8View utf8_view { builder.string_view() };
    bool const is_valid_utf8 = utf8_view.validate();
    if (!is_valid_utf8)
        return Error::from_string_literal("String::from_string_builder: Input was not valid UTF-8");

    String string;
    string.replace_with_string_builder(builder);
    return string;
}
// Builds a String out of the given StringBuilder's contents without checking that they are valid UTF-8.
String String::from_string_builder_without_validation(Badge<StringBuilder>, StringBuilder& builder)
{
    String string;
    string.replace_with_string_builder(builder);
    return string;
}
ErrorOr<String> String::repeated(u32 code_point, size_t count) ErrorOr<String> String::repeated(u32 code_point, size_t count)
{ {
VERIFY(is_unicode(code_point)); VERIFY(is_unicode(code_point));

View file

@ -57,6 +57,9 @@ public:
[[nodiscard]] static String from_utf8_without_validation(ReadonlyBytes); [[nodiscard]] static String from_utf8_without_validation(ReadonlyBytes);
// Creates a new String from the contents of a StringBuilder. Returns an error if those contents are not valid UTF-8.
static ErrorOr<String> from_string_builder(Badge<StringBuilder>, StringBuilder&);
// Creates a new String from the contents of a StringBuilder without validating them as UTF-8.
[[nodiscard]] static String from_string_builder_without_validation(Badge<StringBuilder>, StringBuilder&);
// Creates a new String from a sequence of UTF-16 encoded code points. // Creates a new String from a sequence of UTF-16 encoded code points.
static ErrorOr<String> from_utf16(Utf16View const&); static ErrorOr<String> from_utf16(Utf16View const&);

View file

@ -90,6 +90,19 @@ bool StringBase::operator==(StringBase const& other) const
return bytes() == other.bytes(); return bytes() == other.bytes();
} }
// Replaces this string's contents with the contents of the given builder.
// Contents longer than MAX_SHORT_STRING_BYTE_COUNT are turned into StringData created from the
// builder; anything shorter is copied into a new short string instead.
void StringBase::replace_with_string_builder(StringBuilder& builder)
{
    auto const byte_count = builder.length();

    if (byte_count > MAX_SHORT_STRING_BYTE_COUNT) {
        destroy_string();
        m_data = &StringData::create_from_string_builder(builder).leak_ref();
        return;
    }

    replace_with_new_short_string(byte_count, [&builder](Bytes destination) {
        builder.string_view().bytes().copy_to(destination);
    });
}
ErrorOr<Bytes> StringBase::replace_with_uninitialized_buffer(size_t byte_count) ErrorOr<Bytes> StringBase::replace_with_uninitialized_buffer(size_t byte_count)
{ {
if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT) if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT)

View file

@ -92,6 +92,8 @@ protected:
callback(buffer); callback(buffer);
} }
// Replaces the current contents with those of the given StringBuilder. Contents that fit within
// MAX_SHORT_STRING_BYTE_COUNT are copied into a short string; longer contents become StringData
// created from the builder.
void replace_with_string_builder(StringBuilder&);
// This is not a trivial operation with storage, so it does not belong here. Unfortunately, it // This is not a trivial operation with storage, so it does not belong here. Unfortunately, it
// is impossible to implement it without access to StringData. // is impossible to implement it without access to StringData.
ErrorOr<StringBase> substring_from_byte_offset_with_shared_superstring(size_t start, size_t byte_count) const; ErrorOr<StringBase> substring_from_byte_offset_with_shared_superstring(size_t start, size_t byte_count) const;

View file

@ -11,6 +11,7 @@
#include <AK/FlyString.h> #include <AK/FlyString.h>
#include <AK/String.h> #include <AK/String.h>
#include <AK/StringBuilder.h> #include <AK/StringBuilder.h>
#include <AK/StringData.h>
#include <AK/StringView.h> #include <AK/StringView.h>
#include <AK/UnicodeUtils.h> #include <AK/UnicodeUtils.h>
#include <AK/Utf16View.h> #include <AK/Utf16View.h>
@ -18,16 +19,33 @@
namespace AK { namespace AK {
// Number of bytes kept at the front of every StringBuilder buffer, ahead of the string bytes, so that
// a Detail::StringData object can later be constructed there in place.
static constexpr auto STRING_BASE_PREFIX_SIZE = sizeof(Detail::StringData);
// Prepares the backing buffer for a StringBuilder expected to hold `capacity` string bytes.
// The returned buffer already has a size of STRING_BASE_PREFIX_SIZE, i.e. the prefix bytes are
// accounted for and string data gets appended after them. Extra capacity is only requested up front
// when `capacity` exceeds StringBuilder::inline_capacity. Any allocation failure is propagated.
static ErrorOr<StringBuilder::Buffer> create_buffer(size_t capacity)
{
    StringBuilder::Buffer storage;

    bool const exceeds_inline_capacity = capacity > StringBuilder::inline_capacity;
    if (exceeds_inline_capacity) {
        auto const total_capacity = STRING_BASE_PREFIX_SIZE + capacity;
        TRY(storage.try_ensure_capacity(total_capacity));
    }

    TRY(storage.try_resize(STRING_BASE_PREFIX_SIZE));
    return storage;
}
ErrorOr<StringBuilder> StringBuilder::create(size_t initial_capacity) ErrorOr<StringBuilder> StringBuilder::create(size_t initial_capacity)
{ {
StringBuilder builder; auto buffer = TRY(create_buffer(initial_capacity));
TRY(builder.m_buffer.try_ensure_capacity(initial_capacity)); return StringBuilder { move(buffer) };
return builder;
} }
StringBuilder::StringBuilder(size_t initial_capacity) StringBuilder::StringBuilder(size_t initial_capacity)
: m_buffer(MUST(create_buffer(initial_capacity)))
{
}
StringBuilder::StringBuilder(Buffer buffer)
: m_buffer(move(buffer))
{ {
m_buffer.ensure_capacity(initial_capacity);
} }
inline ErrorOr<void> StringBuilder::will_append(size_t size) inline ErrorOr<void> StringBuilder::will_append(size_t size)
@ -47,12 +65,12 @@ inline ErrorOr<void> StringBuilder::will_append(size_t size)
size_t StringBuilder::length() const size_t StringBuilder::length() const
{ {
return m_buffer.size(); return m_buffer.size() - STRING_BASE_PREFIX_SIZE;
} }
bool StringBuilder::is_empty() const bool StringBuilder::is_empty() const
{ {
return m_buffer.is_empty(); return length() == 0;
} }
void StringBuilder::trim(size_t count) void StringBuilder::trim(size_t count)
@ -122,14 +140,18 @@ ByteString StringBuilder::to_byte_string() const
return ByteString((char const*)data(), length()); return ByteString((char const*)data(), length());
} }
ErrorOr<String> StringBuilder::to_string() const ErrorOr<String> StringBuilder::to_string()
{ {
return String::from_utf8(string_view()); if (m_buffer.is_inline())
return String::from_utf8(string_view());
return String::from_string_builder({}, *this);
} }
String StringBuilder::to_string_without_validation() const String StringBuilder::to_string_without_validation()
{ {
return String::from_utf8_without_validation(string_view().bytes()); if (m_buffer.is_inline())
return String::from_utf8_without_validation(string_view().bytes());
return String::from_string_builder_without_validation({}, *this);
} }
FlyString StringBuilder::to_fly_string_without_validation() const FlyString StringBuilder::to_fly_string_without_validation() const
@ -144,22 +166,22 @@ ErrorOr<FlyString> StringBuilder::to_fly_string() const
u8* StringBuilder::data() u8* StringBuilder::data()
{ {
return m_buffer.data(); return m_buffer.data() + STRING_BASE_PREFIX_SIZE;
} }
u8 const* StringBuilder::data() const u8 const* StringBuilder::data() const
{ {
return m_buffer.data(); return m_buffer.data() + STRING_BASE_PREFIX_SIZE;
} }
StringView StringBuilder::string_view() const StringView StringBuilder::string_view() const
{ {
return StringView { data(), m_buffer.size() }; return m_buffer.span().slice(STRING_BASE_PREFIX_SIZE);
} }
void StringBuilder::clear() void StringBuilder::clear()
{ {
m_buffer.clear(); m_buffer.resize(STRING_BASE_PREFIX_SIZE);
} }
ErrorOr<void> StringBuilder::try_append_code_point(u32 code_point) ErrorOr<void> StringBuilder::try_append_code_point(u32 code_point)
@ -272,4 +294,14 @@ ErrorOr<void> StringBuilder::try_append_escaped_for_json(StringView string)
return {}; return {};
} }
// Hands the builder's outline buffer over to the caller (only Detail::StringData may ask for it).
// Returns an empty Optional if the underlying buffer had no outline buffer to leak; otherwise the
// builder is cleared before the leaked buffer is returned.
auto StringBuilder::leak_buffer_for_string_construction(Badge<Detail::StringData>) -> Optional<Buffer::OutlineBuffer>
{
    auto leaked = m_buffer.leak_outline_buffer({});
    if (!leaked.has_value())
        return {};

    clear();
    return leaked;
}
} }

View file

@ -18,6 +18,7 @@ class StringBuilder {
public: public:
static constexpr size_t inline_capacity = 256; static constexpr size_t inline_capacity = 256;
using Buffer = Detail::ByteBuffer<inline_capacity>;
using OutputType = ByteString; using OutputType = ByteString;
static ErrorOr<StringBuilder> create(size_t initial_capacity = inline_capacity); static ErrorOr<StringBuilder> create(size_t initial_capacity = inline_capacity);
@ -61,8 +62,8 @@ public:
[[nodiscard]] ByteString to_byte_string() const; [[nodiscard]] ByteString to_byte_string() const;
[[nodiscard]] String to_string_without_validation() const; [[nodiscard]] String to_string_without_validation();
ErrorOr<String> to_string() const; ErrorOr<String> to_string();
[[nodiscard]] FlyString to_fly_string_without_validation() const; [[nodiscard]] FlyString to_fly_string_without_validation() const;
ErrorOr<FlyString> to_fly_string() const; ErrorOr<FlyString> to_fly_string() const;
@ -95,12 +96,16 @@ public:
return {}; return {};
} }
// Leaks the outline buffer, if any, so that Detail::StringData can be constructed from it, and clears
// this builder. Returns an empty Optional when there is no outline buffer to leak.
Optional<Buffer::OutlineBuffer> leak_buffer_for_string_construction(Badge<Detail::StringData>);
private: private:
// Constructs a builder that takes the given, already prepared buffer as its storage.
explicit StringBuilder(Buffer);
ErrorOr<void> will_append(size_t); ErrorOr<void> will_append(size_t);
u8* data(); u8* data();
u8 const* data() const; u8 const* data() const;
Detail::ByteBuffer<inline_capacity> m_buffer; Buffer m_buffer;
}; };
} }

View file

@ -11,6 +11,7 @@
#include <AK/NonnullRefPtr.h> #include <AK/NonnullRefPtr.h>
#include <AK/RefCounted.h> #include <AK/RefCounted.h>
#include <AK/StringBase.h> #include <AK/StringBase.h>
#include <AK/StringBuilder.h>
#include <AK/kmalloc.h> #include <AK/kmalloc.h>
namespace AK::Detail { namespace AK::Detail {
@ -20,25 +21,39 @@ public:
static ErrorOr<NonnullRefPtr<StringData>> create_uninitialized(size_t byte_count, u8*& buffer) static ErrorOr<NonnullRefPtr<StringData>> create_uninitialized(size_t byte_count, u8*& buffer)
{ {
VERIFY(byte_count); VERIFY(byte_count);
void* slot = malloc(allocation_size_for_string_data(byte_count));
if (!slot) { auto capacity = allocation_size_for_string_data(byte_count);
void* slot = malloc(capacity);
if (!slot)
return Error::from_errno(ENOMEM); return Error::from_errno(ENOMEM);
}
auto new_string_data = adopt_ref(*new (slot) StringData(byte_count)); auto new_string_data = adopt_ref(*new (slot) StringData(byte_count, capacity));
buffer = const_cast<u8*>(new_string_data->bytes().data()); buffer = const_cast<u8*>(new_string_data->bytes().data());
return new_string_data; return new_string_data;
} }
// Constructs a StringData directly inside the buffer leaked from the given builder.
// The builder must hold more than MAX_SHORT_STRING_BYTE_COUNT bytes and must have an outline buffer
// to leak; both conditions are verified.
static NonnullRefPtr<StringData> create_from_string_builder(StringBuilder& builder)
{
    // The length has to be read before the buffer is leaked, as leaking clears the builder.
    auto const string_length = builder.length();
    VERIFY(string_length > MAX_SHORT_STRING_BYTE_COUNT);

    auto leaked = builder.leak_buffer_for_string_construction({});
    VERIFY(leaked.has_value()); // We should only arrive here if the buffer is outlined.

    // The StringData object is placed at the very start of the leaked buffer.
    void* slot = leaked->buffer.data();
    return adopt_ref(*new (slot) StringData(string_length, leaked->capacity));
}
static ErrorOr<NonnullRefPtr<StringData>> create_substring(StringData const& superstring, size_t start, size_t byte_count) static ErrorOr<NonnullRefPtr<StringData>> create_substring(StringData const& superstring, size_t start, size_t byte_count)
{ {
// Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization. // Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
VERIFY(byte_count > MAX_SHORT_STRING_BYTE_COUNT); VERIFY(byte_count > MAX_SHORT_STRING_BYTE_COUNT);
void* slot = malloc(sizeof(StringData) + sizeof(StringData::SubstringData)); auto capacity = sizeof(StringData) + sizeof(StringData::SubstringData);
if (!slot) { void* slot = malloc(capacity);
if (!slot)
return Error::from_errno(ENOMEM); return Error::from_errno(ENOMEM);
}
return adopt_ref(*new (slot) StringData(superstring, start, byte_count)); return adopt_ref(*new (slot) StringData(superstring, start, byte_count, capacity));
} }
struct SubstringData { struct SubstringData {
@ -48,7 +63,7 @@ public:
void operator delete(void* ptr) void operator delete(void* ptr)
{ {
kfree_sized(ptr, allocation_size_for_string_data(static_cast<StringData const*>(ptr)->m_byte_count)); kfree_sized(ptr, static_cast<StringData const*>(ptr)->m_capacity);
} }
~StringData() ~StringData()
@ -99,13 +114,15 @@ private:
return sizeof(StringData) + (sizeof(char) * length); return sizeof(StringData) + (sizeof(char) * length);
} }
explicit StringData(size_t byte_count) StringData(size_t byte_count, size_t capacity)
: m_byte_count(byte_count) : m_byte_count(byte_count)
, m_capacity(capacity)
{ {
} }
StringData(StringData const& superstring, size_t start, size_t byte_count) StringData(StringData const& superstring, size_t start, size_t byte_count, size_t capacity)
: m_byte_count(byte_count) : m_byte_count(byte_count)
, m_capacity(capacity)
, m_substring(true) , m_substring(true)
{ {
auto& data = const_cast<SubstringData&>(substring_data()); auto& data = const_cast<SubstringData&>(substring_data());
@ -125,6 +142,8 @@ private:
} }
u32 m_byte_count { 0 }; u32 m_byte_count { 0 };
u32 m_capacity { 0 };
mutable unsigned m_hash { 0 }; mutable unsigned m_hash { 0 };
mutable bool m_has_hash { false }; mutable bool m_has_hash { false };
bool m_substring { false }; bool m_substring { false };