1
0
Fork 0
mirror of https://github.com/LadybirdBrowser/ladybird.git synced 2025-06-10 01:51:03 +09:00

LibJS: Handle hex and unicode escape sequences in string literals

Introduces the following syntax:

'\x55'
'\u26a0'
'\u{1f41e}'
This commit is contained in:
Matthew Olsson 2020-05-16 23:27:25 -07:00 committed by Andreas Kling
parent b3090678a9
commit e415dd4e9c
Notes: sideshowbarker 2024-07-19 06:29:20 +09:00
5 changed files with 118 additions and 9 deletions

View file

@ -405,7 +405,7 @@ NonnullRefPtr<Expression> Parser::parse_primary_expression()
case TokenType::BoolLiteral:
return create_ast_node<BooleanLiteral>(consume().bool_value());
case TokenType::StringLiteral:
return create_ast_node<StringLiteral>(consume().string_value());
return parse_string_literal(consume());
case TokenType::NullLiteral:
consume();
return create_ast_node<NullLiteral>();
@ -494,7 +494,7 @@ NonnullRefPtr<ObjectExpression> Parser::parse_object_expression()
property_value = create_ast_node<Identifier>(identifier);
need_colon = false;
} else if (match(TokenType::StringLiteral)) {
property_key = create_ast_node<StringLiteral>(consume(TokenType::StringLiteral).string_value());
property_key = parse_string_literal(consume());
} else if (match(TokenType::NumericLiteral)) {
property_key = create_ast_node<StringLiteral>(consume(TokenType::NumericLiteral).value());
} else if (match(TokenType::BracketOpen)) {
@ -559,6 +559,28 @@ NonnullRefPtr<ArrayExpression> Parser::parse_array_expression()
return create_ast_node<ArrayExpression>(move(elements));
}
// Builds a StringLiteral AST node from a string-literal or template-string token,
// decoding its escape sequences through Token::string_value().
//
// If decoding fails, a syntax error describing the malformed escape sequence is
// reported and the node is created from the value string_value() produced for the
// failed decode.
NonnullRefPtr<StringLiteral> Parser::parse_string_literal(Token token)
{
    auto status = Token::StringValueStatus::Ok;
    auto string = token.string_value(status);
    if (status != Token::StringValueStatus::Ok) {
        String message;
        if (status == Token::StringValueStatus::MalformedHexEscape || status == Token::StringValueStatus::MalformedUnicodeEscape) {
            auto type = status == Token::StringValueStatus::MalformedUnicodeEscape ? "unicode" : "hexadecimal";
            message = String::format("Malformed %s escape sequence", type);
        } else if (status == Token::StringValueStatus::UnicodeEscapeOverflow) {
            message = "Unicode codepoint must not be greater than 0x10ffff in escape sequence";
        }

        // Report the position of the literal itself. The callers hand in a token they
        // obtained from consume(), so the parser's current token is no longer the
        // literal and its position would point past the actual error.
        syntax_error(message, token.line_number(), token.line_column());
    }
    return create_ast_node<StringLiteral>(string);
}
NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal(bool is_tagged)
{
consume(TokenType::TemplateLiteralStart);
@ -579,7 +601,7 @@ NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal(bool is_tagged)
while (!match(TokenType::TemplateLiteralEnd) && !match(TokenType::UnterminatedTemplateLiteral)) {
if (match(TokenType::TemplateLiteralString)) {
auto token = consume();
expressions.append(create_ast_node<StringLiteral>(token.string_value()));
expressions.append(parse_string_literal(token));
if (is_tagged)
raw_strings.append(create_ast_node<StringLiteral>(token.value()));
} else if (match(TokenType::TemplateLiteralExprStart)) {

View file

@ -70,6 +70,7 @@ public:
NonnullRefPtr<Expression> parse_unary_prefixed_expression();
NonnullRefPtr<ObjectExpression> parse_object_expression();
NonnullRefPtr<ArrayExpression> parse_array_expression();
NonnullRefPtr<StringLiteral> parse_string_literal(Token token);
NonnullRefPtr<TemplateLiteral> parse_template_literal(bool is_tagged);
NonnullRefPtr<Expression> parse_secondary_expression(NonnullRefPtr<Expression>, int min_precedence, Associativity associate = Associativity::Right);
NonnullRefPtr<CallExpression> parse_call_expression(NonnullRefPtr<Expression>);

View file

@ -0,0 +1,17 @@
// Checks that hex (\xNN) and unicode (\uNNNN, \u{N...}) escape sequences are decoded
// in both quoted string literals and template literals.
// NOTE(review): assert() is presumably provided by test-common.js - confirm.
load("test-common.js")
try {
// Two-digit hex escape: 0x55 is "U".
assert("\x55" === "U");
// An uppercase X does not start a hex escape, so only the backslash is dropped.
assert("\X55" === "X55");
// Same two checks inside template literals.
assert(`\x55` === "U");
assert(`\X55` === "X55");
// Four-digit unicode escape.
assert("\u26a0" === "⚠");
assert(`\u26a0` === "⚠");
// Braced unicode escape naming a code point above U+FFFF.
assert("\u{1f41e}" === "🐞");
assert(`\u{1f41e}` === "🐞");
console.log("PASS");
} catch (e) {
// Any failed assertion (or other exception) is reported as a test failure.
console.log("FAIL: " + e);
}

View file

@ -27,6 +27,7 @@
#include "Token.h"
#include <AK/Assertions.h>
#include <AK/StringBuilder.h>
#include <AK/Utf32View.h>
#include <ctype.h>
namespace JS {
@ -72,13 +73,26 @@ double Token::double_value() const
return strtod(value_string.characters(), nullptr);
}
String Token::string_value() const
// Returns the numeric value (0-15) of a single hexadecimal digit character.
// Accepts '0'-'9', 'a'-'f' and 'A'-'F'; any other input trips the assertion.
static u32 hex2int(char x)
{
    // The <ctype.h> classification functions require an argument representable as
    // unsigned char (or EOF). A plain char may be negative, so convert it first
    // instead of passing it straight through.
    const auto digit = static_cast<unsigned char>(x);
    ASSERT(isxdigit(digit));
    if (digit >= '0' && digit <= '9')
        return digit - '0';
    // Letters: fold to lowercase so 'A'-'F' and 'a'-'f' map to the same 10-15 range.
    return 10u + (tolower(digit) - 'a');
}
String Token::string_value(StringValueStatus& status) const
{
ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString);
auto is_template = type() == TokenType::TemplateLiteralString;
auto offset = type() == TokenType::TemplateLiteralString ? 0 : 1;
auto encoding_failure = [&status](StringValueStatus parse_status) -> String {
status = parse_status;
return {};
};
StringBuilder builder;
for (size_t i = offset; i < m_value.length() - offset; ++i) {
if (m_value[i] == '\\' && i + 1 < m_value.length() - offset) {
@ -114,14 +128,62 @@ String Token::string_value() const
case '\\':
builder.append('\\');
break;
case 'x': {
if (i + 2 >= m_value.length() - offset)
return encoding_failure(StringValueStatus::MalformedHexEscape);
auto digit1 = m_value[++i];
auto digit2 = m_value[++i];
if (!isxdigit(digit1) || !isxdigit(digit2))
return encoding_failure(StringValueStatus::MalformedHexEscape);
builder.append(static_cast<char>(hex2int(digit1) * 16 + hex2int(digit2)));
break;
}
case 'u': {
if (i + 1 >= m_value.length() - offset)
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
u32 code_point = m_value[++i];
if (code_point == '{') {
code_point = 0;
do {
if (i + 1 >= m_value.length() - offset)
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
auto ch = m_value[++i];
if (!isxdigit(ch))
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
auto new_code_point = (code_point << 4u) | hex2int(ch);
if (new_code_point < code_point)
return encoding_failure(StringValueStatus::UnicodeEscapeOverflow);
code_point = new_code_point;
} while (m_value[i + 1] != '}');
++i;
} else {
if (i + 3 >= m_value.length() - offset || !isxdigit(code_point))
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
code_point = hex2int(code_point);
for (int j = 0; j < 3; ++j) {
auto ch = m_value[++i];
if (!isxdigit(ch))
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
code_point = (code_point << 4u) | hex2int(ch);
}
}
builder.append({ &code_point, 1 });
break;
}
default:
if (is_template && (m_value[i] == '$' || m_value[i] == '`')) {
builder.append(m_value[i]);
} else {
// FIXME: Also parse octal, hex and unicode sequences
// should anything else generate a syntax error?
builder.append(m_value[i]);
break;
}
// FIXME: Also parse octal. Should anything else generate a syntax error?
builder.append(m_value[i]);
}
} else {
builder.append(m_value[i]);

View file

@ -172,9 +172,16 @@ public:
size_t line_number() const { return m_line_number; }
size_t line_column() const { return m_line_column; }
double double_value() const;
String string_value() const;
bool bool_value() const;
// Result of decoding a string token's escape sequences in string_value().
enum class StringValueStatus {
// No decoding failure was reported.
Ok,
// A "\x" escape was not followed by two hexadecimal digits.
MalformedHexEscape,
// A "\u" escape was cut short or contained a character that is not a hexadecimal digit.
MalformedUnicodeEscape,
// The code point of a "\u{...}" escape overflowed while its digits were accumulated.
UnicodeEscapeOverflow,
};
String string_value(StringValueStatus& status) const;
bool is_identifier_name() const;
private: