mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-06-10 01:51:03 +09:00
LibJS: Handle hex and unicode escape sequences in string literals
Introduces the following syntax: '\x55' '\u26a0' '\u{1f41e}'
This commit is contained in:
parent
b3090678a9
commit
e415dd4e9c
Notes:
sideshowbarker
2024-07-19 06:29:20 +09:00
Author: https://github.com/mattco98
Commit: e415dd4e9c
Pull-request: https://github.com/SerenityOS/serenity/pull/2268
Reviewed-by: https://github.com/awesomekling
5 changed files with 118 additions and 9 deletions
|
@ -405,7 +405,7 @@ NonnullRefPtr<Expression> Parser::parse_primary_expression()
|
|||
case TokenType::BoolLiteral:
|
||||
return create_ast_node<BooleanLiteral>(consume().bool_value());
|
||||
case TokenType::StringLiteral:
|
||||
return create_ast_node<StringLiteral>(consume().string_value());
|
||||
return parse_string_literal(consume());
|
||||
case TokenType::NullLiteral:
|
||||
consume();
|
||||
return create_ast_node<NullLiteral>();
|
||||
|
@ -494,7 +494,7 @@ NonnullRefPtr<ObjectExpression> Parser::parse_object_expression()
|
|||
property_value = create_ast_node<Identifier>(identifier);
|
||||
need_colon = false;
|
||||
} else if (match(TokenType::StringLiteral)) {
|
||||
property_key = create_ast_node<StringLiteral>(consume(TokenType::StringLiteral).string_value());
|
||||
property_key = parse_string_literal(consume());
|
||||
} else if (match(TokenType::NumericLiteral)) {
|
||||
property_key = create_ast_node<StringLiteral>(consume(TokenType::NumericLiteral).value());
|
||||
} else if (match(TokenType::BracketOpen)) {
|
||||
|
@ -559,6 +559,28 @@ NonnullRefPtr<ArrayExpression> Parser::parse_array_expression()
|
|||
return create_ast_node<ArrayExpression>(move(elements));
|
||||
}
|
||||
|
||||
// Builds a StringLiteral AST node from a string-like token.
//
// `token` is decoded with Token::string_value(status). If decoding reports a
// status other than Ok, a syntax error is recorded with a message describing
// the bad escape sequence. A StringLiteral node wrapping whatever
// string_value() returned is produced in either case.
NonnullRefPtr<StringLiteral> Parser::parse_string_literal(Token token)
{
    auto status = Token::StringValueStatus::Ok;
    auto string = token.string_value(status);

    if (status != Token::StringValueStatus::Ok) {
        String message;
        if (status == Token::StringValueStatus::MalformedHexEscape || status == Token::StringValueStatus::MalformedUnicodeEscape) {
            auto type = status == Token::StringValueStatus::MalformedUnicodeEscape ? "unicode" : "hexadecimal";
            message = String::format("Malformed %s escape sequence", type);
        } else if (status == Token::StringValueStatus::UnicodeEscapeOverflow) {
            message = "Unicode codepoint must not be greater than 0x10ffff in escape sequence";
        }

        // Report the error at the offending literal itself. Callers hand us a
        // token that has already been consumed, so the parser's current token
        // is the one *after* the literal and would give a misleading position.
        syntax_error(
            message,
            token.line_number(),
            token.line_column());
    }

    return create_ast_node<StringLiteral>(string);
}
|
||||
|
||||
NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal(bool is_tagged)
|
||||
{
|
||||
consume(TokenType::TemplateLiteralStart);
|
||||
|
@ -579,7 +601,7 @@ NonnullRefPtr<TemplateLiteral> Parser::parse_template_literal(bool is_tagged)
|
|||
while (!match(TokenType::TemplateLiteralEnd) && !match(TokenType::UnterminatedTemplateLiteral)) {
|
||||
if (match(TokenType::TemplateLiteralString)) {
|
||||
auto token = consume();
|
||||
expressions.append(create_ast_node<StringLiteral>(token.string_value()));
|
||||
expressions.append(parse_string_literal(token));
|
||||
if (is_tagged)
|
||||
raw_strings.append(create_ast_node<StringLiteral>(token.value()));
|
||||
} else if (match(TokenType::TemplateLiteralExprStart)) {
|
||||
|
|
|
@ -70,6 +70,7 @@ public:
|
|||
NonnullRefPtr<Expression> parse_unary_prefixed_expression();
|
||||
NonnullRefPtr<ObjectExpression> parse_object_expression();
|
||||
NonnullRefPtr<ArrayExpression> parse_array_expression();
|
||||
NonnullRefPtr<StringLiteral> parse_string_literal(Token token);
|
||||
NonnullRefPtr<TemplateLiteral> parse_template_literal(bool is_tagged);
|
||||
NonnullRefPtr<Expression> parse_secondary_expression(NonnullRefPtr<Expression>, int min_precedence, Associativity associate = Associativity::Right);
|
||||
NonnullRefPtr<CallExpression> parse_call_expression(NonnullRefPtr<Expression>);
|
||||
|
|
17
Libraries/LibJS/Tests/string-escapes.js
Normal file
17
Libraries/LibJS/Tests/string-escapes.js
Normal file
|
@ -0,0 +1,17 @@
|
|||
// Checks hex (\xHH) and unicode (\uHHHH, \u{...}) escape sequences in both
// quoted string literals and template literals.
// NOTE(review): load() and assert() are presumably provided by the test
// runner / test-common.js; neither is defined in this file.
load("test-common.js")
|
||||
|
||||
try {
|
||||
// \x followed by two hex digits yields the character with that code (0x55 is "U").
assert("\x55" === "U");
|
||||
// An uppercase \X is not a hex escape: the backslash is dropped and "X55" is kept as-is.
assert("\X55" === "X55");
|
||||
// Same two checks for template literals.
assert(`\x55` === "U");
|
||||
assert(`\X55` === "X55");
|
||||
|
||||
// \u followed by exactly four hex digits.
assert("\u26a0" === "⚠");
|
||||
assert(`\u26a0` === "⚠");
|
||||
// Braced form \u{...}; this code point is above U+FFFF.
assert("\u{1f41e}" === "🐞");
|
||||
assert(`\u{1f41e}` === "🐞");
|
||||
|
||||
// Only reached when no assertion above threw.
console.log("PASS");
|
||||
} catch (e) {
|
||||
console.log("FAIL: " + e);
|
||||
}
|
|
@ -27,6 +27,7 @@
|
|||
#include "Token.h"
|
||||
#include <AK/Assertions.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/Utf32View.h>
|
||||
#include <ctype.h>
|
||||
|
||||
namespace JS {
|
||||
|
@ -72,13 +73,26 @@ double Token::double_value() const
|
|||
return strtod(value_string.characters(), nullptr);
|
||||
}
|
||||
|
||||
String Token::string_value() const
|
||||
// Converts one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value in the range 0-15. Asserts that `x` is a hexadecimal digit.
static u32 hex2int(char x)
{
    // The <ctype.h> classification functions have undefined behaviour when
    // given a negative value other than EOF, which a plain `char` holding a
    // byte >= 0x80 can be. Go through unsigned char before calling them.
    auto digit = static_cast<unsigned char>(x);
    ASSERT(isxdigit(digit));
    if (digit >= '0' && digit <= '9')
        return digit - '0';
    return 10u + (tolower(digit) - 'a');
}
|
||||
|
||||
String Token::string_value(StringValueStatus& status) const
|
||||
{
|
||||
ASSERT(type() == TokenType::StringLiteral || type() == TokenType::TemplateLiteralString);
|
||||
auto is_template = type() == TokenType::TemplateLiteralString;
|
||||
|
||||
auto offset = type() == TokenType::TemplateLiteralString ? 0 : 1;
|
||||
|
||||
auto encoding_failure = [&status](StringValueStatus parse_status) -> String {
|
||||
status = parse_status;
|
||||
return {};
|
||||
};
|
||||
|
||||
StringBuilder builder;
|
||||
for (size_t i = offset; i < m_value.length() - offset; ++i) {
|
||||
if (m_value[i] == '\\' && i + 1 < m_value.length() - offset) {
|
||||
|
@ -114,14 +128,62 @@ String Token::string_value() const
|
|||
case '\\':
|
||||
builder.append('\\');
|
||||
break;
|
||||
case 'x': {
|
||||
if (i + 2 >= m_value.length() - offset)
|
||||
return encoding_failure(StringValueStatus::MalformedHexEscape);
|
||||
|
||||
auto digit1 = m_value[++i];
|
||||
auto digit2 = m_value[++i];
|
||||
if (!isxdigit(digit1) || !isxdigit(digit2))
|
||||
return encoding_failure(StringValueStatus::MalformedHexEscape);
|
||||
builder.append(static_cast<char>(hex2int(digit1) * 16 + hex2int(digit2)));
|
||||
break;
|
||||
}
|
||||
case 'u': {
|
||||
if (i + 1 >= m_value.length() - offset)
|
||||
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
|
||||
u32 code_point = m_value[++i];
|
||||
|
||||
if (code_point == '{') {
|
||||
code_point = 0;
|
||||
do {
|
||||
if (i + 1 >= m_value.length() - offset)
|
||||
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
|
||||
|
||||
auto ch = m_value[++i];
|
||||
if (!isxdigit(ch))
|
||||
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
|
||||
|
||||
auto new_code_point = (code_point << 4u) | hex2int(ch);
|
||||
if (new_code_point < code_point)
|
||||
return encoding_failure(StringValueStatus::UnicodeEscapeOverflow);
|
||||
code_point = new_code_point;
|
||||
} while (m_value[i + 1] != '}');
|
||||
++i;
|
||||
} else {
|
||||
if (i + 3 >= m_value.length() - offset || !isxdigit(code_point))
|
||||
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
|
||||
|
||||
code_point = hex2int(code_point);
|
||||
for (int j = 0; j < 3; ++j) {
|
||||
auto ch = m_value[++i];
|
||||
if (!isxdigit(ch))
|
||||
return encoding_failure(StringValueStatus::MalformedUnicodeEscape);
|
||||
code_point = (code_point << 4u) | hex2int(ch);
|
||||
}
|
||||
}
|
||||
|
||||
builder.append({ &code_point, 1 });
|
||||
break;
|
||||
}
|
||||
default:
|
||||
if (is_template && (m_value[i] == '$' || m_value[i] == '`')) {
|
||||
builder.append(m_value[i]);
|
||||
} else {
|
||||
// FIXME: Also parse octal, hex and unicode sequences
|
||||
// should anything else generate a syntax error?
|
||||
builder.append(m_value[i]);
|
||||
break;
|
||||
}
|
||||
|
||||
// FIXME: Also parse octal. Should anything else generate a syntax error?
|
||||
builder.append(m_value[i]);
|
||||
}
|
||||
} else {
|
||||
builder.append(m_value[i]);
|
||||
|
|
|
@ -172,9 +172,16 @@ public:
|
|||
size_t line_number() const { return m_line_number; }
|
||||
size_t line_column() const { return m_line_column; }
|
||||
double double_value() const;
|
||||
String string_value() const;
|
||||
bool bool_value() const;
|
||||
|
||||
// Result of decoding a token's escape sequences, written through the
// out-parameter of string_value(StringValueStatus&).
enum class StringValueStatus {
|
||||
// No escape-sequence error was reported.
Ok,
|
||||
// A \x escape was not followed by two hexadecimal digits.
MalformedHexEscape,
|
||||
// A \uHHHH or \u{...} escape was truncated or contained a non-hex character.
MalformedUnicodeEscape,
|
||||
// The value accumulated inside \u{...} overflowed while being built; the
// parser reports this as a code point greater than 0x10ffff.
UnicodeEscapeOverflow,
|
||||
};
|
||||
String string_value(StringValueStatus& status) const;
|
||||
|
||||
bool is_identifier_name() const;
|
||||
|
||||
private:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue