/**
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "lexer.h"

namespace panda::pandasm {

/*-------------------------------*/

/* Is this a delimiter? */
Token::Type FindDelim(char c)
{
    /* The map of delimiters */
    static const std::unordered_map<char, Token::Type> DELIM = {{',', Token::Type::DEL_COMMA},
                                                                {':', Token::Type::DEL_COLON},
                                                                {'{', Token::Type::DEL_BRACE_L},
                                                                {'}', Token::Type::DEL_BRACE_R},
                                                                {'(', Token::Type::DEL_BRACKET_L},
                                                                {')', Token::Type::DEL_BRACKET_R},
                                                                {'<', Token::Type::DEL_LT},
                                                                {'>', Token::Type::DEL_GT},
                                                                {'=', Token::Type::DEL_EQ},
                                                                {'[', Token::Type::DEL_SQUARE_BRACKET_L},
                                                                {']', Token::Type::DEL_SQUARE_BRACKET_R}};

    auto iter = DELIM.find(c);

    if (iter == DELIM.end()) {
        return Token::Type::ID_BAD;
    }

    return iter->second;
}
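
/* Editorial note with a worked example (not part of the original file): the
 * lookup above maps single delimiter characters to token types, so, given the
 * DELIM table, FindDelim(',') yields Token::Type::DEL_COMMA, while any
 * unmapped character such as 'a' falls through to Token::Type::ID_BAD. */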

Token::Type FindOperation(std::string_view s)
{
    /* Generate the map of OPERATIONS from the ISA: */
    static const std::unordered_map<std::string_view, Token::Type> OPERATIONS = {
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define OPLIST(inst_code, name, optype, width, flags, dst_idx, use_idxs) \
    {std::string_view(name), Token::Type::ID_OP_##inst_code},
        PANDA_INSTRUCTION_LIST(OPLIST)
#undef OPLIST
    };

    auto iter = OPERATIONS.find(s);

    if (iter == OPERATIONS.end()) {
        return Token::Type::ID_BAD;
    }

    return iter->second;
}
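
/* Editorial sketch (not part of the original file): PANDA_INSTRUCTION_LIST is
 * an X-macro, so each OPLIST expansion contributes one map entry. For a
 * hypothetical instruction whose mnemonic is "mov" and whose code is MOV, the
 * expansion would read:
 *
 *     {std::string_view("mov"), Token::Type::ID_OP_MOV},
 */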

Token::Type Findkeyword(std::string_view s)
{
    /* Generate the map of KEYWORDS: */
    static const std::unordered_map<std::string_view, Token::Type> KEYWORDS = {
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define KEYWORDS(name, inst_code) {std::string_view(name), Token::Type::ID_##inst_code},
        KEYWORDS_LIST(KEYWORDS)
#undef KEYWORDS
    };

    auto iter = KEYWORDS.find(s);

    if (iter == KEYWORDS.end()) {
        return Token::Type::ID_BAD;
    }

    return iter->second;
}

std::string_view TokenTypeWhat(Token::Type t)
{
    if (t >= Token::Type::OPERATION && t < Token::Type::KEYWORD) {
        return "OPERATION";
    }

    if (t >= Token::Type::KEYWORD) {
        return "KEYWORD";
    }

    switch (t) {
        case Token::Type::ID_BAD: {
            return "ID_BAD";
        }
        case Token::Type::DEL_COMMA: {
            return "DEL_COMMA";
        }
        case Token::Type::DEL_COLON: {
            return "DEL_COLON";
        }
        case Token::Type::DEL_BRACE_L: {
            return "DEL_BRACE_L";
        }
        case Token::Type::DEL_BRACE_R: {
            return "DEL_BRACE_R";
        }
        case Token::Type::DEL_BRACKET_L: {
            return "DEL_BRACKET_L";
        }
        case Token::Type::DEL_BRACKET_R: {
            return "DEL_BRACKET_R";
        }
        case Token::Type::DEL_SQUARE_BRACKET_L: {
            return "DEL_SQUARE_BRACKET_L";
        }
        case Token::Type::DEL_SQUARE_BRACKET_R: {
            return "DEL_SQUARE_BRACKET_R";
        }
        case Token::Type::DEL_GT: {
            return "DEL_GT";
        }
        case Token::Type::DEL_LT: {
            return "DEL_LT";
        }
        case Token::Type::DEL_EQ: {
            return "DEL_EQ";
        }
        case Token::Type::DEL_DOT: {
            return "DEL_DOT";
        }
        case Token::Type::ID: {
            return "ID";
        }
        case Token::Type::ID_STRING: {
            return "ID_STRING";
        }
        default:
            return "NONE";
    }
}
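
/* Editorial note (not part of the original file): the two range checks at the
 * top of TokenTypeWhat rely on the Token::Type enum placing every operation
 * value between OPERATION and KEYWORD, and every keyword value after KEYWORD,
 * which is why the switch below them only needs to spell out delimiters and
 * identifier kinds. */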

static bool IsQuote(char c)
{
    return c == '"';
}

Lexer::Lexer() : curr_line_(nullptr)
{
    LOG(DEBUG, ASSEMBLER) << "element of class Lexer initialized";
}

Lexer::~Lexer()
{
    LOG(DEBUG, ASSEMBLER) << "element of class Lexer destructed";
}

Tokens Lexer::TokenizeString(const std::string &source_str)
{
    LOG(DEBUG, ASSEMBLER) << "started tokenizing of line " << lines_.size() + 1 << ": ";

    lines_.emplace_back(source_str);

    curr_line_ = &lines_.back();

    LOG(DEBUG, ASSEMBLER) << std::string_view(&*(curr_line_->buffer.begin() + curr_line_->pos),
                                              curr_line_->end - curr_line_->pos);

    AnalyzeLine();

    LOG(DEBUG, ASSEMBLER) << "tokenization of line " << lines_.size() << " is successful";
    LOG(DEBUG, ASSEMBLER) << " tokens identified: ";

    for (const auto &f_i : lines_.back().tokens) {
        LOG(DEBUG, ASSEMBLER) << "\n "
                              << std::string_view(&*(f_i.whole_line.begin() + f_i.bound_left),
                                                  f_i.bound_right - f_i.bound_left)
                              << " (type: " << TokenTypeWhat(f_i.type) << ")";

        LOG(DEBUG, ASSEMBLER);
        LOG(DEBUG, ASSEMBLER);
    }

    return std::pair<std::vector<Token>, Error>(lines_.back().tokens, err_);
}
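
/* Editorial usage sketch (not part of the original file; it assumes a caller
 * that includes the assembler headers declaring Lexer, Tokens, and Token):
 *
 *     panda::pandasm::Lexer lexer;
 *     panda::pandasm::Tokens res = lexer.TokenizeString("L1: mov v0, v1 # moving!");
 *     std::vector<panda::pandasm::Token> &tokens = res.first;  // tokens of this line
 *     // res.second carries the Error recorded during lexing, if any.
 */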

/* End of line? */
bool Lexer::Eol() const
{
    return curr_line_->pos == curr_line_->end;
}

/* Return the type of token */
Token::Type Lexer::LexGetType(size_t beg, size_t end) const
{
    if (FindDelim(curr_line_->buffer[beg]) != Token::Type::ID_BAD) { /* delimiter */
        return FindDelim(curr_line_->buffer[beg]);
    }

    std::string_view p(&*(curr_line_->buffer.begin() + beg), end - beg);

    Token::Type type = Findkeyword(p);

    if (type != Token::Type::ID_BAD) {
        return type;
    }

    type = FindOperation(p);

    if (type != Token::Type::ID_BAD) {
        return type;
    }

    if (IsQuote(curr_line_->buffer[beg])) {
        return Token::Type::ID_STRING;
    }

    return Token::Type::ID; /* other */
}
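
/* Editorial note (not part of the original file): the checks above give the
 * classification precedence delimiter > keyword > operation mnemonic > string
 * literal, with plain ID as the fallback. */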

/* Handle string literal */
bool Lexer::LexString()
{
    bool is_escape_seq = false;
    char quote = curr_line_->buffer[curr_line_->pos];
    size_t begin = curr_line_->pos;

    while (!Eol()) {
        ++(curr_line_->pos);

        char c = curr_line_->buffer[curr_line_->pos];

        if (is_escape_seq) {
            is_escape_seq = false;
            continue;
        }

        if (c == '\\') {
            is_escape_seq = true;
        }

        if (c == quote) {
            break;
        }
    }

    if (curr_line_->buffer[curr_line_->pos] != quote) {
        err_ = Error(std::string("Missing terminating ") + quote + " character", 0,
                     Error::ErrorType::ERR_STRING_MISSING_TERMINATING_CHARACTER, "", begin, curr_line_->pos,
                     curr_line_->buffer);
        return false;
    }

    ++(curr_line_->pos);

    return true;
}
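
/* Editorial example (not part of the original file): for the input "a\"b" the
 * backslash sets is_escape_seq, so the inner quote is skipped and the scan
 * stops only at the final quote, producing one ID_STRING token. An
 * unterminated literal such as "abc runs to end of line and reports
 * ERR_STRING_MISSING_TERMINATING_CHARACTER. */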

/*
 * Tokens handling: set the bound_left and bound_right fields of each element
 * of the tokens array to the first and last characters of the corresponding
 * token.
 *
 *                                          bound_r1  bound_r2  bound_r3
 *                                              |         |         |
 *                                              v         v         v
 *    token1 token2 token3 ...               token1    token2    token3    ...
 *                                   =>         ^         ^         ^
 *                                              |         |         |
 *    bound1 bound2 bound3 ...               bound_l1  bound_l2  bound_l3  ...
 */
void Lexer::LexTokens()
{
    if (Eol()) {
        return;
    }

    LOG(DEBUG, ASSEMBLER) << "token search started (line " << lines_.size() << "): "
                          << std::string_view(&*(curr_line_->buffer.begin() + curr_line_->pos),
                                              curr_line_->end - curr_line_->pos);

    while (curr_line_->end > curr_line_->pos && isspace(curr_line_->buffer[curr_line_->end - 1]) != 0) {
        --(curr_line_->end);
    }

    while (isspace(curr_line_->buffer[curr_line_->pos]) != 0 && !Eol()) {
        ++(curr_line_->pos);
    }

    size_t bound_right;

    size_t bound_left;

    for (int i = 0; !Eol(); ++i) {
        bound_left = curr_line_->pos;

        if (FindDelim(curr_line_->buffer[curr_line_->pos]) != Token::Type::ID_BAD) {
            ++(curr_line_->pos);
        } else if (IsQuote(curr_line_->buffer[curr_line_->pos])) {
            if (!LexString()) {
                return;
            }
        } else {
            while (!Eol() && FindDelim(curr_line_->buffer[curr_line_->pos]) == Token::Type::ID_BAD &&
                   isspace(curr_line_->buffer[curr_line_->pos]) == 0) {
                ++(curr_line_->pos);
            }
        }

        bound_right = curr_line_->pos;

        LOG(DEBUG, ASSEMBLER) << "token identified (line " << lines_.size() << ", "
                              << "token " << curr_line_->tokens.size() + 1 << "): "
                              << std::string_view(&*(curr_line_->buffer.begin() + bound_left),
                                                  bound_right - bound_left)
                              << " ("
                              << "type: " << TokenTypeWhat(LexGetType(bound_left, bound_right)) << ")";

        curr_line_->tokens.emplace_back(bound_left, bound_right, LexGetType(bound_left, bound_right),
                                        curr_line_->buffer);

        while (isspace(curr_line_->buffer[curr_line_->pos]) != 0 && !Eol()) {
            ++(curr_line_->pos);
        }
    }

    LOG(DEBUG, ASSEMBLER) << "all tokens identified (line " << lines_.size() << ")";
}
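
/* Editorial walk-through (not part of the original file): for the line
 *
 *     L1: mov v0, v1
 *
 * the loop above emits the tokens  L1  :  mov  v0  ,  v1  in order, each with
 * [bound_left, bound_right) delimiting its characters inside
 * curr_line_->buffer. */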

/*
 * Ignore comments:
 * find PARSE_COMMENT_MARKER and move line->end
 * to a new position (just past the last character of the last
 * significant (i.e., non-comment) element of the current
 * line: line->buffer).
 *
 * Ex:
 *     [Label:] operation operand[,operand] [# comment]
 *
 *     L1: mov v0, v1 # moving!          L1: mov v0, v1 # moving!
 *                             ^    =>                 ^
 *                             |                       |
 *                            end                     end
 */
void Lexer::LexPreprocess()
{
    LOG(DEBUG, ASSEMBLER) << "started removing comments (line " << lines_.size() << "): "
                          << std::string_view(&*(curr_line_->buffer.begin() + curr_line_->pos),
                                              curr_line_->end - curr_line_->pos);

    // Searching for a comment marker located outside of string literals.
    bool inside_str_lit = curr_line_->buffer.size() > 0 && curr_line_->buffer[0] == '\"';
    size_t cmt_pos = curr_line_->buffer.find_first_of("\"#", 0);
    if (cmt_pos != std::string::npos) {
        do {
            if (cmt_pos != 0 && curr_line_->buffer[cmt_pos - 1] != '\\' && curr_line_->buffer[cmt_pos] == '\"') {
                inside_str_lit = !inside_str_lit;
            } else if (curr_line_->buffer[cmt_pos] == PARSE_COMMENT_MARKER && !inside_str_lit) {
                break;
            }
        } while ((cmt_pos = curr_line_->buffer.find_first_of("\"#", cmt_pos + 1)) != std::string::npos);
    }

    if (cmt_pos != std::string::npos) {
        curr_line_->end = cmt_pos;
    }

    while (curr_line_->end > curr_line_->pos && isspace(curr_line_->buffer[curr_line_->end - 1]) != 0) {
        --(curr_line_->end);
    }

    LOG(DEBUG, ASSEMBLER) << "comments removed (line " << lines_.size() << "): "
                          << std::string_view(&*(curr_line_->buffer.begin() + curr_line_->pos),
                                              curr_line_->end - curr_line_->pos);
}
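
/* Editorial example (not part of the original file): in
 *
 *     mov v0, v1 # comment
 *
 * the '#' lies outside any string literal, so curr_line_->end is pulled back
 * to just after "v1"; a '#' inside a quoted literal does not truncate the
 * line, because inside_str_lit is true when it is reached. */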

void Lexer::SkipSpace()
{
    while (!Eol() && isspace(curr_line_->buffer[curr_line_->pos]) != 0) {
        ++(curr_line_->pos);
    }
}

void Lexer::AnalyzeLine()
{
    LexPreprocess();

    SkipSpace();

    LexTokens();
}

/*-------------------------------*/

} // namespace panda::pandasm