mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-14 11:39:35 +00:00
2a77441117
This is UB by the standard, and caught by the libstdc++ asserts Differential Revision: https://reviews.llvm.org/D87892
337 lines
9.2 KiB
C++
337 lines
9.2 KiB
C++
//===-- lib/Parser/token-sequence.cpp -------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "token-sequence.h"
|
|
#include "flang/Parser/characters.h"
|
|
#include "flang/Parser/message.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
namespace Fortran::parser {
|
|
|
|
// Move assignment: empties this sequence first, then exchanges contents with
// "that", which therefore ends up holding the just-cleared state.
TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  this->clear();
  this->swap(that);
  return *this;
}
|
|
|
|
// Resets the sequence to empty: clears the character buffer, the token start
// offsets, and the provenance mappings, and rewinds nextStart_ to zero.
void TokenSequence::clear() {
  provenances_.clear();
  char_.clear();
  start_.clear();
  nextStart_ = 0;
}
|
|
|
|
// Drops the last token: its start offset, its characters, and the same number
// of trailing bytes from the provenance mappings.
// NOTE(review): unlike RemoveLastToken(), nothing here CHECKs that a token
// exists; callers presumably guarantee a non-empty sequence -- confirm.
void TokenSequence::pop_back() {
  const auto tokenStart{start_.back()};
  const std::size_t removedBytes{nextStart_ - tokenStart};
  start_.pop_back();
  nextStart_ = tokenStart;
  char_.resize(tokenStart);
  provenances_.RemoveLastBytes(removedBytes);
}
|
|
|
|
// Forwards shrink_to_fit() to each of the underlying containers.
void TokenSequence::shrink_to_fit() {
  char_.shrink_to_fit();
  start_.shrink_to_fit();
  provenances_.shrink_to_fit();
}
|
|
|
|
// Exchanges the complete state of this sequence with that of "that".
void TokenSequence::swap(TokenSequence &that) {
  char_.swap(that.char_);
  start_.swap(that.start_);
  provenances_.swap(that.provenances_);
  std::swap(nextStart_, that.nextStart_);
}
|
|
|
|
std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
|
|
std::size_t tokens{start_.size()};
|
|
for (; at < tokens; ++at) {
|
|
if (!TokenAt(at).IsBlank()) {
|
|
return at;
|
|
}
|
|
}
|
|
return tokens; // even if at > tokens
|
|
}
|
|
|
|
// C-style /*comments*/ are removed from preprocessing directive
|
|
// token sequences by the prescanner, but not C++ or Fortran
|
|
// free-form line-ending comments (//... and !...) because
|
|
// ignoring them is directive-specific.
|
|
bool TokenSequence::IsAnythingLeft(std::size_t at) const {
|
|
std::size_t tokens{start_.size()};
|
|
for (; at < tokens; ++at) {
|
|
auto tok{TokenAt(at)};
|
|
const char *end{tok.end()};
|
|
for (const char *p{tok.begin()}; p < end; ++p) {
|
|
switch (*p) {
|
|
case '/':
|
|
return p + 1 >= end || p[1] != '/';
|
|
case '!':
|
|
return false;
|
|
case ' ':
|
|
break;
|
|
default:
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Removes the final token, which must exist and be non-empty (both conditions
// are CHECKed), together with its characters and provenance bytes.
void TokenSequence::RemoveLastToken() {
  CHECK(!start_.empty());
  const auto tokenStart{start_.back()};
  CHECK(nextStart_ > tokenStart);
  const std::size_t removedBytes{nextStart_ - tokenStart};
  start_.pop_back();
  nextStart_ = tokenStart;
  char_.erase(char_.begin() + nextStart_, char_.end());
  provenances_.RemoveLastBytes(removedBytes);
}
|
|
|
|
void TokenSequence::Put(const TokenSequence &that) {
|
|
if (nextStart_ < char_.size()) {
|
|
start_.push_back(nextStart_);
|
|
}
|
|
int offset = char_.size();
|
|
for (int st : that.start_) {
|
|
start_.push_back(st + offset);
|
|
}
|
|
char_.insert(char_.end(), that.char_.begin(), that.char_.end());
|
|
nextStart_ = char_.size();
|
|
provenances_.Put(that.provenances_);
|
|
}
|
|
|
|
void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
|
|
std::size_t offset{0};
|
|
std::size_t tokens{that.SizeInTokens()};
|
|
for (std::size_t j{0}; j < tokens; ++j) {
|
|
CharBlock tok{that.TokenAt(j)};
|
|
Put(tok, range.OffsetMember(offset));
|
|
offset += tok.size();
|
|
}
|
|
CHECK(offset == range.size());
|
|
}
|
|
|
|
void TokenSequence::Put(
|
|
const TokenSequence &that, std::size_t at, std::size_t tokens) {
|
|
ProvenanceRange provenance;
|
|
std::size_t offset{0};
|
|
for (; tokens-- > 0; ++at) {
|
|
CharBlock tok{that.TokenAt(at)};
|
|
std::size_t tokBytes{tok.size()};
|
|
for (std::size_t j{0}; j < tokBytes; ++j) {
|
|
if (offset == provenance.size()) {
|
|
provenance = that.provenances_.Map(that.start_[at] + j);
|
|
offset = 0;
|
|
}
|
|
PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
|
|
}
|
|
CloseToken();
|
|
}
|
|
}
|
|
|
|
void TokenSequence::Put(
|
|
const char *s, std::size_t bytes, Provenance provenance) {
|
|
for (std::size_t j{0}; j < bytes; ++j) {
|
|
PutNextTokenChar(s[j], provenance + j);
|
|
}
|
|
CloseToken();
|
|
}
|
|
|
|
void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
|
|
Put(&t[0], t.size(), provenance);
|
|
}
|
|
|
|
// Appends the contents of the string "s" as one token whose provenances
// start at "provenance".
void TokenSequence::Put(const std::string &s, Provenance provenance) {
  const char *const text{s.data()};
  const std::size_t length{s.size()};
  Put(text, length, provenance);
}
|
|
|
|
// Appends the string produced by the stream "ss" as one token whose
// provenances start at "provenance".
void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  const auto &text{ss.str()};
  Put(text, provenance);
}
|
|
|
|
// Converts the sequence to lower case in place, token by token, while leaving
// alone the text that must keep its case: the body of a Hollerith constant,
// and the quoted part of character literals.  Returns *this.
TokenSequence &TokenSequence::ToLowerCase() {
  const std::size_t tokenCount{start_.size()};
  const std::size_t charCount{char_.size()};
  // Lower-cases every character in [from, to).
  const auto lowerRange{[](char *from, const char *to) {
    for (; from < to; ++from) {
      *from = ToLowerCaseLetter(*from);
    }
  }};
  std::size_t tokenIndex{0};
  std::size_t pos{0};
  while (pos < charCount) {
    // The current token ends where the next one starts, or at the end of the
    // buffer when it is the last token.
    std::size_t tokenEnd{charCount};
    if (tokenIndex + 1 < tokenCount) {
      tokenEnd = start_[++tokenIndex];
    }
    char *p{&char_[pos]};
    char const *limit{char_.data() + tokenEnd};
    pos = tokenEnd;
    if (IsDecimalDigit(*p)) {
      // Skip the leading digits, then classify what follows them.
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p < limit) {
        if (*p == 'h' || *p == 'H') {
          // Hollerith: only the 'H' itself is lowered.
          *p = 'h';
        } else if (*p != '_') {
          // Not a kind-prefixed character literal (e.g., 1_"ABC"), so the
          // rest is something like an exponent: lower all of it.
          lowerRange(p, limit);
        }
      }
    } else if (const char last{limit[-1]}; last == '\'' || last == '"') {
      // The token ends with a quote.  If it also begins with that quote it is
      // a character literal without prefix and is left untouched.
      if (*p != last) {
        if (p[1] == last) {
          // BOZX-prefixed constant
          lowerRange(p, limit);
        } else {
          // Literal with kind-param prefix name (e.g., K_"ABC"): lower only
          // the prefix, stopping at the opening quote.
          for (; *p != last; ++p) {
            *p = ToLowerCaseLetter(*p);
          }
        }
      }
    } else {
      lowerRange(p, limit);
    }
  }
  return *this;
}
|
|
|
|
bool TokenSequence::HasBlanks(std::size_t firstChar) const {
|
|
std::size_t tokens{SizeInTokens()};
|
|
for (std::size_t j{0}; j < tokens; ++j) {
|
|
if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
|
|
std::size_t tokens{SizeInTokens()};
|
|
bool lastWasBlank{false};
|
|
for (std::size_t j{0}; j < tokens; ++j) {
|
|
bool isBlank{TokenAt(j).IsBlank()};
|
|
if (isBlank && lastWasBlank && start_[j] >= firstChar) {
|
|
return true;
|
|
}
|
|
lastWasBlank = isBlank;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Rebuilds the sequence without the blank tokens that start at character
// offset "firstChar" or later; all other tokens are kept in order.
// Returns *this.
TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
  const std::size_t count{SizeInTokens()};
  TokenSequence kept;
  for (std::size_t index{0}; index < count; ++index) {
    const bool removable{
        TokenAt(index).IsBlank() && start_[index] >= firstChar};
    if (!removable) {
      kept.Put(*this, index);
    }
  }
  swap(kept);
  return *this;
}
|
|
|
|
// Rebuilds the sequence without the blank tokens that directly follow another
// blank token and start at character offset "firstChar" or later; all other
// tokens are kept in order.  Returns *this.
TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
  const std::size_t count{SizeInTokens()};
  TokenSequence kept;
  bool previousBlank{false};
  for (std::size_t index{0}; index < count; ++index) {
    const bool currentBlank{TokenAt(index).IsBlank()};
    const bool redundant{
        currentBlank && previousBlank && start_[index] >= firstChar};
    if (!redundant) {
      kept.Put(*this, index);
    }
    previousBlank = currentBlank;
  }
  swap(kept);
  return *this;
}
|
|
|
|
// Truncates the sequence at the first token whose first non-blank character
// is '!'.  When "skipFirst" is true, the first such token is passed over and
// the truncation happens at the next one.  A sequence with no qualifying
// token is left unchanged.  Returns *this.
TokenSequence &TokenSequence::ClipComment(bool skipFirst) {
  const std::size_t count{SizeInTokens()};
  for (std::size_t index{0}; index < count; ++index) {
    if (TokenAt(index).FirstNonBlank() != '!') {
      continue;
    }
    if (skipFirst) {
      skipFirst = false;
      continue;
    }
    TokenSequence clipped;
    if (index > 0) {
      // Only the first index - 1 tokens are copied, so the token directly
      // before the '!' token is dropped as well.
      // NOTE(review): presumably that token is the blank separating the
      // statement from the comment -- confirm that this is intended.
      clipped.Put(*this, 0, index - 1);
    }
    swap(clipped);
    break;
  }
  return *this;
}
|
|
|
|
// Passes this sequence's characters, followed by its provenance mappings, to
// "cooked".
void TokenSequence::Emit(CookedSource &cooked) const {
  // data() is valid even when char_ is empty; &char_[0] on an empty vector is
  // undefined behavior and is trapped by libstdc++'s assertions.
  cooked.Put(char_.data(), char_.size());
  cooked.PutProvenanceMappings(provenances_);
}
|
|
|
|
// Writes a debugging listing to "o": a summary line with the character count
// and nextStart_, then one line per token giving its index, its start offset,
// and its text in single quotes.
void TokenSequence::Dump(llvm::raw_ostream &o) const {
  o << "TokenSequence has " << char_.size();
  o << " chars; nextStart_ " << nextStart_ << '\n';
  const std::size_t count{start_.size()};
  for (std::size_t index{0}; index < count; ++index) {
    o << '[' << index << "] @ " << start_[index];
    o << " '" << TokenAt(index).ToString() << "'\n";
  }
}
|
|
|
|
// Returns the provenance of the character at "offset" within token "token":
// the start of the provenance range that the character's buffer position maps
// to.
Provenance TokenSequence::GetTokenProvenance(
    std::size_t token, std::size_t offset) const {
  const auto charIndex{start_[token] + offset};
  ProvenanceRange mapped{provenances_.Map(charIndex)};
  return mapped.start();
}
|
|
|
|
// Returns the provenance range that begins at the character at "offset"
// within token "token", limited via Prefix() to the number of bytes left in
// the token from that offset.
ProvenanceRange TokenSequence::GetTokenProvenanceRange(
    std::size_t token, std::size_t offset) const {
  const auto charIndex{start_[token] + offset};
  ProvenanceRange mapped{provenances_.Map(charIndex)};
  const auto bytesLeft{TokenBytes(token) - offset};
  return mapped.Prefix(bytesLeft);
}
|
|
|
|
// Returns a provenance range for "tokens" tokens beginning with token index
// "token".  The range of the first token's start is extended with each
// following token's range for as long as AnnexIfPredecessor() accepts it;
// extension stops at the first token that is not accepted.  An empty interval
// yields a default-constructed range.
ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
    std::size_t token, std::size_t tokens) const {
  if (tokens == 0) {
    return {};
  }
  ProvenanceRange result{provenances_.Map(start_[token])};
  for (std::size_t remaining{tokens - 1}; remaining > 0; --remaining) {
    ++token;
    if (!result.AnnexIfPredecessor(provenances_.Map(start_[token]))) {
      break;
    }
  }
  return result;
}
|
|
|
|
// Returns the interval provenance range computed over all tokens of the
// sequence.
ProvenanceRange TokenSequence::GetProvenanceRange() const {
  const std::size_t tokenCount{start_.size()};
  return GetIntervalProvenanceRange(0, tokenCount);
}
|
|
|
|
// Examines the first non-blank character of every token and reports, through
// "messages", each one that is not a valid Fortran token character.  A '!'
// in the very first token is tolerated.  Characters below ' ' or at or above
// 0x7f are reported by hexadecimal code, others literally.  Returns *this.
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
    Messages &messages) const {
  const std::size_t count{SizeInTokens()};
  for (std::size_t index{0}; index < count; ++index) {
    CharBlock token{TokenAt(index)};
    char ch{token.FirstNonBlank()};
    if (ch == ' ' || IsValidFortranTokenCharacter(ch)) {
      continue;
    }
    if (ch == '!' && index == 0) {
      continue; // allow in !dir$
    }
    if (ch < ' ' || ch >= '\x7f') {
      messages.Say(GetTokenProvenanceRange(index),
          "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
    } else {
      messages.Say(GetTokenProvenanceRange(index),
          "bad character ('%c') in Fortran token"_err_en_US, ch);
    }
  }
  return *this;
}
|
|
} // namespace Fortran::parser
|