mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-05-14 01:46:41 +00:00

The MCInstDesc table changed. Besides this only minor changes were done and some additional code is emitted now for LLVM. This commit is the combination of all previous Auto-Sync commits. The list of commit messages follows: ----------- Combination of all commits of the refactored tablegen backends. These are the changes made for LLVM 16. Refactor Capstone relevant TableGen Emitter backends. This commit extracts the code which emits generated tables into two printer classes. The Printer is called whenever actual code is written to a file. There is the PrinterLLVM which emits the code as before and PrinterCapstone which is tailored to our needs (emitting C and generates more info). Additionally missing memory access properties were added to ARMs td files. Emit a single header for all files. Capitalize Target name for enums. Add lay metric to emit enum value for Banked and system regs. Malloc substr Sort instructions in ascending order. Free substr after use Add vanished constraints Fix `regInfoEmitEnums()` and indent Fix `GenDisassemblerTables.inc#checkDecoderPredicate()` Fix `TriCoreGenRegisterInfo.inc` | `PrinterCapstone::regInfoEmitRegClasses` revert changes to NEON instructions Add instructions with duplicate operands as Matchables. Add memory load and store info Correct memory access and out operand info Set register lists again as read ops due to https://github.com/llvm/llvm-project/issues/62455 Make printAliasInstr and getMnemonic static. Generate CS instruction enums from actual mnemonic. Not via the flawed AsmMatcher. Fix typo in InstrInfoEmitter.cpp Add deprecated QPX feature Replace + and - with p and m Add AssemblerPredicates to PPC Generate RegEncodingTable Define functions which are called by the Mapper as static. Necessary because these functions are present in each arch' Remove set_mem_access(). The cases where this is used to mark access to actual memory operands are either very rare, or those are neon lane indices.
Generate correct op type for absolute addresses. Check for RegisterPointer operands first to prevent mis-categorization. Add missing Operand types Generate Instruction formats for PPC. Add Paired Single instructions. Partly revert 94e41ce23a7fd863a96288ec05b6c7202c3cfbf1 (introduces accidentally removed code.) Set correct operand types for PS operands Add memory read/write attributes Add missing operand types Add mayLoad and mayStore information. Add documentation. Handle special AArch64 operand Replace C++ with C code. Check for duplicate enum instr. names Check for duplicate definitions of system registers. Add note about missing target names. Resolve templates in a single static method and add docs about it. Revert printing target name in upper case. Revert partially C++ syntax fixes in .td files. They break the TemplateCollector since it searches for exactly those references but can't find any' Add all SubtargetFeatures to feature enum. Not just the one used by CGIs. Pass Decoder Enable to check specific table fields to determine if reg enum must be emitted. Allow to add namespace to type name/ Formatting Rework emitting of tables. The system operands are now emitted in reg, imm and alias groups. Also a bug was fixed which emitted incorrect code. Check for rename IMPLICIT_IMM operand types Pass DecodeComplete as pointer not as reference Print undef when it needs to be printed. Add namespace ids to all types and functions. Rework C translation. Pass MCOp as pointer not as ref Add missing SysImm type Fix syntax mistakes Generate additional sys immediates and op groups. Handle edge case for printSVERegOp Handle default arguments of template functions. Add two missing op groups Generate a static RecEncodingTable Set enum values to encodings of the sys ops Generate a single Enum value file for system operands. Replace System operand groups with their operand types Fix missing braces warning Emit MCOperand validator.
Emit lookupByName functions for sys operands Add namespaces for ARM. Check for Target if default arguments of template functions are resolved. auto-sync opcode & operand encoding info generation (#14) * Added operand and opcode info generation * Wrapped deprecated macro under an intellisense check Basically intellisense fails, causing multiple errors in other files, so when intellisense parses the code it will use the different version of the macro * Fixed a small bug Used double braces to prevent an old bug Removed extra new line and fixed a bug regarding move semantics
180 lines
6.4 KiB
C++
180 lines
6.4 KiB
C++
//===- StringMatcher.cpp - Generate a matcher for input strings -----------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements the StringMatcher class.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "Printer.h"
|
|
#include "llvm/TableGen/StringMatcher.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include "llvm/TableGen/Error.h"
|
|
#include <cassert>
|
|
#include <map>
|
|
#include <string>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
using namespace llvm;
|
|
|
|
/// FindFirstNonCommonLetter - Find the first character in the keys of the
|
|
/// string pairs that is not shared across the whole set of strings. All
|
|
/// strings are assumed to have the same length.
|
|
static unsigned
|
|
FindFirstNonCommonLetter(const std::vector<const
|
|
StringMatcher::StringPair*> &Matches) {
|
|
assert(!Matches.empty());
|
|
for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) {
|
|
// Check to see if letter i is the same across the set.
|
|
char Letter = Matches[0]->first[i];
|
|
|
|
for (const StringMatcher::StringPair *Match : Matches)
|
|
if (Match->first[i] != Letter)
|
|
return i;
|
|
}
|
|
|
|
return Matches[0]->first.size();
|
|
}
|
|
|
|
/// EmitStringMatcherForChar - Given a set of strings that are known to be the
|
|
/// same length and whose characters leading up to CharNo are the same, emit
|
|
/// code to verify that CharNo and later are the same.
|
|
///
|
|
/// \return - True if control can leave the emitted code fragment.
|
|
bool StringMatcher::EmitStringMatcherForChar(
|
|
const std::vector<const StringPair *> &Matches, unsigned CharNo,
|
|
unsigned IndentCount, bool IgnoreDuplicates) const {
|
|
switch(PrinterLLVM::getLanguage()) {
|
|
default:
|
|
PrintFatalNote("Printer language not known to StringMatcher.");
|
|
case PRINTER_LANG_CPP:
|
|
return EmitStringMatcherForCharCPP(Matches, CharNo, IndentCount, IgnoreDuplicates);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool StringMatcher::EmitStringMatcherForCharCPP(
|
|
const std::vector<const StringPair *> &Matches, unsigned CharNo,
|
|
unsigned IndentCount, bool IgnoreDuplicates) const {
|
|
assert(!Matches.empty() && "Must have at least one string to match!");
|
|
std::string Indent(IndentCount * 2 + 4, ' ');
|
|
|
|
// If we have verified that the entire string matches, we're done: output the
|
|
// matching code.
|
|
if (CharNo == Matches[0]->first.size()) {
|
|
if (Matches.size() > 1 && !IgnoreDuplicates)
|
|
report_fatal_error("Had duplicate keys to match on");
|
|
|
|
// If the to-execute code has \n's in it, indent each subsequent line.
|
|
StringRef Code = Matches[0]->second;
|
|
|
|
std::pair<StringRef, StringRef> Split = Code.split('\n');
|
|
OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n";
|
|
|
|
Code = Split.second;
|
|
while (!Code.empty()) {
|
|
Split = Code.split('\n');
|
|
OS << Indent << Split.first << "\n";
|
|
Code = Split.second;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Bucket the matches by the character we are comparing.
|
|
std::map<char, std::vector<const StringPair*>> MatchesByLetter;
|
|
|
|
for (const StringPair *Match : Matches)
|
|
MatchesByLetter[Match->first[CharNo]].push_back(Match);
|
|
|
|
// If we have exactly one bucket to match, see how many characters are common
|
|
// across the whole set and match all of them at once.
|
|
if (MatchesByLetter.size() == 1) {
|
|
unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches);
|
|
unsigned NumChars = FirstNonCommonLetter-CharNo;
|
|
|
|
// Emit code to break out if the prefix doesn't match.
|
|
if (NumChars == 1) {
|
|
// Do the comparison with if (Str[1] != 'f')
|
|
// FIXME: Need to escape general characters.
|
|
OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '"
|
|
<< Matches[0]->first[CharNo] << "')\n";
|
|
OS << Indent << " break;\n";
|
|
} else {
|
|
// Do the comparison with if memcmp(Str.data()+1, "foo", 3).
|
|
// FIXME: Need to escape general strings.
|
|
OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo
|
|
<< ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", "
|
|
<< NumChars << ") != 0)\n";
|
|
OS << Indent << " break;\n";
|
|
}
|
|
|
|
return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount,
|
|
IgnoreDuplicates);
|
|
}
|
|
|
|
// Otherwise, we have multiple possible things, emit a switch on the
|
|
// character.
|
|
OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n";
|
|
OS << Indent << "default: break;\n";
|
|
|
|
for (const auto &LI : MatchesByLetter) {
|
|
// TODO: escape hard stuff (like \n) if we ever care about it.
|
|
OS << Indent << "case '" << LI.first << "':\t // " << LI.second.size()
|
|
<< " string";
|
|
if (LI.second.size() != 1)
|
|
OS << 's';
|
|
OS << " to match.\n";
|
|
if (EmitStringMatcherForChar(LI.second, CharNo + 1, IndentCount + 1,
|
|
IgnoreDuplicates))
|
|
OS << Indent << " break;\n";
|
|
}
|
|
|
|
OS << Indent << "}\n";
|
|
return true;
|
|
}
|
|
|
|
/// Emit - Top level entry point.
|
|
///
|
|
void StringMatcher::Emit(unsigned Indent, bool IgnoreDuplicates) const {
|
|
switch(PrinterLLVM::getLanguage()) {
|
|
default:
|
|
PrintFatalNote("Printer language not known to StringMatcher.");
|
|
case PRINTER_LANG_CPP:
|
|
EmitCPP(Indent, IgnoreDuplicates);
|
|
break;
|
|
}
|
|
}
|
|
|
|
void StringMatcher::EmitCPP(unsigned Indent, bool IgnoreDuplicates) const {
|
|
// If nothing to match, just fall through.
|
|
if (Matches.empty()) return;
|
|
|
|
// First level categorization: group strings by length.
|
|
std::map<unsigned, std::vector<const StringPair*>> MatchesByLength;
|
|
|
|
for (const StringPair &Match : Matches)
|
|
MatchesByLength[Match.first.size()].push_back(&Match);
|
|
|
|
// Output a switch statement on length and categorize the elements within each
|
|
// bin.
|
|
OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n";
|
|
OS.indent(Indent*2+2) << "default: break;\n";
|
|
|
|
for (const auto &LI : MatchesByLength) {
|
|
OS.indent(Indent * 2 + 2)
|
|
<< "case " << LI.first << ":\t // " << LI.second.size() << " string"
|
|
<< (LI.second.size() == 1 ? "" : "s") << " to match.\n";
|
|
if (EmitStringMatcherForChar(LI.second, 0, Indent, IgnoreDuplicates))
|
|
OS.indent(Indent*2+4) << "break;\n";
|
|
}
|
|
|
|
OS.indent(Indent*2+2) << "}\n";
|
|
}
|