capstone/MCInstPrinter.c
Rot127 ef89b18a88 Architecture updater (auto-sync) - Updating AArch64 (#2026)
* Update sysop inc file

* Fix missing  braces warning

* Handle new system operands

* Fix build errors by renaming.

* Fix segfault

* Fix segfault

* Add custom MCOperand valiadtors

* Add AArch64 case for getFeatureBits

* Fix infinite loop

* Fix braces warning.

* Implement loopuo by name for sys operands

* Fix incorrect translation which remove else if statements.

* Fix several segfaults

* Rename GetRegFromClass patch

* Fix segfaults and asserts

* Fix segfault

* Move MRI setting to Mapping

* Remove unused code

* Add add_op_X functinos for AArch64.

* Add fill detail functins

* Handle RegWithShiftExtend operands

* Handle TypedVectorList operands.

* Handle ComplexRoatation operands

* Handle MemExtend operands

* Handle ImmRangeScale operands

* Handle ExactFPImm operands

* Handle GPRSeqPairsClass operands

* Handle Imm8OptLsl operands

* Handle ImmScale operands

* Handle LogicalImm operands

* Handle Matrix operands

* Handle SME Matrix tiles and vectors.

* Handle normal operands.

* Fix segfault.

* Handle PostInc operands.

* Reorder VecLayout enum to have no duplicate enum value.

* Handle PredicateAsCounter operands

* Handle ZPRasFPR operands

* Handle VectorIndex operands

* Handle UImm12Offset operands.

* Move reg suffix to enum val to single function.

* Handle SVERegOp operands

* Handle SVELogicalImm operands

* Handle SImm operand

* Handle PrefetchOp operands

* Handle Imm and ImmHex operands

* Handle GPR64as32 and GPR64x8 operands

* Add missing break

* Handle FPImm operand

* Handle ExtendedRegister opreand

* Handle CondCode operands

* Handle BTIHintOp operands

* Handle BarrierOption operands

* Handle BarrierXSOption

* Add not implemeted case again

* Handle ArithExtend operands

* Handle AdrpLabel and AlignedLabel operands

* Handle AMNoIndex operands

* Handle AddSubImm operands

* Handle MSRSystemRegisters and MRSSystemRegister operands

* Handle PSBHntOp and RPRFMOperand operands

* Remove unused variables

* Handle InverseCondCode operands

* Handle ImplicityTypedVectorList operands

* Handle ShiftedRegister operands

* Handle Shifter operands

* Handle SIMDType10Operand operands

* Handle SVCROp operands

* Handle SVEPattern operands

* Handle SVEVecLenSpecifier operands

* Handle SysCROperands

* Handle SysXzrPair operands

* Handle PState operands

* Handle VRegOperands

* Primt SME oeprands.

* Fix cs_operand.h include

* Rename arm64 -> aarch64 in python bindings.

* Add Python bindings for SH

* Fix ARM Python bindings (#2127)

* Restructure auto-sync update scripts.

* Move Helper functions to Updater dir

* Move requirements.txt

* Add basic ASUpdater.py

* Run black.

* Add inc file generater to updater

* Add option to select certain inc files fore generation.

* Enable clean build and implement patcher for inc files.

* Format config

* Patch main header files after inc generation.

* Implement clang-format function (unused yet, because it takes forever.)

* Copy generated inc files to arch dir

* Invert clean option (noramlly we need to clean the build dir.)

* Clearify arg doc

* Rename SystemRegister file for AArch64

* Centralize handling of path variables.

* Check if SystemOperands had to be generated before renaming on of its files.

* Replace class parameters by calling get_path

* Remove updater config which only contained paths.

* Add refactor option.

* Remove more path handling in the Configurator.

* Add translation step to updater.

* Fix includes after CppTranslator was moved into the Updater

* Remove updater config

* Fix several issue in the Configurator

* Fix file operations

* Remove addition argument from translator.

* Add Differ step to updater.

* Add path variable for arch_config

* Add diff step.

* Fix typo

* Introduce .clang-format path variable.

* Remove duplicate functions

* Add option to select update steps to execute.

* Check in write functions for write flag.

* Rename PatchMainHeader -> HeaderPatcher

* Move .gitignore

* Add README to vendor dir.

* Add all system operands to cstool output

* Update cstest with aarch64 changes

* Remove wb flag of aarch64 detail struct

* Set updates_flag after decoding

* Set writeback after decoding.

* Rename ARM64 -> AArch64

* Update printer and op mapping

* Exit normally

* Add AArch64 alias

* Fix some tmeplate function calls

* Fix flag check after rebase.

* Fix build by commentig unnused code.

* Add memory operand flag

* Handle memory operands printed via generic printOperand function.

* Handle UImm memory offsets

* Introduce MEM_REG and MEM_IMM op types

* Handle scaled memory immediates

* Check for op_count before checking for mem op at -1 index.

* Update memory operand flags.

* Pass imm/reg memory ops in set_imm/reg to set_mem.

* Add missing set_sme_operand call and fix assert.

* Remove CS_OP_MEM flag before entering switch.

* Preidcates are registers.

* Add shift info always to the previous operand

* Check for generic system regs

* Handle NumLanes = 0 LaneKind = q case

* Replace printImm call with normal print logic. Otherwise ops get added twice to detail.

* Handle FP operands in printOperand.

* Add access information to float operands.

* Rewrite SME matrix handling.

* Set correct SME layouts and allow for immediate range sme offsets.

* Handle cases of unknown system alias by setting their raw values

* Update cstool and header file with new SME offset handling

* Handle SME Tile lists.

* Fix build error in cstest

* Update MC tests for AArch64

* Handle TLBI operands and fix printing bug.

* Fix: Print signed value as signed.

* Add more system alias to detail.

* Remove duplicate hex prefix

* Set correct values for the register info

* Replace tabs with white spaces

* Move string append logic to own function.

* Set DecodeComplete = true before decoding (as originally in the LLVM code).

* Change type of feature argument, since only LLVM features are passed, not CS groups.

* Imitate lower_bound for the index table binary search.

* Remove trailing comments from test files.

* Print shift amount in decimal

* Save detail of shift alias instructions.

* Add extension details fot ext instruction alias

* Print LSB and width in decimal

* Fix LLVM bug. The feature check for V8_2a doesn't check if all features are enabled.

* Fix lower_bounds check.
For m == 0 we wrap around 0 of cause.

* Fix feature check. Add check for FeatureAll since it includes XS

* Operate on temporary MCInst when trying decoding.

* Add lower_bound behavior to IndexTypeStr binsearch.

* Fix MC tests which were incorrect because of missing FeatureAll check

* Add Alias handling for AArch64

* Update system operands with SYSIMM types and add additional sysop category.

* Add macros for meta programming (ARM64 <-> AArch64 selection).

* Fix union/struct confusion and add raw_value member to uninions.

* Allow to set Syntax and mode options for AArch64

* Fix build warning by using correct type

* Print shift value in decimal

* Add missing call to add_cs_detail.

* Update name map files with normalized names.

* Remove unused function

* Add check if detail should be filled.

* Fill detail for real instructions if only real detail is requested.

* Add always the extension.

* Make dir creation log message debug level

* Implement ADR immediate operand printer.

See: c3484b1fdc

* Check for flag registers beeing written and update flag.

* Move multiple CondCode helpers to aarch64.h because they are so freaking useful.

+ Print CC if it is EQ

* Fix incorrectly initialized CC and VectorLayout.

* Add LSL shift type for extensions.

* Fix case when shift amount is 0

* Fix post-index memory instructions.

* Pass raw immediate through getShiftValue to extract actual shift amount

* Setup AArch64 detail ops.

* Add flag for operands part of a list.

* Set vector indices for all relevant registers.

* Add missing call to add_cs_detail for postIncOperands

* Add ugly yet reliable way to determine post-index addressing mode

* Add support for old Capstone register alias.

* Remove leading space before some alias mnemonics.

* add AARCH64 to `cmake.sh`

* add HAS_AARCH64 to `cs.c`

* should probably just reference `cs_operand.h` in `aarch64.h`

* hint compiler at `AArch64_SYSREG` enum type for casting purposes

* update `Makefile` for AARCH64

leaves `CAPSTONE_HAS_ARM64` supported

* `testFeatureBits` platform function check

`testFeatureBits` should check if the platform function is visible first

* update tests to use AARCH64 convention

* hack: avoid enum casts for `MCInst` Values

Apple compiler really hates typecasting a enum, even if bounded from a unsigned. Lets set the raw_value directly

is a hack and needs proper review

* Check for present detail before accessing it.

* Add CS only groups

* Use general map ins_op type

* Fix build warning about str size computation.

* Disable warning about unitialized value for GCC 11.

Imm is initialized and the warning does not appear
in later versions.

* Use correct include guard for PPC

* Add missing requirements

* Update SystemOperand enums.

* Fix overlapping comparison warning

* Fix reachable assert where OpNum is not of type IMM

* Handle 0.0 operand for fcmp

* Fix incorrect variable passed.

* Fix for MacOS which doesn't know the warning and throws another one.

* Make getExtendEncoding static to fix build warning on MSVC.

* Fix build error: 'missing binary operator before token' by checking __GNUC__

* Add string search to add vector layout info.

* Add missing mem disponents of several ldr and str instructions.

* Add 0 immediates to several instructions.

* Rename v regs to q and d variant.

The cs_regname API can not pass the variant name of the register requested.
So we simply emit the default variant name.

* Fix incorrect enum value.

* Fix tests for system operands.

* Fix syntax issues in tests.

* Rename Arm64 -> AArch64 Python bindings.

* Fix Python bindings C structs.

* Fix generation of constants (ARMCC skipped because it starts with ARM)

* Update const files

* Remove -Wmaybe-uninitialized warning since it fails fuzz build

* Add missing comma

* Fix case

* Fix AArch64 Python bindings:

- Do not generate constants automatically (dscript is way too buggy).
- Update printing of details.

* Rename ARM64 -> AArch64 in test_corpus.py

* Rename test_arm64 -> test_aarch64

* Rename ARM-64 -> AArch64

* Fix diff CI test by disassembling AArch64 at former ARM64 place

* Fix several wrong types and remove unnecessary memebers from Python binding

* Fix: Same printing format of detail for cstool, test_ and test_*.py

* Fix: pass correct op index for mov alias with op[1] == reg wzr.

* Set prfm op manuall in case of unnown sysop. set_imm would add it to an memory operand wihtout base.

* Fix: If barrier ops are not set an assert is reached.

We fix it here by simply getting the immediate as the printing code does.

---------

Co-authored-by: Peace-Maker <peace-maker@wcfan.de>
Co-authored-by: Dayton <5340801+watbulb@users.noreply.github.com>
2023-11-15 12:12:14 +08:00

253 lines
7.5 KiB
C

/* Capstone Disassembly Engine */
/* By Rot127 <unisono@quyllur.org>, 2023 */
#include "MCInstPrinter.h"
#include "cs_priv.h"
#include <capstone/platform.h>
extern bool ARM_getFeatureBits(unsigned int mode, unsigned int feature);
extern bool PPC_getFeatureBits(unsigned int mode, unsigned int feature);
extern bool AArch64_getFeatureBits(unsigned int mode, unsigned int feature);
static bool testFeatureBits(const MCInst *MI, uint32_t Value)
{
assert(MI && MI->csh);
switch (MI->csh->arch) {
default:
assert(0 && "Not implemented for current arch.");
return false;
#ifdef CAPSTONE_HAS_ARM
case CS_ARCH_ARM:
return ARM_getFeatureBits(MI->csh->mode, Value);
#endif
#ifdef CAPSTONE_HAS_POWERPC
case CS_ARCH_PPC:
return PPC_getFeatureBits(MI->csh->mode, Value);
#endif
#ifdef CAPSTONE_HAS_AARCH64
case CS_ARCH_AARCH64:
return AArch64_getFeatureBits(MI->csh->mode, Value);
#endif
}
}
static bool matchAliasCondition(MCInst *MI, const MCRegisterInfo *MRI,
unsigned *OpIdx, const AliasMatchingData *M,
const AliasPatternCond *C,
bool *OrPredicateResult)
{
// Feature tests are special, they don't consume operands.
if (C->Kind == AliasPatternCond_K_Feature)
return testFeatureBits(MI, C->Value);
if (C->Kind == AliasPatternCond_K_NegFeature)
return !testFeatureBits(MI, C->Value);
// For feature tests where just one feature is required in a list, set the
// predicate result bit to whether the expression will return true, and only
// return the real result at the end of list marker.
if (C->Kind == AliasPatternCond_K_OrFeature) {
*OrPredicateResult |= testFeatureBits(MI, C->Value);
return true;
}
if (C->Kind == AliasPatternCond_K_OrNegFeature) {
*OrPredicateResult |= !(testFeatureBits(MI, C->Value));
return true;
}
if (C->Kind == AliasPatternCond_K_EndOrFeatures) {
bool Res = *OrPredicateResult;
*OrPredicateResult = false;
return Res;
}
// Get and consume an operand.
MCOperand *Opnd = MCInst_getOperand(MI, *OpIdx);
++(*OpIdx);
// Check the specific condition for the operand.
switch (C->Kind) {
default:
assert(0 && "invalid kind");
case AliasPatternCond_K_Imm:
// Operand must be a specific immediate.
return MCOperand_isImm(Opnd) &&
MCOperand_getImm(Opnd) == (int32_t)C->Value;
case AliasPatternCond_K_Reg:
// Operand must be a specific register.
return MCOperand_isReg(Opnd) && MCOperand_getReg(Opnd) == C->Value;
case AliasPatternCond_K_TiedReg:
// Operand must match the register of another operand.
return MCOperand_isReg(Opnd) &&
MCOperand_getReg(Opnd) ==
MCOperand_getReg(MCInst_getOperand(MI, C->Value));
case AliasPatternCond_K_RegClass:
// Operand must be a register in this class. Value is a register class
// id.
return MCOperand_isReg(Opnd) &&
MCRegisterClass_contains(
MCRegisterInfo_getRegClass(MRI, C->Value),
MCOperand_getReg(Opnd));
case AliasPatternCond_K_Custom:
// Operand must match some custom criteria.
assert(M->ValidateMCOperand && "A custom validator should be set but isn't.");
return M->ValidateMCOperand(Opnd, C->Value);
case AliasPatternCond_K_Ignore:
// Operand can be anything.
return true;
case AliasPatternCond_K_Feature:
case AliasPatternCond_K_NegFeature:
case AliasPatternCond_K_OrFeature:
case AliasPatternCond_K_OrNegFeature:
case AliasPatternCond_K_EndOrFeatures:
assert(0 && "handled earlier");
}
return false;
}
/// Check if PatternsForOpcode is all zero.
static inline bool validOpToPatter(const PatternsForOpcode *P)
{
return !(P->Opcode == 0 && P->PatternStart == 0 && P->NumPatterns == 0);
}
const char *matchAliasPatterns(MCInst *MI, const AliasMatchingData *M)
{
// TODO Rewrite to C
// auto It = lower_bound(M.OpToPatterns, MI->getOpcode(),
// [](const PatternsForOpcode &L, unsigned Opcode) {
// return L.Opcode < Opcode;
// });
// if (It == M.OpToPatterns.end() || It->Opcode != MI->getOpcode())
// return nullptr;
// Binary search by opcode. Return false if there are no aliases for this
// opcode.
unsigned MIOpcode = MI->Opcode;
size_t i = 0;
uint32_t PatternOpcode = M->OpToPatterns[i].Opcode;
while (PatternOpcode < MIOpcode && validOpToPatter(&M->OpToPatterns[i]))
PatternOpcode = M->OpToPatterns[++i].Opcode;
if (PatternOpcode != MI->Opcode || !validOpToPatter(&M->OpToPatterns[i]))
return NULL;
// // Try all patterns for this opcode.
uint32_t AsmStrOffset = ~0U;
const AliasPattern *Patterns = M->Patterns + M->OpToPatterns[i].PatternStart;
for (const AliasPattern *P = Patterns;
P != Patterns + M->OpToPatterns[i].NumPatterns; ++P) {
// Check operand count first.
if (MCInst_getNumOperands(MI) != P->NumOperands)
return NULL;
// Test all conditions for this pattern.
const AliasPatternCond *Conds = M->PatternConds + P->AliasCondStart;
unsigned OpIdx = 0;
bool OrPredicateResult = false;
bool allMatch = true;
for (const AliasPatternCond *C = Conds; C != Conds + P->NumConds; ++C) {
if (!matchAliasCondition(MI, MI->MRI, &OpIdx, M, C, &OrPredicateResult)) {
allMatch = false;
break;
}
}
if (allMatch) {
AsmStrOffset = P->AsmStrOffset;
break;
}
}
// If no alias matched, don't print an alias.
if (AsmStrOffset == ~0U)
return NULL;
// Go to offset AsmStrOffset and use the null terminated string there. The
// offset should point to the beginning of an alias string, so it should
// either be zero or be preceded by a null byte.
return M->AsmStrings + AsmStrOffset;
}
// TODO Add functionality to toggle the flag.
bool getUseMarkup(void) { return false; }
/// Utility functions to make adding mark ups simpler.
const char *markup(const char *s)
{
static const char *no_markup = "";
if (getUseMarkup())
return s;
else
return no_markup;
}
// binary search for encoding in IndexType array
// return -1 if not found, or index if found
unsigned int binsearch_IndexTypeEncoding(const struct IndexType *index, size_t size, uint16_t encoding)
{
// binary searching since the index is sorted in encoding order
size_t left, right, m;
right = size - 1;
if (encoding < index[0].encoding || encoding > index[right].encoding)
// not found
return -1;
left = 0;
while(left <= right) {
m = (left + right) / 2;
if (encoding == index[m].encoding) {
// LLVM actually uses lower_bound for the index table search
// Here we need to check if a previous entry is of the same encoding
// and return the first one.
while (m > 0 && encoding == index[m - 1].encoding)
--m;
return m;
}
if (encoding < index[m].encoding)
right = m - 1;
else
left = m + 1;
}
// not found
return -1;
}
// binary search for encoding in IndexTypeStr array
// return -1 if not found, or index if found
unsigned int binsearch_IndexTypeStrEncoding(const struct IndexTypeStr *index, size_t size, const char *name)
{
// binary searching since the index is sorted in encoding order
size_t left, right, m;
right = size - 1;
size_t str_left_cmp = strcmp(name, index[0].name);
size_t str_right_cmp = strcmp(name, index[right].name);
if (str_left_cmp < 0 || str_right_cmp > 0)
// not found
return -1;
left = 0;
while(left <= right) {
m = (left + right) / 2;
if (strcmp(name, index[m].name) == 0) {
// LLVM actually uses lower_bound for the index table search
// Here we need to check if a previous entry is of the same encoding
// and return the first one.
while (m > 0 && (strcmp(name, index[m - 1].name) == 0))
--m;
return m;
}
if (strcmp(name, index[m].name) < 0)
right = m - 1;
else
left = m + 1;
}
// not found
return -1;
}