capstone/arch/RISCV/RISCVDisassembler.c
wxrdnx 404912f068
Add access support for RISC-V (#2393)
* resolve conflict for loongarch and RISCV in Mapping.c and Mapping.h

* Use RISCV_get_detail for simplicity

Co-authored-by: Rot127 <45763064+Rot127@users.noreply.github.com>

* Use detail_is_set for simplicity

Co-authored-by: Rot127 <45763064+Rot127@users.noreply.github.com>

* Change comment style

Co-authored-by: Rot127 <45763064+Rot127@users.noreply.github.com>

* remove redundant add_str

* fix bug for RISCV_add_detail

* fix operands for csr instructions

* add python binding and tester for RISC-V

* add more test cases for RISC-V (M,A,F,D,C instructions)

* fix incorrect operand and access for sc.w and sc.d

* fix incorrect operand for fence and sfence.vma

* assert -> CS_ASSERT

* some instructions in test_riscv.c should be RISCV64

* add cs details test

* update python testers

---------

Co-authored-by: Rot127 <45763064+Rot127@users.noreply.github.com>
2024-07-10 11:36:39 +08:00

461 lines
15 KiB
C

//===-- RISCVDisassembler.cpp - Disassembler for RISCV --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Capstone Disassembly Engine */
/* RISC-V Backend By Rodrigo Cortes Porto <porto703@gmail.com> &
Shawn Chang <citypw@gmail.com>, HardenedLinux@2018 */
#ifdef CAPSTONE_HAS_RISCV
#include <stdio.h> // DEBUG
#include <stdlib.h>
#include <string.h>
#include "../../cs_priv.h"
#include "../../utils.h"
#include "../../MCInst.h"
#include "../../MCInstrDesc.h"
#include "../../MCFixedLenDisassembler.h"
#include "../../MCRegisterInfo.h"
#include "../../MCDisassembler.h"
#include "../../MathExtras.h"
#include "../../Mapping.h"
#include "RISCVBaseInfo.h"
#include "RISCVDisassembler.h"
/* Need the feature infos define in
RISCVGenSubtargetInfo.inc. */
#define GET_SUBTARGETINFO_ENUM
#include "RISCVGenSubtargetInfo.inc"
/* When we specify the RISCV64 mode, It means It is RV64IMAFD.
Similar, RISCV32 means RV32IMAFD.
*/
static uint64_t getFeatureBits(int mode)
{
uint64_t ret = RISCV_FeatureStdExtM | RISCV_FeatureStdExtA |
RISCV_FeatureStdExtF | RISCV_FeatureStdExtD ;
if (mode & CS_MODE_RISCV64)
ret |= RISCV_Feature64Bit;
if (mode & CS_MODE_RISCVC)
ret |= RISCV_FeatureStdExtC;
return ret;
}
#define GET_REGINFO_ENUM
#define GET_REGINFO_MC_DESC
#include "RISCVGenRegisterInfo.inc"
#define GET_INSTRINFO_ENUM
#include "RISCVGenInstrInfo.inc"
static const unsigned GPRDecoderTable[] = {
RISCV_X0, RISCV_X1, RISCV_X2, RISCV_X3,
RISCV_X4, RISCV_X5, RISCV_X6, RISCV_X7,
RISCV_X8, RISCV_X9, RISCV_X10, RISCV_X11,
RISCV_X12, RISCV_X13, RISCV_X14, RISCV_X15,
RISCV_X16, RISCV_X17, RISCV_X18, RISCV_X19,
RISCV_X20, RISCV_X21, RISCV_X22, RISCV_X23,
RISCV_X24, RISCV_X25, RISCV_X26, RISCV_X27,
RISCV_X28, RISCV_X29, RISCV_X30, RISCV_X31
};
static DecodeStatus DecodeGPRRegisterClass(MCInst *Inst, uint64_t RegNo,
uint64_t Address, const void *Decoder)
{
unsigned Reg = 0;
if (RegNo >= ARR_SIZE(GPRDecoderTable))
return MCDisassembler_Fail;
// We must define our own mapping from RegNo to register identifier.
// Accessing index RegNo in the register class will work in the case that
// registers were added in ascending order, but not in general.
Reg = GPRDecoderTable[RegNo];
//Inst.addOperand(MCOperand::createReg(Reg));
MCOperand_CreateReg0(Inst, Reg);
return MCDisassembler_Success;
}
static const unsigned FPR32DecoderTable[] = {
RISCV_F0_32, RISCV_F1_32, RISCV_F2_32, RISCV_F3_32,
RISCV_F4_32, RISCV_F5_32, RISCV_F6_32, RISCV_F7_32,
RISCV_F8_32, RISCV_F9_32, RISCV_F10_32, RISCV_F11_32,
RISCV_F12_32, RISCV_F13_32, RISCV_F14_32, RISCV_F15_32,
RISCV_F16_32, RISCV_F17_32, RISCV_F18_32, RISCV_F19_32,
RISCV_F20_32, RISCV_F21_32, RISCV_F22_32, RISCV_F23_32,
RISCV_F24_32, RISCV_F25_32, RISCV_F26_32, RISCV_F27_32,
RISCV_F28_32, RISCV_F29_32, RISCV_F30_32, RISCV_F31_32
};
static DecodeStatus DecodeFPR32RegisterClass(MCInst *Inst, uint64_t RegNo,
uint64_t Address, const void *Decoder)
{
unsigned Reg = 0;
if (RegNo >= ARR_SIZE(FPR32DecoderTable))
return MCDisassembler_Fail;
// We must define our own mapping from RegNo to register identifier.
// Accessing index RegNo in the register class will work in the case that
// registers were added in ascending order, but not in general.
Reg = FPR32DecoderTable[RegNo];
MCOperand_CreateReg0(Inst, Reg);
return MCDisassembler_Success;
}
static DecodeStatus DecodeFPR32CRegisterClass(MCInst *Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder)
{
unsigned Reg = 0;
if (RegNo > 8)
return MCDisassembler_Fail;
Reg = FPR32DecoderTable[RegNo + 8];
MCOperand_CreateReg0(Inst, Reg);
return MCDisassembler_Success;
}
static const unsigned FPR64DecoderTable[] = {
RISCV_F0_64, RISCV_F1_64, RISCV_F2_64, RISCV_F3_64,
RISCV_F4_64, RISCV_F5_64, RISCV_F6_64, RISCV_F7_64,
RISCV_F8_64, RISCV_F9_64, RISCV_F10_64, RISCV_F11_64,
RISCV_F12_64, RISCV_F13_64, RISCV_F14_64, RISCV_F15_64,
RISCV_F16_64, RISCV_F17_64, RISCV_F18_64, RISCV_F19_64,
RISCV_F20_64, RISCV_F21_64, RISCV_F22_64, RISCV_F23_64,
RISCV_F24_64, RISCV_F25_64, RISCV_F26_64, RISCV_F27_64,
RISCV_F28_64, RISCV_F29_64, RISCV_F30_64, RISCV_F31_64
};
static DecodeStatus DecodeFPR64RegisterClass(MCInst *Inst, uint64_t RegNo,
uint64_t Address, const void *Decoder)
{
unsigned Reg = 0;
if (RegNo >= ARR_SIZE(FPR64DecoderTable))
return MCDisassembler_Fail;
// We must define our own mapping from RegNo to register identifier.
// Accessing index RegNo in the register class will work in the case that
// registers were added in ascending order, but not in general.
Reg = FPR64DecoderTable[RegNo];
MCOperand_CreateReg0(Inst, Reg);
return MCDisassembler_Success;
}
static DecodeStatus DecodeFPR64CRegisterClass(MCInst *Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder)
{
unsigned Reg = 0;
if (RegNo > 8)
return MCDisassembler_Fail;
Reg = FPR64DecoderTable[RegNo + 8];
MCOperand_CreateReg0(Inst, Reg);
return MCDisassembler_Success;
}
static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst *Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder)
{
if (RegNo == 0)
return MCDisassembler_Fail;
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
static DecodeStatus DecodeGPRNoX0X2RegisterClass(MCInst *Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder)
{
if (RegNo == 2)
return MCDisassembler_Fail;
return DecodeGPRNoX0RegisterClass(Inst, RegNo, Address, Decoder);
}
static DecodeStatus DecodeGPRCRegisterClass(MCInst *Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder)
{
unsigned Reg = 0;
if (RegNo > 8)
return MCDisassembler_Fail;
Reg = GPRDecoderTable[RegNo + 8];
MCOperand_CreateReg0(Inst, Reg);
return MCDisassembler_Success;
}
// Add implied SP operand for instructions *SP compressed instructions. The SP
// operand isn't explicitly encoded in the instruction.
static void addImplySP(MCInst *Inst, int64_t Address, const void *Decoder)
{
if (MCInst_getOpcode(Inst) == RISCV_C_LWSP ||
MCInst_getOpcode(Inst) == RISCV_C_SWSP ||
MCInst_getOpcode(Inst) == RISCV_C_LDSP ||
MCInst_getOpcode(Inst) == RISCV_C_SDSP ||
MCInst_getOpcode(Inst) == RISCV_C_FLWSP ||
MCInst_getOpcode(Inst) == RISCV_C_FSWSP ||
MCInst_getOpcode(Inst) == RISCV_C_FLDSP ||
MCInst_getOpcode(Inst) == RISCV_C_FSDSP ||
MCInst_getOpcode(Inst) == RISCV_C_ADDI4SPN) {
DecodeGPRRegisterClass(Inst, 2, Address, Decoder);
}
if (MCInst_getOpcode(Inst) == RISCV_C_ADDI16SP) {
DecodeGPRRegisterClass(Inst, 2, Address, Decoder);
DecodeGPRRegisterClass(Inst, 2, Address, Decoder);
}
}
static DecodeStatus decodeUImmOperand(MCInst *Inst, uint64_t Imm,
int64_t Address, const void *Decoder,
unsigned N)
{
//CS_ASSERT(isUInt<N>(Imm) && "Invalid immediate");
addImplySP(Inst, Address, Decoder);
//Inst.addOperand(MCOperand::createImm(Imm));
MCOperand_CreateImm0(Inst, Imm);
return MCDisassembler_Success;
}
static DecodeStatus decodeUImmNonZeroOperand(MCInst *Inst, uint64_t Imm,
int64_t Address,
const void *Decoder,
unsigned N)
{
if (Imm == 0)
return MCDisassembler_Fail;
return decodeUImmOperand(Inst, Imm, Address, Decoder, N);
}
static DecodeStatus decodeSImmOperand(MCInst *Inst, uint64_t Imm,
int64_t Address, const void *Decoder,
unsigned N)
{
//CS_ASSERT(isUInt<N>(Imm) && "Invalid immediate");
addImplySP(Inst, Address, Decoder);
// Sign-extend the number in the bottom N bits of Imm
//Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm)));
MCOperand_CreateImm0(Inst, SignExtend64(Imm, N));
return MCDisassembler_Success;
}
static DecodeStatus decodeSImmNonZeroOperand(MCInst *Inst, uint64_t Imm,
int64_t Address,
const void *Decoder,
unsigned N)
{
if (Imm == 0)
return MCDisassembler_Fail;
return decodeSImmOperand(Inst, Imm, Address, Decoder, N);
}
static DecodeStatus decodeSImmOperandAndLsl1(MCInst *Inst, uint64_t Imm,
int64_t Address,
const void *Decoder,
unsigned N)
{
//CS_ASSERT(isUInt<N>(Imm) && "Invalid immediate");
// Sign-extend the number in the bottom N bits of Imm after accounting for
// the fact that the N bit immediate is stored in N-1 bits (the LSB is
// always zero)
//Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm << 1)));
MCOperand_CreateImm0(Inst, SignExtend64(Imm << 1, N));
return MCDisassembler_Success;
}
static DecodeStatus decodeCLUIImmOperand(MCInst *Inst, uint64_t Imm,
int64_t Address,
const void *Decoder)
{
//CS_ASSERT(isUInt<6>(Imm) && "Invalid immediate");
if (Imm > 31) {
Imm = (SignExtend64(Imm, 6) & 0xfffff);
}
//Inst.addOperand(MCOperand::createImm(Imm));
MCOperand_CreateImm0(Inst, Imm);
return MCDisassembler_Success;
}
static DecodeStatus decodeFRMArg(MCInst *Inst, uint64_t Imm,
int64_t Address,
const void *Decoder)
{
//CS_ASSERT(isUInt<3>(Imm) && "Invalid immediate");
if (!RISCVFPRndMode_isValidRoundingMode(Imm))
return MCDisassembler_Fail;
//Inst.addOperand(MCOperand::createImm(Imm));
MCOperand_CreateImm0(Inst, Imm);
return MCDisassembler_Success;
}
#include "RISCVGenDisassemblerTables.inc"
static void init_MI_insn_detail(MCInst *MI)
{
if (MI->flat_insn->detail) {
memset(MI->flat_insn->detail, 0, sizeof(cs_detail));
}
return;
}
// mark the load/store instructions through the opcode.
static void markLSInsn(MCInst *MI, uint32_t in)
{
/*
I ld 0000011 = 0x03
st 0100011 = 0x23
F/D ld 0000111 = 0x07
st 0100111 = 0x27
st 0101111 = 0x2f
*/
#define MASK_LS_INSN 0x0000007f
uint32_t opcode = in & MASK_LS_INSN;
if (0 == (opcode ^ 0x03) || 0 == (opcode ^ 0x07) ||
0 == (opcode ^ 0x23) || 0 == (opcode ^ 0x27) ||
0 == (opcode ^ 0x2f))
MI->flat_insn->detail->riscv.need_effective_addr = true;
#undef MASK_LS_INSN
return;
}
static void markCLSInsn(MCInst *MI, uint32_t in)
{
// Unfortunately there is no obvious pattern in terms of RISC-V C instructions
// Thus, we compare the instruction IDs to see if it is a load/store instruction
unsigned id = MCInst_getOpcode(MI);
if (id == RISCV_C_FLD || id == RISCV_C_LW ||
id == RISCV_C_FLW || id == RISCV_C_LD ||
id == RISCV_C_FSD || id == RISCV_C_SW ||
id == RISCV_C_FSW || id == RISCV_C_SD ||
id == RISCV_C_FLDSP || id == RISCV_C_LWSP ||
id == RISCV_C_FLWSP || id == RISCV_C_LDSP ||
id == RISCV_C_FSDSP || id == RISCV_C_SWSP ||
id == RISCV_C_FSWSP || id == RISCV_C_SDSP) {
RISCV_get_detail(MI)->need_effective_addr = true;
}
return;
}
static DecodeStatus RISCVDisassembler_getInstruction(int mode, MCInst *MI,
const uint8_t *code, size_t code_len,
uint16_t *Size, uint64_t Address,
MCRegisterInfo *MRI)
{
// TODO: This will need modification when supporting instruction set
// extensions with instructions > 32-bits (up to 176 bits wide).
uint32_t Inst = 0;
DecodeStatus Result;
// It's a 32 bit instruction if bit 0 and 1 are 1.
if ((code[0] & 0x3) == 0x3) {
if (code_len < 4) {
*Size = 0;
return MCDisassembler_Fail;
}
*Size = 4;
// Get the four bytes of the instruction.
//Encoded as little endian 32 bits.
Inst = code[0] | (code[1] << 8) | (code[2] << 16) | ((uint32_t)code[3] << 24);
init_MI_insn_detail(MI);
// Now we need mark what instruction need fix effective address output.
if (MI->csh->detail_opt)
markLSInsn(MI, Inst);
Result = decodeInstruction(DecoderTable32, MI, Inst, Address, MRI, mode);
} else {
if (code_len < 2) {
*Size = 0;
return MCDisassembler_Fail;
}
// If not b4bit.
if (! (getFeatureBits(mode) & ((uint64_t)RISCV_Feature64Bit))) {
// Trying RISCV32Only_16 table (16-bit Instruction)
Inst = code[0] | (code[1] << 8);
init_MI_insn_detail(MI);
Result = decodeInstruction(DecoderTableRISCV32Only_16, MI, Inst, Address,
MRI, mode);
if (Result != MCDisassembler_Fail) {
*Size = 2;
return Result;
}
}
// Trying RISCV_C table (16-bit Instruction)
Inst = code[0] | (code[1] << 8);
init_MI_insn_detail(MI);
// Calling the auto-generated decoder function.
Result = decodeInstruction(DecoderTable16, MI, Inst, Address, MRI, mode);
// Now we need mark what instruction need fix effective address output.
// Note that we mark it AFTER the instruction is decoded
// This is because there is no obvious pattern in terms of RISC-V C instructions
// So we compare the instruction IDs one by one
if (detail_is_set(MI))
markCLSInsn(MI, Inst);
*Size = 2;
}
return Result;
}
bool RISCV_getInstruction(csh ud, const uint8_t *code, size_t code_len,
MCInst *instr, uint16_t *size, uint64_t address,
void *info)
{
cs_struct *handle = (cs_struct *)(uintptr_t)ud;
return MCDisassembler_Success ==
RISCVDisassembler_getInstruction(handle->mode, instr,
code, code_len,
size, address,
(MCRegisterInfo *)info);
}
void RISCV_init(MCRegisterInfo * MRI)
{
/*
InitMCRegisterInfo(RISCVRegDesc, 97, RA, PC,
RISCVMCRegisterClasses, 11,
RISCVRegUnitRoots,
64,
RISCVRegDiffLists,
RISCVLaneMaskLists,
RISCVRegStrings,
RISCVRegClassStrings,
RISCVSubRegIdxLists,
2,
RISCVSubRegIdxRanges,
RISCVRegEncodingTable);
*/
MCRegisterInfo_InitMCRegisterInfo(MRI, RISCVRegDesc, 97, 0, 0,
RISCVMCRegisterClasses, 11,
0,
0,
RISCVRegDiffLists,
0,
RISCVSubRegIdxLists,
2,
0);
}
#endif