mirror of
https://github.com/capstone-engine/capstone.git
synced 2024-11-23 13:39:46 +00:00
380 lines
14 KiB
Python
Executable File
380 lines
14 KiB
Python
Executable File
#!/usr/bin/python
|
|
# print MappingInsn.inc file from LLVM GenAsmMatcher.inc, for Capstone disassembler.
|
|
# by Nguyen Anh Quynh, 2019
|
|
|
|
import sys
|
|
|
|
# All three input files are mandatory: argv[1] and argv[2] are opened later in
# this script and argv[3] right here, so show the usage text unless every one
# of them was supplied (previously only the "no arguments at all" case was
# caught and 1 or 2 arguments died with an IndexError).
if len(sys.argv) < 4:
    print("Syntax: %s <GenAsmMatcher.inc> <GenInstrInfo.inc> <MappingInsnOp.inc>" %sys.argv[0])
    sys.exit(1)

# Lines of the existing MappingInsnOp.inc; print_entry() searches them for
# entries that can be carried over into the regenerated table.
with open(sys.argv[3]) as f:
    mapping = f.readlines()

# Banner of the generated file (the whole output goes to stdout).
print("""/* Capstone Disassembly Engine, http://www.capstone-engine.org */
/* This is auto-gen data for Capstone disassembly engine (www.capstone-engine.org) */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
""")
|
|
|
|
# lib/Target/X86/X86GenAsmMatcher.inc
|
|
# static const MatchEntry MatchTable1[] = {
|
|
# { 0 /* aaa */, X86::AAA, Convert_NoOperands, Feature_Not64BitMode, { }, },
|
|
|
|
# extract insn from GenAsmMatcher Table
|
|
# return (arch, mnem, insn_id)
|
|
def extract_insn(line):
    """Split one match-table row into (arch, mnemonic, instruction id).

    *line* is expected to look like
    ``{ 0 /* aaa */, X86::AAA, Convert_NoOperands, ... },``:
    the first comma-separated field carries the mnemonic inside a C comment
    and the second one the ``Arch::InsnId`` name.

    Returns a tuple ``(arch, mnem, insn_id)``; for the row above that is
    ``('X86', 'aaa', 'AAA')``. The arch name is returned exactly as spelled
    in the table (no AArch64 -> ARM64 renaming happens here).

    Raises IndexError when the row does not have the expected shape.
    """
    fields = line.split(',')
    qualified_name = fields[1].strip()

    # '{ 0 /* aaa */' -> tokens '{', '0', '/*', 'aaa', '*/': the mnemonic is
    # the 4th single-space separated token. A dotted form such as the X86
    # 'mov.s' is cut down to the part before the first dot.
    mnemonic = fields[0].split(' ')[3].split('.')[0]

    name_parts = qualified_name.split('::')
    return (name_parts[0], mnemonic, name_parts[1])
|
|
|
|
|
|
# extract all insn lines from GenAsmMatcher
|
|
# return arch, first_insn, insn_id_list
|
|
def extract_matcher(filename):
    """Collect the instruction -> mnemonic pairs from a GenAsmMatcher.inc file.

    The file is scanned twice. The first pass decides which match table to
    read: MatchTable0 is remembered when seen, MatchTable1 replaces it when it
    shows up later, and the search stops early at the first line mentioning
    'AArch64::' once a table has been remembered. The second pass feeds every
    row between the chosen table header and the closing '};' to
    extract_insn().

    Returns a tuple ``(arch, first_insn, insn_id_list)``:
      * arch          -- arch name of the first row whose mnemonic does not
                         start with '__' (None if there is no such row)
      * first_insn    -- instruction id of that same row (None likewise)
      * insn_id_list  -- dict mapping instruction id to mnemonic; when an id
                         occurs several times the first mnemonic wins

    Raises ValueError when the file contains neither match table (this used
    to surface as an unrelated TypeError further down).
    """
    table0 = 'MatchEntry MatchTable0[] = {'
    table1 = 'MatchEntry MatchTable1[] = {'

    # 'with' guarantees the handle is released even if reading fails
    with open(filename) as f:
        lines = f.readlines()

    # first we try to find Table1, or Table0
    pattern = None
    for line in lines:
        if table0 in line:
            pattern = table0
        elif 'AArch64::' in line and pattern:
            # We do not care about Apple Assembly
            break
        elif table1 in line:
            # last pattern, done
            pattern = table1
            break

    if pattern is None:
        raise ValueError("cannot find MatchTable0 or MatchTable1 in %s" % filename)

    match_count = 0
    insn_id_list = {}
    arch = None
    first_insn = None

    for line in lines:
        line = line.strip()

        # skip empty line
        if not line:
            continue

        if pattern in line:
            match_count += 1
            continue

        # rows are only meaningful while inside the first matching table
        if match_count != 1:
            continue

        if line == '};':
            # done with the table
            break

        _arch, mnem, insn_id = extract_insn(line)

        # skip pseudo instructions
        if mnem.startswith('__'):
            continue

        if not first_insn:
            arch, first_insn = _arch, insn_id

        # keep the first mnemonic seen for each instruction id
        insn_id_list.setdefault(insn_id, mnem)

    return arch, first_insn, insn_id_list
|
|
|
|
|
|
#arch, first_insn, insn_id_list, match_lines = extract_matcher(sys.argv[1])
|
|
# Parse the GenAsmMatcher.inc named by the first command-line argument.
# arch / first_insn describe the first table row whose mnemonic does not start
# with '__'; insn_id_list maps instruction id -> mnemonic and is consulted by
# the main loop further down.
arch, first_insn, insn_id_list = extract_matcher(sys.argv[1])
|
|
#arch = arch.upper()
|
|
|
|
#for line in insn_id_list:
|
|
# print(line)
|
|
|
|
#{ /* X86_AAA, X86_INS_AAA: aaa */
|
|
# X86_EFLAGS_UNDEFINED_OF | X86_EFLAGS_UNDEFINED_SF | X86_EFLAGS_UNDEFINED_ZF | X86_EFLAGS_MODIFY_AF | X86_EFLAGS_UNDEFINED_PF | X86_EFLAGS_MODIFY_CF,
|
|
# { 0 }
|
|
#},
|
|
|
|
#{ /* ARM_ADCri, ARM_INS_ADC: adc${s}${p} $rd, $rn, $imm */
|
|
# { CS_AC_WRITE, CS_AC_READ, 0 }
|
|
#},
|
|
|
|
def print_entry(arch, insn_id, mnem, mapping, mnem_can_be_wrong):
    """Print the table entry for one instruction to stdout.

    arch               -- arch prefix; combined with *insn_id* as
                          '<arch>_<insn_id>' to form the entry name
    insn_id            -- instruction id without the arch prefix
    mnem               -- mnemonic used for the '<arch>_INS_<mnem>' part of
                          the comment (only read when mnem_can_be_wrong is
                          false)
    mapping            -- lines of the old mapping file; an entry header is a
                          line starting with '{' that contains '/*'
    mnem_can_be_wrong  -- when true the mnemonic is not trusted: the comment
                          text of the old entry is reused instead, and
                          nothing is printed if the old file has no entry

    If the old file has an entry with the same name, the line(s) right after
    its header are copied: one line when arch is 'ARM', two lines otherwise.
    If it has none (and the mnemonic is trusted) a zero-filled entry is
    printed.
    """
    # The format templates below are emitted verbatim; keep them untouched.
    fresh_one_line = '''
{\t/* %s, %s_INS_%s: %s */
\t%s
},'''
    fresh_two_lines = '''
{\t/* %s, %s_INS_%s: %s */
\t%s
\t%s
},'''
    kept_one_line = '''
{\t/* %s, %s
\t%s
},'''
    kept_two_lines = '''
{\t/* %s, %s
\t%s
\t%s
},'''

    insn = "%s_%s" %(arch, insn_id)
    arch_upper = arch.upper()
    is_arm = arch_upper == 'ARM'
    # NOTE(review): arch1 is only ever used on the ARM path, where it equals
    # arch, so the AArch64 -> ARM64 renaming never reaches the output.
    # Confirm whether the non-ARM templates were meant to use it.
    arch1 = 'ARM64' if arch_upper == 'AARCH64' else arch

    # first, try to find this entry in old MappingInsn.inc file
    for pos, header in enumerate(mapping):
        if not header.startswith('{') or '/*' not in header:
            continue

        # text after '/*' is '<entry name>, <rest of the old comment>'
        old_comment = header.split('/*')[1].strip().split(',')
        if old_comment[0].strip() != insn:
            continue

        if mnem_can_be_wrong:
            # NOTE(review): joining with '' drops the commas that were inside
            # the old comment text - presumably unintended, left as is.
            template = kept_one_line if is_arm else kept_two_lines
            head = (insn, ''.join(old_comment[1:]))
        else:
            template = fresh_one_line if is_arm else fresh_two_lines
            head = (insn, arch1 if is_arm else arch, mnem, mnem.lower())

        # ARM entries carry one payload line, every other arch two
        offsets = (1,) if is_arm else (1, 2)
        payload = tuple(mapping[pos + off].strip() for off in offsets)

        print(template % (head + payload))
        return

    # not in the old table: without a trustworthy mnemonic emit nothing
    if mnem_can_be_wrong:
        return

    # this insn does not exist in mapping table
    if is_arm:
        print('''
{\t/* %s, %s_INS_%s: %s */
\t{ 0 }
},'''% (insn, arch1, mnem, mnem.lower()))
    else:
        print('''
{\t/* %s, %s_INS_%s: %s */
\t0,
\t{ 0 }
},'''% (insn, arch, mnem, mnem.lower()))
|
|
|
|
|
|
# extract from GenInstrInfo.inc, because the insn id is in order
|
|
# Number of 'enum {' openers seen so far while walking GenInstrInfo.inc.
enum_count = 0
# Only referenced by the disabled legacy code path; kept so the module-level
# name stays defined.
meet_insn = False

# Second command-line argument: GenInstrInfo.inc. 'with' releases the handle
# even when reading fails.
with open(sys.argv[2]) as f:
    lines = f.readlines()

# Only referenced by the disabled legacy code path (see meet_insn).
count = 0
# Mnemonic of the most recent instruction that was found in insn_id_list.
last_mnem = None
|
|
|
|
|
|
def is_pseudo_insn(insn, lines):
    """Tell whether *insn* must be skipped as a pseudo instruction.

    The check is switched off: the answer is False for every input. The
    previous body returned False unconditionally on its first line, which
    left an unreachable scan of *lines* for a line containing both
    '= <insn>' and 'MCID::Pseudo'; that dead code has been removed.

    insn   -- instruction id (unused while the check is disabled)
    lines  -- lines of GenInstrInfo.inc (unused while the check is disabled)

    Both parameters are kept so existing callers keep working.
    """
    return False
|
|
|
|
|
|
# Walk GenInstrInfo.inc and print one table entry per instruction id.
# Only the lines that follow the first 'enum {' are treated as instruction
# ids; the walk ends at INSTRUCTION_LIST_END.
# (A large block of disabled code that used to live inside this loop as a
# bare triple-quoted string was a no-op and has been removed.)
for line in lines:
    line = line.strip()

    # skip empty line
    if not line:
        continue

    if line == 'enum {':
        enum_count += 1
        continue

    # ignore everything before the first enum and after a second one opens
    if enum_count != 1:
        continue

    # skip pseudo instructions
    if '__' in line or 'setjmp' in line or 'longjmp' in line or 'Pseudo' in line:
        continue
    if 'INSTRUCTION_LIST_END' in line:
        break

    # enum items look like '<insn id> = <number>,'
    insn = line.split('=')[0].strip()

    # skip more pseudo instruction
    if is_pseudo_insn(insn, lines):
        continue

    if insn in insn_id_list:
        # mnemonic known from the asm matcher table: remember it and emit
        # an entry that uses it
        last_mnem = insn_id_list[insn].upper()
        print_entry(arch, insn, last_mnem, mapping, False)
    else:
        # the last option when we cannot find mnem: use the last good mnem
        # and let print_entry() fall back on the old mapping file
        print_entry(arch, insn, last_mnem, mapping, True)
|