capstone/suite/test_corpus3.py
Rot127 9c5b48b57f
AArch64 update to LLVM 18 (#2298)
* Run clang-format

* Remove arm.h header from AArch64 files

* Update all AArch64 module files to LLVM-18.

* Add check if the differs save file is up-to-date with the current files.

* Add new generator for MC test translation.

* Fix warnings

* Update generated AsmWriter files

* Remove unused variable

* Change MCPhysReg type to int16_t as LLVM 18 dictates.

With LLVM 18 the MCPhysReg value's type is changed to int16_t.
If we update modules to LLVM 18, they will generate
compiler warnings that uint16_t* should not be cast to int16_t*.

This makes changing all the tables to int16_t necessary, because the alternative is
to duplicate all MCPhysReg related code. Which is even worse.

* Assign enum values to raw_struct member

* Add printAdrAdrpLabel def

* Add header to regression test files.

* Write files to build dir and ignore more parsing errors.

* Fix parsing of MC test files.

* Reset parser after every block

* Add write and patch header step.

* Add and update MC tests for AArch64

* Fix clang-tidy warnings

* Don't warn about padding issues.

They break automatically initialized structs we can not change easily.

* Fix: Incorrect access of LLVM instruction descriptions.

* Initialize DecoderComplete flag

* Add more mapping and flag details

* Add function to get MCInstDesc from table

* Fix incorrect memory operand access types.

* Fix test where memory was not written, but only read.

* Attempt to fix Windows build

* Fix 2268

The enum values were different and hence lead to different decoding.

* Refactor SME operands.

- Splits SME operands in Matrix and Predicate operands.
- Fixes general problems of incorrect detections with
the vector select/index operands of predicate registers.
- Simplifies code.

* Fix up typo in WRITE

* Print actual path to struct fields

* Add Registers of SME operands to the reg-read list

* Add tests for SME operands.

* Use Capstone reg enum for comparison

* Fix tests: 'Vector arra...' to 'operands[x].vas'

* Add the developer fuzz option.

* Fix Python bindings for SME operands

* Fix variable shadowing.

* Fix clang-tidy warnings

* Add missing break.

* Fix varg usage

* Brackets for case

* Handle AArch64_OP_GROUP_AdrAdrpLabel

* Fix endian issue with fuzzing start bytes

* Move previous sme.pred to its own operand type.

* Fix calculation for imm ranges

* Print list member flag

* Fix up operand strings for cstest

* Do only a shallow clone of the cmocka stable branch

* Fix: Don't categorize ZT0 as a SME matrix operand.

* Remove unused code.

* Add flag to distinguish Vn and Qn registers.

* Add all registers to detail struct, even if emitted in the asm text

* Fix: Increment op count after each list member is added.

* Remove implicit write to NZCV for MSR Imm instructions.

* Handle several alias operands.

* Add details for zero alias with za0.h

* Add SME tile to write list if written

* Add write access flags to operands which are zeroed.

* Add SME tests of #2285

* Fix tests with latest syntax changes.

* Fix segfault if memory operand is only a label without register.

* Fix python bindings

* Attempt to fix clang-tidy warning for some configurations.

* Add missing test file (accidentally blocked by gitignore.)

* Print clang-tidy version before linting.

* Update differ save file

* Formatting

* Use clang-tidy-15 if possible.

* Remove search patterns for MC tests, since they need to be reworked anyways.

* Enum to upper case change

* Add information to read the OSS fuzz result.

* Fix special case of SVE2 operands.

Apparently ZT0 registers can get an index attached,
which is BOUND to it. We have no "index for reg" field.
So it is simply saved as an immediate.

* Handle LLVM expressions without asserts.

* Ensure choices are always saved.

* OP_GROUP enums can't be all upper case because they contain type information.

* Fix compatibility header patching

* Update saved_choices.json

* Allow mode == None in test_corpus
2024-07-08 10:28:54 +08:00

180 lines
7.5 KiB
Python
Executable File

#!/usr/bin/env python3
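# Tool that converts test input files ("<hex bytes> = <asm>" lines, as used when
# comparing Capstone output with llvm-mc) into fuzzing corpus files under
# fuzz/corpus/. By Nguyen Anh Quynh, 2014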
import sys
import os
from capstone import *
from pathlib import Path
import codecs
# Maps an (arch, mode) pair, spelled exactly as in the header line of an input
# file, to the value written as the first byte of every corpus file.
# NOTE(review): presumably this is the platform index the fuzzing harness
# expects -- confirm against the fuzzer's platform table before adding entries.
_MC_MODES = {
    ("CS_ARCH_X86", "CS_MODE_32"): 0,
    ("CS_ARCH_X86", "CS_MODE_64"): 1,
    ("CS_ARCH_ARM", "CS_MODE_ARM"): 2,
    ("CS_ARCH_ARM", "CS_MODE_THUMB"): 3,
    ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): 4,
    ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): 5,
    ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): 6,
    ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8+CS_MODE_MCLASS"): 7,
    ("CS_ARCH_AARCH64", "0"): 8,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN"): 9,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO"): 10,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS64"): 11,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32"): 12,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN"): 13,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): 14,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"): 14,
    ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): 15,
    ("CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN"): 16,
    ("CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN+CS_MODE_V9"): 17,
    ("CS_ARCH_SYSZ", "0"): 18,
    ("CS_ARCH_XCORE", "0"): 19,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_BIG_ENDIAN"): 20,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): 21,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6"): 22,
    ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_MICRO"): 23,
    ("CS_ARCH_M68K", "0"): 24,
    ("CS_ARCH_M680X", "CS_MODE_M680X_6809"): 25,
    ("CS_ARCH_EVM", "0"): 26,
    ("CS_ARCH_BPF", "CS_MODE_LITTLE_ENDIAN+CS_MODE_BPF_CLASSIC"): 30,
    ("CS_ARCH_BPF", "CS_MODE_LITTLE_ENDIAN+CS_MODE_BPF_EXTENDED"): 31,
    ("CS_ARCH_BPF", "CS_MODE_BIG_ENDIAN+CS_MODE_BPF_CLASSIC"): 32,
    ("CS_ARCH_BPF", "CS_MODE_BIG_ENDIAN+CS_MODE_BPF_EXTENDED"): 33,
    ("CS_ARCH_RISCV", "CS_MODE_RISCV32"): 45,
    ("CS_ARCH_RISCV", "CS_MODE_RISCV64"): 46,
    ("CS_ARCH_TRICORE", "CS_MODE_TRICORE_110"): 47,
    ("CS_ARCH_TRICORE", "CS_MODE_TRICORE_120"): 48,
    ("CS_ARCH_TRICORE", "CS_MODE_TRICORE_130"): 49,
    ("CS_ARCH_TRICORE", "CS_MODE_TRICORE_131"): 50,
    ("CS_ARCH_TRICORE", "CS_MODE_TRICORE_160"): 51,
    ("CS_ARCH_TRICORE", "CS_MODE_TRICORE_161"): 52,
    ("CS_ARCH_TRICORE", "CS_MODE_TRICORE_162"): 53,
    ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN+CS_MODE_QPX"): 54,
    ("CS_ARCH_ALPHA", "CS_MODE_LITTLE_ENDIAN"): 55,
    ("CS_ARCH_ALPHA", "CS_MODE_BIG_ENDIAN"): 56,
    ("CS_ARCH_HPPA", "CS_MODE_HPPA_11+CS_MODE_BIG_ENDIAN"): 57,
    ("CS_ARCH_HPPA", "CS_MODE_HPPA_20+CS_MODE_BIG_ENDIAN"): 58,
}


def _parse_header(line):
    """Return ``(arch, mode)`` from a ``# <arch>, <mode>, <option>`` header.

    Spaces are removed from the mode, and a mode of ``"None"`` is mapped to
    ``"0"`` so that it can be looked up in ``_MC_MODES``.  The option field is
    parsed but not used.  Returns ``None`` when the text after the leading
    ``# `` does not split into exactly three ``', '``-separated fields.
    """
    fields = line[2:].split(', ')
    if len(fields) != 3:
        return None
    arch, mode, _option = fields
    mode = mode.replace(' ', '')
    if mode == "None":
        mode = "0"
    return arch, mode


def _extract_hex(line):
    """Return the hex digits of one input line, or ``None`` to skip the line.

    Lines starting with ``#`` are skipped.  A leading ``// `` is dropped, the
    text before the first ``' = '`` is taken as the byte list, and it is
    skipped when shorter than two characters or when it still contains
    ``//``.  ``0x`` prefixes, commas and spaces are removed from the result.
    """
    if line.startswith('#'):
        return None
    if line.startswith('// '):
        line = line[3:]
    code = line.split(' = ')[0]
    if len(code) < 2 or '//' in code:
        return None
    return code.replace('0x', '').replace(',', '').replace(' ', '').strip()


def test_file(fname):
    """Write one corpus file per encodable line of the input file *fname*.

    *fname* may be a ``str`` or a ``pathlib.Path``.  If it names a directory,
    every entry in it is processed recursively.  Otherwise the first line
    must be a ``# <arch>, <mode>, <option>`` header; each following line that
    yields hex bytes is written to ``fuzz/corpus/<file name>_<hex>``
    (relative to the current working directory) as the one-byte
    ``_MC_MODES`` value followed by the decoded bytes.

    Problems are reported on stdout rather than raised: a missing or
    malformed header, or an (arch, mode) pair that is not in ``_MC_MODES``,
    ends processing of that file before any output is created; a line whose
    bytes cannot be decoded or written is skipped.  Returns ``None``.
    """
    print("Test %s" % fname)
    fpath = Path(fname) if isinstance(fname, str) else fname
    if fpath.is_dir():
        for entry in fpath.iterdir():
            test_file(entry)
        return

    with fpath.open() as f:
        lines = f.readlines()

    # An empty file has no header line at all.
    if not lines or not lines[0].startswith('# '):
        print("ERROR: decoding information is missing")
        return

    header = _parse_header(lines[0])
    if header is None:
        print("ERROR: malformed decoding information: %s" % lines[0].strip())
        return

    arch, mode = header
    # Resolve the mode byte once, before any output file is opened, so an
    # unsupported pair does not leave empty files in the corpus directory.
    if (arch, mode) not in _MC_MODES:
        print("fail", arch, mode)
        return
    mode_byte = _MC_MODES[(arch, mode)].to_bytes(1, 'little')

    out_dir = Path("fuzz/corpus")
    for line in lines[1:]:
        hex_code = _extract_hex(line)
        if hex_code is None:
            continue
        try:
            hex_data = bytes.fromhex(hex_code)
            out_dir.mkdir(parents=True, exist_ok=True)
            out_path = out_dir / ("%s_%s" % (fpath.name, hex_code))
            with out_path.open('wb') as fout:
                fout.write(mode_byte)
                fout.write(hex_data)
        except (ValueError, OSError) as e:
            # ValueError: text is not valid hex; OSError: file could not be
            # created or written (e.g. the generated name is too long).
            print(f"skipping: {hex_code} with: {e}")
if __name__ == '__main__':
    if len(sys.argv) == 1:
        # No command-line argument: read one input path per line from stdin.
        for raw_name in sys.stdin:
            name = raw_name.strip()
            # Skip blank lines: Path('') is the current directory, so passing
            # an empty name on would process everything below the cwd.
            if name:
                test_file(name)
    else:
        # Only the first argument is used; it may be a file or a directory.
        test_file(sys.argv[1])