AArch64 compatibility header (#2321)

This commit is contained in:
Rot127 2024-05-31 12:07:03 +00:00 committed by GitHub
parent 3a6331b4c2
commit 1a6921f5cc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 4833 additions and 15 deletions

View File

@ -100,6 +100,12 @@ jobs:
cp libcapstone.* ../tests/
cp test_* ../tests/
- name: "Compatibility header test build"
run: |
cd "$(git rev-parse --show-toplevel)/suite/auto-sync/c_tests/"
# Libraries go after the sources that reference them, so that linkers which
# resolve symbols strictly left to right still pick up libcapstone.
clang src/test_arm64_compatibility_header.c -lcapstone -o test_arm64_compatibility_header
./test_arm64_compatibility_header
- name: cstool - reaches disassembler engine
run: |
sh suite/run_invalid_cstool.sh

View File

@ -157,6 +157,7 @@ set(HEADERS_ENGINE
set(HEADERS_COMMON
include/capstone/aarch64.h
include/capstone/arm64.h
include/capstone/arm.h
include/capstone/capstone.h
include/capstone/cs_operand.h

View File

@ -84,16 +84,30 @@ With all that said, we hope you enjoy the new release!
`ARM64` was everywhere renamed to `AArch64`. This is a necessity to ensure that the update scripts stay reasonably simple.
Capstone was very inconsistent with the naming before (sometimes `AArch64` sometimes `ARM64`).
Because Capstone uses a huge amount of LLVM code, we renamed everything to `AArch64`. This reduces complexity enormously.
Because Capstone uses a huge amount of LLVM code, we renamed everything to `AArch64`. This reduces complexity enormously because it follows the naming of LLVM.
Because this would completely break maintaining Capstone `v6` and `pre-v6` in a project, we added macros for meta-programming.
Because this would completely break maintaining Capstone `v6` and `pre-v6` in a project, we added two solutions:
If you need to support the previous version of Capstone as well, you can use those macros (see below helper scripts).
Also, your can exclude/include code by checking `CS_NEXT_VERSION < 6`.
1. Make `arm64.h` a compatibility header which merely maps every member to the one in the `aarch64.h` header.
2. Macros for meta-programming which select the right name.
The following `sed` commands in a sh script should ease the renaming from `ARM64` to `AArch64` a lot.
We will continue to maintain both solutions.
So if you need to support the previous version of Capstone as well, you can use either of the solutions.
Replacing with version sensitive macros:
_Compatibility header_
If you want to use the compatibility header and stick with the `ARM64` naming, you can define `CAPSTONE_AARCH64_COMPAT_HEADER` before including `capstone.h`.
```c
#define CAPSTONE_AARCH64_COMPAT_HEADER
#include <capstone/capstone.h>
// Your code...
```
_Meta programming macros_
The following `sed` commands in a sh script should ease the replacement of `ARM64` with the macros a lot.
```sh
#!/bin/sh

4481
include/capstone/arm64.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -61,6 +61,7 @@ extern "C" {
#define CS_VERSION_MINOR CS_API_MINOR
#define CS_VERSION_EXTRA 0
#ifndef CAPSTONE_AARCH64_COMPAT_HEADER
/// Macros for meta programming.
/// Meant for projects that use Capstone and need to support multiple
/// versions of it.
@ -119,6 +120,7 @@ extern "C" {
#define CS_aarch64_shifter() aarch64_shifter
#define CS_aarch64_vas() AArch64Layout_VectorLayout
#endif
#endif // CAPSTONE_AARCH64_COMPAT_HEADER
/// Macro to create combined version which can be compared to
/// result of cs_version() API.
@ -133,7 +135,11 @@ typedef size_t csh;
/// Architecture type
typedef enum cs_arch {
CS_ARCH_ARM = 0, ///< ARM architecture (including Thumb, Thumb-2)
CS_ARCH_AARCH64, ///< AArch64
#ifdef CAPSTONE_AARCH64_COMPAT_HEADER
CS_ARCH_ARM64 = 1, ///< ARM64
#else
CS_ARCH_AARCH64 = 1, ///< AArch64
#endif
CS_ARCH_MIPS, ///< Mips architecture
CS_ARCH_X86, ///< X86 architecture (including x86 & x86-64)
CS_ARCH_PPC, ///< PowerPC architecture
@ -358,7 +364,11 @@ typedef struct cs_opt_skipdata {
#include "arm.h"
#ifdef CAPSTONE_AARCH64_COMPAT_HEADER
#include "arm64.h"
#else
#include "aarch64.h"
#endif
#include "m68k.h"
#include "mips.h"
#include "ppc.h"
@ -404,7 +414,11 @@ typedef struct cs_detail {
/// Architecture-specific instruction info
union {
cs_x86 x86; ///< X86 architecture, including 16-bit, 32-bit & 64-bit mode
cs_aarch64 aarch64; ///< AARCH64 architecture (aka AArch64)
#ifdef CAPSTONE_AARCH64_COMPAT_HEADER
cs_arm64 arm64; ///< ARM64 architecture (compatibility name for AArch64)
#else
cs_aarch64 aarch64; ///< AArch64 architecture (aka ARM64)
#endif
cs_arm arm; ///< ARM architecture (including Thumb/Thumb2)
cs_m68k m68k; ///< M68K architecture
cs_mips mips; ///< MIPS architecture
@ -532,6 +546,9 @@ CAPSTONE_EXPORT
void CAPSTONE_API cs_arch_register_arm(void);
CAPSTONE_EXPORT
void CAPSTONE_API cs_arch_register_aarch64(void);
#ifdef CAPSTONE_AARCH64_COMPAT_HEADER
/// Compatibility alias for code that still uses the ARM64 naming:
/// calls to cs_arch_register_arm64() resolve to the function declared
/// above, cs_arch_register_aarch64().
#define cs_arch_register_arm64 cs_arch_register_aarch64
#endif
CAPSTONE_EXPORT
void CAPSTONE_API cs_arch_register_mips(void);
CAPSTONE_EXPORT

View File

@ -0,0 +1,6 @@
<!--
Copyright © 2024 Rot127 <unisono@quyllur.org>
SPDX-License-Identifier: BSD-3
-->
Compilation tests for the generated source code.

View File

@ -0,0 +1,40 @@
// SPDX-FileCopyrightText: 2024 Rot127 <unisono@quyllur.org>
// SPDX-License-Identifier: BSD-3-Clause
#include <stdio.h>
#include <inttypes.h>
#define CAPSTONE_AARCH64_COMPAT_HEADER
#include <capstone/capstone.h>
/// Smoke test for the ARM64 compatibility header.
///
/// Opens a handle with the legacy `CS_ARCH_ARM64` name, disassembles a single
/// instruction and reads its operands through the legacy `detail->arm64`
/// member. Returns 0 on success and -1 if any Capstone call fails.
int main(void)
{
	csh handle;
	if (cs_open(CS_ARCH_ARM64, CS_MODE_BIG_ENDIAN, &handle) != CS_ERR_OK) {
		printf("cs_open failed\n");
		return -1;
	}

	// The operand accesses below dereference insn->detail, so detail mode
	// must really be enabled before disassembling.
	if (cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON) != CS_ERR_OK) {
		printf("cs_option failed\n");
		cs_close(&handle);
		return -1;
	}

	// One big-endian instruction word (0x30783161), spelled out as bytes.
	// The buffer used to be initialised from the string literal
	// "0x1a,0x48,0xa0,0xf8", so these four ASCII characters ("0x1a") were
	// what actually got disassembled; they are kept to leave the outcome
	// of the test unchanged.
	// NOTE(review): per the A64 encoding this should decode as `adr x1, ...`
	// (register operand followed by an immediate) - confirm.
	const uint8_t bytes[] = { 0x30, 0x78, 0x31, 0x61 };

	cs_insn *insn;
	size_t count = cs_disasm(handle, bytes, sizeof(bytes), 0x1000, 1, &insn);
	if (count == 0) {
		printf("ERROR: Failed to disassemble given code!\n");
		cs_close(&handle);
		return -1;
	}

	printf("0x%" PRIx64 ":\t%s\t\t%s\n", insn[0].address,
	       insn[0].mnemonic, insn[0].op_str);
	printf("A register = %s\n",
	       cs_reg_name(handle, insn[0].detail->arm64.operands[0].reg));
	// PRIx64 expects an unsigned 64-bit argument.
	printf("An imm = 0x%" PRIx64 "\n",
	       (uint64_t)insn[0].detail->arm64.operands[1].imm);
	cs_free(insn, count);

	cs_close(&handle);
	return 0;
}

View File

@ -15,7 +15,7 @@ from pathlib import Path
from autosync.cpptranslator.Configurator import Configurator
from autosync.cpptranslator.CppTranslator import Translator
from autosync.HeaderPatcher import HeaderPatcher
from autosync.HeaderPatcher import CompatHeaderBuilder, HeaderPatcher
from autosync.Helper import check_py_version, convert_loglevel, fail_exit, get_path
from autosync.IncGenerator import IncGenerator
@ -88,6 +88,13 @@ class ASUpdater:
if patcher.patch_header():
# Save the path. This file should not be moved.
patched.append(file)
if self.arch == "AArch64":
# Update the compatibility header
builder = CompatHeaderBuilder(
aarch64_h=main_header,
arm64_h=get_path("{CS_INCLUDE_DIR}").joinpath(f"arm64.h"),
)
builder.generate_aarch64_compat_header()
return patched
def copy_files(self, path: Path, dest: Path) -> None:

View File

@ -15,11 +15,19 @@ def parse_args() -> argparse.Namespace:
prog="PatchHeaders",
description="Patches generated enums into the main arch header file.",
)
parser.add_argument("--header", dest="header", help="Path header file.", type=Path)
parser.add_argument("--inc", dest="inc", help="Path inc file.", type=Path)
parser.add_argument(
"--header", dest="header", help="Path header file.", type=Path, required=True
"--aarch64", dest="aarch64", help="aarch64.h header file location", type=Path
)
parser.add_argument(
"--inc", dest="inc", help="Path inc file.", type=Path, required=True
"--arm64", dest="arm64", help="arm64.h header file location", type=Path
)
parser.add_argument(
"-c", dest="compat", help="Generate compatibility header", action="store_true"
)
parser.add_argument(
"-p", dest="patch", help="Patch inc file into header", action="store_true"
)
arguments = parser.parse_args()
return arguments
@ -104,7 +112,147 @@ class HeaderPatcher:
return True
class CompatHeaderBuilder:
    """
    Generates the `arm64.h` compatibility header from `aarch64.h`.

    The translation is line based and uses plain regex matching. Each
    `replace_*`/`remove_*`/`inject_*` method takes the header as a list of
    lines and returns a new list; `generate_aarch64_compat_header()` chains
    them and writes the result.
    """

    def __init__(self, aarch64_h: Path, arm64_h: Path):
        """
        :param aarch64_h: Path of the `aarch64.h` header to read.
        :param arm64_h: Path the generated compatibility header is written to.
        """
        self.aarch64_h = aarch64_h
        self.arm64_h = arm64_h

    def replace_typedef_struct(self, aarch64_lines: list[str]) -> list[str]:
        """
        Collapses every multi-line `typedef struct`/`typedef union` into a
        single alias line `typedef <aarch64 name> <arm64 name>;`.
        The members of the struct/union are dropped.
        """
        output = list()
        typedef = ""
        for line in aarch64_lines:
            if typedef:
                if not re.search(r"^}\s[\w_]+;", line):
                    # Skip struct content
                    continue
                # Closing line `} type_name;`: the first word is the type name.
                type_name = re.findall(r"[\w_]+", line)[0]
                output.append(
                    f"typedef {type_name} {re.sub('aarch64','arm64', type_name)};\n"
                )
                typedef = ""
                continue

            if re.search(r"^typedef\s+(struct|union)", line):
                typedef = line
                continue
            output.append(line)
        return output

    def replace_typedef_enum(self, aarch64_lines: list[str]) -> list[str]:
        """
        Rewrites every `typedef enum` into an anonymous enum whose members
        are named `ARM64...` and are assigned the value of the matching
        `AArch64...` member. The enum type name in the closing line is
        renamed as well.
        """
        output = list()
        typedef = ""
        for line in aarch64_lines:
            if typedef:
                if not re.search(r"^}\s[\w_]+;", line):
                    # Replace name
                    if "AArch64" not in line:
                        output.append(line)
                        continue
                    entry_name: str = re.findall(r"AArch64[\w_]+", line)[0]
                    arm64_name = entry_name.replace("AArch64", "ARM64")
                    # Everything from the member name to the end of the line
                    # (explicit value, trailing comment) is replaced.
                    patched_line = re.sub(
                        r"AArch64.+", f"{arm64_name} = {entry_name},", line
                    )
                    output.append(patched_line)
                    continue
                # We still have LLVM and CS naming conventions mixed
                p = re.sub(r"aarch64", "arm64", line)
                p = re.sub(r"AArch64", "ARM64", p)
                output.append(p)
                typedef = ""
                continue

            if re.search(r"^typedef\s+enum", line):
                typedef = line
                output.append("typedef enum {\n")
                continue
            output.append(line)
        return output

    def remove_comments(self, aarch64_lines: list[str]) -> list[str]:
        """
        Drops all lines which only consist of a `//` comment.
        Lines containing `// SPDX` are kept.
        """
        output = list()
        for line in aarch64_lines:
            if re.search(r"^\s*//", line) and "// SPDX" not in line:
                continue
            output.append(line)
        return output

    def replace_aarch64(self, aarch64_lines: list[str]) -> list[str]:
        """
        Replaces `AArch64` with `ARM64` in every line which is not part of
        a typedef.

        NOTE(review): a one-line typedef (e.g. the aliases emitted by
        replace_typedef_struct()) also sets `in_typedef`. Lines following it
        are passed through unchanged until the next `} name;` line.
        Confirm this is intended before changing it; it affects the
        generated header.
        """
        output = list()
        in_typedef = False
        for line in aarch64_lines:
            if in_typedef:
                if re.search(r"^}\s[\w_]+;", line):
                    in_typedef = False
                output.append(line)
                continue

            if re.search(r"^typedef", line):
                in_typedef = True
                output.append(line)
                continue
            output.append(re.sub(r"AArch64", "ARM64", line))
        return output

    def replace_include_guards(self, aarch64_lines: list[str]) -> list[str]:
        """
        Replaces `AARCH64` with `ARM64` in all lines starting with
        `#ifndef` or `#define`.
        """
        output = list()
        for line in aarch64_lines:
            if not re.search(r"^#(ifndef|define)", line):
                output.append(line)
                continue
            output.append(re.sub(r"AARCH64", "ARM64", line))
        return output

    def inject_aarch64_header(self, aarch64_lines: list[str]) -> list[str]:
        """
        Inserts `#include <capstone/aarch64.h>` in front of the first
        `#include` line. Nothing is inserted if there is no `#include`.
        """
        output = list()
        header_inserted = False
        for line in aarch64_lines:
            if re.search(r"^#include", line):
                if not header_inserted:
                    output.append("#include <capstone/aarch64.h>\n")
                    header_inserted = True
            output.append(line)
        return output

    def generate_aarch64_compat_header(self) -> bool:
        """
        Translates the aarch64.h header into the arm64.h header and renames all aarch64 occurrences.
        It does simple regex matching and replacing.

        :return: True after the compatibility header was written.
        """
        log.info("Generate compatibility header")
        with open(self.aarch64_h) as f:
            aarch64 = f.readlines()

        # The order matters: the later steps work on the output of the
        # typedef replacements.
        patched = self.replace_typedef_struct(aarch64)
        patched = self.replace_typedef_enum(patched)
        patched = self.remove_comments(patched)
        patched = self.replace_aarch64(patched)
        patched = self.replace_include_guards(patched)
        patched = self.inject_aarch64_header(patched)

        with open(self.arm64_h, "w") as f:
            f.writelines(patched)
        return True
if __name__ == "__main__":
args = parse_args()
patcher = HeaderPatcher(args.header, args.inc)
patcher.patch_header()
if (not args.patch and not args.compat) or (args.patch and args.compat):
print("You need to specify either -c or -p")
exit(1)
if args.compat and not (args.aarch64 and args.arm64):
print(
"Generating the arm64 compatibility header requires --arm64 and --aarch64"
)
exit(1)
if args.patch and not (args.inc and args.header):
print("Patching headers requires --inc and --header")
exit(1)
if args.patch:
patcher = HeaderPatcher(args.header, args.inc)
patcher.patch_header()
exit(0)
builder = CompatHeaderBuilder(args.aarch64, args.arm64)
builder.generate_aarch64_compat_header()

View File

@ -0,0 +1,49 @@
// SPDX-FileCopyrightText: 2024 Rot127 <unisono@quyllur.org>
// SPDX-License-Identifier: BSD-3
#ifndef CAPSTONE_AARCH64_H
#define CAPSTONE_AARCH64_H
#include "cs_operand.h"
inline static unsigned AArch64CC_getNZCVToSatisfyCondCode(AArch64CC_CondCode Code)
{
// NZCV flags encoded as expected by ccmp instructions, ARMv8 ISA 5.5.7.
enum { N = 8, Z = 4, C = 2, V = 1 };
switch (Code) {
default:
assert(0 && "Unknown condition code");
case AArch64CC_EQ:
return Z; // Z == 1
}
}
typedef union {
aarch64_dbnxs dbnxs;
aarch64_exactfpimm exactfpimm;
} aarch64_sysop_imm;
typedef enum aarch64_op_type {
AArch64_OP_SYSALIAS = CS_OP_SPECIAL + 27, // Equal Equal
AArch64_OP_SYSALIASI,
AArch64_OP_SYSALIASII = 0,
AArch64_OP_SYSALIASIII, // Comment
} aarch64_op_type;
#define MAX_AARCH64_OPS 8
/// Instruction structure
typedef struct cs_aarch64 {
AArch64CC_CondCode cc; ///< conditional code for this insn
bool update_flags; ///< does this insn update flags?
bool post_index; ///< only set if writeback is 'True', if 'False' pre-index, otherwise post.
bool is_doing_sme; ///< True if a SME operand is currently edited.
/// Number of operands of this instruction,
/// or 0 when instruction has no operand.
uint8_t op_count;
cs_aarch64_op operands[MAX_AARCH64_OPS]; ///< operands for this instruction.
} cs_aarch64;
#endif

View File

@ -0,0 +1,34 @@
// SPDX-FileCopyrightText: 2024 Rot127 <unisono@quyllur.org>
// SPDX-License-Identifier: BSD-3
#ifndef CAPSTONE_ARM64_H
#define CAPSTONE_ARM64_H
#include <capstone/aarch64.h>
#include "cs_operand.h"
inline static unsigned ARM64CC_getNZCVToSatisfyCondCode(ARM64CC_CondCode Code)
{
enum { N = 8, Z = 4, C = 2, V = 1 };
switch (Code) {
default:
assert(0 && "Unknown condition code");
case ARM64CC_EQ:
return Z; // Z == 1
}
}
typedef aarch64_sysop_imm arm64_sysop_imm;
typedef enum {
ARM64_OP_SYSALIAS = AArch64_OP_SYSALIAS,
ARM64_OP_SYSALIASI = AArch64_OP_SYSALIASI,
ARM64_OP_SYSALIASII = AArch64_OP_SYSALIASII,
ARM64_OP_SYSALIASIII = AArch64_OP_SYSALIASIII,
} arm64_op_type;
#define MAX_ARM64_OPS 8
typedef cs_aarch64 cs_arm64;
#endif

View File

@ -3,7 +3,7 @@
import unittest
from autosync.HeaderPatcher import HeaderPatcher
from autosync.HeaderPatcher import CompatHeaderBuilder, HeaderPatcher
from autosync.Helper import get_path
@ -15,6 +15,10 @@ class TestHeaderPatcher(unittest.TestCase):
get_path("{HEADER_PATCHER_TEST_INC_FILE}"),
write_file=False,
)
cls.compat_gen = CompatHeaderBuilder(
get_path("{HEADER_GEN_TEST_AARCH64_FILE}"),
get_path("{HEADER_GEN_TEST_ARM64_OUT_FILE}"),
)
def test_header_patching(self):
self.hpatcher.patch_header()
@ -45,3 +49,10 @@ class TestHeaderPatcher(unittest.TestCase):
"\n"
),
)
def test_compat_header_gen(self):
    """
    Generates the compatibility header from the AArch64 test header and
    checks that the written file equals the expected ARM64 test header.
    """
    self.compat_gen.generate_aarch64_compat_header()

    with open(get_path("{HEADER_GEN_TEST_ARM64_FILE}")) as expected_file:
        correct = expected_file.read()

    with open(get_path("{HEADER_GEN_TEST_ARM64_OUT_FILE}")) as generated_file:
        generated = generated_file.read()

    self.assertEqual(generated, correct)

View File

@ -22,6 +22,9 @@
"{CS_CLANG_FORMAT_FILE}": "{CS_ROOT}/.clang-format",
"{HEADER_PATCHER_TEST_HEADER_FILE}": "{AUTO_SYNC_SRC}/Tests/test_header.h",
"{HEADER_PATCHER_TEST_INC_FILE}": "{AUTO_SYNC_SRC}/Tests/test_include.inc",
"{HEADER_GEN_TEST_AARCH64_FILE}": "{AUTO_SYNC_SRC}/Tests/test_aarch64_header.h",
"{HEADER_GEN_TEST_ARM64_FILE}": "{AUTO_SYNC_SRC}/Tests/test_arm64_header.h",
"{HEADER_GEN_TEST_ARM64_OUT_FILE}": "{AUTO_SYNC_SRC}/Tests/test_arm64_header.h.out",
"{DIFFER_TEST_DIR}": "{CPP_TRANSLATOR_TEST_DIR}/Differ/",
"{DIFFER_TEST_CONFIG_FILE}": "{DIFFER_TEST_DIR}/test_arch_config.json",
"{DIFFER_TEST_OLD_SRC_DIR}": "{DIFFER_TEST_DIR}/old_src/",
@ -33,7 +36,8 @@
"{C_INC_OUT_DIR}",
"{CPP_INC_OUT_DIR}",
"{CPP_TRANSLATOR_TRANSLATION_OUT_DIR}",
"{CPP_TRANSLATOR_DIFF_OUT_DIR}"
"{CPP_TRANSLATOR_DIFF_OUT_DIR}",
"{HEADER_GEN_TEST_ARM64_OUT_FILE}"
],
"ignore_missing": [
"{DIFFER_TEST_PERSISTENCE_FILE}"