mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-27 07:31:28 +00:00
b71edfaa4e
This is the first commit in a series that will reformat all the python files in the LLVM repository. Reformatting is done with `black`. See more information here: https://discourse.llvm.org/t/rfc-document-and-standardize-python-code-style Reviewed By: jhenderson, JDevlieghere, MatzeB Differential Revision: https://reviews.llvm.org/D150545
154 lines
4.7 KiB
Python
Executable File
154 lines
4.7 KiB
Python
Executable File
#!/usr/bin/env python
|
|
from __future__ import print_function
|
|
|
|
"""
|
|
Helper script to print out the raw content of an ELF section.
|
|
Example usages:
|
|
```
|
|
# print out as bits by default
|
|
extract-section.py .text --input-file=foo.o
|
|
```
|
|
```
|
|
# read from stdin and print out in hex
|
|
cat foo.o | extract-section.py -h .text
|
|
```
|
|
This is merely a wrapper around `llvm-readobj` that focuses on the binary
|
|
content as well as providing more formatting options.
|
|
"""
|
|
|
|
# Unfortunately reading binary from stdin is not so trivial in Python...
|
|
def read_raw_stdin():
    """Read all of standard input and return it without text decoding.

    On Python 3 the data comes from ``sys.stdin.buffer`` (the underlying
    binary stream). On Python 2 it comes from ``sys.stdin`` itself, which on
    Windows is first switched to binary mode.

    Returns:
        Whatever ``read()`` on the selected stream yields: ``bytes`` on
        Python 3, a byte ``str`` on Python 2.
    """
    import sys

    if sys.version_info >= (3, 0):
        reading_source = sys.stdin.buffer
    else:
        # Windows will always read as string so we need some
        # special handling
        if sys.platform == "win32":
            import os
            import msvcrt

            # msvcrt.setmode() is the call that changes a descriptor's
            # translation mode; there is no msvcrt.setformat().
            msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
        reading_source = sys.stdin
    return reading_source.read()
|
|
|
|
|
|
def get_raw_section_dump(readobj_path, section_name, input_file):
    """Run the readobj tool and return its hex dump of one section as text.

    The tool is invoked as
    ``<readobj_path> --elf-output-style=GNU --hex-dump=<section_name> <input_file>``.

    Args:
        readobj_path: Executable to run (path or name resolved by the OS).
        section_name: Section name placed into the ``--hex-dump=`` option.
        input_file: File argument passed to the tool. When it is ``"-"`` the
            raw bytes of this script's own stdin are forwarded to the tool's
            stdin.

    Returns:
        The tool's standard output as ``str``. The tool's exit status is not
        inspected.
    """
    import subprocess

    cmd = [
        readobj_path,
        "--elf-output-style=GNU",
        "--hex-dump={}".format(section_name),
        input_file,
    ]
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    # With input=None, communicate() sends nothing and just closes the pipe,
    # so one call covers both the stdin and the regular-file case.
    stdin_payload = read_raw_stdin() if input_file == "-" else None
    out, _ = proc.communicate(input=stdin_payload)

    # Python 3 hands back bytes; Python 2 already yields str.
    return out if isinstance(out, str) else out.decode("utf-8")
|
|
|
|
|
|
def build_arg_parser():
    """Create the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser`` whose parsed namespace carries
        ``tool_path``, ``input_file``, ``section``, ``format``,
        ``byte_indicator``, ``bits_endian`` and ``hex_width``.
    """
    import argparse

    # The default '-h' (--help) will conflict with our '-h' (hex) format
    arg_parser = argparse.ArgumentParser(add_help=False)
    arg_parser.add_argument(
        "--readobj-path",
        # Store under the attribute main() reads. Without an explicit dest
        # the value would land in `readobj_path` and never be used.
        dest="tool_path",
        metavar="<executable path>",
        type=str,
        help="Path to llvm-readobj",
    )
    arg_parser.add_argument(
        "--input-file",
        metavar="<file>",
        type=str,
        help="Input object file, or '-' to read from stdin",
    )
    arg_parser.add_argument(
        "section", metavar="<name>", type=str, help="Name of the section to extract"
    )
    # Output format
    format_group = arg_parser.add_mutually_exclusive_group()
    format_group.add_argument(
        "-b",
        dest="format",
        action="store_const",
        const="bits",
        help="Print out in bits",
    )
    arg_parser.add_argument(
        "--byte-indicator",
        action="store_true",
        help="Whether to print a '.' every 8 bits in bits printing mode",
    )
    arg_parser.add_argument(
        "--bits-endian",
        metavar="<little/big>",
        type=str,
        choices=["little", "big"],
        help="Print out bits in specified endianness (little or big); defaults to big",
    )
    format_group.add_argument(
        "-h",
        dest="format",
        action="store_const",
        const="hex",
        help="Print out in hexadecimal",
    )
    arg_parser.add_argument(
        "--hex-width",
        metavar="<# of bytes>",
        type=int,
        help="The width (in byte) of every element in hex printing mode",
    )

    arg_parser.add_argument("--help", action="help")
    arg_parser.set_defaults(
        format="bits",
        tool_path="llvm-readobj",
        input_file="-",
        byte_indicator=False,
        hex_width=4,
        bits_endian="big",
    )
    return arg_parser


def convert_hex_dump(
    raw_section,
    output_format="bits",
    byte_indicator=False,
    bits_endian="big",
    hex_width=4,
):
    """Turn hex-dump text into the list of tokens this script prints.

    Lines starting with ``"Hex dump"`` are skipped. On every other line the
    first space-separated column is dropped and up to four following columns
    are parsed as hexadecimal numbers; each is handled as a 32-bit word. The
    first column that is not valid hex ends processing of that line.
    NOTE(review): this layout is assumed to match llvm-readobj's GNU-style
    hex dump (address, four words, ASCII column) -- confirm against the tool.

    Args:
        raw_section: Text of the hex dump.
        output_format: ``"bits"`` emits one ``"0"``/``"1"`` token per bit;
            ``"hex"`` emits zero-padded hex tokens. Any other value yields
            no tokens.
        byte_indicator: In bits mode, append a ``"."`` token after each byte.
        bits_endian: In bits mode, ``"little"`` walks the bytes of each word
            from least to most significant; anything else walks them from
            most to least significant. Bits inside a byte are always
            emitted most significant first.
        hex_width: In hex mode, number of bytes per emitted token (1 to 4).
            A width of 3 does not divide 32 bits evenly: only the upper
            24 bits of each word are emitted.

    Returns:
        A list of string tokens, in output order.

    Raises:
        ValueError: ``output_format`` is ``"hex"`` and ``hex_width`` is not
            in the range 1..4.
    """
    is_bits = output_format == "bits"
    is_hex = output_format == "hex"

    # Everything below is loop-invariant, so compute it once up front.
    if is_bits:
        byte_offsets = (0, 8, 16, 24) if bits_endian == "little" else (24, 16, 8, 0)
        bit_offsets = range(7, -1, -1)
    elif is_hex:
        if not 0 < hex_width <= 4:
            raise ValueError(
                "hex width must be between 1 and 4 bytes, got {}".format(hex_width)
            )
        width_bits = hex_width * 8
        chunk_offsets = range(32 - width_bits, -1, -width_bits)
        mask = (1 << width_bits) - 1
        format_str = "{:0" + str(hex_width * 2) + "x}"

    tokens = []
    for line in raw_section.splitlines(False):
        if line.startswith("Hex dump"):
            continue
        # Column 0 is skipped; at most the next four columns are data words.
        for part in line.strip().split(" ")[1:5]:
            # exclude any non-hex dump string
            try:
                val = int(part, 16)
            except ValueError:
                break
            if is_bits:
                # divided into bytes first
                for byte_off in byte_offsets:
                    byte = (val >> byte_off) & 0xFF
                    tokens.extend(str((byte >> bit) & 1) for bit in bit_offsets)
                    if byte_indicator:
                        tokens.append(".")
            elif is_hex:
                tokens.extend(
                    format_str.format((val >> off) & mask) for off in chunk_offsets
                )
    return tokens


def main():
    """Parse the command line, dump the section and print it reformatted."""
    arg_parser = build_arg_parser()
    args = arg_parser.parse_args()

    # Reject a bad width before running the tool, with a proper usage error
    # instead of silently printing nothing.
    if args.format == "hex" and not 0 < args.hex_width <= 4:
        arg_parser.error("--hex-width must be between 1 and 4")

    raw_section = get_raw_section_dump(args.tool_path, args.section, args.input_file)

    results = convert_hex_dump(
        raw_section,
        output_format=args.format,
        byte_indicator=args.byte_indicator,
        bits_endian=args.bits_endian,
        hex_width=args.hex_width,
    )
    print(" ".join(results), end="")


if __name__ == "__main__":
    main()
|