mirror of
https://github.com/avast/retdec.git
synced 2024-11-23 04:49:53 +00:00
Initial commit.
This commit is contained in:
commit
d9230cbc94
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
/build/
|
33
.gitmodules
vendored
Normal file
33
.gitmodules
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
[submodule "deps/fileformat"]
|
||||
path = deps/fileformat
|
||||
url = https://github.com/avast-tl/fileformat
|
||||
[submodule "deps/fnc-patterns"]
|
||||
path = deps/fnc-patterns
|
||||
url = https://github.com/avast-tl/fnc-patterns
|
||||
[submodule "deps/googletest"]
|
||||
path = deps/googletest
|
||||
url = https://github.com/avast-tl/googletest
|
||||
[submodule "deps/libdwarf"]
|
||||
path = deps/libdwarf
|
||||
url = https://github.com/avast-tl/libdwarf
|
||||
[submodule "deps/llvm"]
|
||||
path = deps/llvm
|
||||
url = https://github.com/avast-tl/llvm
|
||||
[submodule "deps/pdbparser"]
|
||||
path = deps/pdbparser
|
||||
url = https://github.com/avast-tl/pdbparser
|
||||
[submodule "deps/retdec-config"]
|
||||
path = deps/retdec-config
|
||||
url = https://github.com/avast-tl/retdec-config
|
||||
[submodule "deps/tl-cpputils"]
|
||||
path = deps/tl-cpputils
|
||||
url = https://github.com/avast-tl/tl-cpputils
|
||||
[submodule "deps/demangler"]
|
||||
path = deps/demangler
|
||||
url = https://github.com/avast-tl/demangler
|
||||
[submodule "deps/ctypes"]
|
||||
path = deps/ctypes
|
||||
url = https://github.com/avast-tl/ctypes
|
||||
[submodule "deps/capstone2llvmir"]
|
||||
path = deps/capstone2llvmir
|
||||
url = https://github.com/avast-tl/capstone2llvmir
|
32
CMakeLists.txt
Normal file
32
CMakeLists.txt
Normal file
@ -0,0 +1,32 @@
|
||||
|
||||
cmake_minimum_required(VERSION 3.6)

project(retdec CXX)

# Default to a 'Release' build when the user did not choose a build type.
# Multi-configuration generators (Visual Studio, Xcode) select the
# configuration at build time and ignore CMAKE_BUILD_TYPE, so the default is
# applied only when CMAKE_CONFIGURATION_TYPES is empty, i.e. for
# single-configuration generators.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
	set(default_build_type "Release")
	message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
	set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Choose the type of build." FORCE)
endif()

# The project needs C++14; fail at configure time instead of silently
# falling back to an older standard.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Installed binaries look for shared libraries in <prefix>/lib.
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")

# User-facing switches.
option(RETDEC_DOC "Build public API documentation (requires Doxygen)." OFF)
option(RETDEC_TESTS "Build tests." OFF)

# Tests of the dependency submodules follow the RETDEC_TESTS switch;
# DEPS_TESTS is read by cmake/deps-config.cmake.
set(DEPS_TESTS ${RETDEC_TESTS})
include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/deps-config.cmake")

# Registers the install-time step from cmake/install-external.cmake.
include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/install-external.cmake")

# Subdirectories; documentation and tests are built only on request.
add_subdirectory(deps)
if(RETDEC_DOC)
	add_subdirectory(doc)
endif()
add_subdirectory(scripts)
add_subdirectory(src)
if(RETDEC_TESTS)
	add_subdirectory(tests)
endif()
|
21
LICENSE
Normal file
21
LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2017 Avast Software
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
1367
LICENSE-THIRD-PARTY
Normal file
1367
LICENSE-THIRD-PARTY
Normal file
File diff suppressed because it is too large
Load Diff
119
README.md
Normal file
119
README.md
Normal file
@ -0,0 +1,119 @@
|
||||
## RetDec
|
||||
|
||||
[RetDec](https://retdec.com/) is a retargetable machine-code decompiler based on [LLVM](https://llvm.org/).
|
||||
|
||||
The decompiler is not limited to any particular target architecture, operating system, or executable file format:
|
||||
* Supported file formats: ELF, PE, Mach-O, COFF, AR (archive), Intel HEX, and raw machine code.
|
||||
* Supported architectures (32b only): Intel x86, ARM, MIPS, PIC32, and PowerPC.
|
||||
|
||||
Features:
|
||||
* Static analysis of executable files with detailed information.
|
||||
* Compiler and packer detection.
|
||||
* Loading and instruction decoding.
|
||||
* Signature-based removal of statically linked library code.
|
||||
* Extraction and utilization of debugging information (DWARF, PDB).
|
||||
* Reconstruction of instruction idioms.
|
||||
* Detection and reconstruction of C++ class hierarchies (RTTI, vtables).
|
||||
* Demangling of symbols from C++ binaries (GCC, MSVC, Borland).
|
||||
* Reconstruction of functions, types, and high-level constructs.
|
||||
* Integrated disassembler.
|
||||
* Output in two high-level languages: C and a Python-like language.
|
||||
* Generation of call graphs, control-flow graphs, and various statistics.
|
||||
|
||||
## Requirements
|
||||
|
||||
* A compiler supporting C++14
|
||||
* On Windows, only Microsoft Visual C++ is supported (version >= Visual Studio 2015 Update 2).
|
||||
* CMake (version >= 3.6)
|
||||
* Perl
|
||||
* On Windows, [Active Perl](https://www.activestate.com/activeperl) needs to be the first Perl in `PATH`, or it has to be provided to CMake using `CMAKE_PROGRAM_PATH` variable, e.g. `-DCMAKE_PROGRAM_PATH=/c/perl/bin`.
|
||||
* [GNU Bison](https://www.gnu.org/software/bison/), [Flex](https://www.gnu.org/software/flex/), [GNU Tar](https://www.gnu.org/software/tar/), `wget`, `sha256sum`.
|
||||
* On Windows, you can follow RetDec's Windows environment setup [guide](https://github.com/avast-tl/retdec/wiki/Windows-Environment) to help you get everything you need.
|
||||
|
||||
Additionally, to run the decompiler once it is built and installed, the following tools are needed:
|
||||
* [GNU Bash](https://www.gnu.org/software/bash/), [UPX](https://upx.github.io/), [bc](https://www.gnu.org/software/bc/), [dot](http://www.graphviz.org/).
|
||||
* As before, you can follow RetDec's Windows environment setup [guide](https://github.com/avast-tl/retdec/wiki/Windows-Environment) to help you get everything you need on Windows.
|
||||
|
||||
## Build and Installation
|
||||
|
||||
* Recursively clone the repository (it contains submodules):
|
||||
* `git clone --recursive https://github.com/avast-tl/retdec`
|
||||
* Linux:
|
||||
* `cd retdec`
|
||||
* `mkdir build && cd build`
|
||||
* `cmake .. -DCMAKE_INSTALL_PREFIX=<path>`
|
||||
* `make && make install`
|
||||
* Windows:
|
||||
* Open MSBuild command prompt, or any terminal that is configured to run the `msbuild` command.
|
||||
* Make sure you can run required commands listed in the Requirements section.
|
||||
* `cd retdec`
|
||||
* `mkdir build && cd build`
|
||||
* `cmake .. -DCMAKE_INSTALL_PREFIX=<path> -G<generator>`
|
||||
* `msbuild /m /p:Configuration=Release retdec.sln`
|
||||
* `msbuild /m /p:Configuration=Release INSTALL.vcxproj`
|
||||
* Alternatively, you can open `retdec.sln` generated by `cmake` in Visual Studio IDE.
|
||||
|
||||
You must pass the following parameters to `cmake`:
|
||||
* `-DCMAKE_INSTALL_PREFIX=<path>` to set the installation path to `<path>`.
|
||||
* (Windows only) `-G<generator>` is `-G"Visual Studio 14 2015"` for 32-bit build using Visual Studio 2015, or `-G"Visual Studio 14 2015 Win64"` for 64-bit build using Visual Studio 2015. Later versions of Visual Studio may be used.
|
||||
|
||||
You can pass the following additional parameters to `cmake`:
|
||||
* `-DRETDEC_DOC=ON` to build with API documentation (requires Doxygen and Graphviz, disabled by default).
|
||||
* `-DRETDEC_TESTS=ON` to build with tests, including all the tests in dependency submodules (disabled by default).
|
||||
* `-DCMAKE_BUILD_TYPE=Debug` to build with debugging information, which is useful during development. By default, the project is built in the `Release` mode. This has no effect on Windows, but the same thing can be achieved by running `msbuild` with the `/p:Configuration=Debug` parameter.
|
||||
* `-DCMAKE_PROGRAM_PATH=<path>` to use Perl at `<path>` (probably useful only on Windows).
|
||||
|
||||
## Usage Example
|
||||
|
||||
To decompile a binary file named `test.bin` run:
|
||||
```
|
||||
./decompile.sh test.bin
|
||||
```
|
||||
|
||||
Run `./decompile.sh --help` to list all the available options.
|
||||
|
||||
## Repository Overview
|
||||
|
||||
This repository contains the following libraries:
|
||||
* `bin2llvmir` -- library of LLVM passes for translating binaries into LLVM IR modules.
|
||||
* `debugformat` -- library for uniform representation of DWARF and PDB debugging information.
|
||||
* `dwarfparser` -- library for high-level representation of DWARF debugging information.
|
||||
* `llvm-support` -- set of LLVM related utility functions.
|
||||
* `llvmir2hll` -- library for translating LLVM IR modules to high-level source codes (C, Python-like language).
|
||||
|
||||
This repository contains the following tools:
|
||||
* `bin2llvmirtool` -- frontend for the `bin2llvmir` library.
|
||||
* `llvm2hlltool` -- frontend for the `llvmir2hll` library.
|
||||
|
||||
This repository contains the following scripts:
|
||||
* `decompile.sh` -- the main decompilation script binding it all together. This is the tool to use for full binary-to-C decompilations.
|
||||
* Support scripts used by `decompile.sh`:
|
||||
* `color-c.py` -- decorates output C sources with IDA color tags -- syntax highlighting for IDA.
|
||||
* `config.sh` -- decompiler's configuration file.
|
||||
* `decompile-archive.sh` -- decompiles objects in the given AR archive.
|
||||
* `fileinfo.sh` -- a Fileinfo tool wrapper.
|
||||
* `signature-from-library.sh` -- extracts function signatures from the given library.
|
||||
* `unpack.sh` -- tries to unpack the given executable file by using any of the supported unpackers.
|
||||
* Other utility scripts:
|
||||
* `decompile-all.sh` -- decompiles all executables in the given directory and subdirectories.
|
||||
* `run-unit-test.sh` -- runs all tests in the unit test directory.
|
||||
* `utils.sh` -- a collection of bash utilities.
|
||||
|
||||
## Related repositories
|
||||
|
||||
* [RetDec IDA plugin](https://github.com/avast-tl/retdec-idaplugin) -- embeds RetDec into IDA (Interactive Disassembler) and makes its use much easier.
|
||||
* [RetDec Regression Tests](https://github.com/avast-tl/retdec-regression-tests-framework) -- provides means to run and create regression tests for RetDec and related tools. This is a must if you plan to contribute to the RetDec project.
|
||||
|
||||
## License
|
||||
|
||||
Copyright (c) 2017 Avast Software, licensed under the MIT license. See the `LICENSE` file for more details.
|
||||
|
||||
RetDec uses third-party libraries or other resources listed, along with their licenses, in the `LICENSE-THIRD-PARTY` file.
|
||||
|
||||
## Contributing
|
||||
|
||||
See [RetDec contribution guidelines](https://github.com/avast-tl/retdec/wiki/Contribution-Guidelines).
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
This software was supported by the research funding TACR (Technology Agency of the Czech Republic), ALFA Programme No. TA01010667.
|
29
cmake/deps-config.cmake
Normal file
29
cmake/deps-config.cmake
Normal file
@ -0,0 +1,29 @@
|
||||
|
||||
# Build configuration forced onto the dependency projects.
#
# Every entry is written to the cache with FORCE, so the values chosen here
# override whatever is already stored in the cache for the dependency.
# The *_TESTS switches all take their value from DEPS_TESTS, which must be
# set before this file is included.

# --- capstone2llvmir ---------------------------------------------------------
set(CAPSTONE2LLVMIR_TOOLS ON CACHE BOOL "enable capstone2llvmir" FORCE)
set(CAPSTONE2LLVMIR_TESTS ${DEPS_TESTS} CACHE BOOL "enable capstone2llvmir tests" FORCE)

# --- csim --------------------------------------------------------------------
# NOTE(review): RETDEC_DEV_SUPPORT_DIR is not defined in this file -- confirm
# where it is expected to come from.
set(CLANG_DIR "${RETDEC_DEV_SUPPORT_DIR}/clang")
set(CSIM_TOOLS ON CACHE BOOL "enable csim" FORCE)
set(CSIM_TESTS ${DEPS_TESTS} CACHE BOOL "enable csim tests" FORCE)

# --- ctypes ------------------------------------------------------------------
set(CTYPES_TESTS ${DEPS_TESTS} CACHE BOOL "enable ctypes tests" FORCE)

# --- demangler ---------------------------------------------------------------
set(DEMANGLER_TOOLS OFF CACHE BOOL "enable demangler tools" FORCE)
set(DEMANGLER_TESTS ${DEPS_TESTS} CACHE BOOL "enable demangler tests" FORCE)

# --- fileformat --------------------------------------------------------------
set(FILEFORMAT_TESTS ${DEPS_TESTS} CACHE BOOL "enable fileformat tests" FORCE)

# --- retdec-config -----------------------------------------------------------
set(RETDEC_CONFIG_TOOLS ON CACHE BOOL "enable retdec-config tools" FORCE)
set(RETDEC_CONFIG_TESTS ${DEPS_TESTS} CACHE BOOL "enable retdec-config tests" FORCE)

# --- tl-cpputils -------------------------------------------------------------
set(TL_CPPUTILS_TESTS ${DEPS_TESTS} CACHE BOOL "enable tl-cpputils tests" FORCE)

# --- yaramod -----------------------------------------------------------------
set(YARAMOD_TESTS ${DEPS_TESTS} CACHE BOOL "enable yaramod tests" FORCE)
|
10
cmake/install-external.cmake
Normal file
10
cmake/install-external.cmake
Normal file
@ -0,0 +1,10 @@
|
||||
|
||||
# Install-time step: run install-share.sh (located next to this file) through
# bash, passing it the installation prefix, and abort the installation when
# the script exits with a non-zero status.
#
# CMAKE_INSTALL_PREFIX and DESTDIR are escaped so that they are expanded when
# the install script runs, not when the project is configured. This way a
# prefix overridden at install time or a staged install (DESTDIR) is honoured.
# The script path is expanded at configure time relative to this file, so it
# stays valid when the project is included from another build tree.
install(CODE "
	execute_process(
		COMMAND bash \"${CMAKE_CURRENT_LIST_DIR}/install-share.sh\" \"\$ENV{DESTDIR}\${CMAKE_INSTALL_PREFIX}\"
		RESULT_VARIABLE INSTALL_SHARE_RES
	)
	if(INSTALL_SHARE_RES)
		message(FATAL_ERROR \"RetDec share directory installation FAILED\")
	endif()
")
|
103
cmake/install-share.sh
Normal file
103
cmake/install-share.sh
Normal file
@ -0,0 +1,103 @@
|
||||
#!/bin/bash
#
# Get RetDec share directory.
#
# Usage: install-share.sh <install-path>
#
# Downloads the retdec-support archive for $VERSION into <install-path>,
# checks its SHA-256 hash against $SHA256SUM_REF and unpacks it with tar into
# <install-path>. When <install-path>/share already exists and its version
# file matches $VERSION, nothing is downloaded.
#
# Exit status: 0 on success (or when the share directory is already up to
# date), 1 on any failure.
#

###############################################################################

VERSION_FILE_NAME="version.txt"
ARCH_SUFFIX="tar.xz"

SHA256SUM_REF="6376af57a77147f1363896963d8c1b3745ddb9a6bcec83d63a5846c3f78aeef9"
VERSION="2017-12-12"

###############################################################################

ARCH_NAME="retdec-support"_"$VERSION.$ARCH_SUFFIX"

# Remove the downloaded archive and the share directory after a failure.
cleanup()
{
	rm -f "$INSTALL_PATH/$ARCH_NAME"
	rm -rf "$SHARE_DIR"
}

# Check arguments.
if [ "$#" -ne 1 ]; then
	echo "ERROR: Unexpected number of arguments."
	exit 1
fi

# Get install path from script options.
INSTALL_PATH="$1"
# Convert from Windows to Unix path on Windows.
if [[ $(uname -s) == *MINGW* ]] || [[ $(uname -s) == *MSYS* ]]; then
	INSTALL_PATH="$(sed -e 's/\\/\//g' -e 's/://' <<< "/$INSTALL_PATH")"
fi

SHARE_DIR="$INSTALL_PATH/share"

# Share directory exists.
if [ -d "$SHARE_DIR" ]; then
	# Version file exists.
	if [ -f "$SHARE_DIR/$VERSION_FILE_NAME" ]; then
		VERSION_FROM_FILE=$(cat "$SHARE_DIR/$VERSION_FILE_NAME")
		# Version is ok.
		if [ "$VERSION" = "$VERSION_FROM_FILE" ]; then
			echo "$SHARE_DIR already exists, version is ok"
			exit
		else
			echo "versions is not as expected -> replace with expected version"
		fi
	fi

	# Missing version file or version mismatch: start from scratch.
	rm -rf "$SHARE_DIR"
fi

# Make sure destination directory exists.
mkdir -p "$INSTALL_PATH"

# Get archive using wget.
WGET_PARAMS=("https://github.com/avast-tl/retdec-support/releases/download/$VERSION/$ARCH_NAME" -O "$INSTALL_PATH/$ARCH_NAME")
echo "RUN: wget ${WGET_PARAMS[@]}"
wget "${WGET_PARAMS[@]}"
WGET_RC=$?
if [ "$WGET_RC" -ne 0 ]; then
	echo "ERROR: wget failed"
	cleanup
	exit 1
fi

# Compute hash of the downloaded archive.
SHA256SUM_PARAMS=("$INSTALL_PATH/$ARCH_NAME")
echo "RUN: sha256sum ${SHA256SUM_PARAMS[@]}"
# Capture the output first so that $? is the exit status of sha256sum itself;
# piping directly into cut would make $? the exit status of cut.
SHA256SUM_OUTPUT=$(sha256sum "${SHA256SUM_PARAMS[@]}")
SHA256SUM_RC=$?
# The hash is the first space-delimited field of the output line.
SHA256SUM="${SHA256SUM_OUTPUT%% *}"
if [ "$SHA256SUM_RC" -ne 0 ]; then
	echo "ERROR: sha256sum failed"
	cleanup
	exit 1
fi

# Check that hash is ok.
if [ "$SHA256SUM" != "$SHA256SUM_REF" ]; then
	echo "ERROR: hash check failed"
	cleanup
	exit 1
fi

# Unpack archive.
UNPACK_PARAMS=("$INSTALL_PATH/$ARCH_NAME" "--directory=$INSTALL_PATH")
echo "RUN: tar xf ${UNPACK_PARAMS[@]}"
tar xf "${UNPACK_PARAMS[@]}" &> /dev/null
UNPACK_RC=$?
if [ "$UNPACK_RC" -ne 0 ]; then
	echo "ERROR: unpacking failed"
	cleanup
	exit 1
fi

# Remove archive.
rm -f "$INSTALL_PATH/$ARCH_NAME"

echo "RetDec share directory downloaded OK"
exit
|
14
deps/CMakeLists.txt
vendored
Normal file
14
deps/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
|
||||
# Add the dependency projects in a fixed order. googletest sits between
# fnc-patterns and libdwarf and is added only when tests are enabled.
foreach(retdec_dep capstone2llvmir ctypes demangler fileformat fnc-patterns)
	add_subdirectory(${retdec_dep})
endforeach()

if(RETDEC_TESTS)
	add_subdirectory(googletest)
endif()

foreach(retdec_dep libdwarf llvm pdbparser retdec-config tl-cpputils)
	add_subdirectory(${retdec_dep})
endforeach()
|
1
deps/capstone2llvmir
vendored
Submodule
1
deps/capstone2llvmir
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 1a131ba03ad30a466a747e6b4463fa9dcb562f55
|
1
deps/ctypes
vendored
Submodule
1
deps/ctypes
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 64c231d2ae6b3def60034329f0afe97a93bab1da
|
1
deps/demangler
vendored
Submodule
1
deps/demangler
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 1f62d30425479d83a3996ff581fdf600cffbe462
|
1
deps/fileformat
vendored
Submodule
1
deps/fileformat
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 737baad9cd2bc1238f6d5229707ecdf1606c3405
|
1
deps/fnc-patterns
vendored
Submodule
1
deps/fnc-patterns
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit f3f0ac03a53f21f14c79fead645f3ce91f71ef0b
|
1
deps/googletest
vendored
Submodule
1
deps/googletest
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit bf640a07212fb1776f733a2cbc409d187ba27894
|
1
deps/libdwarf
vendored
Submodule
1
deps/libdwarf
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 687a608548ad96cc72c83a661e130c43d52541a2
|
1
deps/llvm
vendored
Submodule
1
deps/llvm
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 4876ee1861200df998c90c683dc25640aba733f2
|
1
deps/pdbparser
vendored
Submodule
1
deps/pdbparser
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit e7fc4a497a22d9eb1f8ee22169e9d8176a0f5e9a
|
1
deps/retdec-config
vendored
Submodule
1
deps/retdec-config
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 3862693eb45768c40655768c3c30a056fd44799d
|
1
deps/tl-cpputils
vendored
Submodule
1
deps/tl-cpputils
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 543516675a7d253d570250b063f2f58db5f7caf8
|
34
doc/CMakeLists.txt
Normal file
34
doc/CMakeLists.txt
Normal file
@ -0,0 +1,34 @@
|
||||
|
||||
find_package(Doxygen REQUIRED)

# Template in the source directory and the configuration generated from it.
set(DOXYGEN_CFG_IN "doxygen.in")
set(DOXYGEN_CFG "${CMAKE_CURRENT_BINARY_DIR}/doxygen.cfg")

# The trailing '/' after html is significant: install(DIRECTORY) then copies
# the contents of html/ into the destination instead of creating an 'html'
# directory inside it.
set(DOXYGEN_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/html/")

# Values substituted into the template by configure_file() below
# (presumably referenced there as @DOXYGEN_...@ -- confirm in doxygen.in).
set(DOXYGEN_WARNINGS "NO")
set(DOXYGEN_EXTRACT_PRIVATE "YES")
set(DOXYGEN_EXTRACT_LOCAL_CLASSES "YES")
set(DOXYGEN_INTERNAL_DOCS "YES")
set(DOXYGEN_EXCLUDE_PATTERNS "")
set(DOXYGEN_EXCLUDE_SYMBOLS "")
set(DOXYGEN_ENABLED_SECTIONS "internal")

# Generate the Doxygen configuration; only @VAR@ references are replaced.
configure_file(
	"${CMAKE_CURRENT_SOURCE_DIR}/${DOXYGEN_CFG_IN}"
	"${DOXYGEN_CFG}"
	@ONLY
)

# 'doc' target: part of the default build, runs Doxygen on the generated
# configuration from the binary directory.
add_custom_target(doc ALL
	COMMAND ${DOXYGEN_EXECUTABLE} "${DOXYGEN_CFG}"
	SOURCES "${DOXYGEN_CFG}"
	WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
	COMMENT "Generating API documentation with Doxygen"
	VERBATIM
)

# Let 'make clean' remove the generated HTML as well.
set_directory_properties(PROPERTIES
	ADDITIONAL_MAKE_CLEAN_FILES "${DOXYGEN_OUTPUT_DIR}"
)

# Install the generated HTML into <prefix>/doc.
install(DIRECTORY "${DOXYGEN_OUTPUT_DIR}" DESTINATION doc)
|
12
doc/doxygen.h
Normal file
12
doc/doxygen.h
Normal file
@ -0,0 +1,12 @@
|
||||
/**
|
||||
* @file doc/doxygen.h
|
||||
* @brief Documentation of the main page and the used namespaces.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
/**
|
||||
@mainpage
|
||||
|
||||
This is an automatically generated API documentation for the
|
||||
<a href="https://github.com/avast-tl/retdec">RetDec project</a>.
|
||||
*/
|
2507
doc/doxygen.in
Normal file
2507
doc/doxygen.in
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,34 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/analyses/indirectly_called_funcs_analysis.h
|
||||
* @brief Indirect calls analysis.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_ANALYSES_INDIRECTLY_CALLED_FUNCS_ANALYSIS_H
|
||||
#define BIN2LLVMIR_ANALYSES_INDIRECTLY_CALLED_FUNCS_ANALYSIS_H
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Instruction.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Analysis for finding out which functions can be indirectly called.
|
||||
*/
|
||||
class IndirectlyCalledFuncsAnalysis
|
||||
{
|
||||
public:
|
||||
static FuncSet getFuncsForIndirectCalls(
|
||||
const CallInstSet &call,
|
||||
llvm::Module::FunctionListType &funcsToCheck);
|
||||
static FuncSet getFuncsForIndirectCall(
|
||||
const llvm::CallInst &call,
|
||||
const FuncVec &funcsToCheck);
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
44
include/bin2llvmir/analyses/reachable_funcs_analysis.h
Normal file
44
include/bin2llvmir/analyses/reachable_funcs_analysis.h
Normal file
@ -0,0 +1,44 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/analyses/reachable_funcs_analysis.h
|
||||
* @brief Reachable functions analysis.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_ANALYSES_REACHABLE_FUNCS_ANALYSIS_H
|
||||
#define BIN2LLVMIR_ANALYSES_REACHABLE_FUNCS_ANALYSIS_H
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <llvm/ADT/SCCIterator.h>
|
||||
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Analysis for finding out which defined functions are directly and
|
||||
* indirectly reachable from some function.
|
||||
*/
|
||||
class ReachableFuncsAnalysis {
|
||||
public:
|
||||
ReachableFuncsAnalysis();
|
||||
~ReachableFuncsAnalysis();
|
||||
|
||||
std::string getName() const { return "ReachableFuncsAnalysis"; }
|
||||
|
||||
static FuncSet getReachableDefinedFuncsFor(llvm::Function &func,
|
||||
llvm::Module &module, llvm::CallGraph &callGraph);
|
||||
static FuncSet getGloballyReachableFuncsFor(llvm::Module &module);
|
||||
|
||||
private:
|
||||
FuncSet getDirectlyReachableDefinedFuncsFor(
|
||||
const FuncSet &funcs, llvm::CallGraph &callGraph) const;
|
||||
FuncSet getDirectlyReachableDefinedFuncsFor(
|
||||
llvm::CallGraphNode &reachableFrom) const;
|
||||
FuncSet getIndirectlyReachableDefinedFuncsFor(
|
||||
const FuncSet &funcs, llvm::Module &module) const;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
160
include/bin2llvmir/analyses/reaching_definitions.h
Normal file
160
include/bin2llvmir/analyses/reaching_definitions.h
Normal file
@ -0,0 +1,160 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/analyses/reaching_definitions.h
|
||||
* @brief Reaching definitions analysis (RDA) builds UD and DU chains.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*
|
||||
* Right now, this works on an entire module. But we could insert another layer
|
||||
* that represents functions. Then it would be possible to associate BBs with
|
||||
* functions that own them and recompute RDA only for the selected function.
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_ANALYSES_REACHING_DEFINITIONS_H
|
||||
#define BIN2LLVMIR_ANALYSES_REACHING_DEFINITIONS_H
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <llvm/ADT/SmallPtrSet.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
// Forward declarations: the aliases below and the classes themselves refer
// to each other.
class Definition;
class Use;
class BasicBlockEntry;
class ReachingDefinitionsAnalysis;

/// Boolean "something changed" result.
using Changed = bool;

/// Set of pointers to basic block entries.
using BBEntrySet = std::unordered_set<BasicBlockEntry*>;

/// Sets of pointers to definitions and uses.
using DefSet = std::unordered_set<Definition*>;
using UseSet = std::unordered_set<Use*>;
// TODO: it looks like this is running faster, but it often segfaults
//using DefSet = llvm::SmallPtrSet<Definition*, 32>;
//using UseSet = llvm::SmallPtrSet<Use*, 32>;

/// Vectors storing definitions and uses by value.
using DefVector = std::vector<Definition>;
using UseVector = std::vector<Use>;
|
||||
|
||||
class Definition
|
||||
{
|
||||
public:
|
||||
Definition(llvm::Instruction* d, llvm::Value* s);
|
||||
bool operator==(const Definition& o) const;
|
||||
|
||||
llvm::Value* getSource();
|
||||
|
||||
public:
|
||||
llvm::Instruction* def;
|
||||
llvm::Value* src;
|
||||
UseSet uses;
|
||||
};
|
||||
|
||||
class Use
|
||||
{
|
||||
public:
|
||||
Use(llvm::Instruction* u, llvm::Value* s);
|
||||
bool operator==(const Use &o) const;
|
||||
|
||||
bool isUndef() const;
|
||||
|
||||
public:
|
||||
llvm::Instruction* use;
|
||||
llvm::Value* src;
|
||||
DefSet defs;
|
||||
};
|
||||
|
||||
class BasicBlockEntry
|
||||
{
|
||||
public:
|
||||
BasicBlockEntry(const llvm::BasicBlock* b = nullptr);
|
||||
|
||||
std::string getName() const;
|
||||
friend std::ostream& operator<<(
|
||||
std::ostream& out,
|
||||
const BasicBlockEntry& bbe);
|
||||
|
||||
void initializeKillDefSets();
|
||||
Changed initDefsOut();
|
||||
|
||||
const DefSet& defsFromUse(const llvm::Instruction* I) const;
|
||||
const UseSet& usesFromDef(const llvm::Instruction* I) const;
|
||||
const Definition* getDef(const llvm::Instruction* I) const;
|
||||
const Use* getUse(const llvm::Instruction* I) const;
|
||||
|
||||
public:
|
||||
const llvm::BasicBlock* bb;
|
||||
|
||||
DefVector defs;
|
||||
UseVector uses;
|
||||
|
||||
BBEntrySet prevBBs;
|
||||
|
||||
// defsIn is union of prevBBs' defsOuts
|
||||
DefSet defsOut;
|
||||
DefSet genDefs;
|
||||
UnorderedValSet killDefs;
|
||||
|
||||
bool changed = false;
|
||||
|
||||
private:
|
||||
unsigned id;
|
||||
static int newUID;
|
||||
};
|
||||
|
||||
class ReachingDefinitionsAnalysis
|
||||
{
|
||||
public:
|
||||
bool runOnModule(
|
||||
llvm::Module& M,
|
||||
Config* c = nullptr,
|
||||
bool trackFlagRegs = false);
|
||||
bool runOnFunction(
|
||||
llvm::Function& F,
|
||||
Config* c = nullptr,
|
||||
bool trackFlagRegs = false);
|
||||
void clear();
|
||||
bool wasRun() const;
|
||||
|
||||
public:
|
||||
const DefSet& defsFromUse(const llvm::Instruction* I) const;
|
||||
const UseSet& usesFromDef(const llvm::Instruction* I) const;
|
||||
const Definition* getDef(const llvm::Instruction* I) const;
|
||||
const Use* getUse(const llvm::Instruction* I) const;
|
||||
|
||||
friend std::ostream& operator<<(
|
||||
std::ostream& out,
|
||||
const ReachingDefinitionsAnalysis& rda);
|
||||
|
||||
private:
|
||||
void run();
|
||||
const BasicBlockEntry& getBasicBlockEntry(const llvm::Instruction* I) const;
|
||||
void initializeBasicBlocks(llvm::Module& M);
|
||||
void initializeBasicBlocks(llvm::Function& F);
|
||||
void initializeBasicBlocksPrev();
|
||||
void initializeKillGenSets();
|
||||
void propagate();
|
||||
void initializeDefsAndUses();
|
||||
void clearInternal();
|
||||
|
||||
private:
|
||||
std::map<const llvm::Function*, std::map<const llvm::BasicBlock*, BasicBlockEntry>> bbMap;
|
||||
// std::map<const llvm::BasicBlock*, BasicBlockEntry> bbMap;
|
||||
bool _trackFlagRegs = false;
|
||||
const llvm::GlobalVariable* _specialGlobal = nullptr;
|
||||
bool _run = false;
|
||||
|
||||
public:
|
||||
Config* _config = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
584
include/bin2llvmir/analyses/store_load_analysis.h
Normal file
584
include/bin2llvmir/analyses/store_load_analysis.h
Normal file
@ -0,0 +1,584 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/analyses/store_load_analysis.h
|
||||
* @brief Analysis that finds out relations between load and store instructions.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_ANALYSES_STORE_LOAD_ANALYSIS_H
|
||||
#define BIN2LLVMIR_ANALYSES_STORE_LOAD_ANALYSIS_H
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
|
||||
#include "bin2llvmir/analyses/traversal/bb_traversal_analysis.h"
|
||||
#include "bin2llvmir/analyses/traversal/func_traversal_analysis.h"
|
||||
#include "bin2llvmir/analyses/uses_analysis.h"
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Contains some support for info that is contained by analysis.
|
||||
*
|
||||
* Contains support for:
|
||||
* - For extended right uses.
|
||||
* - For not go through.
|
||||
* - For last left uses.
|
||||
* - For global variables for indirect calls.
|
||||
* - For global variables for calls for functions defined out of module.
|
||||
* - For more details about this info @see StoreLoadAnalysis
|
||||
*/
|
||||
class AnalysisInfo {
|
||||
public:
|
||||
/**
|
||||
* @brief Class that has one storage which is map where instruction set is
|
||||
* mapped by value. This class implements support methods for this map.
|
||||
*/
|
||||
class ValInstSetMap {
|
||||
private:
|
||||
/// Mapping of a value to instruction set.
|
||||
using Storage = std::map<llvm::Value *, InstSet>;
|
||||
|
||||
private:
|
||||
/// Storage for this class.
|
||||
Storage storage;
|
||||
|
||||
public:
|
||||
/// ValInstSetMap constant iterator.
|
||||
/// Attributes (@c i is an iterator):
|
||||
/// - @c i->first is the key,
|
||||
/// - @c i->second is the mapped set of instructions.
|
||||
using iterator = Storage::const_iterator;
|
||||
|
||||
/// @name ValInstSetMap constant accessors.
|
||||
/// @{
|
||||
iterator begin() const;
|
||||
iterator end() const;
|
||||
/// @}
|
||||
|
||||
public:
|
||||
ValInstSetMap();
|
||||
~ValInstSetMap();
|
||||
|
||||
void addInst(llvm::Value &value, llvm::Instruction &inst);
|
||||
void addInsts(llvm::Value &value, const InstSet &instSet);
|
||||
void addFrom(const ValInstSetMap &toAdd);
|
||||
void appendInstsFor(llvm::Value &value, InstSet &instSet);
|
||||
void clear();
|
||||
bool empty();
|
||||
bool isDifferentFrom(const ValInstSetMap &toDiff);
|
||||
bool isValIn(llvm::Value &value);
|
||||
bool isIn(llvm::Value &value, llvm::Instruction &inst);
|
||||
llvm::Instruction *getInstFor(llvm::Value &leftOp,
|
||||
llvm::Value &rightOp);
|
||||
bool hasExcept(llvm::Value &value, const InstSet &except);
|
||||
void tryToRemoveValue(llvm::Value &value);
|
||||
iterator find(llvm::Value &value) const;
|
||||
|
||||
void print();
|
||||
};
|
||||
|
||||
/// Extended right uses iterator.
|
||||
using extRUses_iterator = ValInstSetMap::iterator;
|
||||
|
||||
/// Not go through global variables constant iterator.
|
||||
using notGoThrough_iterator = ValSet::const_iterator;
|
||||
|
||||
/// Last left uses constant iterator.
|
||||
using lastLUses_iterator = ValInstSetMap::iterator;
|
||||
|
||||
public:
|
||||
AnalysisInfo();
|
||||
~AnalysisInfo();
|
||||
|
||||
/// @name Extended right uses constant accessors.
|
||||
/// @{
|
||||
extRUses_iterator extRUses_begin() const;
|
||||
extRUses_iterator extRUses_end() const;
|
||||
/// @}
|
||||
|
||||
/// @name Not go through global variables constant accessors.
|
||||
/// @{
|
||||
notGoThrough_iterator notGoThrough_begin() const;
|
||||
notGoThrough_iterator notGoThrough_end() const;
|
||||
/// @}
|
||||
|
||||
/// @name Last left uses constant accessors.
|
||||
/// @{
|
||||
lastLUses_iterator lastLUses_begin() const;
|
||||
lastLUses_iterator lastLUses_end() const;
|
||||
/// @}
|
||||
|
||||
void addExtRUse(llvm::Value &value, llvm::Instruction &inst);
|
||||
void addExtRUses(const AnalysisInfo &toAdd);
|
||||
bool areExtRUsesDiff(const AnalysisInfo &toDiff);
|
||||
bool isInExtRUses(llvm::Value &value);
|
||||
llvm::Instruction *getInstFromExtRUses(llvm::Value &leftOp,
|
||||
llvm::Value &rightOp);
|
||||
void appendFromExtRUses(llvm::Value &value, InstSet &instSet);
|
||||
void copyExtRUses(ValInstSetMap &toCopy);
|
||||
void removeValFromExtRUses(llvm::Value &value);
|
||||
void clearExtRUses();
|
||||
void clearGlobsForIndirectCalls();
|
||||
void clearGlobsForCallsForFuncsOutOfModule();
|
||||
void addNotGoThrough(llvm::Value &value);
|
||||
void addNotGoThrough(const AnalysisInfo &toAdd);
|
||||
bool isInNotGoThrough(llvm::Value &value) const;
|
||||
void intersectNotGoThrough(const AnalysisInfo &toAdd);
|
||||
bool areNotGoThroughDiff(const AnalysisInfo &toDiff);
|
||||
void replaceExceptLastLUses(const AnalysisInfo toReplace);
|
||||
bool emptyNotGoThrough();
|
||||
void addLastLUse(llvm::Value &value, llvm::Instruction &inst);
|
||||
void addLastLUses(llvm::Value &value, const InstSet &instSet);
|
||||
bool areLastLUsesDiff(const AnalysisInfo &toDiff);
|
||||
bool emptyLastLUses();
|
||||
void addGlobForIndirectCalls(llvm::Value &value);
|
||||
void addToGlobsForIndirectCalls(const AnalysisInfo &toAdd);
|
||||
bool areGlobsForIndirectCallsDiff(const AnalysisInfo &toDiff);
|
||||
bool isInGlobsForIndirectCalls(llvm::Value &value) const;
|
||||
void removeGlobFromGlobsForIndirectCalls(llvm::Value &value);
|
||||
void addToGlobsForCallsForFuncsOutOfModule(llvm::Value &value);
|
||||
void addGlobsForCallsForFuncsOutOfModule(const AnalysisInfo &toAdd);
|
||||
bool areGlobsForCallsForFuncsOutOfModuleDiff(const AnalysisInfo &toDiff);
|
||||
bool isInGlobsForCallsForFuncsOutOfModule(llvm::Value &value) const;
|
||||
void removeGlobFromGlobsForCallForFuncsOutOfModule(llvm::Value &value);
|
||||
|
||||
void printExtRUses();
|
||||
void printNotGoThrough();
|
||||
void printLastLUses();
|
||||
void printGlobsForIndirectCalls();
|
||||
void printGlobsForCallsForFuncsOutOfModule();
|
||||
|
||||
private:
|
||||
/// Right uses with saved instructions for these right uses.
|
||||
ValInstSetMap extRUses;
|
||||
|
||||
/// Not go through global variables.
|
||||
ValSet notGoThrough;
|
||||
|
||||
/// Last left uses.
|
||||
ValInstSetMap lastLUses;
|
||||
|
||||
/// Global variables for indirect calls.
|
||||
ValSet globsForIndirectCalls;
|
||||
|
||||
/// Global variables for calls of functions defined out of module.
|
||||
ValSet globsForCallsForFuncsOutOfModule;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Analysis that finds out relations between load and store instructions.
|
||||
*
|
||||
* Before using the information from this analysis you need to run @c doAnalysis().
|
||||
*
|
||||
* For this analysis we need to know two terms.
|
||||
* - Left use: Assign something to global variable.
|
||||
* @code
|
||||
* store i32 1, i32* @glob0
|
||||
* @endcode
|
||||
* - Right use: Read something from global variable.
|
||||
* @code
|
||||
* %x = load i32, i32* @glob0
|
||||
* @endcode
|
||||
*
|
||||
* This analysis provides the following information:
|
||||
* - Extended right uses:
|
||||
* @code
|
||||
* func() {
|
||||
* x = g; // g is global variable. Right use.
|
||||
* }
|
||||
* @endcode
|
||||
* Assign of global variable @c g we call right use in @c func because this right
|
||||
* use is not connected with some store. We can have this information also for
|
||||
* basic block. Extended is because we map global variable to all right uses for
|
||||
* this global variable.
|
||||
*
|
||||
* - Last left uses:
|
||||
* @code
|
||||
* func() {
|
||||
* g = 4;
|
||||
* g = 2; // g is global variable. Last left use.
|
||||
* }
|
||||
* @endcode
|
||||
* Assign value 2 to global variable we call last left use in @c func because
|
||||
* after end of this function will global variable @c g have value 2. This info
|
||||
* is saved only for function.
|
||||
*
|
||||
* - Not go through:
|
||||
* @code
|
||||
* func() {
|
||||
* ... ; // Not go through for global variable g.
|
||||
* g = 2;
|
||||
* }
|
||||
* @endcode
|
||||
* Before assign to global variable @c g we have saved not go through for this
|
||||
* global variable. It means that we have saved last left use for this way of
|
||||
* execution. This info is saved for basic block and function.
|
||||
*
|
||||
* - Right uses for left use:
|
||||
* @code
|
||||
* bb:
|
||||
* store i32 1, i32* @glob0
|
||||
* br i1 1, label %left, label %right
|
||||
* left:
|
||||
* %x = load i32, i32* @glob0
|
||||
* right:
|
||||
* %y = load i32, i32* @glob0
|
||||
* @endcode
|
||||
* We can get info about relations between store and load instructions. Relation
|
||||
* we mean that which load instructions can be reached with some specific store
|
||||
* instruction. So in this info we have saved that both of loads can be reached
|
||||
* with mentioned store.
|
||||
*
|
||||
* - Left uses for call of functions defined out of module:
|
||||
* Analysis saves all stores that can reach call of functions defined out of
|
||||
* module.
|
||||
*
|
||||
* - Left uses for indirect calls:
|
||||
* Analysis saves all stores that can reach indirect calls.
|
||||
*
|
||||
* - Extended right uses for indirect calls:
|
||||
* Analysis saves all extended right uses that are before an indirect call of some
|
||||
* function. This extended right uses are saved to all functions that can be
|
||||
* indirectly called.
|
||||
* For example:
|
||||
* @code
|
||||
* indirect_call_of_some_func().
|
||||
* %x = load i32, i32* @glob0
|
||||
* @endcode
|
||||
* We save right uses below the indirect call to all functions that can be
|
||||
* indirectly called. In right uses can be saved some indirect call which means
|
||||
* that some indirect call can be reached from this indirect call.
|
||||
*/
|
||||
class StoreLoadAnalysis {
|
||||
private:
|
||||
/// Mapping of a function to @c ValInstSetMap.
|
||||
using FuncValInstSetMap = std::map<llvm::Function *,
|
||||
AnalysisInfo::ValInstSetMap>;
|
||||
|
||||
public:
|
||||
/// Right uses for left use constant iterator.
|
||||
/// - @c i->first is the left use,
|
||||
/// - @c i->second is the set of right uses that can be reached with left
|
||||
/// use.
|
||||
using rUsesForLUse_iterator = InstInstSetMap::const_iterator;
|
||||
|
||||
/// Extended right uses constant iterator.
|
||||
/// - @c i->first is the global variable for extended right uses,
|
||||
/// - @c i->second is the right uses.
|
||||
using extRUses_iterator = AnalysisInfo::extRUses_iterator;
|
||||
|
||||
/// Last left uses constant iterator.
|
||||
/// Attributes (@c i is an iterator):
|
||||
/// - @c i->first is the global variable for left uses,
|
||||
/// - @c i->second is the left uses.
|
||||
using lastLUses_iterator = AnalysisInfo::lastLUses_iterator;
|
||||
|
||||
/// Constant iterator for left uses that can reach call of functions defined
|
||||
/// out of module.
|
||||
using lUsesOutFunc_iterator = InstSet::const_iterator;
|
||||
|
||||
/// Constant iterator for left uses that can reach indirect call.
|
||||
using lUsesIndir_iterator = InstSet::const_iterator;
|
||||
|
||||
/// Constant iterator for extended right uses that can reach indirect call.
|
||||
/// Attributes (@c i is an iterator):
|
||||
/// - @c i->first is the function that can be reached by extended right
|
||||
/// uses,
|
||||
/// - @c i->second is the extended right uses that can reach indirect
|
||||
/// call.
|
||||
using extRUsesIndir_iterator = FuncValInstSetMap::const_iterator;
|
||||
|
||||
public:
|
||||
StoreLoadAnalysis();
|
||||
~StoreLoadAnalysis();
|
||||
|
||||
/// @name Right uses for left use constant accessors.
|
||||
/// @{
|
||||
rUsesForLUse_iterator rUsesForLUse_begin(llvm::Function &func) const;
|
||||
rUsesForLUse_iterator rUsesForLUse_end(llvm::Function &func) const;
|
||||
/// @}
|
||||
|
||||
/// @name Extended right uses constant accessors.
|
||||
/// @{
|
||||
extRUses_iterator extRUses_begin(llvm::Function &func) const;
|
||||
extRUses_iterator extRUses_end(llvm::Function &func) const;
|
||||
/// @}
|
||||
|
||||
/// @name Last left uses constant accessors.
|
||||
/// @{
|
||||
lastLUses_iterator lastLUses_begin(llvm::Function &func) const;
|
||||
lastLUses_iterator lastLUses_end(llvm::Function &func) const;
|
||||
/// @}
|
||||
|
||||
/// @name Constant accessors for left uses that can reach call of functions
|
||||
/// defined out of module.
|
||||
/// @{
|
||||
lUsesOutFunc_iterator lUsesOutFunc_begin() const;
|
||||
lUsesOutFunc_iterator lUsesOutFunc_end() const;
|
||||
/// @}
|
||||
|
||||
/// @name Constant accessors for left uses that can reach indirect call.
|
||||
/// @{
|
||||
lUsesIndir_iterator lUsesIndir_begin() const;
|
||||
lUsesIndir_iterator lUsesIndir_end() const;
|
||||
/// @}
|
||||
|
||||
/// @name Constant accessors for extended right uses that can reach indirect
|
||||
/// call.
|
||||
/// @{
|
||||
extRUsesIndir_iterator extRUsesIndir_begin() const;
|
||||
extRUsesIndir_iterator extRUsesIndir_end() const;
|
||||
/// @}
|
||||
|
||||
void doAnalysis(llvm::Module &module, GlobVarSet &globs,
|
||||
llvm::CallGraph &callGraph, bool funcsOutOfModule);
|
||||
bool isInLUsesForFuncOutOfModule(llvm::StoreInst &inst);
|
||||
bool hasSomeRUseEffectOutOfFunc(llvm::Value &globValue,
|
||||
llvm::Function &func);
|
||||
bool isInNotGoThrough(llvm::Value &globValue, llvm::Function &func);
|
||||
llvm::Instruction *getInstFromExtRUses(llvm::Value &leftOp,
|
||||
llvm::Value &rightOp, llvm::Function &func);
|
||||
InstSet getRUsesForLUse(llvm::Instruction &lUse);
|
||||
|
||||
void printFuncInfos();
|
||||
void printFuncInfo(llvm::Function &func);
|
||||
|
||||
private:
|
||||
/**
|
||||
* @brief Class for basic block info.
|
||||
*/
|
||||
class BBInfo {
|
||||
public:
|
||||
/// Ordered instructions constant iterator.
|
||||
using ordInsts_iterator = InstVec::const_iterator;
|
||||
|
||||
public:
|
||||
BBInfo(llvm::BasicBlock &bb);
|
||||
~BBInfo();
|
||||
|
||||
/// @name Ordered instructions accessors.
|
||||
/// @{
|
||||
ordInsts_iterator ordInsts_begin();
|
||||
ordInsts_iterator ordInsts_end();
|
||||
/// @}
|
||||
|
||||
llvm::BasicBlock &getBB();
|
||||
void addIndirectCall(llvm::CallInst &callInst);
|
||||
void addCallForFuncOutOfModule(llvm::CallInst &callInst);
|
||||
void addAllGlobsToNotGoThrough(const GlobVarSet &globs);
|
||||
void addAllGlobsToGlobsForIndirectCall(
|
||||
const GlobVarSet &globs);
|
||||
void addAllGlobsToGlobsForCallsForFuncsOutOfModule(
|
||||
const GlobVarSet &globs);
|
||||
void tryToAddInstToOrderedList(llvm::Instruction &inst);
|
||||
void clearExtRUses();
|
||||
void clearGlobsForIndirectCalls();
|
||||
void clearCallsForFuncsOutOfModule();
|
||||
void clearInfo();
|
||||
void doCopyOfBBInfoAndCreateNew();
|
||||
void diffBBInfoFromLastVisitAndSetIfChanged();
|
||||
bool hasAnalyzedOrdsInsts();
|
||||
bool hasChangedInfo();
|
||||
bool hasLastLUse(llvm::Value &globValue);
|
||||
void markAsAnalyzedOrdInsts();
|
||||
void markAsNotAnalyzedOrdInsts();
|
||||
void markAsChangedInfo();
|
||||
void markAsNotChangedInfo();
|
||||
|
||||
void printBBInfo();
|
||||
|
||||
public:
|
||||
// Basic block info.
|
||||
AnalysisInfo *bbInfo;
|
||||
|
||||
// Snapshot of basic block info.
|
||||
AnalysisInfo *copyBBInfo;
|
||||
|
||||
private:
|
||||
// For this basic block is created info.
|
||||
llvm::BasicBlock &bb;
|
||||
|
||||
// If has saved useful instructions.
|
||||
bool hasAnalyzedOrdInsts;
|
||||
|
||||
// If info was changed from last visit.
|
||||
bool changedInfo;
|
||||
|
||||
/// Saves order of useful instructions in basic block.
|
||||
InstVec orderedInstVec;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Class for function info.
|
||||
*/
|
||||
class FuncInfo {
|
||||
public:
|
||||
FuncInfo(llvm::Function &func);
|
||||
~FuncInfo();
|
||||
|
||||
/// @name Right uses for left use constant accessors.
|
||||
/// @{
|
||||
rUsesForLUse_iterator rUsesForLUse_begin() const;
|
||||
rUsesForLUse_iterator rUsesForLUse_end() const;
|
||||
/// @}
|
||||
|
||||
/// @name Extended right uses constant accessors.
|
||||
/// @{
|
||||
extRUses_iterator extRUses_begin() const;
|
||||
extRUses_iterator extRUses_end() const;
|
||||
/// @}
|
||||
|
||||
/// @name Last left uses constant accessors.
|
||||
/// @{
|
||||
lastLUses_iterator lastLUses_begin() const;
|
||||
lastLUses_iterator lastLUses_end() const;
|
||||
/// @}
|
||||
|
||||
llvm::Function &getFunc();
|
||||
void addInRUsesForLUse(llvm::Instruction &lUse,
|
||||
const InstSet &rUse);
|
||||
InstSet getRUsesForLUse(llvm::Instruction &lUse);
|
||||
bool isBBVisited(llvm::BasicBlock &bb);
|
||||
llvm::Instruction *getInstFromExtRUses(llvm::Value &leftOp,
|
||||
llvm::Value &rightOp);
|
||||
void markBBAsVisited(llvm::BasicBlock &bb);
|
||||
bool isAnalyzed();
|
||||
void markAsAnalyzed();
|
||||
void markAsNotAnalyzed();
|
||||
void doCopyOfFuncInfoAndCreateNew();
|
||||
void clearVisitedBBs();
|
||||
bool isDiffFuncInfoFromLastVisit();
|
||||
bool isInExtRUses(llvm::Value &globValue);
|
||||
bool isInNotGoThrough(llvm::Value &globValue);
|
||||
void setFuncInfo(BBInfo &bbInfo);
|
||||
void removeFromRUsesForLUse(llvm::Instruction &lUse);
|
||||
|
||||
void printFuncInfo();
|
||||
|
||||
public:
|
||||
// Function info.
|
||||
AnalysisInfo *funcInfo;
|
||||
|
||||
// Snapshot of function info.
|
||||
AnalysisInfo *copyFuncInfo;
|
||||
|
||||
private:
|
||||
/// For this function is created this function info.
|
||||
llvm::Function &func;
|
||||
|
||||
// Right uses for left use are saved in function where left use occur.
|
||||
/// Mapping of right uses for left use.
|
||||
InstInstSetMap rUsesForLUse;
|
||||
|
||||
/// Signalizes if this function was analyzed before.
|
||||
bool isAnalyzedFunc;
|
||||
|
||||
/// Visited basic blocks for this function.
|
||||
BBSet visitedBBs;
|
||||
};
|
||||
|
||||
/// Mapping of a function to function info.
|
||||
using FuncFuncInfoMap = std::map<llvm::Function *, FuncInfo *>;
|
||||
|
||||
/// Mapping of a basic block to basic block info.
|
||||
using BBBBInfoMap = std::map<llvm::BasicBlock *, BBInfo *>;
|
||||
|
||||
private:
|
||||
void runAnalysis();
|
||||
void setFuncsInModule(llvm::Module &module);
|
||||
void createInfoForAllFuncs();
|
||||
void analyzeFuncsInSCC();
|
||||
void analyzeFuncNotInSCC();
|
||||
bool goThroughFuncsInSCCAndReturnIfChanged();
|
||||
void initBeforeProcessFuncInSCC(llvm::Function &func);
|
||||
void clearBBInfosIn(llvm::Function &func);
|
||||
void processFunc(llvm::Function &func);
|
||||
void goThroughBBsAndAnalyzeThem(llvm::Function &func);
|
||||
void afterAnalysisBBsInFunc();
|
||||
void visitBBsInSCC();
|
||||
void visitBBNotInSCC();
|
||||
void spreadNotGoThroughInSCC();
|
||||
bool goThroughBBsSCCAndReturnIfChanged();
|
||||
void processBB(llvm::BasicBlock &bb);
|
||||
bool isNeedToProcessBB(llvm::BasicBlock &bb, bool isBBVisited);
|
||||
void addInfoFromSuccBBsFor(llvm::BasicBlock &bb, bool isVisitedBB);
|
||||
void processInstsInBB(llvm::BasicBlock::InstListType &instList);
|
||||
void processInstsInBB();
|
||||
void tryToProcessInst(llvm::Instruction &inst);
|
||||
void processFuncCall(llvm::CallInst &callInst);
|
||||
void processCalledFuncInfo(const AnalysisInfo &calledFuncInfo);
|
||||
void solveIndirectCall(llvm::CallInst &callInst);
|
||||
void solveCallForFuncsOutOfModule();
|
||||
void addToExtRUsesForIndirectCall(
|
||||
const AnalysisInfo::ValInstSetMap &extRUses,
|
||||
llvm::Function &func);
|
||||
void processExtRUsesAfterFuncCall(const AnalysisInfo &calledFuncInfo);
|
||||
void processGlobsForIndirectCallAfterFuncCall(const AnalysisInfo &calledFuncInfo);
|
||||
void processCallsForFuncOutOfModuleAfterFuncCall(
|
||||
const AnalysisInfo &calledFuncInfo);
|
||||
void addInRUsesForLUses(const InstSet &lUses,
|
||||
const InstSet &rUses);
|
||||
void addInRUsesForLUse(llvm::Instruction &lUse,
|
||||
const InstSet &rUses);
|
||||
void addInLUsesForIndirectCalls(const InstSet &lUses);
|
||||
void addInLUsesForFuncsOutOfModule(const InstSet &lUses);
|
||||
void addToLastLUsesWithCheck(const AnalysisInfo &toAdd);
|
||||
void processLUse(llvm::Value &globValue, llvm::Instruction &lUse);
|
||||
void solveIndirectCallsForLUses(llvm::Value &globValue,
|
||||
const InstSet &lUses);
|
||||
void solveCallsForFuncsOutOfModuleForLUses(llvm::Value &globValue,
|
||||
const InstSet &lUses);
|
||||
void tryToAddLastLUse(llvm::Value &globValue, llvm::Instruction &lUse);
|
||||
void setCurrBBInfo(llvm::BasicBlock &bb);
|
||||
void setCurrFuncInfo(llvm::Function &func);
|
||||
FuncInfo &getFuncInfoFor(llvm::Function &func) const;
|
||||
BBInfo *getBBInfoFor(llvm::BasicBlock &bb);
|
||||
void clear();
|
||||
|
||||
void printBBInfos();
|
||||
|
||||
private:
|
||||
/// Uses analysis.
|
||||
UsesAnalysis usesAnalysis;
|
||||
|
||||
/// Function traversal analysis.
|
||||
FuncTraversalAnalysis funcTraversalAnalysis;
|
||||
|
||||
/// Basic block traversal analysis.
|
||||
BBTraversalAnalysis bbTraversalAnalysis;
|
||||
|
||||
/// Signalizes if analysis have to count with functions defined out of
|
||||
/// module.
|
||||
bool countWithFuncsOutOfModule;
|
||||
|
||||
/// Functions that are in module.
|
||||
FuncVec funcsInModule;
|
||||
|
||||
/// Global variables that can be optimized.
|
||||
GlobVarSet globsToAnalyze;
|
||||
|
||||
/// Left uses that can reach indirect call.
|
||||
InstSet lUsesForIndirectCall;
|
||||
|
||||
/// Extended right uses that can reach indirect call.
|
||||
FuncValInstSetMap extRUsesForIndirectCall;
|
||||
|
||||
/// Left uses that can reach functions defined out of module.
|
||||
InstSet lUsesForFuncsOutOfModule;
|
||||
|
||||
/// Mapping of a function to its info.
|
||||
FuncFuncInfoMap funcInfoMap;
|
||||
|
||||
/// Mapping of a basic block to its info.
|
||||
BBBBInfoMap bbInfoMap;
|
||||
|
||||
/// Current function info.
|
||||
FuncInfo *currFuncInfo;
|
||||
|
||||
/// Current basic block info.
|
||||
BBInfo *currBBInfo;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
92
include/bin2llvmir/analyses/symbolic_tree.h
Normal file
92
include/bin2llvmir/analyses/symbolic_tree.h
Normal file
@ -0,0 +1,92 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/analyses/symbolic_tree.h
|
||||
* @brief Construction of symbolic tree from the given node.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*
|
||||
* This is an implementation of symbolic interpret. It is provided with
|
||||
* an initial node (llvm::Value) and it builds symbolic tree representing
|
||||
* the value of the node.
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_ANALYSES_SYMBOLIC_TREE_H
|
||||
#define BIN2LLVMIR_ANALYSES_SYMBOLIC_TREE_H
|
||||
|
||||
#include <set>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Instruction.h>
|
||||
#include <llvm/IR/Instructions.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
|
||||
#include "bin2llvmir/analyses/reaching_definitions.h"
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class SymbolicTree
|
||||
{
|
||||
public:
|
||||
SymbolicTree(
|
||||
ReachingDefinitionsAnalysis& rda,
|
||||
llvm::Value* v,
|
||||
std::map<llvm::Value*, llvm::Value*>* val2val = nullptr,
|
||||
unsigned maxUniqueNodes = 80,
|
||||
bool debug = false);
|
||||
SymbolicTree(
|
||||
ReachingDefinitionsAnalysis* rda,
|
||||
llvm::Value* v,
|
||||
llvm::Value* u,
|
||||
std::unordered_set<llvm::Value*>& processed,
|
||||
unsigned maxUniqueNodes,
|
||||
std::map<llvm::Value*, llvm::Value*>* v2v = nullptr);
|
||||
|
||||
SymbolicTree(const SymbolicTree& other) = default;
|
||||
SymbolicTree(SymbolicTree&& other) = default;
|
||||
SymbolicTree& operator=(SymbolicTree&& other);
|
||||
friend std::ostream& operator<<(
|
||||
std::ostream& out,
|
||||
const SymbolicTree& s);
|
||||
|
||||
bool isConstructedSuccessfully() const;
|
||||
bool isVal2ValMapUsed() const;
|
||||
void removeRegisterValues(Config* config);
|
||||
void removeGeneralRegisterLoads(Config* config);
|
||||
void removeStackLoads(Config* config);
|
||||
|
||||
void simplifyNode(Config* config);
|
||||
void _simplifyNode(Config* config);
|
||||
void simplifyNodeLoadStore();
|
||||
|
||||
void solveMemoryLoads(FileImage* image);
|
||||
SymbolicTree* getMaxIntValue();
|
||||
std::string print(unsigned indent = 0) const;
|
||||
|
||||
std::vector<SymbolicTree*> getPreOrder() const;
|
||||
std::vector<SymbolicTree*> getPostOrder() const;
|
||||
|
||||
private:
|
||||
void expandNode(
|
||||
ReachingDefinitionsAnalysis* RDA,
|
||||
std::map<llvm::Value*, llvm::Value*>* val2val,
|
||||
unsigned maxUniqueNodes,
|
||||
std::unordered_set<llvm::Value*>& processed);
|
||||
void propagateFlags();
|
||||
|
||||
void _getPreOrder(std::vector<SymbolicTree*>& res) const;
|
||||
void _getPostOrder(std::vector<SymbolicTree*>& res) const;
|
||||
|
||||
public:
|
||||
llvm::Value* value = nullptr;
|
||||
llvm::Value* user = nullptr;
|
||||
std::vector<SymbolicTree> ops;
|
||||
|
||||
private:
|
||||
bool _failed = false;
|
||||
bool _val2valUsed = false;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,80 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/analyses/traversal/bb_traversal_analysis.h
|
||||
* @brief Post-order traversal analysis for basic blocks.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_ANALYSES_TRAVERSAL_BB_TRAVERSAL_ANALYSIS_H
|
||||
#define BIN2LLVMIR_ANALYSES_TRAVERSAL_BB_TRAVERSAL_ANALYSIS_H
|
||||
|
||||
#include <llvm/IR/BasicBlock.h>
|
||||
|
||||
#include "bin2llvmir/analyses/traversal/traversal_analysis.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Post-order traversal analysis for basic blocks.
|
||||
*
|
||||
* Before using this analysis you have to run @c doBBsAnalysis().
|
||||
*
|
||||
* For this example below analysis returns basic blocks in this order: left,
|
||||
* right and bb, i.e. in post-order.
|
||||
* @code
|
||||
* bb:
|
||||
* br i1 1, label %left, label %right
|
||||
* left:
|
||||
* ret i32 0
|
||||
* right:
|
||||
* ret i32 0
|
||||
* @endcode
|
||||
*
|
||||
* Basic blocks can be in strongly connected component. Example below shows this
|
||||
* situation. In this situation it is not possible to do a correct post-order. It is
|
||||
* because basic blocks sccbb and sccbb1 form a strongly connected component.
|
||||
* You can use here two types of traversal:
|
||||
* -# When you use only the method @c getNextBB() to get the next block, then you
|
||||
* get basic blocks in this order: end, sccbb1, sccbb, bb. All basic blocks
|
||||
* only once.
|
||||
* -# When you are sure by @c isNextInSCC() that the next basic block is in
|
||||
* strongly connected component, then you can use @c getNextBBInSCC(). This
|
||||
* method causes iterating through strongly connected component until you use
|
||||
* method @c stopIteratingSCC(). Then use @c getNextBB() which returns the next
|
||||
* basic block which is out from iterated strongly connected component.
|
||||
*
|
||||
* For example order:
|
||||
* -# @c getNextBB() - returns end.
|
||||
* -# @c isNextInSCC() => returns @c true then @c getNextBBInSCC() -
|
||||
* returns sccbb1.
|
||||
* -# 2 times @c getNextBBInSCC() - returns sccbb and sccbb1.
|
||||
* -# @c stopIteratingSCC() and @c getNextBB() - returns bb.
|
||||
* @code
|
||||
* bb:
|
||||
* br label %sccbb
|
||||
* sccbb:
|
||||
* br label %sccbb1
|
||||
* sccbb1:
|
||||
* br i1 1, label %bb, label %end
|
||||
* end:
|
||||
* ret i32 0
|
||||
* @endcode
|
||||
*/
|
||||
class BBTraversalAnalysis: public TraversalAnalysis {
|
||||
public:
|
||||
BBTraversalAnalysis();
|
||||
~BBTraversalAnalysis();
|
||||
|
||||
void doBBsAnalysis(llvm::Function &func);
|
||||
llvm::BasicBlock *getNextBB();
|
||||
llvm::BasicBlock *getNextBBInSCC();
|
||||
|
||||
private:
|
||||
Node *processBBsInSCC(const BBVec &sccBBs, Node *prevNode);
|
||||
Node *processBBNotInSCC(const BBVec &sccBBs, Node *prevNode);
|
||||
|
||||
void print();
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
100
include/bin2llvmir/analyses/traversal/func_traversal_analysis.h
Normal file
100
include/bin2llvmir/analyses/traversal/func_traversal_analysis.h
Normal file
@ -0,0 +1,100 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/analyses/traversal/func_traversal_analysis.h
|
||||
* @brief Post-order traversal analysis for functions.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_ANALYSES_TRAVERSAL_FUNC_TRAVERSAL_ANALYSIS_H
|
||||
#define BIN2LLVMIR_ANALYSES_TRAVERSAL_FUNC_TRAVERSAL_ANALYSIS_H
|
||||
|
||||
#include <llvm/Analysis/CallGraphSCCPass.h>
|
||||
|
||||
#include "bin2llvmir/analyses/traversal/traversal_analysis.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Post-order traversal analysis for functions.
|
||||
*
|
||||
* Before using this analysis you have to run @c doFuncsAnalysis().
|
||||
*
|
||||
* For this example below analysis returns functions in this order: funcFirst,
|
||||
* funcSecond and funcTop. As you can see in post-order traversal.
|
||||
* @code
|
||||
* define void @funcTop() {
|
||||
* call void @funcFirst()
|
||||
* call void @funcSecond()
|
||||
* ret void
|
||||
* }
|
||||
*
|
||||
* define void @funcFirst() {
|
||||
* ret void
|
||||
* }
|
||||
*
|
||||
* define void @funcSecond() {
|
||||
* ret void
|
||||
* }
|
||||
* @endcode
|
||||
*
|
||||
* Functions can be in strongly connected component. Example below shows this
|
||||
* situation. In this situation it is not possible to do a correct post-order. It is
|
||||
* because functions sccFunc and sccFunc1 form a strongly connected component.
|
||||
* You can use here two types of traversal:
|
||||
* -# When you use only the method @c getNextFunc() to get the next function, then
|
||||
* you get functions in this order: endFunc, sccFunc1, sccFunc, funcTop. All
|
||||
* functions only once.
|
||||
* -# When you are sure by @c isNextInSCC() that the next function is in strongly
|
||||
* connected component, then you can use @c getNextFuncInSCC(). This method
|
||||
* causes iterating through strongly connected component until you use method
|
||||
* @c stopIteratingSCC(). Then use @c getNextFunc() which returns the next
|
||||
* function which is out from iterated strongly connected component.
|
||||
*
|
||||
* For example order:
|
||||
* -# @c getNextFunc() - returns endFunc.
|
||||
* -# @c isNextInSCC() => returns @c true then @c getNextFuncInSCC() -
|
||||
* returns sccFunc1.
|
||||
* -# 2 times @c getNextFuncInSCC() - returns sccFunc and sccFunc1.
|
||||
* -# @c stopIteratingSCC() and @c getNextFunc() - returns funcTop.
|
||||
* @code
|
||||
* define void @funcTop() {
|
||||
* call void @sccFunc()
|
||||
* ret void
|
||||
* }
|
||||
*
|
||||
* define void @sccFunc() {
|
||||
* call void @sccFunc1()
|
||||
* ret void
|
||||
* }
|
||||
*
|
||||
* define void @sccFunc1() {
|
||||
* call void @sccFunc()
|
||||
* call void @endFunc()
|
||||
* ret void
|
||||
* }
|
||||
*
|
||||
* define void @endFunc() {
|
||||
* ret void
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
class FuncTraversalAnalysis: public TraversalAnalysis {
|
||||
public:
|
||||
FuncTraversalAnalysis();
|
||||
~FuncTraversalAnalysis();
|
||||
|
||||
void doFuncsAnalysis(llvm::CallGraph &callGraph);
|
||||
llvm::Function *getNextFunc();
|
||||
llvm::Function *getNextFuncInSCC();
|
||||
|
||||
private:
|
||||
Node *processFuncsInSCC(const CallGraphNodeVec &callNodesVec,
|
||||
Node *prevNode);
|
||||
Node *processFuncNotInSCC(const CallGraphNodeVec &callNodesVec,
|
||||
Node *prevNode);
|
||||
|
||||
void print();
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
76
include/bin2llvmir/analyses/traversal/traversal_analysis.h
Normal file
76
include/bin2llvmir/analyses/traversal/traversal_analysis.h
Normal file
@ -0,0 +1,76 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/analyses/traversal/traversal_analysis.h
|
||||
* @brief Base class for traversal analyses.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_ANALYSES_TRAVERSAL_TRAVERSAL_ANALYSIS_H
|
||||
#define BIN2LLVMIR_ANALYSES_TRAVERSAL_TRAVERSAL_ANALYSIS_H
|
||||
|
||||
#include <llvm/IR/Value.h>
|
||||
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Base class for traversal analyses.
|
||||
*
|
||||
* This class contains shared implementation for traversal analyses so you can't
|
||||
* create instance of this class. You have to use specific traversal.
|
||||
*/
|
||||
class TraversalAnalysis {
|
||||
public:
|
||||
bool hasSomethingToReturn();
|
||||
bool isNextInSCC();
|
||||
bool causeNextNewSCCIteration();
|
||||
void stopIteratingSCC();
|
||||
|
||||
protected:
|
||||
/// One node in linked list.
|
||||
struct Node {
|
||||
static Node *createNodeInSCC(llvm::Value &val);
|
||||
static Node *createNodeNotInSCC(llvm::Value &val);
|
||||
|
||||
/// Value of node.
|
||||
llvm::Value &value;
|
||||
|
||||
/// If this node represents one node of SCC.
|
||||
bool isInSCC;
|
||||
|
||||
/// Next node.
|
||||
Node *nextNode;
|
||||
|
||||
/// First node in SCC.
|
||||
Node *sccRevNode;
|
||||
|
||||
private:
|
||||
Node(llvm::Value &value, bool isInSCC): value(value), isInSCC(isInSCC),
|
||||
nextNode(nullptr), sccRevNode(nullptr) {}
|
||||
};
|
||||
|
||||
protected:
|
||||
TraversalAnalysis();
|
||||
~TraversalAnalysis();
|
||||
|
||||
llvm::Value *getNextVal();
|
||||
llvm::Value *getNextValInSCC();
|
||||
void solveConnectionWithNextNode(Node *prevNode, Node *nextNode);
|
||||
void clear();
|
||||
|
||||
void print();
|
||||
|
||||
protected:
|
||||
/// The first node in linked list.
|
||||
Node *headNode;
|
||||
|
||||
/// Current node.
|
||||
Node *currNode;
|
||||
|
||||
/// Signalizes if next basic block causes new SCC iteration.
|
||||
bool causesNextNewSCCIter;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
132
include/bin2llvmir/analyses/uses_analysis.h
Normal file
132
include/bin2llvmir/analyses/uses_analysis.h
Normal file
@ -0,0 +1,132 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/analyses/uses_analysis.h
|
||||
* @brief Uses analysis.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_ANALYSES_USES_ANALYSIS_H
|
||||
#define BIN2LLVMIR_ANALYSES_USES_ANALYSIS_H
|
||||
|
||||
#include <llvm/IR/GlobalVariable.h>
|
||||
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Analysis that provides support for getting information about uses of
|
||||
* LLVM IR values.
|
||||
*
|
||||
* This class contains two types of methods.
|
||||
* -# Methods that need @c doUsesAnalysis() to be run first: @c getUseInfo().
|
||||
* -# Methods that need nothing to be run beforehand: all other methods.
|
||||
*/
|
||||
class UsesAnalysis {
|
||||
public:
|
||||
/**
|
||||
* @brief Structure to save info about use.
|
||||
*
|
||||
* Uses for global variables are these:
|
||||
* Left use because something is assigned to global variable:
|
||||
* @code
|
||||
* store ..., i32* @glob
|
||||
* @endcode
|
||||
* Right use because something is loaded from global variable:
|
||||
* @code
|
||||
* load i32, i32* @glob
|
||||
* @endcode
|
||||
*/
|
||||
struct UseInfo {
|
||||
/**
|
||||
* @brief Constructs a new @c UseInfo.
|
||||
*
|
||||
* @param[in] value Value of global variable for which is this use saved.
|
||||
* @param[in] isLUse If is a left use.
|
||||
*/
|
||||
explicit UseInfo(llvm::Value *value = nullptr, bool isLUse = false):
|
||||
isLUse(isLUse), value(value) {}
|
||||
|
||||
/**
|
||||
* @brief Returns a new left @c UseInfo.
|
||||
*
|
||||
* @param[in] value Value of global variable for which is this use saved.
|
||||
*/
|
||||
static UseInfo createLeftUseInfo(llvm::Value *value) {
|
||||
return UseInfo(value, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns a new right @c UseInfo.
|
||||
*
|
||||
* @param[in] value Value of global variable for which is this use saved.
|
||||
*/
|
||||
static UseInfo createRightUseInfo(llvm::Value *value) {
|
||||
return UseInfo(value, false);
|
||||
}
|
||||
|
||||
/// If is left use.
|
||||
bool isLUse;
|
||||
|
||||
/// Value of global variable for which is this use saved.
|
||||
llvm::Value *value;
|
||||
};
|
||||
|
||||
public:
|
||||
UsesAnalysis();
|
||||
~UsesAnalysis();
|
||||
|
||||
std::string getName() const { return "UsesAnalysis"; }
|
||||
|
||||
void doUsesAnalysis(const GlobVarSet &globs);
|
||||
const UseInfo *getUseInfo(llvm::BasicBlock &bb, llvm::Instruction &inst);
|
||||
|
||||
static bool hasValueUsesExcept(llvm::Value &value,
|
||||
const InstSet &instSet);
|
||||
static bool hasNoUse(llvm::GlobalVariable &glob);
|
||||
static bool hasUsesOnlyInOneFunc(llvm::GlobalVariable &glob);
|
||||
static bool hasSomeUseVolatileLoadOrStore(llvm::GlobalVariable &glob);
|
||||
|
||||
void printBBsUses();
|
||||
void printBBUses(llvm::BasicBlock &bb);
|
||||
|
||||
private:
|
||||
/// Mapping of an instruction to @c UseInfo.
|
||||
using InstUseInfoMap = std::map<llvm::Instruction *, UseInfo>;
|
||||
|
||||
/**
|
||||
* Class that contains uses info for basic block.
|
||||
*/
|
||||
class BBUses {
|
||||
public:
|
||||
BBUses();
|
||||
~BBUses();
|
||||
|
||||
void addNewLUse(llvm::Instruction &lUse);
|
||||
void addNewRUse(llvm::Instruction &rUse);
|
||||
const UseInfo *getUseInfo(llvm::Instruction &inst);
|
||||
|
||||
void printBBUses();
|
||||
|
||||
private:
|
||||
/// Contains uses of global variables.
|
||||
InstUseInfoMap useInfoMap;
|
||||
};
|
||||
|
||||
/// Mapping of a basic block to basic block uses.
|
||||
using BBBBUsesMap = std::map<llvm::BasicBlock *, BBUses *>;
|
||||
|
||||
private:
|
||||
BBUses &getIfExistsOrCreateNewBBInfo(llvm::BasicBlock &bb);
|
||||
void goThroughUses(llvm::GlobalVariable &glob);
|
||||
void addNewLUse(llvm::BasicBlock &bb, llvm::Instruction &lUse);
|
||||
void addNewRUse(llvm::BasicBlock &bb, llvm::Instruction &rUse);
|
||||
void clear();
|
||||
|
||||
private:
|
||||
/// Contains uses of global variables for basic blocks.
|
||||
BBBBUsesMap bbUseInfoMap;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
184
include/bin2llvmir/analyses/var_depend_analysis.h
Normal file
184
include/bin2llvmir/analyses/var_depend_analysis.h
Normal file
@ -0,0 +1,184 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/analyses/var_depend_analysis.h
|
||||
* @brief Analysis for variable dependency of PHINodes.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_ANALYSES_VAR_DEPEND_ANALYSIS_H
|
||||
#define BIN2LLVMIR_ANALYSES_VAR_DEPEND_ANALYSIS_H
|
||||
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <llvm/IR/BasicBlock.h>
|
||||
#include <llvm/IR/Instructions.h>
|
||||
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Analysis of variable dependency of PHI Nodes.
|
||||
*
|
||||
* This class supports two things:
|
||||
* - Detects the cycles and return PHI nodes that have to be optimized to remove
|
||||
* the cycles.
|
||||
* - Makes an analysis that returns PHI nodes in order that sequential
|
||||
* processing of PHI nodes is equivalent with parallel processing.
|
||||
*/
|
||||
class VarDependAnalysis {
|
||||
public:
|
||||
/// Vector of PHI nodes.
|
||||
using PHINodeVec = std::vector<llvm::PHINode *>;
|
||||
|
||||
/**
|
||||
* @brief Basic block with vector of PHI nodes.
|
||||
*/
|
||||
struct BBVecOfPHINodes {
|
||||
/**
|
||||
* @brief Constructs a new @c BBVecOfPHINodes.
|
||||
*
|
||||
* @param[in] bb Basic block that identifies PHI nodes in @a vecOfPHINodes.
|
||||
* @param[in] vecOfPHINodes Vector of PHI nodes.
|
||||
*/
|
||||
BBVecOfPHINodes(llvm::BasicBlock *bb, PHINodeVec
|
||||
vecOfPHINodes): bb(bb), phiNodeVec(vecOfPHINodes) {}
|
||||
|
||||
/**
|
||||
* @brief Move constructor.
|
||||
*
|
||||
* @param[in] other This value is moved.
|
||||
*/
|
||||
BBVecOfPHINodes(BBVecOfPHINodes &&other): bb(other.bb),
|
||||
phiNodeVec(std::move(other.phiNodeVec)) {}
|
||||
|
||||
/// Basic block that identifies PHI nodes.
|
||||
llvm::BasicBlock *bb;
|
||||
|
||||
/// Vector of PHI nodes that have to be optimized.
|
||||
PHINodeVec phiNodeVec;
|
||||
};
|
||||
|
||||
/// Map of string to basic block with vector of PHI nodes.
|
||||
using StringBBVecOfPHINodesMap = std::map<std::string, BBVecOfPHINodes>;
|
||||
|
||||
public:
|
||||
VarDependAnalysis();
|
||||
~VarDependAnalysis();
|
||||
|
||||
std::string getId() const { return "VarDependAnalysis"; }
|
||||
|
||||
void addEdge(const std::string &srcNodeName, const std::string &dstNodeName,
|
||||
llvm::BasicBlock &incBB, llvm::PHINode *phiNode);
|
||||
void clear();
|
||||
const StringBBVecOfPHINodesMap &detectCycleVarDependency();
|
||||
const PHINodeVec &detectNonCycleVarDependency();
|
||||
|
||||
private:
|
||||
/**
|
||||
* @brief Node of graph.
|
||||
*/
|
||||
class Node {
|
||||
public:
|
||||
/**
|
||||
* @brief Successor node with basic block.
|
||||
*/
|
||||
struct Successor {
|
||||
/**
|
||||
* @brief Constructs a new @c Successor.
|
||||
*
|
||||
* @param[in] succ Successor node.
|
||||
* @param[in] incBB Incoming basic block.
|
||||
*/
|
||||
Successor(Node *succ, llvm::BasicBlock &incBB): succ(succ) {
|
||||
incBBs.insert(&incBB);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Move constructor.
|
||||
*
|
||||
* @param[in] other This value is moved.
|
||||
*/
|
||||
Successor(Successor &&other): succ(other.succ),
|
||||
incBBs(std::move(other.incBBs)) {}
|
||||
|
||||
void print();
|
||||
|
||||
/// Successor node.
|
||||
Node *succ;
|
||||
|
||||
/// Basic blocks that characterize the successor.
|
||||
BBSet incBBs;
|
||||
};
|
||||
|
||||
/// Mapping a string to successor.
|
||||
using SuccMap = std::map<std::string, Successor>;
|
||||
|
||||
public:
|
||||
Node(const std::string &name, llvm::PHINode *phiNode = nullptr);
|
||||
~Node();
|
||||
|
||||
void addSucc(Node &succ, llvm::BasicBlock &incBB);
|
||||
void markAsSolved();
|
||||
void markAsVisited();
|
||||
void markAsNotSolved();
|
||||
void markAsNotVisited();
|
||||
void markAsSolvedAndNotVisited();
|
||||
|
||||
void print();
|
||||
|
||||
public:
|
||||
/// Mapping destination node to successors for source node.
|
||||
SuccMap succMap;
|
||||
|
||||
/// Name of node.
|
||||
std::string name;
|
||||
|
||||
/// PHI node for this node.
|
||||
llvm::PHINode *phiNode;
|
||||
|
||||
/// Signalizes if this node was visited.
|
||||
bool visited;
|
||||
|
||||
/// Signalizes if this node was solved.
|
||||
bool solved;
|
||||
};
|
||||
|
||||
/// String to Node map.
|
||||
using StringNodeMap = std::map<std::string, Node *>;
|
||||
|
||||
/// Vector of nodes.
|
||||
using NodeVec = std::vector<Node *>;
|
||||
|
||||
private:
|
||||
void addResultOfCycle(Node::Successor &successor);
|
||||
Node &findOrCreateNode(const std::string &nodeName);
|
||||
void iterateThroughNodesCycleDetect();
|
||||
void iterateThroughNodesNonCycleVarDependency();
|
||||
void iterateThroughSuccessorsAndVisitTheirNode(Node &node);
|
||||
Node *visitNodeCycleDetect(VarDependAnalysis::Node &node);
|
||||
void visitNodeNonCycleVarDependency(VarDependAnalysis::Node &node);
|
||||
void setAllNodesAsNotSolved();
|
||||
|
||||
void print();
|
||||
|
||||
private:
|
||||
/// Mapping of a name of node to nodes.
|
||||
StringNodeMap nodeMap;
|
||||
|
||||
/// This vector is used to ensure the same order of PHI nodes when we don't
|
||||
/// need any optimization.
|
||||
NodeVec nodeVec;
|
||||
|
||||
/// Result of cycles analysis.
|
||||
StringBBVecOfPHINodesMap resultForCycles;
|
||||
|
||||
/// Result of non-cycle variable dependency analysis.
|
||||
PHINodeVec resultOfNonCycle;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,58 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/adapter_methods/adapter_methods.h
|
||||
* @brief Detection of C++ adapter methods created by compiler.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_ADAPTER_METHODS_ADAPTER_METHODS_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_ADAPTER_METHODS_ADAPTER_METHODS_H
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* This pass finds functions which are adapters to other functions.
|
||||
* They typically either directly call some other function or do some simple
|
||||
* arithmetics and then make the call.
|
||||
*
|
||||
* This pass *MUST* run after -instcombine to make instruction matching
|
||||
* simple (possible).
|
||||
*
|
||||
* Right now only one known pattern is implemented.
|
||||
* When other patterns are found implement them here as separate functions.
|
||||
*
|
||||
* TODO: Right now, information is gathered but not used.
|
||||
* Use it in JSON config, modify functions' names or add functions' comments.
|
||||
*/
|
||||
class AdapterMethods: public llvm::FunctionPass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
AdapterMethods();
|
||||
virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override;
|
||||
virtual bool runOnFunction(llvm::Function& F) override;
|
||||
|
||||
private:
|
||||
using AdapterToAdapteeMap = std::map<
|
||||
const llvm::Function*,
|
||||
const llvm::Function*>;
|
||||
|
||||
private:
|
||||
void searchForPattern1(llvm::Function& F);
|
||||
// more patterns ...
|
||||
void handleAdapter(llvm::Function* adapter, llvm::Function* target);
|
||||
|
||||
private:
|
||||
AdapterToAdapteeMap _adapters;
|
||||
Config* config = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,38 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/asm_inst_remover/asm_inst_remover.h
|
||||
* @brief Remove all special instructions used to map LLVM instructions to
|
||||
* ASM instructions.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_ASM_INST_REMOVER_ASM_INST_REMOVER_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_ASM_INST_REMOVER_ASM_INST_REMOVER_H
|
||||
|
||||
#include <set>
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class AsmInstructionRemover : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
AsmInstructionRemover();
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
bool runOnModuleCustom(llvm::Module& M, Config* c);
|
||||
|
||||
private:
|
||||
bool run(llvm::Module& M);
|
||||
bool renameTempVariables(llvm::Module& M);
|
||||
|
||||
private:
|
||||
Config* _config = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,42 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/cfg_function_detection/cfg_function_detection.h
|
||||
* @brief Detect functions using control flow graph.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_CFG_FUNCTION_DETECTION_CFG_FUNCTION_DETECTION_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_CFG_FUNCTION_DETECTION_CFG_FUNCTION_DETECTION_H
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/providers/asm_instruction.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class CfgFunctionDetection : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
CfgFunctionDetection();
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
bool runOnModuleCustom(llvm::Module& M, Config* c, FileImage* i);
|
||||
|
||||
private:
|
||||
bool run();
|
||||
bool runOne();
|
||||
bool isArmDataInCode(AsmInstruction& ai);
|
||||
llvm::Instruction* isPotentialSplitInstruction(llvm::Instruction* i);
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
FileImage* _image = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
62
include/bin2llvmir/optimizations/class_hierarchy/hierarchy.h
Normal file
62
include/bin2llvmir/optimizations/class_hierarchy/hierarchy.h
Normal file
@ -0,0 +1,62 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/class_hierarchy/hierarchy.h
|
||||
* @brief Represents class hierarchy.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_CLASS_HIERARCHY_HIERARCHY_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_CLASS_HIERARCHY_HIERARCHY_H
|
||||
|
||||
#include <list>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
|
||||
#include "bin2llvmir/optimizations/vtable/vtable.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
class Class
|
||||
{
|
||||
public:
|
||||
std::string dump() const;
|
||||
retdec_config::Class getConfigClass(
|
||||
llvm::Module* m,
|
||||
Config* config) const;
|
||||
|
||||
public:
|
||||
std::string name;
|
||||
std::set<const llvm::Function*> constructors;
|
||||
std::set<const llvm::Function*> destructors;
|
||||
std::set<const llvm::Function*> methods;
|
||||
std::set<const llvm::Function*> virtualFunctions;
|
||||
std::set<const Vtable*> virtualFunctionTables;
|
||||
std::set<Class*> superClasses;
|
||||
llvm::Value* structure;
|
||||
|
||||
ClassTypeInfo* gccRtti;
|
||||
RTTITypeDescriptor* msvcRtti;
|
||||
};
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
class ClassHierarchy
|
||||
{
|
||||
public:
|
||||
Class* addAndGetNewClass();
|
||||
|
||||
std::string dump() const;
|
||||
|
||||
public:
|
||||
std::list<Class> classes;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,46 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/class_hierarchy/hierarchy_analysis.h
|
||||
* @brief Analyse results of other analyses to reconstruct class hierarchy.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_CLASS_HIERARCHY_HIERARCHY_ANALYSIS_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_CLASS_HIERARCHY_HIERARCHY_ANALYSIS_H
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/optimizations/class_hierarchy/hierarchy.h"
|
||||
#include "bin2llvmir/optimizations/ctor_dtor/ctor_dtor.h"
|
||||
#include "bin2llvmir/optimizations/vtable/vtable.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
class ClassHierarchyAnalysis : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
ClassHierarchyAnalysis();
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override;
|
||||
|
||||
void processRttiGcc();
|
||||
void processRttiMsvc();
|
||||
void processVtablesGcc(std::map<ClassTypeInfo*, Class*> &rtti2class);
|
||||
void processVtablesMsvc(std::map<RTTITypeDescriptor*, Class*> &rtti2class);
|
||||
void processCtorsDtors();
|
||||
|
||||
void setToConfig(llvm::Module* m) const;
|
||||
|
||||
private:
|
||||
ClassHierarchy classHierarchy;
|
||||
Config* config = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,36 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/cond_branch_opt/cond_branch_opt.h
|
||||
* @brief Conditional branch optimization.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_COND_BRANCH_OPT_COND_BRANCH_OPT_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_COND_BRANCH_OPT_COND_BRANCH_OPT_H
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class CondBranchOpt : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
CondBranchOpt();
|
||||
virtual bool runOnModule(llvm::Module& m) override;
|
||||
bool runOnModuleCustom(llvm::Module& m, Config* c);
|
||||
|
||||
private:
|
||||
bool run();
|
||||
bool runOnFunction(ReachingDefinitionsAnalysis& RDA, llvm::Function* f);
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
52
include/bin2llvmir/optimizations/constants/constants.h
Normal file
52
include/bin2llvmir/optimizations/constants/constants.h
Normal file
@ -0,0 +1,52 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/constants/constants.h
|
||||
* @brief Composite type reconstruction analysis.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_CONSTANTS_CONSTANTS_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_CONSTANTS_CONSTANTS_H
|
||||
|
||||
#include <set>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "tl-cpputils/address.h"
|
||||
#include "bin2llvmir/analyses/reaching_definitions.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/debugformat.h"
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class ConstantsAnalysis : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
ConstantsAnalysis();
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override;
|
||||
|
||||
private:
|
||||
void checkForGlobalInInstruction(
|
||||
ReachingDefinitionsAnalysis& RDA,
|
||||
llvm::Instruction* inst,
|
||||
llvm::Value* val,
|
||||
bool storeValue = false);
|
||||
void tagFunctionsWithUsedCryptoGlobals();
|
||||
void setPic32GpValue(ReachingDefinitionsAnalysis& RDA);
|
||||
|
||||
private:
|
||||
llvm::Module * m_module = nullptr;
|
||||
Config* config = nullptr;
|
||||
FileImage* objf = nullptr;
|
||||
DebugFormat* dbgf = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
110
include/bin2llvmir/optimizations/control_flow/control_flow.h
Normal file
110
include/bin2llvmir/optimizations/control_flow/control_flow.h
Normal file
@ -0,0 +1,110 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/control_flow/control_flow.h
|
||||
* @brief Reconstruct control flow.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_CONTROL_FLOW_CONTROL_FLOW_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_CONTROL_FLOW_CONTROL_FLOW_H
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/analyses/reaching_definitions.h"
|
||||
#include "bin2llvmir/analyses/symbolic_tree.h"
|
||||
#include "bin2llvmir/providers/asm_instruction.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
#include "bin2llvmir/providers/lti.h"
|
||||
#include "bin2llvmir/utils/ir_modifier.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class ControlFlow : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
ControlFlow();
|
||||
virtual bool runOnModule(llvm::Module& m) override;
|
||||
bool runOnModuleCustom(
|
||||
llvm::Module& m,
|
||||
Config* c,
|
||||
FileImage* img);
|
||||
|
||||
private:
|
||||
bool run();
|
||||
|
||||
bool runX86();
|
||||
bool runX86Function(llvm::Function* f);
|
||||
bool runx86Return(AsmInstruction& ai);
|
||||
bool runx86Call(AsmInstruction& ai);
|
||||
bool runX86JmpNopNopPattern();
|
||||
|
||||
bool runMips();
|
||||
bool runMipsFunction(llvm::Function* f);
|
||||
bool runMipsReturn(AsmInstruction& ai);
|
||||
bool runMipsCall(AsmInstruction& ai);
|
||||
bool runMipsDynamicStubPatter();
|
||||
|
||||
bool runArm();
|
||||
bool runArmFunction(llvm::Function* f);
|
||||
bool runArmReturn(AsmInstruction& ai);
|
||||
bool runArmCall(AsmInstruction& ai);
|
||||
|
||||
bool runPowerpc();
|
||||
bool runPowerpcFunction(llvm::Function* f);
|
||||
bool runPowerpcReturn(AsmInstruction& ai);
|
||||
bool runPowerpcCall(AsmInstruction& ai);
|
||||
|
||||
bool runGeneric();
|
||||
bool runGenericFunction(llvm::Function* f);
|
||||
bool runGenericBr(AsmInstruction& ai, llvm::CallInst* call);
|
||||
bool runGenericCondBr(AsmInstruction& ai, llvm::CallInst* call);
|
||||
|
||||
llvm::ReturnInst* transformToReturn(
|
||||
AsmInstruction& ai,
|
||||
llvm::CallInst* call = nullptr);
|
||||
llvm::Value* getOrMakeFunction(tl_cpputils::Address addr);
|
||||
llvm::Value* makeFunction(tl_cpputils::Address addr);
|
||||
llvm::CallInst* transformToCall(
|
||||
AsmInstruction& ai,
|
||||
llvm::CallInst* brCall,
|
||||
llvm::Value* called);
|
||||
|
||||
llvm::GlobalVariable* getReturnObject();
|
||||
|
||||
bool toReturn();
|
||||
bool toCall();
|
||||
bool toFunction();
|
||||
bool toBr();
|
||||
bool toCondBr();
|
||||
bool toSwitch();
|
||||
bool fixMain();
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
FileImage* _image = nullptr;
|
||||
ReachingDefinitionsAnalysis _RDA;
|
||||
IrModifier _irmodif;
|
||||
|
||||
struct SwitchEntry
|
||||
{
|
||||
llvm::CallInst* call = nullptr;
|
||||
AsmInstruction aiSource;
|
||||
llvm::Instruction* idx = nullptr;
|
||||
llvm::BasicBlock* defaultBb = nullptr;
|
||||
std::vector<std::pair<unsigned, AsmInstruction>> jmpTable;
|
||||
};
|
||||
|
||||
std::set<AsmInstruction> _toFunctions;
|
||||
std::set<std::pair<AsmInstruction, llvm::CallInst*>> _toReturn;
|
||||
std::set<std::pair<llvm::CallInst*, tl_cpputils::Address>> _toCall;
|
||||
std::set<std::pair<llvm::CallInst*, AsmInstruction>> _toBr;
|
||||
std::set<std::pair<llvm::CallInst*, AsmInstruction>> _toCondBr;
|
||||
std::list<SwitchEntry> _toSwitch;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
122
include/bin2llvmir/optimizations/ctor_dtor/ctor_dtor.h
Normal file
122
include/bin2llvmir/optimizations/ctor_dtor/ctor_dtor.h
Normal file
@ -0,0 +1,122 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/ctor_dtor/ctor_dtor.h
|
||||
* @brief Constructor and destructor detection analysis.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_CTOR_DTOR_CTOR_DTOR_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_CTOR_DTOR_CTOR_DTOR_H
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
#include <llvm/IR/Instructions.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/optimizations/vtable/vtable.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class CtorDtor : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
CtorDtor();
|
||||
virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override;
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
|
||||
public:
|
||||
class FunctionInfo
|
||||
{
|
||||
public:
|
||||
/// Super method calls in order.
|
||||
std::vector<const llvm::CallInst*> superMethods;
|
||||
/// Super method offsets in order.
|
||||
std::vector<int> superMethodOffsets;
|
||||
/// Virtual table stores in order.
|
||||
std::vector<std::pair<llvm::StoreInst*, Vtable*>> vftableStores;
|
||||
/// Virtual table offsets in order.
|
||||
std::vector<int> vftableOffsets;
|
||||
///
|
||||
bool ctor = false;
|
||||
bool dtor = false;
|
||||
};
|
||||
|
||||
public:
|
||||
using FunctionSet = std::set<llvm::Function*>;
|
||||
using FunctionToInfo = std::map<llvm::Function*, FunctionInfo>;
|
||||
using StoreToVtable = std::map<llvm::StoreInst*, Vtable*>;
|
||||
|
||||
public:
|
||||
FunctionToInfo& getResults();
|
||||
|
||||
private:
|
||||
void findPossibleCtorsDtors();
|
||||
void analyseFunction(llvm::Function* fnc);
|
||||
FunctionInfo analyseFunctionForward(llvm::Function* fnc);
|
||||
FunctionInfo analyseFunctionBackward(llvm::Function* fnc);
|
||||
int getOffset(const llvm::Value* ecxStoreOp);
|
||||
const llvm::StoreInst* findPreviousStoreToECX(
|
||||
const llvm::Instruction* inst);
|
||||
void propagateCtorDtor();
|
||||
void replaceVtablesPointersInStores(
|
||||
llvm::StoreInst* store,
|
||||
Vtable* vtable);
|
||||
|
||||
template<class T>
|
||||
FunctionInfo analyseFunctionCommon(T begin, T end);
|
||||
|
||||
private:
|
||||
llvm::Module *module = nullptr;
|
||||
Config* config = nullptr;
|
||||
FunctionSet possibleCtorsDtors;
|
||||
StoreToVtable stores2vtables;
|
||||
FunctionToInfo function2info;
|
||||
};
|
||||
|
||||
template<class T>
|
||||
CtorDtor::FunctionInfo CtorDtor::analyseFunctionCommon(T begin, T end)
|
||||
{
|
||||
enum
|
||||
{
|
||||
STEP_SUPER,
|
||||
STEP_VTABLES
|
||||
} step = STEP_SUPER;
|
||||
|
||||
CtorDtor::FunctionInfo result;
|
||||
|
||||
for (T it = begin; it != end; ++it)
|
||||
{
|
||||
llvm::Instruction *i = &(*it);
|
||||
if (step == STEP_SUPER)
|
||||
{
|
||||
if (llvm::CallInst *call = llvm::dyn_cast<llvm::CallInst>(i))
|
||||
{
|
||||
if (possibleCtorsDtors.count(call->getCalledFunction()))
|
||||
{
|
||||
result.superMethods.push_back(call);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (step == STEP_SUPER || step == STEP_VTABLES)
|
||||
{
|
||||
if (llvm::StoreInst *store = llvm::dyn_cast<llvm::StoreInst>(i))
|
||||
{
|
||||
auto fIt = stores2vtables.find(store);
|
||||
if (fIt != stores2vtables.end())
|
||||
{
|
||||
result.vftableStores.push_back( {store, fIt->second} );
|
||||
step = STEP_VTABLES;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,76 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/data_references/data_references.h
|
||||
* @brief Search for references in input file.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_DATA_REFERENCES_DATA_REFERENCES_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_DATA_REFERENCES_DATA_REFERENCES_H
|
||||
|
||||
#include <set>
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/Pass.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
|
||||
#include "tl-cpputils/address.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* This pass scans an entire binary word by word for words that reference
|
||||
* some other location in the binary = it holds an address of that location.
|
||||
* If there is some known object at that location (function, global variable,
|
||||
* instruction, etc.) it is associated with the reference.
|
||||
*
|
||||
* TODO: This should not be a pass but analysis that works on demand. If it
|
||||
* needs persistent data (e.g. addresses of references) then add it to object
|
||||
* file provider or config or something like that.
|
||||
*/
|
||||
class DataReferences : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
DataReferences();
|
||||
virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override;
|
||||
virtual bool runOnModule(llvm::Module &) override;
|
||||
|
||||
private:
|
||||
class ReferencedObject
|
||||
{
|
||||
public:
|
||||
ReferencedObject(tl_cpputils::Address a);
|
||||
|
||||
public:
|
||||
tl_cpputils::Address addr;
|
||||
const loader::Segment *seg = nullptr;
|
||||
|
||||
llvm::Function *function = nullptr;
|
||||
llvm::GlobalVariable *globalVar = nullptr;
|
||||
llvm::Instruction *instruction = nullptr;
|
||||
};
|
||||
|
||||
public:
|
||||
using Addr2Obj = std::map<tl_cpputils::Address, ReferencedObject>;
|
||||
|
||||
public:
|
||||
const Addr2Obj& getAddressToObjectMapping() const;
|
||||
bool hasReferenceOnAddress(tl_cpputils::Address a) const;
|
||||
const ReferencedObject* getReferenceFromAddress(
|
||||
tl_cpputils::Address a) const;
|
||||
|
||||
private:
|
||||
void detectReferencesIntoSegments();
|
||||
void linkReferencesWithKnownObjects();
|
||||
|
||||
private:
|
||||
Config* config = nullptr;
|
||||
FileImage* objf = nullptr;
|
||||
Addr2Obj addr2obj;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
371
include/bin2llvmir/optimizations/decoder/decoder.h
Normal file
371
include/bin2llvmir/optimizations/decoder/decoder.h
Normal file
@ -0,0 +1,371 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/decoder/decoder.h
|
||||
* @brief Decode input binary into LLVM IR.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_DECODER_DECODER_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_DECODER_DECODER_H
|
||||
|
||||
#include <queue>
|
||||
#include <sstream>
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "tl-cpputils/address.h"
|
||||
#include "bin2llvmir/providers/asm_instruction.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/debugformat.h"
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
#include "bin2llvmir/utils/ir_modifier.h"
|
||||
#include "capstone2llvmir/capstone2llvmir.h"
|
||||
#include "capstone2llvmir/x86/x86.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class Decoder : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
Decoder();
|
||||
virtual bool runOnModule(llvm::Module& m) override;
|
||||
bool runOnModuleCustom(
|
||||
llvm::Module& m,
|
||||
Config* c,
|
||||
FileImage* o,
|
||||
DebugFormat* d);
|
||||
|
||||
public:
|
||||
class JumpTarget
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Jump target type and its priority. Lower number -> higher
|
||||
* priority.
|
||||
*/
|
||||
enum class eType
|
||||
{
|
||||
ENTRY_POINT = 0,
|
||||
DELAY_SLOT,
|
||||
CONTROL_FLOW,
|
||||
SELECTED_RANGE_START,
|
||||
CONFIG_FUNCTION,
|
||||
// STATICALLY_LINKED_FUNCTION, // TODO: remove from here?
|
||||
DEBUG_FUNCTION,
|
||||
IMPORT_FUNCTION,
|
||||
EXPORT_FUNCTION,
|
||||
SYMBOL_FUNCTION_PUBLIC, // better than PRIVATE, and other.
|
||||
SYMBOL_FUNCTION,
|
||||
STATICALLY_LINKED_FUNCTION,
|
||||
DELPHI_FNC_TABLE_FUNCTION,
|
||||
CODE_POINTER_FROM_DATA,
|
||||
CODE_POINTER_FROM_OTHER,
|
||||
SECTION_START,
|
||||
};
|
||||
|
||||
public:
|
||||
JumpTarget() {} // just so it can be used in std::map.
|
||||
JumpTarget(
|
||||
Config* conf,
|
||||
tl_cpputils::Address a,
|
||||
eType t,
|
||||
cs_mode m,
|
||||
tl_cpputils::Address f = tl_cpputils::Address::getUndef,
|
||||
const std::string& n = "")
|
||||
:
|
||||
address(a),
|
||||
from(f),
|
||||
type(t),
|
||||
mode(m)
|
||||
{
|
||||
setName(n);
|
||||
|
||||
if (conf->getConfig().architecture.isArmOrThumb())
|
||||
{
|
||||
if (address % 2)
|
||||
{
|
||||
mode = CS_MODE_THUMB;
|
||||
--address;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool operator<(const JumpTarget& o) const
|
||||
{
|
||||
if (type == o.type)
|
||||
{
|
||||
return address < o.address;
|
||||
}
|
||||
else
|
||||
{
|
||||
return type < o.type;
|
||||
}
|
||||
}
|
||||
|
||||
bool createFunction() const
|
||||
{
|
||||
return type == eType::SECTION_START
|
||||
|| type == eType::ENTRY_POINT
|
||||
|| type == eType::CONFIG_FUNCTION
|
||||
|| type == eType::DEBUG_FUNCTION
|
||||
|| type == eType::SYMBOL_FUNCTION
|
||||
|| type == eType::SYMBOL_FUNCTION_PUBLIC
|
||||
|| type == eType::EXPORT_FUNCTION
|
||||
|| type == eType::IMPORT_FUNCTION
|
||||
|| type == eType::STATICALLY_LINKED_FUNCTION
|
||||
|| type == eType::SELECTED_RANGE_START
|
||||
|| type == eType::DELPHI_FNC_TABLE_FUNCTION
|
||||
;
|
||||
}
|
||||
|
||||
bool hasName() const
|
||||
{
|
||||
return !name.empty();
|
||||
}
|
||||
|
||||
std::string getName(Config* config = nullptr) const
|
||||
{
|
||||
return config && config->isPic32()
|
||||
? fixWeirdManglingOfPic32(name)
|
||||
: name;
|
||||
}
|
||||
|
||||
void setName(const std::string& n) const
|
||||
{
|
||||
name = n;
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream &out, const JumpTarget& jt);
|
||||
|
||||
bool isKnownMode() const
|
||||
{
|
||||
return !isUnknownMode();
|
||||
}
|
||||
bool isUnknownMode() const
|
||||
{
|
||||
return mode == CS_MODE_BIG_ENDIAN;
|
||||
}
|
||||
|
||||
private:
|
||||
std::string fixWeirdManglingOfPic32(const std::string& n) const
|
||||
{
|
||||
std::string name = n;
|
||||
if (name.empty()) return name;
|
||||
|
||||
if (name.find("_d") == 0)
|
||||
{
|
||||
name = name.substr(2);
|
||||
}
|
||||
else if (name[0] == '_')
|
||||
{
|
||||
name = name.substr(1);
|
||||
}
|
||||
|
||||
if (name.empty()) return name;
|
||||
|
||||
if (name.find("_cd") != std::string::npos)
|
||||
{
|
||||
name = name.substr(0, name.find("_cd"));
|
||||
}
|
||||
else if (name.find("_eE") != std::string::npos)
|
||||
{
|
||||
name = name.substr(0, name.find("_eE"));
|
||||
}
|
||||
else if (name.find("_fF") != std::string::npos)
|
||||
{
|
||||
name = name.substr(0, name.find("_fF"));
|
||||
}
|
||||
else if (tl_cpputils::endsWith(name, "_s"))
|
||||
{
|
||||
name.pop_back();
|
||||
name.pop_back();
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
public:
|
||||
tl_cpputils::Address address;
|
||||
/// If jump target is code pointer, this is an address where
|
||||
/// it was found;
|
||||
tl_cpputils::Address from;
|
||||
eType type;
|
||||
cs_mode mode = CS_MODE_BIG_ENDIAN;
|
||||
|
||||
private:
|
||||
mutable std::string name;
|
||||
};
|
||||
|
||||
class JumpTargets
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream &out, const JumpTargets& jts);
|
||||
public:
|
||||
void push(const JumpTarget& jt)
|
||||
{
|
||||
if (jt.address.isDefined())
|
||||
{
|
||||
_data.insert(jt);
|
||||
}
|
||||
}
|
||||
|
||||
void push(
|
||||
Config* c,
|
||||
tl_cpputils::Address a,
|
||||
JumpTarget::eType t,
|
||||
cs_mode m)
|
||||
{
|
||||
if (a.isDefined())
|
||||
{
|
||||
_data.insert(JumpTarget(c, a, t, m));
|
||||
}
|
||||
}
|
||||
|
||||
void push(
|
||||
Config* c,
|
||||
tl_cpputils::Address a,
|
||||
JumpTarget::eType t,
|
||||
cs_mode m,
|
||||
tl_cpputils::Address f)
|
||||
{
|
||||
if (a.isDefined())
|
||||
{
|
||||
_data.insert(JumpTarget(c, a, t, m, f));
|
||||
}
|
||||
}
|
||||
|
||||
void push(
|
||||
Config* c,
|
||||
tl_cpputils::Address a,
|
||||
JumpTarget::eType t,
|
||||
cs_mode m,
|
||||
const std::string name)
|
||||
{
|
||||
if (a.isDefined())
|
||||
{
|
||||
_data.insert(JumpTarget(c, a, t, m, tl_cpputils::Address::getUndef, name));
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t size() const
|
||||
{
|
||||
return _data.size();
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
_data.clear();
|
||||
}
|
||||
|
||||
bool empty()
|
||||
{
|
||||
return _data.empty();
|
||||
}
|
||||
|
||||
const JumpTarget& top()
|
||||
{
|
||||
return *_data.begin();
|
||||
}
|
||||
|
||||
void pop()
|
||||
{
|
||||
_poped.insert(top().address);
|
||||
_data.erase(top());
|
||||
}
|
||||
|
||||
bool wasAlreadyPoped(JumpTarget& ct) const
|
||||
{
|
||||
return _poped.count(ct.address);
|
||||
}
|
||||
|
||||
auto begin()
|
||||
{
|
||||
return _data.begin();
|
||||
}
|
||||
auto end()
|
||||
{
|
||||
return _data.end();
|
||||
}
|
||||
|
||||
public:
|
||||
std::set<JumpTarget> _data;
|
||||
std::set<tl_cpputils::Address> _poped;
|
||||
};
|
||||
|
||||
private:
|
||||
bool runCatcher();
|
||||
bool run();
|
||||
void checkIfSomethingDecoded();
|
||||
|
||||
bool initTranslator();
|
||||
void initEnvironment();
|
||||
void initEnvironmentAsm2LlvmMapping();
|
||||
void initEnvironmentPseudoFunctions();
|
||||
void initEnvironmentRegisters();
|
||||
|
||||
void initRangesAndTargets();
|
||||
void initAllowedRangesWithSegments();
|
||||
void initAllowedRangesWithConfig();
|
||||
void initJumpTargets();
|
||||
void initJumpTargetsWithStaticCode();
|
||||
void removeZeroSequences(tl_cpputils::AddressRangeContainer& rs);
|
||||
|
||||
void doDecoding();
|
||||
bool looksLikeValidJumpTarget(tl_cpputils::Address addr);
|
||||
|
||||
void doStaticCodeRecognition();
|
||||
|
||||
tl_cpputils::Address getJumpTarget(llvm::Value* val);
|
||||
|
||||
void findDelphiFunctionTable();
|
||||
|
||||
bool fixMainName();
|
||||
std::string getFunctionNameFromLibAndOrd(
|
||||
const std::string& libName,
|
||||
int ord);
|
||||
bool loadOrds(const std::string& libName);
|
||||
void removeStaticallyLinkedFunctions();
|
||||
void hackDeleteKnownLinkedFunctions();
|
||||
|
||||
void fixMipsDelaySlots();
|
||||
|
||||
bool isArmOrThumb() const;
|
||||
cs_mode getUnknownMode() const;
|
||||
cs_mode determineMode(AsmInstruction ai, tl_cpputils::Address target) const;
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
FileImage* _image = nullptr;
|
||||
DebugFormat* _debug = nullptr;
|
||||
|
||||
std::unique_ptr<capstone2llvmir::Capstone2LlvmIrTranslator> _c2l;
|
||||
|
||||
const std::string _asm2llvmGv = "_asm_program_counter";
|
||||
const std::string _asm2llvmMd = "llvmToAsmGlobalVariableName";
|
||||
const std::string _callFunction = "__pseudo_call";
|
||||
const std::string _returnFunction = "__pseudo_return";
|
||||
const std::string _branchFunction = "__pseudo_branch";
|
||||
const std::string _condBranchFunction = "__pseudo_cond_branch";
|
||||
|
||||
std::map<tl_cpputils::Address, std::pair<std::string, tl_cpputils::AddressRange>> _staticCode;
|
||||
tl_cpputils::AddressRangeContainer _allowedRanges;
|
||||
tl_cpputils::AddressRangeContainer _alternativeRanges;
|
||||
tl_cpputils::AddressRangeContainer _processedRanges;
|
||||
JumpTargets _jumpTargets;
|
||||
|
||||
std::size_t decodingChunk = 0x50;
|
||||
|
||||
std::map<llvm::Function*, std::pair<tl_cpputils::Address, tl_cpputils::Address>> _functions;
|
||||
|
||||
/// <ordinal number, function name>
|
||||
using OrdMap = std::map<int, std::string>;
|
||||
/// <library name without suffix ".dll", map with ordinals>
|
||||
std::map<std::string, OrdMap> _dllOrds;
|
||||
|
||||
cs_mode _currentMode;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,90 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/dsm_generator/dsm_generator.h
|
||||
* @brief Produce DSM output.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_DSM_GENERATOR_DSM_GENERATOR_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_DSM_GENERATOR_DSM_GENERATOR_H
|
||||
|
||||
#include <ostream>
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/providers/asm_instruction.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
#include "bin2llvmir/utils/instruction.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class DsmGenerator : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
DsmGenerator();
|
||||
virtual bool runOnModule(llvm::Module& m) override;
|
||||
bool runOnModuleCustom(
|
||||
llvm::Module& m,
|
||||
Config* c,
|
||||
FileImage* objf,
|
||||
std::ostream& ret);
|
||||
|
||||
private:
|
||||
void run(std::ostream& ret);
|
||||
void generateHeader(std::ostream& ret);
|
||||
void generateCode(std::ostream& ret);
|
||||
void generateCodeSeg(
|
||||
const loader::Segment* seg,
|
||||
std::ostream& ret);
|
||||
void generateFunction(
|
||||
retdec_config::Function* fnc,
|
||||
std::ostream& ret);
|
||||
void generateInstruction(AsmInstruction& ai, std::ostream& ret);
|
||||
void generateData(std::ostream& ret);
|
||||
void generateDataSeg(
|
||||
const loader::Segment* seg,
|
||||
std::ostream& ret);
|
||||
void generateDataRange(
|
||||
tl_cpputils::Address start,
|
||||
tl_cpputils::Address end,
|
||||
std::ostream& ret);
|
||||
void generateAlignedAddress(
|
||||
tl_cpputils::Address addr,
|
||||
std::ostream& ret);
|
||||
|
||||
void getAsmInstructionHex(AsmInstruction& ai, std::ostream& ret);
|
||||
std::string processInstructionDsm(AsmInstruction& ai);
|
||||
void generateData(
|
||||
std::ostream& ret,
|
||||
tl_cpputils::Address start,
|
||||
std::size_t size,
|
||||
const std::string& objVal = "");
|
||||
std::string escapeString(const std::string& str);
|
||||
std::string reduceNegativeNumbers(const std::string& str);
|
||||
void findLongestInstruction();
|
||||
void findLongestAddress();
|
||||
std::string getString(
|
||||
const retdec_config::Object* cgv,
|
||||
const llvm::ConstantDataArray* cda);
|
||||
|
||||
std::string getFunctionName(llvm::Function* f) const;
|
||||
std::string getFunctionName(retdec_config::Function* f) const;
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
FileImage* _objf = nullptr;
|
||||
std::size_t _longestInst = 0;
|
||||
std::size_t _longestAddr = 0;
|
||||
std::map<tl_cpputils::Address, retdec_config::Function*> _addr2fnc;
|
||||
|
||||
const std::size_t DATA_SEGMENT_LINE = 16;
|
||||
const std::string ALIGN = " ";
|
||||
const std::string INSTR_SEPARATOR = "\t"; // maybe "\t"
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
25
include/bin2llvmir/optimizations/dump_module/dump_module.h
Normal file
25
include/bin2llvmir/optimizations/dump_module/dump_module.h
Normal file
@ -0,0 +1,25 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/dump_module/dump_module.h
|
||||
* @brief This is a utility debug pass that only dumps the module into LLVM IR.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_DUMP_MODULE_DUMP_MODULE_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_DUMP_MODULE_DUMP_MODULE_H
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class DumpModule : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
DumpModule();
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,57 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/globals/dead_global_assign.h
|
||||
* @brief Removes dead assignments to global variables.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_GLOBALS_DEAD_GLOBAL_ASSIGN_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_GLOBALS_DEAD_GLOBAL_ASSIGN_H
|
||||
|
||||
#include "bin2llvmir/optimizations/globals/global_to_local_and_dead_global_assign.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Optimization that removes dead global assigns.
|
||||
*
|
||||
* This optimization optimizes for example (@c g and @c v are global variables):
|
||||
* @code
|
||||
* g = 1; -- removed, this value is never used.
|
||||
* g = 2;
|
||||
* v = 2; -- remove, only if v is not used anywhere from this place (also not
|
||||
* used in called functions).
|
||||
* x = g;
|
||||
* @endcode
|
||||
*
|
||||
* Also removes all definitions of global variables that don't have use.
|
||||
*
|
||||
* This optimization can run in two variants. Aggressive and not aggressive.
|
||||
* Not aggressive is run by:
|
||||
* @code
|
||||
* -dead-global-assign -not-aggressive
|
||||
* @endcode
|
||||
* Aggressive variant is default. Aggressive variant does not count that there
|
||||
* is some use in functions that are defined out of the module.
|
||||
*
|
||||
* This optimization can be run with statistics about how many global
|
||||
* declaration or how many dead global assigns were deleted. This is possible
|
||||
* with:
|
||||
* @code
|
||||
* --stats -dead-global-assign
|
||||
* @endcode
|
||||
*/
|
||||
class DeadGlobalAssign: public GlobalToLocalAndDeadGlobalAssign {
|
||||
public:
|
||||
DeadGlobalAssign();
|
||||
virtual ~DeadGlobalAssign() override;
|
||||
|
||||
static const char *getPassArg();
|
||||
virtual const char *getPassName() const override;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
110
include/bin2llvmir/optimizations/globals/global_to_local.h
Normal file
110
include/bin2llvmir/optimizations/globals/global_to_local.h
Normal file
@ -0,0 +1,110 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/globals/global_to_local.h
|
||||
* @brief Converts global variables to local variables.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_GLOBALS_GLOBAL_TO_LOCAL_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_GLOBALS_GLOBAL_TO_LOCAL_H
|
||||
|
||||
#include "bin2llvmir/optimizations/globals/global_to_local_and_dead_global_assign.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Optimization that converts global variables to local variables. Also
|
||||
* replaces uses of global variable with new created local variables.
|
||||
*
|
||||
* Optimization optimizes these situations:
|
||||
* @par I.
|
||||
* Converts global variables to local variables and replaces uses everywhere
|
||||
* where it is possible.
|
||||
* @code
|
||||
* func() {
|
||||
* g = 2; <- can be replaced with local variable.
|
||||
* v = g; <- can be replaced with local variable.
|
||||
* }
|
||||
* @endcode
|
||||
* can be optimized to
|
||||
* @code
|
||||
* func() {
|
||||
* int gLoc;
|
||||
* gLoc = 2;
|
||||
* v = gLoc;
|
||||
* }
|
||||
* @endcode
|
||||
*
|
||||
* @par II.
|
||||
* We can call this situation as pattern. This pattern contains assign from
|
||||
* global variable to temporary variable and at the end in all exits of function
|
||||
* is temporary variable assigned to global variable. When all body inside this
|
||||
* pattern can be optimized and we have no use of temporary variable then we can
|
||||
* remove the instructions that create the pattern and optimize the body of this pattern.
|
||||
* @code
|
||||
* func() {
|
||||
* tmp = g; <- can be removed.
|
||||
* g = 2; <- can be replaced with local variable.
|
||||
* v = g; <- can be replaced with local variable.
|
||||
* g = tmp; <- can be removed.
|
||||
* }
|
||||
* @endcode
|
||||
* can be optimized to
|
||||
* @code
|
||||
* func() {
|
||||
* int gLoc;
|
||||
* gLoc = 2;
|
||||
* v = gLoc;
|
||||
* }
|
||||
* @endcode
|
||||
*
|
||||
* @par III.
|
||||
* If uses of global variable is only in one function then we can move global
|
||||
* variable to local in this function and is it need to assign global variable
|
||||
* initializer to this new created local variable.
|
||||
* int g = 2;
|
||||
* @code
|
||||
* func() {
|
||||
* v = g; <- can be replaced with local variable.
|
||||
* }
|
||||
* @endcode
|
||||
* can be optimized to
|
||||
* @code
|
||||
* func() {
|
||||
* int gLoc;
|
||||
* gLoc = 2;
|
||||
* v = gLoc;
|
||||
* }
|
||||
* @endcode
|
||||
* @par IV.
|
||||
* Removes all definitions of global variables that don't have use.
|
||||
*
|
||||
* This optimization can run in two variants. Aggressive and not aggressive.
|
||||
* Not aggressive is run by:
|
||||
* @code
|
||||
* -global-to-local -not-aggressive
|
||||
* @endcode
|
||||
* Aggressive variant is default. Aggressive variant does not count that there
|
||||
* is some use in functions that are defined out of the module.
|
||||
*
|
||||
* This optimization can be run with statistics about how many global
|
||||
* declaration or how many dead global assigns were deleted. This is possible
|
||||
* with:
|
||||
* @code
|
||||
* --stats -global-to-local
|
||||
* @endcode
|
||||
*/
|
||||
class GlobalToLocal: public GlobalToLocalAndDeadGlobalAssign {
|
||||
public:
|
||||
GlobalToLocal();
|
||||
virtual ~GlobalToLocal() override;
|
||||
|
||||
static const char *getPassArg();
|
||||
virtual const char *getPassName() const override;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,168 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/globals/global_to_local_and_dead_global_assign.h
|
||||
* @brief Converts global variables to local variables and removes dead assigns.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_GLOBALS_GLOBAL_TO_LOCAL_AND_DEAD_GLOBAL_ASSIGN_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_GLOBALS_GLOBAL_TO_LOCAL_AND_DEAD_GLOBAL_ASSIGN_H
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/IR/Value.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/analyses/store_load_analysis.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Global to local and dead global assign optimization.
|
||||
*
|
||||
* For what each optimization does, @see GlobalToLocal and @see DeadGlobalAssign.
|
||||
*
|
||||
* We don't optimize global variable in some special cases:
|
||||
* 1. Global variable is not single type (array, structure, etc.).
|
||||
* 2. Global variable is a pointer.
|
||||
* 3. Address of global variable can be taken.
|
||||
* 4. Global variable that doesn't have private or internal linkage.
|
||||
*
|
||||
* To decide when to optimize something, this optimization uses three pieces of
|
||||
* information that are obtained in this optimization:
|
||||
* - Extended Right uses (in code @c extRUses)
|
||||
* - Last left uses (in code @c lastLUses).
|
||||
* - Not go through global variables (in code @c notGoThrough).
|
||||
* For how this info is obtained and what it means, @see StoreLoadAnalysis.
|
||||
*/
|
||||
class GlobalToLocalAndDeadGlobalAssign: public llvm::ModulePass {
|
||||
public:
|
||||
GlobalToLocalAndDeadGlobalAssign();
|
||||
virtual ~GlobalToLocalAndDeadGlobalAssign() override;
|
||||
|
||||
virtual bool runOnModule(llvm::Module &module) override;
|
||||
virtual void getAnalysisUsage(llvm::AnalysisUsage &au) const override;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
protected:
|
||||
// Signalizes that we want to do global to local optimization.
|
||||
bool globalToLocal;
|
||||
|
||||
// Signalizes that we want to do dead global assign optimization.
|
||||
bool deadGlobalAssign;
|
||||
|
||||
private:
|
||||
/**
|
||||
* @brief Class for function info.
|
||||
*/
|
||||
class FuncInfo {
|
||||
public:
|
||||
FuncInfo(llvm::Function &func, StoreLoadAnalysis &storeLoadAnalysis);
|
||||
~FuncInfo();
|
||||
|
||||
llvm::Function &getFunc();
|
||||
bool filterUsesThatCannotBeOptimizedReturnIfChanged(
|
||||
InstSet &lUsesNotToOptimize,
|
||||
InstSet &rUsesNotToOptimize);
|
||||
void findPatterns();
|
||||
bool hasPattern(llvm::Value &globValue,
|
||||
const InstSet &lastLUses);
|
||||
llvm::Instruction *findBeginfOfPattern(llvm::Value &globValue,
|
||||
llvm::Instruction &endInst);
|
||||
void savePatterns(llvm::Instruction &rUse,
|
||||
const InstSet &lUses);
|
||||
void convertGlobsToLocs(bool deadGlobalAssign);
|
||||
void removeDeadGlobalAssigns();
|
||||
void convertGlobToLocUseInOneFunc(llvm::GlobalVariable &glob);
|
||||
void removePatternInsts();
|
||||
void addExtRUsesToNotToOptimize(InstSet ¬ToOptimize);
|
||||
void addFilteredRUse(llvm::Instruction &rUse);
|
||||
void addFilteredLUse(llvm::Instruction &lUse);
|
||||
bool isInFilteredLUses(llvm::Instruction &lUse);
|
||||
|
||||
void printFuncInfo();
|
||||
|
||||
private:
|
||||
/// Mapping of a string to allocation instruction.
|
||||
using StringAllocaInstMap = std::map<std::string, llvm::AllocaInst *>;
|
||||
|
||||
private:
|
||||
llvm::Value *getLocVarFor(llvm::Value &glob);
|
||||
std::string getLocalVarNameFor(llvm::Value &globValue);
|
||||
bool canBeOptimized(llvm::Instruction &lUse, llvm::Function &func,
|
||||
const InstSet &rUses,
|
||||
const InstSet &rUsesNotToOptimize);
|
||||
bool isPartOfPattern(llvm::Instruction &inst);
|
||||
void replaceGlobToLocInInsts(llvm::Value &from, llvm::Value &to,
|
||||
llvm::Instruction &lUse, const InstSet &rUses);
|
||||
llvm::Instruction *getFirstNonAllocaInst(llvm::BasicBlock &bb);
|
||||
|
||||
private:
|
||||
/// For this function is created info.
|
||||
llvm::Function &func;
|
||||
|
||||
/// Mapping of a global variable name to a local variable.
|
||||
StringAllocaInstMap convertedGlobsToLoc;
|
||||
|
||||
/// Analysis for store and load instructions.
|
||||
StoreLoadAnalysis &storeLoadAnalysis;
|
||||
|
||||
/// Contains instructions that create patterns.
|
||||
InstSet patternInsts;
|
||||
|
||||
/// Saves filtered left uses in function.
|
||||
AnalysisInfo::ValInstSetMap filteredLUses;
|
||||
|
||||
/// Saves filtered right uses in function.
|
||||
AnalysisInfo::ValInstSetMap filteredRUses;
|
||||
};
|
||||
|
||||
/// Mapping of a function to function info.
|
||||
using FuncFuncInfoMap = std::map<llvm::Function *, FuncInfo *>;
|
||||
|
||||
private:
|
||||
void addMetadata(llvm::Module &module);
|
||||
void solveIndirectCallsAndNotAggressive();
|
||||
void solveNotAggressive();
|
||||
void solveLUsesForIndirectCalls();
|
||||
void solveLastLUsesForIndirectCalls();
|
||||
void goThroughLastLUsesAndSolveIndirectCalls(llvm::Function &func,
|
||||
const AnalysisInfo::ValInstSetMap &extRUses);
|
||||
void filterUsesThatCannotBeOptimized();
|
||||
bool goThroughFuncsInfoFilterAndReturnIfChanged(
|
||||
InstSet &lUsesNotToOptimize,
|
||||
InstSet &rUsesNotToOptimize);
|
||||
void doOptimization(llvm::Module &module,
|
||||
const GlobVarSet &globsToOptimize);
|
||||
void createInfoForAllFuncs(llvm::Module &module);
|
||||
void removePatternInsts();
|
||||
void convertGlobsToLocUseInOneFunc(const GlobVarSet &globs);
|
||||
bool hasSomeLUseForFuncOutOfModule(llvm::GlobalVariable &glob);
|
||||
void removeGlobsWithoutUse(llvm::Module::GlobalListType &globs);
|
||||
void addFilteredLUse(llvm::Instruction &lUse);
|
||||
void addFilteredRUse(llvm::Instruction &rUse);
|
||||
void addFilteredLUses(const InstSet &lUses);
|
||||
void addFilteredRUses(const InstSet &rUses);
|
||||
bool wasSomethingOptimized();
|
||||
FuncInfo &getFuncInfoFor(llvm::Function &func);
|
||||
|
||||
void printFuncInfos();
|
||||
|
||||
private:
|
||||
/// Analysis for store and load instructions.
|
||||
StoreLoadAnalysis storeLoadAnalysis;
|
||||
|
||||
/// Contains filtered right uses that can't be optimized.
|
||||
InstSet rUsesNotToOptimize;
|
||||
|
||||
/// Mapping of a function to its info.
|
||||
FuncFuncInfoMap funcInfoMap;
|
||||
|
||||
Config* config = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
45
include/bin2llvmir/optimizations/idioms/idioms.h
Normal file
45
include/bin2llvmir/optimizations/idioms/idioms.h
Normal file
@ -0,0 +1,45 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/idioms/idioms.h
|
||||
* @brief Instruction idioms analysis
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_H
|
||||
|
||||
#include <list>
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_abstract.h"
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_analysis.h"
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_types.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Instruction idiom analysis.
|
||||
*/
|
||||
class LLVM_LIBRARY_VISIBILITY Idioms: public llvm::FunctionPass {
public:
	/// Pass identification.
	static char ID;
	Idioms();
	virtual ~Idioms() override;

	virtual bool runOnFunction(llvm::Function & f) override;
	virtual bool doInitialization(llvm::Module & M) override;
	virtual bool doFinalization(llvm::Module & M) override;

	virtual void getAnalysisUsage(llvm::AnalysisUsage & AU) const override;
	IdiomsAnalysis * getCompilerAnalysis(llvm::Module & M);

private:
	// Both pointers start as nullptr; m_idioms used to be left
	// uninitialized, unlike m_config.
	IdiomsAnalysis * m_idioms = nullptr;
	Config* m_config = nullptr;
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
51
include/bin2llvmir/optimizations/idioms/idioms_abstract.h
Normal file
51
include/bin2llvmir/optimizations/idioms/idioms_abstract.h
Normal file
@ -0,0 +1,51 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/idioms/idioms_abstract.h
|
||||
* @brief Instruction idioms analysis abstract class
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_ABSTRACT_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_ABSTRACT_H
|
||||
|
||||
#include <llvm/IR/IRBuilder.h>
|
||||
#include <llvm/IR/Instruction.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_types.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Instruction idiom analysis abstract class
|
||||
*/
|
||||
class IdiomsAbstract {
|
||||
private:
|
||||
CC_arch m_arch;
|
||||
CC_compiler m_compiler;
|
||||
llvm::Module * m_module;
|
||||
Config* m_config = nullptr;
|
||||
|
||||
protected:
|
||||
IdiomsAbstract();
|
||||
|
||||
void init(llvm::Module * M, Config* c, CC_compiler cc, CC_arch arch);
|
||||
|
||||
CC_compiler getCompiler() const { return m_compiler; }
|
||||
CC_arch getArch() const { return m_arch; }
|
||||
llvm::Module * getModule() const { return m_module; }
|
||||
Config * getConfig() const { return m_config; }
|
||||
|
||||
virtual bool doAnalysis(llvm::Function &, llvm::Pass *) = 0;
|
||||
virtual ~IdiomsAbstract() {}
|
||||
|
||||
bool findBranchDependingOn(llvm::BranchInst ** br, llvm::BasicBlock & bb,
|
||||
const llvm::Value * val) const;
|
||||
static void eraseInstFromBasicBlock(llvm::Value * val, llvm::BasicBlock * bb);
|
||||
static bool isPowerOfTwo(unsigned x);
|
||||
static bool isPowerOfTwoRepresentable(const llvm::ConstantInt *cnst);
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
64
include/bin2llvmir/optimizations/idioms/idioms_analysis.h
Normal file
64
include/bin2llvmir/optimizations/idioms/idioms_analysis.h
Normal file
@ -0,0 +1,64 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/idioms/idioms_analysis.h
|
||||
* @brief Instruction idioms analysis
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_ANALYSIS_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_ANALYSIS_H
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#include <llvm/ADT/Statistic.h>
|
||||
#include <llvm/IR/BasicBlock.h>
|
||||
#include <llvm/IR/Instruction.h>
|
||||
#include <llvm/Support/Debug.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_abstract.h"
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_borland.h"
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_common.h"
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_gcc.h"
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_intel.h"
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_llvm.h"
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_magicdivmod.h"
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_owatcom.h"
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_types.h"
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_vstudio.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
#ifdef DEBUG_TYPE
|
||||
#undef DEBUG_TYPE // "idioms"
|
||||
#endif // DEBUG_TYPE "idioms"
|
||||
#define DEBUG_TYPE "idioms"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class IdiomsAnalysis:
|
||||
public IdiomsBorland,
|
||||
public IdiomsCommon,
|
||||
public IdiomsGCC,
|
||||
public IdiomsIntel,
|
||||
public IdiomsLLVM,
|
||||
public IdiomsMagicDivMod,
|
||||
public IdiomsOWatcom,
|
||||
public IdiomsVStudio {
|
||||
public:
|
||||
IdiomsAnalysis(llvm::Module * M, Config* c, CC_compiler cc, CC_arch arch)
|
||||
{
|
||||
init(M, c, cc, arch);
|
||||
}
|
||||
virtual bool doAnalysis(llvm::Function & f, llvm::Pass * p) override;
|
||||
|
||||
private:
|
||||
bool analyse(llvm::Function & f, llvm::Pass * p, int (IdiomsAnalysis::*exchanger)(llvm::Function &, llvm::Pass *) const, const char * fname);
|
||||
bool analyse(llvm::BasicBlock & bb, llvm::Instruction * (IdiomsAnalysis::*exchanger)(llvm::BasicBlock::iterator) const, const char * fname);
|
||||
|
||||
void print_dbg(const char * str, const llvm::Instruction & i) const {
|
||||
DEBUG(llvm::errs() << str << " detected an idiom starting at " << i.getName() << "\n");
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
24
include/bin2llvmir/optimizations/idioms/idioms_borland.h
Normal file
24
include/bin2llvmir/optimizations/idioms/idioms_borland.h
Normal file
@ -0,0 +1,24 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/idioms/idioms_borland.h
|
||||
* @brief Borland C/C++ instruction idioms
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_BORLAND_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_BORLAND_H
|
||||
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_abstract.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Borland C/C++ instruction idioms
|
||||
*/
|
||||
class IdiomsBorland: public virtual IdiomsAbstract {
	// IdiomsAnalysis is granted access to everything declared here.
	friend class IdiomsAnalysis;

	// Add idioms here, if you have found idioms specific for Borland compiler.
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
33
include/bin2llvmir/optimizations/idioms/idioms_common.h
Normal file
33
include/bin2llvmir/optimizations/idioms/idioms_common.h
Normal file
@ -0,0 +1,33 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/idioms/idioms_common.h
|
||||
* @brief Common compiler instruction idioms
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_COMMON_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_COMMON_H
|
||||
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_abstract.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Common compiler instruction idioms
|
||||
*/
|
||||
class IdiomsCommon: virtual public IdiomsAbstract {
|
||||
friend class IdiomsAnalysis;
|
||||
protected:
|
||||
llvm::Instruction * exchangeDivByMinusTwo(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeUnsignedModulo2n(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeLessThanZero(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeGreaterEqualZero(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeBitShiftSDiv1(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeBitShiftSDiv2(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeBitShiftUDiv(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeBitShiftMul(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeSignedModulo2n(llvm::BasicBlock::iterator iter) const;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
38
include/bin2llvmir/optimizations/idioms/idioms_gcc.h
Normal file
38
include/bin2llvmir/optimizations/idioms/idioms_gcc.h
Normal file
@ -0,0 +1,38 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/idioms/idioms_gcc.h
|
||||
* @brief GNU/GCC instruction idioms
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_GCC_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_GCC_H
|
||||
|
||||
#include <llvm/IR/Instruction.h>
|
||||
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_abstract.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief GNU/GCC instruction idioms
|
||||
*/
|
||||
class IdiomsGCC: virtual public IdiomsAbstract {
|
||||
friend class IdiomsAnalysis;
|
||||
|
||||
protected:
|
||||
llvm::Instruction * exchangeFloatNeg(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeXorMinusOne(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeSignedModuloByTwo(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeCondBitShiftDiv1(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeCondBitShiftDiv2(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeCondBitShiftDiv3(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeCopysign(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeFloatAbs(llvm::BasicBlock::iterator iter) const;
|
||||
|
||||
// multi BB idioms
|
||||
int exchangeCondBitShiftDivMultiBB(llvm::Function & f, llvm::Pass * pass) const;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
24
include/bin2llvmir/optimizations/idioms/idioms_intel.h
Normal file
24
include/bin2llvmir/optimizations/idioms/idioms_intel.h
Normal file
@ -0,0 +1,24 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/idioms/idioms_intel.h
|
||||
* @brief Intel compiler instruction idioms
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_INTEL_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_INTEL_H
|
||||
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_abstract.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Intel compiler instruction idioms
|
||||
*/
|
||||
class IdiomsIntel: virtual public IdiomsAbstract {
|
||||
friend class IdiomsAnalysis;
|
||||
// Add idioms here, if you have found idioms specific for Intel compiler.
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
30
include/bin2llvmir/optimizations/idioms/idioms_llvm.h
Normal file
30
include/bin2llvmir/optimizations/idioms/idioms_llvm.h
Normal file
@ -0,0 +1,30 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/idioms/idioms_llvm.h
|
||||
* @brief clang/LLVM instruction idioms
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_LLVM_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_LLVM_H
|
||||
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_abstract.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief clang/LLVM instruction idioms
|
||||
*/
|
||||
class IdiomsLLVM: virtual public IdiomsAbstract {
|
||||
friend class IdiomsAnalysis;
|
||||
|
||||
protected:
|
||||
llvm::Instruction * exchangeIsGreaterThanMinusOne(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeCompareEq(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeCompareNeq(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeCompareSlt(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * exchangeCompareSle(llvm::BasicBlock::iterator iter) const;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
56
include/bin2llvmir/optimizations/idioms/idioms_magicdivmod.h
Normal file
56
include/bin2llvmir/optimizations/idioms/idioms_magicdivmod.h
Normal file
@ -0,0 +1,56 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/idioms/idioms_magicdivmod.h
|
||||
* @brief Magic div and modulo exchangers
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_MAGICDIVMOD_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_MAGICDIVMOD_H
|
||||
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_abstract.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Division and modulo using magic number
|
||||
*
|
||||
* References:
|
||||
* GRANLUND, Torbjorn a Peter L. MONTGOMERY. Division by Invariant Integers
|
||||
* Using Multiplication. 1994.
|
||||
* Available online: http://gmplib.org/~tege/divcnst-pldi94.pdf
|
||||
*/
|
||||
class IdiomsMagicDivMod: virtual public IdiomsAbstract {
|
||||
friend class IdiomsAnalysis;
|
||||
|
||||
private:
|
||||
static unsigned divisorByMagicNumberUnsigned(unsigned magic_number, unsigned sh_pre, unsigned sh_post);
|
||||
static unsigned divisorByMagicNumberUnsigned2(unsigned magic_number, unsigned sh_post);
|
||||
static int divisorByMagicNumberSigned(int magic_number, unsigned sh_post);
|
||||
static int divisorByMagicNumberSigned2(int magic_number, unsigned sh_post);
|
||||
static unsigned divisorByMagicNumberSigned3(unsigned magic_number, unsigned shift);
|
||||
static unsigned divisorByMagicNumberSigned4(unsigned magic_number, unsigned shift);
|
||||
|
||||
llvm::Instruction * magicSignedDiv7(llvm::BasicBlock::iterator iter, bool negative) const;
|
||||
llvm::Instruction * magicSignedDiv8(llvm::BasicBlock::iterator iter, bool negative) const;
|
||||
protected:
|
||||
llvm::Instruction * magicUnsignedDiv1(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * magicUnsignedDiv2(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * magicSignedDiv1(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * magicSignedDiv2(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * magicSignedDiv3(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * magicSignedDiv4(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * magicSignedDiv5(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * magicSignedDiv6(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * magicSignedDiv7pos(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * magicSignedDiv7neg(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * magicSignedDiv8pos(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * magicSignedDiv8neg(llvm::BasicBlock::iterator iter) const;
|
||||
|
||||
llvm::Instruction * signedMod1(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * signedMod2(llvm::BasicBlock::iterator iter) const;
|
||||
llvm::Instruction * unsignedMod(llvm::BasicBlock::iterator iter) const;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
24
include/bin2llvmir/optimizations/idioms/idioms_owatcom.h
Normal file
24
include/bin2llvmir/optimizations/idioms/idioms_owatcom.h
Normal file
@ -0,0 +1,24 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/idioms/idioms_owatcom.h
|
||||
* @brief Open Watcom instruction idioms
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_OWATCOM_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_OWATCOM_H
|
||||
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_abstract.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Open Watcom instruction idioms
|
||||
*/
|
||||
class IdiomsOWatcom: virtual public IdiomsAbstract {
|
||||
friend class IdiomsAnalysis;
|
||||
// Add idioms here, if you have found idioms specific for Open Watcom compiler.
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
38
include/bin2llvmir/optimizations/idioms/idioms_types.h
Normal file
38
include/bin2llvmir/optimizations/idioms/idioms_types.h
Normal file
@ -0,0 +1,38 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/idioms/idioms_types.h
|
||||
* @brief Instruction idioms analysis types
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_TYPES_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_TYPES_H
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Compiler type
|
||||
*/
|
||||
enum CC_compiler {
|
||||
CC_ANY = 0, //unrecognized compiler
|
||||
CC_Borland,
|
||||
CC_GCC,
|
||||
CC_Intel,
|
||||
CC_LLVM,
|
||||
CC_OWatcom,
|
||||
CC_VStudio
|
||||
};
|
||||
/**
|
||||
* @brief Target architecture
|
||||
*/
|
||||
enum CC_arch {
|
||||
ARCH_ANY = 0, //unknown architecture
|
||||
ARCH_MIPS,
|
||||
ARCH_POWERPC,
|
||||
ARCH_ARM,
|
||||
ARCH_THUMB,
|
||||
ARCH_x86
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
28
include/bin2llvmir/optimizations/idioms/idioms_vstudio.h
Normal file
28
include/bin2llvmir/optimizations/idioms/idioms_vstudio.h
Normal file
@ -0,0 +1,28 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/idioms/idioms_vstudio.h
|
||||
* @brief Visual Studio instruction idioms
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_VSTUDIO_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_IDIOMS_VSTUDIO_H
|
||||
|
||||
#include <llvm/IR/Instruction.h>
|
||||
|
||||
#include "bin2llvmir/optimizations/idioms/idioms_abstract.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Visual Studio instruction idioms
|
||||
*/
|
||||
class IdiomsVStudio: virtual public IdiomsAbstract {
|
||||
friend class IdiomsAnalysis;
|
||||
protected:
|
||||
llvm::Instruction * exchangeAndZeroAssign(llvm::BasicBlock::iterator) const;
|
||||
llvm::Instruction * exchangeOrMinusOneAssign(llvm::BasicBlock::iterator) const;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,52 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/idioms_libgcc/idioms_libgcc.h
|
||||
* @brief Idioms produced by libgcc.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_LIBGCC_IDIOMS_LIBGCC_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_IDIOMS_LIBGCC_IDIOMS_LIBGCC_H
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class IdiomsLibgccImpl;
|
||||
|
||||
class IdiomsLibgcc : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
using Fnc2Action = std::vector<std::pair<std::string, std::function<void(llvm::CallInst*)>>>;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
IdiomsLibgcc();
|
||||
virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override;
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
|
||||
static bool checkFunctionToActionMap(const Fnc2Action& fnc2action);
|
||||
|
||||
private:
|
||||
bool testArchAndInitialize();
|
||||
bool handleInstructions();
|
||||
bool handleInstruction(llvm::Instruction* inst);
|
||||
void localize();
|
||||
|
||||
private:
|
||||
std::unique_ptr<IdiomsLibgccImpl> _impl;
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
|
||||
Fnc2Action _fnc2action;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
47
include/bin2llvmir/optimizations/inst_opt/inst_opt.h
Normal file
47
include/bin2llvmir/optimizations/inst_opt/inst_opt.h
Normal file
@ -0,0 +1,47 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/inst_opt/inst_opt.h
|
||||
* @brief Instruction optimizations which we want to do ourselves.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_INST_OPT_INST_OPT_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_INST_OPT_INST_OPT_H
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Instructions.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* Our own instruction optimizer.
|
||||
* We want to do some general (not related to any particular analysis)
|
||||
* instruction optimizations on our own, either because LLVM will not perform
|
||||
* them at all, or because it works on protected (volatilized) load/store
|
||||
* operations which can not be optimized by LLVM passes.
|
||||
*/
|
||||
class InstOpt : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
InstOpt();
|
||||
virtual bool runOnModule(llvm::Module& m) override;
|
||||
bool runOnModuleCustom(llvm::Module& m, Config* c = nullptr);
|
||||
|
||||
private:
|
||||
void removeInstructionNames();
|
||||
bool run();
|
||||
bool runGeneralOpts();
|
||||
bool fixX86RepAnalysis();
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
33
include/bin2llvmir/optimizations/local_vars/local_vars.h
Normal file
33
include/bin2llvmir/optimizations/local_vars/local_vars.h
Normal file
@ -0,0 +1,33 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/local_vars/local_vars.h
|
||||
* @brief Register localization.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_LOCAL_VARS_LOCAL_VARS_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_LOCAL_VARS_LOCAL_VARS_H
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/analyses/reaching_definitions.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class LocalVars : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
LocalVars();
|
||||
virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override;
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
|
||||
private:
|
||||
Config* config = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,51 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/main_detection/main_detection.h
|
||||
* @brief Detect main function.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_MAIN_DETECTION_MAIN_DETECTION_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_MAIN_DETECTION_MAIN_DETECTION_H
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Instructions.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "tl-cpputils/address.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class MainDetection : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
MainDetection();
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
bool runOnModuleCustom(
|
||||
llvm::Module& m,
|
||||
Config* c,
|
||||
FileImage* img = nullptr);
|
||||
|
||||
private:
|
||||
bool run();
|
||||
bool skipAnalysis();
|
||||
tl_cpputils::Address getFromFunctionNames();
|
||||
tl_cpputils::Address getFromContext();
|
||||
tl_cpputils::Address getFromEntryPointOffset(int offset);
|
||||
tl_cpputils::Address getFromCrtSetCheckCount();
|
||||
tl_cpputils::Address getFromInterlockedExchange();
|
||||
|
||||
bool applyResult(tl_cpputils::Address mainAddr);
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
FileImage* _image = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,73 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/never_returning_funcs/never_returning_funcs.h
|
||||
* @brief Adds unreachable instruction after function that never returns.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_NEVER_RETURNING_FUNCS_NEVER_RETURNING_FUNCS_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_NEVER_RETURNING_FUNCS_NEVER_RETURNING_FUNCS_H
|
||||
|
||||
#include <llvm/IR/InstVisitor.h>
|
||||
#include <llvm/IR/Instructions.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Adds unreachable instruction after functions that never return.
|
||||
*
|
||||
* This optimization also removes instructions that are in a basic block after a
|
||||
* function that never returns.
|
||||
*/
|
||||
class NeverReturningFuncs: public llvm::FunctionPass,
|
||||
private llvm::InstVisitor<NeverReturningFuncs> {
|
||||
public:
|
||||
/// Set of terminator instructions.
|
||||
using TerminatorInstSet = std::set<llvm::TerminatorInst *>;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
NeverReturningFuncs();
|
||||
virtual bool doInitialization(llvm::Module &module) override;
|
||||
virtual bool doFinalization(llvm::Module &module) override;
|
||||
virtual bool runOnFunction(llvm::Function &func) override;
|
||||
bool runOnFunctionCustom(llvm::Function &func);
|
||||
|
||||
static const char *getName() { return NAME; }
|
||||
|
||||
private:
|
||||
friend class llvm::InstVisitor<NeverReturningFuncs>;
|
||||
|
||||
bool run(llvm::Function &func);
|
||||
void visitCallInst(llvm::CallInst &callInst);
|
||||
|
||||
void initBeforeRun();
|
||||
void initFuncNeverReturnsMap();
|
||||
void deinitFuncNeverReturnsMap();
|
||||
void addInstsThatWillBeRemoved(llvm::Instruction &inst);
|
||||
void replaceTerminatorInstsWithUnreachableInst(
|
||||
const TerminatorInstSet &toReplace);
|
||||
bool neverReturns(const llvm::Function *func);
|
||||
|
||||
private:
|
||||
/// Name of the optimization.
|
||||
static const char *NAME;
|
||||
|
||||
/// Mapping of functions that never return.
|
||||
static StringVecFuncMap funcNeverReturnsMap;
|
||||
|
||||
/// Optimized module.
|
||||
llvm::Module *module;
|
||||
|
||||
/// Set of instructions to remove.
|
||||
InstSet instsToRemove;
|
||||
|
||||
/// Set of terminator instructions to replace.
|
||||
TerminatorInstSet instsToReplace;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
173
include/bin2llvmir/optimizations/param_return/param_return.h
Normal file
173
include/bin2llvmir/optimizations/param_return/param_return.h
Normal file
@ -0,0 +1,173 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/param_return/param_return.h
|
||||
* @brief Detect functions' parameters and returns.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_PARAM_RETURN_PARAM_RETURN_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_PARAM_RETURN_PARAM_RETURN_H
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Instructions.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/analyses/reaching_definitions.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/debugformat.h"
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
#include "bin2llvmir/providers/lti.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class CallEntry
|
||||
{
|
||||
public:
|
||||
CallEntry(llvm::CallInst* c);
|
||||
|
||||
public:
|
||||
void filterRegisters(Config* _config);
|
||||
void filterSort(Config* _config);
|
||||
void filterLeaveOnlyContinuousStackOffsets(Config* _config);
|
||||
void filterLeaveOnlyNeededStackOffsets(Config* _config);
|
||||
|
||||
void extractFormatString(ReachingDefinitionsAnalysis& _RDA);
|
||||
|
||||
public:
|
||||
llvm::CallInst* call = nullptr;
|
||||
std::vector<llvm::StoreInst*> possibleArgStores;
|
||||
std::vector<llvm::LoadInst*> possibleRetLoads;
|
||||
std::string formatStr;
|
||||
};
|
||||
|
||||
class ReturnEntry
|
||||
{
|
||||
public:
|
||||
ReturnEntry(llvm::ReturnInst* r);
|
||||
|
||||
public:
|
||||
llvm::ReturnInst* ret = nullptr;
|
||||
std::vector<llvm::StoreInst*> possibleRetStores;
|
||||
};
|
||||
|
||||
class DataFlowEntry
|
||||
{
|
||||
public:
|
||||
DataFlowEntry(
|
||||
llvm::Module* m,
|
||||
ReachingDefinitionsAnalysis& rda,
|
||||
Config* c,
|
||||
FileImage* img,
|
||||
DebugFormat* dbg,
|
||||
Lti* lti,
|
||||
llvm::Value* v);
|
||||
|
||||
bool isFunctionEntry() const;
|
||||
bool isValueEntry() const;
|
||||
llvm::Value* getValue() const;
|
||||
llvm::Function* getFunction() const;
|
||||
void dump() const;
|
||||
|
||||
void addCall(llvm::CallInst* call);
|
||||
|
||||
void filter();
|
||||
|
||||
void applyToIr();
|
||||
void applyToIrOrdinary();
|
||||
void applyToIrVariadic();
|
||||
void connectWrappers();
|
||||
|
||||
private:
|
||||
void addArgLoads();
|
||||
void addRetStores();
|
||||
void addCallArgs(llvm::CallInst* call, CallEntry& ce);
|
||||
void addCallReturns(llvm::CallInst* call, CallEntry& ce);
|
||||
|
||||
void callsFilterCommonRegisters();
|
||||
void callsFilterSameNumberOfStacks();
|
||||
|
||||
void setTypeFromExtraInfo();
|
||||
void setTypeFromUseContext();
|
||||
void setReturnType();
|
||||
void setArgumentTypes();
|
||||
|
||||
//
|
||||
void filterRegistersArgLoads();
|
||||
void filterSortArgLoads();
|
||||
|
||||
llvm::CallInst* isSimpleWrapper(llvm::Function* fnc);
|
||||
|
||||
public:
|
||||
llvm::Module* _module = nullptr;
|
||||
ReachingDefinitionsAnalysis& _RDA;
|
||||
Config* _config = nullptr;
|
||||
FileImage* _image = nullptr;
|
||||
Lti* _lti = nullptr;
|
||||
llvm::Value* called = nullptr;
|
||||
retdec_config::Function* configFnc = nullptr;
|
||||
retdec_config::Function* dbgFnc = nullptr;
|
||||
|
||||
// In caller.
|
||||
//
|
||||
std::vector<CallEntry> calls;
|
||||
|
||||
// In called function.
|
||||
//
|
||||
std::vector<llvm::LoadInst*> argLoads;
|
||||
std::vector<ReturnEntry> retStores;
|
||||
|
||||
// Result.
|
||||
//
|
||||
bool typeSet = false;
|
||||
llvm::Type* retType = nullptr;
|
||||
std::vector<llvm::Type*> argTypes;
|
||||
std::map<std::size_t, llvm::Value*> specialArgStorage;
|
||||
bool isVarArg = false;
|
||||
llvm::CallInst* wrappedCall = nullptr;
|
||||
std::vector<std::string> argNames;
|
||||
};
|
||||
|
||||
class ParamReturn : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
ParamReturn();
|
||||
bool runOnModuleCustom(
|
||||
llvm::Module& m,
|
||||
Config* c,
|
||||
FileImage* img = nullptr,
|
||||
DebugFormat* dbgf = nullptr,
|
||||
Lti* lti = nullptr);
|
||||
virtual bool runOnModule(llvm::Module& m) override;
|
||||
|
||||
private:
|
||||
bool run();
|
||||
void dumpInfo();
|
||||
|
||||
void collectAllCalls();
|
||||
std::string extractFormatString(llvm::CallInst* call);
|
||||
|
||||
void filterCalls();
|
||||
void filterSort(CallEntry& ce);
|
||||
void filterLeaveOnlyContinuousStackOffsets(CallEntry& ce);
|
||||
void filterLeaveOnlyNeededStackOffsets(CallEntry& ce);
|
||||
|
||||
void applyToIr();
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
FileImage* _image = nullptr;
|
||||
DebugFormat* _dbgf = nullptr;
|
||||
Lti* _lti = nullptr;
|
||||
|
||||
std::map<llvm::Value*, DataFlowEntry> _fnc2calls;
|
||||
ReachingDefinitionsAnalysis _RDA;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
152
include/bin2llvmir/optimizations/phi2seq/phi2seq.h
Normal file
152
include/bin2llvmir/optimizations/phi2seq/phi2seq.h
Normal file
@ -0,0 +1,152 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/phi2seq/phi2seq.h
|
||||
* @brief Solves parallel processing of PHI nodes.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_PHI2SEQ_PHI2SEQ_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_PHI2SEQ_PHI2SEQ_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <llvm/IR/IRBuilder.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/analyses/var_depend_analysis.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Optimization that solves the problem of parallel processing of PHI
|
||||
* nodes.
|
||||
*
|
||||
* @pre The <em>Assign names to anonymous instructions</em>
|
||||
* (<tt>-instnamer</tt>) pass has to be run before this optimization. This
|
||||
* optimization automatically runs it.
|
||||
*
|
||||
* This optimization is needed because PHI nodes in a single block in LLVM IR
|
||||
* are executed in parallel, but our output languages like C or
|
||||
* Python are sequential. Hence, we need to transform the parallel processing to
|
||||
* equivalent sequential processing.
|
||||
*
|
||||
* We can divide this problem into two sub-problems. The first sub-problem
|
||||
* represents the dependency of variables like this:
|
||||
* @code
|
||||
* .bb:
|
||||
* %A = phi i32 [ %D, %bb1 ], [ 10, %0 ]
|
||||
* %B = phi i32 [ %A, %bb1 ], [ 66, %0 ]
|
||||
* @endcode
|
||||
* If these PHI nodes were processed sequentially, A would first be assigned
|
||||
* the value of D, and that new value of A would then be assigned to B, which is
|
||||
* not equivalent to parallel processing. The solution is to order PHI nodes
|
||||
* into a correct sequential order that is equivalent to parallel processing,
|
||||
* like this:
|
||||
* @code
|
||||
* %B = phi i32 [ %A, %bb1 ], [ 66, %0 ]
|
||||
* %A = phi i32 [ %D, %bb1 ], [ 10, %0 ]
|
||||
* @endcode
|
||||
*
|
||||
* The second sub-problem that we need to solve is when we have a cycle
|
||||
* dependency of variables in PHI nodes. For example:
|
||||
* @code
|
||||
* .bb:
|
||||
* %A = phi i32 [ %B, %bb1 ], [ 1, %0 ]
|
||||
* %B = phi i32 [ %C, %bb1 ], [ 2, %0 ]
|
||||
* %C = phi i32 [ %A, %bb1 ], [ 3, %0 ]
|
||||
* @endcode
|
||||
* All PHI nodes are performed in parallel but we need for the back-end an
|
||||
* equivalent sequential processing. A solution is to create new PHI nodes in a
|
||||
* new basic block and update the dependencies. Something like this:
|
||||
* @code
|
||||
* .bb.phi2seq.pre:
|
||||
* %C.phi2seq.tmp = phi i32 [ %C, %bb1 ]
|
||||
* br label %.bb
|
||||
*
|
||||
* .bb:
|
||||
* %A = phi i32 [ %B, %.bb.phi2seq.pre ], [ 1, %0 ]
|
||||
* %B = phi i32 [ %C.phi2seq.tmp, %.bb.phi2seq.pre ], [ 2, %0 ]
|
||||
* %C = phi i32 [ %A, %.bb.phi2seq.pre ], [ 3, %0 ]
|
||||
* @endcode
|
||||
*
|
||||
* This optimization solves both of these sub-problems, thus making sequential
|
||||
* processing of PHI nodes possible.
|
||||
*/
|
||||
class PHI2Seq: public llvm::FunctionPass {
|
||||
public:
|
||||
PHI2Seq();
|
||||
virtual ~PHI2Seq() override;
|
||||
|
||||
static const char *getName() { return NAME; }
|
||||
|
||||
virtual bool runOnFunction(llvm::Function &func) override;
|
||||
virtual void getAnalysisUsage(llvm::AnalysisUsage &au) const override;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
private:
|
||||
/// Name of the optimization.
|
||||
static const char *NAME;
|
||||
|
||||
/**
|
||||
* @brief Structure for PHI node on which we substitute values.
|
||||
*/
|
||||
struct PHINodeToSubs {
|
||||
/**
|
||||
* @brief Constructs a new @c PHINodeToSubs.
|
||||
*
|
||||
* @param[in] phiNodeToSubs PHI node to substitute.
|
||||
* @param[in] oldValue Old value that will be substituted.
|
||||
* @param[in] newValue New value to substitute.
|
||||
*/
|
||||
PHINodeToSubs(llvm::PHINode *phiNodeToSubs, llvm::Value *oldValue,
|
||||
llvm::Value *newValue): phiNodeToSubs(phiNodeToSubs),
|
||||
oldValue(oldValue), newValue(newValue) {}
|
||||
|
||||
/// PHI node to substitute.
|
||||
llvm::PHINode *phiNodeToSubs;
|
||||
|
||||
/// Old value that will be substituted.
|
||||
llvm::Value *oldValue;
|
||||
|
||||
/// New value to substitute.
|
||||
llvm::Value *newValue;
|
||||
};
|
||||
|
||||
/// Vector of @c PHINodeToSubs.
|
||||
using PHINodeToSubsVec = std::vector<PHINodeToSubs>;
|
||||
|
||||
private:
|
||||
llvm::BasicBlock &createPreBBAndSolveConnection(
|
||||
const VarDependAnalysis::BBVecOfPHINodes &bbWithPHINodesVec,
|
||||
llvm::BasicBlock &currBB);
|
||||
void createTmpPHINodes(llvm::IRBuilder<> &builder,
|
||||
const VarDependAnalysis::BBVecOfPHINodes &bbWithPHINodesVec);
|
||||
void initVarDependAnalysis(llvm::BasicBlock &bb);
|
||||
void iteratePHINodesAndInitVarDependAnalysis(llvm::BasicBlock &bb);
|
||||
void iterateIncValuesAndInitVarDependAnalysis(llvm::PHINode &phiNode);
|
||||
void orderDependentPHINodes(llvm::BasicBlock &bb,
|
||||
const VarDependAnalysis::PHINodeVec &nonCyclesDependResult);
|
||||
void orderDependentPHINodesAndSolveCycles(llvm::BasicBlock &bb);
|
||||
void replaceValueForPHINode(llvm::PHINode &phiNodeToUpdate, llvm::Value
|
||||
&oldValue, llvm::Value &newValue, llvm::BasicBlock &pred);
|
||||
void solveCycleVarDependency(llvm::BasicBlock &bb,
|
||||
const VarDependAnalysis::StringBBVecOfPHINodesMap &cyclesDetectResult);
|
||||
void updateBBTermInstr(llvm::BasicBlock &bbToUpdate, llvm::BasicBlock
|
||||
&oldSucc, llvm::BasicBlock &newSucc);
|
||||
void updateBBWithCycle(llvm::BasicBlock &bb, llvm::BasicBlock &oldBB, llvm::
|
||||
BasicBlock &newBB);
|
||||
void updatePredecessorsInPHINodes(llvm::BasicBlock &bb, llvm::BasicBlock
|
||||
&oldBB, llvm::BasicBlock &newBB);
|
||||
|
||||
private:
|
||||
/// PHI nodes dependency analysis.
|
||||
VarDependAnalysis varDependAnalysis;
|
||||
|
||||
/// Vector of PHI nodes on which values are substituted.
|
||||
PHINodeToSubsVec phiNodeToSubsVec;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,26 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/provider_init/provider_init.h
|
||||
* @brief One time providers initialization.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_PROVIDER_INIT_PROVIDER_INIT_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_PROVIDER_INIT_PROVIDER_INIT_H
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class ProviderInitialization : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
ProviderInitialization();
|
||||
virtual bool runOnModule(llvm::Module& m) override;
|
||||
virtual bool doFinalization(llvm::Module& m) override;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
58
include/bin2llvmir/optimizations/register/register.h
Normal file
58
include/bin2llvmir/optimizations/register/register.h
Normal file
@ -0,0 +1,58 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/register/register.h
|
||||
* @brief Solve register pseudo functions.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_REGISTER_REGISTER_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_REGISTER_REGISTER_H
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/analyses/symbolic_tree.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class RegisterAnalysis : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
RegisterAnalysis();
|
||||
virtual bool runOnModule(llvm::Module& m) override;
|
||||
bool runOnModuleCustom(
|
||||
llvm::Module& m,
|
||||
Config* c);
|
||||
|
||||
private:
|
||||
bool run();
|
||||
bool x86FpuAnalysis();
|
||||
bool x86FpuAnalysisBb(
|
||||
tl_cpputils::NonIterableSet<llvm::BasicBlock*>& seenBbs,
|
||||
llvm::BasicBlock* bb,
|
||||
int topVal);
|
||||
|
||||
bool isRegisterStoreFunction(llvm::Function* f);
|
||||
bool isRegisterLoadFunction(llvm::Function* f);
|
||||
llvm::CallInst* isRegisterStoreFunctionCall(llvm::Value* val);
|
||||
llvm::CallInst* isRegisterLoadFunctionCall(llvm::Value* val);
|
||||
std::string getRegisterClass(llvm::Function* f);
|
||||
llvm::GlobalVariable* getLlvmRegister(
|
||||
const std::string& regClass,
|
||||
unsigned regNum);
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
llvm::GlobalVariable* top = nullptr;
|
||||
|
||||
const std::string _regStoreFncName = "__frontend_reg_store";
|
||||
const std::string _regLoadFncName = "__frontend_reg_load";
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,38 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/select_functions/select_functions.h
|
||||
* @brief If ranges or functions are selected in config, remove bodies of all
|
||||
* functions that are not selected.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_SELECT_FUNCTIONS_SELECT_FUNCTIONS_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_SELECT_FUNCTIONS_SELECT_FUNCTIONS_H
|
||||
|
||||
#include <set>
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class SelectFunctions : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
SelectFunctions();
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
bool runOnModuleCustom(llvm::Module& M, Config* c);
|
||||
|
||||
private:
|
||||
bool findNotReturningFunctions(llvm::Module& M);
|
||||
bool run(llvm::Module& M);
|
||||
|
||||
private:
|
||||
Config* _config = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
234
include/bin2llvmir/optimizations/simple_types/simple_types.h
Normal file
234
include/bin2llvmir/optimizations/simple_types/simple_types.h
Normal file
@ -0,0 +1,234 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/simple_types/simple_types.h
|
||||
* @brief Simple type reconstruction analysis.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_SIMPLE_TYPES_SIMPLE_TYPES_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_SIMPLE_TYPES_SIMPLE_TYPES_H
|
||||
|
||||
#include <functional>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class ValueEntry;
|
||||
class TypeEntry;
|
||||
class EquationEntry;
|
||||
class EqSet;
|
||||
class EqSetContainer;
|
||||
|
||||
/**
 * Priority of data type sources.
 * The greater the enumerator value, the higher the priority.
 */
enum class eSourcePriority
{
    PRIORITY_NONE = 0,
    PRIORITY_LTI = 1,
    PRIORITY_DEBUG = 2
};
|
||||
|
||||
/**
|
||||
* Entry representing one value in @c EqSet.
|
||||
*/
|
||||
class ValueEntry
|
||||
{
|
||||
public:
|
||||
ValueEntry(llvm::Value* v = nullptr, eSourcePriority p = eSourcePriority::PRIORITY_NONE);
|
||||
llvm::Type* getTypeForPropagation() const;
|
||||
bool operator==(const ValueEntry& o) const;
|
||||
bool operator<(const ValueEntry& o) const;
|
||||
std::size_t hash() const;
|
||||
friend std::ostream& operator<<(std::ostream& out, const ValueEntry& ve);
|
||||
|
||||
public:
|
||||
llvm::Value* value = nullptr;
|
||||
eSourcePriority priority = eSourcePriority::PRIORITY_NONE;
|
||||
};
|
||||
struct ValueEntryHash
|
||||
{
|
||||
std::size_t operator() (const ValueEntry& v) const { return v.hash(); }
|
||||
};
|
||||
|
||||
/**
|
||||
* Entry representing one data type in @c EqSet.
|
||||
*/
|
||||
class TypeEntry
|
||||
{
|
||||
public:
|
||||
TypeEntry(llvm::Type* t = nullptr, eSourcePriority p = eSourcePriority::PRIORITY_NONE);
|
||||
bool operator==(const TypeEntry& o) const;
|
||||
bool operator<(const TypeEntry& o) const;
|
||||
std::size_t hash() const;
|
||||
friend std::ostream& operator<<(std::ostream& out, const TypeEntry& te);
|
||||
|
||||
public:
|
||||
llvm::Type* type = nullptr;
|
||||
eSourcePriority priority = eSourcePriority::PRIORITY_NONE;
|
||||
};
|
||||
struct TypeEntryHash
|
||||
{
|
||||
std::size_t operator() (const TypeEntry& t) const { return t.hash(); }
|
||||
};
|
||||
|
||||
/**
|
||||
* Entry representing equation (relation) between two equivalence sets.
|
||||
*/
|
||||
class EquationEntry
|
||||
{
|
||||
public:
|
||||
static EquationEntry otherIsPtrToThis(EqSet* o);
|
||||
static EquationEntry thisIsPtrToOther(EqSet* o);
|
||||
|
||||
bool operator==(const EquationEntry& o) const;
|
||||
bool operator<(const EquationEntry& o) const;
|
||||
std::size_t hash() const;
|
||||
friend std::ostream& operator<<(std::ostream& out, const EquationEntry& ee);
|
||||
|
||||
bool isOtherIsPtrToThis();
|
||||
bool isThisIsPtrToOther();
|
||||
|
||||
public:
|
||||
EqSet* other;
|
||||
|
||||
private:
|
||||
enum class eqType
|
||||
{
|
||||
otherIsPtrToThis,
|
||||
thisIsPtrToOther
|
||||
};
|
||||
|
||||
private:
|
||||
EquationEntry(EqSet* o, eqType t);
|
||||
|
||||
private:
|
||||
eqType type;
|
||||
};
|
||||
struct EquationEntryHash
|
||||
{
|
||||
std::size_t operator() (const EquationEntry& e) const { return e.hash(); }
|
||||
};
|
||||
|
||||
using ValueEntrySet = std::unordered_set<ValueEntry, ValueEntryHash>;
|
||||
using TypeEntrySet = std::unordered_set<TypeEntry, TypeEntryHash>;
|
||||
using EquationEntrySet = std::unordered_set<EquationEntry, EquationEntryHash>;
|
||||
|
||||
/**
|
||||
* Equivalence set -- object in set have to same type.
|
||||
*/
|
||||
class EqSet
|
||||
{
|
||||
public:
|
||||
EqSet();
|
||||
void insert(Config* config, llvm::Value* v, eSourcePriority p = eSourcePriority::PRIORITY_NONE);
|
||||
void insert(llvm::Type* t, eSourcePriority p = eSourcePriority::PRIORITY_NONE);
|
||||
void propagate(llvm::Module* module);
|
||||
void apply(
|
||||
llvm::Module* module,
|
||||
Config* config,
|
||||
FileImage* objf,
|
||||
UnorderedInstSet& instToErase);
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& out, const EqSet& eq);
|
||||
|
||||
private:
|
||||
llvm::Type* getHigherPriorityType(
|
||||
llvm::Module* module,
|
||||
llvm::Type* t1,
|
||||
llvm::Type* t2);
|
||||
llvm::Type* getHigherPriorityTypePrivate(
|
||||
llvm::Module* module,
|
||||
llvm::Type* t1,
|
||||
llvm::Type* t2,
|
||||
UnorderedTypeSet& seen);
|
||||
|
||||
public:
|
||||
/// Each instance gets its own unique ID for debug print purposes.
|
||||
static unsigned newUID;
|
||||
const unsigned id;
|
||||
|
||||
/// Type of an entire equivalence set.
|
||||
TypeEntry masterType;
|
||||
/// Values in the set.
|
||||
ValueEntrySet valSet;
|
||||
/// This allows to add certain types to set without having a value for them.
|
||||
TypeEntrySet typeSet;
|
||||
/// This allows to propagate type to another equivalence set, which may not
|
||||
/// have the same type as this set. E.g. this=pointer(other).
|
||||
EquationEntrySet equationSet;
|
||||
};
|
||||
|
||||
/**
|
||||
* Equivalence sets container.
|
||||
*/
|
||||
class EqSetContainer
|
||||
{
|
||||
public:
|
||||
EqSet& createEmptySet();
|
||||
void propagate(llvm::Module* module);
|
||||
void apply(
|
||||
llvm::Module* module,
|
||||
Config* config,
|
||||
FileImage* objf,
|
||||
UnorderedInstSet& valsToErase);
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& out, const EqSetContainer& eqs);
|
||||
|
||||
public:
|
||||
std::list<EqSet> eqSets;
|
||||
};
|
||||
|
||||
using ValueMap = std::unordered_map<llvm::Value*, EqSet*>;
|
||||
using ValuePair = std::pair<llvm::Value*, llvm::Value*>;
|
||||
using ValuePairList = std::list<ValuePair>;
|
||||
|
||||
/**
|
||||
* Simple data type analysis.
|
||||
*/
|
||||
class SimpleTypesAnalysis : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
SimpleTypesAnalysis();
|
||||
|
||||
virtual bool runOnModule(llvm::Module& m) override;
|
||||
virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override;
|
||||
|
||||
private:
|
||||
void buildEqSets(llvm::Module& M);
|
||||
void buildEquations();
|
||||
void processRoot(llvm::Value* root);
|
||||
void processValue(std::queue<llvm::Value*>& toProcess, EqSet& eqSet);
|
||||
void processUse(llvm::Value* c, llvm::Value* x, std::queue<llvm::Value*>& toProcess, EqSet& eqSet);
|
||||
void eraseObsoleteInstructions();
|
||||
void setGlobalConstants();
|
||||
|
||||
private:
|
||||
ValueMap processedObjs;
|
||||
EqSetContainer eqSets;
|
||||
ValuePairList val2PtrVal;
|
||||
|
||||
ReachingDefinitionsAnalysis RDA;
|
||||
llvm::Module* module = nullptr;
|
||||
const llvm::GlobalVariable* _specialGlobal = nullptr;
|
||||
Config* config = nullptr;
|
||||
FileImage* objf = nullptr;
|
||||
|
||||
UnorderedInstSet instToErase;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
64
include/bin2llvmir/optimizations/stack/stack.h
Normal file
64
include/bin2llvmir/optimizations/stack/stack.h
Normal file
@ -0,0 +1,64 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/stack/stack.h
|
||||
* @brief Reconstruct stack.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_STACK_STACK_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_STACK_STACK_H
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/analyses/symbolic_tree.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/debugformat.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* TODO:
|
||||
* At the moment, this is very similar to ConstantsAnalysis -> merge together.
|
||||
*/
|
||||
class StackAnalysis : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
StackAnalysis();
|
||||
virtual bool runOnModule(llvm::Module& m) override;
|
||||
bool runOnModuleCustom(
|
||||
llvm::Module& m,
|
||||
Config* c,
|
||||
DebugFormat* dbgf = nullptr);
|
||||
|
||||
private:
|
||||
struct ReplaceItem
|
||||
{
|
||||
llvm::Instruction* inst;
|
||||
llvm::Value* from;
|
||||
llvm::AllocaInst* to;
|
||||
};
|
||||
|
||||
private:
|
||||
bool run();
|
||||
bool runOnFunction(ReachingDefinitionsAnalysis& RDA, llvm::Function* f);
|
||||
bool handleInstruction(
|
||||
ReachingDefinitionsAnalysis& RDA,
|
||||
llvm::Instruction* inst,
|
||||
llvm::Value* val,
|
||||
llvm::Type* type,
|
||||
std::list<ReplaceItem>& _replaceItems,
|
||||
std::map<llvm::Value*, llvm::Value*>& val2val);
|
||||
retdec_config::Object* getDebugStackVariable(
|
||||
llvm::Function* fnc,
|
||||
SymbolicTree& root);
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
DebugFormat* _dbgf = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,38 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/stack_pointer_ops/stack_pointer_ops.h
|
||||
* @brief Remove the remaining stack pointer operations.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_STACK_POINTER_OPS_STACK_POINTER_OPS_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_STACK_POINTER_OPS_STACK_POINTER_OPS_H
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class StackPointerOpsRemove : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
StackPointerOpsRemove();
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
bool runOnModuleCustom(llvm::Module& M, Config* c);
|
||||
|
||||
private:
|
||||
bool run();
|
||||
bool removeStackPointerStores();
|
||||
bool removePreservationStores();
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,41 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/stack_protect/stack_protect.h
|
||||
* @brief Protect stack variables from LLVM optimization passes.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_STACK_PROTECT_STACK_PROTECT_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_STACK_PROTECT_STACK_PROTECT_H
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class StackProtect : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
StackProtect();
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
bool runOnModuleCustom(llvm::Module& M, Config* c);
|
||||
|
||||
private:
|
||||
bool run();
|
||||
bool protectStack();
|
||||
bool unprotectStack(llvm::Function* fnc);
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
|
||||
std::string _fncName = "__decompiler_undefined_function_";
|
||||
static std::map<llvm::Type*, llvm::Function*> _type2fnc;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
49
include/bin2llvmir/optimizations/syscalls/syscalls.h
Normal file
49
include/bin2llvmir/optimizations/syscalls/syscalls.h
Normal file
@ -0,0 +1,49 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/syscalls/syscalls.h
|
||||
* @brief Implement syscall identification and fixing pass @c SyscallFixer.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_SYSCALLS_SYSCALLS_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_SYSCALLS_SYSCALLS_H
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
#include "bin2llvmir/providers/lti.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class AsmInstruction;
|
||||
|
||||
class SyscallFixer : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
SyscallFixer();
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
bool runOnModuleCustom(
|
||||
llvm::Module& M,
|
||||
Config* c,
|
||||
FileImage* img,
|
||||
Lti* lti);
|
||||
|
||||
private:
|
||||
bool run();
|
||||
bool runMips();
|
||||
bool runArm();
|
||||
bool runX86();
|
||||
bool x86TransformToDummySyscall(AsmInstruction& ai);
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
FileImage* _image = nullptr;
|
||||
Lti* _lti = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,39 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/type_conversions/type_conversions.h
|
||||
* @brief Removes unnecessary data type conversions.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_TYPE_CONVERSIONS_TYPE_CONVERSIONS_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_TYPE_CONVERSIONS_TYPE_CONVERSIONS_H
|
||||
|
||||
#include <llvm/IR/Function.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class TypeConversions : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
TypeConversions();
|
||||
virtual bool doInitialization(llvm::Module& M) override;
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
bool runOnFunction(llvm::Function& F);
|
||||
|
||||
private:
|
||||
bool runInInstruction(llvm::Instruction* instr);
|
||||
bool replaceByShortcut(
|
||||
llvm::Instruction* start,
|
||||
llvm::Instruction* lastGood,
|
||||
unsigned cntr);
|
||||
bool removePtrToIntToPtr(llvm::Instruction* instr);
|
||||
|
||||
private:
|
||||
llvm::Module* _module;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
@ -0,0 +1,66 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/unreachable_funcs/unreachable_funcs.h
|
||||
* @brief Removes functions that are unreachable from main.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_UNREACHABLE_FUNCS_UNREACHABLE_FUNCS_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_UNREACHABLE_FUNCS_UNREACHABLE_FUNCS_H
|
||||
|
||||
#include <llvm/IR/InstVisitor.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* @brief Removes unreachable functions from main.
|
||||
*
|
||||
* @code
|
||||
* void func1() { ... } <- Not calls func2.
|
||||
* void func2() { ... } <- Unreachable function. Can be optimized.
|
||||
* int main() {
|
||||
* func1();
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
class UnreachableFuncs: public llvm::ModulePass {
|
||||
public:
|
||||
static char ID;
|
||||
UnreachableFuncs();
|
||||
|
||||
virtual bool runOnModule(llvm::Module &module) override;
|
||||
virtual void getAnalysisUsage(llvm::AnalysisUsage &au) const override;
|
||||
|
||||
static const char *getName() { return NAME; }
|
||||
|
||||
private:
|
||||
void initializeMainFunc(llvm::Module &module);
|
||||
bool optimizationCanRun() const;
|
||||
FuncSet getReachableFuncs(llvm::Function &startFunc,
|
||||
llvm::Module &module) const;
|
||||
void removeFuncsThatCanBeOptimized(
|
||||
const FuncSet &funcsThatCannotBeOptimized,
|
||||
llvm::Module &module) const;
|
||||
FuncSet getFuncsThatCannotBeOptimized(
|
||||
const FuncSet &reachableFuncs, llvm::Module &module) const;
|
||||
FuncSet getFuncsThatCanBeOptimized(
|
||||
const FuncSet funcsThatCannotBeOptimized,
|
||||
llvm::Module &module) const;
|
||||
void removeFuncsFromModule(const FuncSet &funcsToRemove) const;
|
||||
|
||||
private:
|
||||
/// Name of the optimization.
|
||||
static const char *NAME;
|
||||
|
||||
/// The main function.
|
||||
llvm::Function *mainFunc;
|
||||
|
||||
Config* config = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
56
include/bin2llvmir/optimizations/volatilize/volatilize.h
Normal file
56
include/bin2llvmir/optimizations/volatilize/volatilize.h
Normal file
@ -0,0 +1,56 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/volatilize/volatilize.h
|
||||
* @brief Make all loads and stores volatile to protect them.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_VOLATILIZE_VOLATILIZE_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_VOLATILIZE_VOLATILIZE_H
|
||||
|
||||
#include <set>
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* This pass may either volatilize (state 1) or unvolatilize (state 2)
|
||||
* all loads and stores, depending on the state it is in.
|
||||
*
|
||||
* State 1 (default state) (volatilize):
|
||||
* All loads and stores are volatilized to protect them from bin2llvmirl
|
||||
* optimizations. Operations which already have been volatile before
|
||||
* this process are noted. State is changed to 2.
|
||||
*
|
||||
* State 2 (unvolatilize):
|
||||
* All loads and stores which are not noted (were not volatile before
|
||||
* state 1) are unvolatilized. State is changed to 1.
|
||||
*
|
||||
* Typical usage in pass chain:
|
||||
* (state 1) -volatilize (state 2 = operations protected)
|
||||
* BIN2LLVMIRL_PASSES (simplify LLVM IR but do not remove memory accesses)
|
||||
* DECOMPILER_PASSES
|
||||
* (state 2) -volatilize (state 1 = operations unprotected)
|
||||
*/
|
||||
class Volatilize : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
Volatilize();
|
||||
virtual bool runOnModule(llvm::Module& M) override;
|
||||
|
||||
private:
|
||||
bool volatilize(llvm::Module& M);
|
||||
bool unvolatilize(llvm::Module& M);
|
||||
|
||||
private:
|
||||
static bool _doVolatilization;
|
||||
static UnorderedValSet _alreadyVolatile;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
57
include/bin2llvmir/optimizations/vtable/rtti_analysis.h
Normal file
57
include/bin2llvmir/optimizations/vtable/rtti_analysis.h
Normal file
@ -0,0 +1,57 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/vtable/rtti_analysis.h
|
||||
* @brief Search for RTTI in input file.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_VTABLE_RTTI_ANALYSIS_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_VTABLE_RTTI_ANALYSIS_H
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
#include "tl-cpputils/address.h"
|
||||
#include "bin2llvmir/optimizations/data_references/data_references.h"
|
||||
#include "bin2llvmir/optimizations/vtable/rtti_gcc.h"
|
||||
#include "bin2llvmir/optimizations/vtable/rtti_msvc.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class RttiAnalysis
|
||||
{
|
||||
public:
|
||||
~RttiAnalysis();
|
||||
|
||||
ClassTypeInfo* parseGccRtti(
|
||||
loader::Image* objfile,
|
||||
DataReferences* RA,
|
||||
tl_cpputils::Address rttiAddr);
|
||||
void processGccRttis();
|
||||
|
||||
RTTICompleteObjectLocator* parseMsvcObjectLocator(
|
||||
loader::Image* objfile,
|
||||
tl_cpputils::Address rttiAddr);
|
||||
void processMsvcRttis();
|
||||
|
||||
private:
|
||||
RTTITypeDescriptor* parseMsvcTypeDescriptor(
|
||||
tl_cpputils::Address typeDescriptorAddr);
|
||||
RTTIClassHierarchyDescriptor* parseMsvcClassDescriptor(
|
||||
tl_cpputils::Address classDescriptorAddr);
|
||||
RTTIBaseClassDescriptor* parseMsvcBaseClassDescriptor(
|
||||
tl_cpputils::Address baseDescriptorAddr);
|
||||
|
||||
public:
|
||||
std::map<tl_cpputils::Address, ClassTypeInfo*> gccRttis;
|
||||
std::map<tl_cpputils::Address, RTTICompleteObjectLocator> msvcObjLocators;
|
||||
std::map<tl_cpputils::Address, RTTITypeDescriptor> msvcTypeDescriptors;
|
||||
std::map<tl_cpputils::Address, RTTIBaseClassDescriptor> msvcBaseClassDescriptors;
|
||||
std::map<tl_cpputils::Address, RTTIClassHierarchyDescriptor> msvcClassDescriptors;
|
||||
|
||||
private:
|
||||
loader::Image *objf = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
172
include/bin2llvmir/optimizations/vtable/rtti_gcc.h
Normal file
172
include/bin2llvmir/optimizations/vtable/rtti_gcc.h
Normal file
@ -0,0 +1,172 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/vtable/rtti_gcc.h
|
||||
* @brief Search for gcc&clang RTTI in input file.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*
|
||||
* @note See ABI: http://mentorembedded.github.io/cxx-abi/abi.html#rtti
|
||||
* UML visualization is in decompiler/doc/references/rtti_itanium
|
||||
*
|
||||
* TODO:
|
||||
* In ABI, there are more classes derived from type_info like
|
||||
* __fundamental_type_info or __array_type_info.
|
||||
* These are not for user-defined virtual classes, but for other
|
||||
* (simpler) types.
|
||||
* Maybe it would be possible to parse them and use them somehow,
|
||||
* but I do not know how.
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_VTABLE_RTTI_GCC_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_VTABLE_RTTI_GCC_H
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "tl-cpputils/address.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class TypeInfo;
|
||||
class ClassTypeInfo;
|
||||
class SiClassTypeInfo;
|
||||
class VmiClassTypeInfo;
|
||||
class BaseClassTypeInfo;
|
||||
|
||||
/**
|
||||
* ABI: @c type_info
|
||||
*/
|
||||
class TypeInfo
|
||||
{
|
||||
// ABI specification.
|
||||
//
|
||||
public:
|
||||
virtual ~TypeInfo();
|
||||
bool operator==(const TypeInfo& o) const;
|
||||
bool operator!=(const TypeInfo& o) const;
|
||||
|
||||
public:
|
||||
/// Pointer (address) of virtual table for this @c TypeInfo instance.
|
||||
tl_cpputils::Address vtableAddr;
|
||||
/// NTBS (null-terminated byte string) address.
|
||||
tl_cpputils::Address nameAddr;
|
||||
|
||||
// Our methods and data.
|
||||
//
|
||||
public:
|
||||
virtual std::string dump() const;
|
||||
|
||||
public:
|
||||
/// Position of this @c TypeInfo entry.
|
||||
tl_cpputils::Address address;
|
||||
/// String from @c nameAddr position.
|
||||
std::string name;
|
||||
};
|
||||
|
||||
/**
|
||||
* ABI: @c __class_type_info
|
||||
*
|
||||
* Used for class types having no bases, and is also a base type for
|
||||
* the other two class type representations.
|
||||
*/
|
||||
class ClassTypeInfo : public TypeInfo
|
||||
{
|
||||
// ABI specification.
|
||||
//
|
||||
// empty
|
||||
|
||||
// Our methods and data.
|
||||
//
|
||||
public:
|
||||
virtual std::string dump() const override;
|
||||
};
|
||||
|
||||
/**
|
||||
* ABI: @c __si_class_type_info
|
||||
*
|
||||
* For classes containing only a single, public, non-virtual base
|
||||
* at offset zero.
|
||||
*/
|
||||
class SiClassTypeInfo : public ClassTypeInfo
|
||||
{
|
||||
// ABI specification.
|
||||
//
|
||||
public:
|
||||
/// Address of the base class @c TypeInfo structure.
|
||||
tl_cpputils::Address baseClassAddr;
|
||||
|
||||
// Our methods and data.
|
||||
//
|
||||
public:
|
||||
virtual std::string dump() const override;
|
||||
|
||||
public:
|
||||
/// Object created for base on address @c baseClassAddr.
|
||||
ClassTypeInfo* baseClass = nullptr;
|
||||
};
|
||||
|
||||
/**
|
||||
* ABI: @c __vmi_class_type_info
|
||||
*
|
||||
* For classes with bases that don't satisfy the @c SiClassTypeInfo constraints.
|
||||
*/
|
||||
class VmiClassTypeInfo : public ClassTypeInfo
|
||||
{
|
||||
// ABI specification.
|
||||
//
|
||||
public:
|
||||
enum eFlagMasks
|
||||
{
|
||||
NON_DIAMOND_REPEAT_MASK = 0x1,
|
||||
DIAMOND_SHAPED_MASK = 0x2
|
||||
};
|
||||
|
||||
public:
|
||||
/// Details about the class structure. Flags refer to both
|
||||
/// direct and indirect bases.
|
||||
uint32_t flags = 0;
|
||||
/// Number of direct proper base class descriptions that follow
|
||||
uint32_t baseCount = 0;
|
||||
std::vector<BaseClassTypeInfo> baseInfo;
|
||||
|
||||
// Our methods and data.
|
||||
//
|
||||
public:
|
||||
virtual std::string dump() const override;
|
||||
};
|
||||
|
||||
/**
|
||||
* ABI: @c __base_class_type_info
|
||||
*
|
||||
* Base class descriptions -- one for every direct proper base.
|
||||
*/
|
||||
class BaseClassTypeInfo
|
||||
{
|
||||
// ABI specification.
|
||||
//
|
||||
public:
|
||||
enum eOffsetFlagsMasks
|
||||
{
|
||||
BASE_IS_VIRTUAL = 0x1,
|
||||
BASE_IS_PUBLIC = 0x2
|
||||
};
|
||||
|
||||
public:
|
||||
/// Address of the base class @c TypeInfo structure.
|
||||
tl_cpputils::Address baseClassAddr;
|
||||
/// Low-order byte is @c eOffsetFlagsMasks flags.
|
||||
/// High 3 bytes are signed offset.
|
||||
uint32_t offsetFlags = 0;
|
||||
|
||||
// Our methods and data.
|
||||
//
|
||||
public:
|
||||
std::string dump() const;
|
||||
|
||||
public:
|
||||
/// Object created for base on address @c baseClassAddr.
|
||||
ClassTypeInfo* baseClass = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
167
include/bin2llvmir/optimizations/vtable/rtti_msvc.h
Normal file
167
include/bin2llvmir/optimizations/vtable/rtti_msvc.h
Normal file
@ -0,0 +1,167 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/vtable/rtti_msvc.h
|
||||
* @brief Search for msvc RTTI in input file.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*
|
||||
* http://www.openrce.org/articles/full_view/21
|
||||
* http://www.openrce.org/articles/full_view/23
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_VTABLE_RTTI_MSVC_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_VTABLE_RTTI_MSVC_H
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "tl-cpputils/address.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class RTTICompleteObjectLocator;
|
||||
class RTTITypeDescriptor;
|
||||
class RTTIClassHierarchyDescriptor;
|
||||
class RTTIBaseClassDescriptor;
|
||||
|
||||
/**
|
||||
* Describes a single C++ type
|
||||
*/
|
||||
class RTTITypeDescriptor
|
||||
{
|
||||
// ABI specification.
|
||||
//
|
||||
public:
|
||||
/// Virtual table of @c type_info class.
|
||||
tl_cpputils::Address vtableAddr;
|
||||
/// Used to keep the demangled name returned by type_info::name()
|
||||
tl_cpputils::Address spare;
|
||||
/// Mangled type name, e.g. ".H" = "int", ".?AVA@@" = "class A".
|
||||
std::string name;
|
||||
|
||||
// Our methods and data.
|
||||
//
|
||||
public:
|
||||
std::string dump() const;
|
||||
|
||||
public:
|
||||
/// Position of this @c RTTITypeDescriptor entry.
|
||||
tl_cpputils::Address address;
|
||||
/// Virtual table object on address @c vtableAddr.
|
||||
/// ...
|
||||
};
|
||||
|
||||
/**
|
||||
* Describes base class together with information which allows compiler
|
||||
* to cast the derived class to it during execution of the _dynamic_cast_.
|
||||
*/
|
||||
class RTTIBaseClassDescriptor
|
||||
{
|
||||
// ABI specification.
|
||||
//
|
||||
public:
|
||||
struct PMD
|
||||
{
|
||||
/// Member displacement.
|
||||
int32_t mdisp = 0;
|
||||
/// Vbtable displacement
|
||||
int32_t pdisp = 0;
|
||||
/// Displacement inside vbtable.
|
||||
int32_t vdisp = 0;
|
||||
};
|
||||
|
||||
public:
|
||||
/// Type descriptor of the class.
|
||||
tl_cpputils::Address typeDescriptorAddr;
|
||||
/// Number of nested classes following in the Base Class Array.
|
||||
uint32_t numContainedBases = 0;
|
||||
/// Pointer-to-member displacement info.
|
||||
PMD where;
|
||||
/// Flags, usually 0.
|
||||
uint32_t attributes = 0;
|
||||
|
||||
// Our methods and data.
|
||||
//
|
||||
public:
|
||||
std::string dump() const;
|
||||
|
||||
public:
|
||||
/// Position of this @c RTTIBaseClassDescriptor entry.
|
||||
tl_cpputils::Address address;
|
||||
/// RTTITypeDescriptor object on address @c typeDescriptorAddr.
|
||||
RTTITypeDescriptor* typeDescriptor = nullptr;
|
||||
};
|
||||
|
||||
/**
|
||||
* Descriptor describes the inheritance hierarchy of the class.
|
||||
* It is shared by all COLs for a class.
|
||||
*/
|
||||
class RTTIClassHierarchyDescriptor
|
||||
{
|
||||
// ABI specification.
|
||||
//
|
||||
public:
|
||||
/// Always zero?
|
||||
uint32_t signature = 0;
|
||||
/// Bit 0 set = multiple inheritance, bit 1 set = virtual inheritance.
|
||||
uint32_t attributes = 0;
|
||||
/// Number of classes in pBaseClassArray.
|
||||
uint32_t numBaseClasses = 0;
|
||||
/// Address of base class objects array.
|
||||
tl_cpputils::Address baseClassArrayAddr;
|
||||
|
||||
// Our methods and data.
|
||||
//
|
||||
public:
|
||||
std::string dump() const;
|
||||
|
||||
public:
|
||||
/// Position of this @c RTTIClassHierarchyDescriptor entry.
|
||||
tl_cpputils::Address address;
|
||||
/// Array of addresses of base class objects on address
|
||||
/// @c baseClassArrayAddr.
|
||||
std::vector<tl_cpputils::Address> baseClassArray;
|
||||
/// Base class objects - on addresses from @c baseClassArray.
|
||||
/// Together contain information which allows compiler to cast the
|
||||
/// derived class to any of them during execution of the _dynamic_cast_.
|
||||
std::vector<RTTIBaseClassDescriptor*> baseClasses;
|
||||
};
|
||||
|
||||
/**
|
||||
* Compiler puts pointer to this structure just before vftable.
|
||||
* This structure makes it possible to find the locations of the complete
|
||||
* object from a specific vftable pointer.
|
||||
*/
|
||||
class RTTICompleteObjectLocator
|
||||
{
|
||||
// ABI specification.
|
||||
//
|
||||
public:
|
||||
/// Always zero?
|
||||
uint32_t signature = 0;
|
||||
/// Offset of this vtable in the complete class.
|
||||
uint32_t offset = 0;
|
||||
/// Constructor displacement offset.
|
||||
uint32_t cdOffset = 0;
|
||||
/// Pointer (address) of type descriptor for this object.
|
||||
tl_cpputils::Address typeDescriptorAddr;
|
||||
/// Pointer (address) of class descriptor for this object.
|
||||
tl_cpputils::Address classDescriptorAddr;
|
||||
|
||||
// Our methods and data.
|
||||
//
|
||||
public:
|
||||
std::string dump() const;
|
||||
|
||||
public:
|
||||
/// Position of this @c RTTICompleteObjectLocator entry.
|
||||
tl_cpputils::Address address;
|
||||
/// RTTITypeDescriptor object on address @c typeDescriptorAddr.
|
||||
RTTITypeDescriptor* typeDescriptor = nullptr;
|
||||
/// RTTIClassHierarchyDescriptor object on address
|
||||
/// @c classDescriptorAddr.
|
||||
RTTIClassHierarchyDescriptor* classDescriptor = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
170
include/bin2llvmir/optimizations/vtable/vtable.h
Normal file
170
include/bin2llvmir/optimizations/vtable/vtable.h
Normal file
@ -0,0 +1,170 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/optimizations/vtable/vtable.h
|
||||
* @brief Search for vtables in input file.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_OPTIMIZATIONS_VTABLE_VTABLE_H
|
||||
#define BIN2LLVMIR_OPTIMIZATIONS_VTABLE_VTABLE_H
|
||||
|
||||
#include <map>
#include <ostream>
#include <set>
#include <string>
#include <vector>
|
||||
|
||||
#include <llvm/Pass.h>
|
||||
|
||||
#include "tl-cpputils/address.h"
|
||||
#include "bin2llvmir/optimizations/data_references/data_references.h"
|
||||
#include "bin2llvmir/optimizations/vtable/rtti_analysis.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* One item in virtual table.
|
||||
* Item must have at least address set.
|
||||
* If there is a function on this address, it can be also set.
|
||||
* However, it is possible that function on address was not yet detected.
|
||||
* In such a case, we can use this virtual table entry to detect function.
|
||||
*/
|
||||
class VtableItem
|
||||
{
|
||||
public:
|
||||
VtableItem(tl_cpputils::Address a, llvm::Function* f = nullptr);
|
||||
|
||||
public:
|
||||
tl_cpputils::Address address;
|
||||
llvm::Function* function = nullptr;
|
||||
};
|
||||
|
||||
/**
|
||||
* Virtual table comes in two flavors: 1) gcc&clang, 2) MSVC.
|
||||
* This is a base class for both of them.
|
||||
*/
|
||||
class Vtable
|
||||
{
|
||||
public:
|
||||
Vtable(tl_cpputils::Address a);
|
||||
virtual ~Vtable() {}
|
||||
|
||||
virtual std::string getName() const;
|
||||
friend std::ostream& operator<<(std::ostream &out, const Vtable &v);
|
||||
|
||||
public:
|
||||
tl_cpputils::Address vtableAddress;
|
||||
std::vector<VtableItem> virtualFncAddresses;
|
||||
llvm::GlobalVariable* global = nullptr;
|
||||
};
|
||||
|
||||
/**
|
||||
* gcc&clang virtual table structure ( [] means array of entries ):
|
||||
*
|
||||
* [virtual call (vcall) offsets]
|
||||
* [virtual base (vbase) offsets]
|
||||
* offset to top
|
||||
* typeinfo (RTTI) pointer
|
||||
* [virtual function pointers] <- vtable address in instances points here
|
||||
*
|
||||
*/
|
||||
class VtableGcc : public Vtable
|
||||
{
|
||||
public:
|
||||
VtableGcc(tl_cpputils::Address a);
|
||||
|
||||
friend std::ostream& operator<<(std::ostream &out, const VtableGcc &v);
|
||||
|
||||
public:
|
||||
std::vector<int> vcallOffsets; ///< TODO: not set/used right now
|
||||
std::vector<int> vbaseOffsets; ///< TODO: not set/used right now
|
||||
int topOffset = 0; ///< TODO: not set/used right now
|
||||
tl_cpputils::Address rttiAddress;
|
||||
// Vtable::virtualFncAddresses
|
||||
|
||||
ClassTypeInfo* rtti = nullptr;
|
||||
};
|
||||
|
||||
/**
|
||||
* MSVC virtual table structure ( [] means array of entries ):
|
||||
*
|
||||
* complete object locator address
|
||||
* [virtual function pointers] <- vtable address in instances points here
|
||||
*
|
||||
*/
|
||||
class VtableMsvc : public Vtable
|
||||
{
|
||||
public:
|
||||
VtableMsvc(tl_cpputils::Address a);
|
||||
|
||||
friend std::ostream& operator<<(std::ostream &out, const VtableMsvc &v);
|
||||
|
||||
public:
|
||||
tl_cpputils::Address objLocatorAddress;
|
||||
// Vtable::virtualFncAddresses
|
||||
|
||||
RTTICompleteObjectLocator* rtti = nullptr;
|
||||
};
|
||||
|
||||
/**
|
||||
* This pass finds vtables in the binary file.
|
||||
* Vtables may have slightly different structure (see class Vtable).
|
||||
*
|
||||
* To find both flavors with the single algorithm, we use these constraints:
|
||||
*
|
||||
* 1) We search for continuous sequence of references into code section/segment.
|
||||
* 2) At least one reference must point to function. At the end, all references
|
||||
* must be functions, but it is possible that the decompiler did not detect
|
||||
* them so far (stripped inputs) and some references are just to instructions.
|
||||
* This way, we can use vtables to detect functions and then rebuild them to
|
||||
* fix references.
|
||||
* 3) We iterate through all instructions and find stores that work with vtable
|
||||
* items.
|
||||
*/
|
||||
class VtableAnalysis : public llvm::ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID;
|
||||
VtableAnalysis();
|
||||
~VtableAnalysis();
|
||||
virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override;
|
||||
virtual bool runOnModule(llvm::Module &) override;
|
||||
|
||||
public:
|
||||
using VtableMap = std::map<tl_cpputils::Address, Vtable*>;
|
||||
using InstrToReferenceMap = std::map<llvm::Instruction*, tl_cpputils::Address>;
|
||||
using AddressSet = std::set<tl_cpputils::Address>;
|
||||
|
||||
public:
|
||||
const VtableMap& getVtableMap() const;
|
||||
Vtable* getVtableOnAddress(tl_cpputils::Address a) const;
|
||||
|
||||
private:
|
||||
void detectVtablesInData();
|
||||
void parseVtables();
|
||||
VtableGcc *createVtableGcc(tl_cpputils::Address a);
|
||||
VtableMsvc *createVtableMsvc(tl_cpputils::Address a);
|
||||
bool fillVtable(tl_cpputils::Address a, Vtable &vt);
|
||||
|
||||
void createFunctions();
|
||||
void createVtableStructures();
|
||||
void setVtablesToConfig();
|
||||
|
||||
public:
|
||||
RttiAnalysis rttiAnalysis;
|
||||
|
||||
private:
|
||||
VtableMap vtableMap;
|
||||
InstrToReferenceMap instrToRef;
|
||||
AddressSet possibleVtableAddresses;
|
||||
AddressSet processedAddresses;
|
||||
|
||||
llvm::Module *module = nullptr;
|
||||
Config* config = nullptr;
|
||||
FileImage* objf = nullptr;
|
||||
DataReferences* RA = nullptr;
|
||||
|
||||
bool msvc = false;
|
||||
bool gcc = false;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
138
include/bin2llvmir/providers/abi.h
Normal file
138
include/bin2llvmir/providers/abi.h
Normal file
@ -0,0 +1,138 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/providers/abi.h
|
||||
* @brief Module provides ABI information.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_PROVIDERS_ABI_H
|
||||
#define BIN2LLVMIR_PROVIDERS_ABI_H
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
|
||||
#include "retdec-config/architecture.h"
|
||||
#include "retdec-config/calling_convention.h"
|
||||
#include "retdec-config/tool_info.h"
|
||||
#include "tl-cpputils/value.h"
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class Abi
|
||||
{
|
||||
public:
|
||||
static Abi armCdecl(llvm::Module* m, retdec_config::Architecture& a);
|
||||
static Abi ppcCdecl(llvm::Module* m, retdec_config::Architecture& a);
|
||||
static Abi x86Cdecl(llvm::Module* m, retdec_config::Architecture& a);
|
||||
static Abi x86Fastcall(llvm::Module* m, retdec_config::Architecture& a);
|
||||
static Abi x86Stdcall(llvm::Module* m, retdec_config::Architecture& a);
|
||||
static Abi mipsCdecl(llvm::Module* m, retdec_config::Architecture& a);
|
||||
static Abi mipsLlvmCdecl(llvm::Module* m, retdec_config::Architecture& a);
|
||||
static Abi mipsPic32Cdecl(llvm::Module* m, retdec_config::Architecture& a);
|
||||
static Abi mipsPspCdecl(llvm::Module* m, retdec_config::Architecture& a);
|
||||
|
||||
public:
|
||||
const retdec_config::Architecture& getArchitecture() const;
|
||||
const retdec_config::CallingConvention& getCallingConvention() const;
|
||||
|
||||
tl_cpputils::Maybe<size_t> getAlignedBitSize(llvm::Type* type) const;
|
||||
llvm::Type* getAlignedType(llvm::Type* type) const;
|
||||
|
||||
bool isStackDirectionUnknown() const;
|
||||
bool isStackDirectionLeft2Right() const;
|
||||
bool isStackDirectionRight2Left() const;
|
||||
|
||||
llvm::GlobalVariable* getStackPointer() const;
|
||||
tl_cpputils::Maybe<int> getParameterStartStackOffset() const;
|
||||
tl_cpputils::Maybe<int> getParameterStackAlignment() const;
|
||||
|
||||
bool isReturnAddressInRegister() const;
|
||||
bool isReturnAddressOnStack() const;
|
||||
llvm::GlobalVariable* getReturnAddressRegister() const;
|
||||
tl_cpputils::Maybe<int> getReturnAddressStackOffset() const;
|
||||
|
||||
bool isReturnValueInRegisters(llvm::Type* type) const;
|
||||
bool isReturnValueOnStack(llvm::Type* type) const;
|
||||
const RegisterCouple* getReturnValueRegister(llvm::Type* type) const;
|
||||
const std::pair<int, unsigned>* getReturnValueOnStack(llvm::Type* type) const;
|
||||
|
||||
const std::map<llvm::Type*, std::vector<RegisterCouple>>&
|
||||
getTypeToArgumentRegs() const;
|
||||
const std::vector<RegisterCouple>* getArgumentRegs(llvm::Type* type) const;
|
||||
bool hasArgumentRegs(llvm::Type* type) const;
|
||||
bool hasArgumentRegs() const;
|
||||
int getArgumentStackOffset(llvm::Type* type) const;
|
||||
|
||||
private:
|
||||
enum class eStackDirection
|
||||
{
|
||||
UNKNOWN,
|
||||
LEFT_2_RIGHT,
|
||||
RIGHT_2_LEFT
|
||||
};
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
llvm::Type* _defaultType = nullptr;
|
||||
retdec_config::Architecture _arch;
|
||||
retdec_config::CallingConvention _cc;
|
||||
llvm::Type* _defaultAlignType = nullptr;
|
||||
eStackDirection _stackDirection = eStackDirection::UNKNOWN;
|
||||
llvm::GlobalVariable* _stackPointer = nullptr;
|
||||
tl_cpputils::Maybe<int> _parameterStartOffset;
|
||||
tl_cpputils::Maybe<int> _parameterStackAlignment;
|
||||
llvm::GlobalVariable* _returnAddressReg = nullptr;
|
||||
tl_cpputils::Maybe<int> _returnAddressStackOffset;
|
||||
std::map<llvm::Type*, RegisterCouple> _typeToRetValInReg;
|
||||
std::map<llvm::Type*, std::pair<int, unsigned>> _typeToRetValOnStack;
|
||||
std::map<llvm::Type*, std::vector<RegisterCouple>> _typeToArgumentRegs;
|
||||
std::map<llvm::Type*, int> _typeToArgumentStackOffset;
|
||||
};
|
||||
|
||||
class ModuleAbis
|
||||
{
|
||||
public:
|
||||
ModuleAbis(
|
||||
llvm::Module* module,
|
||||
const retdec_config::Architecture& arch,
|
||||
const retdec_config::ToolInfoContainer& tools,
|
||||
const std::vector<std::string>& abis = std::vector<std::string>());
|
||||
|
||||
Abi* getAbi(retdec_config::CallingConvention cc);
|
||||
bool getAbi(retdec_config::CallingConvention cc, Abi*& abi);
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
retdec_config::Architecture _arch;
|
||||
retdec_config::ToolInfo _tool;
|
||||
std::map<retdec_config::CallingConvention, Abi> _abis;
|
||||
};
|
||||
|
||||
class AbiProvider
|
||||
{
|
||||
public:
|
||||
static ModuleAbis* addAbis(
|
||||
llvm::Module* module,
|
||||
const retdec_config::Architecture& arch,
|
||||
const retdec_config::ToolInfoContainer& tools,
|
||||
const std::vector<std::string>& abis = std::vector<std::string>());
|
||||
|
||||
static ModuleAbis* getAbis(llvm::Module* module);
|
||||
static bool getAbis(llvm::Module* module, ModuleAbis*& abis);
|
||||
|
||||
static Abi* getAbi(
|
||||
llvm::Module* module,
|
||||
retdec_config::CallingConvention cc);
|
||||
static bool getAbi(
|
||||
llvm::Module* module,
|
||||
retdec_config::CallingConvention cc,
|
||||
Abi*& abi);
|
||||
|
||||
static void clear();
|
||||
|
||||
private:
|
||||
static std::map<llvm::Module*, ModuleAbis> _module2abis;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
361
include/bin2llvmir/providers/asm_instruction.h
Normal file
361
include/bin2llvmir/providers/asm_instruction.h
Normal file
@ -0,0 +1,361 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/providers/asm_instruction.h
|
||||
* @brief Mapping of LLVM instructions to underlying ASM instructions.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_PROVIDERS_ASM_INSTRUCTION_H
|
||||
#define BIN2LLVMIR_PROVIDERS_ASM_INSTRUCTION_H
|
||||
|
||||
#include <capstone/capstone.h>
|
||||
#include <llvm/IR/Instructions.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
|
||||
#include "llvm-support/utils.h"
|
||||
#include "tl-cpputils/address.h"
|
||||
#include "tl-cpputils/value.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class Config;
|
||||
|
||||
class AsmInstruction
|
||||
{
|
||||
public:
|
||||
template<
|
||||
typename Category,
|
||||
typename Type,
|
||||
typename Reference = Type&,
|
||||
typename Pointer = Type*,
|
||||
typename Distance = std::ptrdiff_t>
|
||||
class iterator_impl
|
||||
{
|
||||
public:
|
||||
using difference_type = Distance;
|
||||
using value_type = Type;
|
||||
using reference = Reference;
|
||||
using pointer = Pointer;
|
||||
using iterator_category = Category;
|
||||
|
||||
public:
|
||||
/// Builds an iterator over the instructions that follow store @a s up to
/// (not including) the next store into the same pointer operand.
/// @param s   Store starting the range; with @c nullptr nothing is scanned.
/// @param end If @c true, the iterator is positioned past the last
///            instruction (@c _current is reset to @c nullptr).
iterator_impl(llvm::StoreInst* s, bool end = false)
{
	_first = s;
	_last = s;
	if (s == nullptr)
	{
		return;
	}

	llvm::Instruction* cursor = s;
	auto* block = cursor->getParent();
	for (;;)
	{
		// Stop when there is nothing left, or when a store other than the
		// starting one writes to the same pointer operand.
		if (cursor == nullptr)
		{
			break;
		}
		if (cursor != _first && isLlvmToAsmInstruction(cursor))
		{
			break;
		}

		// Every instruction except the starting store belongs to the range.
		if (cursor != _first)
		{
			_last = cursor;
			if (_current == nullptr)
			{
				_current = cursor;
			}
		}

		// Advance inside the basic block while possible.
		if (&block->back() != cursor)
		{
			cursor = cursor->getNextNode();
			continue;
		}

		// End of the basic block: finish on the last block of the
		// function, otherwise carry on in the following block.
		if (&block->getParent()->back() == block)
		{
			break;
		}
		block = block->getNextNode();
		cursor = &block->front();
	}

	if (end)
	{
		_current = nullptr;
	}
}
|
||||
iterator_impl() = default;
|
||||
iterator_impl(const iterator_impl& itr) = default;
|
||||
/// Takes over the whole state (range start, range end, position) of @a rhs,
/// which is passed by value.
iterator_impl& operator=(iterator_impl rhs)
{
	_current = rhs._current;
	_last = rhs._last;
	_first = rhs._first;
	return *this;
}
|
||||
|
||||
/// Prefix increment. Moves to the following instruction, continuing in the
/// next basic block when the current one ends. Reaching a store into the
/// same pointer operand as the starting store (or running out of
/// instructions) ends the iteration. A past-the-end iterator is left as is.
iterator_impl& operator++()
{
	if (_current != nullptr)
	{
		// The following block must be read before _current is replaced.
		auto* followingBlock = _current->getParent()->getNextNode();
		llvm::Instruction* next = _current->getNextNode();
		if (next == nullptr && followingBlock)
		{
			next = &followingBlock->front();
		}
		_current = isLlvmToAsmInstruction(next) ? nullptr : next;
	}
	return *this;
}
|
||||
|
||||
/// Postfix increment.
/// Delegates to the prefix form so that the stepping logic exists only once
/// and the two operators cannot drift apart. The prefix operator is a no-op
/// on a past-the-end iterator, so in that case an unchanged copy is returned.
/// @return Copy of the iterator as it was before the increment.
iterator_impl operator++(int)
{
	iterator_impl previous(*this);
	++(*this);
	return previous;
}
|
||||
|
||||
/// Prefix decrement. An iterator standing on the starting store is left as
/// is, a past-the-end iterator jumps to the last instruction of the range,
/// any other one moves to the preceding instruction (continuing at the back
/// of the previous basic block when the current one is exhausted).
iterator_impl& operator--()
{
	if (_current == _first)
	{
		return *this;
	}

	if (_current == nullptr)
	{
		_current = _last;
	}
	else
	{
		// The previous block must be read before _current is replaced.
		auto* precedingBlock = _current->getParent()->getPrevNode();
		_current = _current->getPrevNode();
		if (_current == nullptr && precedingBlock)
		{
			_current = &precedingBlock->back();
		}
	}
	return *this;
}
|
||||
|
||||
/// Postfix decrement.
/// Delegates to the prefix form so that the stepping logic exists only once
/// and the two operators cannot drift apart. The prefix operator does
/// nothing when the iterator stands on the starting store, so in that case
/// an unchanged copy is returned.
/// @return Copy of the iterator as it was before the decrement.
iterator_impl operator--(int)
{
	iterator_impl previous(*this);
	--(*this);
	return previous;
}
|
||||
|
||||
/// Dereference.
/// The iterator must point to an instruction of the range: neither the
/// starting store nor the past-the-end position (@c nullptr) may be
/// dereferenced. Both conditions are checked by assertions.
/// @return Reference to the current instruction.
reference operator*()
{
	assert(_first != _current);
	// A past-the-end iterator would otherwise dereference a null pointer.
	assert(_current != nullptr);
	return *_current;
}
|
||||
|
||||
/// Member access.
/// The iterator must point to an instruction of the range: neither the
/// starting store nor the past-the-end position (@c nullptr) may be
/// accessed. Both conditions are checked by assertions.
/// @return Pointer to the current instruction.
pointer operator->()
{
	assert(_first != _current);
	// A past-the-end iterator would otherwise dereference a null pointer.
	assert(_current != nullptr);
	return &(*_current);
}
|
||||
|
||||
bool operator==(const iterator_impl& rhs) const
|
||||
{
|
||||
return (_current == nullptr && rhs._current == nullptr)
|
||||
|| (_first == rhs._first
|
||||
&& _last == rhs._last
|
||||
&& _current == rhs._current);
|
||||
}
|
||||
|
||||
bool operator!=(const iterator_impl& rhs) const
|
||||
{
|
||||
return !(*this == rhs);
|
||||
}
|
||||
|
||||
private:
|
||||
/// @return @c true if @a i is a store whose pointer operand is the same as
///         the pointer operand of the starting store @c _first; @c false
///         for @c nullptr and for any other instruction.
bool isLlvmToAsmInstruction(const llvm::Instruction* i) const
{
	if (auto* store = llvm::dyn_cast_or_null<llvm::StoreInst>(i))
	{
		return store->getPointerOperand() == _first->getPointerOperand();
	}
	return false;
}
|
||||
|
||||
private:
|
||||
llvm::StoreInst* _first = nullptr;
|
||||
llvm::Instruction* _last = nullptr;
|
||||
llvm::Instruction* _current = nullptr;
|
||||
};
|
||||
|
||||
public:
|
||||
using iterator = iterator_impl<
|
||||
std::bidirectional_iterator_tag,
|
||||
llvm::Instruction>;
|
||||
using const_iterator = iterator_impl<
|
||||
std::bidirectional_iterator_tag,
|
||||
const llvm::Instruction>;
|
||||
using reverse_iterator = std::reverse_iterator<iterator>;
|
||||
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
|
||||
|
||||
iterator begin();
|
||||
iterator end();
|
||||
reverse_iterator rbegin();
|
||||
reverse_iterator rend();
|
||||
const_iterator begin() const;
|
||||
const_iterator end() const;
|
||||
const_reverse_iterator rbegin() const;
|
||||
const_reverse_iterator rend() const;
|
||||
|
||||
public:
|
||||
AsmInstruction();
|
||||
AsmInstruction(llvm::Instruction* inst);
|
||||
AsmInstruction(llvm::BasicBlock* bb);
|
||||
AsmInstruction(llvm::Function* f);
|
||||
AsmInstruction(llvm::Module* m, tl_cpputils::Address addr);
|
||||
|
||||
bool operator<(const AsmInstruction& o) const;
|
||||
bool operator==(const AsmInstruction& o) const;
|
||||
bool operator!=(const AsmInstruction& o) const;
|
||||
explicit operator bool() const;
|
||||
|
||||
bool isValid() const;
|
||||
bool isInvalid() const;
|
||||
bool isConditional(Config* conf) const;
|
||||
cs_insn* getCapstoneInsn() const;
|
||||
bool isThumb() const;
|
||||
|
||||
std::string getDsm() const;
|
||||
tl_cpputils::Maybe<unsigned> getLatency() const;
|
||||
tl_cpputils::Address getAddress() const;
|
||||
tl_cpputils::Address getEndAddress() const;
|
||||
std::size_t getByteSize() const;
|
||||
std::size_t getBitSize() const;
|
||||
bool contains(tl_cpputils::Address addr) const;
|
||||
|
||||
AsmInstruction getNext() const;
|
||||
AsmInstruction getPrev() const;
|
||||
|
||||
bool instructionsCanBeErased();
|
||||
bool eraseInstructions();
|
||||
llvm::TerminatorInst* makeTerminal();
|
||||
llvm::BasicBlock* makeStart();
|
||||
|
||||
llvm::BasicBlock* getBasicBlock() const;
|
||||
llvm::Function* getFunction() const;
|
||||
std::vector<llvm::Instruction*> getInstructions();
|
||||
std::vector<llvm::BasicBlock*> getBasicBlocks();
|
||||
std::string getBasicBlockLableName(
|
||||
const std::string& labelPrefix = "dec_label_pc_") const;
|
||||
|
||||
bool empty();
|
||||
llvm::Instruction* front();
|
||||
llvm::Instruction* back();
|
||||
llvm::StoreInst* getLlvmToAsmInstruction() const;
|
||||
|
||||
llvm::Instruction* insertBack(llvm::Instruction* i);
|
||||
llvm::Instruction* insertBackSafe(llvm::Instruction* i);
|
||||
|
||||
bool storesValue(llvm::Value* val) const;
|
||||
|
||||
std::string dump() const;
|
||||
friend std::ostream& operator<<(
|
||||
std::ostream& out,
|
||||
const AsmInstruction &a);
|
||||
|
||||
// Templates.
|
||||
//
|
||||
public:
|
||||
template<typename T>
|
||||
bool containsInstruction()
|
||||
{
|
||||
for (auto& i : *this)
|
||||
{
|
||||
if (llvm::isa<T>(&i))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T* getInstructionFirst()
|
||||
{
|
||||
for (auto& i : *this)
|
||||
{
|
||||
if (auto* ret = llvm::dyn_cast<T>(&i))
|
||||
{
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
public:
|
||||
static const llvm::GlobalVariable* getLlvmToAsmGlobalVariable(
|
||||
const llvm::Module* m);
|
||||
static tl_cpputils::Address getInstructionAddress(
|
||||
llvm::Instruction* inst);
|
||||
static bool isLlvmToAsmInstruction(const llvm::Value* inst);
|
||||
static void clear();
|
||||
|
||||
private:
|
||||
const llvm::GlobalVariable* getLlvmToAsmGlobalVariablePrivate(
|
||||
llvm::Module* m) const;
|
||||
bool isLlvmToAsmInstructionPrivate(llvm::Value* inst) const;
|
||||
|
||||
private:
|
||||
using ModuleGlobalPair = std::pair<const llvm::Module*, const llvm::GlobalVariable*>;
|
||||
|
||||
private:
|
||||
llvm::StoreInst* _llvmToAsmInstr = nullptr;
|
||||
static std::vector<ModuleGlobalPair> _cache;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
188
include/bin2llvmir/providers/config.h
Normal file
188
include/bin2llvmir/providers/config.h
Normal file
@ -0,0 +1,188 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/providers/config.h
|
||||
* @brief Config DB provider for bin2llvmirl.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_PROVIDERS_CONFIG_H
|
||||
#define BIN2LLVMIR_PROVIDERS_CONFIG_H
|
||||
|
||||
#include <llvm/IR/Instructions.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
|
||||
#include "retdec-config/config.h"
|
||||
#include "tl-cpputils/address.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class Config
|
||||
{
|
||||
public:
|
||||
static Config empty(llvm::Module* m);
|
||||
static Config fromFile(llvm::Module* m, const std::string& path);
|
||||
static Config fromJsonString(llvm::Module* m, const std::string& json);
|
||||
|
||||
void doFinalization();
|
||||
|
||||
public:
|
||||
retdec_config::Config& getConfig();
|
||||
|
||||
// Function
|
||||
//
|
||||
retdec_config::Function* getConfigFunction(
|
||||
const llvm::Function* fnc);
|
||||
retdec_config::Function* getConfigFunction(
|
||||
tl_cpputils::Address startAddr);
|
||||
|
||||
llvm::Function* getLlvmFunction(
|
||||
tl_cpputils::Address startAddr);
|
||||
|
||||
tl_cpputils::Address getFunctionAddress(
|
||||
const llvm::Function* fnc);
|
||||
|
||||
bool isFrontendFunction(const llvm::Value* val);
|
||||
bool isFrontendFunctionCall(const llvm::Value* val);
|
||||
|
||||
// Register
|
||||
//
|
||||
const retdec_config::Object* getConfigRegister(
|
||||
const llvm::Value* val);
|
||||
tl_cpputils::Maybe<unsigned> getConfigRegisterNumber(
|
||||
const llvm::Value* val);
|
||||
std::string getConfigRegisterClass(
|
||||
const llvm::Value* val);
|
||||
llvm::GlobalVariable* getLlvmRegister(
|
||||
const std::string& name);
|
||||
|
||||
bool isRegister(const llvm::Value* val);
|
||||
bool isFlagRegister(const llvm::Value* val);
|
||||
bool isStackPointerRegister(const llvm::Value* val);
|
||||
bool isGeneralPurposeRegister(const llvm::Value* val);
|
||||
bool isFloatingPointRegister(const llvm::Value* val);
|
||||
|
||||
// Global
|
||||
//
|
||||
const retdec_config::Object* getConfigGlobalVariable(
|
||||
const llvm::GlobalVariable* gv);
|
||||
const retdec_config::Object* getConfigGlobalVariable(
|
||||
tl_cpputils::Address address);
|
||||
|
||||
llvm::GlobalVariable* getLlvmGlobalVariable(
|
||||
tl_cpputils::Address address);
|
||||
llvm::GlobalVariable* getLlvmGlobalVariable(
|
||||
const std::string& name,
|
||||
tl_cpputils::Address address);
|
||||
|
||||
tl_cpputils::Address getGlobalAddress(
|
||||
const llvm::GlobalVariable* gv);
|
||||
|
||||
bool isGlobalVariable(const llvm::Value* val);
|
||||
|
||||
// Local + Stack
|
||||
//
|
||||
const retdec_config::Object* getConfigLocalVariable(
|
||||
const llvm::Value* val);
|
||||
retdec_config::Object* getConfigStackVariable(
|
||||
const llvm::Value* val);
|
||||
|
||||
llvm::AllocaInst* getLlvmStackVariable(
|
||||
llvm::Function* fnc,
|
||||
int offset);
|
||||
|
||||
bool isStackVariable(const llvm::Value* val);
|
||||
tl_cpputils::Maybe<int> getStackVariableOffset(
|
||||
const llvm::Value* val);
|
||||
|
||||
// Insert
|
||||
//
|
||||
retdec_config::Object* insertGlobalVariable(
|
||||
const llvm::GlobalVariable* gv,
|
||||
tl_cpputils::Address address,
|
||||
bool fromDebug = false,
|
||||
const std::string& realName = "",
|
||||
const std::string& cryptoDesc = "");
|
||||
|
||||
retdec_config::Object* insertStackVariable(
|
||||
const llvm::AllocaInst* sv,
|
||||
int offset,
|
||||
bool fromDebug = false);
|
||||
|
||||
retdec_config::Function* insertFunction(
|
||||
const llvm::Function* fnc,
|
||||
tl_cpputils::Address start = tl_cpputils::Address::getUndef,
|
||||
tl_cpputils::Address end = tl_cpputils::Address::getUndef,
|
||||
bool fromDebug = false);
|
||||
|
||||
retdec_config::Function* renameFunction(
|
||||
retdec_config::Function* fnc,
|
||||
const std::string& name);
|
||||
|
||||
// LLVM to ASM
|
||||
//
|
||||
bool isLlvmToAsmGlobalVariable(const llvm::Value* gv) const;
|
||||
bool isLlvmToAsmInstruction(const llvm::Value* inst) const;
|
||||
llvm::GlobalVariable* getLlvmToAsmGlobalVariable() const;
|
||||
void setLlvmToAsmGlobalVariable(llvm::GlobalVariable* gv);
|
||||
|
||||
// Pseudo-functions.
|
||||
//
|
||||
void setLlvmCallPseudoFunction(llvm::Function* f);
|
||||
llvm::Function* getLlvmCallPseudoFunction() const;
|
||||
bool isLlvmCallPseudoFunction(llvm::Value* f);
|
||||
llvm::CallInst* isLlvmCallPseudoFunctionCall(llvm::Value* c);
|
||||
|
||||
void setLlvmReturnPseudoFunction(llvm::Function* f);
|
||||
llvm::Function* getLlvmReturnPseudoFunction() const;
|
||||
bool isLlvmReturnPseudoFunction(llvm::Value* f);
|
||||
llvm::CallInst* isLlvmReturnPseudoFunctionCall(llvm::Value* c);
|
||||
|
||||
void setLlvmBranchPseudoFunction(llvm::Function* f);
|
||||
llvm::Function* getLlvmBranchPseudoFunction() const;
|
||||
bool isLlvmBranchPseudoFunction(llvm::Value* f);
|
||||
llvm::CallInst* isLlvmBranchPseudoFunctionCall(llvm::Value* c);
|
||||
|
||||
void setLlvmCondBranchPseudoFunction(llvm::Function* f);
|
||||
llvm::Function* getLlvmCondBranchPseudoFunction() const;
|
||||
bool isLlvmCondBranchPseudoFunction(llvm::Value* f);
|
||||
llvm::CallInst* isLlvmCondBranchPseudoFunctionCall(llvm::Value* c);
|
||||
|
||||
llvm::CallInst* isLlvmAnyBranchPseudoFunctionCall(llvm::Value* c);
|
||||
llvm::CallInst* isLlvmAnyUncondBranchPseudoFunctionCall(llvm::Value* c);
|
||||
|
||||
// Other
|
||||
//
|
||||
bool isPic32() const;
|
||||
bool isMipsOrPic32() const;
|
||||
llvm::GlobalVariable* getGlobalDummy();
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
retdec_config::Config _configDB;
|
||||
std::string _configPath;
|
||||
llvm::GlobalVariable* _globalDummy = nullptr;
|
||||
llvm::GlobalVariable* _asm2llvmGv = nullptr;
|
||||
llvm::Function* _callFunction = nullptr;
|
||||
llvm::Function* _returnFunction = nullptr;
|
||||
llvm::Function* _branchFunction = nullptr;
|
||||
llvm::Function* _condBranchFunction = nullptr;
|
||||
};
|
||||
|
||||
class ConfigProvider
|
||||
{
|
||||
public:
|
||||
static Config* addConfigFile(llvm::Module* m, const std::string& path);
|
||||
static Config* addConfigJsonString(
|
||||
llvm::Module* m,
|
||||
const std::string& json);
|
||||
static Config* getConfig(llvm::Module* m);
|
||||
static bool getConfig(llvm::Module* m, Config*& c);
|
||||
static void doFinalization(llvm::Module* m);
|
||||
static void clear();
|
||||
|
||||
private:
|
||||
static std::map<llvm::Module*, Config> _module2config;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
59
include/bin2llvmir/providers/debugformat.h
Normal file
59
include/bin2llvmir/providers/debugformat.h
Normal file
@ -0,0 +1,59 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/providers/debugformat.h
|
||||
* @brief Debug format provider for bin2llvmirl.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_PROVIDERS_DEBUGFORMAT_H
|
||||
#define BIN2LLVMIR_PROVIDERS_DEBUGFORMAT_H
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
#include "debugformat/debugformat.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class DebugFormat : public debugformat::DebugFormat
|
||||
{
|
||||
using debugformat::DebugFormat::DebugFormat;
|
||||
};
|
||||
|
||||
/**
|
||||
* Completely static object -- all members and methods are static -> it can be
|
||||
* used anywhere in bin2llvmirl. It provides mapping of modules to debug info
|
||||
* associated with them.
|
||||
*
|
||||
* @attention Even though this is accessible anywhere in bin2llvmirl, use it only
|
||||
* in LLVM passes' prologs to initialize pass-local debug format object. All
|
||||
* analyses, utils and other modules *MUST NOT* use it. If they need to work
|
||||
* with debug info, they should accept it in parameter.
|
||||
*/
|
||||
class DebugFormatProvider
|
||||
{
|
||||
private:
|
||||
using SymbolTable = std::map<
|
||||
tl_cpputils::Address,
|
||||
const fileformat::Symbol*>;
|
||||
|
||||
public:
|
||||
static DebugFormat* addDebugFormat(
|
||||
llvm::Module* m,
|
||||
loader::Image* objf,
|
||||
const std::string& pdbFile,
|
||||
const tl_cpputils::Address& imageBase,
|
||||
demangler::CDemangler* demangler);
|
||||
|
||||
static DebugFormat* getDebugFormat(llvm::Module* m);
|
||||
static bool getDebugFormat(llvm::Module* m, DebugFormat*& df);
|
||||
|
||||
static void clear();
|
||||
|
||||
private:
|
||||
/// Mapping of modules to debug info associated with them.
|
||||
static std::map<llvm::Module*, DebugFormat> _module2debug;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
51
include/bin2llvmir/providers/demangler.h
Normal file
51
include/bin2llvmir/providers/demangler.h
Normal file
@ -0,0 +1,51 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/providers/demangler.h
|
||||
* @brief Demangler provider for bin2llvmirl.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_PROVIDERS_DEMANGLER_H
|
||||
#define BIN2LLVMIR_PROVIDERS_DEMANGLER_H
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
|
||||
#include "retdec-config/tool_info.h"
|
||||
#include "demangler/demangler.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/**
|
||||
* Completely static object -- all members and methods are static -> it can be
|
||||
* used anywhere in bin2llvmirl. It provides mapping of modules to demanglers
|
||||
* associated with them.
|
||||
*
|
||||
* @attention Even though this is accessible anywhere in bin2llvmirl, use it only
|
||||
* in LLVM passes' prologs to initialize pass-local demangler object. All
|
||||
* analyses, utils and other modules *MUST NOT* use it. If they need to work
|
||||
* with demangler, they should accept it in parameter.
|
||||
*/
|
||||
class DemanglerProvider
|
||||
{
|
||||
public:
|
||||
static demangler::CDemangler* addDemangler(
|
||||
llvm::Module* m,
|
||||
const retdec_config::ToolInfoContainer& t);
|
||||
|
||||
static demangler::CDemangler* getDemangler(llvm::Module* m);
|
||||
static bool getDemangler(
|
||||
llvm::Module* m,
|
||||
demangler::CDemangler*& d);
|
||||
|
||||
static void clear();
|
||||
|
||||
private:
|
||||
using Demangler = std::unique_ptr<demangler::CDemangler>;
|
||||
/// Mapping of modules to demanglers associated with them.
|
||||
static std::map<llvm::Module*, Demangler> _module2demangler;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
127
include/bin2llvmir/providers/fileimage.h
Normal file
127
include/bin2llvmir/providers/fileimage.h
Normal file
@ -0,0 +1,127 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/providers/fileimage.h
|
||||
* @brief File image provider for bin2llvmirl.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_PROVIDERS_FILEIMAGE_H
|
||||
#define BIN2LLVMIR_PROVIDERS_FILEIMAGE_H
|
||||
|
||||
#include <llvm/IR/Constants.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/IR/Value.h>
|
||||
|
||||
#include "tl-cpputils/address.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/debugformat.h"
|
||||
#include "loader/loader/image.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class DebugFormat;
|
||||
|
||||
class FileImage
|
||||
{
|
||||
public:
|
||||
FileImage(
|
||||
llvm::Module* m,
|
||||
const std::string& path,
|
||||
Config* config);
|
||||
FileImage(
|
||||
llvm::Module* m,
|
||||
const std::shared_ptr<fileformat::FileFormat>& ff,
|
||||
Config* config);
|
||||
FileImage(
|
||||
llvm::Module* m,
|
||||
std::unique_ptr<loader::Image> img,
|
||||
Config* config);
|
||||
|
||||
bool isOk() const;
|
||||
|
||||
loader::Image* getImage();
|
||||
fileformat::FileFormat* getFileFormat();
|
||||
|
||||
public:
|
||||
llvm::ConstantInt* getConstantInt(
|
||||
llvm::IntegerType* t,
|
||||
tl_cpputils::Address addr);
|
||||
llvm::ConstantInt* getConstantDefault(tl_cpputils::Address addr);
|
||||
llvm::Constant* getConstantHalf(tl_cpputils::Address addr);
|
||||
llvm::Constant* getConstantFloat(tl_cpputils::Address addr);
|
||||
llvm::Constant* getConstantDouble(tl_cpputils::Address addr);
|
||||
llvm::Constant* getConstantLongDouble(tl_cpputils::Address addr);
|
||||
llvm::Constant* getConstantCharPointer(tl_cpputils::Address addr);
|
||||
llvm::Constant* getConstantCharArrayNice(tl_cpputils::Address addr);
|
||||
llvm::Constant* getConstantPointer(
|
||||
llvm::PointerType* type,
|
||||
tl_cpputils::Address addr);
|
||||
llvm::Constant* getConstantStruct(
|
||||
llvm::StructType* type,
|
||||
tl_cpputils::Address addr);
|
||||
llvm::Constant* getConstantArray(
|
||||
llvm::ArrayType* type,
|
||||
tl_cpputils::Address addr);
|
||||
llvm::Constant* getConstant(
|
||||
llvm::Type* type,
|
||||
tl_cpputils::Address addr = tl_cpputils::Address::getUndef,
|
||||
bool wideString = false);
|
||||
llvm::Constant* getConstant(
|
||||
Config* config,
|
||||
DebugFormat* dbgf = nullptr,
|
||||
tl_cpputils::Address addr = tl_cpputils::Address::getUndef);
|
||||
|
||||
public:
|
||||
const fileformat::Symbol* getPreferredSymbol(
|
||||
tl_cpputils::Address addr);
|
||||
|
||||
public:
|
||||
auto& getSegments() const { return _image->getSegments(); }
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
std::unique_ptr<loader::Image> _image;
|
||||
};
|
||||
|
||||
/**
|
||||
* Completely static object -- all members and methods are static -> it can be
|
||||
* used anywhere in bin2llvmirl. It provides mapping of modules to file
|
||||
* images associated with them.
|
||||
*
|
||||
* @attention Even though this is accessible anywhere in bin2llvmirl, use it only
|
||||
* in LLVM passes' prologs to initialize pass-local file image object. All
|
||||
* analyses, utils and other modules *MUST NOT* use it. If they need to work
|
||||
* with a file image, they should accept it in parameter.
|
||||
*/
|
||||
class FileImageProvider
|
||||
{
|
||||
public:
|
||||
static FileImage* addFileImage(
|
||||
llvm::Module* m,
|
||||
const std::string& path,
|
||||
Config* config);
|
||||
static FileImage* addFileImage(
|
||||
llvm::Module* m,
|
||||
const std::shared_ptr<fileformat::FileFormat>& ff,
|
||||
Config* config);
|
||||
|
||||
static FileImage* getFileImage(
|
||||
llvm::Module* m);
|
||||
static bool getFileImage(
|
||||
llvm::Module* m,
|
||||
FileImage*& img);
|
||||
|
||||
static void clear();
|
||||
|
||||
private:
|
||||
static FileImage* addFileImage(
|
||||
llvm::Module* m,
|
||||
FileImage img);
|
||||
|
||||
private:
|
||||
/// Mapping of modules to file images associated with them.
|
||||
static std::map<llvm::Module*, FileImage> _module2image;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
110
include/bin2llvmir/providers/lti.h
Normal file
110
include/bin2llvmir/providers/lti.h
Normal file
@ -0,0 +1,110 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/providers/lti.h
|
||||
* @brief Library type information provider for bin2llvmirl.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_PROVIDERS_LTI_H
|
||||
#define BIN2LLVMIR_PROVIDERS_LTI_H
|
||||
|
||||
#include <llvm/IR/Module.h>
|
||||
|
||||
#include "ctypes/context.h"
|
||||
#include "ctypes/module.h"
|
||||
#include "ctypes/type.h"
|
||||
#include "ctypes/visitor.h"
|
||||
#include "ctypesparser/json_ctypes_parser.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class ToLlvmTypeVisitor: public ctypes::Visitor
|
||||
{
|
||||
public:
|
||||
ToLlvmTypeVisitor(llvm::Module* m, Config* c);
|
||||
virtual ~ToLlvmTypeVisitor() override;
|
||||
|
||||
virtual void visit(
|
||||
const std::shared_ptr<ctypes::ArrayType>&) override;
|
||||
virtual void visit(
|
||||
const std::shared_ptr<ctypes::EnumType>&) override;
|
||||
virtual void visit(
|
||||
const std::shared_ptr<ctypes::FloatingPointType>&) override;
|
||||
virtual void visit(
|
||||
const std::shared_ptr<ctypes::FunctionType>&) override;
|
||||
virtual void visit(
|
||||
const std::shared_ptr<ctypes::IntegralType>&) override;
|
||||
virtual void visit(
|
||||
const std::shared_ptr<ctypes::PointerType>&) override;
|
||||
virtual void visit(
|
||||
const std::shared_ptr<ctypes::StructType>&) override;
|
||||
virtual void visit(
|
||||
const std::shared_ptr<ctypes::TypedefedType>&) override;
|
||||
virtual void visit(
|
||||
const std::shared_ptr<ctypes::UnionType>&) override;
|
||||
virtual void visit(
|
||||
const std::shared_ptr<ctypes::UnknownType>&) override;
|
||||
virtual void visit(
|
||||
const std::shared_ptr<ctypes::VoidType>&) override;
|
||||
|
||||
llvm::Type* getLlvmType() const;
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
llvm::Type* _type = nullptr;
|
||||
};
|
||||
|
||||
class Lti
|
||||
{
|
||||
public:
|
||||
using FunctionPair = std::pair<
|
||||
llvm::Function*,
|
||||
std::shared_ptr<ctypes::Function>>;
|
||||
|
||||
public:
|
||||
Lti(
|
||||
llvm::Module* m,
|
||||
Config* c,
|
||||
loader::Image* objf);
|
||||
|
||||
bool hasLtiFunction(const std::string& name);
|
||||
std::shared_ptr<ctypes::Function> getLtiFunction(
|
||||
const std::string& name);
|
||||
llvm::FunctionType* getLlvmFunctionType(const std::string& name);
|
||||
FunctionPair getPairFunctionFree(const std::string& name);
|
||||
llvm::Function* getLlvmFunctionFree(const std::string& name);
|
||||
FunctionPair getPairFunction(const std::string& name);
|
||||
llvm::Function* getLlvmFunction(const std::string& name);
|
||||
|
||||
private:
|
||||
void loadLtiFile(const std::string& filePath);
|
||||
llvm::Type* getLlvmType(std::shared_ptr<ctypes::Type> type);
|
||||
|
||||
private:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
loader::Image* _image = nullptr;
|
||||
std::unique_ptr<ctypes::Module> _ltiModule;
|
||||
ctypesparser::JSONCTypesParser _ltiParser;
|
||||
};
|
||||
|
||||
class LtiProvider
|
||||
{
|
||||
public:
|
||||
static Lti* addLti(
|
||||
llvm::Module* m,
|
||||
Config* c,
|
||||
loader::Image* objf);
|
||||
static Lti* getLti(llvm::Module* m);
|
||||
static bool getLti(llvm::Module* m, Lti*& lti);
|
||||
static void clear();
|
||||
|
||||
private:
|
||||
static std::map<llvm::Module*, Lti> _module2lti;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
100
include/bin2llvmir/utils/defs.h
Normal file
100
include/bin2llvmir/utils/defs.h
Normal file
@ -0,0 +1,100 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/utils/defs.h
|
||||
* @brief Aliases for several useful types with LLVM IR items.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_UTILS_DEFS_H
|
||||
#define BIN2LLVMIR_UTILS_DEFS_H
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <llvm/Analysis/CallGraph.h>
|
||||
#include <llvm/IR/Instruction.h>
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
/// Vector of global variables.
|
||||
using GlobVarVec = std::vector<llvm::GlobalVariable*>;
|
||||
|
||||
/// Set of global variables.
|
||||
using GlobVarSet = std::set<llvm::GlobalVariable*>;
|
||||
|
||||
/// Vector of call-graph nodes.
|
||||
using CallGraphNodeVec = std::vector<llvm::CallGraphNode*>;
|
||||
|
||||
/// Vector of basic blocks.
|
||||
using BBVec = std::vector<llvm::BasicBlock*>;
|
||||
|
||||
/// Set of basic blocks.
|
||||
using BBSet = std::set<llvm::BasicBlock*>;
|
||||
|
||||
/// Vector of instructions.
|
||||
using InstVec = std::vector<llvm::Instruction*>;
|
||||
|
||||
/// Set of instructions.
|
||||
using InstSet = std::set<llvm::Instruction*>;
|
||||
|
||||
/// Unordered set of instructions.
|
||||
using UnorderedInstSet = std::unordered_set<llvm::Instruction*>;
|
||||
|
||||
/// Set of values.
|
||||
using ValSet = std::set<llvm::Value*>;
|
||||
|
||||
/// Unordered set of values.
|
||||
using UnorderedValSet = std::unordered_set<llvm::Value*>;
|
||||
|
||||
/// Mapping of a value to another value.
|
||||
using ValValMap = std::map<llvm::Value*, llvm::Value*>;
|
||||
|
||||
/// Vector of functions.
|
||||
using FuncVec = std::vector<llvm::Function*>;
|
||||
|
||||
/// Set of functions.
|
||||
using FuncSet = std::set<llvm::Function*>;
|
||||
|
||||
/// Set of @c CallInst.
|
||||
using CallInstSet = std::set<llvm::CallInst*>;
|
||||
|
||||
/// Mapping of an instruction to a set of instructions.
|
||||
using InstInstSetMap = std::map<llvm::Instruction*, InstSet>;
|
||||
|
||||
/// Mapping of a string to vector of functions.
|
||||
using StringVecFuncMap = std::map<std::string, FuncVec>;
|
||||
|
||||
/// Unordered set of types.
|
||||
using UnorderedTypeSet = std::unordered_set<llvm::Type*>;
|
||||
|
||||
class RegisterCouple
|
||||
{
|
||||
public:
|
||||
RegisterCouple(
|
||||
llvm::GlobalVariable* reg1 = nullptr,
|
||||
llvm::GlobalVariable* reg2 = nullptr);
|
||||
|
||||
bool hasFirst() const;
|
||||
bool hasSecond() const;
|
||||
|
||||
llvm::GlobalVariable* getFirst() const;
|
||||
llvm::GlobalVariable* getSecond() const;
|
||||
|
||||
void setFirst(llvm::GlobalVariable* reg);
|
||||
void setSecond(llvm::GlobalVariable* reg);
|
||||
|
||||
private:
|
||||
llvm::GlobalVariable* _reg1 = nullptr;
|
||||
llvm::GlobalVariable* _reg2 = nullptr;
|
||||
};
|
||||
|
||||
#define LOG \
|
||||
if (!debug_enabled) {} \
|
||||
else std::cout
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
42
include/bin2llvmir/utils/global_var.h
Normal file
42
include/bin2llvmir/utils/global_var.h
Normal file
@ -0,0 +1,42 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/utils/global_var.h
|
||||
* @brief LLVM global variable utilities.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_UTILS_GLOBAL_VAR_H
|
||||
#define BIN2LLVMIR_UTILS_GLOBAL_VAR_H
|
||||
|
||||
#include <llvm/IR/GlobalVariable.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/IR/Value.h>
|
||||
|
||||
#include "tl-cpputils/address.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
#include "debugformat/debugformat.h"
|
||||
#include "loader/loader.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
bool getGlobalInfoFromCryptoPatterns(
|
||||
llvm::Module* module,
|
||||
Config* config,
|
||||
tl_cpputils::Address addr,
|
||||
std::string& name,
|
||||
std::string& description,
|
||||
llvm::Type*& type);
|
||||
|
||||
llvm::GlobalVariable* getGlobalVariable(
|
||||
llvm::Module* module,
|
||||
Config* config,
|
||||
FileImage* objf,
|
||||
DebugFormat* dbgf,
|
||||
tl_cpputils::Address addr,
|
||||
bool strict = false,
|
||||
std::string name = "global_var");
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
93
include/bin2llvmir/utils/instruction.h
Normal file
93
include/bin2llvmir/utils/instruction.h
Normal file
@ -0,0 +1,93 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/utils/instruction.h
|
||||
* @brief LLVM instruction utilities.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_UTILS_INSTRUCTION_H
|
||||
#define BIN2LLVMIR_UTILS_INSTRUCTION_H
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
#include <llvm/IR/Constants.h>
|
||||
#include <llvm/IR/Instructions.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/IR/Value.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
|
||||
#include "tl-cpputils/address.h"
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class Definition;
|
||||
class ReachingDefinitionsAnalysis;
|
||||
|
||||
std::set<llvm::Function*> getParentFuncsFor(llvm::User* user);
|
||||
bool isDirectCall(const llvm::CallInst& inst);
|
||||
bool isDirectCall(const llvm::CallInst* inst);
|
||||
bool isIndirectCall(const llvm::CallInst& inst);
|
||||
bool isIndirectCall(const llvm::CallInst* inst);
|
||||
bool isFncDeclarationCall(const llvm::CallInst& inst);
|
||||
bool isFncDeclarationCall(const llvm::CallInst* inst);
|
||||
bool isFncDefinitionCall(const llvm::CallInst& inst);
|
||||
bool isFncDefinitionCall(const llvm::CallInst* inst);
|
||||
|
||||
bool localizeDefinition(
|
||||
const ReachingDefinitionsAnalysis& RDA,
|
||||
const llvm::Instruction* def,
|
||||
llvm::Type* type = nullptr);
|
||||
bool localizeDefinition(
|
||||
const Definition* def,
|
||||
llvm::Type* type = nullptr);
|
||||
|
||||
llvm::ReturnInst* modifyReturnInst(llvm::ReturnInst* ret, llvm::Value* val);
|
||||
|
||||
llvm::CallInst* modifyCallInst(
|
||||
llvm::CallInst* call,
|
||||
llvm::Type* ret,
|
||||
llvm::ArrayRef<llvm::Value*> args);
|
||||
|
||||
llvm::CallInst* modifyCallInst(
|
||||
llvm::CallInst* call,
|
||||
llvm::Type* ret);
|
||||
|
||||
llvm::CallInst* modifyCallInst(
|
||||
llvm::CallInst* call,
|
||||
llvm::ArrayRef<llvm::Value*> args);
|
||||
|
||||
llvm::CallInst* addToVariadicCallInst(
|
||||
llvm::CallInst* call,
|
||||
llvm::ArrayRef<llvm::Value*> args);
|
||||
|
||||
using FunctionPair = std::pair<llvm::Function*, retdec_config::Function*>;
|
||||
FunctionPair modifyFunction(
|
||||
Config* config,
|
||||
llvm::Function* fnc,
|
||||
llvm::Type* ret,
|
||||
std::vector<llvm::Type*> args,
|
||||
bool isVarArg = false,
|
||||
const std::map<llvm::ReturnInst*, llvm::Value*>& rets2vals =
|
||||
std::map<llvm::ReturnInst*, llvm::Value*>(),
|
||||
const std::map<llvm::CallInst*, std::vector<llvm::Value*>>& calls2vals =
|
||||
std::map<llvm::CallInst*, std::vector<llvm::Value*>>(),
|
||||
llvm::Value* retVal = nullptr,
|
||||
const std::vector<llvm::Value*>& argStores =
|
||||
std::vector<llvm::Value*>(),
|
||||
const std::vector<std::string>& argNames = std::vector<std::string>());
|
||||
|
||||
llvm::Argument* modifyFunctionArgumentType(
|
||||
Config* config,
|
||||
llvm::Argument* arg,
|
||||
llvm::Type* type);
|
||||
|
||||
llvm::Function* splitFunctionOn(
|
||||
llvm::Instruction* inst,
|
||||
const std::string& fncName = "");
|
||||
|
||||
void insertAtBegin(llvm::Instruction* li, llvm::BasicBlock* bb);
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
53
include/bin2llvmir/utils/ir_modifier.h
Normal file
53
include/bin2llvmir/utils/ir_modifier.h
Normal file
@ -0,0 +1,53 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/utils/ir_modifier.h
|
||||
* @brief Modify both LLVM IR and config.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_UTILS_IR_MODIFIER_H
|
||||
#define BIN2LLVMIR_UTILS_IR_MODIFIER_H
|
||||
|
||||
#include <llvm/IR/Instructions.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
class IrModifier
|
||||
{
|
||||
public:
|
||||
using FunctionPair = std::pair<llvm::Function*, retdec_config::Function*>;
|
||||
using StackPair = std::pair<llvm::AllocaInst*, retdec_config::Object*>;
|
||||
|
||||
public:
|
||||
IrModifier();
|
||||
IrModifier(llvm::Module* m, Config* c);
|
||||
|
||||
public:
|
||||
FunctionPair renameFunction(
|
||||
llvm::Function* fnc,
|
||||
const std::string& fncName);
|
||||
FunctionPair splitFunctionOn(
|
||||
llvm::Instruction* inst,
|
||||
tl_cpputils::Address start,
|
||||
const std::string& fncName = "");
|
||||
FunctionPair addFunction(
|
||||
tl_cpputils::Address start,
|
||||
const std::string& fncName = "");
|
||||
FunctionPair addFunctionUnknown(tl_cpputils::Address start);
|
||||
|
||||
StackPair getStackVariable(
|
||||
llvm::Function* fnc,
|
||||
int offset,
|
||||
llvm::Type* type,
|
||||
const std::string& name = "stack_var");
|
||||
|
||||
protected:
|
||||
llvm::Module* _module = nullptr;
|
||||
Config* _config = nullptr;
|
||||
};
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
78
include/bin2llvmir/utils/type.h
Normal file
78
include/bin2llvmir/utils/type.h
Normal file
@ -0,0 +1,78 @@
|
||||
/**
|
||||
* @file include/bin2llvmir/utils/type.h
|
||||
* @brief LLVM type utilities.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef BIN2LLVMIR_UTILS_TYPE_H
|
||||
#define BIN2LLVMIR_UTILS_TYPE_H
|
||||
|
||||
#include <llvm/IR/LLVMContext.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/IR/Type.h>
|
||||
|
||||
#include "bin2llvmir/providers/config.h"
|
||||
#include "bin2llvmir/providers/fileimage.h"
|
||||
#include "bin2llvmir/utils/defs.h"
|
||||
|
||||
namespace bin2llvmir {
|
||||
|
||||
const unsigned DEFAULT_ADDR_SPACE = 0;
|
||||
|
||||
llvm::Type* stringToLlvmType(llvm::LLVMContext& ctx, const std::string& str);
|
||||
llvm::Type* stringToLlvmTypeDefault(llvm::Module* m, const std::string& str);
|
||||
|
||||
llvm::Value* convertValueToType(
|
||||
llvm::Value* val,
|
||||
llvm::Type* type,
|
||||
llvm::Instruction* before);
|
||||
|
||||
llvm::Value* convertValueToTypeAfter(
|
||||
llvm::Value* val,
|
||||
llvm::Type* type,
|
||||
llvm::Instruction* after);
|
||||
|
||||
llvm::Constant* convertConstantToType(
|
||||
llvm::Constant* val,
|
||||
llvm::Type* type);
|
||||
|
||||
llvm::Value* changeObjectType(
|
||||
Config* config,
|
||||
FileImage* objf,
|
||||
llvm::Module* module,
|
||||
llvm::Value* val,
|
||||
llvm::Type* toType,
|
||||
llvm::Constant* init = nullptr,
|
||||
UnorderedInstSet* instToErase = nullptr,
|
||||
bool dbg = false,
|
||||
bool wideString = false);
|
||||
|
||||
bool isBoolType(const llvm::Type* t);
|
||||
bool isStringArrayType(const llvm::Type* t);
|
||||
bool isStringArrayPointeType(const llvm::Type* t);
|
||||
bool isCharType(const llvm::Type* t);
|
||||
bool isCharPointerType(const llvm::Type* t);
|
||||
bool isVoidPointerType(const llvm::Type* t);
|
||||
|
||||
unsigned getDefaultTypeBitSize(llvm::Module* module);
|
||||
unsigned getDefaultTypeByteSize(llvm::Module* module);
|
||||
llvm::IntegerType* getDefaultType(llvm::Module* module);
|
||||
llvm::PointerType* getDefaultPointerType(llvm::Module* module);
|
||||
llvm::IntegerType* getCharType(llvm::LLVMContext& ctx);
|
||||
llvm::IntegerType* getCharType(llvm::LLVMContext* ctx);
|
||||
llvm::PointerType* getCharPointerType(llvm::LLVMContext& ctx);
|
||||
llvm::PointerType* getCharPointerType(llvm::LLVMContext* ctx);
|
||||
llvm::PointerType* getVoidPointerType(llvm::LLVMContext& ctx);
|
||||
llvm::PointerType* getVoidPointerType(llvm::LLVMContext* ctx);
|
||||
|
||||
size_t getTypeByteSizeInBinary(llvm::Module* module, llvm::Type* type);
|
||||
size_t getTypeBitSizeInBinary(llvm::Module* module, llvm::Type* type);
|
||||
|
||||
std::vector<llvm::Type*> parseFormatString(
|
||||
llvm::Module* module,
|
||||
const std::string& format,
|
||||
llvm::Function* calledFnc = nullptr);
|
||||
|
||||
} // namespace bin2llvmir
|
||||
|
||||
#endif
|
82
include/debugformat/debugformat.h
Normal file
82
include/debugformat/debugformat.h
Normal file
@ -0,0 +1,82 @@
|
||||
/**
|
||||
* @file include/debugformat/debugformat.h
|
||||
* @brief Common (DWARF and PDB) debug information representation library.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef DEBUGFORMAT_DEBUGFORMAT_H
|
||||
#define DEBUGFORMAT_DEBUGFORMAT_H
|
||||
|
||||
#include <pdbparser/pdb_file.h>
|
||||
|
||||
#include "retdec-config/functions.h"
|
||||
#include "retdec-config/objects.h"
|
||||
#include "retdec-config/types.h"
|
||||
#include "demangler/demangler.h"
|
||||
#include "dwarfparser/dwarf_file.h"
|
||||
#include "fileformat/fileformat.h"
|
||||
#include "loader/loader.h"
|
||||
|
||||
namespace debugformat {
|
||||
|
||||
/**
|
||||
* Common (PDB and DWARF) debug information representation.
|
||||
*/
|
||||
class DebugFormat
|
||||
{
|
||||
public:
|
||||
using SymbolTable = std::map<tl_cpputils::Address, const fileformat::Symbol*>;
|
||||
|
||||
public:
|
||||
DebugFormat();
|
||||
DebugFormat(
|
||||
loader::Image* inFile,
|
||||
const std::string& pdbFile,
|
||||
SymbolTable* symtab,
|
||||
demangler::CDemangler* demangler,
|
||||
unsigned long long imageBase = 0);
|
||||
|
||||
retdec_config::Function* getFunction(tl_cpputils::Address a);
|
||||
const retdec_config::Object* getGlobalVar(tl_cpputils::Address a);
|
||||
|
||||
bool hasInformation() const;
|
||||
|
||||
private:
|
||||
void loadPdb();
|
||||
void loadPdbTypes();
|
||||
void loadPdbGlobalVariables();
|
||||
void loadPdbFunctions();
|
||||
retdec_config::Type loadPdbType(pdbparser::PDBTypeDef* type);
|
||||
|
||||
void loadDwarf();
|
||||
void loadDwarfTypes();
|
||||
void loadDwarfGlobalVariables();
|
||||
void loadDwarfFunctions();
|
||||
retdec_config::Type loadDwarfType(dwarfparser::DwarfType* type);
|
||||
|
||||
void loadSymtab();
|
||||
|
||||
private:
|
||||
/// Input file used to initialize this debug information.
|
||||
std::string _inputFile;
|
||||
/// Symbol table to read symbols from.
|
||||
SymbolTable* _symtab = nullptr;
|
||||
/// Underlying binary file representation.
|
||||
loader::Image* _inFile = nullptr;
|
||||
/// Underlying PDB representation.
|
||||
pdbparser::PDBFile* _pdbFile = nullptr;
|
||||
/// Underlying DWARF representation.
|
||||
dwarfparser::DwarfFile* _dwarfFile = nullptr;
|
||||
/// Demangler.
|
||||
demangler::CDemangler* _demangler = nullptr;
|
||||
|
||||
public:
|
||||
retdec_config::GlobalVarContainer globals;
|
||||
retdec_config::TypeContainer types;
|
||||
|
||||
std::map<tl_cpputils::Address, retdec_config::Function> functions;
|
||||
};
|
||||
|
||||
} // namespace debugformat
|
||||
|
||||
#endif
|
176
include/dwarfparser/dwarf_base.h
Normal file
176
include/dwarfparser/dwarf_base.h
Normal file
@ -0,0 +1,176 @@
|
||||
/**
|
||||
* @file include/dwarfparser/dwarf_base.h
|
||||
* @brief Declaration of base classes used in dwarfparser.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef DWARFPARSER_DWARF_BASE_H
|
||||
#define DWARFPARSER_DWARF_BASE_H
|
||||
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <libdwarf/dwarf.h>
|
||||
#include <libdwarf/libdwarf.h>
|
||||
|
||||
namespace dwarfparser
|
||||
{
|
||||
|
||||
// Extern forward declarations.
|
||||
class DwarfFile;
|
||||
class DwarfCU;
|
||||
|
||||
// Local forward declarations.
|
||||
template <class T>
|
||||
class DwarfBaseContainer;
|
||||
class DwarfBaseElement;
|
||||
|
||||
/**
|
||||
* @class DwarfBaseContainer.
|
||||
* @brief Base container class for all container objects used by dwarfparser.
|
||||
*/
|
||||
template <class T>
|
||||
class DwarfBaseContainer
|
||||
{
|
||||
//
|
||||
// Type aliases
|
||||
//
|
||||
public:
|
||||
using iterator = typename std::vector<T*>::iterator;
|
||||
using const_iterator = typename std::vector<T*>::const_iterator;
|
||||
|
||||
//
|
||||
// Non-virtual functions.
|
||||
//
|
||||
public:
|
||||
DwarfBaseContainer(DwarfFile *file, DwarfBaseElement *elem = nullptr) :
|
||||
m_res(0),
|
||||
m_error(nullptr),
|
||||
m_parentFile(file),
|
||||
m_parentElem(elem)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
std::size_t size() const { return m_data.size(); }
|
||||
void push_back(T *n) { m_data.push_back(n); }
|
||||
bool empty() const { return m_data.empty(); }
|
||||
iterator begin() { return m_data.begin(); }
|
||||
const_iterator begin() const { return m_data.begin(); }
|
||||
iterator end() { return m_data.end(); }
|
||||
const_iterator end() const { return m_data.end(); }
|
||||
|
||||
DwarfBaseElement *getParentElem() const { return m_parentElem; }
|
||||
DwarfFile *getParentFile() const { return m_parentFile; }
|
||||
|
||||
// TODO: get element by DIE?
|
||||
DwarfBaseElement *getElemByOffset(Dwarf_Off o);
|
||||
|
||||
//
|
||||
// Virtual functions.
|
||||
//
|
||||
public:
|
||||
virtual ~DwarfBaseContainer()
|
||||
{
|
||||
for (iterator it=begin(); it!=end(); ++it)
|
||||
delete (*it);
|
||||
m_data.clear();
|
||||
}
|
||||
|
||||
virtual void dump() const
|
||||
{
|
||||
for (const_iterator cit=begin(); cit!=end(); ++cit)
|
||||
(*cit)->dump();
|
||||
}
|
||||
|
||||
//
|
||||
// Pure virtual functions.
|
||||
//
|
||||
public:
|
||||
virtual T* loadAndGetDie(Dwarf_Die die, unsigned lvl) = 0;
|
||||
|
||||
//
|
||||
// Data.
|
||||
//
|
||||
protected:
|
||||
std::vector<T*> m_data; ///< Object container.
|
||||
|
||||
int m_res; ///< Global return value.
|
||||
Dwarf_Error m_error; ///< Global error code.
|
||||
|
||||
DwarfFile *m_parentFile; ///< Pointer to DWARF file representation.
|
||||
DwarfBaseElement *m_parentElem; ///< Pointer to parent element, if nullptr then parent is DWARF file.
|
||||
|
||||
public:
|
||||
/**
|
||||
* DIE offset to element mapping.
|
||||
* One element may have multiple offsets -- multiple mappings.
|
||||
* ==> *DO NOT* iterate through this container, use 'm_data'.
|
||||
*/
|
||||
std::map<Dwarf_Off, T*> off2data;
|
||||
};
|
||||
|
||||
/**
|
||||
* @class DwarfBaseElement
|
||||
* @brief Base element class for all objects used by dwarfparser.
|
||||
*/
|
||||
class DwarfBaseElement
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* @brief Types of elements.
|
||||
*/
|
||||
enum type_t
|
||||
{
|
||||
CU,
|
||||
FUNCTION,
|
||||
LINE,
|
||||
TYPE,
|
||||
VAR
|
||||
};
|
||||
|
||||
DwarfBaseElement(type_t type, DwarfBaseContainer<DwarfBaseElement> *prnt, Dwarf_Off d);
|
||||
virtual ~DwarfBaseElement() {}
|
||||
virtual void dump() const = 0;
|
||||
|
||||
public:
|
||||
virtual const std::string& getName() const {return name;}
|
||||
|
||||
type_t getType() const {return m_type;}
|
||||
DwarfBaseContainer<DwarfBaseElement> *getPrntCont() const {return m_parent;}
|
||||
DwarfFile *getParentFile() const {return getPrntCont()->getParentFile();}
|
||||
DwarfCU *getCuParent() const {return m_cuParent;}
|
||||
|
||||
Dwarf_Debug &getLibdwarfDebug() const;
|
||||
void addOffset(Dwarf_Off o);
|
||||
std::string getDwarfdump2OffsetString() const;
|
||||
|
||||
public:
|
||||
std::string name;
|
||||
|
||||
protected:
|
||||
type_t m_type;
|
||||
DwarfBaseContainer<DwarfBaseElement> *m_parent; ///< Pointer to parent container that contains this element.
|
||||
DwarfCU *m_cuParent; ///< Pointer to parent CU element that contains this element.
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Get container's element with provided offset.
|
||||
* @param o Offset.
|
||||
* @return Element with offset.
|
||||
*/
|
||||
template <class T>
|
||||
DwarfBaseElement* DwarfBaseContainer<T>::getElemByOffset(Dwarf_Off o)
|
||||
{
|
||||
auto it = off2data.find(o);
|
||||
if (it != off2data.end())
|
||||
return it->second;
|
||||
else
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace dwarfparser
|
||||
|
||||
#endif
|
67
include/dwarfparser/dwarf_cu.h
Normal file
67
include/dwarfparser/dwarf_cu.h
Normal file
@ -0,0 +1,67 @@
|
||||
/**
|
||||
* @file include/dwarfparser/dwarf_cu.h
|
||||
* @brief Declaration of classes representing Compilation Units.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
#ifndef DWARFPARSER_DWARF_CU_H
|
||||
#define DWARFPARSER_DWARF_CU_H
|
||||
|
||||
#include <cstdlib>
|
||||
#include <list>
|
||||
|
||||
#include "dwarfparser/dwarf_base.h"
|
||||
|
||||
namespace dwarfparser
|
||||
{
|
||||
|
||||
// Extern forward declarations.
|
||||
class DwarfFile;
|
||||
|
||||
// Local forward declarations.
|
||||
class DwarfCU;
|
||||
class DwarfCUContainer;
|
||||
|
||||
/**
|
||||
* @class DwarfCU
|
||||
* @brief Compilation unit object.
|
||||
*/
|
||||
class DwarfCU : public DwarfBaseElement
|
||||
{
|
||||
public:
|
||||
DwarfCU(DwarfCUContainer *prnt, Dwarf_Off o);
|
||||
virtual void dump() const override;
|
||||
|
||||
std::size_t srcFilesCount();
|
||||
void addSrcFile(std::string f);
|
||||
std::string *getSrcFile(unsigned idx);
|
||||
int findSrcFile(std::string f, const std::string **ret);
|
||||
bool IsLanguageC() const;
|
||||
bool IsLanguageCpp() const;
|
||||
|
||||
public:
|
||||
std::string compDir; ///< Name of compilation directory.
|
||||
std::string producer; ///< Name of compiler used to create CU.
|
||||
Dwarf_Addr lowAddr; ///< Lowest address of active range, base for loclists.
|
||||
Dwarf_Addr highAddr; ///< Highest address of active range.
|
||||
Dwarf_Unsigned language; ///< A code indicating the source language.
|
||||
|
||||
private:
|
||||
std::list<std::string> m_srcFiles; ///< List of source file of this compilation unit.
|
||||
};
|
||||
|
||||
/**
|
||||
* @class DwarfCUContainer
|
||||
* @brief Compilation unit container.
|
||||
*/
|
||||
class DwarfCUContainer : public DwarfBaseContainer<DwarfCU>
|
||||
{
|
||||
public:
|
||||
DwarfCUContainer(DwarfFile *file, DwarfBaseElement *elem = nullptr);
|
||||
virtual DwarfCU *loadAndGetDie(Dwarf_Die cuDie, unsigned lvl) override;
|
||||
virtual void dump() const override;
|
||||
};
|
||||
|
||||
} // namespace dwarfparser
|
||||
|
||||
#endif
|
142
include/dwarfparser/dwarf_file.h
Normal file
142
include/dwarfparser/dwarf_file.h
Normal file
@ -0,0 +1,142 @@
|
||||
/**
|
||||
* @file include/dwarfparser/dwarf_file.h
|
||||
* @brief Declarations of DwarfFile class which provides high-level access
|
||||
* to DWARF debugging information.
|
||||
* @copyright (c) 2017 Avast Software, licensed under the MIT license
|
||||
*/
|
||||
|
||||
/*
|
||||
* TODO:
|
||||
* - Multiple CUs -> CUs should own other info that are inside them.
|
||||
* At the moment each object contains pointer to its CU.
|
||||
* It would be better designed if CU elements would contain all the different
|
||||
* containers (lines, functions, types, ...).
|
||||
* Problem is that there would be many containers for each element type, and
|
||||
* it is much easier for user of this library to have all functions, types, etc.
|
||||
* in single container -- methods getFunctions(), etc.
|
||||
* Possible solution would be to create new container when get*() method is called,
|
||||
* this container would contain all elements from all CU containers of the type.
|
||||
* But I'm not sure that this is the best solution.
|
||||
* - add some method that enables printing debug info in all classes.
|
||||
* - class holding and managing active context, accessible from other classes.
|
||||
* - DW_AT_abstract_origin problem -- associate object with original DIE.
|
||||
* - void data type - is it the best solution ???
|
||||
*/
|
||||
|
||||
#ifndef DWARFPARSER_DWARF_FILE_H
|
||||
#define DWARFPARSER_DWARF_FILE_H
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <libdwarf/dwarf.h>
|
||||
#include <libdwarf/libdwarf.h>
|
||||
|
||||
#include "fileformat-libdwarf-interface/bin_interface.h"
|
||||
|
||||
#include "dwarfparser/dwarf_cu.h"
|
||||
#include "dwarfparser/dwarf_functions.h"
|
||||
#include "dwarfparser/dwarf_linenumbers.h"
|
||||
#include "dwarfparser/dwarf_locations.h"
|
||||
#include "dwarfparser/dwarf_parserdefs.h"
|
||||
#include "dwarfparser/dwarf_resources.h"
|
||||
#include "dwarfparser/dwarf_types.h"
|
||||
#include "dwarfparser/dwarf_utils.h"
|
||||
#include "dwarfparser/dwarf_vars.h"
|
||||
|
||||
namespace dwarfparser
|
||||
{
|
||||
|
||||
// Extern forward declarations.
|
||||
|
||||
// Local forward declarations.
|
||||
class DwarfFile;
|
||||
|
||||
/**
|
||||
* @class DwarfFile
|
||||
* @brief Main class containing all DWARF information.
|
||||
*/
|
||||
class DwarfFile
|
||||
{
|
||||
//
|
||||
// Public methods.
|
||||
//
|
||||
public:
|
||||
DwarfFile(std::string fileName, fileformat::FileFormat *fileParser = nullptr);
|
||||
~DwarfFile();
|
||||
bool hasDwarfInfo();
|
||||
|
||||
//
|
||||
// Functions getting particular DWARF records.
|
||||
//
|
||||
public:
|
||||
DwarfCUContainer *getCUs();
|
||||
DwarfLineContainer *getLines();
|
||||
DwarfFunctionContainer *getFunctions();
|
||||
DwarfTypeContainer *getTypes();
|
||||
DwarfVarContainer *getGlobalVars();
|
||||
Dwarf_Debug &getDwarfDebug();
|
||||
|
||||
//
|
||||
// Private methods.
|
||||
//
|
||||
private:
|
||||
bool loadFile(std::string fileName, fileformat::FileFormat *fileParser);
|
||||
void loadFileCUs();
|
||||
void loadCUtree(Dwarf_Die die, DwarfBaseElement* parent, int lvl);
|
||||
void loadDIE(Dwarf_Die die, DwarfBaseElement* &parent, int lvl);
|
||||
void makeStructTypesUnique();
|
||||
|
||||
//
|
||||
// Containers storing high-level representation of DWARF data.
|
||||
//
|
||||
private:
|
||||
DwarfCUContainer m_CUs; ///< Compilation units.
|
||||
DwarfLineContainer m_lines; ///< Line numbers.
|
||||
DwarfFunctionContainer m_functions; ///< Functions.
|
||||
DwarfTypeContainer m_types; ///< Data types.
|
||||
DwarfVarContainer m_globalVars; ///< Global variables.
|
||||
|
||||
//
|
||||
// Some auxiliary variables.
|
||||
//
|
||||
private:
|
||||
bool m_hasDwarf; ///< Loaded file contains some DWARF information.
|
||||
int m_res; ///< Global return value.
|
||||
Dwarf_Debug m_dbg; ///< Libdwarf structure representing DWARF file.
|
||||
int m_fd; ///< File descriptor used in dwarf_init().
|
||||
Dwarf_Error m_error; ///< Global libdwarf error code.
|
||||
|
||||
//
|
||||
// Variables keep track of the context of DWARF tree.
|
||||
//
|
||||
private:
|
||||
DwarfCU *m_activeCU;
|
||||
|
||||
//
|
||||
// Resources.
|
||||
//
|
||||
public:
|
||||
void initMapping(eDefaultMap m);
|
||||
DwarfResources resources; ///< Class representing resources.
|
||||
|
||||
//
|
||||
// These classes need to access DWARF context tree.
|
||||
//
|
||||
template <class T> friend class DwarfBaseContainer;
|
||||
friend class DwarfCUContainer;
|
||||
friend class DwarfLineContainer;
|
||||
friend class DwarfFunctionContainer;
|
||||
friend class DwarfVarContainer;
|
||||
friend class DwarfTypeContainer;
|
||||
friend class DwarfBaseElement;
|
||||
friend class DwarfCU;
|
||||
friend class DwarfArrayType;
|
||||
friend class DwarfEnumType;
|
||||
friend class DwarfStructType;
|
||||
friend class DwarfLocationDesc;
|
||||
friend class AttrProcessor;
|
||||
};
|
||||
|
||||
} // namespace dwarfparser
|
||||
|
||||
#endif
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user