Updating branches/google/testing to r297704

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/testing@298153 91177308-0d34-0410-b5e6-96231b3b80d8
Eric Christopher
2017-03-17 21:56:24 +00:00
5830 changed files with 699952 additions and 145488 deletions

.gitignore
View File

@@ -21,6 +21,9 @@
#OS X specific files.
.DS_store
# Nested build directory
/build
#==============================================================================#
# Explicit files to ignore (only matches one).
#==============================================================================#
@@ -58,6 +61,8 @@ tools/lld
tools/llgo
# Polly, which is tracked independently.
tools/polly
# avrlit, which is tracked independently.
tools/avrlit
# Sphinx build tree, if building in-source dir.
docs/_build
# VSCode config files.

View File

@@ -20,7 +20,7 @@ if(POLICY CMP0057)
endif()
if(NOT DEFINED LLVM_VERSION_MAJOR)
set(LLVM_VERSION_MAJOR 4)
set(LLVM_VERSION_MAJOR 5)
endif()
if(NOT DEFINED LLVM_VERSION_MINOR)
set(LLVM_VERSION_MINOR 0)
@@ -56,17 +56,20 @@ endif()
# This should only apply if you are both on an Apple host, and targeting Apple.
if(CMAKE_HOST_APPLE AND APPLE)
if(NOT CMAKE_XCRUN)
find_program(CMAKE_XCRUN NAMES xcrun)
endif()
if(CMAKE_XCRUN)
execute_process(COMMAND ${CMAKE_XCRUN} -find libtool
OUTPUT_VARIABLE CMAKE_LIBTOOL
OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
# if CMAKE_LIBTOOL is not set, try and find it with xcrun or find_program
if(NOT CMAKE_LIBTOOL)
if(NOT CMAKE_XCRUN)
find_program(CMAKE_XCRUN NAMES xcrun)
endif()
if(CMAKE_XCRUN)
execute_process(COMMAND ${CMAKE_XCRUN} -find libtool
OUTPUT_VARIABLE CMAKE_LIBTOOL
OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
if(NOT CMAKE_LIBTOOL OR NOT EXISTS CMAKE_LIBTOOL)
find_program(CMAKE_LIBTOOL NAMES libtool)
if(NOT CMAKE_LIBTOOL OR NOT EXISTS CMAKE_LIBTOOL)
find_program(CMAKE_LIBTOOL NAMES libtool)
endif()
endif()
get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES)
@@ -132,18 +135,6 @@ foreach(proj ${LLVM_ENABLE_PROJECTS})
endif()
endforeach()
# The following only works with the Ninja generator in CMake >= 3.0.
set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
"Define the maximum number of concurrent compilation jobs.")
if(LLVM_PARALLEL_COMPILE_JOBS)
if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
message(WARNING "Job pooling is only available with Ninja generators.")
else()
set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${LLVM_PARALLEL_COMPILE_JOBS})
set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
endif()
endif()
# Build llvm with ccache if the package is present
set(LLVM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build")
if(LLVM_CCACHE_BUILD)
@@ -178,21 +169,12 @@ if(LLVM_DEPENDENCY_DEBUGGING)
endif()
endif()
option(LLVM_BUILD_GLOBAL_ISEL "Experimental: Build GlobalISel" OFF)
option(LLVM_BUILD_GLOBAL_ISEL "Experimental: Build GlobalISel" ON)
if(LLVM_BUILD_GLOBAL_ISEL)
add_definitions(-DLLVM_BUILD_GLOBAL_ISEL)
endif()
set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING
"Define the maximum number of concurrent link jobs.")
if(LLVM_PARALLEL_LINK_JOBS)
if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
message(WARNING "Job pooling is only available with Ninja generators.")
else()
set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${LLVM_PARALLEL_LINK_JOBS})
set(CMAKE_JOB_POOL_LINK link_job_pool)
endif()
endif()
option(LLVM_ENABLE_DAGISEL_COV "Debug: Prints tablegen patterns that were used for selecting" OFF)
# Add path for custom modules
set(CMAKE_MODULE_PATH
@@ -414,9 +396,6 @@ option(LLVM_ENABLE_EXPENSIVE_CHECKS "Enable expensive checks" OFF)
set(LLVM_ABI_BREAKING_CHECKS "WITH_ASSERTS" CACHE STRING
"Enable abi-breaking checks. Can be WITH_ASSERTS, FORCE_ON or FORCE_OFF.")
option(LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
"Disable abi-breaking checks mismatch detection at link-tim." OFF)
option(LLVM_FORCE_USE_OLD_HOST_TOOLCHAIN
"Set to ON to force using an old, unsupported host toolchain." OFF)
@@ -737,6 +716,30 @@ configure_file(
${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/DataTypes.h.cmake
${LLVM_INCLUDE_DIR}/llvm/Support/DataTypes.h)
# Add target for generating source rpm package.
set(LLVM_SRPM_USER_BINARY_SPECFILE ${CMAKE_CURRENT_SOURCE_DIR}/llvm.spec.in
CACHE FILEPATH ".spec file to use for srpm generation")
set(LLVM_SRPM_BINARY_SPECFILE ${CMAKE_CURRENT_BINARY_DIR}/llvm.spec)
set(LLVM_SRPM_DIR "${CMAKE_CURRENT_BINARY_DIR}/srpm")
# SVN_REVISION and GIT_COMMIT get set by the call to add_version_info_from_vcs.
# DUMMY_VAR contains a version string which we don't care about.
add_version_info_from_vcs(DUMMY_VAR)
if ( SVN_REVISION )
set(LLVM_RPM_SPEC_REVISION "r${SVN_REVISION}")
elseif ( GIT_COMMIT )
set (LLVM_RPM_SPEC_REVISION "g${GIT_COMMIT}")
endif()
configure_file(
${LLVM_SRPM_USER_BINARY_SPECFILE}
${LLVM_SRPM_BINARY_SPECFILE} @ONLY)
add_custom_target(srpm
COMMAND cpack -G TGZ --config CPackSourceConfig.cmake -B ${LLVM_SRPM_DIR}/SOURCES
COMMAND rpmbuild -bs --define '_topdir ${LLVM_SRPM_DIR}' ${LLVM_SRPM_BINARY_SPECFILE})
# They are not referenced. See set_output_directory().
set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/bin )
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} )
@@ -878,7 +881,7 @@ if( LLVM_INCLUDE_TESTS )
endif()
add_subdirectory(test)
add_subdirectory(unittests)
if (MSVC)
if (WIN32)
# This utility is used to prevent crashing tests from calling Dr. Watson on
# Windows.
add_subdirectory(utils/KillTheDoctor)
@@ -978,3 +981,8 @@ if(LLVM_DISTRIBUTION_COMPONENTS)
endif()
endforeach()
endif()
# This allows us to deploy the Universal CRT DLLs by passing -DCMAKE_INSTALL_UCRT_LIBRARIES=ON to CMake
if (MSVC)
include(InstallRequiredSystemLibraries)
endif()

View File

@@ -5,12 +5,9 @@ what goes in or not.
The list is sorted by surname and formatted to allow easy grepping and
beautification by scripts. The fields are: name (N), email (E), web-address
(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
(S). Each entry should contain at least the (N), (E) and (D) fields.
N: Joe Abbey
E: jabbey@arxan.com
D: LLVM Bitcode (lib/Bitcode/* include/llvm/Bitcode/*)
(W), PGP key ID and fingerprint (P), description (D), snail-mail address
(S) and (I) IRC handle. Each entry should contain at least the (N), (E) and
(D) fields.
N: Justin Bogner
E: mail@justinbogner.com
@@ -21,6 +18,11 @@ N: Alex Bradbury
E: asb@lowrisc.org
D: RISC-V backend (lib/Target/RISCV/*)
N: Matthias Braun
E: matze@braunis.de
I: MatzeB
D: Instruction Scheduling
N: Chandler Carruth
E: chandlerc@gmail.com
E: chandlerc@google.com
@@ -34,6 +36,10 @@ N: Eric Christopher
E: echristo@gmail.com
D: Debug Information, inline assembly
N: Andrey Churbanov
E: andrey.churbanov@intel.com
D: OpenMP runtime library
N: Greg Clayton
E: gclayton@apple.com
D: LLDB
@@ -48,7 +54,7 @@ D: libc++
N: Peter Collingbourne
E: peter@pcc.me.uk
D: llgo, libLTO (lib/LTO/* tools/lto/*)
D: llgo, libLTO (lib/LTO/* tools/lto/*), LLVM Bitcode (lib/Bitcode/* include/llvm/Bitcode/*)
N: Quentin Colombet
E: qcolombet@apple.com
@@ -96,7 +102,7 @@ D: MCJIT, RuntimeDyld and JIT event listeners, Orcish Warchief
N: Teresa Johnson
E: tejohnson@google.com
D: Gold plugin (tools/gold/*)
D: Gold plugin (tools/gold/*) and IR Linker
N: Galina Kistanova
E: gkistanova@gmail.com
@@ -132,7 +138,7 @@ E: david.majnemer@gmail.com
D: IR Constant Folder, InstCombine
N: Dylan McKay
E: dylanmckay34@gmail.com
E: me@dylanmckay.io
D: AVR Backend
N: Tim Northover
@@ -180,9 +186,8 @@ E: alexei.starovoitov@gmail.com
D: BPF backend
N: Tom Stellard
E: thomas.stellard@amd.com
E: mesa-dev@lists.freedesktop.org
D: Release manager for the 3.5 and 3.6 branches, R600 Backend, libclc
E: tstellar@redhat.com
D: Stable release management (x.y.[1-9] releases), AMDGPU Backend, libclc
N: Evgeniy Stepanov
E: eugenis@google.com
@@ -192,18 +197,10 @@ N: Craig Topper
E: craig.topper@gmail.com
D: X86 Backend
N: Andrew Trick
E: atrick@apple.com
D: Instruction Scheduling
N: Ulrich Weigand
E: uweigand@de.ibm.com
D: SystemZ Backend
N: Teresa Johnson
E: tejohnson@google.com
D: IR Linker
N: Hans Wennborg
E: hans@chromium.org
D: Release management (x.y.0 releases)
@@ -211,7 +208,3 @@ D: Release management (x.y.0 releases)
N: whitequark
E: whitequark@whitequark.org
D: OCaml bindings
N: Andrey Churbanov
E: andrey.churbanov@intel.com
D: OpenMP runtime library

View File

@@ -457,6 +457,10 @@ N: Adam Treat
E: manyoso@yahoo.com
D: C++ bugs filed, and C++ front-end bug fixes.
N: Andrew Trick
E: atrick@apple.com
D: Instruction Scheduling, ...
N: Lauro Ramos Venancio
E: lauro.venancio@indt.org.br
D: ARM backend improvements

View File

@@ -4,7 +4,7 @@ LLVM Release License
University of Illinois/NCSA
Open Source License
Copyright (c) 2003-2016 University of Illinois at Urbana-Champaign.
Copyright (c) 2003-2017 University of Illinois at Urbana-Champaign.
All rights reserved.
Developed by:

View File

@@ -15,3 +15,4 @@ documentation setup.
If you are writing a package for LLVM, see docs/Packaging.rst for our
suggestions.

View File

@@ -51,7 +51,7 @@ E: diana.picus@linaro.org
T: AArch64
O: Linux
N: Vasileios Kalintiris
E: Vasileios.Kalintiris@imgtec.com
N: Simon Dardis
E: simon.dardis@imgtec.com
T: MIPS
O: Linux

View File

@@ -40,8 +40,8 @@ LLVMMetadataRef LLVMDIBuilderCreateCompileUnit(LLVMDIBuilderRef Dref,
int Optimized, const char *Flags,
unsigned RuntimeVersion) {
DIBuilder *D = unwrap(Dref);
return wrap(D->createCompileUnit(Lang, File, Dir, Producer, Optimized, Flags,
RuntimeVersion));
return wrap(D->createCompileUnit(Lang, D->createFile(File, Dir), Producer,
Optimized, Flags, RuntimeVersion));
}
LLVMMetadataRef LLVMDIBuilderCreateFile(LLVMDIBuilderRef Dref, const char *File,
@@ -119,7 +119,8 @@ LLVMMetadataRef LLVMDIBuilderCreatePointerType(LLVMDIBuilderRef Dref,
const char *Name) {
DIBuilder *D = unwrap(Dref);
return wrap(D->createPointerType(unwrap<DIType>(PointeeType), SizeInBits,
AlignInBits, Name));
AlignInBits, /* DWARFAddressSpace */ None,
Name));
}
LLVMMetadataRef

View File

@@ -43,6 +43,26 @@ func (pmb PassManagerBuilder) PopulateFunc(pm PassManager) {
C.LLVMPassManagerBuilderPopulateFunctionPassManager(pmb.C, pm.C)
}
func (pmb PassManagerBuilder) PopulateLTOPassManager(pm PassManager, internalize bool, runInliner bool) {
C.LLVMPassManagerBuilderPopulateLTOPassManager(pmb.C, pm.C, boolToLLVMBool(internalize), boolToLLVMBool(runInliner))
}
func (pmb PassManagerBuilder) Dispose() {
C.LLVMPassManagerBuilderDispose(pmb.C)
}
func (pmb PassManagerBuilder) SetDisableUnitAtATime(val bool) {
C.LLVMPassManagerBuilderSetDisableUnitAtATime(pmb.C, boolToLLVMBool(val))
}
func (pmb PassManagerBuilder) SetDisableUnrollLoops(val bool) {
C.LLVMPassManagerBuilderSetDisableUnrollLoops(pmb.C, boolToLLVMBool(val))
}
func (pmb PassManagerBuilder) SetDisableSimplifyLibCalls(val bool) {
C.LLVMPassManagerBuilderSetDisableSimplifyLibCalls(pmb.C, boolToLLVMBool(val))
}
func (pmb PassManagerBuilder) UseInlinerWithThreshold(threshold uint) {
C.LLVMPassManagerBuilderUseInlinerWithThreshold(pmb.C, C.uint(threshold))
}

View File

@@ -88,6 +88,15 @@ if(APPLE)
HAVE_CRASHREPORTER_INFO)
endif()
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
check_include_file(linux/magic.h HAVE_LINUX_MAGIC_H)
if(NOT HAVE_LINUX_MAGIC_H)
# older kernels use split files
check_include_file(linux/nfs_fs.h HAVE_LINUX_NFS_FS_H)
check_include_file(linux/smb.h HAVE_LINUX_SMB_H)
endif()
endif()
# library checks
if( NOT PURE_WINDOWS )
check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD)
@@ -115,7 +124,7 @@ if(HAVE_LIBPTHREAD)
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
set(THREADS_HAVE_PTHREAD_ARG Off)
find_package(Threads REQUIRED)
set(PTHREAD_LIB ${CMAKE_THREAD_LIBS_INIT})
set(LLVM_PTHREAD_LIB ${CMAKE_THREAD_LIBS_INIT})
endif()
# Don't look for these libraries on Windows. Also don't look for them if we're
@@ -167,7 +176,10 @@ check_symbol_exists(futimens sys/stat.h HAVE_FUTIMENS)
check_symbol_exists(futimes sys/time.h HAVE_FUTIMES)
check_symbol_exists(posix_fallocate fcntl.h HAVE_POSIX_FALLOCATE)
# AddressSanitizer conflicts with lib/Support/Unix/Signals.inc
if( HAVE_SIGNAL_H AND NOT LLVM_USE_SANITIZER MATCHES ".*Address.*")
# Avoid sigaltstack on Apple platforms, where backtrace() cannot handle it
# (rdar://7089625) and _Unwind_Backtrace is unusable because it cannot unwind
# past the signal handler after an assertion failure (rdar://29866587).
if( HAVE_SIGNAL_H AND NOT LLVM_USE_SANITIZER MATCHES ".*Address.*" AND NOT APPLE )
check_symbol_exists(sigaltstack signal.h HAVE_SIGALTSTACK)
endif()
if( HAVE_SYS_UIO_H )
@@ -224,6 +236,7 @@ if( HAVE_DLFCN_H )
list(APPEND CMAKE_REQUIRED_LIBRARIES dl)
endif()
check_symbol_exists(dlopen dlfcn.h HAVE_DLOPEN)
check_symbol_exists(dladdr dlfcn.h HAVE_DLADDR)
if( HAVE_LIBDL )
list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES dl)
endif()
@@ -233,6 +246,14 @@ check_symbol_exists(__GLIBC__ stdio.h LLVM_USING_GLIBC)
if( LLVM_USING_GLIBC )
add_llvm_definitions( -D_GNU_SOURCE )
endif()
# This check requires _GNU_SOURCE
if(HAVE_LIBPTHREAD)
check_library_exists(pthread pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP)
check_library_exists(pthread pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP)
elseif(PTHREAD_IN_LIBC)
check_library_exists(c pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP)
check_library_exists(c pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP)
endif()
set(headers "sys/types.h")
@@ -313,7 +334,9 @@ else()
endif()
endif()
check_cxx_compiler_flag("-Wno-variadic-macros" SUPPORTS_NO_VARIADIC_MACROS_FLAG)
check_cxx_compiler_flag("-Wvariadic-macros" SUPPORTS_VARIADIC_MACROS_FLAG)
check_cxx_compiler_flag("-Wgnu-zero-variadic-macro-arguments"
SUPPORTS_GNU_ZERO_VARIADIC_MACRO_ARGUMENTS_FLAG)
set(USE_NO_MAYBE_UNINITIALIZED 0)
set(USE_NO_UNINITIALIZED 0)
@@ -450,8 +473,15 @@ if( MSVC )
else()
set(HAVE_DIA_SDK 0)
endif()
option(LLVM_ENABLE_DIA_SDK "Use MSVC DIA SDK for debugging if available."
${HAVE_DIA_SDK})
if(LLVM_ENABLE_DIA_SDK AND NOT HAVE_DIA_SDK)
message(FATAL_ERROR "DIA SDK not found. If you have both VS 2012 and 2013 installed, you may need to uninstall the former and re-install the latter afterwards.")
endif()
else()
set(HAVE_DIA_SDK 0)
set(LLVM_ENABLE_DIA_SDK 0)
endif( MSVC )
# FIXME: Signal handler return type, currently hardcoded to 'void'
@@ -477,8 +507,6 @@ if (LLVM_ENABLE_ZLIB )
endif()
endif()
set(LLVM_PREFIX ${CMAKE_INSTALL_PREFIX})
if (LLVM_ENABLE_DOXYGEN)
message(STATUS "Doxygen enabled.")
find_package(Doxygen REQUIRED)
@@ -535,6 +563,9 @@ set(LLVM_BINUTILS_INCDIR "" CACHE PATH
"PATH to binutils/include containing plugin-api.h for gold plugin.")
if(CMAKE_HOST_APPLE AND APPLE)
if(NOT CMAKE_XCRUN)
find_program(CMAKE_XCRUN NAMES xcrun)
endif()
if(CMAKE_XCRUN)
execute_process(COMMAND ${CMAKE_XCRUN} -find ld
OUTPUT_VARIABLE LD64_EXECUTABLE

View File

@@ -462,11 +462,9 @@ function(llvm_add_library name)
if(UNIX AND NOT APPLE AND NOT ARG_SONAME)
set_target_properties(${name}
PROPERTIES
# Concatenate the version numbers since ldconfig expects exactly
# one component indicating the ABI version, while LLVM uses
# major+minor for that.
SOVERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR}
VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX})
# Since 4.0.0, the ABI version is indicated by the major version
SOVERSION ${LLVM_VERSION_MAJOR}
VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX})
endif()
endif()
@@ -720,11 +718,11 @@ macro(add_llvm_executable name)
if(NOT ARG_IGNORE_EXTERNALIZE_DEBUGINFO)
llvm_externalize_debuginfo(${name})
endif()
if (PTHREAD_LIB)
if (LLVM_PTHREAD_LIB)
# libpthreads overrides some standard library symbols, so main
# executable must be linked with it in order to provide consistent
# API for all shared libraries loaded by this executable.
target_link_libraries(${name} ${PTHREAD_LIB})
target_link_libraries(${name} ${LLVM_PTHREAD_LIB})
endif()
endmacro(add_llvm_executable name)
@@ -1007,13 +1005,18 @@ function(add_unittest test_suite test_name)
endif()
include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include)
include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googlemock/include)
if (NOT LLVM_ENABLE_THREADS)
list(APPEND LLVM_COMPILE_DEFINITIONS GTEST_HAS_PTHREAD=0)
endif ()
if (SUPPORTS_NO_VARIADIC_MACROS_FLAG)
if (SUPPORTS_VARIADIC_MACROS_FLAG)
list(APPEND LLVM_COMPILE_FLAGS "-Wno-variadic-macros")
endif ()
# Some parts of gtest rely on this GNU extension, don't warn on it.
if(SUPPORTS_GNU_ZERO_VARIADIC_MACRO_ARGUMENTS_FLAG)
list(APPEND LLVM_COMPILE_FLAGS "-Wno-gnu-zero-variadic-macro-arguments")
endif()
set(LLVM_REQUIRES_RTTI OFF)
@@ -1024,7 +1027,7 @@ function(add_unittest test_suite test_name)
# libpthreads overrides some standard library symbols, so main
# executable must be linked with it in order to provide consistent
# API for all shared libraries loaded by this executable.
target_link_libraries(${test_name} gtest_main gtest ${PTHREAD_LIB})
target_link_libraries(${test_name} gtest_main gtest ${LLVM_PTHREAD_LIB})
add_dependencies(${test_suite} ${test_name})
get_target_property(test_suite_folder ${test_suite} FOLDER)
@@ -1063,6 +1066,19 @@ function(llvm_add_go_executable binary pkgpath)
endif()
endfunction()
# This function canonicalizes the CMake variables passed by name
# from CMake boolean to 0/1 suitable for passing into Python or C++,
# in place.
function(llvm_canonicalize_cmake_booleans)
foreach(var ${ARGN})
if(${var})
set(${var} 1 PARENT_SCOPE)
else()
set(${var} 0 PARENT_SCOPE)
endif()
endforeach()
endfunction(llvm_canonicalize_cmake_booleans)
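As a rough usage sketch (the variable names below are hypothetical, not taken from this change), the helper rewrites the named CMake booleans in place so they can be substituted into a lit.site.cfg or a C++ configuration header:

set(HYPOTHETICAL_ENABLE_FOO ON)      # hypothetical option
set(HYPOTHETICAL_ENABLE_BAR FALSE)   # hypothetical option
llvm_canonicalize_cmake_booleans(HYPOTHETICAL_ENABLE_FOO HYPOTHETICAL_ENABLE_BAR)
# HYPOTHETICAL_ENABLE_FOO now holds "1" and HYPOTHETICAL_ENABLE_BAR holds "0".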
# This function provides an automatic way to 'configure'-like generate a file
# based on a set of common and custom variables, specifically targeting the
# variables needed for the 'lit.site.cfg' files. This function bundles the
@@ -1290,6 +1306,8 @@ endfunction()
function(add_llvm_tool_symlink link_name target)
cmake_parse_arguments(ARG "ALWAYS_GENERATE" "OUTPUT_DIR" "" ${ARGN})
set(dest_binary "$<TARGET_FILE:${target}>")
# This got a bit gross... For multi-configuration generators the target
# properties return the resolved value of the string, not the build system
# expression. To reconstruct the platform-agnostic path we have to do some
@@ -1298,6 +1316,11 @@ function(add_llvm_tool_symlink link_name target)
# and replace it with CMAKE_CFG_INTDIR. This allows the build step to be type
# agnostic again.
if(NOT ARG_OUTPUT_DIR)
# If you're not overriding the OUTPUT_DIR, we can make the link relative in
# the same directory.
if(UNIX)
set(dest_binary "$<TARGET_FILE_NAME:${target}>")
endif()
if(CMAKE_CONFIGURATION_TYPES)
list(GET CMAKE_CONFIGURATION_TYPES 0 first_type)
string(TOUPPER ${first_type} first_type_upper)
@@ -1323,10 +1346,8 @@ function(add_llvm_tool_symlink link_name target)
if(UNIX)
set(LLVM_LINK_OR_COPY create_symlink)
set(dest_binary "$<TARGET_FILE_NAME:${target}>")
else()
set(LLVM_LINK_OR_COPY copy)
set(dest_binary "$<TARGET_FILE:${target}>")
endif()
set(output_path "${ARG_OUTPUT_DIR}/${link_name}${CMAKE_EXECUTABLE_SUFFIX}")
@@ -1366,7 +1387,11 @@ function(llvm_externalize_debuginfo name)
endif()
if(NOT LLVM_EXTERNALIZE_DEBUGINFO_SKIP_STRIP)
set(strip_command COMMAND xcrun strip -Sxl $<TARGET_FILE:${name}>)
if(APPLE)
set(strip_command COMMAND xcrun strip -Sxl $<TARGET_FILE:${name}>)
else()
set(strip_command COMMAND strip -gx $<TARGET_FILE:${name}>)
endif()
endif()
if(APPLE)
@@ -1382,7 +1407,11 @@ function(llvm_externalize_debuginfo name)
${strip_command}
)
else()
message(FATAL_ERROR "LLVM_EXTERNALIZE_DEBUGINFO isn't implemented for non-darwin platforms!")
add_custom_command(TARGET ${name} POST_BUILD
COMMAND objcopy --only-keep-debug $<TARGET_FILE:${name}> $<TARGET_FILE:${name}>.debug
${strip_command} -R .gnu_debuglink
COMMAND objcopy --add-gnu-debuglink=$<TARGET_FILE:${name}>.debug $<TARGET_FILE:${name}>
)
endif()
endfunction()

View File

@@ -1,5 +1,5 @@
# Check if the host compiler is new enough. LLVM requires at least GCC 4.8,
# MSVC 2013, or Clang 3.1.
# MSVC 2015 (Update 3), or Clang 3.1.
include(CheckCXXSourceCompiles)
@@ -17,8 +17,8 @@ if(NOT DEFINED LLVM_COMPILER_CHECKED)
endif()
if (CMAKE_CXX_SIMULATE_ID MATCHES "MSVC")
if (CMAKE_CXX_SIMULATE_VERSION VERSION_LESS 18.0)
message(FATAL_ERROR "Host Clang must have at least -fms-compatibility-version=18.0")
if (CMAKE_CXX_SIMULATE_VERSION VERSION_LESS 19.0)
message(FATAL_ERROR "Host Clang must have at least -fms-compatibility-version=19.0")
endif()
set(CLANG_CL 1)
elseif(NOT LLVM_ENABLE_LIBCXX)
@@ -41,10 +41,10 @@ int main() { return (float)x; }"
set(CMAKE_REQUIRED_LIBRARIES ${OLD_CMAKE_REQUIRED_LIBRARIES})
endif()
elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 18.0)
message(FATAL_ERROR "Host Visual Studio must be at least 2013")
elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 18.0.31101)
message(WARNING "Host Visual Studio should at least be 2013 Update 4 (MSVC 18.0.31101)"
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.0)
message(FATAL_ERROR "Host Visual Studio must be at least 2015")
elseif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.00.24213.1)
message(WARNING "Host Visual Studio should at least be 2015 Update 3 (MSVC 19.00.24213.1)"
" due to miscompiles from earlier versions")
endif()
endif()

View File

@@ -0,0 +1,8 @@
include(CheckCXXCompilerFlag)
function(check_linker_flag flag out_var)
set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${flag}")
check_cxx_compiler_flag("" ${out_var})
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
endfunction()
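A small usage sketch, assuming an arbitrary flag: the helper appends the flag to CMAKE_REQUIRED_FLAGS and re-runs a compiler check so the link step exercises it; append_if is LLVM's existing CMake helper and the flag here is only illustrative.

check_linker_flag("-Wl,--gc-sections" LINKER_SUPPORTS_GC_SECTIONS)  # example flag
append_if(LINKER_SUPPORTS_GC_SECTIONS "-Wl,--gc-sections"
  CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)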

View File

@@ -7,5 +7,7 @@ if(NOT DEFINED LLVM_COMPILER_IS_GCC_COMPATIBLE)
set(LLVM_COMPILER_IS_GCC_COMPATIBLE OFF)
elseif( "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON)
elseif( "${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel" )
set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON)
endif()
endif()

View File

@@ -12,8 +12,38 @@ include(AddLLVMDefinitions)
include(CheckCCompilerFlag)
include(CheckCXXCompilerFlag)
if(CMAKE_LINKER MATCHES "lld-link.exe" OR (WIN32 AND LLVM_USE_LINKER STREQUAL "lld"))
set(LINKER_IS_LLD_LINK TRUE)
else()
set(LINKER_IS_LLD_LINK FALSE)
endif()
if (CMAKE_LINKER MATCHES "lld-link.exe")
# Ninja Job Pool support
# The following only works with the Ninja generator in CMake >= 3.0.
set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
"Define the maximum number of concurrent compilation jobs.")
if(LLVM_PARALLEL_COMPILE_JOBS)
if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
message(WARNING "Job pooling is only available with Ninja generators.")
else()
set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${LLVM_PARALLEL_COMPILE_JOBS})
set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
endif()
endif()
set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING
"Define the maximum number of concurrent link jobs.")
if(LLVM_PARALLEL_LINK_JOBS)
if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
message(WARNING "Job pooling is only available with Ninja generators.")
else()
set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${LLVM_PARALLEL_LINK_JOBS})
set(CMAKE_JOB_POOL_LINK link_job_pool)
endif()
endif()
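For illustration only, a user-side cache script (loaded with cmake -C; the values are arbitrary) could cap both pools when configuring with the Ninja generator:

set(LLVM_PARALLEL_COMPILE_JOBS 8 CACHE STRING "")  # arbitrary example value
set(LLVM_PARALLEL_LINK_JOBS 2 CACHE STRING "")     # arbitrary example value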
if (LINKER_IS_LLD_LINK)
# Pass /MANIFEST:NO so that CMake doesn't run mt.exe on our binaries. Adding
# manifests with mt.exe breaks LLD's symbol tables and takes as much time as
# the link. See PR24476.
@@ -147,9 +177,19 @@ function(add_flag_or_print_warning flag name)
endif()
endfunction()
if(LLVM_ENABLE_LLD)
check_cxx_compiler_flag("-fuse-ld=lld" CXX_SUPPORTS_LLD)
append_if(CXX_SUPPORTS_LLD "-fuse-ld=lld"
if( LLVM_ENABLE_LLD )
if ( LLVM_USE_LINKER )
message(FATAL_ERROR "LLVM_ENABLE_LLD and LLVM_USE_LINKER can't be set at the same time")
endif()
set(LLVM_USE_LINKER "lld")
endif()
if( LLVM_USE_LINKER )
check_cxx_compiler_flag("-fuse-ld=${LLVM_USE_LINKER}" CXX_SUPPORTS_CUSTOM_LINKER)
if ( NOT CXX_SUPPORTS_CUSTOM_LINKER )
message(FATAL_ERROR "Host compiler does not support '-fuse-ld=${LLVM_USE_LINKER}'")
endif()
append("-fuse-ld=${LLVM_USE_LINKER}"
CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
endif()
@@ -232,6 +272,13 @@ if(MSVC)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /STACK:10000000")
elseif(MINGW) # FIXME: Also cygwin?
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--stack,16777216")
# Pass -mbig-obj to mingw gas on Win64. COFF has a 2**16 section limit, and
# on Win64, every COMDAT function creates at least 3 sections: .text, .pdata,
# and .xdata.
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
append("-Wa,-mbig-obj" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
endif()
if( MSVC )
@@ -366,11 +413,13 @@ if( MSVC )
# "Enforce type conversion rules".
append("/Zc:rvalueCast" CMAKE_CXX_FLAGS)
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT LLVM_ENABLE_LTO)
# clang-cl and cl by default produce non-deterministic binaries because
# link.exe /incremental requires a timestamp in the .obj file. clang-cl
# has the flag /Brepro to force deterministic binaries. We want to pass that
# whenever you're building with clang unless you're passing /incremental.
# whenever you're building with clang unless you're passing /incremental
# or using LTO (/Brepro with LTO would result in a warning about the flag
# being unused, because we're not generating object files).
# This checks CMAKE_CXX_COMPILER_ID in addition to check_cxx_compiler_flag()
# because cl.exe does not emit an error on flags it doesn't understand,
# letting check_cxx_compiler_flag() claim it understands all flags.
@@ -393,11 +442,6 @@ if( MSVC )
endif()
endif()
# Disable sized deallocation if the flag is supported. MSVC fails to compile
# the operator new overload in User otherwise.
check_c_compiler_flag("/WX /Zc:sizedDealloc-" SUPPORTS_SIZED_DEALLOC)
append_if(SUPPORTS_SIZED_DEALLOC "/Zc:sizedDealloc-" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE )
if (LLVM_ENABLE_WARNINGS)
append("-Wall -W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
@@ -452,6 +496,9 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE )
if (NOT C_WCOMMENT_ALLOWS_LINE_WRAP)
append("-Wno-comment" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
# Enable -Wstring-conversion to catch misuse of string literals.
add_flag_if_supported("-Wstring-conversion" STRING_CONVERSION_FLAG)
endif (LLVM_ENABLE_WARNINGS)
append_if(LLVM_ENABLE_WERROR "-Werror" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
add_flag_if_supported("-Werror=date-time" WERROR_DATE_TIME)
@@ -522,7 +569,7 @@ macro(append_common_sanitizer_flags)
elseif (CLANG_CL)
# Keep frame pointers around.
append("/Oy-" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
if (CMAKE_LINKER MATCHES "lld-link.exe")
if (LINKER_IS_LLD_LINK)
# Use DWARF debug info with LLD.
append("-gdwarf" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
else()
@@ -550,6 +597,11 @@ if(LLVM_USE_SANITIZER)
append_common_sanitizer_flags()
append("-fsanitize=undefined -fno-sanitize=vptr,function -fno-sanitize-recover=all"
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
set(BLACKLIST_FILE "${CMAKE_SOURCE_DIR}/utils/sanitizers/ubsan_blacklist.txt")
if (EXISTS "${BLACKLIST_FILE}")
append("-fsanitize-blacklist=${BLACKLIST_FILE}"
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
elseif (LLVM_USE_SANITIZER STREQUAL "Thread")
append_common_sanitizer_flags()
append("-fsanitize=thread" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
@@ -571,8 +623,12 @@ if(LLVM_USE_SANITIZER)
else()
message(FATAL_ERROR "LLVM_USE_SANITIZER is not supported on this platform.")
endif()
if (LLVM_USE_SANITIZER MATCHES "(Undefined;)?Address(;Undefined)?")
add_flag_if_supported("-fsanitize-address-use-after-scope"
FSANITIZE_USE_AFTER_SCOPE_FLAG)
endif()
if (LLVM_USE_SANITIZE_COVERAGE)
append("-fsanitize-coverage=edge,indirect-calls,8bit-counters,trace-cmp" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
append("-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
endif()
@@ -592,6 +648,14 @@ if (UNIX AND
append("-fcolor-diagnostics" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
# lld doesn't print colored diagnostics when invoked from Ninja
if (UNIX AND CMAKE_GENERATOR STREQUAL "Ninja")
include(CheckLinkerFlag)
check_linker_flag("-Wl,-color-diagnostics" LINKER_SUPPORTS_COLOR_DIAGNOSTICS)
append_if(LINKER_SUPPORTS_COLOR_DIAGNOSTICS "-Wl,-color-diagnostics"
CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
endif()
# Add flags for add_dead_strip().
# FIXME: With MSVS, consider compiling with /Gy and linking with /OPT:REF?
# But MinSizeRel seems to add that automatically, so maybe disable these
@@ -643,20 +707,29 @@ append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate='${LLVM_PRO
set(LLVM_ENABLE_LTO OFF CACHE STRING "Build LLVM with LTO. May be specified as Thin or Full to use a particular kind of LTO")
string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO)
if(LLVM_ENABLE_LTO AND LLVM_ON_WIN32 AND NOT LINKER_IS_LLD_LINK)
message(FATAL_ERROR "When compiling for Windows, LLVM_ENABLE_LTO requires using lld as the linker (point CMAKE_LINKER at lld-link.exe)")
endif()
if(uppercase_LLVM_ENABLE_LTO STREQUAL "THIN")
append("-flto=thin" CMAKE_CXX_FLAGS CMAKE_C_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
append("-flto=thin" CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
if(NOT LINKER_IS_LLD_LINK)
append("-flto=thin" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
endif()
# On darwin, enable the lto cache. This improves initial build time a little
# since we re-link a lot of the same objects, and significantly improves
# incremental build time.
append_if(APPLE "-Wl,-cache_path_lto,${PROJECT_BINARY_DIR}/lto.cache"
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
elseif(uppercase_LLVM_ENABLE_LTO STREQUAL "FULL")
append("-flto=full" CMAKE_CXX_FLAGS CMAKE_C_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
append("-flto=full" CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
if(NOT LINKER_IS_LLD_LINK)
append("-flto=full" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
endif()
elseif(LLVM_ENABLE_LTO)
append("-flto" CMAKE_CXX_FLAGS CMAKE_C_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
append("-flto" CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
if(NOT LINKER_IS_LLD_LINK)
append("-flto" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
endif()
endif()
# This option makes utils/extract_symbols.py be used to determine the list of

View File

@@ -8,27 +8,61 @@ function(link_system_libs target)
message(AUTHOR_WARNING "link_system_libs no longer needed")
endfunction()
# is_llvm_target_library(
# library
# Name of the LLVM library to check
# return_var
# Output variable name
# ALL_TARGETS;INCLUDED_TARGETS;OMITTED_TARGETS
# ALL_TARGETS - default looks at the full list of known targets
# INCLUDED_TARGETS - looks only at targets being configured
# OMITTED_TARGETS - looks only at targets that are not being configured
# )
function(is_llvm_target_library library return_var)
cmake_parse_arguments(ARG "ALL_TARGETS;INCLUDED_TARGETS;OMITTED_TARGETS" "" "" ${ARGN})
# Sets variable `return_var' to ON if `library' corresponds to a
# LLVM supported target. To OFF if it doesn't.
set(${return_var} OFF PARENT_SCOPE)
string(TOUPPER "${library}" capitalized_lib)
string(TOUPPER "${LLVM_ALL_TARGETS}" targets)
if(ARG_INCLUDED_TARGETS)
string(TOUPPER "${LLVM_TARGETS_TO_BUILD}" targets)
elseif(ARG_OMITTED_TARGETS)
set(omitted_targets ${LLVM_ALL_TARGETS})
list(REMOVE_ITEM omitted_targets ${LLVM_TARGETS_TO_BUILD})
string(TOUPPER "${omitted_targets}" targets)
else()
string(TOUPPER "${LLVM_ALL_TARGETS}" targets)
endif()
foreach(t ${targets})
if( capitalized_lib STREQUAL t OR
capitalized_lib STREQUAL "LLVM${t}" OR
capitalized_lib STREQUAL "LLVM${t}CODEGEN" OR
capitalized_lib STREQUAL "LLVM${t}ASMPARSER" OR
capitalized_lib STREQUAL "LLVM${t}ASMPRINTER" OR
capitalized_lib STREQUAL "LLVM${t}DISASSEMBLER" OR
capitalized_lib STREQUAL "LLVM${t}INFO" )
capitalized_lib STREQUAL "${t}" OR
capitalized_lib STREQUAL "${t}DESC" OR
capitalized_lib STREQUAL "${t}CODEGEN" OR
capitalized_lib STREQUAL "${t}ASMPARSER" OR
capitalized_lib STREQUAL "${t}ASMPRINTER" OR
capitalized_lib STREQUAL "${t}DISASSEMBLER" OR
capitalized_lib STREQUAL "${t}INFO" OR
capitalized_lib STREQUAL "${t}UTILS" )
set(${return_var} ON PARENT_SCOPE)
break()
endif()
endforeach()
endfunction(is_llvm_target_library)
function(is_llvm_target_specifier library return_var)
is_llvm_target_library(${library} ${return_var} ${ARGN})
string(TOUPPER "${library}" capitalized_lib)
if(NOT ${return_var})
if( capitalized_lib STREQUAL "ALLTARGETSASMPARSERS" OR
capitalized_lib STREQUAL "ALLTARGETSDESCS" OR
capitalized_lib STREQUAL "ALLTARGETSDISASSEMBLERS" OR
capitalized_lib STREQUAL "ALLTARGETSINFOS" OR
capitalized_lib STREQUAL "NATIVE" OR
capitalized_lib STREQUAL "NATIVECODEGEN" )
set(${return_var} ON PARENT_SCOPE)
endif()
endif()
endfunction()
macro(llvm_config executable)
cmake_parse_arguments(ARG "USE_SHARED" "" "" ${ARGN})
@@ -93,6 +127,21 @@ function(llvm_map_components_to_libnames out_libs)
endif()
string(TOUPPER "${LLVM_AVAILABLE_LIBS}" capitalized_libs)
get_property(LLVM_TARGETS_CONFIGURED GLOBAL PROPERTY LLVM_TARGETS_CONFIGURED)
# Generally in our build system we avoid order-dependence. Unfortunately since
# not all targets create the same set of libraries we actually need to ensure
# that all build targets associated with a target are added before we can
# process target dependencies.
if(NOT LLVM_TARGETS_CONFIGURED)
foreach(c ${link_components})
is_llvm_target_specifier(${c} iltl_result ALL_TARGETS)
if(iltl_result)
message(FATAL_ERROR "Specified target library before target registration is complete.")
endif()
endforeach()
endif()
# Expand some keywords:
list(FIND LLVM_TARGETS_TO_BUILD "${LLVM_NATIVE_ARCH}" have_native_backend)
list(FIND link_components "engine" engine_required)
@@ -141,6 +190,12 @@ function(llvm_map_components_to_libnames out_libs)
if( TARGET LLVM${c}Disassembler )
list(APPEND expanded_components "LLVM${c}Disassembler")
endif()
if( TARGET LLVM${c}Info )
list(APPEND expanded_components "LLVM${c}Info")
endif()
if( TARGET LLVM${c}Utils )
list(APPEND expanded_components "LLVM${c}Utils")
endif()
elseif( c STREQUAL "native" )
# already processed
elseif( c STREQUAL "nativecodegen" )
@@ -198,9 +253,16 @@ function(llvm_map_components_to_libnames out_libs)
list(FIND capitalized_libs LLVM${capitalized} lib_idx)
if( lib_idx LESS 0 )
# The component is unknown. Maybe is an omitted target?
is_llvm_target_library(${c} iltl_result)
if( NOT iltl_result )
message(FATAL_ERROR "Library `${c}' not found in list of llvm libraries.")
is_llvm_target_library(${c} iltl_result OMITTED_TARGETS)
if(iltl_result)
# A missing library to a directly referenced omitted target would be bad.
message(FATAL_ERROR "Library '${c}' is a direct reference to a target library for an omitted target.")
else()
# If it is not an omitted target we should assume it is a component
# that hasn't yet been processed by CMake. Missing components will
# cause errors later in the configuration, so we can safely assume
# that this is valid here.
list(APPEND expanded_components LLVM${c})
endif()
else( lib_idx LESS 0 )
list(GET LLVM_AVAILABLE_LIBS ${lib_idx} canonical_lib)

View File

@@ -37,12 +37,18 @@ set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@)
set(LLVM_ENABLE_ZLIB @LLVM_ENABLE_ZLIB@)
set(LLVM_ENABLE_DIA_SDK @LLVM_ENABLE_DIA_SDK@)
set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@)
set(LLVM_ENABLE_PIC @LLVM_ENABLE_PIC@)
set(LLVM_BUILD_32_BITS @LLVM_BUILD_32_BITS@)
if (NOT "@LLVM_PTHREAD_LIB@" STREQUAL "")
set(LLVM_PTHREAD_LIB "@LLVM_PTHREAD_LIB@")
endif()
set(LLVM_ENABLE_PLUGINS @LLVM_ENABLE_PLUGINS@)
set(LLVM_EXPORT_SYMBOLS_FOR_PLUGINS @LLVM_EXPORT_SYMBOLS_FOR_PLUGINS@)
set(LLVM_PLUGIN_EXT @LLVM_PLUGIN_EXT@)
@@ -73,4 +79,5 @@ if(NOT TARGET LLVMSupport)
@llvm_config_include_buildtree_only_exports@
endif()
set_property(GLOBAL PROPERTY LLVM_TARGETS_CONFIGURED On)
include(${LLVM_CMAKE_DIR}/LLVM-Config.cmake)

View File

@@ -45,6 +45,9 @@ function(llvm_ExternalProject_Add name source_dir)
canonicalize_tool_name(${name} nameCanon)
if(NOT ARG_TOOLCHAIN_TOOLS)
set(ARG_TOOLCHAIN_TOOLS clang lld)
if(NOT APPLE AND NOT WIN32)
list(APPEND ARG_TOOLCHAIN_TOOLS llvm-ar llvm-ranlib)
endif()
endif()
foreach(tool ${ARG_TOOLCHAIN_TOOLS})
if(TARGET ${tool})
@@ -104,6 +107,12 @@ function(llvm_ExternalProject_Add name source_dir)
set(compiler_args -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang
-DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++)
endif()
if(llvm-ar IN_LIST TOOLCHAIN_TOOLS)
list(APPEND compiler_args -DCMAKE_AR=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-ar)
endif()
if(llvm-ranlib IN_LIST TOOLCHAIN_TOOLS)
list(APPEND compiler_args -DCMAKE_RANLIB=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-ranlib)
endif()
list(APPEND ARG_DEPENDS ${TOOLCHAIN_TOOLS})
endif()
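As a hedged sketch (the project name and path are made up), a runtime configured through this function could request the freshly built archiver and ranlib explicitly:

llvm_ExternalProject_Add(my-runtime ${CMAKE_CURRENT_SOURCE_DIR}/my-runtime
  TOOLCHAIN_TOOLS clang lld llvm-ar llvm-ranlib)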

View File

@@ -23,6 +23,13 @@ function(tablegen project ofn)
set(LLVM_TARGET_DEFINITIONS_ABSOLUTE
${CMAKE_CURRENT_SOURCE_DIR}/${LLVM_TARGET_DEFINITIONS})
endif()
if (LLVM_ENABLE_DAGISEL_COV)
list(FIND ARGN "-gen-dag-isel" idx)
if( NOT idx EQUAL -1 )
list(APPEND LLVM_TABLEGEN_FLAGS "-instrument-coverage")
endif()
endif()
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
# Generate tablegen output in a temporary file.
COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
@@ -92,7 +99,7 @@ macro(add_tablegen target project)
set(LLVM_ENABLE_OBJLIB ON)
endif()
add_llvm_utility(${target} ${ARGN})
add_llvm_executable(${target} DISABLE_LLVM_LINK_LLVM_DYLIB ${ARGN})
set(LLVM_LINK_COMPONENTS ${${target}_OLD_LLVM_LINK_COMPONENTS})
set(${project}_TABLEGEN "${target}" CACHE

View File

@@ -28,10 +28,11 @@ function(add_version_info_from_vcs VERS)
elseif( EXISTS ${SOURCE_DIR}/.git )
set(result "${result}git")
# Try to get a ref-id
if( EXISTS ${SOURCE_DIR}/.git/svn )
find_program(git_executable NAMES git git.exe git.cmd)
if( git_executable )
set(is_git_svn_rev_exact false)
find_program(git_executable NAMES git git.exe git.cmd)
if( git_executable )
if( EXISTS ${SOURCE_DIR}/.git/svn )
# Get the repository URL
execute_process(COMMAND
${git_executable} svn info
WORKING_DIRECTORY ${SOURCE_DIR}
@@ -43,42 +44,37 @@ function(add_version_info_from_vcs VERS)
if(svn_url)
set(LLVM_REPOSITORY ${CMAKE_MATCH_1} PARENT_SCOPE)
endif()
string(REGEX REPLACE "^(.*\n)?Revision: ([^\n]+).*"
"\\2" git_svn_rev_number "${git_output}")
set(SVN_REVISION ${git_svn_rev_number} PARENT_SCOPE)
set(git_svn_rev "-svn-${git_svn_rev}")
# Determine if the HEAD points directly at a subversion revision.
execute_process(COMMAND ${git_executable} svn find-rev HEAD
WORKING_DIRECTORY ${SOURCE_DIR}
TIMEOUT 5
RESULT_VARIABLE git_result
OUTPUT_VARIABLE git_output)
if( git_result EQUAL 0 )
string(STRIP "${git_output}" git_head_svn_rev_number)
if( git_head_svn_rev_number EQUAL git_svn_rev_number )
set(is_git_svn_rev_exact true)
endif()
endif()
else()
set(git_svn_rev "")
endif()
execute_process(COMMAND
${git_executable} rev-parse --short HEAD
# Get the svn revision number for this git commit if one exists.
execute_process(COMMAND ${git_executable} svn find-rev HEAD
WORKING_DIRECTORY ${SOURCE_DIR}
TIMEOUT 5
RESULT_VARIABLE git_result
OUTPUT_VARIABLE git_output)
if( git_result EQUAL 0 AND NOT is_git_svn_rev_exact )
string(STRIP "${git_output}" git_ref_id)
set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE)
set(result "${result}${git_svn_rev}-${git_ref_id}")
OUTPUT_VARIABLE git_head_svn_rev_number
OUTPUT_STRIP_TRAILING_WHITESPACE)
if( git_result EQUAL 0 AND git_output)
set(SVN_REVISION ${git_head_svn_rev_number} PARENT_SCOPE)
set(git_svn_rev "-svn-${git_head_svn_rev_number}")
else()
set(result "${result}${git_svn_rev}")
set(git_svn_rev "")
endif()
endif()
# Get the git ref id
execute_process(COMMAND
${git_executable} rev-parse --short HEAD
WORKING_DIRECTORY ${SOURCE_DIR}
TIMEOUT 5
RESULT_VARIABLE git_result
OUTPUT_VARIABLE git_ref_id
OUTPUT_STRIP_TRAILING_WHITESPACE)
if( git_result EQUAL 0 )
set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE)
set(result "${result}${git_svn_rev}-${git_ref_id}")
else()
set(result "${result}${git_svn_rev}")
endif()
endif()
endif()

View File

@@ -4,6 +4,7 @@ SET(CMAKE_SYSTEM_NAME Darwin)
SET(CMAKE_SYSTEM_VERSION 13)
SET(CMAKE_CXX_COMPILER_WORKS True)
SET(CMAKE_C_COMPILER_WORKS True)
SET(IOS True)
if(NOT CMAKE_OSX_SYSROOT)
execute_process(COMMAND xcodebuild -version -sdk iphoneos Path

View File

@@ -19,20 +19,73 @@ Address Spaces
The AMDGPU back-end uses the following address space mapping:
============= ============================================
Address Space Memory Space
============= ============================================
0 Private
1 Global
2 Constant
3 Local
4 Generic (Flat)
5 Region
============= ============================================
================== =================== ==============
LLVM Address Space DWARF Address Space Memory Space
================== =================== ==============
0 1 Private
1 N/A Global
2 N/A Constant
3 2 Local
4 N/A Generic (Flat)
5 N/A Region
================== =================== ==============
The terminology in the table, aside from the region memory space, is from the
OpenCL standard.
LLVM Address Space is used throughout LLVM (for example, in LLVM IR). DWARF
Address Space is emitted in DWARF, and is used by tools such as debuggers,
profilers and others.
Trap Handler ABI
----------------
The OS element of the target triple controls the trap handler behavior.
HSA OS
^^^^^^
For code objects generated by AMDGPU back-end for the HSA OS, the runtime
installs a trap handler that supports the s_trap instruction with the following
usage:
+--------------+-------------+-------------------+----------------------------+
|Usage |Code Sequence|Trap Handler Inputs|Description |
+==============+=============+===================+============================+
|reserved |s_trap 0x00 | |Reserved by hardware. |
+--------------+-------------+-------------------+----------------------------+
|HSA debugtrap |s_trap 0x01 |SGPR0-1: queue_ptr |Reserved for HSA debugtrap |
|(arg) | |VGPR0: arg |intrinsic (not implemented).|
+--------------+-------------+-------------------+----------------------------+
|llvm.trap |s_trap 0x02 |SGPR0-1: queue_ptr |Causes dispatch to be |
| | | |terminated and its |
| | | |associated queue put into |
| | | |the error state. |
+--------------+-------------+-------------------+----------------------------+
|llvm.debugtrap| s_trap 0x03 |SGPR0-1: queue_ptr |If debugger not installed |
| | | |handled same as llvm.trap. |
+--------------+-------------+-------------------+----------------------------+
|debugger |s_trap 0x07 | |Reserved for debugger |
|breakpoint | | |breakpoints. |
+--------------+-------------+-------------------+----------------------------+
|debugger |s_trap 0x08 | |Reserved for debugger. |
+--------------+-------------+-------------------+----------------------------+
|debugger |s_trap 0xfe | |Reserved for debugger. |
+--------------+-------------+-------------------+----------------------------+
|debugger |s_trap 0xff | |Reserved for debugger. |
+--------------+-------------+-------------------+----------------------------+
Non-HSA OS
^^^^^^^^^^
For code objects generated by AMDGPU back-end for non-HSA OS, the runtime does
not install a trap handler. The llvm.trap and llvm.debugtrap instructions are
handled as follows:
=============== ============= ===============================================
Usage Code Sequence Description
=============== ============= ===============================================
llvm.trap s_endpgm Causes wavefront to be terminated.
llvm.debugtrap s_nop No operation. Compiler warning generated that
there is no trap handler installed.
=============== ============= ===============================================
Assembler
=========
@@ -204,7 +257,7 @@ SOPP Instruction Examples
For full list of supported instructions, refer to "SOPP Instructions" in ISA Manual.
Unless otherwise mentioned, little verification is performed on the operands
of SOPP Instrucitons, so it is up to the programmer to be familiar with the
of SOPP Instructions, so it is up to the programmer to be familiar with the
range of acceptable values.
Vector ALU Instruction Examples

View File

@@ -123,11 +123,11 @@ To allow comparing different functions during inter-procedural analysis and
optimization, ``MD_prof`` nodes can also be assigned to a function definition.
The first operand is a string indicating the name of the associated counter.
Currently, one counter is supported: "function_entry_count". This is a 64-bit
counter that indicates the number of times that this function was invoked (in
the case of instrumentation-based profiles). In the case of sampling-based
profiles, this counter is an approximation of how many times the function was
invoked.
Currently, one counter is supported: "function_entry_count". The second operand
is a 64-bit counter that indicates the number of times that this function was
invoked (in the case of instrumentation-based profiles). In the case of
sampling-based profiles, this operand is an approximation of how many times
the function was invoked.
For example, in the code below, the instrumentation for function foo()
indicates that it was called 2,590 times at runtime.
@@ -138,3 +138,13 @@ indicates that it was called 2,590 times at runtime.
ret i32 0
}
!1 = !{!"function_entry_count", i64 2590}
If "function_entry_count" has more than 2 operands, the later operands are
the GUID of the functions that need to be imported by ThinLTO. This is only
set by sampling based profile. It is needed because the sampling based profile
was collected on a binary that had already imported and inlined these functions,
and we need to ensure the IR matches in the ThinLTO backends for profile
annotation. The reason why we cannot annotate this on the callsite is that it
can only go down 1 level in the call chain. For the cases where
foo_in_a_cc()->bar_in_b_cc()->baz_in_c_cc(), we will need to go down 2 levels
in the call chain to import both bar_in_b_cc and baz_in_c_cc.

View File

@@ -186,6 +186,8 @@ CMake manual, or execute ``cmake --help-variable VARIABLE_NAME``.
Sets the build type for ``make``-based generators. Possible values are
Release, Debug, RelWithDebInfo and MinSizeRel. If you are using an IDE such as
Visual Studio, you should use the IDE settings to set the build type.
Be aware that Release and RelWithDebInfo do not use the same optimization
level on most platforms.
**CMAKE_INSTALL_PREFIX**:PATH
Path where LLVM will be installed if "make install" is invoked or the
@@ -366,6 +368,10 @@ LLVM-specific variables
Enable building with zlib to support compression/uncompression in LLVM tools.
Defaults to ON.
**LLVM_ENABLE_DIA_SDK**:BOOL
Enable building with MSVC DIA SDK for PDB debugging support. Available
only with MSVC. Defaults to ON.
**LLVM_USE_SANITIZER**:STRING
Define the sanitizer used to build LLVM binaries and tests. Possible values
are ``Address``, ``Memory``, ``MemoryWithOrigins``, ``Undefined``, ``Thread``,
@@ -376,6 +382,18 @@ LLVM-specific variables
lines, enabling link-time optimization. Possible values are ``Off``,
``On``, ``Thin`` and ``Full``. Defaults to OFF.
**LLVM_USE_LINKER**:STRING
Add ``-fuse-ld={name}`` to the link invocation. The possible values depend on
your compiler; for clang the value can be an absolute path to your custom
linker, otherwise clang will prefix the name with ``ld.`` and apply its usual
search. For example, to link LLVM with the Gold linker, cmake can be invoked
with ``-DLLVM_USE_LINKER=gold``.
**LLVM_ENABLE_LLD**:BOOL
This option is equivalent to `-DLLVM_USE_LINKER=lld`, except during a 2-stage
build where a dependency is added from the first stage to the second ensuring
that lld is built before stage2 begins.
**LLVM_PARALLEL_COMPILE_JOBS**:STRING
Define the maximum number of concurrent compilation jobs.
@@ -451,6 +469,8 @@ LLVM-specific variables
**SPHINX_EXECUTABLE**:STRING
The path to the ``sphinx-build`` executable detected by CMake.
For installation instructions, see
http://www.sphinx-doc.org/en/latest/install.html
**SPHINX_OUTPUT_HTML**:BOOL
If enabled (and ``LLVM_ENABLE_SPHINX`` is enabled) then the targets for

View File

@@ -1005,7 +1005,7 @@ The TableGen DAG instruction selector generator reads the instruction patterns
in the ``.td`` file and automatically builds parts of the pattern matching code
for your target. It has the following strengths:
* At compiler-compiler time, it analyzes your instruction patterns and tells you
* At compiler-compile time, it analyzes your instruction patterns and tells you
if your patterns make sense or not.
* It can handle arbitrary constraints on operands for the pattern match. In
@@ -1026,7 +1026,7 @@ for your target. It has the following strengths:
* Targets can define their own (and rely on built-in) "pattern fragments".
Pattern fragments are chunks of reusable patterns that get inlined into your
patterns during compiler-compiler time. For example, the integer "``(not
patterns during compiler-compile time. For example, the integer "``(not
x)``" operation is actually defined as a pattern fragment that expands as
"``(xor x, -1)``", since the SelectionDAG does not have a native '``not``'
operation. Targets can define their own short-hand fragments as they see fit.

View File

@@ -131,9 +131,8 @@ unlikely to be supported by our host compilers.
* Delegating constructors: N1986_
* Default member initializers (non-static data member initializers): N2756_
* Only use these for scalar members that would otherwise be left
uninitialized. Non-scalar members generally have appropriate default
constructors.
* Feel free to use these wherever they make sense and where the `=`
syntax is allowed. Don't use braced initialization syntax.
.. _N2118: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2006/n2118.html
.. _N2439: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2439.htm

View File

@@ -77,6 +77,15 @@ OPTIONS
-verify``. With this option FileCheck will verify that input does not contain
warnings not covered by any ``CHECK:`` patterns.
.. option:: --enable-var-scope
Enables scope for regex variables.
Variables with names that start with ``$`` are considered global and
remain set throughout the file.
All other variables get undefined after each encountered ``CHECK-LABEL``.
.. option:: -version
Show the version number of this program.
@@ -344,6 +353,9 @@ matched by the directive cannot also be matched by any other check present in
other unique identifiers. Conceptually, the presence of ``CHECK-LABEL`` divides
the input stream into separate blocks, each of which is processed independently,
preventing a ``CHECK:`` directive in one block matching a line in another block.
If ``--enable-var-scope`` is in effect, all local variables are cleared at the
beginning of the block.
For example,
.. code-block:: llvm
@@ -436,6 +448,13 @@ were defined on. For example:
Can be useful if you want the operands of ``op`` to be the same register,
and don't care exactly which register it is.
If ``--enable-var-scope`` is in effect, variables with names that
start with ``$`` are considered to be global. All other variables are
local. All local variables get undefined at the beginning of each
CHECK-LABEL block. Global variables are not affected by CHECK-LABEL.
This makes it easier to ensure that individual tests are not affected
by variables set in preceding tests.
FileCheck Expressions
~~~~~~~~~~~~~~~~~~~~~

View File

@@ -56,7 +56,7 @@ GENERAL OPTIONS
Search for :file:`{NAME}.cfg` and :file:`{NAME}.site.cfg` when searching for
test suites, instead of :file:`lit.cfg` and :file:`lit.site.cfg`.
.. option:: -D NAME, -D NAME=VALUE, --param NAME, --param NAME=VALUE
.. option:: -D NAME[=VALUE], --param NAME[=VALUE]
Add a user defined parameter ``NAME`` with the given ``VALUE`` (or the empty
string if not given). The meaning and use of these parameters is test suite
@@ -152,6 +152,23 @@ SELECTION OPTIONS
Run the tests in a random order.
.. option:: --num-shards=M
Divide the set of selected tests into ``M`` equal-sized subsets or
"shards", and run only one of them. Must be used with the
``--run-shard=N`` option, which selects the shard to run. The environment
variable ``LIT_NUM_SHARDS`` can also be used in place of this
option. These two options provide a coarse mechanism for partitioning large
testsuites, for parallel execution on separate machines (say in a large
testing farm).
.. option:: --run-shard=N
Select which shard to run, assuming the ``--num-shards=M`` option was
provided. The two options must be used together, and the value of ``N``
must be in the range ``1..M``. The environment variable
``LIT_RUN_SHARD`` can also be used in place of this option.
ADDITIONAL OPTIONS
------------------
@@ -362,7 +379,7 @@ PRE-DEFINED SUBSTITUTIONS
~~~~~~~~~~~~~~~~~~~~~~~~~~
:program:`lit` provides various patterns that can be used with the RUN command.
These are defined in TestRunner.py.
These are defined in TestRunner.py. The base set of substitutions are:
========== ==============
Macro Substitution
@@ -374,17 +391,13 @@ These are defined in TestRunner.py.
%t temporary file name unique to the test
%T temporary directory unique to the test
%% %
%/s same as %s but replace all / with \\
%/S same as %S but replace all / with \\
%/p same as %p but replace all / with \\
%/t same as %t but replace all / with \\
%/T same as %T but replace all / with \\
========== ==============
Further substitution patterns might be defined by each test module.
See the modules :ref:`local-configuration-files`.
Other substitutions are provided that are variations on this base set and
further substitution patterns can be defined by each test module. See the
modules :ref:`local-configuration-files`.
More information on the testing infrastucture can be found in the
More detailed information on substitutions can be found in the
:doc:`../TestingGuide`.
TEST RUN OUTPUT FORMAT

View File

@@ -322,6 +322,10 @@ OPTIONS
universal binary or to use an architecture that does not match a
non-universal binary.
.. option:: -show-functions
Show coverage summaries for each function.
.. program:: llvm-cov export
.. _llvm-cov-export:

View File

@@ -35,8 +35,8 @@ by many Linux package managers; you probably need to install nvidia's package.
You will need CUDA 7.0, 7.5, or 8.0 to compile with clang.
CUDA compilation is supported on Linux, and on MacOS as of XXXX-XX-XX. Windows
support is planned but not yet in place.
CUDA compilation is supported on Linux, on MacOS as of 2016-11-18, and on
Windows as of 2017-01-05.
Invoking clang
--------------

View File

@@ -110,7 +110,7 @@ The LLVM IR for this coroutine looks like this:
call void @free(i8* %mem)
br label %suspend
suspend:
call void @llvm.coro.end(i8* %hdl, i1 false)
%unused = call i1 @llvm.coro.end(i8* %hdl, i1 false)
ret i8* %hdl
}
@@ -440,7 +440,7 @@ store the current value produced by a coroutine.
call void @free(i8* %mem)
br label %suspend
suspend:
call void @llvm.coro.end(i8* %hdl, i1 false)
%unused = call i1 @llvm.coro.end(i8* %hdl, i1 false)
ret i8* %hdl
}
@@ -955,41 +955,90 @@ A frontend should emit exactly one `coro.id` intrinsic per coroutine.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
declare void @llvm.coro.end(i8* <handle>, i1 <unwind>)
declare i1 @llvm.coro.end(i8* <handle>, i1 <unwind>)
Overview:
"""""""""
The '``llvm.coro.end``' marks the point where execution of the resume part of
the coroutine should end and control returns back to the caller.
the coroutine should end and control should return to the caller.
Arguments:
""""""""""
The first argument should refer to the coroutine handle of the enclosing coroutine.
The first argument should refer to the coroutine handle of the enclosing
coroutine. A frontend is allowed to supply null as the first parameter, in this
case `coro-early` pass will replace the null with an appropriate coroutine
handle value.
The second argument should be `true` if this coro.end is in the block that is
part of the unwind sequence leaving the coroutine body due to exception prior to
the first reaching any suspend points, and `false` otherwise.
part of the unwind sequence leaving the coroutine body due to an exception and
`false` otherwise.
Semantics:
""""""""""
The `coro.end`_ intrinsic is a no-op during an initial invocation of the
coroutine. When the coroutine resumes, the intrinsic marks the point when
the coroutine needs to return control back to the caller.
The purpose of this intrinsic is to allow frontends to mark the cleanup and
other code that is only relevant during the initial invocation of the coroutine
and should not be present in resume and destroy parts.
This intrinsic is removed by the CoroSplit pass when a coroutine is split into
the start, resume and destroy parts. In start part, the intrinsic is removed,
in resume and destroy parts, it is replaced with `ret void` instructions and
This intrinsic is lowered when a coroutine is split into
the start, resume and destroy parts. In the start part, it is a no-op,
in resume and destroy parts, it is replaced with `ret void` instruction and
the rest of the block containing `coro.end` instruction is discarded.
In landing pads it is replaced with an appropriate instruction to unwind to
caller.
caller. The handling of coro.end differs depending on whether the target is
using the landingpad or the WinEH exception model.
A frontend is allowed to supply null as the first parameter, in this case
`coro-early` pass will replace the null with an appropriate coroutine handle
value.
For the landingpad-based exception model, it is expected that the frontend uses
the `coro.end`_ intrinsic as follows:
.. code-block:: llvm
ehcleanup:
%InResumePart = call i1 @llvm.coro.end(i8* null, i1 true)
br i1 %InResumePart, label %eh.resume, label %cleanup.cont
cleanup.cont:
; rest of the cleanup
eh.resume:
%exn = load i8*, i8** %exn.slot, align 8
%sel = load i32, i32* %ehselector.slot, align 4
%lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
%lpad.val29 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
resume { i8*, i32 } %lpad.val29
The `CoroSplit` pass replaces `coro.end` with ``True`` in the resume functions,
thus leading to an immediate unwind to the caller, whereas in the start function
it is replaced with ``False``, allowing execution to proceed to the rest of the
cleanup code that is only needed during the initial invocation of the coroutine.
For the Windows exception handling model, a frontend should attach a funclet
bundle referring to an enclosing cleanuppad as follows:
.. code-block:: text
ehcleanup:
%tok = cleanuppad within none []
%unused = call i1 @llvm.coro.end(i8* null, i1 true) [ "funclet"(token %tok) ]
cleanupret from %tok unwind label %RestOfTheCleanup
The `CoroSplit` pass, if the funclet bundle is present, will insert
``cleanupret from %tok unwind to caller`` before
the `coro.end`_ intrinsic and will remove the rest of the block.
The following table summarizes the handling of the `coro.end`_ intrinsic.
+--------------------------+-------------------+-------------------------------+
|                          | In Start Function | In Resume/Destroy Functions   |
+--------------------------+-------------------+-------------------------------+
|unwind=false              | nothing           |``ret void``                   |
+------------+-------------+-------------------+-------------------------------+
|            | WinEH       | nothing           |``cleanupret unwind to caller``|
|unwind=true +-------------+-------------------+-------------------------------+
|            | Landingpad  | nothing           | nothing                       |
+------------+-------------+-------------------+-------------------------------+
.. _coro.suspend:
.. _suspend points:

View File

@@ -21,7 +21,7 @@ to know how it works under the hood. A prior knowledge of how Clang's profile
guided optimization works is useful, but not required.
We start by showing how to use LLVM and Clang for code coverage analysis,
then we briefly desribe LLVM's code coverage mapping format and the
then we briefly describe LLVM's code coverage mapping format and the
way that Clang and LLVM's code coverage tool work with this format. After
the basics are down, more advanced features of the coverage mapping format
are discussed - such as the data structures, LLVM IR representation and

View File

@@ -62,7 +62,7 @@ way to see what other people are interested in and watching the flow of the
project as a whole.
We recommend that active developers register an email account with `LLVM
Bugzilla <http://llvm.org/bugs/>`_ and preferably subscribe to the `llvm-bugs
Bugzilla <https://bugs.llvm.org/>`_ and preferably subscribe to the `llvm-bugs
<http://lists.llvm.org/mailman/listinfo/llvm-bugs>`_ email list to keep track
of bugs and enhancements occurring in LLVM. We really appreciate people who are
proactive at catching incoming bugs in their components and dealing with them
@@ -261,7 +261,7 @@ the future that the change is responsible for. For example:
* The changes should not cause performance or correctness regressions in code
compiled by LLVM on all applicable targets.
* You are expected to address any `Bugzilla bugs <http://llvm.org/bugs/>`_ that
* You are expected to address any `Bugzilla bugs <https://bugs.llvm.org/>`_ that
result from your change.
We prefer for this to be handled before submission but understand that it isn't

View File

@@ -61,13 +61,13 @@ types ``IMAGE_REL_I386_SECREL`` (32-bit) or ``IMAGE_REL_AMD64_SECREL`` (64-bit).
the target. It corresponds to the COFF relocation types
``IMAGE_REL_I386_SECTION`` (32-bit) or ``IMAGE_REL_AMD64_SECTION`` (64-bit).
.. code-block:: gas
.. code-block:: none
.section .debug$S,"rn"
.long 4
.long 242
.long 40
.secrel32 _function_name
.secrel32 _function_name + 0
.secidx _function_name
...
@@ -204,9 +204,49 @@ For example, the following code creates two sections named ``.text``.
The unique number is not present in the resulting object at all. It is just used
in the assembler to differentiate the sections.
The 'm' flag is mapped to SHF_LINK_ORDER. If it is present, a symbol
must be given that identifies the section to be placed in the
.sh_link.
.. code-block:: gas
.section .foo,"a",@progbits
.Ltmp:
.section .bar,"am",@progbits,.Ltmp
which is equivalent to just
.. code-block:: gas
.section .foo,"a",@progbits
.section .bar,"am",@progbits,.foo
Target Specific Behaviour
=========================
X86
---
Relocations
^^^^^^^^^^^
``@ABS8`` can be applied to symbols which appear as immediate operands to
instructions that have an 8-bit immediate form for that operand. It causes
the assembler to use the 8-bit form and an 8-bit relocation (e.g. ``R_386_8``
or ``R_X86_64_8``) for the symbol.
For example:
.. code-block:: gas
cmpq $foo@ABS8, %rdi
This causes the assembler to select the form of the 64-bit ``cmpq`` instruction
that takes an 8-bit immediate operand that is sign extended to 64 bits, as
opposed to ``cmpq $foo, %rdi`` which takes a 32-bit immediate operand. This
is also not the same as ``cmpb $foo, %dil``, which is an 8-bit comparison.
Windows on ARM
--------------

View File

@@ -47,12 +47,18 @@ The format of this section is
uint32 : NumFaultingPCs
uint32 : Reserved (expected to be 0)
FunctionFaultInfo[NumFaultingPCs] {
uint32 : FaultKind = FaultMaps::FaultingLoad (only legal value currently)
uint32 : FaultKind
uint32 : FaultingPCOffset
uint32 : HandlerPCOffset
}
}
FaultKind describes the reason of the expected fault. Currently three kinds
of faults are supported:
1. ``FaultMaps::FaultingLoad`` - fault due to load from memory.
2. ``FaultMaps::FaultingLoadStore`` - fault due to instruction load and store.
3. ``FaultMaps::FaultingStore`` - fault due to store to memory.
The ``ImplicitNullChecks`` pass
===============================

View File

@@ -52,6 +52,18 @@ Here's the short story for getting up and running quickly with LLVM:
* ``cd llvm/tools``
* ``svn co http://llvm.org/svn/llvm-project/cfe/trunk clang``
#. Checkout LLD linker **[Optional]**:
* ``cd where-you-want-llvm-to-live``
* ``cd llvm/tools``
* ``svn co http://llvm.org/svn/llvm-project/lld/trunk lld``
#. Checkout Polly Loop Optimizer **[Optional]**:
* ``cd where-you-want-llvm-to-live``
* ``cd llvm/tools``
* ``svn co http://llvm.org/svn/llvm-project/polly/trunk polly``
#. Checkout Compiler-RT (required to build the sanitizers) **[Optional]**:
* ``cd where-you-want-llvm-to-live``
@@ -262,7 +274,7 @@ our build systems:
* Clang 3.1
* GCC 4.8
* Visual Studio 2015
* Visual Studio 2015 (Update 3)
Anything older than these toolchains *may* work, but will require forcing the
build system with a special option and is not really a supported host platform.
@@ -719,7 +731,7 @@ Or a combination of multiple projects:
% cd $TOP_LEVEL_DIR
% mkdir clang-build && cd clang-build
% cmake -GNinja ../llvm-project/llvm -DLLVM_ENABLE_PROJECTS="clang;libcxx;compiler-rt"
% cmake -GNinja ../llvm-project/llvm -DLLVM_ENABLE_PROJECTS="clang;libcxx;libcxxabi"
A helper script is provided in `llvm/utils/git-svn/git-llvm`. After you add it
to your path, you can push committed changes upstream with `git llvm push`.
@@ -732,7 +744,7 @@ to your path, you can push committed changes upstream with `git llvm push`.
While this is using SVN under the hood, it does not require any interaction from
you with git-svn.
After a few minutes, `git pull` should get back the changes as they were
commited. Note that a current limitation is that `git` does not directly record
committed. Note that a current limitation is that `git` does not directly record
file rename, and thus it is propagated to SVN as a combination of delete-add
instead of a file rename.

View File

@@ -358,41 +358,6 @@ existing patterns (as any pattern we can select is by definition legal).
Expanding that to describe legalization actions is a much larger but
potentially useful project.
.. _milegalizer-scalar-narrow:
Scalar narrow types
^^^^^^^^^^^^^^^^^^^
In the AArch64 port, we currently mark as legal operations on narrow integer
types that have a legal equivalent in a wider type.
For example, this:
%2(GPR,s8) = G_ADD %0, %1
is selected to a 32-bit instruction:
%2(GPR32) = ADDWrr %0, %1
This avoids unnecessarily legalizing operations that can be seen as legal:
8-bit additions are supported, but happen to have a 32-bit result with the high
24 bits undefined.
``TODO``:
This has implications regarding vreg classes (as narrow values can now be
represented by wider vregs) and should be investigated further.
``TODO``:
In particular, s1 comparison results can be represented as wider values in
different ways.
SelectionDAG has the notion of BooleanContents, which allows targets to choose
what true and false are when in a larger register:
* ``ZeroOrOne`` --- if only 0 and 1 are valid bools, even in a larger register.
* ``ZeroOrMinusOne`` --- if -1 is true (common for vector instructions,
where compares produce -1).
* ``Undefined`` --- if only the low bit is relevant in determining truth.
.. _milegalizer-non-power-of-2:
Non-power of 2 types

View File

@@ -6,9 +6,19 @@ Introduction
============
This document contains information about adding a build configuration and
buildslave to private slave builder to LLVM Buildbot Infrastructure
`<http://lab.llvm.org:8011>`_.
buildslave to private slave builder to LLVM Buildbot Infrastructure.
Buildmasters
============
There are two buildmasters running.
* The main buildmaster at `<http://lab.llvm.org:8011>`_. All builders attached
to this machine will notify commit authors every time they break the build.
* The staging buildbot at `<http://lab.llvm.org:8014>`_. All builders attached
to this machine will be completely silent by default when the build is broken.
Builders for experimental backends should generally be attached to this
buildmaster.
Steps To Add Builder To LLVM Buildbot
=====================================
@@ -73,6 +83,11 @@ Here are the steps you can follow to do so:
* slaves are added to ``buildbot/osuosl/master/config/slaves.py``
* builders are added to ``buildbot/osuosl/master/config/builders.py``
It is possible to whitelist email addresses to unconditionally receive notifications
on build failure; for this you'll need to add an ``InformativeMailNotifier`` to
``buildbot/osuosl/master/config/status.py``. This is particularly useful for the
staging buildmaster which is silent otherwise.
#. Send the buildslave access name and the access password directly to
`Galina Kistanova <mailto:gkistanova@gmail.com>`_, and wait till she
will let you know that your changes are applied and buildmaster is

View File

@@ -19,7 +19,7 @@ section to narrow down the bug so that the person who fixes it will be able
to find the problem more easily.
Once you have a reduced test-case, go to `the LLVM Bug Tracking System
<http://llvm.org/bugs/enter_bug.cgi>`_ and fill out the form with the
<https://bugs.llvm.org/enter_bug.cgi>`_ and fill out the form with the
necessary details (note that you don't need to pick a category, just use
the "new-bugs" category if you're not sure). The bug description should
contain the following information:

View File

@@ -321,4 +321,3 @@ the properties which are associated with that component.
``BuildTool`` components currently use the exact same properties as
``Tool`` components, the type distinction is only used to differentiate
what the tool is built for.

View File

@@ -195,7 +195,7 @@ linkage:
``private``
Global values with "``private``" linkage are only directly
accessible by objects in the current module. In particular, linking
code into a module with an private global value may cause the
code into a module with a private global value may cause the
private to be renamed as necessary to avoid collisions. Because the
symbol is private to the module, all references can be updated. This
doesn't show up in any symbol table in the object file.
@@ -1474,8 +1474,10 @@ example:
any mutable state (e.g. memory, control registers, etc) visible to
caller functions. It does not write through any pointer arguments
(including ``byval`` arguments) and never changes any state visible
to callers. This means that it cannot unwind exceptions by calling
the ``C++`` exception throwing methods.
to callers. This means while it cannot unwind exceptions by calling
the ``C++`` exception throwing methods (since they write to memory), there may
be non-``C++`` mechanisms that throw exceptions without writing to LLVM
visible memory.
On an argument, this attribute indicates that the function does not
dereference that pointer argument, even though it may read or write the
@@ -1487,9 +1489,10 @@ example:
caller functions. It may dereference pointer arguments and read
state that may be set in the caller. A readonly function always
returns the same value (or unwinds an exception identically) when
called with the same set of arguments and global state. It cannot
unwind an exception by calling the ``C++`` exception throwing
methods.
called with the same set of arguments and global state. This means while it
cannot unwind exceptions by calling the ``C++`` exception throwing methods
(since they write to memory), there may be non-``C++`` mechanisms that throw
exceptions without writing to LLVM visible memory.
On an argument, this attribute indicates that the function does not write
through this pointer argument, even though it may write to the memory that
@@ -2169,8 +2172,9 @@ Fast-Math Flags
LLVM IR floating-point binary ops (:ref:`fadd <i_fadd>`,
:ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`,
:ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`) have the following flags that can
be set to enable otherwise unsafe floating point operations
:ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`) and :ref:`call <i_call>`
instructions have the following flags that can be set to enable
otherwise unsafe floating point transformations.
``nnan``
No NaNs - Allow optimizations to assume the arguments and result are not
@@ -3198,6 +3202,22 @@ resulting assembly string is parsed by LLVM's integrated assembler unless it is
disabled -- even when emitting a ``.s`` file -- and thus must contain assembly
syntax known to LLVM.
LLVM also supports a few more substitutions useful for writing inline assembly:
- ``${:uid}``: Expands to a decimal integer unique to this inline assembly blob.
This substitution is useful when declaring a local label. Many standard
compiler optimizations, such as inlining, may duplicate an inline asm blob.
Adding a blob-unique identifier ensures that the two labels will not conflict
during assembly. This is used to implement `GCC's %= special format
string <https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html>`_.
- ``${:comment}``: Expands to the comment character of the current target's
assembly dialect. This is usually ``#``, but many targets use other strings,
such as ``;``, ``//``, or ``!``.
- ``${:private}``: Expands to the assembler private label prefix. Labels with
this prefix will not appear in the symbol table of the assembled object.
Typically the prefix is ``L``, but targets may use other strings. ``.L`` is
relatively popular.
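For illustration, here is a minimal sketch (not taken from the LLVM sources; an
x86-style assembly dialect is assumed) of an asm blob that stays correct when it
is duplicated by inlining:
.. code-block:: llvm
; The ${:uid} substitution keeps the local label unique per expansion of this
; blob; ${:comment} expands to the target's assembly comment string.
define void @uses_uid() {
call void asm sideeffect ".Ltmp${:uid}: nop ${:comment} one copy per expansion", ""()
ret void
}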
LLVM's support for inline asm is modeled closely on the requirements of Clang's
GCC-compatible inline-asm support. Thus, the feature-set and the constraint and
modifier codes listed here are similar or identical to those in GCC's inline asm
@@ -3940,15 +3960,28 @@ to the ``add`` instruction using the ``!dbg`` identifier:
%indvar.next = add i64 %indvar, 1, !dbg !21
Metadata can also be attached to a function definition. Here metadata ``!22``
is attached to the ``foo`` function using the ``!dbg`` identifier:
Metadata can also be attached to a function or a global variable. Here metadata
``!22`` is attached to the ``f1`` and ``f2`` functions, and the globals ``g1``
and ``g2`` using the ``!dbg`` identifier:
.. code-block:: llvm
define void @foo() !dbg !22 {
declare !dbg !22 void @f1()
define void @f2() !dbg !22 {
ret void
}
@g1 = global i32 0, !dbg !22
@g2 = external global i32, !dbg !22
A transformation is required to drop any metadata attachment that it does not
know about or knows it cannot preserve. Currently there is an exception for metadata
attachment to globals for ``!type`` and ``!absolute_symbol`` which can't be
unconditionally dropped unless the global is itself deleted.
Metadata attached to a module using named metadata may not be dropped, with
the exception of debug metadata (named metadata with the name ``!llvm.dbg.*``).
More information about specific metadata nodes recognized by the
optimizers and code generator is found below.
@@ -3973,7 +4006,9 @@ DICompileUnit
``retainedTypes:``, ``subprograms:``, ``globals:``, ``imports:`` and ``macros:``
fields are tuples containing the debug info to be emitted along with the compile
unit, regardless of code optimizations (some nodes are only emitted if there are
references to them from instructions).
references to them from instructions). The ``debugInfoForProfiling:`` field is a
boolean indicating whether or not line-table discriminators are updated to
provide more-accurate debug info for profiling results.
.. code-block:: text
@@ -3996,12 +4031,15 @@ DIFile
``DIFile`` nodes represent files. The ``filename:`` can include slashes.
.. code-block:: llvm
.. code-block:: none
!0 = !DIFile(filename: "path/to/file", directory: "/path/to/dir")
!0 = !DIFile(filename: "path/to/file", directory: "/path/to/dir",
checksumkind: CSK_MD5,
checksum: "000102030405060708090a0b0c0d0e0f")
Files are sometimes used in ``scope:`` fields, and are the only valid target
for ``file:`` fields.
Valid values for ``checksumkind:`` field are: {CSK_None, CSK_MD5, CSK_SHA1}
.. _DIBasicType:
@@ -4339,6 +4377,10 @@ The current supported vocabulary is limited:
- ``DW_OP_plus, 93`` adds ``93`` to the working expression.
- ``DW_OP_bit_piece, 16, 8`` specifies the offset and size (``16`` and ``8``
here, respectively) of the variable piece from the working expression.
- ``DW_OP_swap`` swaps the top two stack entries.
- ``DW_OP_xderef`` provides an extended dereference mechanism. The entry at the
top of the stack is treated as an address. The second stack entry is treated
as an address space identifier.
.. code-block:: text
@@ -4346,6 +4388,7 @@ The current supported vocabulary is limited:
!1 = !DIExpression(DW_OP_plus, 3)
!2 = !DIExpression(DW_OP_bit_piece, 3, 7)
!3 = !DIExpression(DW_OP_deref, DW_OP_plus, 3, DW_OP_bit_piece, 3, 7)
!4 = !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef)
DIObjCProperty
""""""""""""""
@@ -4398,37 +4441,156 @@ appear in the included source file.
^^^^^^^^^^^^^^^^^^^
In LLVM IR, memory does not have types, so LLVM's own type system is not
suitable for doing TBAA. Instead, metadata is added to the IR to
describe a type system of a higher level language. This can be used to
implement typical C/C++ TBAA, but it can also be used to implement
custom alias analysis behavior for other languages.
suitable for doing type based alias analysis (TBAA). Instead, metadata is
added to the IR to describe a type system of a higher level language. This
can be used to implement C/C++ strict type aliasing rules, but it can also
be used to implement custom alias analysis behavior for other languages.
The current metadata format is very simple. TBAA metadata nodes have up
to three fields, e.g.:
This description of LLVM's TBAA system is broken into two parts:
:ref:`Semantics<tbaa_node_semantics>` talks about high level issues, and
:ref:`Representation<tbaa_node_representation>` talks about the metadata
encoding of various entities.
.. code-block:: llvm
It is always possible to trace any TBAA node to a "root" TBAA node (details
in the :ref:`Representation<tbaa_node_representation>` section). TBAA
nodes with different roots have an unknown aliasing relationship, and LLVM
conservatively infers ``MayAlias`` between them. The rules mentioned in
this section only pertain to TBAA nodes living under the same root.
!0 = !{ !"an example type tree" }
!1 = !{ !"int", !0 }
!2 = !{ !"float", !0 }
!3 = !{ !"const float", !2, i64 1 }
.. _tbaa_node_semantics:
The first field is an identity field. It can be any value, usually a
metadata string, which uniquely identifies the type. The most important
name in the tree is the name of the root node. Two trees with different
root node names are entirely disjoint, even if they have leaves with
common names.
Semantics
"""""""""
The second field identifies the type's parent node in the tree, or is
null or omitted for a root node. A type is considered to alias all of
its descendants and all of its ancestors in the tree. Also, a type is
considered to alias all types in other trees, so that bitcode produced
from multiple front-ends is handled conservatively.
The TBAA metadata system, referred to as "struct path TBAA" (not to be
confused with ``tbaa.struct``), consists of the following high level
concepts: *Type Descriptors*, further subdivided into scalar type
descriptors and struct type descriptors; and *Access Tags*.
If the third field is present, it's an integer which if equal to 1
indicates that the type is "constant" (meaning
**Type descriptors** describe the type system of the higher level language
being compiled. **Scalar type descriptors** describe types that do not
contain other types. Each scalar type has a parent type, which must also
be a scalar type or the TBAA root. Via this parent relation, scalar types
within a TBAA root form a tree. **Struct type descriptors** denote types
that contain a sequence of other type descriptors, at known offsets. These
contained type descriptors can either be struct type descriptors themselves
or scalar type descriptors.
**Access tags** are metadata nodes attached to load and store instructions.
Access tags use type descriptors to describe the *location* being accessed
in terms of the type system of the higher level language. Access tags are
tuples consisting of a base type, an access type and an offset. The base
type is a scalar type descriptor or a struct type descriptor, the access
type is a scalar type descriptor, and the offset is a constant integer.
The access tag ``(BaseTy, AccessTy, Offset)`` can describe one of two
things:
* If ``BaseTy`` is a struct type, the tag describes a memory access (load
or store) of a value of type ``AccessTy`` contained in the struct type
``BaseTy`` at offset ``Offset``.
* If ``BaseTy`` is a scalar type, ``Offset`` must be 0 and ``BaseTy`` and
``AccessTy`` must be the same; and the access tag describes a scalar
access with scalar type ``AccessTy``.
We first define an ``ImmediateParent`` relation on ``(BaseTy, Offset)``
tuples this way:
* If ``BaseTy`` is a scalar type then ``ImmediateParent(BaseTy, 0)`` is
``(ParentTy, 0)`` where ``ParentTy`` is the parent of the scalar type as
described in the TBAA metadata. ``ImmediateParent(BaseTy, Offset)`` is
undefined if ``Offset`` is non-zero.
* If ``BaseTy`` is a struct type then ``ImmediateParent(BaseTy, Offset)``
is ``(NewTy, NewOffset)`` where ``NewTy`` is the type contained in
``BaseTy`` at offset ``Offset`` and ``NewOffset`` is ``Offset`` adjusted
to be relative within that inner type.
A memory access with an access tag ``(BaseTy1, AccessTy1, Offset1)``
aliases a memory access with an access tag ``(BaseTy2, AccessTy2,
Offset2)`` if either ``(BaseTy1, Offset1)`` is reachable from ``(BaseTy2,
Offset2)`` via the ``Parent`` relation or vice versa.
As a concrete example, the type descriptor graph for the following program
.. code-block:: c
struct Inner {
int i; // offset 0
float f; // offset 4
};
struct Outer {
float f; // offset 0
double d; // offset 4
struct Inner inner_a; // offset 12
};
void f(struct Outer* outer, struct Inner* inner, float* f, int* i, char* c) {
outer->f = 0; // tag0: (OuterStructTy, FloatScalarTy, 0)
outer->inner_a.i = 0; // tag1: (OuterStructTy, IntScalarTy, 12)
outer->inner_a.f = 0.0; // tag2: (OuterStructTy, FloatScalarTy, 16)
*f = 0.0; // tag3: (FloatScalarTy, FloatScalarTy, 0)
}
is (note that in C and C++, ``char`` can be used to access any arbitrary
type):
.. code-block:: text
Root = "TBAA Root"
CharScalarTy = ("char", Root, 0)
FloatScalarTy = ("float", CharScalarTy, 0)
DoubleScalarTy = ("double", CharScalarTy, 0)
IntScalarTy = ("int", CharScalarTy, 0)
InnerStructTy = {"Inner" (IntScalarTy, 0), (FloatScalarTy, 4)}
OuterStructTy = {"Outer", (FloatScalarTy, 0), (DoubleScalarTy, 4),
(InnerStructTy, 12)}
with (e.g.) ``ImmediateParent(OuterStructTy, 12)`` = ``(InnerStructTy,
0)``, ``ImmediateParent(InnerStructTy, 0)`` = ``(IntScalarTy, 0)``, and
``ImmediateParent(IntScalarTy, 0)`` = ``(CharScalarTy, 0)``.
.. _tbaa_node_representation:
Representation
""""""""""""""
The root node of a TBAA type hierarchy is an ``MDNode`` with 0 operands or
with exactly one ``MDString`` operand.
Scalar type descriptors are represented as ``MDNode`` s with two
operands. The first operand is an ``MDString`` denoting the name of the
scalar type. LLVM does not assign meaning to the value of this operand, it
only cares about it being an ``MDString``. The second operand is an
``MDNode`` which points to the parent for said scalar type descriptor,
which is either another scalar type descriptor or the TBAA root. Scalar
type descriptors can have an optional third argument, but that must be the
constant integer zero.
Struct type descriptors are represented as ``MDNode`` s with an odd number
of operands greater than 1. The first operand is an ``MDString`` denoting
the name of the struct type. Like in scalar type descriptors the actual
value of this name operand is irrelevant to LLVM. After the name operand,
the struct type descriptors have a sequence of alternating ``MDNode`` and
``ConstantInt`` operands. With N starting from 1, the 2N - 1 th operand,
an ``MDNode``, denotes a contained field, and the 2N th operand, a
``ConstantInt``, is the offset of the said contained field. The offsets
must be in non-decreasing order.
Access tags are represented as ``MDNode`` s with either 3 or 4 operands.
The first operand is an ``MDNode`` pointing to the node representing the
base type. The second operand is an ``MDNode`` pointing to the node
representing the access type. The third operand is a ``ConstantInt`` that
states the offset of the access. If a fourth field is present, it must be
a ``ConstantInt`` valued at 0 or 1. If it is 1 then the access tag states
that the location being accessed is "constant" (meaning
``pointsToConstantMemory`` should return true; see `other useful
AliasAnalysis methods <AliasAnalysis.html#OtherItfs>`_).
AliasAnalysis methods <AliasAnalysis.html#OtherItfs>`_). The TBAA root of
the access type and the base type of an access tag must be the same, and
that is the TBAA root of the access tag.
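As a hedged illustration (the metadata numbering and names below are invented,
not taken from the example above), the ``Inner``/``int`` portion of the earlier
program could be encoded like this:
.. code-block:: llvm
; Scalar type descriptors, one struct type descriptor and an access tag,
; following the representation described above (illustrative only).
store i32 0, i32* %p, !tbaa !5
!0 = !{!"TBAA Root"}                      ; root node
!1 = !{!"char", !0, i64 0}                ; scalar: ("char", Root, 0)
!2 = !{!"int", !1, i64 0}                 ; scalar: ("int", char, 0)
!3 = !{!"float", !1, i64 0}               ; scalar: ("float", char, 0)
!4 = !{!"Inner", !2, i64 0, !3, i64 4}    ; struct: {"Inner" (int, 0), (float, 4)}
!5 = !{!4, !2, i64 0}                     ; access tag: (InnerStructTy, IntScalarTy, 0)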
'``tbaa.struct``' Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -4597,16 +4759,19 @@ declaration. It marks the declaration as a reference to an absolute symbol,
which causes the backend to use absolute relocations for the symbol even
in position independent code, and expresses the possible ranges that the
global variable's *address* (not its value) is in, in the same format as
``range`` metadata.
``range`` metadata, with the extension that the pair ``all-ones,all-ones``
may be used to represent the full set.
Example:
Example (assuming 64-bit pointers):
.. code-block:: llvm
@a = external global i8, !absolute_symbol !0 ; Absolute symbol in range [0,256)
@b = external global i8, !absolute_symbol !1 ; Absolute symbol in range [0,2^64)
...
!0 = !{ i64 0, i64 256 }
!1 = !{ i64 -1, i64 -1 }
'``unpredictable``' Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -4901,7 +5066,8 @@ The existence of the ``invariant.group`` metadata on the instruction tells
the optimizer that every ``load`` and ``store`` to the same pointer operand
within the same invariant group can be assumed to load or store the same
value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects
when two pointers are considered the same).
when two pointers are considered the same). Pointers returned by bitcast or
getelementptr with only zero indices are considered the same.
Examples:
@@ -6163,7 +6329,9 @@ The value produced is the unsigned integer quotient of the two operands.
Note that unsigned integer division and signed integer division are
distinct operations; for signed integer division, use '``sdiv``'.
Division by zero leads to undefined behavior.
Division by zero is undefined behavior. For vectors, if any element
of the divisor is zero, the operation has undefined behavior.
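For example (an illustrative fragment, not taken from the text):
.. code-block:: llvm
; Undefined behavior: the third lane of the divisor is zero, so the whole
; vector udiv is undefined even though the other lanes are non-zero.
%r = udiv <4 x i32> %x, <i32 1, i32 2, i32 0, i32 4>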
If the ``exact`` keyword is present, the result value of the ``udiv`` is
a :ref:`poison value <poisonvalues>` if %op1 is not a multiple of %op2 (as
@@ -6208,9 +6376,10 @@ rounded towards zero.
Note that signed integer division and unsigned integer division are
distinct operations; for unsigned integer division, use '``udiv``'.
Division by zero leads to undefined behavior. Overflow also leads to
undefined behavior; this is a rare case, but can occur, for example, by
doing a 32-bit division of -2147483648 by -1.
Division by zero is undefined behavior. For vectors, if any element
of the divisor is zero, the operation has undefined behavior.
Overflow also leads to undefined behavior; this is a rare case, but can
occur, for example, by doing a 32-bit division of -2147483648 by -1.
If the ``exact`` keyword is present, the result value of the ``sdiv`` is
a :ref:`poison value <poisonvalues>` if the result would be rounded.
@@ -6293,8 +6462,10 @@ remainder.
Note that unsigned integer remainder and signed integer remainder are
distinct operations; for signed integer remainder, use '``srem``'.
Taking the remainder of a division by zero leads to undefined behavior.
Taking the remainder of a division by zero is undefined behavior.
For vectors, if any element of the divisor is zero, the operation has
undefined behavior.
Example:
""""""""
@@ -6344,7 +6515,9 @@ operation <http://en.wikipedia.org/wiki/Modulo_operation>`_.
Note that signed integer remainder and unsigned integer remainder are
distinct operations; for unsigned integer remainder, use '``urem``'.
Taking the remainder of a division by zero leads to undefined behavior.
Taking the remainder of a division by zero is undefined behavior.
For vectors, if any element of the divisor is zero, the operation has
undefined behavior.
Overflow also leads to undefined behavior; this is a rare case, but can
occur, for example, by taking the remainder of a 32-bit division of
-2147483648 by -1. (The remainder doesn't actually overflow, but this
@@ -7573,8 +7746,10 @@ offsets implied by the indices to the base address with infinitely
precise signed arithmetic are not an *in bounds* address of that
allocated object. The *in bounds* addresses for an allocated object are
all the addresses that point into the object, plus the address one byte
past the end. In cases where the base is a vector of pointers the
``inbounds`` keyword applies to each of the computations element-wise.
past the end. The only *in bounds* address for a null pointer in the
default address-space is the null pointer itself. In cases where the
base is a vector of pointers the ``inbounds`` keyword applies to each
of the computations element-wise.
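As a small illustration of the null-pointer rule (assuming the default address
space; this fragment is not from the text above):
.. code-block:: llvm
; Only the zero offset keeps a GEP on a null base in bounds; the second
; result is a poison value because null plus 4 is not an in bounds address.
%ok  = getelementptr inbounds i8, i8* null, i64 0
%bad = getelementptr inbounds i8, i8* null, i64 4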
If the ``inbounds`` keyword is not present, the offsets are added to the
base address with silently-wrapping two's complement arithmetic. If the
@@ -7658,7 +7833,7 @@ makes sense:
.. code-block:: c
// Let's assume that we vectorize the following loop:
double *A, B; int *C;
double *A, *B; int *C;
for (int i = 0; i < size; ++i) {
A[i] = B[C[i]];
}
@@ -10055,11 +10230,8 @@ Overview:
"""""""""
The '``llvm.sqrt``' intrinsics return the sqrt of the specified operand,
returning the same value as the libm '``sqrt``' functions would. Unlike
``sqrt`` in libm, however, ``llvm.sqrt`` has undefined behavior for
negative numbers other than -0.0 (which allows for better optimization,
because there is no need to worry about errno being set).
``llvm.sqrt(-0.0)`` is defined to return -0.0 like IEEE sqrt.
returning the same value as the libm '``sqrt``' functions would, but without
trapping or setting ``errno``.
Arguments:
""""""""""
@@ -11810,7 +11982,7 @@ The semantics of this operation are equivalent to a sequence of conditional scal
::
%res = call <4 x double> @llvm.masked.gather.v4f64 (<4 x double*> %ptrs, i32 8, <4 x i1>%mask, <4 x double> <true, true, true, true>)
%res = call <4 x double> @llvm.masked.gather.v4f64 (<4 x double*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
;; The gather with all-true mask is equivalent to the following instruction sequence
%ptr0 = extractelement <4 x double*> %ptrs, i32 0
@@ -12043,6 +12215,277 @@ Semantics:
Returns another pointer that aliases its argument but which is considered different
for the purposes of ``load``/``store`` ``invariant.group`` metadata.
Constrained Floating Point Intrinsics
-------------------------------------
These intrinsics are used to provide special handling of floating point
operations when specific rounding mode or floating point exception behavior is
required. By default, LLVM optimization passes assume that the rounding mode is
round-to-nearest and that floating point exceptions will not be monitored.
Constrained FP intrinsics are used to support non-default rounding modes and
accurately preserve exception behavior without compromising LLVM's ability to
optimize FP code when the default behavior is used.
Each of these intrinsics corresponds to a normal floating point operation. The
first two arguments and the return value are the same as the corresponding FP
operation.
The third argument is a metadata argument specifying the rounding mode to be
assumed. This argument must be one of the following strings:
::
"round.dynamic"
"round.tonearest"
"round.downward"
"round.upward"
"round.towardzero"
If this argument is "round.dynamic" optimization passes must assume that the
rounding mode is unknown and may change at runtime. No transformations that
depend on rounding mode may be performed in this case.
The other possible values for the rounding mode argument correspond to the
similarly named IEEE rounding modes. If the argument is any of these values
optimization passes may perform transformations as long as they are consistent
with the specified rounding mode.
For example, 'x-0'->'x' is not a valid transformation if the rounding mode is
"round.downward" or "round.dynamic" because if the value of 'x' is +0 then
'x-0' should evaluate to '-0' when rounding downward. However, this
transformation is legal for all other rounding modes.
For values other than "round.dynamic" optimization passes may assume that the
actual runtime rounding mode (as defined in a target-specific manner) matches
the specified rounding mode, but this is not guaranteed. Using a specific
non-dynamic rounding mode which does not match the actual rounding mode at
runtime results in undefined behavior.
The fourth argument to the constrained floating point intrinsics specifies the
required exception behavior. This argument must be one of the following
strings:
::
"fpexcept.ignore"
"fpexcept.maytrap"
"fpexcept.strict"
If this argument is "fpexcept.ignore" optimization passes may assume that the
exception status flags will not be read and that floating point exceptions will
be masked. This allows transformations to be performed that may change the
exception semantics of the original code. For example, FP operations may be
speculatively executed in this case whereas they must not be for either of the
other possible values of this argument.
If the exception behavior argument is "fpexcept.maytrap" optimization passes
must avoid transformations that may raise exceptions that would not have been
raised by the original code (such as speculatively executing FP operations), but
passes are not required to preserve all exceptions that are implied by the
original code. For example, exceptions may be potentially hidden by constant
folding.
If the exception behavior argument is "fpexcept.strict" all transformations must
strictly preserve the floating point exception semantics of the original code.
Any FP exception that would have been raised by the original code must be raised
by the transformed code, and the transformed code must not raise any FP
exceptions that would not have been raised by the original code. This is the
exception behavior argument that will be used if the code being compiled reads
the FP exception status flags, but this mode can also be used with code that
unmasks FP exceptions.
The number and order of floating point exceptions is NOT guaranteed. For
example, a series of FP operations that each may raise exceptions may be
vectorized into a single instruction that raises each unique exception a single
time.
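For instance, a constrained add under a dynamic rounding mode with strict
exception semantics might look like this (a sketch; the ``.f64`` suffix is the
assumed overload mangling and is not part of the declarations shown below):
.. code-block:: llvm
; Optimization passes must not assume round-to-nearest here and must preserve
; any floating point exceptions this addition would raise.
%sum = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b,
metadata !"round.dynamic", metadata !"fpexcept.strict")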
'``llvm.experimental.constrained.fadd``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <type>
@llvm.experimental.constrained.fadd(<type> <op1>, <type> <op2>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.fadd``' intrinsic returns the sum of its
two operands.
Arguments:
""""""""""
The first two arguments to the '``llvm.experimental.constrained.fadd``'
intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector <t_vector>`
of floating point values. Both arguments must have identical types.
The third and fourth arguments specify the rounding mode and exception
behavior as described above.
Semantics:
""""""""""
The value produced is the floating point sum of the two value operands and has
the same type as the operands.
'``llvm.experimental.constrained.fsub``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <type>
@llvm.experimental.constrained.fsub(<type> <op1>, <type> <op2>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.fsub``' intrinsic returns the difference
of its two operands.
Arguments:
""""""""""
The first two arguments to the '``llvm.experimental.constrained.fsub``'
intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector <t_vector>`
of floating point values. Both arguments must have identical types.
The third and fourth arguments specify the rounding mode and exception
behavior as described above.
Semantics:
""""""""""
The value produced is the floating point difference of the two value operands
and has the same type as the operands.
'``llvm.experimental.constrained.fmul``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <type>
@llvm.experimental.constrained.fmul(<type> <op1>, <type> <op2>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.fmul``' intrinsic returns the product of
its two operands.
Arguments:
""""""""""
The first two arguments to the '``llvm.experimental.constrained.fmul``'
intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector <t_vector>`
of floating point values. Both arguments must have identical types.
The third and fourth arguments specify the rounding mode and exception
behavior as described above.
Semantics:
""""""""""
The value produced is the floating point product of the two value operands and
has the same type as the operands.
'``llvm.experimental.constrained.fdiv``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <type>
@llvm.experimental.constrained.fdiv(<type> <op1>, <type> <op2>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.fdiv``' intrinsic returns the quotient of
its two operands.
Arguments:
""""""""""
The first two arguments to the '``llvm.experimental.constrained.fdiv``'
intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector <t_vector>`
of floating point values. Both arguments must have identical types.
The third and fourth arguments specify the rounding mode and exception
behavior as described above.
Semantics:
""""""""""
The value produced is the floating point quotient of the two value operands and
has the same type as the operands.
'``llvm.experimental.constrained.frem``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare <type>
@llvm.experimental.constrained.frem(<type> <op1>, <type> <op2>,
metadata <rounding mode>,
metadata <exception behavior>)
Overview:
"""""""""
The '``llvm.experimental.constrained.frem``' intrinsic returns the remainder
from the division of its two operands.
Arguments:
""""""""""
The first two arguments to the '``llvm.experimental.constrained.frem``'
intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector <t_vector>`
of floating point values. Both arguments must have identical types.
The third and fourth arguments specify the rounding mode and exception
behavior as described above. The rounding mode argument has no effect, since
the result of frem is never rounded, but the argument is included for
consistency with the other constrained floating point intrinsics.
Semantics:
""""""""""
The value produced is the floating point remainder from the division of the two
value operands and has the same type as the operands. The remainder has the
same sign as the dividend.
General Intrinsics
------------------
@@ -12395,6 +12838,33 @@ sufficient overall improvement in code quality. For this reason,
that the optimizer can otherwise deduce or facts that are of little use to the
optimizer.
.. _int_ssa_copy:
'``llvm.ssa_copy``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare type @llvm.ssa_copy(type %operand) returned(1) readnone
Arguments:
""""""""""
The first argument is an operand which is used as the returned value.
Overview:
""""""""""
The ``llvm.ssa_copy`` intrinsic can be used to attach information to
operations by copying them and giving them new names. For example,
the PredicateInfo utility uses it to build Extended SSA form, and
attach various forms of information to operands that dominate specific
uses. It is not meant for general use, only for building temporary
renaming forms that require value splits at certain points.
.. _type.test:
'``llvm.type.test``' Intrinsic
@@ -12658,3 +13128,79 @@ Stack Map Intrinsics
LLVM provides experimental intrinsics to support runtime patching
mechanisms commonly desired in dynamic language JITs. These intrinsics
are described in :doc:`StackMaps`.
Element Wise Atomic Memory Intrinsics
-------------------------------------
These intrinsics are similar to the standard library memory intrinsics except
that they perform memory transfer as a sequence of atomic memory accesses.
.. _int_memcpy_element_atomic:
'``llvm.memcpy.element.atomic``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic. You can use ``llvm.memcpy.element.atomic`` on
any integer bit width and for different address spaces. Not all targets
support all bit widths however.
::
declare void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* <dest>, i8* <src>,
i64 <num_elements>, i32 <element_size>)
Overview:
"""""""""
The '``llvm.memcpy.element.atomic.*``' intrinsic performs a copy of a block of
memory from the source location to the destination location as a sequence of
unordered atomic memory accesses, where each access is a multiple of
``element_size`` bytes wide and aligned at an element size boundary. That is,
each element is accessed atomically in the source and destination buffers.
Arguments:
""""""""""
The first argument is a pointer to the destination, the second is a
pointer to the source. The third argument is an integer argument
specifying the number of elements to copy, and the fourth argument is the
size of a single element in bytes.
``element_size`` should be a power of two, greater than zero and less than
a target-specific atomic access size limit.
For each of the input pointers, the ``align`` parameter attribute must be specified.
It must be a power of two and greater than or equal to the ``element_size``.
Caller guarantees that both the source and destination pointers are aligned to
that boundary.
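For illustration, a call copying 16 four-byte elements might look like this
(a sketch under the rules above, not an example taken from the text):
.. code-block:: llvm
; Both pointer arguments carry the required align attribute, which is a power
; of two and at least as large as the 4-byte element size.
call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %dest, i8* align 4 %src, i64 16, i32 4)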
Semantics:
""""""""""
The '``llvm.memcpy.element.atomic.*``' intrinsic copies
'``num_elements`` * ``element_size``' bytes of memory from the source location to
the destination location. These locations are not allowed to overlap. Memory copy
is performed as a sequence of unordered atomic memory accesses where each access
is guaranteed to be a multiple of ``element_size`` bytes wide and aligned at an
element size boundary.
The order of the copy is unspecified. The same value may be read from the source
buffer many times, but only one write is issued to the destination buffer per
element. It is well defined to have concurrent reads and writes to both source
and destination provided those reads and writes are at least unordered atomic.
This intrinsic does not provide any additional ordering guarantees over those
provided by a set of unordered loads from the source location and stores to the
destination.
Lowering:
"""""""""
In the most general case, a call to '``llvm.memcpy.element.atomic.*``' is lowered
to a call to the symbol ``__llvm_memcpy_element_atomic_*``, where '*' is replaced
with the actual element size.
The optimizer is allowed to inline the memory copy when it is profitable to do so.

View File

@@ -182,7 +182,7 @@ P
**PR**
Problem report. A bug filed on `the LLVM Bug Tracking System
<http://llvm.org/bugs/enter_bug.cgi>`_.
<https://bugs.llvm.org/enter_bug.cgi>`_.
**PRE**
Partial Redundancy Elimination

View File

@@ -84,6 +84,7 @@ Some important things to remember about fuzz targets:
* It must be as deterministic as possible. Non-determinism (e.g. random decisions not based on the input bytes) will make fuzzing inefficient.
* It must be fast. Try avoiding cubic or greater complexity, logging, or excessive memory consumption.
* Ideally, it should not modify any global state (although that's not strict).
* Usually, the narrower the target the better. E.g. if your target can parse several data formats, split it into several targets, one per format.
Building
@@ -340,9 +341,6 @@ possible event codes are:
``DONE``
The fuzzer has completed operation because it has reached the specified
iteration limit (``-runs``) or time limit (``-max_total_time``).
``MIN<n>``
The fuzzer is minimizing the combination of input corpus directories into
a single unified corpus (due to the ``-merge`` command line option).
``RELOAD``
The fuzzer is performing a periodic reload of inputs from the corpus
directory; this allows it to discover any inputs discovered by other
@@ -770,7 +768,7 @@ Trophies
* LLVM: `Clang <https://llvm.org/bugs/show_bug.cgi?id=23057>`_, `Clang-format <https://llvm.org/bugs/show_bug.cgi?id=23052>`_, `libc++ <https://llvm.org/bugs/show_bug.cgi?id=24411>`_, `llvm-as <https://llvm.org/bugs/show_bug.cgi?id=24639>`_, `Demangler <https://bugs.chromium.org/p/chromium/issues/detail?id=606626>`_, Disassembler: http://reviews.llvm.org/rL247405, http://reviews.llvm.org/rL247414, http://reviews.llvm.org/rL247416, http://reviews.llvm.org/rL247417, http://reviews.llvm.org/rL247420, http://reviews.llvm.org/rL247422.
* Tensorflow: `[1] <https://github.com/tensorflow/tensorflow/commit/7231d01fcb2cd9ef9ffbfea03b724892c8a4026e>`__
* Tensorflow: `[1] <https://da-data.blogspot.com/2017/01/finding-bugs-in-tensorflow-with.html>`__
* Ffmpeg: `[1] <https://github.com/FFmpeg/FFmpeg/commit/c92f55847a3d9cd12db60bfcd0831ff7f089c37c>`__ `[2] <https://github.com/FFmpeg/FFmpeg/commit/25ab1a65f3acb5ec67b53fb7a2463a7368f1ad16>`__ `[3] <https://github.com/FFmpeg/FFmpeg/commit/85d23e5cbc9ad6835eef870a5b4247de78febe56>`__ `[4] <https://github.com/FFmpeg/FFmpeg/commit/04bd1b38ee6b8df410d0ab8d4949546b6c4af26a>`__

View File

@@ -289,7 +289,7 @@ code often follows a pattern:
return my_function_precise(a);
}
The default value for all unspecified reflection parameters is zero.
The default value for all unspecified reflection parameters is zero.
The ``NVVMReflect`` pass should be executed early in the optimization
pipeline, immediately after the link stage. The ``internalize`` pass is also
@@ -326,6 +326,16 @@ often leave behind dead code of the form:
Therefore, it is recommended that ``NVVMReflect`` is executed early in the
optimization pipeline before dead-code elimination.
The NVPTX TargetMachine knows how to schedule ``NVVMReflect`` at the beginning
of your pass manager; just use the following code when setting up your pass
manager:
.. code-block:: c++
std::unique_ptr<TargetMachine> TM = ...;
PassManagerBuilder PMBuilder(...);
if (TM)
TM->adjustPassManager(PMBuilder);
Reflection Parameters
---------------------
@@ -339,35 +349,17 @@ Flag Description
``__CUDA_FTZ=[0,1]`` Use optimized code paths that flush subnormals to zero
==================== ======================================================
The value of this flag is determined by the "nvvm-reflect-ftz" module flag.
The following sets the ftz flag to 1.
Invoking NVVMReflect
--------------------
To ensure that all dead code caused by the reflection pass is eliminated, it
is recommended that the reflection pass is executed early in the LLVM IR
optimization pipeline. The pass takes an optional mapping of reflection
parameter name to an integer value. This mapping can be specified as either a
command-line option to ``opt`` or as an LLVM ``StringMap<int>`` object when
programmatically creating a pass pipeline.
With ``opt``:
.. code-block:: text
# opt -nvvm-reflect -nvvm-reflect-list=<var>=<value>,<var>=<value> module.bc -o module.reflect.bc
With programmatic pass pipeline:
.. code-block:: c++
extern FunctionPass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping);
StringMap<int> ReflectParams;
ReflectParams["__CUDA_FTZ"] = 1;
Passes.add(createNVVMReflectPass(ReflectParams));
.. code-block:: llvm
!llvm.module.flag = !{!0}
!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
(``i32 4`` indicates that the value set here overrides the value in another
module we link with. See the `LangRef <LangRef.html#module-flags-metadata>`
for details.)
Executing PTX
=============

View File

@@ -60,11 +60,14 @@ like this:
clang -O2 -mllvm -opt-bisect-limit=256 my_file.c
The -opt-bisect-limit option may also be applied to link-time optimizations by
using a prefix to indicate that this is a plug-in option for the linker. The
using a prefix to indicate that this is a plug-in option for the linker. The
following syntax will set a bisect limit for LTO transformations:
::
# When using lld, or ld64 (macOS)
clang -flto -Wl,-mllvm,-opt-bisect-limit=256 my_file.o my_other_file.o
# When using Gold
clang -flto -Wl,-plugin-opt,-opt-bisect-limit=256 my_file.o my_other_file.o
LTO passes are run by a library instance invoked by the linker. Therefore any
@@ -186,12 +189,5 @@ Adding Finer Granularity
Once the pass in which an incorrect transformation is performed has been
determined, it may be useful to perform further analysis in order to determine
which specific transformation is causing the problem. Ideally all passes
would be instrumented to allow skipping of individual transformations. This
functionality is available through the OptBisect object but it is impractical
to proactively instrument every existing pass. It is hoped that as developers
find that they need a pass to be instrumented they will add the instrumentation
and contribute it back to the LLVM source base.
Helper functions will be added to simplify this level of instrumentation, but
this work is not yet completed. For more information, contact Andy Kaylor.
which specific transformation is causing the problem. Debug counters
can be used for this purpose.

View File

@@ -128,8 +128,12 @@ Committing a change
-------------------
Once a patch has been reviewed and approved on Phabricator it can then be
committed to trunk. There are multiple workflows to achieve this. Whichever
method you follow it is recommend that your commit message ends with the line:
committed to trunk. If you do not have commit access, someone has to
commit the change for you (with attribution). It is sufficient to add
a comment to the approved review indicating you cannot commit the patch
yourself. If you have commit access, there are multiple workflows to commit the
change. Whichever method you follow it is recommended that your commit message
ends with the line:
::

View File

@@ -331,16 +331,15 @@ There are two ways to customize the formatting behavior for a type.
to extend the mechanism for formatting a type that the library already knows how to
format. For that, we need something else.
2. Provide a **format adapter** with a non-static format method.
2. Provide a **format adapter** inheriting from ``llvm::FormatAdapter<T>``.
.. code-block:: c++
namespace anything {
struct format_int_custom {
int N;
explicit format_int_custom(int N) : N(N) {}
void format(llvm::raw_ostream &Stream, StringRef Style) {
// Do whatever is necessary to format ``N`` into ``Stream``
struct format_int_custom : public llvm::FormatAdapter<int> {
explicit format_int_custom(int N) : llvm::FormatAdapter<int>(N) {}
void format(llvm::raw_ostream &Stream, StringRef Style) override {
// Do whatever is necessary to format ``this->Item`` into ``Stream``
}
};
}
@@ -350,9 +349,8 @@ There are two ways to customize the formatting behavior for a type.
}
}
If the search for a specialization of ``format_provider<T>`` for the given type
fails, ``formatv`` will subsequently check the argument for an instance method
named ``format`` with the signature described above. If so, it will call the
If the type is detected to be derived from ``FormatAdapter<T>``, ``formatv``
will call the
``format`` method on the argument passing in the specified style. This allows
one to provide custom formatting of any type, including one which already has
a builtin format provider.
@@ -484,7 +482,7 @@ that inherits from the ErrorInfo utility, E.g.:
}
};
char FileExists::ID; // This should be declared in the C++ file.
char BadFileFormat::ID; // This should be declared in the C++ file.
Error printFormattedFile(StringRef Path) {
if (<check for valid format>)
@@ -566,18 +564,18 @@ the boolean conversion operator):
.. code-block:: c++
if (auto Err = canFail(...))
if (auto Err = mayFail(...))
return Err; // Failure value - move error to caller.
// Safe to continue: Err was checked.
In contrast, the following code will always cause an abort, even if ``canFail``
In contrast, the following code will always cause an abort, even if ``mayFail``
returns a success value:
.. code-block:: c++
canFail();
// Program will always abort here, even if canFail() returns Success, since
mayFail();
// Program will always abort here, even if mayFail() returns Success, since
// the value is not checked.
Failure values are considered checked once a handler for the error type has
@@ -635,6 +633,12 @@ exiting with an error code, the :ref:`ExitOnError <err_exitonerr>` utility
may be a better choice than handleErrors, as it simplifies control flow when
calling fallible functions.
In situations where it is known that a particular call to a fallible function
will always succeed (for example, a call to a function that can only fail on a
subset of inputs with an input that is known to be safe) the
:ref:`cantFail <err_cantfail>` functions can be used to remove the error type,
simplifying control flow.
StringError
"""""""""""
@@ -767,6 +771,43 @@ mapping can also be supplied from ``Error`` values to exit codes using the
Use ``ExitOnError`` in your tool code where possible as it can greatly improve
readability.
.. _err_cantfail:
Using cantFail to simplify safe callsites
"""""""""""""""""""""""""""""""""""""""""
Some functions may only fail for a subset of their inputs. For such functions
call-sites using known-safe inputs can assume that the result will be a success
value.
The cantFail functions encapsulate this by wrapping an assertion that their
argument is a success value and, in the case of Expected<T>, unwrapping the
T value from the Expected<T> argument:
.. code-block:: c++
Error mayFail(int X);
Expected<int> mayFail2(int X);
void foo() {
cantFail(mayFail(KnownSafeValue));
int Y = cantFail(mayFail2(KnownSafeValue));
...
}
Like the ExitOnError utility, cantFail simplifies control flow. Their treatment
of error cases is very different however: Where ExitOnError is guaranteed to
terminate the program on an error input, cantFail simply asserts that the result
is success. In debug builds this will result in an assertion failure if an error
is encountered. In release builds the behavior of cantFail for failure values is
undefined. As such, care must be taken in the use of cantFail: clients must be
certain that a cantFail wrapped call really can not fail under any
circumstances.
Use of the cantFail functions should be rare in library code, but they are
likely to be of more use in tool and unit-test code where inputs and/or
mocked-up classes or functions may be known to be safe.
Fallible constructors
"""""""""""""""""""""
@@ -866,7 +907,7 @@ completing the walk over the archive they could use the ``joinErrors`` utility:
The ``joinErrors`` routine builds a special error type called ``ErrorList``,
which holds a list of user defined errors. The ``handleErrors`` routine
recognizes this type and will attempt to handle each of the contained erorrs in
recognizes this type and will attempt to handle each of the contained errors in
order. If all contained errors can be handled, ``handleErrors`` will return
``Error::success()``, otherwise ``handleErrors`` will concatenate the remaining
errors and return the resulting ``ErrorList``.
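A hedged sketch of the pattern (the ``StringError`` messages here are
illustrative assumptions, not part of the archive-walking example):

.. code-block:: c++

// Join two independent failures into a single ErrorList...
Error Err1 = make_error<StringError>("first failure", inconvertibleErrorCode());
Error Err2 = make_error<StringError>("second failure", inconvertibleErrorCode());
Error Both = joinErrors(std::move(Err1), std::move(Err2));

// ...then let handleErrors visit each contained error in order. Errors not
// matched by any handler are concatenated and returned to the caller.
if (Error Remaining = handleErrors(std::move(Both), [](StringError &SE) {
      errs() << "handled: " << SE.getMessage() << "\n";
    }))
  errs() << "unhandled: " << toString(std::move(Remaining)) << "\n";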
@@ -1096,23 +1137,23 @@ uniform manner with the rest of the passes being executed.
There are many examples of ``Statistic`` uses, but the basics of using it are as
follows:
#. Define your statistic like this:
Define your statistic like this:
.. code-block:: c++
.. code-block:: c++
#define DEBUG_TYPE "mypassname" // This goes before any #includes.
STATISTIC(NumXForms, "The # of times I did stuff");
#define DEBUG_TYPE "mypassname" // This goes before any #includes.
STATISTIC(NumXForms, "The # of times I did stuff");
The ``STATISTIC`` macro defines a static variable, whose name is specified by
the first argument. The pass name is taken from the ``DEBUG_TYPE`` macro, and
the description is taken from the second argument. The variable defined
("NumXForms" in this case) acts like an unsigned integer.
The ``STATISTIC`` macro defines a static variable, whose name is specified by
the first argument. The pass name is taken from the ``DEBUG_TYPE`` macro, and
the description is taken from the second argument. The variable defined
("NumXForms" in this case) acts like an unsigned integer.
#. Whenever you make a transformation, bump the counter:
Whenever you make a transformation, bump the counter:
.. code-block:: c++
.. code-block:: c++
++NumXForms; // I did stuff!
++NumXForms; // I did stuff!
That's all you have to do. To get '``opt``' to print out the statistics
gathered, use the '``-stats``' option:
@@ -1160,6 +1201,71 @@ Obviously, with so many optimizations, having a unified framework for this stuff
is very nice. Making your pass fit well into the framework makes it more
maintainable and useful.
.. _DebugCounters:
Adding debug counters to aid in debugging your code
---------------------------------------------------
Sometimes, when writing new passes, or trying to track down bugs, it
is useful to be able to control whether certain things in your pass
happen or not. For example, there are times when the minimization tooling
can only easily give you large testcases. You would like to narrow
your bug down to a specific transformation happening or not happening,
automatically, using bisection. This is where debug counters help.
They provide a framework for making parts of your code only execute a
certain number of times.
The ``llvm/Support/DebugCounter.h`` (`doxygen
<http://llvm.org/doxygen/DebugCounter_8h_source.html>`__) file
provides a class named ``DebugCounter`` that can be used to create
command line counter options that control execution of parts of your code.
Define your DebugCounter like this:
.. code-block:: c++
DEBUG_COUNTER(DeleteAnInstruction, "passname-delete-instruction",
"Controls which instructions get delete").
The ``DEBUG_COUNTER`` macro defines a static variable, whose name
is specified by the first argument. The name of the counter
(which is used on the command line) is specified by the second
argument, and the description used in the help is specified by the
third argument.
Whatever code you want to control, use ``DebugCounter::shouldExecute`` to control it.
.. code-block:: c++
if (DebugCounter::shouldExecute(DeleteAnInstruction))
I->eraseFromParent();
That's all you have to do. Now, using opt, you can control when this code triggers using
the '``--debug-counter``' option. There are two counters provided, ``skip`` and ``count``.
``skip`` is the number of times to skip execution of the codepath. ``count`` is the number
of times, once we are done skipping, to execute the codepath.
.. code-block:: none
$ opt --debug-counter=passname-delete-instruction-skip=1,passname-delete-instruction-count=2 -passname
This will skip the above code the first time we hit it, then execute it twice, then skip the rest of the executions.
So if executed on the following code:
.. code-block:: llvm
%1 = add i32 %a, %b
%2 = add i32 %a, %b
%3 = add i32 %a, %b
%4 = add i32 %a, %b
It would delete number ``%2`` and ``%3``.
A utility is provided in `utils/bisect-skip-count` to binary search
skip and count arguments. It can be used to automatically minimize the
skip and count for a debug-counter variable.
.. _ViewGraph:
Viewing graphs while debugging code
@@ -2196,6 +2302,22 @@ reverse) is O(1) worst case. Testing and setting bits within 128 bits (depends
on size) of the current bit is also O(1). As a general statement,
testing/setting bits in a SparseBitVector is O(distance away from last set bit).
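As a small hedged usage sketch (the variable name and bit indices are
arbitrary):

.. code-block:: c++

#include "llvm/ADT/SparseBitVector.h"

llvm::SparseBitVector<> Live;
Live.set(5);
Live.set(500000);   // Far-apart bits just add another element, not a huge array.
if (Live.test(5))
  Live.reset(5);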
.. _debugging:
Debugging
=========
A handful of `GDB pretty printers
<https://sourceware.org/gdb/onlinedocs/gdb/Pretty-Printing.html>`__ are
provided for some of the core LLVM libraries. To use them, execute the
following (or add it to your ``~/.gdbinit``)::
source /path/to/llvm/src/utils/gdb-scripts/prettyprinters.py
It also might be handy to enable the `print pretty
<http://ftp.gnu.org/old-gnu/Manuals/gdb/html_node/gdb_57.html>`__ option to
avoid data structures being printed as a big block of text.
.. _common:
Helpful Hints for Common Operations
@@ -2243,18 +2365,12 @@ of a ``BasicBlock`` and the number of ``Instruction``\ s it contains:
.. code-block:: c++
// func is a pointer to a Function instance
for (Function::iterator i = func->begin(), e = func->end(); i != e; ++i)
Function &Func = ...
for (BasicBlock &BB : Func)
// Print out the name of the basic block if it has one, and then the
// number of instructions that it contains
errs() << "Basic block (name=" << i->getName() << ") has "
<< i->size() << " instructions.\n";
Note that i can be used as if it were a pointer for the purposes of invoking
member functions of the ``Instruction`` class. This is because the indirection
operator is overloaded for the iterator classes. In the above code, the
expression ``i->size()`` is exactly equivalent to ``(*i).size()`` just like
you'd expect.
errs() << "Basic block (name=" << BB.getName() << ") has "
<< BB.size() << " instructions.\n";
.. _iterate_basicblock:
@@ -2267,17 +2383,17 @@ a code snippet that prints out each instruction in a ``BasicBlock``:
.. code-block:: c++
// blk is a pointer to a BasicBlock instance
for (BasicBlock::iterator i = blk->begin(), e = blk->end(); i != e; ++i)
BasicBlock& BB = ...
for (Instruction &I : BB)
// The next statement works since operator<<(ostream&,...)
// is overloaded for Instruction&
errs() << *i << "\n";
errs() << I << "\n";
However, this isn't really the best way to print out the contents of a
``BasicBlock``! Since the ostream operators are overloaded for virtually
anything you'll care about, you could have just invoked the print routine on the
basic block itself: ``errs() << *blk << "\n";``.
basic block itself: ``errs() << BB << "\n";``.
.. _iterate_insiter:
@@ -2411,13 +2527,13 @@ method):
OurFunctionPass(): callCounter(0) { }
virtual bool runOnFunction(Function& F) {
for (Function::iterator b = F.begin(), be = F.end(); b != be; ++b) {
for (BasicBlock::iterator i = b->begin(), ie = b->end(); i != ie; ++i) {
if (CallInst* callInst = dyn_cast<CallInst>(&*i)) {
for (BasicBlock &B : F) {
for (Instruction &I: B) {
if (auto *CallInst = dyn_cast<CallInst>(&I)) {
// We know we've encountered a call instruction, so we
// need to determine if it's a call to the
// function pointed to by m_func or not.
if (callInst->getCalledFunction() == targetFunc)
if (CallInst->getCalledFunction() == targetFunc)
++callCounter;
}
}
@@ -2510,12 +2626,11 @@ iterate over all predecessors of BB:
#include "llvm/IR/CFG.h"
BasicBlock *BB = ...;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *Pred = *PI;
for (BasicBlock *Pred : predecessors(BB)) {
// ...
}
Similarly, to iterate over successors use ``succ_iterator/succ_begin/succ_end``.
Similarly, to iterate over successors use ``successors``.
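For completeness, a corresponding sketch for successors, mirroring the
predecessor loop above:

.. code-block:: c++

#include "llvm/IR/CFG.h"
BasicBlock *BB = ...;

for (BasicBlock *Succ : successors(BB)) {
  // ...
}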
.. _simplechanges:
@@ -2540,7 +2655,7 @@ For example, an ``AllocaInst`` only *requires* a (const-ptr-to) ``Type``. Thus:
.. code-block:: c++
AllocaInst* ai = new AllocaInst(Type::Int32Ty);
auto *ai = new AllocaInst(Type::Int32Ty);
will create an ``AllocaInst`` instance that represents the allocation of one
integer in the current stack frame, at run time. Each ``Instruction`` subclass
@@ -2565,7 +2680,7 @@ intending to use it within the same ``Function``. I might do:
.. code-block:: c++
AllocaInst* pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc");
auto *pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc");
where ``indexLoc`` is now the logical name of the instruction's execution value,
which is a pointer to an integer on the run time stack.
@@ -2585,7 +2700,7 @@ sequence of instructions that form a ``BasicBlock``:
BasicBlock *pb = ...;
Instruction *pi = ...;
Instruction *newInst = new Instruction(...);
auto *newInst = new Instruction(...);
pb->getInstList().insert(pi, newInst); // Inserts newInst before pi in pb
@@ -2597,7 +2712,7 @@ sequence of instructions that form a ``BasicBlock``:
.. code-block:: c++
BasicBlock *pb = ...;
Instruction *newInst = new Instruction(...);
auto *newInst = new Instruction(...);
pb->getInstList().push_back(newInst); // Appends newInst to pb
@@ -2606,7 +2721,7 @@ sequence of instructions that form a ``BasicBlock``:
.. code-block:: c++
BasicBlock *pb = ...;
Instruction *newInst = new Instruction(..., pb);
auto *newInst = new Instruction(..., pb);
which is much cleaner, especially if you are creating long instruction
streams.
@@ -2621,7 +2736,7 @@ sequence of instructions that form a ``BasicBlock``:
.. code-block:: c++
Instruction *pi = ...;
Instruction *newInst = new Instruction(...);
auto *newInst = new Instruction(...);
pi->getParent()->getInstList().insert(pi, newInst);
@@ -2637,7 +2752,7 @@ sequence of instructions that form a ``BasicBlock``:
.. code-block:: c++
Instruction* pi = ...;
Instruction* newInst = new Instruction(..., pi);
auto *newInst = new Instruction(..., pi);
which is much cleaner, especially if you're creating a lot of instructions and
adding them to ``BasicBlock``\ s.
@@ -2889,7 +3004,7 @@ Another way is to only call ``getPointerToFunction()`` from the
When the JIT is configured to compile lazily (using
``ExecutionEngine::DisableLazyCompilation(false)``), there is currently a `race
condition <http://llvm.org/bugs/show_bug.cgi?id=5184>`_ in updating call sites
condition <https://bugs.llvm.org/show_bug.cgi?id=5184>`_ in updating call sites
after a function is lazily-jitted. It's still possible to use the lazy JIT in a
threaded program if you ensure that only one thread at a time can call any
particular lazy stub and that the JIT lock guards any IR access, but we suggest

View File

@@ -30,7 +30,7 @@ This proposal relates only to moving the hosting of our source-code repository
from SVN hosted on our own servers to Git hosted on GitHub. We are not proposing
using GitHub's issue tracker, pull-requests, or code-review.
Contributers will continue to earn commit access on demand under the Developer
Contributors will continue to earn commit access on demand under the Developer
Policy, except that a GitHub account will be required instead of SVN
username/password-hash.
@@ -433,7 +433,7 @@ Concerns
* Using the monolithic repository may add overhead for those *integrating* a
standalone sub-project, even if they aren't contributing to it, due to the
same disk space concern as the point above. The availability of the
sub-project Git mirror addesses this, even without SVN access.
sub-project Git mirror addresses this, even without SVN access.
* Preservation of the existing read/write SVN-based workflows relies on the
GitHub SVN bridge, which is an extra dependency. Maintaining this locks us
into GitHub and could restrict future workflow changes.

View File

@@ -1,21 +1,21 @@
========================
LLVM 4.0.0 Release Notes
LLVM 5.0.0 Release Notes
========================
.. contents::
:local:
.. warning::
These are in-progress notes for the upcoming LLVM 4.0.0 release. You may
prefer the `LLVM 3.9 Release Notes <http://llvm.org/releases/3.9.0/docs
/ReleaseNotes.html>`_.
These are in-progress notes for the upcoming LLVM 5 release.
Release notes for previous releases can be found on
`the Download Page <http://releases.llvm.org/download.html>`_.
Introduction
============
This document contains the release notes for the LLVM Compiler Infrastructure,
release 4.0.0. Here we describe the status of LLVM, including major improvements
release 5.0.0. Here we describe the status of LLVM, including major improvements
from the previous release, improvements in various subprojects of LLVM, and
some of the current users of the code. All LLVM releases may be downloaded
from the `LLVM releases web site <http://llvm.org/releases/>`_.
@@ -33,13 +33,6 @@ page <http://llvm.org/releases/>`_.
Non-comprehensive list of changes in this release
=================================================
* The C API functions LLVMAddFunctionAttr, LLVMGetFunctionAttr,
LLVMRemoveFunctionAttr, LLVMAddAttribute, LLVMRemoveAttribute,
LLVMGetAttribute, LLVMAddInstrAttribute and
LLVMRemoveInstrAttribute have been removed.
* The C API enum LLVMAttribute has been deleted.
.. NOTE
For small 1-3 sentence descriptions, just add an entry at the end of
this list. If your description won't fit comfortably in one bullet
@@ -47,16 +40,6 @@ Non-comprehensive list of changes in this release
functionality, or simply have a lot to talk about), see the `NOTE` below
for adding a new subsection.
* The definition and uses of LLVM_ATRIBUTE_UNUSED_RESULT in the LLVM source
were replaced with LLVM_NODISCARD, which matches the C++17 [[nodiscard]]
semantics rather than gcc's __attribute__((warn_unused_result)).
* Minimum compiler version to build has been raised to GCC 4.8 and VS 2015.
* The Timer related APIs now expect a Name and Description. When upgrading code
the previously used names should become descriptions and a short name in the
style of a programming language identifier should be added.
* ... next change ...
.. NOTE
@@ -102,19 +85,16 @@ Changes to the AMDGPU Target
Changes to the AVR Target
-----------------------------
* The entire backend has been merged in-tree with all tests passing. All of
the instruction selection code and the machine code backend has landed
recently and is fully usable.
During this release ...
Changes to the OCaml bindings
-----------------------------
* The attribute API was completely overhauled, following the changes
to the C API.
During this release ...
External Open Source Projects Using LLVM 4.0.0
==============================================
External Open Source Projects Using LLVM 5
==========================================
* A project...

View File

@@ -13,6 +13,13 @@ The Scudo Hardened Allocator is a user-mode allocator based on LLVM Sanitizer's
CombinedAllocator, which aims at providing additional mitigations against heap
based vulnerabilities, while maintaining good performance.
Currently, the allocator supports (was tested on) the following architectures:
- i386 (& i686) (32-bit);
- x86_64 (64-bit);
- armhf (32-bit);
- AArch64 (64-bit).
The name "Scudo" has been retained from the initial implementation (Escudo
meaning Shield in Spanish and Portuguese).
@@ -31,29 +38,25 @@ header is accessed, and the process terminated.
The following information is stored in the header:
- the 16-bit checksum;
- the user requested size for that chunk, which is necessary for reallocation
purposes;
- the unused bytes amount for that chunk, which is necessary for computing the
size of the chunk;
- the state of the chunk (available, allocated or quarantined);
- the allocation type (malloc, new, new[] or memalign), to detect potential
mismatches in the allocation APIs used;
- whether or not the chunk is offseted (ie: if the chunk beginning is different
than the backend allocation beginning, which is most often the case with some
aligned allocations);
- the associated offset;
- a 16-bit salt.
- the offset of the chunk, which is the distance in bytes from the beginning of
the returned chunk to the beginning of the backend allocation;
- an 8-bit salt.
On x64, which is currently the only architecture supported, the header fits
within 16-bytes, which works nicely with the minimum alignment requirements.
This header fits within 8 bytes, on all platforms supported.
The checksum is computed as a CRC32 (requiring the SSE 4.2 instruction set)
of the global secret, the chunk pointer itself, and the 16 bytes of header with
The checksum is computed as a CRC32 (made faster with hardware support)
of the global secret, the chunk pointer itself, and the 8 bytes of header with
the checksum field zeroed out.
The header is atomically loaded and stored to prevent races (this requires
platform support such as the cmpxchg16b instruction). This is important as two
consecutive chunks could belong to different threads. We also want to avoid
any type of double fetches of information located in the header, and use local
copies of the header for this purpose.
The header is atomically loaded and stored to prevent races. This is important
as two consecutive chunks could belong to different threads. We also want to
avoid any type of double fetches of information located in the header, and use
local copies of the header for this purpose.
Delayed Freelist
-----------------
@@ -94,9 +97,9 @@ You may also build Scudo like this:
.. code::
cd $LLVM/projects/compiler-rt/lib
clang++ -fPIC -std=c++11 -msse4.2 -mcx16 -O2 -I. scudo/*.cpp \
clang++ -fPIC -std=c++11 -msse4.2 -O2 -I. scudo/*.cpp \
$(\ls sanitizer_common/*.{cc,S} | grep -v "sanitizer_termination\|sanitizer_common_nolibc") \
-shared -o scudo-allocator.so -lpthread
-shared -o scudo-allocator.so -pthread
and then use it with existing binaries as follows:
@@ -136,29 +139,29 @@ Or using the function:
The following options are available:
+-----------------------------+---------+------------------------------------------------+
| Option | Default | Description |
+-----------------------------+---------+------------------------------------------------+
| QuarantineSizeMb | 64 | The size (in Mb) of quarantine used to delay |
| | | the actual deallocation of chunks. Lower value |
| | | may reduce memory usage but decrease the |
| | | effectiveness of the mitigation; a negative |
| | | value will fallback to a default of 64Mb. |
+-----------------------------+---------+------------------------------------------------+
| ThreadLocalQuarantineSizeKb | 1024 | The size (in Kb) of per-thread cache use to |
| | | offload the global quarantine. Lower value may |
| | | reduce memory usage but might increase |
| | | contention on the global quarantine. |
+-----------------------------+---------+------------------------------------------------+
| DeallocationTypeMismatch | true | Whether or not we report errors on |
| | | malloc/delete, new/free, new/delete[], etc. |
+-----------------------------+---------+------------------------------------------------+
| DeleteSizeMismatch | true | Whether or not we report errors on mismatch |
| | | between sizes of new and delete. |
+-----------------------------+---------+------------------------------------------------+
| ZeroContents | false | Whether or not we zero chunk contents on |
| | | allocation and deallocation. |
+-----------------------------+---------+------------------------------------------------+
+-----------------------------+----------------+----------------+------------------------------------------------+
| Option | 64-bit default | 32-bit default | Description |
+-----------------------------+----------------+----------------+------------------------------------------------+
| QuarantineSizeMb | 64 | 16 | The size (in Mb) of quarantine used to delay |
| | | | the actual deallocation of chunks. Lower value |
| | | | may reduce memory usage but decrease the |
| | | | effectiveness of the mitigation; a negative |
| | | | value will fallback to a default of 64Mb. |
+-----------------------------+----------------+----------------+------------------------------------------------+
| ThreadLocalQuarantineSizeKb | 1024           | 256            | The size (in Kb) of per-thread cache used to   |
| | | | offload the global quarantine. Lower value may |
| | | | reduce memory usage but might increase |
| | | | contention on the global quarantine. |
+-----------------------------+----------------+----------------+------------------------------------------------+
| DeallocationTypeMismatch | true | true | Whether or not we report errors on |
| | | | malloc/delete, new/free, new/delete[], etc. |
+-----------------------------+----------------+----------------+------------------------------------------------+
| DeleteSizeMismatch | true | true | Whether or not we report errors on mismatch |
| | | | between sizes of new and delete. |
+-----------------------------+----------------+----------------+------------------------------------------------+
| ZeroContents | false | false | Whether or not we zero chunk contents on |
| | | | allocation and deallocation. |
+-----------------------------+----------------+----------------+------------------------------------------------+
Allocator related common Sanitizer options can also be passed through Scudo
options, such as ``allocator_may_return_null``. A detailed list including those

View File

@@ -831,7 +831,7 @@ Bugs and Enhancements
Currently known bugs and enhancements under consideration can be
tracked by performing a `bugzilla search
<http://llvm.org/bugs/buglist.cgi?cmdtype=runnamed&namedcmd=Statepoint%20Bugs&list_id=64342>`_
<https://bugs.llvm.org/buglist.cgi?cmdtype=runnamed&namedcmd=Statepoint%20Bugs&list_id=64342>`_
for [Statepoint] in the summary field. When filing new bugs, please
use this tag so that interested parties see the newly filed bug. As
with most LLVM features, design discussions take place on `llvm-dev

View File

@@ -228,6 +228,12 @@ CTags
format. A helper script, utils/TableGen/tdtags, provides an easier-to-use
interface; run 'tdtags -H' for documentation.
X86EVEX2VEX
-----------
**Purpose**: This X86 specific tablegen backend emits tables that map EVEX
encoded instructions to their identical VEX encoded counterparts.
Clang BackEnds
==============

View File

@@ -313,7 +313,7 @@ default outputs a ``ModuleID``:
ret i32 0
}
``ModuleID`` can unexpetedly match against ``CHECK`` lines. For example:
``ModuleID`` can unexpectedly match against ``CHECK`` lines. For example:
.. code-block:: llvm
@@ -387,23 +387,49 @@ depends on special features of sub-architectures, you must add the specific
triple, test with the specific FileCheck and put it into the specific
directory that will filter out all other architectures.
REQUIRES and REQUIRES-ANY directive
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Some tests can be enabled only in specific situation - like having
debug build. Use ``REQUIRES`` directive to specify those requirements.
Constraining test execution
---------------------------
Some tests can be run only in specific configurations, such as
with debug builds or on particular platforms. Use ``REQUIRES``
and ``UNSUPPORTED`` to control when the test is enabled.
Some tests are expected to fail. For example, there may be a known bug
that the test detects. Use ``XFAIL`` to mark a test as an expected failure.
An ``XFAIL`` test will be successful if its execution fails, and
will be a failure if its execution succeeds.
.. code-block:: llvm
; This test will be only enabled in the build with asserts
; This test will be only enabled in the build with asserts.
; REQUIRES: asserts
; This test is disabled on Linux.
; UNSUPPORTED: -linux-
; This test is expected to fail on PowerPC.
; XFAIL: powerpc
You can separate requirements by a comma.
``REQUIRES`` means all listed requirements must be satisfied.
``REQUIRES-ANY`` means at least one must be satisfied.
``REQUIRES`` and ``UNSUPPORTED`` and ``XFAIL`` all accept a comma-separated
list of boolean expressions. The values in each expression may be:
- Features added to ``config.available_features`` by
configuration files such as ``lit.cfg``.
- Substrings of the target triple (``UNSUPPORTED`` and ``XFAIL`` only).
| ``REQUIRES`` enables the test if all expressions are true.
| ``UNSUPPORTED`` disables the test if any expression is true.
| ``XFAIL`` expects the test to fail if any expression is true.
As a special case, ``XFAIL: *`` is expected to fail everywhere.
.. code-block:: llvm
; This test is disabled on Windows,
; and is disabled on Linux, except for Android Linux.
; UNSUPPORTED: windows, linux && !android
; This test is expected to fail on both PowerPC and ARM.
; XFAIL: powerpc || arm
List of features that can be used in ``REQUIRES`` and ``REQUIRES-ANY`` can be
found in lit.cfg files.
Substitutions
-------------
@@ -442,6 +468,25 @@ RUN lines:
Expands to the path separator, i.e. ``:`` (or ``;`` on Windows).
``%/s, %/S, %/t, %/T:``
Act like the corresponding substitution above but replace any ``\``
character with a ``/``. This is useful to normalize path separators.
Example: ``%s: C:\Desktop Files/foo_test.s.tmp``
Example: ``%/s: C:/Desktop Files/foo_test.s.tmp``
``%:s, %:S, %:t, %:T:``
Act like the corresponding substitution above but remove colons at
the beginning of Windows paths. This is useful to allow concatenation
of absolute paths on Windows to produce a legal path.
Example: ``%s: C:\Desktop Files\foo_test.s.tmp``
Example: ``%:s: C\Desktop Files\foo_test.s.tmp``
**LLVM-specific substitutions:**
@@ -520,24 +565,6 @@ their name. For example:
This program runs its arguments and then inverts the result code from it.
Zero result codes become 1. Non-zero result codes become 0.
Sometimes it is necessary to mark a test case as "expected fail" or
XFAIL. You can easily mark a test as XFAIL just by including ``XFAIL:``
on a line near the top of the file. This signals that the test case
should succeed if the test fails. Such test cases are counted separately
by the testing tool. To specify an expected fail, use the XFAIL keyword
in the comments of the test program followed by a colon and one or more
failure patterns. Each failure pattern can be either ``*`` (to specify
fail everywhere), or a part of a target triple (indicating the test
should fail on that platform), or the name of a configurable feature
(for example, ``loadable_module``). If there is a match, the test is
expected to fail. If not, the test is expected to succeed. To XFAIL
everywhere just specify ``XFAIL: *``. Here is an example of an ``XFAIL``
line:
.. code-block:: llvm
; XFAIL: darwin,sun
To make the output more useful, :program:`lit` will scan
the lines of the test case for ones that contain a pattern that matches
``PR[0-9]+``. This is the syntax for specifying a PR (Problem Report) number

View File

@@ -593,12 +593,12 @@ the order in the definition of ``IntRegs`` in the target description file.
FPRegsClass FPRegsRegClass;
IntRegsClass IntRegsRegClass;
...
// IntRegs Sub-register Classess...
// IntRegs Sub-register Classes...
static const TargetRegisterClass* const IntRegsSubRegClasses [] = {
NULL
};
...
// IntRegs Super-register Classess...
// IntRegs Super-register Classes...
static const TargetRegisterClass* const IntRegsSuperRegClasses [] = {
NULL
};

View File

@@ -28,8 +28,9 @@ XRay consists of three main parts:
- A runtime library for enabling/disabling tracing at runtime.
- A suite of tools for analysing the traces.
**NOTE:** As of the time of this writing, XRay is only available for x86_64
and arm7 32-bit (no-thumb) Linux.
**NOTE:** As of February 27, 2017, XRay is only available for the following
architectures running Linux: x86_64, arm7 (no thumb), aarch64, powerpc64le,
mips, mipsel, mips64, mips64el.
The compiler-inserted instrumentation points come in the form of nop-sleds in
the final generated binary, and an ELF section named ``xray_instr_map`` which
@@ -84,7 +85,10 @@ GCC-style attributes or C++11-style attributes.
When linking a binary, you can either manually link in the `XRay Runtime
Library`_ or use ``clang`` to link it in automatically with the
``-fxray-instrument`` flag.
``-fxray-instrument`` flag. Alternatively, you can statically link in the XRay
runtime library from compiler-rt -- those archive files will take the name of
`libclang_rt.xray-{arch}` where `{arch}` is the mnemonic supported by clang
(x86_64, arm7, etc.).
LLVM Function Attribute
-----------------------
@@ -135,7 +139,7 @@ variable, where we list down the options and their defaults below.
+-------------------+-----------------+---------------+------------------------+
| Option | Type | Default | Description |
+===================+=================+===============+========================+
| patch_premain | ``bool`` | ``true`` | Whether to patch |
| patch_premain | ``bool`` | ``false`` | Whether to patch |
| | | | instrumentation points |
| | | | before main. |
+-------------------+-----------------+---------------+------------------------+
@@ -146,6 +150,11 @@ variable, where we list down the options and their defaults below.
| xray_logfile_base | ``const char*`` | ``xray-log.`` | Filename base for the |
| | | | XRay logfile. |
+-------------------+-----------------+---------------+------------------------+
| xray_fdr_log      | ``bool``        | ``false``     | Whether to install the |
| | | | Flight Data Recorder |
| | | | (FDR) mode. |
+-------------------+-----------------+---------------+------------------------+
If you choose to not use the default logging implementation that comes with the
XRay runtime and/or control when/how the XRay instrumentation runs, you may use
@@ -175,6 +184,64 @@ thread-safety of operations to be performed by the XRay runtime library:
XRay cannot guarantee that all threads that have ever gotten a copy of the
pointer will not invoke the function.
Flight Data Recorder Mode
-------------------------
XRay supports a logging mode which allows the application to only capture a
fixed amount of memory's worth of events. Flight Data Recorder (FDR) mode works
very much like a plane's "black box" which keeps recording data to memory in a
fixed-size circular queue of buffers, and makes the data available
programmatically until the buffers are finalized and flushed. To use FDR mode
on your application, you may set the ``xray_fdr_log`` option to ``true`` in the
``XRAY_OPTIONS`` environment variable (while also optionally setting the
``xray_naive_log`` to ``false``).
When FDR mode is on, it will keep writing and recycling memory buffers until
the logging implementation is finalized -- at which point it can be flushed and
re-initialised later. To do this programmatically, we follow the workflow
provided below:
.. code-block:: c++
// Patch the sleds, if we haven't yet.
auto patch_status = __xray_patch();
// Maybe handle the patch_status errors.
// When we want to flush the log, we need to finalize it first, to give
// threads a chance to return buffers to the queue.
auto finalize_status = __xray_log_finalize();
if (finalize_status != XRAY_LOG_FINALIZED) {
// maybe retry, or bail out.
}
// At this point, we are sure that the log is finalized, so we may try
// flushing the log.
auto flush_status = __xray_log_flushLog();
if (flush_status != XRAY_LOG_FLUSHED) {
// maybe retry, or bail out.
}
The default settings for the FDR mode implementation will create logs named
similarly to the naive log implementation, but will have a different log
format. All the trace analysis tools (and the trace reading library) will
support all versions of the FDR mode format as we add more functionality and
record types in the future.
**NOTE:** We do not, however, promise perpetual support for older versions of
the log format as we update the versions we support going forward. Deprecation
of the formats will be announced and discussed on the developers' mailing list.
XRay allows for replacing the default FDR mode logging implementation using the
following API:
- ``__xray_set_log_impl(...)``: This function takes a struct of type
``XRayLogImpl``, which is defined in ``xray/xray_log_interface.h``, part of
the XRay compiler-rt installation.
- ``__xray_log_init(...)``: This function allows for initializing and
re-initializing an installed logging implementation. See
``xray/xray_log_interface.h`` for details, part of the XRay compiler-rt
installation.
Trace Analysis Tools
--------------------
@@ -185,7 +252,26 @@ supports the following subcommands:
- ``extract``: Extract the instrumentation map from a binary, and return it as
YAML.
- ``account``: Performs basic function call accounting statistics with various
options for sorting, and output formats (supports CSV, YAML, and
console-friendly TEXT).
- ``convert``: Converts an XRay log file from one format to another. Currently
only converts to YAML.
- ``graph``: Generates a DOT graph of the function call relationships between
functions found in an XRay trace.
These subcommands use various library components found as part of the XRay
libraries, distributed with the LLVM distribution. These are:
- ``llvm/XRay/Trace.h``: A trace reading library for conveniently loading
an XRay trace of supported forms into a convenient in-memory representation.
All the analysis tools that deal with traces use this implementation; a small
usage sketch follows this list.
- ``llvm/XRay/Graph.h`` : A semi-generic graph type used by the graph
subcommand to conveniently represent a function call graph with statistics
associated with edges and vertices.
- ``llvm/XRay/InstrumentationMap.h``: A convenient tool for analyzing the
instrumentation map in XRay-instrumented object files and binaries. The
``extract`` subcommand uses this particular library.
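A minimal, hedged sketch of loading a trace with the library mentioned above
(the log file name is hypothetical, and ``loadTraceFile`` is assumed to be the
entry point exposed by ``llvm/XRay/Trace.h``):

.. code-block:: c++

#include "llvm/XRay/Trace.h"
#include "llvm/Support/raw_ostream.h"

void summarize() {
  // Hypothetical log file name; use the file produced by your own run.
  auto TraceOrErr = llvm::xray::loadTraceFile("xray-log.my_binary.abc123");
  if (!TraceOrErr) {
    llvm::errs() << llvm::toString(TraceOrErr.takeError()) << "\n";
    return;
  }
  llvm::errs() << "Read " << TraceOrErr->size() << " records.\n";
}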
Future Work
===========
@@ -193,38 +279,19 @@ Future Work
There are a number of ongoing efforts for expanding the toolset building around
the XRay instrumentation system.
Flight Data Recorder Mode
-------------------------
The `XRay whitepaper`_ mentions a mode for when events are kept in memory, and
have the traces be dumped on demand through a triggering API. This work is
currently ongoing.
Trace Analysis
--------------
There are a few more subcommands making its way to the ``llvm-xray`` tool, that
are currently under review:
- ``convert``: Turns an XRay trace from one format to another. Currently
supporting conversion from the binary XRay log to YAML.
- ``account``: Do function call accounting based on data in the XRay log.
We have more subcommands and modes that we're thinking of developing, in the
following forms:
- ``stack``: Reconstruct the function call stacks in a timeline.
- ``convert``: Converting from one version of the XRay log to another (higher)
version, and converting to other trace formats (i.e. Chrome Trace Viewer,
pprof, etc.).
- ``graph``: Generate a function call graph with relative timings and distributions.
More Platforms
--------------
Since XRay is only currently available in x86_64 and arm7 32-bit (no-thumb)
running Linux, we're looking to supporting more platforms (architectures and
operating systems).
We're looking forward to contributions to port XRay to more architectures and
operating systems.
.. References...

View File

@@ -731,7 +731,7 @@ it is parsed. This allows dynamic types of nodes. But the YAML I/O model uses
static typing, so there are limits to how you can use tags with the YAML I/O
model. Recently, we added support to YAML I/O for checking/setting the optional
tag on a map. Using this functionality it is even possible to support different
mappings, as long as they are convertable.
mappings, as long as they are convertible.
To check a tag, inside your mapping() method you can use io.mapTag() to specify
what the tag should be. This will also add that tag when writing yaml.

View File

@@ -47,10 +47,10 @@ copyright = u'2003-%d, LLVM Project' % date.today().year
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '4.0'
# The short version.
version = '5'
# The full version, including alpha/beta/rc tags.
release = '4.0'
release = '5'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@@ -251,3 +251,7 @@ for name in os.listdir(command_guide_path):
# FIXME: Define intersphinx configuration.
intersphinx_mapping = {}
# Pygments lexers are sometimes out of date (when parsing LLVM for example) or
# wrong. Suppress the warning so the build doesn't abort.
suppress_warnings = [ 'misc.highlighting_failure' ]

View File

@@ -1885,7 +1885,7 @@ ENABLE_PREPROCESSING = YES
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
MACRO_EXPANSION = NO
MACRO_EXPANSION = YES
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
# the macro expansion is limited to the macros specified with the PREDEFINED and
@@ -1893,7 +1893,7 @@ MACRO_EXPANSION = NO
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
EXPAND_ONLY_PREDEF = NO
EXPAND_ONLY_PREDEF = YES
# If the SEARCH_INCLUDES tag is set to YES the includes files in the
# INCLUDE_PATH will be searched if a #include is found.
@@ -1925,7 +1925,7 @@ INCLUDE_FILE_PATTERNS =
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
PREDEFINED =
PREDEFINED = LLVM_ALIGNAS(x)=
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The

View File

@@ -125,14 +125,12 @@ usual include guards and #includes [2]_, we get to the definition of our class:
class KaleidoscopeJIT {
private:
std::unique_ptr<TargetMachine> TM;
const DataLayout DL;
ObjectLinkingLayer<> ObjectLayer;
IRCompileLayer<decltype(ObjectLayer)> CompileLayer;
public:
typedef decltype(CompileLayer)::ModuleSetHandleT ModuleHandleT;
Our class begins with four members: A TargetMachine, TM, which will be used
@@ -152,16 +150,16 @@ compiling it, and passing the resulting in-memory object files down to the
object linking layer below.
That's it for member variables, after that we have a single typedef:
ModuleHandle. This is the handle type that will be returned from our JIT's
ModuleHandleT. This is the handle type that will be returned from our JIT's
addModule method, and can be passed to the removeModule method to remove a
module. The IRCompileLayer class already provides a convenient handle type
(IRCompileLayer::ModuleSetHandleT), so we just alias our ModuleHandle to this.
(IRCompileLayer::ModuleSetHandleT), so we just alias our ModuleHandleT to this.
.. code-block:: c++
KaleidoscopeJIT()
: TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()),
CompileLayer(ObjectLayer, SimpleCompiler(*TM)) {
CompileLayer(ObjectLayer, SimpleCompiler(*TM)) {
llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
}
@@ -200,7 +198,7 @@ available for execution.
return JITSymbol(nullptr);
});
// Build a singlton module set to hold our module.
// Build a singleton module set to hold our module.
std::vector<std::unique_ptr<Module>> Ms;
Ms.push_back(std::move(M));
@@ -259,16 +257,16 @@ were linked into a single, ever-growing logical dylib. To implement this our
first lambda (the one defining findSymbolInLogicalDylib) will just search for
JIT'd code by calling the CompileLayer's findSymbol method. If we don't find a
symbol in the JIT itself we'll fall back to our second lambda, which implements
findSymbol. This will use the RTDyldMemoyrManager::getSymbolAddressInProcess
findSymbol. This will use the RTDyldMemoryManager::getSymbolAddressInProcess
method to search for the symbol within the program itself. If we can't find a
symbol definition via either of these paths the JIT will refuse to accept our
symbol definition via either of these paths, the JIT will refuse to accept our
module, returning a "symbol not found" error.
Now that we've built our symbol resolver we're ready to add our module to the
Now that we've built our symbol resolver, we're ready to add our module to the
JIT. We do this by calling the CompileLayer's addModuleSet method [4]_. Since
we only have a single Module and addModuleSet expects a collection, we will
create a vector of modules and add our module as the only member. Since we
have already typedef'd our ModuleHandle type to be the same as the
have already typedef'd our ModuleHandleT type to be the same as the
CompileLayer's handle type, we can return the handle from addModuleSet
directly from our addModule method.
@@ -304,7 +302,7 @@ treated as a duplicate definition when the next top-level expression is
entered. It is generally good to free any module that you know you won't need
to call further, just to free up the resources dedicated to it. However, you
don't strictly need to do this: All resources will be cleaned up when your
JIT class is destructed, if the haven't been freed before then.
JIT class is destructed, if they haven't been freed before then.
This brings us to the end of Chapter 1 of Building a JIT. You now have a basic
but fully functioning JIT stack that you can use to take LLVM IR and make it

View File

@@ -119,6 +119,8 @@ way to talk about functions themselves:
public:
PrototypeAST(const std::string &name, std::vector<std::string> Args)
: Name(name), Args(std::move(Args)) {}
const std::string &getName() const { return Name; }
};
/// FunctionAST - This class represents a function definition itself.

View File

@@ -122,7 +122,7 @@ First we'll do numeric literals:
.. code-block:: c++
Value *NumberExprAST::codegen() {
return ConstantFP::get(LLVMContext, APFloat(Val));
return ConstantFP::get(TheContext, APFloat(Val));
}
In the LLVM IR, numeric constants are represented with the
@@ -171,7 +171,7 @@ variables <LangImpl7.html#user-defined-local-variables>`_.
case '<':
L = Builder.CreateFCmpULT(L, R, "cmptmp");
// Convert bool 0/1 to double 0.0 or 1.0
return Builder.CreateUIToFP(L, Type::getDoubleTy(LLVMContext),
return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext),
"booltmp");
default:
return LogErrorV("invalid binary operator");
@@ -270,9 +270,9 @@ with:
Function *PrototypeAST::codegen() {
// Make the function type: double(double,double) etc.
std::vector<Type*> Doubles(Args.size(),
Type::getDoubleTy(LLVMContext));
Type::getDoubleTy(TheContext));
FunctionType *FT =
FunctionType::get(Type::getDoubleTy(LLVMContext), Doubles, false);
FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false);
Function *F =
Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
@@ -346,7 +346,7 @@ assert that the function is empty (i.e. has no body yet) before we start.
.. code-block:: c++
// Create a new basic block to start insertion into.
BasicBlock *BB = BasicBlock::Create(LLVMContext, "entry", TheFunction);
BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction);
Builder.SetInsertPoint(BB);
// Record the function arguments in the NamedValues map.
@@ -533,7 +533,8 @@ This shows an extern for the libm "cos" function, and a call to it.
ret double %calltmp
}
When you quit the current demo, it dumps out the IR for the entire
When you quit the current demo (by sending an EOF via CTRL+D on Linux
or CTRL+Z and ENTER on Windows), it dumps out the IR for the entire
module generated. Here you can see the big picture with all the
functions referencing each other.

View File

@@ -131,33 +131,29 @@ for us:
void InitializeModuleAndPassManager(void) {
// Open a new module.
Context LLVMContext;
TheModule = llvm::make_unique<Module>("my cool jit", LLVMContext);
TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
TheModule = llvm::make_unique<Module>("my cool jit", TheContext);
// Create a new pass manager attached to it.
TheFPM = llvm::make_unique<FunctionPassManager>(TheModule.get());
// Provide basic AliasAnalysis support for GVN.
TheFPM.add(createBasicAliasAnalysisPass());
// Do simple "peephole" optimizations and bit-twiddling optzns.
TheFPM.add(createInstructionCombiningPass());
TheFPM->add(createInstructionCombiningPass());
// Reassociate expressions.
TheFPM.add(createReassociatePass());
TheFPM->add(createReassociatePass());
// Eliminate Common SubExpressions.
TheFPM.add(createGVNPass());
TheFPM->add(createGVNPass());
// Simplify the control flow graph (deleting unreachable blocks, etc).
TheFPM.add(createCFGSimplificationPass());
TheFPM->add(createCFGSimplificationPass());
TheFPM.doInitialization();
TheFPM->doInitialization();
}
This code initializes the global module ``TheModule``, and the function pass
manager ``TheFPM``, which is attached to ``TheModule``. Once the pass manager is
set up, we use a series of "add" calls to add a bunch of LLVM passes.
In this case, we choose to add five passes: one analysis pass (alias analysis),
and four optimization passes. The passes we choose here are a pretty standard set
In this case, we choose to add four optimization passes.
The passes we choose here are a pretty standard set
of "cleanup" optimizations that are useful for a wide variety of code. I won't
delve into what they do but, believe me, they are a good starting place :).
@@ -227,8 +223,10 @@ expressions they type in. For example, if they type in "1 + 2;", we
should evaluate and print out 3. If they define a function, they should
be able to call it from the command line.
In order to do this, we first declare and initialize the JIT. This is
done by adding a global variable ``TheJIT``, and initializing it in
In order to do this, we first prepare the environment to create code for
the current native target and declare and initialize the JIT. This is
done by calling some ``InitializeNativeTarget\*`` functions and
adding a global variable ``TheJIT``, and initializing it in
``main``:
.. code-block:: c++
@@ -236,7 +234,21 @@ done by adding a global variable ``TheJIT``, and initializing it in
static std::unique_ptr<KaleidoscopeJIT> TheJIT;
...
int main() {
..
InitializeNativeTarget();
InitializeNativeTargetAsmPrinter();
InitializeNativeTargetAsmParser();
// Install standard binary operators.
// 1 is lowest precedence.
BinopPrecedence['<'] = 10;
BinopPrecedence['+'] = 20;
BinopPrecedence['-'] = 20;
BinopPrecedence['*'] = 40; // highest.
// Prime the first token.
fprintf(stderr, "ready> ");
getNextToken();
TheJIT = llvm::make_unique<KaleidoscopeJIT>();
// Run the main "interpreter loop" now.
@@ -245,9 +257,24 @@ done by adding a global variable ``TheJIT``, and initializing it in
return 0;
}
We also need to set up the data layout for the JIT:
.. code-block:: c++
void InitializeModuleAndPassManager(void) {
// Open a new module.
TheModule = llvm::make_unique<Module>("my cool jit", TheContext);
TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
// Create a new pass manager attached to it.
TheFPM = llvm::make_unique<FunctionPassManager>(TheModule.get());
...
The KaleidoscopeJIT class is a simple JIT built specifically for these
tutorials. In later chapters we will look at how it works and extend it with
new features, but for now we will take it as given. Its API is very simple::
tutorials, available inside the LLVM source code
at llvm-src/examples/Kaleidoscope/include/KaleidoscopeJIT.h.
In later chapters we will look at how it works and extend it with
new features, but for now we will take it as given. Its API is very simple:
``addModule`` adds an LLVM IR module to the JIT, making its functions
available for execution; ``removeModule`` removes a module, freeing any
memory associated with the code in that module; and ``findSymbol`` allows us
@@ -458,7 +485,8 @@ We also need to update HandleDefinition and HandleExtern:
if (auto FnAST = ParseDefinition()) {
if (auto *FnIR = FnAST->codegen()) {
fprintf(stderr, "Read function definition:");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
TheJIT->addModule(std::move(TheModule));
InitializeModuleAndPassManager();
}
@@ -472,7 +500,8 @@ We also need to update HandleDefinition and HandleExtern:
if (auto ProtoAST = ParseExtern()) {
if (auto *FnIR = ProtoAST->codegen()) {
fprintf(stderr, "Read extern: ");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
}
} else {
@@ -552,7 +581,10 @@ most recent to the oldest, to find the newest definition. If no definition is
found inside the JIT, it falls back to calling "``dlsym("sin")``" on the
Kaleidoscope process itself. Since "``sin``" is defined within the JIT's
address space, it simply patches up calls in the module to call the libm
version of ``sin`` directly.
version of ``sin`` directly. But in some cases this even goes further:
as sin and cos are names of standard math functions, the constant folder
will directly evaluate the function calls to the correct result when they are
called with constants, as in the "``sin(1.0)``" call above.
In the future we'll see how tweaking this symbol resolution rule can be used to
enable all sorts of useful features, from security (restricting the set of
@@ -565,12 +597,21 @@ if we add:
.. code-block:: c++
#ifdef LLVM_ON_WIN32
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT
#endif
/// putchard - putchar that takes a double and returns 0.
extern "C" double putchard(double X) {
extern "C" DLLEXPORT double putchard(double X) {
fputc((char)X, stderr);
return 0;
}
Note that for Windows we need to actually export the functions because
the dynamic symbol loader will use GetProcAddress to find the symbols.
Now we can produce simple output to the console by using things like:
"``extern putchard(x); putchard(120);``", which prints a lowercase 'x'
on the console (120 is the ASCII code for 'x'). Similar code could be

View File

@@ -103,7 +103,8 @@ To represent the new expression we add a new AST node for it:
IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
std::unique_ptr<ExprAST> Else)
: Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
virtual Value *codegen();
Value *codegen() override;
};
The AST node just has pointers to the various subexpressions.
@@ -290,9 +291,9 @@ for ``IfExprAST``:
if (!CondV)
return nullptr;
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
CondV = Builder.CreateFCmpONE(
CondV, ConstantFP::get(LLVMContext, APFloat(0.0)), "ifcond");
CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
This code is straightforward and similar to what we saw before. We emit
the expression for the condition, then compare that value to zero to get
@@ -305,9 +306,9 @@ a truth value as a 1-bit (bool) value.
// Create blocks for the then and else cases. Insert the 'then' block at the
// end of the function.
BasicBlock *ThenBB =
BasicBlock::Create(LLVMContext, "then", TheFunction);
BasicBlock *ElseBB = BasicBlock::Create(LLVMContext, "else");
BasicBlock *MergeBB = BasicBlock::Create(LLVMContext, "ifcont");
BasicBlock::Create(TheContext, "then", TheFunction);
BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else");
BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont");
Builder.CreateCondBr(CondV, ThenBB, ElseBB);
@@ -400,7 +401,7 @@ code:
TheFunction->getBasicBlockList().push_back(MergeBB);
Builder.SetInsertPoint(MergeBB);
PHINode *PN =
Builder.CreatePHI(Type::getDoubleTy(LLVMContext), 2, "iftmp");
Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp");
PN->addIncoming(ThenV, ThenBB);
PN->addIncoming(ElseV, ElseBB);
@@ -433,7 +434,7 @@ something more aggressive, a 'for' expression:
::
extern putchard(char)
extern putchard(char);
def printstar(n)
for i = 1, i < n, 1.0 in
putchard(42); # ascii 42 = '*'
@@ -500,7 +501,8 @@ variable name and the constituent expressions in the node.
std::unique_ptr<ExprAST> Body)
: VarName(VarName), Start(std::move(Start)), End(std::move(End)),
Step(std::move(Step)), Body(std::move(Body)) {}
virtual Value *codegen();
Value *codegen() override;
};
Parser Extensions for the 'for' Loop
@@ -561,6 +563,27 @@ value to null in the AST node:
std::move(Body));
}
And again we hook it up as a primary expression:
.. code-block:: c++
static std::unique_ptr<ExprAST> ParsePrimary() {
switch (CurTok) {
default:
return LogError("unknown token when expecting an expression");
case tok_identifier:
return ParseIdentifierExpr();
case tok_number:
return ParseNumberExpr();
case '(':
return ParseParenExpr();
case tok_if:
return ParseIfExpr();
case tok_for:
return ParseForExpr();
}
}
LLVM IR for the 'for' Loop
--------------------------
@@ -610,7 +633,8 @@ expression for the loop value:
Value *ForExprAST::codegen() {
// Emit the start code first, without 'variable' in scope.
Value *StartVal = Start->codegen();
if (StartVal == 0) return 0;
if (!StartVal)
return nullptr;
With this out of the way, the next step is to set up the LLVM basic
block for the start of the loop body. In the case above, the whole loop
@@ -625,7 +649,7 @@ expression).
Function *TheFunction = Builder.GetInsertBlock()->getParent();
BasicBlock *PreheaderBB = Builder.GetInsertBlock();
BasicBlock *LoopBB =
BasicBlock::Create(LLVMContext, "loop", TheFunction);
BasicBlock::Create(TheContext, "loop", TheFunction);
// Insert an explicit fall through from the current block to the LoopBB.
Builder.CreateBr(LoopBB);
@@ -642,7 +666,7 @@ the two blocks.
Builder.SetInsertPoint(LoopBB);
// Start the PHI node with an entry for Start.
PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(LLVMContext),
PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(TheContext),
2, VarName.c_str());
Variable->addIncoming(StartVal, PreheaderBB);
@@ -693,7 +717,7 @@ table.
return nullptr;
} else {
// If not specified, use 1.0.
StepVal = ConstantFP::get(LLVMContext, APFloat(1.0));
StepVal = ConstantFP::get(TheContext, APFloat(1.0));
}
Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
@@ -710,9 +734,9 @@ iteration of the loop.
if (!EndCond)
return nullptr;
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
EndCond = Builder.CreateFCmpONE(
EndCond, ConstantFP::get(LLVMContext, APFloat(0.0)), "loopcond");
EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
Finally, we evaluate the exit value of the loop, to determine whether
the loop should exit. This mirrors the condition evaluation for the
@@ -723,7 +747,7 @@ if/then/else statement.
// Create the "after loop" block and insert it.
BasicBlock *LoopEndBB = Builder.GetInsertBlock();
BasicBlock *AfterBB =
BasicBlock::Create(LLVMContext, "afterloop", TheFunction);
BasicBlock::Create(TheContext, "afterloop", TheFunction);
// Insert the conditional branch into the end of LoopEndBB.
Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
@@ -751,7 +775,7 @@ insertion position to it.
NamedValues.erase(VarName);
// for expr always returns 0.0.
return Constant::getNullValue(Type::getDoubleTy(LLVMContext));
return Constant::getNullValue(Type::getDoubleTy(TheContext));
}
The final code handles various cleanups: now that we have the "NextVar"
@@ -772,7 +796,7 @@ Full Code Listing
=================
Here is the complete code listing for our running example, enhanced with
the if/then/else and for expressions.. To build this example, use:
the if/then/else and for expressions. To build this example, use:
.. code-block:: bash

View File

@@ -31,7 +31,7 @@ User-defined Operators: the Idea
================================
The "operator overloading" that we will add to Kaleidoscope is more
general than languages like C++. In C++, you are only allowed to
general than in languages like C++. In C++, you are only allowed to
redefine existing operators: you can't programmatically change the
grammar, introduce new operators, change precedence levels, etc. In this
chapter, we will add this capability to Kaleidoscope, which will let the
@@ -41,8 +41,8 @@ The point of going into user-defined operators in a tutorial like this
is to show the power and flexibility of using a hand-written parser.
Thus far, the parser we have been implementing uses recursive descent
for most parts of the grammar and operator precedence parsing for the
expressions. See `Chapter 2 <LangImpl2.html>`_ for details. Without
using operator precedence parsing, it would be very difficult to allow
expressions. See `Chapter 2 <LangImpl2.html>`_ for details. By
using operator precedence parsing, it is very easy to allow
the programmer to introduce new operators into the grammar: the grammar
is dynamically extensible as the JIT runs.
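As a rough sketch of what "dynamically extensible" means in the tutorial's C++ parser (names follow the tutorial; this is not new functionality in this change), a user-defined operator simply gets an entry in the same precedence table the expression parser already consults:
.. code-block:: c++

// BinopPrecedence maps each binary operator character to its precedence.
// CurTok is the tutorial's "current token" global.
static std::map<char, int> BinopPrecedence;

/// GetTokPrecedence - Get the precedence of the pending binary operator token.
static int GetTokPrecedence() {
  if (!isascii(CurTok))
    return -1;
  // Make sure it's a declared binop.
  int TokPrec = BinopPrecedence[CurTok];
  if (TokPrec <= 0)
    return -1;
  return TokPrec;
}

// Registering a newly defined operator is then a single line, done in
// FunctionAST::codegen later in this chapter:
//   BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();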
@@ -143,17 +143,18 @@ this:
: Name(name), Args(std::move(Args)), IsOperator(IsOperator),
Precedence(Prec) {}
Function *codegen();
const std::string &getName() const { return Name; }
bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
char getOperatorName() const {
assert(isUnaryOp() || isBinaryOp());
return Name[Name.size()-1];
return Name[Name.size() - 1];
}
unsigned getBinaryPrecedence() const { return Precedence; }
Function *codegen();
};
Basically, in addition to knowing a name for the prototype, we now keep
@@ -194,7 +195,7 @@ user-defined operator, we need to parse it:
// Read the precedence if present.
if (CurTok == tok_number) {
if (NumVal < 1 || NumVal > 100)
return LogErrorP("Invalid precedecnce: must be 1..100");
return LogErrorP("Invalid precedence: must be 1..100");
BinaryPrecedence = (unsigned)NumVal;
getNextToken();
}
@@ -225,7 +226,7 @@ This is all fairly straightforward parsing code, and we have already
seen a lot of similar code in the past. One interesting part about the
code above is the couple lines that set up ``FnName`` for binary
operators. This builds names like "binary@" for a newly defined "@"
operator. This then takes advantage of the fact that symbol names in the
operator. It then takes advantage of the fact that symbol names in the
LLVM symbol table are allowed to have any character in them, including
embedded nul characters.
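As a hedged illustration, the relevant lines in ``ParsePrototype`` for a definition such as ``def binary@ 5 (LHS RHS) ...`` look roughly like this (tutorial names, error handling elided):
.. code-block:: c++

// After seeing the 'binary' keyword inside ParsePrototype():
std::string FnName = "binary";
FnName += (char)CurTok;          // e.g. '@', giving FnName == "binary@"
unsigned BinaryPrecedence = 30;  // default used when no precedence literal follows
getNextToken();                  // eat the operator character
// ... the prototype is later built with the mangled name:
//   llvm::make_unique<PrototypeAST>(FnName, std::move(ArgNames),
//                                   /*IsOperator=*/true, BinaryPrecedence);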
@@ -251,7 +252,7 @@ default case for our existing binary operator node:
case '<':
L = Builder.CreateFCmpULT(L, R, "cmptmp");
// Convert bool 0/1 to double 0.0 or 1.0
return Builder.CreateUIToFP(L, Type::getDoubleTy(LLVMContext),
return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext),
"booltmp");
default:
break;
@@ -259,7 +260,7 @@ default case for our existing binary operator node:
// If it wasn't a builtin binary operator, it must be a user defined one. Emit
// a call to it.
Function *F = TheModule->getFunction(std::string("binary") + Op);
Function *F = getFunction(std::string("binary") + Op);
assert(F && "binary operator not found!");
Value *Ops[2] = { L, R };
@@ -277,22 +278,21 @@ The final piece of code we are missing, is a bit of top-level magic:
.. code-block:: c++
Function *FunctionAST::codegen() {
NamedValues.clear();
Function *TheFunction = Proto->codegen();
// Transfer ownership of the prototype to the FunctionProtos map, but keep a
// reference to it for use below.
auto &P = *Proto;
FunctionProtos[Proto->getName()] = std::move(Proto);
Function *TheFunction = getFunction(P.getName());
if (!TheFunction)
return nullptr;
// If this is an operator, install it.
if (Proto->isBinaryOp())
BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
if (P.isBinaryOp())
BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
// Create a new basic block to start insertion into.
BasicBlock *BB = BasicBlock::Create(LLVMContext, "entry", TheFunction);
Builder.SetInsertPoint(BB);
if (Value *RetVal = Body->codegen()) {
...
BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction);
...
Basically, before codegening a function, if it is a user-defined
operator, we register it in the precedence table. This allows the binary
@@ -323,7 +323,8 @@ that, we need an AST node:
public:
UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
: Opcode(Opcode), Operand(std::move(Operand)) {}
virtual Value *codegen();
Value *codegen() override;
};
This AST node is very simple and obvious by now. It directly mirrors the
@@ -345,7 +346,7 @@ simple: we'll add a new function to do it:
int Opc = CurTok;
getNextToken();
if (auto Operand = ParseUnary())
return llvm::unique_ptr<UnaryExprAST>(Opc, std::move(Operand));
return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
return nullptr;
}
@@ -433,7 +434,7 @@ unary operators. It looks like this:
if (!OperandV)
return nullptr;
Function *F = TheModule->getFunction(std::string("unary")+Opcode);
Function *F = getFunction(std::string("unary") + Opcode);
if (!F)
return LogErrorV("Unknown unary operator");
@@ -461,7 +462,7 @@ newline):
declare double @printd(double)
ready> def binary : 1 (x y) 0; # Low-precedence operator that ignores operands.
..
...
ready> printd(123) : printd(456) : printd(789);
123.000000
456.000000
@@ -518,10 +519,9 @@ denser the character:
::
ready>
extern putchard(char)
def printdensity(d)
ready> extern putchard(char);
...
ready> def printdensity(d)
if d > 8 then
putchard(32) # ' '
else if d > 4 then
@@ -538,9 +538,9 @@ denser the character:
Evaluated to 0.000000
Based on these simple primitive operations, we can start to define more
interesting things. For example, here's a little function that solves
for the number of iterations it takes a function in the complex plane to
converge:
interesting things. For example, here's a little function that determines
the number of iterations it takes for a certain function in the complex
plane to diverge:
::
@@ -742,7 +742,7 @@ Full Code Listing
=================
Here is the complete code listing for our running example, enhanced with
the if/then/else and for expressions.. To build this example, use:
the support for user-defined operators. To build this example, use:
.. code-block:: bash

View File

@@ -327,7 +327,7 @@ to update:
static std::map<std::string, AllocaInst*> NamedValues;
Also, since we will need to create these alloca's, we'll use a helper
Also, since we will need to create these allocas, we'll use a helper
function that ensures that the allocas are created in the entry block of
the function:
@@ -339,7 +339,7 @@ the function:
const std::string &VarName) {
IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
TheFunction->getEntryBlock().begin());
return TmpB.CreateAlloca(Type::getDoubleTy(LLVMContext), 0,
return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), 0,
VarName.c_str());
}
@@ -348,7 +348,7 @@ the first instruction (.begin()) of the entry block. It then creates an
alloca with the expected name and returns it. Because all values in
Kaleidoscope are doubles, there is no need to pass in a type to use.
With this in place, the first functionality change we want to make is to
With this in place, the first functionality change we want to make belongs to
variable references. In our new scheme, variables live on the stack, so
code generating a reference to them actually needs to produce a load
from the stack slot:
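A minimal sketch of what that load looks like, assuming the tutorial's ``NamedValues`` map now holds ``AllocaInst*`` values:
.. code-block:: c++

Value *VariableExprAST::codegen() {
  // Look this variable up in the function's symbol table.
  Value *V = NamedValues[Name];
  if (!V)
    return LogErrorV("Unknown variable name");
  // Variables now live on the stack, so emit a load from their alloca slot.
  return Builder.CreateLoad(V, Name.c_str());
}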
@@ -377,7 +377,7 @@ the unabridged code):
// Create an alloca for the variable in the entry block.
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
// Emit the start code first, without 'variable' in scope.
// Emit the start code first, without 'variable' in scope.
Value *StartVal = Start->codegen();
if (!StartVal)
return nullptr;
@@ -408,21 +408,25 @@ them. The code for this is also pretty simple:
.. code-block:: c++
/// CreateArgumentAllocas - Create an alloca for each argument and register the
/// argument in the symbol table so that references to it will succeed.
void PrototypeAST::CreateArgumentAllocas(Function *F) {
Function::arg_iterator AI = F->arg_begin();
for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
Function *FunctionAST::codegen() {
...
Builder.SetInsertPoint(BB);
// Record the function arguments in the NamedValues map.
NamedValues.clear();
for (auto &Arg : TheFunction->args()) {
// Create an alloca for this variable.
AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
// Store the initial value into the alloca.
Builder.CreateStore(AI, Alloca);
Builder.CreateStore(&Arg, Alloca);
// Add arguments to variable symbol table.
NamedValues[Args[Idx]] = Alloca;
NamedValues[Arg.getName()] = Alloca;
}
}
if (Value *RetVal = Body->codegen()) {
...
For each argument, we make an alloca, store the input value to the
function into the alloca, and register the alloca as the memory location
@@ -434,15 +438,13 @@ get good codegen once again:
.. code-block:: c++
// Set up the optimizer pipeline. Start with registering info about how the
// target lays out data structures.
OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
// Promote allocas to registers.
OurFPM.add(createPromoteMemoryToRegisterPass());
TheFPM->add(createPromoteMemoryToRegisterPass());
// Do simple "peephole" optimizations and bit-twiddling optzns.
OurFPM.add(createInstructionCombiningPass());
TheFPM->add(createInstructionCombiningPass());
// Reassociate expressions.
OurFPM.add(createReassociatePass());
TheFPM->add(createReassociatePass());
...
It is interesting to see what the code looks like before and after the
mem2reg optimization runs. For example, this is the before/after code
@@ -454,7 +456,7 @@ for our recursive fib function. Before the optimization:
entry:
%x1 = alloca double
store double %x, double* %x1
%x2 = load double* %x1
%x2 = load double, double* %x1
%cmptmp = fcmp ult double %x2, 3.000000e+00
%booltmp = uitofp i1 %cmptmp to double
%ifcond = fcmp one double %booltmp, 0.000000e+00
@@ -464,10 +466,10 @@ for our recursive fib function. Before the optimization:
br label %ifcont
else: ; preds = %entry
%x3 = load double* %x1
%x3 = load double, double* %x1
%subtmp = fsub double %x3, 1.000000e+00
%calltmp = call double @fib(double %subtmp)
%x4 = load double* %x1
%x4 = load double, double* %x1
%subtmp5 = fsub double %x4, 2.000000e+00
%calltmp6 = call double @fib(double %subtmp5)
%addtmp = fadd double %calltmp, %calltmp6
@@ -677,10 +679,10 @@ var/in, it looks like this:
public:
VarExprAST(std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
std::unique_ptr<ExprAST> body)
: VarNames(std::move(VarNames)), Body(std::move(Body)) {}
std::unique_ptr<ExprAST> Body)
: VarNames(std::move(VarNames)), Body(std::move(Body)) {}
virtual Value *codegen();
Value *codegen() override;
};
var/in allows a list of names to be defined all at once, and each name
@@ -812,7 +814,7 @@ previous value that we replace in OldBindings.
if (!InitVal)
return nullptr;
} else { // If not specified, use 0.0.
InitVal = ConstantFP::get(LLVMContext, APFloat(0.0));
InitVal = ConstantFP::get(TheContext, APFloat(0.0));
}
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);

View File

@@ -18,7 +18,7 @@ Source level debugging uses formatted data that helps a debugger
translate from binary and the state of the machine back to the
source that the programmer wrote. In LLVM we generally use a format
called `DWARF <http://dwarfstd.org>`_. DWARF is a compact encoding
that represents types, source locations, and variable locations.
that represents types, source locations, and variable locations.
The short summary of this chapter is that we'll go through the
various things you have to add to a programming language to
@@ -94,14 +94,14 @@ Then we're going to remove the command line code wherever it exists:
return;
@@ -1184,7 +1183,6 @@ int main() {
BinopPrecedence['*'] = 40; // highest.
// Prime the first token.
- fprintf(stderr, "ready> ");
getNextToken();
Lastly we're going to disable all of the optimization passes and the JIT so
that the only thing that happens after we're done parsing and generating
code is that the llvm IR goes to standard error:
code is that the LLVM IR goes to standard error:
.. code-block:: udiff
@@ -140,7 +140,7 @@ code is that the llvm IR goes to standard error:
-
+ #endif
OurFPM.doInitialization();
// Set the global so the code gen can use this.
This relatively small set of changes gets us to the point that we can compile
@@ -166,8 +166,8 @@ DWARF Emission Setup
Similar to the ``IRBuilder`` class we have a
`DIBuilder <http://llvm.org/doxygen/classllvm_1_1DIBuilder.html>`_ class
that helps in constructing debug metadata for an llvm IR file. It
corresponds 1:1 similarly to ``IRBuilder`` and llvm IR, but with nicer names.
that helps in constructing debug metadata for an LLVM IR file. It
corresponds 1:1 similarly to ``IRBuilder`` and LLVM IR, but with nicer names.
Using it does require that you be more familiar with DWARF terminology than
you needed to be with ``IRBuilder`` and ``Instruction`` names, but if you
read through the general documentation on the
@@ -194,7 +194,7 @@ expressions:
} KSDbgInfo;
DIType *DebugInfo::getDoubleTy() {
if (DblTy.isValid())
if (DblTy)
return DblTy;
DblTy = DBuilder->createBasicType("double", 64, 64, dwarf::DW_ATE_float);
@@ -214,7 +214,7 @@ There are a couple of things to note here. First, while we're producing a
compile unit for a language called Kaleidoscope we used the language
constant for C. This is because a debugger wouldn't necessarily understand
the calling conventions or default ABI for a language it doesn't recognize
and we follow the C ABI in our llvm code generation so it's the closest
and we follow the C ABI in our LLVM code generation so it's the closest
thing to accurate. This ensures we can actually call functions from the
debugger and have them execute. Secondly, you'll see the "fib.ks" in the
call to ``createCompileUnit``. This is a default hard coded value since
@@ -259,10 +259,11 @@ information) and construct our function definition:
unsigned LineNo = 0;
unsigned ScopeLine = 0;
DISubprogram *SP = DBuilder->createFunction(
FContext, Name, StringRef(), Unit, LineNo,
CreateFunctionType(Args.size(), Unit), false /* internal linkage */,
true /* definition */, ScopeLine, DINode::FlagPrototyped, false);
F->setSubprogram(SP);
FContext, P.getName(), StringRef(), Unit, LineNo,
CreateFunctionType(TheFunction->arg_size(), Unit),
false /* internal linkage */, true /* definition */, ScopeLine,
DINode::FlagPrototyped, false);
TheFunction->setSubprogram(SP);
and we now have a DISubprogram that contains a reference to all of our
metadata for the function.
@@ -326,10 +327,9 @@ that we pass down through when we create a new expression:
giving us locations for each of our expressions and variables.
From this we can make sure to tell ``DIBuilder`` when we're at a new source
location so it can use that when we generate the rest of our code and make
sure that each instruction has source location information. We do this
by constructing another small function:
To make sure that every instruction gets proper source location information,
we have to tell ``Builder`` whenever we're at a new source location.
We use a small helper function for this:
.. code-block:: c++
@@ -343,40 +343,23 @@ by constructing another small function:
DebugLoc::get(AST->getLine(), AST->getCol(), Scope));
}
that both tells the main ``IRBuilder`` where we are, but also what scope
we're in. Since we've just created a function above we can either be in
the main file scope (like when we created our function), or now we can be
in the function scope we just created. To represent this we create a stack
of scopes:
This tells the main ``IRBuilder`` both where we are and what scope
we're in. The scope can either be at the compile-unit level or be the nearest
enclosing lexical block like the current function.
To represent this we create a stack of scopes:
.. code-block:: c++
std::vector<DIScope *> LexicalBlocks;
std::map<const PrototypeAST *, DIScope *> FnScopeMap;
and keep a map of each function to the scope that it represents (an
DISubprogram is also an DIScope).
Then we make sure to:
and push the scope (function) to the top of the stack when we start
generating the code for each function:
.. code-block:: c++
KSDbgInfo.emitLocation(this);
KSDbgInfo.LexicalBlocks.push_back(SP);
emit the location every time we start to generate code for a new AST, and
also:
.. code-block:: c++
KSDbgInfo.FnScopeMap[this] = SP;
store the scope (function) when we create it and use it:
KSDbgInfo.LexicalBlocks.push_back(&KSDbgInfo.FnScopeMap[Proto]);
when we start generating the code for each function.
also, don't forget to pop the scope back off of your scope stack at the
Also, we must not forget to pop the scope back off of the scope stack at the
end of the code generation for the function:
.. code-block:: c++
@@ -385,6 +368,13 @@ end of the code generation for the function:
// unconditionally.
KSDbgInfo.LexicalBlocks.pop_back();
Then we make sure to emit the location every time we start to generate code
for a new AST object:
.. code-block:: c++
KSDbgInfo.emitLocation(this);
Variables
=========
@@ -392,25 +382,37 @@ Now that we have functions, we need to be able to print out the variables
we have in scope. Let's get our function arguments set up so we can get
decent backtraces and see how our functions are being called. It isn't
a lot of code, and we generally handle it when we're creating the
argument allocas in ``PrototypeAST::CreateArgumentAllocas``.
argument allocas in ``FunctionAST::codegen``.
.. code-block:: c++
DIScope *Scope = KSDbgInfo.LexicalBlocks.back();
DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU.getFilename(),
KSDbgInfo.TheCU.getDirectory());
DILocalVariable D = DBuilder->createParameterVariable(
Scope, Args[Idx], Idx + 1, Unit, Line, KSDbgInfo.getDoubleTy(), true);
// Record the function arguments in the NamedValues map.
NamedValues.clear();
unsigned ArgIdx = 0;
for (auto &Arg : TheFunction->args()) {
// Create an alloca for this variable.
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(),
DebugLoc::get(Line, 0, Scope),
Builder.GetInsertBlock());
// Create a debug descriptor for the variable.
DILocalVariable *D = DBuilder->createParameterVariable(
SP, Arg.getName(), ++ArgIdx, Unit, LineNo, KSDbgInfo.getDoubleTy(),
true);
Here we're doing a few things. First, we're grabbing our current scope
for the variable so we can say what range of code our variable is valid
through. Second, we're creating the variable, giving it the scope,
DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(),
DebugLoc::get(LineNo, 0, SP),
Builder.GetInsertBlock());
// Store the initial value into the alloca.
Builder.CreateStore(&Arg, Alloca);
// Add arguments to variable symbol table.
NamedValues[Arg.getName()] = Alloca;
}
Here we're first creating the variable, giving it the scope (``SP``),
the name, source location, type, and since it's an argument, the argument
index. Third, we create an ``llvm.dbg.declare`` call to indicate at the IR
index. Next, we create an ``llvm.dbg.declare`` call to indicate at the IR
level that we've got a variable in an alloca (and it gives a starting
location for the variable), and setting a source location for the
beginning of the scope on the declare.
@@ -420,7 +422,7 @@ assumptions based on how code and debug information was generated for them
in the past. In this case we need to do a little bit of a hack to avoid
generating line information for the function prologue so that the debugger
knows to skip over those instructions when setting a breakpoint. So in
``FunctionAST::CodeGen`` we add a couple of lines:
``FunctionAST::CodeGen`` we add some more lines:
.. code-block:: c++
@@ -434,7 +436,7 @@ body of the function:
.. code-block:: c++
KSDbgInfo.emitLocation(Body);
KSDbgInfo.emitLocation(Body.get());
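For reference, the prologue trick mentioned above boils down to emitting an "unknown" source location before the argument setup and only switching real locations back on when the body is emitted (a sketch; ``emitLocation`` in the tutorial treats a null AST node as "no location"):
.. code-block:: c++

// Unset the location for the prologue emission (leading instructions with no
// location in a function are considered part of the prologue, and the
// debugger will run past them when breaking on the function).
KSDbgInfo.emitLocation(nullptr);

// ... create argument allocas and their debug descriptors here ...

// When we are ready to emit code for the body, switch locations back on.
KSDbgInfo.emitLocation(Body.get());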
With this we have enough debug information to set breakpoints in functions,
print out argument variables, and call functions. Not too bad for just a

View File

@@ -103,19 +103,7 @@ Parser Extensions for If/Then/Else
Now that we have the relevant tokens coming from the lexer and we have
the AST node to build, our parsing logic is relatively straightforward.
First we define a new parsing function:
.. code-block:: ocaml
let rec parse_primary = parser
...
(* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
| [< 'Token.If; c=parse_expr;
'Token.Then ?? "expected 'then'"; t=parse_expr;
'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
Ast.If (c, t, e)
Next we hook it up as a primary expression:
Next we add a new case for parsing an if-expression as a primary expression:
.. code-block:: ocaml

View File

@@ -29,6 +29,7 @@
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -153,13 +154,22 @@ int main(int argc, char **argv) {
//Write it out
if (JIT) {
InitializeNativeTarget();
InitializeNativeTargetAsmPrinter();
outs() << "------- Running JIT -------\n";
Module &M = *Mod;
ExecutionEngine *ee = EngineBuilder(std::move(Mod)).create();
if (!ee) {
errs() << "Error: execution engine creation failed.\n";
abort();
}
std::vector<GenericValue> args;
Function *brainf_func = M.getFunction("brainf");
GenericValue gv = ee->runFunction(brainf_func, args);
// Generated code calls putchar, and output is not guaranteed without fflush.
// The better place for fflush(stdout) call would be the generated code, but it
// is unmanageable because stdout linkage name depends on stdlib implementation.
fflush(stdout);
} else {
WriteBitcodeToFile(Mod.get(), *out);
}

View File

@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
Core
ExecutionEngine
MC
MCJIT
Support
nativecodegen
)

View File

@@ -22,7 +22,7 @@
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Mangler.h"
#include "llvm/Support/DynamicLibrary.h"
@@ -40,7 +40,7 @@ class KaleidoscopeJIT {
private:
std::unique_ptr<TargetMachine> TM;
const DataLayout DL;
ObjectLinkingLayer<> ObjectLayer;
RTDyldObjectLinkingLayer<> ObjectLayer;
IRCompileLayer<decltype(ObjectLayer)> CompileLayer;
public:

View File

@@ -1110,7 +1110,8 @@ static void HandleDefinition() {
if (auto FnAST = ParseDefinition()) {
if (auto *FnIR = FnAST->codegen()) {
fprintf(stderr, "Read function definition:");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
TheJIT->addModule(std::move(TheModule));
InitializeModule();
}
@@ -1124,7 +1125,8 @@ static void HandleExtern() {
if (auto ProtoAST = ParseExtern()) {
if (auto *FnIR = ProtoAST->codegen()) {
fprintf(stderr, "Read extern: ");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
}
} else {

View File

@@ -23,7 +23,7 @@
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
@@ -44,7 +44,7 @@ class KaleidoscopeJIT {
private:
std::unique_ptr<TargetMachine> TM;
const DataLayout DL;
ObjectLinkingLayer<> ObjectLayer;
RTDyldObjectLinkingLayer<> ObjectLayer;
IRCompileLayer<decltype(ObjectLayer)> CompileLayer;
typedef std::function<std::unique_ptr<Module>(std::unique_ptr<Module>)>

View File

@@ -1110,7 +1110,8 @@ static void HandleDefinition() {
if (auto FnAST = ParseDefinition()) {
if (auto *FnIR = FnAST->codegen()) {
fprintf(stderr, "Read function definition:");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
TheJIT->addModule(std::move(TheModule));
InitializeModule();
}
@@ -1124,7 +1125,8 @@ static void HandleExtern() {
if (auto ProtoAST = ParseExtern()) {
if (auto *FnIR = ProtoAST->codegen()) {
fprintf(stderr, "Read extern: ");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
}
} else {

View File

@@ -24,7 +24,7 @@
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
@@ -46,7 +46,7 @@ class KaleidoscopeJIT {
private:
std::unique_ptr<TargetMachine> TM;
const DataLayout DL;
ObjectLinkingLayer<> ObjectLayer;
RTDyldObjectLinkingLayer<> ObjectLayer;
IRCompileLayer<decltype(ObjectLayer)> CompileLayer;
typedef std::function<std::unique_ptr<Module>(std::unique_ptr<Module>)>
@@ -70,7 +70,7 @@ public:
CompileCallbackManager(
orc::createLocalCompileCallbackManager(TM->getTargetTriple(), 0)),
CODLayer(OptimizeLayer,
[this](Function &F) { return std::set<Function*>({&F}); },
[](Function &F) { return std::set<Function*>({&F}); },
*CompileCallbackManager,
orc::createLocalIndirectStubsManagerBuilder(
TM->getTargetTriple())) {

View File

@@ -1110,7 +1110,8 @@ static void HandleDefinition() {
if (auto FnAST = ParseDefinition()) {
if (auto *FnIR = FnAST->codegen()) {
fprintf(stderr, "Read function definition:");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
TheJIT->addModule(std::move(TheModule));
InitializeModule();
}
@@ -1124,7 +1125,8 @@ static void HandleExtern() {
if (auto ProtoAST = ParseExtern()) {
if (auto *FnIR = ProtoAST->codegen()) {
fprintf(stderr, "Read extern: ");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
}
} else {

View File

@@ -24,7 +24,7 @@
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
@@ -73,7 +73,7 @@ class KaleidoscopeJIT {
private:
std::unique_ptr<TargetMachine> TM;
const DataLayout DL;
ObjectLinkingLayer<> ObjectLayer;
RTDyldObjectLinkingLayer<> ObjectLayer;
IRCompileLayer<decltype(ObjectLayer)> CompileLayer;
typedef std::function<std::unique_ptr<Module>(std::unique_ptr<Module>)>

View File

@@ -1126,7 +1126,8 @@ static void HandleExtern() {
if (auto ProtoAST = ParseExtern()) {
if (auto *FnIR = ProtoAST->codegen()) {
fprintf(stderr, "Read extern: ");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
}
} else {

View File

@@ -26,7 +26,7 @@
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -79,7 +79,7 @@ class KaleidoscopeJIT {
private:
std::unique_ptr<TargetMachine> TM;
const DataLayout DL;
ObjectLinkingLayer<> ObjectLayer;
RTDyldObjectLinkingLayer<> ObjectLayer;
IRCompileLayer<decltype(ObjectLayer)> CompileLayer;
typedef std::function<std::unique_ptr<Module>(std::unique_ptr<Module>)>

View File

@@ -1150,7 +1150,8 @@ static void HandleExtern() {
if (auto ProtoAST = ParseExtern()) {
if (auto *FnIR = ProtoAST->codegen()) {
fprintf(stderr, "Read extern: ");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
}
} else {

View File

@@ -140,6 +140,8 @@ class PrototypeAST {
public:
PrototypeAST(const std::string &Name, std::vector<std::string> Args)
: Name(Name), Args(std::move(Args)) {}
const std::string &getName() const { return Name; }
};
/// FunctionAST - This class represents a function definition itself.

View File

@@ -522,7 +522,8 @@ static void HandleDefinition() {
if (auto FnAST = ParseDefinition()) {
if (auto *FnIR = FnAST->codegen()) {
fprintf(stderr, "Read function definition:");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
}
} else {
// Skip token for error recovery.
@@ -534,7 +535,8 @@ static void HandleExtern() {
if (auto ProtoAST = ParseExtern()) {
if (auto *FnIR = ProtoAST->codegen()) {
fprintf(stderr, "Read extern: ");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
}
} else {
// Skip token for error recovery.
@@ -547,7 +549,8 @@ static void HandleTopLevelExpression() {
if (auto FnAST = ParseTopLevelExpr()) {
if (auto *FnIR = FnAST->codegen()) {
fprintf(stderr, "Read top-level expression:");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
}
} else {
// Skip token for error recovery.
@@ -601,7 +604,7 @@ int main() {
MainLoop();
// Print out all of the generated code.
TheModule->dump();
TheModule->print(errs(), nullptr);
return 0;
}

View File

@@ -571,7 +571,8 @@ static void HandleDefinition() {
if (auto FnAST = ParseDefinition()) {
if (auto *FnIR = FnAST->codegen()) {
fprintf(stderr, "Read function definition:");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
TheJIT->addModule(std::move(TheModule));
InitializeModuleAndPassManager();
}
@@ -585,7 +586,8 @@ static void HandleExtern() {
if (auto ProtoAST = ParseExtern()) {
if (auto *FnIR = ProtoAST->codegen()) {
fprintf(stderr, "Read extern: ");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
}
} else {
@@ -648,14 +650,20 @@ static void MainLoop() {
// "Library" functions that can be "extern'd" from user code.
//===----------------------------------------------------------------------===//
#ifdef LLVM_ON_WIN32
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT
#endif
/// putchard - putchar that takes a double and returns 0.
extern "C" double putchard(double X) {
extern "C" DLLEXPORT double putchard(double X) {
fputc((char)X, stderr);
return 0;
}
/// printd - printf that takes a double prints it as "%f\n", returning 0.
extern "C" double printd(double X) {
extern "C" DLLEXPORT double printd(double X) {
fprintf(stderr, "%f\n", X);
return 0;
}

View File

@@ -622,7 +622,7 @@ Value *IfExprAST::codegen() {
if (!CondV)
return nullptr;
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
CondV = Builder.CreateFCmpONE(
CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
@@ -736,7 +736,7 @@ Value *ForExprAST::codegen() {
if (!EndCond)
return nullptr;
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
EndCond = Builder.CreateFCmpONE(
EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
@@ -845,7 +845,8 @@ static void HandleDefinition() {
if (auto FnAST = ParseDefinition()) {
if (auto *FnIR = FnAST->codegen()) {
fprintf(stderr, "Read function definition:");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
TheJIT->addModule(std::move(TheModule));
InitializeModuleAndPassManager();
}
@@ -859,7 +860,8 @@ static void HandleExtern() {
if (auto ProtoAST = ParseExtern()) {
if (auto *FnIR = ProtoAST->codegen()) {
fprintf(stderr, "Read extern: ");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
}
} else {
@@ -922,14 +924,20 @@ static void MainLoop() {
// "Library" functions that can be "extern'd" from user code.
//===----------------------------------------------------------------------===//
#ifdef LLVM_ON_WIN32
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT
#endif
/// putchard - putchar that takes a double and returns 0.
extern "C" double putchard(double X) {
extern "C" DLLEXPORT double putchard(double X) {
fputc((char)X, stderr);
return 0;
}
/// printd - printf that takes a double prints it as "%f\n", returning 0.
extern "C" double printd(double X) {
extern "C" DLLEXPORT double printd(double X) {
fprintf(stderr, "%f\n", X);
return 0;
}

View File

@@ -567,7 +567,7 @@ static std::unique_ptr<PrototypeAST> ParsePrototype() {
// Read the precedence if present.
if (CurTok == tok_number) {
if (NumVal < 1 || NumVal > 100)
return LogErrorP("Invalid precedecnce: must be 1..100");
return LogErrorP("Invalid precedence: must be 1..100");
BinaryPrecedence = (unsigned)NumVal;
getNextToken();
}
@@ -734,7 +734,7 @@ Value *IfExprAST::codegen() {
if (!CondV)
return nullptr;
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
CondV = Builder.CreateFCmpONE(
CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
@@ -848,7 +848,7 @@ Value *ForExprAST::codegen() {
if (!EndCond)
return nullptr;
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
EndCond = Builder.CreateFCmpONE(
EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
@@ -964,7 +964,8 @@ static void HandleDefinition() {
if (auto FnAST = ParseDefinition()) {
if (auto *FnIR = FnAST->codegen()) {
fprintf(stderr, "Read function definition:");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
TheJIT->addModule(std::move(TheModule));
InitializeModuleAndPassManager();
}
@@ -978,7 +979,8 @@ static void HandleExtern() {
if (auto ProtoAST = ParseExtern()) {
if (auto *FnIR = ProtoAST->codegen()) {
fprintf(stderr, "Read extern: ");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
}
} else {
@@ -1041,14 +1043,20 @@ static void MainLoop() {
// "Library" functions that can be "extern'd" from user code.
//===----------------------------------------------------------------------===//
#ifdef LLVM_ON_WIN32
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT
#endif
/// putchard - putchar that takes a double and returns 0.
extern "C" double putchard(double X) {
extern "C" DLLEXPORT double putchard(double X) {
fputc((char)X, stderr);
return 0;
}
/// printd - printf that takes a double prints it as "%f\n", returning 0.
extern "C" double printd(double X) {
extern "C" DLLEXPORT double printd(double X) {
fprintf(stderr, "%f\n", X);
return 0;
}

View File

@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
RuntimeDyld
ScalarOpts
Support
TransformUtils
native
)

View File

@@ -639,7 +639,7 @@ static std::unique_ptr<PrototypeAST> ParsePrototype() {
// Read the precedence if present.
if (CurTok == tok_number) {
if (NumVal < 1 || NumVal > 100)
return LogErrorP("Invalid precedecnce: must be 1..100");
return LogErrorP("Invalid precedence: must be 1..100");
BinaryPrecedence = (unsigned)NumVal;
getNextToken();
}
@@ -840,7 +840,7 @@ Value *IfExprAST::codegen() {
if (!CondV)
return nullptr;
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
CondV = Builder.CreateFCmpONE(
CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
@@ -963,7 +963,7 @@ Value *ForExprAST::codegen() {
Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
Builder.CreateStore(NextVar, Alloca);
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
EndCond = Builder.CreateFCmpONE(
EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
@@ -1115,6 +1115,8 @@ static void InitializeModuleAndPassManager() {
// Create a new pass manager attached to it.
TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get());
// Promote allocas to registers.
TheFPM->add(createPromoteMemoryToRegisterPass());
// Do simple "peephole" optimizations and bit-twiddling optzns.
TheFPM->add(createInstructionCombiningPass());
// Reassociate expressions.
@@ -1131,7 +1133,8 @@ static void HandleDefinition() {
if (auto FnAST = ParseDefinition()) {
if (auto *FnIR = FnAST->codegen()) {
fprintf(stderr, "Read function definition:");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
TheJIT->addModule(std::move(TheModule));
InitializeModuleAndPassManager();
}
@@ -1145,7 +1148,8 @@ static void HandleExtern() {
if (auto ProtoAST = ParseExtern()) {
if (auto *FnIR = ProtoAST->codegen()) {
fprintf(stderr, "Read extern: ");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
}
} else {
@@ -1208,14 +1212,20 @@ static void MainLoop() {
// "Library" functions that can be "extern'd" from user code.
//===----------------------------------------------------------------------===//
#ifdef LLVM_ON_WIN32
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT
#endif
/// putchard - putchar that takes a double and returns 0.
extern "C" double putchard(double X) {
extern "C" DLLEXPORT double putchard(double X) {
fputc((char)X, stderr);
return 0;
}
/// printd - printf that takes a double prints it as "%f\n", returning 0.
extern "C" double printd(double X) {
extern "C" DLLEXPORT double printd(double X) {
fprintf(stderr, "%f\n", X);
return 0;
}

View File

@@ -642,7 +642,7 @@ static std::unique_ptr<PrototypeAST> ParsePrototype() {
// Read the precedence if present.
if (CurTok == tok_number) {
if (NumVal < 1 || NumVal > 100)
return LogErrorP("Invalid precedecnce: must be 1..100");
return LogErrorP("Invalid precedence: must be 1..100");
BinaryPrecedence = (unsigned)NumVal;
getNextToken();
}
@@ -841,7 +841,7 @@ Value *IfExprAST::codegen() {
if (!CondV)
return nullptr;
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
CondV = Builder.CreateFCmpONE(
CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
@@ -964,7 +964,7 @@ Value *ForExprAST::codegen() {
Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
Builder.CreateStore(NextVar, Alloca);
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
EndCond = Builder.CreateFCmpONE(
EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
@@ -1114,7 +1114,8 @@ static void HandleDefinition() {
if (auto FnAST = ParseDefinition()) {
if (auto *FnIR = FnAST->codegen()) {
fprintf(stderr, "Read function definition:");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
}
} else {
// Skip token for error recovery.
@@ -1126,7 +1127,8 @@ static void HandleExtern() {
if (auto ProtoAST = ParseExtern()) {
if (auto *FnIR = ProtoAST->codegen()) {
fprintf(stderr, "Read extern: ");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
}
} else {
@@ -1171,14 +1173,20 @@ static void MainLoop() {
// "Library" functions that can be "extern'd" from user code.
//===----------------------------------------------------------------------===//
#ifdef LLVM_ON_WIN32
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT
#endif
/// putchard - putchar that takes a double and returns 0.
extern "C" double putchard(double X) {
extern "C" DLLEXPORT double putchard(double X) {
fputc((char)X, stderr);
return 0;
}
/// printd - printf that takes a double prints it as "%f\n", returning 0.
extern "C" double printd(double X) {
extern "C" DLLEXPORT double printd(double X) {
fprintf(stderr, "%f\n", X);
return 0;
}

View File

@@ -756,7 +756,7 @@ static std::unique_ptr<PrototypeAST> ParsePrototype() {
// Read the precedence if present.
if (CurTok == tok_number) {
if (NumVal < 1 || NumVal > 100)
return LogErrorP("Invalid precedecnce: must be 1..100");
return LogErrorP("Invalid precedence: must be 1..100");
BinaryPrecedence = (unsigned)NumVal;
getNextToken();
}
@@ -1004,7 +1004,7 @@ Value *IfExprAST::codegen() {
if (!CondV)
return nullptr;
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
CondV = Builder.CreateFCmpONE(
CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
@@ -1129,7 +1129,7 @@ Value *ForExprAST::codegen() {
Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
Builder.CreateStore(NextVar, Alloca);
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
EndCond = Builder.CreateFCmpONE(
EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
@@ -1379,14 +1379,20 @@ static void MainLoop() {
// "Library" functions that can be "extern'd" from user code.
//===----------------------------------------------------------------------===//
#ifdef LLVM_ON_WIN32
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT
#endif
/// putchard - putchar that takes a double and returns 0.
extern "C" double putchard(double X) {
extern "C" DLLEXPORT double putchard(double X) {
fputc((char)X, stderr);
return 0;
}
/// printd - printf that takes a double prints it as "%f\n", returning 0.
extern "C" double printd(double X) {
extern "C" DLLEXPORT double printd(double X) {
fprintf(stderr, "%f\n", X);
return 0;
}
@@ -1439,7 +1445,7 @@ int main() {
DBuilder->finalize();
// Print out all of the generated code.
TheModule->dump();
TheModule->print(errs(), nullptr);
return 0;
}

View File

@@ -1395,7 +1395,8 @@ static void HandleDefinition() {
if (Function *LF = F->Codegen()) {
#ifndef MINIMAL_STDERR_OUTPUT
fprintf(stderr, "Read function definition:");
LF->dump();
LF->print(errs());
fprintf(stderr, "\n");
#endif
}
} else {
@@ -1409,7 +1410,8 @@ static void HandleExtern() {
if (Function *F = P->Codegen()) {
#ifndef MINIMAL_STDERR_OUTPUT
fprintf(stderr, "Read extern: ");
F->dump();
F->print(errs());
fprintf(stderr, "\n");
#endif
}
} else {
@@ -1540,7 +1542,7 @@ int main(int argc, char **argv) {
#ifndef MINIMAL_STDERR_OUTPUT
// Print out all of the generated code.
TheHelper->dump();
TheHelper->print(errs());
#endif
return 0;

View File

@@ -1474,7 +1474,8 @@ static void HandleDefinition() {
Function *LF = F->Codegen();
if (LF && VerboseOutput) {
fprintf(stderr, "Read function definition:");
LF->dump();
LF->print(errs());
fprintf(stderr, "\n");
}
} else {
// Skip token for error recovery.
@@ -1487,7 +1488,8 @@ static void HandleExtern() {
Function *F = P->Codegen();
if (F && VerboseOutput) {
fprintf(stderr, "Read extern: ");
F->dump();
F->print(errs());
fprintf(stderr, "\n");
}
} else {
// Skip token for error recovery.

View File

@@ -1252,7 +1252,8 @@ static void HandleDefinition() {
if (Function *LF = F->Codegen()) {
#ifndef MINIMAL_STDERR_OUTPUT
fprintf(stderr, "Read function definition:");
LF->dump();
LF->print(errs());
fprintf(stderr, "\n");
#endif
}
} else {
@@ -1266,7 +1267,8 @@ static void HandleExtern() {
if (Function *F = P->Codegen()) {
#ifndef MINIMAL_STDERR_OUTPUT
fprintf(stderr, "Read extern: ");
F->dump();
F->print(errs());
fprintf(stderr, "\n");
#endif
}
} else {
@@ -1370,7 +1372,7 @@ int main() {
#ifndef MINIMAL_STDERR_OUTPUT
// Print out all of the generated code.
TheHelper->dump();
TheHelper->print(errs());
#endif
return 0;

View File

@@ -1010,7 +1010,8 @@ static void HandleDefinition() {
if (Function *LF = F->Codegen()) {
#ifndef MINIMAL_STDERR_OUTPUT
fprintf(stderr, "Read function definition:");
LF->dump();
LF->print(errs());
fprintf(stderr, "\n");
#endif
}
} else {
@@ -1024,7 +1025,8 @@ static void HandleExtern() {
if (Function *F = P->Codegen()) {
#ifndef MINIMAL_STDERR_OUTPUT
fprintf(stderr, "Read extern: ");
F->dump();
F->print(errs());
fprintf(stderr, "\n");
#endif
}
} else {
@@ -1157,7 +1159,7 @@ int main(int argc, char **argv) {
// Print out all of the generated code.
TheFPM = 0;
#ifndef MINIMAL_STDERR_OUTPUT
TheModule->dump();
TheModule->print(errs(), nullptr);
#endif
return 0;
}

View File

@@ -1293,7 +1293,8 @@ static void HandleDefinition() {
if (Function *LF = F->Codegen()) {
#ifndef MINIMAL_STDERR_OUTPUT
fprintf(stderr, "Read function definition:");
LF->dump();
LF->print(errs());
fprintf(stderr, "\n");
#endif
}
} else {
@@ -1307,7 +1308,8 @@ static void HandleExtern() {
if (Function *F = P->Codegen()) {
#ifndef MINIMAL_STDERR_OUTPUT
fprintf(stderr, "Read extern: ");
F->dump();
F->print(errs());
fprintf(stderr, "\n");
#endif
}
} else {

View File

@@ -24,7 +24,7 @@
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Mangler.h"
#include "llvm/Support/DynamicLibrary.h"
@@ -40,7 +40,7 @@ namespace orc {
class KaleidoscopeJIT {
public:
typedef ObjectLinkingLayer<> ObjLayerT;
typedef RTDyldObjectLinkingLayer<> ObjLayerT;
typedef IRCompileLayer<ObjLayerT> CompileLayerT;
typedef CompileLayerT::ModuleSetHandleT ModuleHandleT;
@@ -97,17 +97,40 @@ private:
}
JITSymbol findMangledSymbol(const std::string &Name) {
#ifdef LLVM_ON_WIN32
// The symbol lookup of ObjectLinkingLayer uses the SymbolRef::SF_Exported
// flag to decide whether a symbol will be visible or not, when we call
// IRCompileLayer::findSymbolIn with ExportedSymbolsOnly set to true.
//
// But for Windows COFF objects, this flag is currently never set.
// For a potential solution see: https://reviews.llvm.org/rL258665
// For now, we allow non-exported symbols on Windows as a workaround.
const bool ExportedSymbolsOnly = false;
#else
const bool ExportedSymbolsOnly = true;
#endif
// Search modules in reverse order: from last added to first added.
// This is the opposite of the usual search order for dlsym, but makes more
// sense in a REPL where we want to bind to the newest available definition.
for (auto H : make_range(ModuleHandles.rbegin(), ModuleHandles.rend()))
if (auto Sym = CompileLayer.findSymbolIn(H, Name, true))
if (auto Sym = CompileLayer.findSymbolIn(H, Name, ExportedSymbolsOnly))
return Sym;
// If we can't find the symbol in the JIT, try looking in the host process.
if (auto SymAddr = RTDyldMemoryManager::getSymbolAddressInProcess(Name))
return JITSymbol(SymAddr, JITSymbolFlags::Exported);
#ifdef LLVM_ON_WIN32
// For Windows retry without "_" at beginning, as RTDyldMemoryManager uses
// GetProcAddress and standard libraries like msvcrt.dll use names
// with and without "_" (for example "_itoa" but "sin").
if (Name.length() > 2 && Name[0] == '_')
if (auto SymAddr =
RTDyldMemoryManager::getSymbolAddressInProcess(Name.substr(1)))
return JITSymbol(SymAddr, JITSymbolFlags::Exported);
#endif
return nullptr;
}

View File

@@ -11,4 +11,4 @@ add_llvm_example(ParallelJIT
ParallelJIT.cpp
)
target_link_libraries(ParallelJIT ${PTHREAD_LIB})
target_link_libraries(ParallelJIT ${LLVM_PTHREAD_LIB})

View File

@@ -56,6 +56,9 @@ void LLVMAddMergedLoadStoreMotionPass(LLVMPassManagerRef PM);
/** See llvm::createGVNPass function. */
void LLVMAddGVNPass(LLVMPassManagerRef PM);
/** See llvm::createNewGVNPass function. */
void LLVMAddNewGVNPass(LLVMPassManagerRef PM);
/** See llvm::createIndVarSimplifyPass function. */
void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM);

View File

@@ -44,7 +44,7 @@ typedef bool lto_bool_t;
* @{
*/
#define LTO_API_VERSION 20
#define LTO_API_VERSION 21
/**
* \since prior to LTO_API_VERSION=3
@@ -636,6 +636,29 @@ extern unsigned int thinlto_module_get_num_objects(thinlto_code_gen_t cg);
extern LTOObjectBuffer thinlto_module_get_object(thinlto_code_gen_t cg,
unsigned int index);
/**
* Returns the number of object files produced by the ThinLTO CodeGenerator.
*
* It usually matches the number of input files, but this is not a guarantee of
* the API and may change in future implementation, so the client should not
* assume it.
*
* \since LTO_API_VERSION=21
*/
unsigned int thinlto_module_get_num_object_files(thinlto_code_gen_t cg);
/**
* Returns the path to the ith object file produced by the ThinLTO
* CodeGenerator.
*
* Client should use \p thinlto_module_get_num_object_files() to get the number
* of available objects.
*
* \since LTO_API_VERSION=21
*/
const char *thinlto_module_get_object_file(thinlto_code_gen_t cg,
unsigned int index);
/**
* Sets which PIC code model to generate.
* Returns true on error (check lto_get_error_message() for details).
@@ -724,6 +747,17 @@ extern void thinlto_codegen_set_cache_entry_expiration(thinlto_code_gen_t cg,
extern void thinlto_codegen_set_savetemps_dir(thinlto_code_gen_t cg,
const char *save_temps_dir);
/**
* Set the path to a directory where to save generated object files. This
* path can be used by a linker to request on-disk files instead of in-memory
* buffers. When set, results are available through
* thinlto_module_get_object_file() instead of thinlto_module_get_object().
*
* \since LTO_API_VERSION=21
*/
void thinlto_set_generated_objects_dir(thinlto_code_gen_t cg,
const char *save_temps_dir);
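/* Illustrative, non-normative usage sketch: assuming `cg` is an already
 * configured thinlto_code_gen_t, a linker that wants on-disk objects can do
 * the following using only the functions declared above:
 *
 *   thinlto_set_generated_objects_dir(cg, "/tmp/thinlto.objs");
 *   // ... run ThinLTO code generation ...
 *   unsigned NumObjs = thinlto_module_get_num_object_files(cg);
 *   for (unsigned I = 0; I != NumObjs; ++I)
 *     printf("%s\n", thinlto_module_get_object_file(cg, I));
 */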
/**
* Sets the cpu to generate code for.
*

View File

@@ -21,12 +21,22 @@
#include "llvm/Support/ErrorHandling.h"
#include <memory>
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
do { \
if (usesLayout<IEEEFloat>(getSemantics())) \
return U.IEEE.METHOD_CALL; \
if (usesLayout<DoubleAPFloat>(getSemantics())) \
return U.Double.METHOD_CALL; \
llvm_unreachable("Unexpected semantics"); \
} while (false)
namespace llvm {
struct fltSemantics;
class APSInt;
class StringRef;
class APFloat;
class raw_ostream;
template <typename T> class SmallVectorImpl;
@@ -41,7 +51,7 @@ enum lostFraction { // Example of truncated bits:
lfMoreThanHalf // 1xxxxx x's not all zero
};
/// \brief A self-contained host- and target-independent arbitrary-precision
/// A self-contained host- and target-independent arbitrary-precision
/// floating-point software implementation.
///
/// APFloat uses bignum integer arithmetic as provided by static functions in
@@ -135,16 +145,16 @@ struct APFloatBase {
/// \name Floating Point Semantics.
/// @{
static const fltSemantics IEEEhalf;
static const fltSemantics IEEEsingle;
static const fltSemantics IEEEdouble;
static const fltSemantics IEEEquad;
static const fltSemantics PPCDoubleDouble;
static const fltSemantics x87DoubleExtended;
static const fltSemantics &IEEEhalf();
static const fltSemantics &IEEEsingle();
static const fltSemantics &IEEEdouble();
static const fltSemantics &IEEEquad();
static const fltSemantics &PPCDoubleDouble();
static const fltSemantics &x87DoubleExtended();
/// A Pseudo fltsemantic used to construct APFloats that cannot conflict with
/// anything real.
static const fltSemantics Bogus;
static const fltSemantics &Bogus();
/// @}
@@ -190,7 +200,7 @@ struct APFloatBase {
uninitialized
};
/// \brief Enumeration of \c ilogb error results.
/// Enumeration of \c ilogb error results.
enum IlogbErrorKinds {
IEK_Zero = INT_MIN + 1,
IEK_NaN = INT_MIN,
@@ -226,7 +236,7 @@ public:
/// @}
/// \brief Returns whether this instance allocated memory.
/// Returns whether this instance allocated memory.
bool needsCleanup() const { return partCount() > 1; }
/// \name Convenience "constructors"
@@ -234,10 +244,6 @@ public:
/// @}
/// Used to insert APFloat objects, or objects that contain APFloat objects,
/// into FoldingSets.
void Profile(FoldingSetNodeID &NID) const;
/// \name Arithmetic
/// @{
@@ -254,53 +260,12 @@ public:
/// IEEE-754R 5.3.1: nextUp/nextDown.
opStatus next(bool nextDown);
/// \brief Operator+ overload which provides the default
/// \c nmNearestTiesToEven rounding mode and *no* error checking.
IEEEFloat operator+(const IEEEFloat &RHS) const {
IEEEFloat Result = *this;
Result.add(RHS, rmNearestTiesToEven);
return Result;
}
/// \brief Operator- overload which provides the default
/// \c nmNearestTiesToEven rounding mode and *no* error checking.
IEEEFloat operator-(const IEEEFloat &RHS) const {
IEEEFloat Result = *this;
Result.subtract(RHS, rmNearestTiesToEven);
return Result;
}
/// \brief Operator* overload which provides the default
/// \c nmNearestTiesToEven rounding mode and *no* error checking.
IEEEFloat operator*(const IEEEFloat &RHS) const {
IEEEFloat Result = *this;
Result.multiply(RHS, rmNearestTiesToEven);
return Result;
}
/// \brief Operator/ overload which provides the default
/// \c nmNearestTiesToEven rounding mode and *no* error checking.
IEEEFloat operator/(const IEEEFloat &RHS) const {
IEEEFloat Result = *this;
Result.divide(RHS, rmNearestTiesToEven);
return Result;
}
/// @}
/// \name Sign operations.
/// @{
void changeSign();
void clearSign();
void copySign(const IEEEFloat &);
/// \brief A static helper to produce a copy of an APFloat value with its sign
/// copied from some other APFloat.
static IEEEFloat copySign(IEEEFloat Value, const IEEEFloat &Sign) {
Value.copySign(Sign);
return Value;
}
/// @}
@@ -310,7 +275,6 @@ public:
opStatus convert(const fltSemantics &, roundingMode, bool *);
opStatus convertToInteger(integerPart *, unsigned int, bool, roundingMode,
bool *) const;
opStatus convertToInteger(APSInt &, roundingMode, bool *) const;
opStatus convertFromAPInt(const APInt &, bool, roundingMode);
opStatus convertFromSignExtendedInteger(const integerPart *, unsigned int,
bool, roundingMode);
@@ -406,7 +370,7 @@ public:
IEEEFloat &operator=(const IEEEFloat &);
IEEEFloat &operator=(IEEEFloat &&);
/// \brief Overload to compute a hash code for an APFloat value.
/// Overload to compute a hash code for an APFloat value.
///
/// Note that the use of hash codes for floating point values is in general
/// frought with peril. Equality is hard to define for these values. For
@@ -442,9 +406,9 @@ public:
/// If this value has an exact multiplicative inverse, store it in inv and
/// return true.
bool getExactInverse(IEEEFloat *inv) const;
bool getExactInverse(APFloat *inv) const;
/// \brief Returns the exponent of the internal representation of the APFloat.
/// Returns the exponent of the internal representation of the APFloat.
///
/// Because the radix of APFloat is 2, this is equivalent to floor(log2(x)).
/// For special APFloat values, this returns special error codes:
@@ -455,7 +419,7 @@ public:
///
friend int ilogb(const IEEEFloat &Arg);
/// \brief Returns: X * 2^Exp for integral exponents.
/// Returns: X * 2^Exp for integral exponents.
friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode);
friend IEEEFloat frexp(const IEEEFloat &X, int &Exp, roundingMode);
@@ -479,6 +443,8 @@ public:
/// @}
cmpResult compareAbsoluteValue(const IEEEFloat &) const;
private:
/// \name Simple Queries
/// @{
@@ -527,7 +493,6 @@ private:
bool convertFromStringSpecials(StringRef str);
opStatus normalize(roundingMode, lostFraction);
opStatus addOrSubtract(const IEEEFloat &, roundingMode, bool subtract);
cmpResult compareAbsoluteValue(const IEEEFloat &) const;
opStatus handleOverflow(roundingMode);
bool roundAwayFromZero(roundingMode, lostFraction, unsigned int) const;
opStatus convertToSignExtendedInteger(integerPart *, unsigned int, bool,
@@ -600,6 +565,12 @@ class DoubleAPFloat final : public APFloatBase {
const fltSemantics *Semantics;
std::unique_ptr<APFloat[]> Floats;
opStatus addImpl(const APFloat &a, const APFloat &aa, const APFloat &c,
const APFloat &cc, roundingMode RM);
opStatus addWithSpecial(const DoubleAPFloat &LHS, const DoubleAPFloat &RHS,
DoubleAPFloat &Out, roundingMode RM);
public:
DoubleAPFloat(const fltSemantics &S);
DoubleAPFloat(const fltSemantics &S, uninitializedTag);
@@ -623,8 +594,68 @@ public:
APFloat &getFirst() { return Floats[0]; }
const APFloat &getFirst() const { return Floats[0]; }
APFloat &getSecond() { return Floats[1]; }
const APFloat &getSecond() const { return Floats[1]; }
opStatus add(const DoubleAPFloat &RHS, roundingMode RM);
opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM);
opStatus multiply(const DoubleAPFloat &RHS, roundingMode RM);
opStatus divide(const DoubleAPFloat &RHS, roundingMode RM);
opStatus remainder(const DoubleAPFloat &RHS);
opStatus mod(const DoubleAPFloat &RHS);
opStatus fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
const DoubleAPFloat &Addend, roundingMode RM);
opStatus roundToIntegral(roundingMode RM);
void changeSign();
cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const;
fltCategory getCategory() const;
bool isNegative() const;
void makeInf(bool Neg);
void makeZero(bool Neg);
void makeLargest(bool Neg);
void makeSmallest(bool Neg);
void makeSmallestNormalized(bool Neg);
void makeNaN(bool SNaN, bool Neg, const APInt *fill);
cmpResult compare(const DoubleAPFloat &RHS) const;
bool bitwiseIsEqual(const DoubleAPFloat &RHS) const;
APInt bitcastToAPInt() const;
opStatus convertFromString(StringRef, roundingMode);
opStatus next(bool nextDown);
opStatus convertToInteger(integerPart *Input, unsigned int Width,
bool IsSigned, roundingMode RM,
bool *IsExact) const;
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM);
opStatus convertFromSignExtendedInteger(const integerPart *Input,
unsigned int InputSize, bool IsSigned,
roundingMode RM);
opStatus convertFromZeroExtendedInteger(const integerPart *Input,
unsigned int InputSize, bool IsSigned,
roundingMode RM);
unsigned int convertToHexString(char *DST, unsigned int HexDigits,
bool UpperCase, roundingMode RM) const;
bool isDenormal() const;
bool isSmallest() const;
bool isLargest() const;
bool isInteger() const;
void toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
unsigned FormatMaxPadding) const;
bool getExactInverse(APFloat *inv) const;
friend int ilogb(const DoubleAPFloat &Arg);
friend DoubleAPFloat scalbn(DoubleAPFloat X, int Exp, roundingMode);
friend DoubleAPFloat frexp(const DoubleAPFloat &X, int &Exp, roundingMode);
friend hash_code hash_value(const DoubleAPFloat &Arg);
};
hash_code hash_value(const DoubleAPFloat &Arg);
} // End detail namespace
// This is an interface class that is currently forwarding functionalities from
@@ -643,48 +674,56 @@ class APFloat : public APFloatBase {
explicit Storage(IEEEFloat F, const fltSemantics &S);
explicit Storage(DoubleAPFloat F, const fltSemantics &S)
: Double(std::move(F)) {
assert(&S == &PPCDoubleDouble);
assert(&S == &PPCDoubleDouble());
}
template <typename... ArgTypes>
Storage(const fltSemantics &Semantics, ArgTypes &&... Args) {
if (usesLayout<IEEEFloat>(Semantics)) {
new (&IEEE) IEEEFloat(Semantics, std::forward<ArgTypes>(Args)...);
} else if (usesLayout<DoubleAPFloat>(Semantics)) {
new (&Double) DoubleAPFloat(Semantics, std::forward<ArgTypes>(Args)...);
} else {
llvm_unreachable("Unexpected semantics");
return;
}
if (usesLayout<DoubleAPFloat>(Semantics)) {
new (&Double) DoubleAPFloat(Semantics, std::forward<ArgTypes>(Args)...);
return;
}
llvm_unreachable("Unexpected semantics");
}
~Storage() {
if (usesLayout<IEEEFloat>(*semantics)) {
IEEE.~IEEEFloat();
} else if (usesLayout<DoubleAPFloat>(*semantics)) {
Double.~DoubleAPFloat();
} else {
llvm_unreachable("Unexpected semantics");
return;
}
if (usesLayout<DoubleAPFloat>(*semantics)) {
Double.~DoubleAPFloat();
return;
}
llvm_unreachable("Unexpected semantics");
}
Storage(const Storage &RHS) {
if (usesLayout<IEEEFloat>(*RHS.semantics)) {
new (this) IEEEFloat(RHS.IEEE);
} else if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
new (this) DoubleAPFloat(RHS.Double);
} else {
llvm_unreachable("Unexpected semantics");
return;
}
if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
new (this) DoubleAPFloat(RHS.Double);
return;
}
llvm_unreachable("Unexpected semantics");
}
Storage(Storage &&RHS) {
if (usesLayout<IEEEFloat>(*RHS.semantics)) {
new (this) IEEEFloat(std::move(RHS.IEEE));
} else if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
new (this) DoubleAPFloat(std::move(RHS.Double));
} else {
llvm_unreachable("Unexpected semantics");
return;
}
if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
new (this) DoubleAPFloat(std::move(RHS.Double));
return;
}
llvm_unreachable("Unexpected semantics");
}
Storage &operator=(const Storage &RHS) {
@@ -720,51 +759,51 @@ class APFloat : public APFloatBase {
static_assert(std::is_same<T, IEEEFloat>::value ||
std::is_same<T, DoubleAPFloat>::value, "");
if (std::is_same<T, DoubleAPFloat>::value) {
return &Semantics == &PPCDoubleDouble;
return &Semantics == &PPCDoubleDouble();
}
return &Semantics != &PPCDoubleDouble;
return &Semantics != &PPCDoubleDouble();
}
IEEEFloat &getIEEE() {
if (usesLayout<IEEEFloat>(*U.semantics)) {
if (usesLayout<IEEEFloat>(*U.semantics))
return U.IEEE;
} else if (usesLayout<DoubleAPFloat>(*U.semantics)) {
if (usesLayout<DoubleAPFloat>(*U.semantics))
return U.Double.getFirst().U.IEEE;
} else {
llvm_unreachable("Unexpected semantics");
}
llvm_unreachable("Unexpected semantics");
}
const IEEEFloat &getIEEE() const {
if (usesLayout<IEEEFloat>(*U.semantics)) {
if (usesLayout<IEEEFloat>(*U.semantics))
return U.IEEE;
} else if (usesLayout<DoubleAPFloat>(*U.semantics)) {
if (usesLayout<DoubleAPFloat>(*U.semantics))
return U.Double.getFirst().U.IEEE;
} else {
llvm_unreachable("Unexpected semantics");
}
llvm_unreachable("Unexpected semantics");
}
void makeZero(bool Neg) { getIEEE().makeZero(Neg); }
void makeZero(bool Neg) { APFLOAT_DISPATCH_ON_SEMANTICS(makeZero(Neg)); }
void makeInf(bool Neg) { getIEEE().makeInf(Neg); }
void makeInf(bool Neg) { APFLOAT_DISPATCH_ON_SEMANTICS(makeInf(Neg)); }
void makeNaN(bool SNaN, bool Neg, const APInt *fill) {
getIEEE().makeNaN(SNaN, Neg, fill);
APFLOAT_DISPATCH_ON_SEMANTICS(makeNaN(SNaN, Neg, fill));
}
void makeLargest(bool Neg) { getIEEE().makeLargest(Neg); }
void makeLargest(bool Neg) {
APFLOAT_DISPATCH_ON_SEMANTICS(makeLargest(Neg));
}
void makeSmallest(bool Neg) { getIEEE().makeSmallest(Neg); }
void makeSmallest(bool Neg) {
APFLOAT_DISPATCH_ON_SEMANTICS(makeSmallest(Neg));
}
void makeSmallestNormalized(bool Neg) {
getIEEE().makeSmallestNormalized(Neg);
APFLOAT_DISPATCH_ON_SEMANTICS(makeSmallestNormalized(Neg));
}
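The make* helpers above now route through APFLOAT_DISPATCH_ON_SEMANTICS, which is defined earlier in this header and is outside this hunk. As a hedged sketch, it presumably expands to a two-way dispatch over the union members, roughly:
// Sketch only; the authoritative definition lives earlier in APFloat.h and may differ.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)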
// FIXME: This is needed because clang 3.3 (and older) always checks for the
// default constructor in an array aggregate initialization, even if no
// elements in the array are default initialized.
APFloat() : U(IEEEdouble) {
APFloat() : U(IEEEdouble()) {
llvm_unreachable("This is a workaround for old clang.");
}
@@ -772,6 +811,16 @@ class APFloat : public APFloatBase {
explicit APFloat(DoubleAPFloat F, const fltSemantics &S)
: U(std::move(F), S) {}
cmpResult compareAbsoluteValue(const APFloat &RHS) const {
assert(&getSemantics() == &RHS.getSemantics() &&
"Should only compare APFloats with the same semantics");
if (usesLayout<IEEEFloat>(getSemantics()))
return U.IEEE.compareAbsoluteValue(RHS.U.IEEE);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.compareAbsoluteValue(RHS.U.Double);
llvm_unreachable("Unexpected semantics");
}
public:
APFloat(const fltSemantics &Semantics) : U(Semantics) {}
APFloat(const fltSemantics &Semantics, StringRef S);
@@ -780,22 +829,14 @@ public:
APFloat(const fltSemantics &Semantics, uninitializedTag)
: U(Semantics, uninitialized) {}
APFloat(const fltSemantics &Semantics, const APInt &I) : U(Semantics, I) {}
explicit APFloat(double d) : U(IEEEFloat(d), IEEEdouble) {}
explicit APFloat(float f) : U(IEEEFloat(f), IEEEsingle) {}
explicit APFloat(double d) : U(IEEEFloat(d), IEEEdouble()) {}
explicit APFloat(float f) : U(IEEEFloat(f), IEEEsingle()) {}
APFloat(const APFloat &RHS) = default;
APFloat(APFloat &&RHS) = default;
~APFloat() = default;
bool needsCleanup() const {
if (usesLayout<IEEEFloat>(getSemantics())) {
return U.IEEE.needsCleanup();
} else if (usesLayout<DoubleAPFloat>(getSemantics())) {
return U.Double.needsCleanup();
} else {
llvm_unreachable("Unexpected semantics");
}
}
bool needsCleanup() const { APFLOAT_DISPATCH_ON_SEMANTICS(needsCleanup()); }
/// Factory for Positive and Negative Zero.
///
@@ -882,57 +923,134 @@ public:
/// \param isIEEE - If 128 bit number, select between PPC and IEEE
static APFloat getAllOnesValue(unsigned BitWidth, bool isIEEE = false);
void Profile(FoldingSetNodeID &NID) const { getIEEE().Profile(NID); }
/// Used to insert APFloat objects, or objects that contain APFloat objects,
/// into FoldingSets.
void Profile(FoldingSetNodeID &NID) const;
opStatus add(const APFloat &RHS, roundingMode RM) {
return getIEEE().add(RHS.getIEEE(), RM);
assert(&getSemantics() == &RHS.getSemantics() &&
"Should only call on two APFloats with the same semantics");
if (usesLayout<IEEEFloat>(getSemantics()))
return U.IEEE.add(RHS.U.IEEE, RM);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.add(RHS.U.Double, RM);
llvm_unreachable("Unexpected semantics");
}
opStatus subtract(const APFloat &RHS, roundingMode RM) {
return getIEEE().subtract(RHS.getIEEE(), RM);
assert(&getSemantics() == &RHS.getSemantics() &&
"Should only call on two APFloats with the same semantics");
if (usesLayout<IEEEFloat>(getSemantics()))
return U.IEEE.subtract(RHS.U.IEEE, RM);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.subtract(RHS.U.Double, RM);
llvm_unreachable("Unexpected semantics");
}
opStatus multiply(const APFloat &RHS, roundingMode RM) {
return getIEEE().multiply(RHS.getIEEE(), RM);
assert(&getSemantics() == &RHS.getSemantics() &&
"Should only call on two APFloats with the same semantics");
if (usesLayout<IEEEFloat>(getSemantics()))
return U.IEEE.multiply(RHS.U.IEEE, RM);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.multiply(RHS.U.Double, RM);
llvm_unreachable("Unexpected semantics");
}
opStatus divide(const APFloat &RHS, roundingMode RM) {
return getIEEE().divide(RHS.getIEEE(), RM);
assert(&getSemantics() == &RHS.getSemantics() &&
"Should only call on two APFloats with the same semantics");
if (usesLayout<IEEEFloat>(getSemantics()))
return U.IEEE.divide(RHS.U.IEEE, RM);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.divide(RHS.U.Double, RM);
llvm_unreachable("Unexpected semantics");
}
opStatus remainder(const APFloat &RHS) {
return getIEEE().remainder(RHS.getIEEE());
assert(&getSemantics() == &RHS.getSemantics() &&
"Should only call on two APFloats with the same semantics");
if (usesLayout<IEEEFloat>(getSemantics()))
return U.IEEE.remainder(RHS.U.IEEE);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.remainder(RHS.U.Double);
llvm_unreachable("Unexpected semantics");
}
opStatus mod(const APFloat &RHS) {
assert(&getSemantics() == &RHS.getSemantics() &&
"Should only call on two APFloats with the same semantics");
if (usesLayout<IEEEFloat>(getSemantics()))
return U.IEEE.mod(RHS.U.IEEE);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.mod(RHS.U.Double);
llvm_unreachable("Unexpected semantics");
}
opStatus mod(const APFloat &RHS) { return getIEEE().mod(RHS.getIEEE()); }
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend,
roundingMode RM) {
return getIEEE().fusedMultiplyAdd(Multiplicand.getIEEE(), Addend.getIEEE(),
RM);
assert(&getSemantics() == &Multiplicand.getSemantics() &&
"Should only call on APFloats with the same semantics");
assert(&getSemantics() == &Addend.getSemantics() &&
"Should only call on APFloats with the same semantics");
if (usesLayout<IEEEFloat>(getSemantics()))
return U.IEEE.fusedMultiplyAdd(Multiplicand.U.IEEE, Addend.U.IEEE, RM);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.fusedMultiplyAdd(Multiplicand.U.Double, Addend.U.Double,
RM);
llvm_unreachable("Unexpected semantics");
}
opStatus roundToIntegral(roundingMode RM) {
return getIEEE().roundToIntegral(RM);
APFLOAT_DISPATCH_ON_SEMANTICS(roundToIntegral(RM));
}
opStatus next(bool nextDown) { return getIEEE().next(nextDown); }
// TODO: bool parameters are not readable and a source of bugs.
// Do something.
opStatus next(bool nextDown) {
APFLOAT_DISPATCH_ON_SEMANTICS(next(nextDown));
}
/// Add two APFloats, rounding ties to the nearest even.
/// No error checking.
APFloat operator+(const APFloat &RHS) const {
return APFloat(getIEEE() + RHS.getIEEE(), getSemantics());
APFloat Result(*this);
(void)Result.add(RHS, rmNearestTiesToEven);
return Result;
}
/// Subtract two APFloats, rounding ties to the nearest even.
/// No error checking.
APFloat operator-(const APFloat &RHS) const {
return APFloat(getIEEE() - RHS.getIEEE(), getSemantics());
APFloat Result(*this);
(void)Result.subtract(RHS, rmNearestTiesToEven);
return Result;
}
/// Multiply two APFloats, rounding ties to the nearest even.
/// No error checking.
APFloat operator*(const APFloat &RHS) const {
return APFloat(getIEEE() * RHS.getIEEE(), getSemantics());
APFloat Result(*this);
(void)Result.multiply(RHS, rmNearestTiesToEven);
return Result;
}
/// Divide the first APFloat by the second, rounding ties to the nearest even.
/// No error checking.
APFloat operator/(const APFloat &RHS) const {
return APFloat(getIEEE() / RHS.getIEEE(), getSemantics());
APFloat Result(*this);
(void)Result.divide(RHS, rmNearestTiesToEven);
return Result;
}
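The four operators above now delegate to the explicit-rounding methods and always round ties to nearest even. An illustrative usage sketch (the demo function is hypothetical; it assumes this revision's llvm/ADT/APFloat.h):
// Illustrative sketch: operators use rmNearestTiesToEven; the named methods
// take an explicit rounding mode and report an opStatus.
#include "llvm/ADT/APFloat.h"
using llvm::APFloat;

void apfloatArithmeticDemo() {
  APFloat A(1.5), B(2.25);
  APFloat Sum = A + B;                    // rounds ties to nearest even, no status
  APFloat Prod = A;
  APFloat::opStatus St = Prod.multiply(B, APFloat::rmTowardZero);
  (void)Sum; (void)St;
}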
void changeSign() { getIEEE().changeSign(); }
void clearSign() { getIEEE().clearSign(); }
void copySign(const APFloat &RHS) { getIEEE().copySign(RHS.getIEEE()); }
void changeSign() { APFLOAT_DISPATCH_ON_SEMANTICS(changeSign()); }
void clearSign() {
if (isNegative())
changeSign();
}
void copySign(const APFloat &RHS) {
if (isNegative() != RHS.isNegative())
changeSign();
}
/// A static helper to produce a copy of an APFloat value with its sign
/// copied from some other APFloat.
static APFloat copySign(APFloat Value, const APFloat &Sign) {
return APFloat(IEEEFloat::copySign(Value.getIEEE(), Sign.getIEEE()),
Value.getSemantics());
Value.copySign(Sign);
return Value;
}
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM,
@@ -940,46 +1058,60 @@ public:
opStatus convertToInteger(integerPart *Input, unsigned int Width,
bool IsSigned, roundingMode RM,
bool *IsExact) const {
return getIEEE().convertToInteger(Input, Width, IsSigned, RM, IsExact);
APFLOAT_DISPATCH_ON_SEMANTICS(
convertToInteger(Input, Width, IsSigned, RM, IsExact));
}
opStatus convertToInteger(APSInt &Result, roundingMode RM,
bool *IsExact) const {
return getIEEE().convertToInteger(Result, RM, IsExact);
}
bool *IsExact) const;
opStatus convertFromAPInt(const APInt &Input, bool IsSigned,
roundingMode RM) {
return getIEEE().convertFromAPInt(Input, IsSigned, RM);
APFLOAT_DISPATCH_ON_SEMANTICS(convertFromAPInt(Input, IsSigned, RM));
}
opStatus convertFromSignExtendedInteger(const integerPart *Input,
unsigned int InputSize, bool IsSigned,
roundingMode RM) {
return getIEEE().convertFromSignExtendedInteger(Input, InputSize, IsSigned,
RM);
APFLOAT_DISPATCH_ON_SEMANTICS(
convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM));
}
opStatus convertFromZeroExtendedInteger(const integerPart *Input,
unsigned int InputSize, bool IsSigned,
roundingMode RM) {
return getIEEE().convertFromZeroExtendedInteger(Input, InputSize, IsSigned,
RM);
APFLOAT_DISPATCH_ON_SEMANTICS(
convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM));
}
opStatus convertFromString(StringRef, roundingMode);
APInt bitcastToAPInt() const { return getIEEE().bitcastToAPInt(); }
APInt bitcastToAPInt() const {
APFLOAT_DISPATCH_ON_SEMANTICS(bitcastToAPInt());
}
double convertToDouble() const { return getIEEE().convertToDouble(); }
float convertToFloat() const { return getIEEE().convertToFloat(); }
bool operator==(const APFloat &) const = delete;
cmpResult compare(const APFloat &RHS) const {
return getIEEE().compare(RHS.getIEEE());
assert(&getSemantics() == &RHS.getSemantics() &&
"Should only compare APFloats with the same semantics");
if (usesLayout<IEEEFloat>(getSemantics()))
return U.IEEE.compare(RHS.U.IEEE);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.compare(RHS.U.Double);
llvm_unreachable("Unexpected semantics");
}
bool bitwiseIsEqual(const APFloat &RHS) const {
return getIEEE().bitwiseIsEqual(RHS.getIEEE());
if (&getSemantics() != &RHS.getSemantics())
return false;
if (usesLayout<IEEEFloat>(getSemantics()))
return U.IEEE.bitwiseIsEqual(RHS.U.IEEE);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.bitwiseIsEqual(RHS.U.Double);
llvm_unreachable("Unexpected semantics");
}
unsigned int convertToHexString(char *DST, unsigned int HexDigits,
bool UpperCase, roundingMode RM) const {
return getIEEE().convertToHexString(DST, HexDigits, UpperCase, RM);
APFLOAT_DISPATCH_ON_SEMANTICS(
convertToHexString(DST, HexDigits, UpperCase, RM));
}
bool isZero() const { return getCategory() == fcZero; }
@@ -987,7 +1119,7 @@ public:
bool isNaN() const { return getCategory() == fcNaN; }
bool isNegative() const { return getIEEE().isNegative(); }
bool isDenormal() const { return getIEEE().isDenormal(); }
bool isDenormal() const { APFLOAT_DISPATCH_ON_SEMANTICS(isDenormal()); }
bool isSignaling() const { return getIEEE().isSignaling(); }
bool isNormal() const { return !isDenormal() && isFiniteNonZero(); }
@@ -999,26 +1131,31 @@ public:
bool isFiniteNonZero() const { return isFinite() && !isZero(); }
bool isPosZero() const { return isZero() && !isNegative(); }
bool isNegZero() const { return isZero() && isNegative(); }
bool isSmallest() const { return getIEEE().isSmallest(); }
bool isLargest() const { return getIEEE().isLargest(); }
bool isInteger() const { return getIEEE().isInteger(); }
bool isSmallest() const { APFLOAT_DISPATCH_ON_SEMANTICS(isSmallest()); }
bool isLargest() const { APFLOAT_DISPATCH_ON_SEMANTICS(isLargest()); }
bool isInteger() const { APFLOAT_DISPATCH_ON_SEMANTICS(isInteger()); }
APFloat &operator=(const APFloat &RHS) = default;
APFloat &operator=(APFloat &&RHS) = default;
void toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision = 0,
unsigned FormatMaxPadding = 3) const {
return getIEEE().toString(Str, FormatPrecision, FormatMaxPadding);
APFLOAT_DISPATCH_ON_SEMANTICS(
toString(Str, FormatPrecision, FormatMaxPadding));
}
void print(raw_ostream &) const;
void dump() const;
bool getExactInverse(APFloat *inv) const {
return getIEEE().getExactInverse(inv ? &inv->getIEEE() : nullptr);
APFLOAT_DISPATCH_ON_SEMANTICS(getExactInverse(inv));
}
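getExactInverse succeeds only when the reciprocal is exactly representable in the same semantics. A small illustrative sketch (hypothetical demo function, assuming this header):
// Sketch: powers of two have exact inverses; 3.0 does not.
#include "llvm/ADT/APFloat.h"
using llvm::APFloat;

void exactInverseDemo() {
  APFloat Inv(0.0);
  bool OK  = APFloat(2.0).getExactInverse(&Inv); // true, Inv == 0.5
  bool Bad = APFloat(3.0).getExactInverse(&Inv); // false: 1/3 is not exact in binary
  (void)OK; (void)Bad; (void)Inv;
}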
friend hash_code hash_value(const APFloat &Arg);
friend int ilogb(const APFloat &Arg) { return ilogb(Arg.getIEEE()); }
friend APFloat scalbn(APFloat X, int Exp, roundingMode RM);
friend APFloat frexp(const APFloat &X, int &Exp, roundingMode RM);
friend IEEEFloat;
friend DoubleAPFloat;
};
@@ -1028,22 +1165,36 @@ public:
/// xlC compiler.
hash_code hash_value(const APFloat &Arg);
inline APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM) {
return APFloat(scalbn(X.getIEEE(), Exp, RM), X.getSemantics());
if (APFloat::usesLayout<detail::IEEEFloat>(X.getSemantics()))
return APFloat(scalbn(X.U.IEEE, Exp, RM), X.getSemantics());
if (APFloat::usesLayout<detail::DoubleAPFloat>(X.getSemantics()))
return APFloat(scalbn(X.U.Double, Exp, RM), X.getSemantics());
llvm_unreachable("Unexpected semantics");
}
/// \brief Equivalent of C standard library function.
/// Equivalent of C standard library function.
///
/// While the C standard says Exp is an unspecified value for infinity and nan,
/// this returns INT_MAX for infinities, and INT_MIN for NaNs.
inline APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM) {
return APFloat(frexp(X.getIEEE(), Exp, RM), X.getSemantics());
if (APFloat::usesLayout<detail::IEEEFloat>(X.getSemantics()))
return APFloat(frexp(X.U.IEEE, Exp, RM), X.getSemantics());
if (APFloat::usesLayout<detail::DoubleAPFloat>(X.getSemantics()))
return APFloat(frexp(X.U.Double, Exp, RM), X.getSemantics());
llvm_unreachable("Unexpected semantics");
}
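As the comments state, ilogb, frexp and scalbn mirror their C library counterparts, so a frexp result can be rebuilt with scalbn. Illustrative sketch (hypothetical demo function, assuming this header):
// Sketch relating ilogb, frexp and scalbn on APFloat.
#include "llvm/ADT/APFloat.h"
using llvm::APFloat;

void frexpDemo() {
  APFloat X(6.0);
  int Exp = 0;
  APFloat M = frexp(X, Exp, APFloat::rmNearestTiesToEven);  // M == 0.75, Exp == 3
  APFloat Y = scalbn(M, Exp, APFloat::rmNearestTiesToEven); // reconstructs 6.0
  int E = ilogb(X);                                         // floor(log2(6.0)) == 2
  (void)Y; (void)E;
}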
/// \brief Returns the absolute value of the argument.
/// Returns the absolute value of the argument.
inline APFloat abs(APFloat X) {
X.clearSign();
return X;
}
/// \brief Returns the negated value of the argument.
inline APFloat neg(APFloat X) {
X.changeSign();
return X;
}
/// Implements IEEE minNum semantics. Returns the smaller of the 2 arguments if
/// both are not NaN. If either argument is a NaN, returns the other argument.
LLVM_READONLY
@@ -1068,4 +1219,5 @@ inline APFloat maxnum(const APFloat &A, const APFloat &B) {
} // namespace llvm
#undef APFLOAT_DISPATCH_ON_SEMANTICS
#endif // LLVM_ADT_APFLOAT_H

include/llvm/ADT/APInt.h
View File

@@ -147,7 +147,7 @@ class LLVM_NODISCARD APInt {
return *this;
// Mask out the high bits.
uint64_t mask = ~uint64_t(0ULL) >> (APINT_BITS_PER_WORD - wordBits);
uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - wordBits);
if (isSingleWord())
VAL &= mask;
else
@@ -196,15 +196,6 @@ class LLVM_NODISCARD APInt {
/// out-of-line slow case for shl
APInt shlSlowCase(unsigned shiftAmt) const;
/// out-of-line slow case for operator&
APInt AndSlowCase(const APInt &RHS) const;
/// out-of-line slow case for operator|
APInt OrSlowCase(const APInt &RHS) const;
/// out-of-line slow case for operator^
APInt XorSlowCase(const APInt &RHS) const;
/// out-of-line slow case for operator=
APInt &AssignSlowCase(const APInt &RHS);
@@ -223,6 +214,9 @@ class LLVM_NODISCARD APInt {
/// out-of-line slow case for countPopulation
unsigned countPopulationSlowCase() const;
/// out-of-line slow case for setBits.
void setBitsSlowCase(unsigned loBit, unsigned hiBit);
public:
/// \name Constructors
/// @{
@@ -240,11 +234,12 @@ public:
APInt(unsigned numBits, uint64_t val, bool isSigned = false)
: BitWidth(numBits), VAL(0) {
assert(BitWidth && "bitwidth too small");
if (isSingleWord())
if (isSingleWord()) {
VAL = val;
else
clearUnusedBits();
} else {
initSlowCase(val, isSigned);
clearUnusedBits();
}
}
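The value constructor either truncates val to the bit width or, for wider results, zero- or sign-extends it depending on isSigned. Illustrative sketch (hypothetical demo function, assuming this revision's llvm/ADT/APInt.h):
// Sketch of the value constructor's extension behaviour.
#include "llvm/ADT/APInt.h"
#include <cstdint>
using llvm::APInt;

void apintCtorDemo() {
  APInt A(8, 0x1FF);                            // truncated to the width: 0xFF
  APInt B(128, 0xFF);                           // zero-extended: high bits are 0
  APInt C(128, UINT64_MAX, /*isSigned=*/true);  // sign-extended: all 128 bits set
  bool AllOnes = C.isAllOnesValue();            // true
  (void)A; (void)B; (void)AllOnes;
}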
/// \brief Construct an APInt of numBits width, initialized as bigVal[].
@@ -341,7 +336,7 @@ public:
/// This checks whether all bits of the APInt are set.
bool isAllOnesValue() const {
if (isSingleWord())
return VAL == ~integerPart(0) >> (APINT_BITS_PER_WORD - BitWidth);
return VAL == UINT64_MAX >> (APINT_BITS_PER_WORD - BitWidth);
return countPopulationSlowCase() == BitWidth;
}
@@ -406,7 +401,7 @@ public:
/// If this value is smaller than the specified limit, return it, otherwise
/// return the limit value. This causes the value to saturate to the limit.
uint64_t getLimitedValue(uint64_t Limit = ~0ULL) const {
uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) const {
return (getActiveBits() > 64 || getZExtValue() > Limit) ? Limit
: getZExtValue();
}
@@ -501,12 +496,26 @@ public:
///
/// \returns An APInt value with the requested bits set.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
assert(hiBit <= numBits && "hiBit out of range");
assert(loBit < numBits && "loBit out of range");
if (hiBit < loBit)
return getLowBitsSet(numBits, hiBit) |
getHighBitsSet(numBits, numBits - loBit);
return getLowBitsSet(numBits, hiBit - loBit).shl(loBit);
APInt Res(numBits, 0);
Res.setBits(loBit, hiBit);
return Res;
}
/// \brief Get a value with upper bits starting at loBit set.
///
/// Constructs an APInt value that has a contiguous range of bits set. The
/// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
/// bits will be zero. For example, with parameters (32, 12) you would get
/// 0xFFFFF000.
///
/// \param numBits the intended bit width of the result
/// \param loBit the index of the lowest bit to set.
///
/// \returns An APInt value with the requested bits set.
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
APInt Res(numBits, 0);
Res.setBitsFrom(loBit);
return Res;
}
/// \brief Get a value with high bits set
@@ -516,15 +525,9 @@ public:
/// \param numBits the bitwidth of the result
/// \param hiBitsSet the number of high-order bits set in the result.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
assert(hiBitsSet <= numBits && "Too many bits to set!");
// Handle a degenerate case, to avoid shifting by word size
if (hiBitsSet == 0)
return APInt(numBits, 0);
unsigned shiftAmt = numBits - hiBitsSet;
// For small values, return quickly
if (numBits <= APINT_BITS_PER_WORD)
return APInt(numBits, ~0ULL << shiftAmt);
return getAllOnesValue(numBits).shl(shiftAmt);
APInt Res(numBits, 0);
Res.setHighBits(hiBitsSet);
return Res;
}
/// \brief Get a value with low bits set
@@ -534,16 +537,9 @@ public:
/// \param numBits the bitwidth of the result
/// \param loBitsSet the number of low-order bits set in the result.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
assert(loBitsSet <= numBits && "Too many bits to set!");
// Handle a degenerate case, to avoid shifting by word size
if (loBitsSet == 0)
return APInt(numBits, 0);
if (loBitsSet == APINT_BITS_PER_WORD)
return APInt(numBits, UINT64_MAX);
// For small values, return quickly.
if (loBitsSet <= APINT_BITS_PER_WORD)
return APInt(numBits, UINT64_MAX >> (APINT_BITS_PER_WORD - loBitsSet));
return getAllOnesValue(numBits).lshr(numBits - loBitsSet);
APInt Res(numBits, 0);
Res.setLowBits(loBitsSet);
return Res;
}
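These factories now all delegate to the setBits helpers, including getBitsSet's wrap-around handling when hiBit is below loBit. Illustrative sketch (hypothetical demo function, assuming this header):
// Usage sketch for the bit-range factories.
#include "llvm/ADT/APInt.h"
using llvm::APInt;

void bitFactoryDemo() {
  APInt A = APInt::getBitsSet(32, 4, 8);    // bits [4,8): 0x000000F0
  APInt B = APInt::getBitsSetFrom(32, 12);  // bits [12,32): 0xFFFFF000, as documented above
  APInt C = APInt::getHighBitsSet(16, 4);   // 0xF000
  APInt D = APInt::getLowBitsSet(16, 4);    // 0x000F
  APInt E = APInt::getBitsSet(8, 6, 2);     // hiBit < loBit wraps: bits [0,2) and [6,8) -> 0xC3
  (void)A; (void)B; (void)C; (void)D; (void)E;
}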
/// \brief Return a value containing V broadcasted over NewLen bits.
@@ -613,17 +609,6 @@ public:
/// \returns *this decremented by one.
APInt &operator--();
/// \brief Unary bitwise complement operator.
///
/// Performs a bitwise complement operation on this APInt.
///
/// \returns an APInt that is the bitwise complement of *this
APInt operator~() const {
APInt Result(*this);
Result.flipAllBits();
return Result;
}
/// \brief Logical negation operator.
///
/// Performs logical negation operation on this APInt.
@@ -698,6 +683,13 @@ public:
/// \returns *this after ANDing with RHS.
APInt &operator&=(const APInt &RHS);
/// \brief Bitwise AND assignment operator.
///
/// Performs a bitwise AND operation on this APInt and RHS. RHS is
/// logically zero-extended or truncated to match the bit-width of
/// the LHS.
APInt &operator&=(uint64_t RHS);
/// \brief Bitwise OR assignment operator.
///
/// Performs a bitwise OR operation on this APInt and RHS. The result is
@@ -729,6 +721,21 @@ public:
/// \returns *this after XORing with RHS.
APInt &operator^=(const APInt &RHS);
/// \brief Bitwise XOR assignment operator.
///
/// Performs a bitwise XOR operation on this APInt and RHS. RHS is
/// logically zero-extended or truncated to match the bit-width of
/// the LHS.
APInt &operator^=(uint64_t RHS) {
if (isSingleWord()) {
VAL ^= RHS;
clearUnusedBits();
} else {
pVal[0] ^= RHS;
}
return *this;
}
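Per the comments above, the new uint64_t overloads treat the right-hand side as zero-extended (or truncated) to the width of the left-hand side. Illustrative sketch (hypothetical demo function, assuming this header):
// Sketch of the scalar compound bitwise operators.
#include "llvm/ADT/APInt.h"
#include <cstdint>
using llvm::APInt;

void scalarBitwiseDemo() {
  APInt X = APInt::getAllOnesValue(128);
  X &= UINT64_C(0xFF);   // RHS zero-extended: only bits [0,8) remain set
  APInt Y(128, 0);
  Y ^= UINT64_C(0xF0);   // toggles bits in the low word only
  (void)X; (void)Y;
}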
/// \brief Multiplication assignment operator.
///
/// Multiplies this APInt by RHS and assigns the result to *this.
@@ -766,59 +773,6 @@ public:
/// \name Binary Operators
/// @{
/// \brief Bitwise AND operator.
///
/// Performs a bitwise AND operation on *this and RHS.
///
/// \returns An APInt value representing the bitwise AND of *this and RHS.
APInt operator&(const APInt &RHS) const {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord())
return APInt(getBitWidth(), VAL & RHS.VAL);
return AndSlowCase(RHS);
}
APInt And(const APInt &RHS) const { return this->operator&(RHS); }
/// \brief Bitwise OR operator.
///
/// Performs a bitwise OR operation on *this and RHS.
///
/// \returns An APInt value representing the bitwise OR of *this and RHS.
APInt operator|(const APInt &RHS) const {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord())
return APInt(getBitWidth(), VAL | RHS.VAL);
return OrSlowCase(RHS);
}
/// \brief Bitwise OR function.
///
/// Performs a bitwise or on *this and RHS. This is implemented by simply
/// calling operator|.
///
/// \returns An APInt value representing the bitwise OR of *this and RHS.
APInt Or(const APInt &RHS) const { return this->operator|(RHS); }
/// \brief Bitwise XOR operator.
///
/// Performs a bitwise XOR operation on *this and RHS.
///
/// \returns An APInt value representing the bitwise XOR of *this and RHS.
APInt operator^(const APInt &RHS) const {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord())
return APInt(BitWidth, VAL ^ RHS.VAL);
return XorSlowCase(RHS);
}
/// \brief Bitwise XOR function.
///
/// Performs a bitwise XOR operation on *this and RHS. This is implemented
/// through the usage of operator^.
///
/// \returns An APInt value representing the bitwise XOR of *this and RHS.
APInt Xor(const APInt &RHS) const { return this->operator^(RHS); }
/// \brief Multiplication operator.
///
/// Multiplies this APInt by RHS and returns the result.
@@ -1144,7 +1098,11 @@ public:
/// This operation tests if there are any pairs of corresponding bits
/// between this APInt and RHS that are both set.
bool intersects(const APInt &RHS) const { return (*this & RHS) != 0; }
bool intersects(const APInt &RHS) const {
APInt temp(*this);
temp &= RHS;
return temp != 0;
}
/// @}
/// \name Resizing Operators
@@ -1217,6 +1175,44 @@ public:
/// Set the given bit to 1 whose position is given as "bitPosition".
void setBit(unsigned bitPosition);
/// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
void setBits(unsigned loBit, unsigned hiBit) {
assert(hiBit <= BitWidth && "hiBit out of range");
assert(loBit <= BitWidth && "loBit out of range");
if (loBit == hiBit)
return;
if (loBit > hiBit) {
setLowBits(hiBit);
setHighBits(BitWidth - loBit);
return;
}
if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) {
uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
mask <<= loBit;
if (isSingleWord())
VAL |= mask;
else
pVal[0] |= mask;
} else {
setBitsSlowCase(loBit, hiBit);
}
}
/// Set the top bits starting from loBit.
void setBitsFrom(unsigned loBit) {
return setBits(loBit, BitWidth);
}
/// Set the bottom loBits bits.
void setLowBits(unsigned loBits) {
return setBits(0, loBits);
}
/// Set the top hiBits bits.
void setHighBits(unsigned hiBits) {
return setBits(BitWidth - hiBits, BitWidth);
}
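The in-place setters mirror the static factories above, including the wrap-around case when loBit exceeds hiBit. Illustrative sketch (hypothetical demo function, assuming this header):
// Usage sketch for the in-place bit-range setters.
#include "llvm/ADT/APInt.h"
using llvm::APInt;

void setBitsDemo() {
  APInt V(32, 0);
  V.setBits(8, 16);   // V == 0x0000FF00
  V.setBitsFrom(28);  // V == 0xF000FF00
  V.setLowBits(4);    // V == 0xF000FF0F
  APInt W(8, 0);
  W.setBits(6, 2);    // loBit > hiBit wraps around: W == 0xC3
  (void)V; (void)W;
}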
/// \brief Set every bit to 0.
void clearAllBits() {
if (isSingleWord())
@@ -1247,6 +1243,12 @@ public:
/// as "bitPosition".
void flipBit(unsigned bitPosition);
/// Insert the bits from a smaller APInt starting at bitPosition.
void insertBits(const APInt &SubBits, unsigned bitPosition);
/// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
APInt extractBits(unsigned numBits, unsigned bitPosition) const;
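The new insertBits/extractBits pair gives bit-field style access to an APInt. Illustrative sketch (hypothetical demo function, assuming this header):
// Extract a byte-sized field and patch it back in at the same position.
#include "llvm/ADT/APInt.h"
using llvm::APInt;

void bitFieldDemo() {
  APInt Word(32, 0x12345678);
  APInt Byte2 = Word.extractBits(8, 16);  // bits [16,24): 0x34
  Word.insertBits(APInt(8, 0xAB), 16);    // Word == 0x12AB5678
  (void)Byte2;
}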
/// @}
/// \name Value Characterization Functions
/// @{
@@ -1723,6 +1725,74 @@ inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
/// \brief Unary bitwise complement operator.
///
/// \returns an APInt that is the bitwise complement of \p v.
inline APInt operator~(APInt v) {
v.flipAllBits();
return v;
}
inline APInt operator&(APInt a, const APInt &b) {
a &= b;
return a;
}
inline APInt operator&(const APInt &a, APInt &&b) {
b &= a;
return std::move(b);
}
inline APInt operator&(APInt a, uint64_t RHS) {
a &= RHS;
return a;
}
inline APInt operator&(uint64_t LHS, APInt b) {
b &= LHS;
return b;
}
inline APInt operator|(APInt a, const APInt &b) {
a |= b;
return a;
}
inline APInt operator|(const APInt &a, APInt &&b) {
b |= a;
return std::move(b);
}
inline APInt operator|(APInt a, uint64_t RHS) {
a |= RHS;
return a;
}
inline APInt operator|(uint64_t LHS, APInt b) {
b |= LHS;
return b;
}
inline APInt operator^(APInt a, const APInt &b) {
a ^= b;
return a;
}
inline APInt operator^(const APInt &a, APInt &&b) {
b ^= a;
return std::move(b);
}
inline APInt operator^(APInt a, uint64_t RHS) {
a ^= RHS;
return a;
}
inline APInt operator^(uint64_t LHS, APInt b) {
b ^= LHS;
return b;
}
inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
I.print(OS, true);
return OS;
@@ -1925,37 +1995,6 @@ inline APInt urem(const APInt &LHS, const APInt &RHS) { return LHS.urem(RHS); }
/// Performs multiplication on APInt values.
inline APInt mul(const APInt &LHS, const APInt &RHS) { return LHS * RHS; }
/// \brief Function for addition operation.
///
/// Performs addition on APInt values.
inline APInt add(const APInt &LHS, const APInt &RHS) { return LHS + RHS; }
/// \brief Function for subtraction operation.
///
/// Performs subtraction on APInt values.
inline APInt sub(const APInt &LHS, const APInt &RHS) { return LHS - RHS; }
/// \brief Bitwise AND function for APInt.
///
/// Performs bitwise AND operation on APInt LHS and
/// APInt RHS.
inline APInt And(const APInt &LHS, const APInt &RHS) { return LHS & RHS; }
/// \brief Bitwise OR function for APInt.
///
/// Performs bitwise OR operation on APInt LHS and APInt RHS.
inline APInt Or(const APInt &LHS, const APInt &RHS) { return LHS | RHS; }
/// \brief Bitwise XOR function for APInt.
///
/// Performs bitwise XOR operation on APInt.
inline APInt Xor(const APInt &LHS, const APInt &RHS) { return LHS ^ RHS; }
/// \brief Bitwise complement function.
///
/// Performs a bitwise complement operation on APInt.
inline APInt Not(const APInt &APIVal) { return ~APIVal; }
} // End of APIntOps namespace
// See friend declaration above. This additional declaration is required in

include/llvm/ADT/ArrayRef.h
View File

@@ -413,6 +413,25 @@ namespace llvm {
}
};
/// This is a MutableArrayRef that owns its array.
template <typename T> class OwningArrayRef : public MutableArrayRef<T> {
public:
OwningArrayRef() {}
OwningArrayRef(size_t Size) : MutableArrayRef<T>(new T[Size], Size) {}
OwningArrayRef(ArrayRef<T> Data)
: MutableArrayRef<T>(new T[Data.size()], Data.size()) {
std::copy(Data.begin(), Data.end(), this->begin());
}
OwningArrayRef(OwningArrayRef &&Other) { *this = std::move(Other); }
OwningArrayRef &operator=(OwningArrayRef &&Other) {
delete[] this->data();
this->MutableArrayRef<T>::operator=(Other);
Other.MutableArrayRef<T>::operator=(MutableArrayRef<T>());
return *this;
}
~OwningArrayRef() { delete[] this->data(); }
};
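OwningArrayRef deep-copies its input into a heap buffer it deletes itself, while still behaving as a MutableArrayRef. Illustrative sketch (hypothetical demo function, assuming this revision's llvm/ADT/ArrayRef.h):
// Usage sketch: construction from an ArrayRef copies; move assignment transfers ownership.
#include "llvm/ADT/ArrayRef.h"
#include <utility>

void owningArrayRefDemo() {
  int Src[] = {1, 2, 3, 4};
  llvm::OwningArrayRef<int> Owned(llvm::makeArrayRef(Src)); // deep copy of Src
  Owned[0] = 42;                // mutable access; Src is untouched
  llvm::OwningArrayRef<int> Dst(2);  // 2 default-initialized elements
  Dst = std::move(Owned);       // frees Dst's old buffer and steals Owned's
}                               // Dst's buffer is deleted here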
/// @name ArrayRef Convenience constructors
/// @{

Some files were not shown because too many files have changed in this diff