mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-13 19:24:21 +00:00
e621757365
Pursuant to discussions at https://discourse.llvm.org/t/rfc-c-23-p1467r9-extended-floating-point-types-and-standard-names/70033/22, this commit enhances the handling of the __bf16 type in Clang. - Firstly, it upgrades __bf16 from a storage-only type to an arithmetic type. - Secondly, it changes the mangling of __bf16 to DF16b on all architectures except ARM. This change has been made in accordance with the finalization of the mangling for the std::bfloat16_t type, as discussed at https://github.com/itanium-cxx-abi/cxx-abi/pull/147. - Finally, this commit extends the existing excess precision support to the __bf16 type. This applies to hardware architectures that do not natively support bfloat16 arithmetic. Appropriate tests have been added to verify the effects of these changes and ensure no regressions in other areas of the compiler. Reviewed By: rjmccall, pengfei, zahiraam Differential Revision: https://reviews.llvm.org/D150913
130 lines
6.6 KiB
Plaintext
130 lines
6.6 KiB
Plaintext
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
|
// REQUIRES: amdgpu-registered-target
|
|
// REQUIRES: x86-registered-target
|
|
|
|
// RUN: %clang_cc1 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "amdgcn-amd-amdhsa" \
|
|
// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -emit-llvm -o - %s | FileCheck %s
|
|
|
|
#include "Inputs/cuda.h"
|
|
|
|
// CHECK-LABEL: @_Z8test_argPDF16bDF16b(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
|
|
// CHECK-NEXT: [[BF16:%.*]] = alloca bfloat, align 2, addrspace(5)
|
|
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
|
|
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
|
|
// CHECK-NEXT: [[BF16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BF16]] to ptr
|
|
// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 2
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[IN_ADDR_ASCAST]], align 2
|
|
// CHECK-NEXT: store bfloat [[TMP0]], ptr [[BF16_ASCAST]], align 2
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[BF16_ASCAST]], align 2
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store bfloat [[TMP1]], ptr [[TMP2]], align 2
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
__device__ void test_arg(__bf16 *out, __bf16 in) {
|
|
__bf16 bf16 = in;
|
|
*out = bf16;
|
|
}
|
|
|
|
// CHECK-LABEL: @_Z9test_loadPDF16bS_(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[BF16:%.*]] = alloca bfloat, align 2, addrspace(5)
|
|
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
|
|
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
|
|
// CHECK-NEXT: [[BF16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BF16]] to ptr
|
|
// CHECK-NEXT: store ptr [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store ptr [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[TMP0]], align 2
|
|
// CHECK-NEXT: store bfloat [[TMP1]], ptr [[BF16_ASCAST]], align 2
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[BF16_ASCAST]], align 2
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUT_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store bfloat [[TMP2]], ptr [[TMP3]], align 2
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
__device__ void test_load(__bf16 *out, __bf16 *in) {
|
|
__bf16 bf16 = *in;
|
|
*out = bf16;
|
|
}
|
|
|
|
// CHECK-LABEL: @_Z8test_retDF16b(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5)
|
|
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
|
|
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
|
|
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
|
|
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 2
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[IN_ADDR_ASCAST]], align 2
|
|
// CHECK-NEXT: ret bfloat [[TMP0]]
|
|
//
|
|
__device__ __bf16 test_ret( __bf16 in) {
|
|
return in;
|
|
}
|
|
|
|
// CHECK-LABEL: @_Z9test_callDF16b(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5)
|
|
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
|
|
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
|
|
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
|
|
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 2
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[IN_ADDR_ASCAST]], align 2
|
|
// CHECK-NEXT: [[CALL:%.*]] = call contract noundef bfloat @_Z8test_retDF16b(bfloat noundef [[TMP0]]) #[[ATTR1:[0-9]+]]
|
|
// CHECK-NEXT: ret bfloat [[CALL]]
|
|
//
|
|
__device__ __bf16 test_call( __bf16 in) {
|
|
return test_ret(in);
|
|
}
|
|
|
|
|
|
// CHECK-LABEL: @_Z15test_vec_assignv(
|
|
// CHECK-NEXT: entry:
|
|
// CHECK-NEXT: [[VEC2_A:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[VEC2_B:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4_A:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[VEC4_B:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[VEC8_A:%.*]] = alloca <8 x bfloat>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[VEC8_B:%.*]] = alloca <8 x bfloat>, align 16, addrspace(5)
|
|
// CHECK-NEXT: [[VEC16_A:%.*]] = alloca <16 x bfloat>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC16_B:%.*]] = alloca <16 x bfloat>, align 32, addrspace(5)
|
|
// CHECK-NEXT: [[VEC2_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC2_A]] to ptr
|
|
// CHECK-NEXT: [[VEC2_B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC2_B]] to ptr
|
|
// CHECK-NEXT: [[VEC4_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4_A]] to ptr
|
|
// CHECK-NEXT: [[VEC4_B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4_B]] to ptr
|
|
// CHECK-NEXT: [[VEC8_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8_A]] to ptr
|
|
// CHECK-NEXT: [[VEC8_B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8_B]] to ptr
|
|
// CHECK-NEXT: [[VEC16_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC16_A]] to ptr
|
|
// CHECK-NEXT: [[VEC16_B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC16_B]] to ptr
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x bfloat>, ptr [[VEC2_B_ASCAST]], align 4
|
|
// CHECK-NEXT: store <2 x bfloat> [[TMP0]], ptr [[VEC2_A_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr [[VEC4_B_ASCAST]], align 8
|
|
// CHECK-NEXT: store <4 x bfloat> [[TMP1]], ptr [[VEC4_A_ASCAST]], align 8
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x bfloat>, ptr [[VEC8_B_ASCAST]], align 16
|
|
// CHECK-NEXT: store <8 x bfloat> [[TMP2]], ptr [[VEC8_A_ASCAST]], align 16
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x bfloat>, ptr [[VEC16_B_ASCAST]], align 32
|
|
// CHECK-NEXT: store <16 x bfloat> [[TMP3]], ptr [[VEC16_A_ASCAST]], align 32
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
__device__ void test_vec_assign() {
|
|
typedef __attribute__((ext_vector_type(2))) __bf16 bf16_x2;
|
|
bf16_x2 vec2_a, vec2_b;
|
|
vec2_a = vec2_b;
|
|
|
|
typedef __attribute__((ext_vector_type(4))) __bf16 bf16_x4;
|
|
bf16_x4 vec4_a, vec4_b;
|
|
vec4_a = vec4_b;
|
|
|
|
typedef __attribute__((ext_vector_type(8))) __bf16 bf16_x8;
|
|
bf16_x8 vec8_a, vec8_b;
|
|
vec8_a = vec8_b;
|
|
|
|
typedef __attribute__((ext_vector_type(16))) __bf16 bf16_x16;
|
|
bf16_x16 vec16_a, vec16_b;
|
|
vec16_a = vec16_b;
|
|
}
|