From caf6b71ab2e6f9de3efe6ed71e9a21579b9994b6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 10 Jul 2019 17:11:29 +0000 Subject: [PATCH] [X86] Change the IR sequence for _mm_storeh_pi and _mm_storel_pi to perform the store as a <2 x float> instead of i64. This is similar to what we do for loadl_pi and loadh_pi. llvm-svn: 365669 --- clang/include/clang/Basic/BuiltinsX86.def | 2 -- clang/lib/CodeGen/CGBuiltin.cpp | 16 ---------------- clang/lib/Headers/xmmintrin.h | 12 ++++++++++-- clang/test/CodeGen/builtins-x86.c | 2 -- clang/test/CodeGen/sse-builtins.c | 10 ++++------ 5 files changed, 14 insertions(+), 28 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index bc6e382396ad..a0ba0ecf36bb 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -306,8 +306,6 @@ TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "n", "sse") TARGET_HEADER_BUILTIN(_mm_getcsr, "Ui", "nh", "xmmintrin.h", ALL_LANGUAGES, "sse") TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "ncV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "nV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "nV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "nV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_sfence, "v", "n", "sse") TARGET_HEADER_BUILTIN(_mm_sfence, "v", "nh", "xmmintrin.h", ALL_LANGUAGES, "sse") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f21e02d79c9d..52e2d5bfb912 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10651,22 +10651,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(Intr, Ops); } - case X86::BI__builtin_ia32_storehps: - case X86::BI__builtin_ia32_storelps: { - llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); - llvm::Type *VecTy = 
llvm::VectorType::get(Int64Ty, 2); - - // cast val v2i64 - Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); - - // extract (0, 1) - unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; - Ops[1] = Builder.CreateExtractElement(Ops[1], Index, "extract"); - - // cast pointer to i64 & store - Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); - return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); - } case X86::BI__builtin_ia32_vextractf128_pd256: case X86::BI__builtin_ia32_vextractf128_ps256: case X86::BI__builtin_ia32_vextractf128_si256: diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 6f5517ee873b..75ff37655bda 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -1919,7 +1919,11 @@ _mm_setzero_ps(void) static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pi(__m64 *__p, __m128 __a) { - __builtin_ia32_storehps((__v2si *)__p, (__v4sf)__a); + typedef float __mm_storeh_pi_v2f32 __attribute__((__vector_size__(8))); + struct __mm_storeh_pi_struct { + __mm_storeh_pi_v2f32 __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 2, 3); } /// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a @@ -1936,7 +1940,11 @@ _mm_storeh_pi(__m64 *__p, __m128 __a) static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pi(__m64 *__p, __m128 __a) { - __builtin_ia32_storelps((__v2si *)__p, (__v4sf)__a); + typedef float __mm_storeh_pi_v2f32 __attribute__((__vector_size__(8))); + struct __mm_storeh_pi_struct { + __mm_storeh_pi_v2f32 __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 0, 1); } /// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index 056563931d6a..61b9d53c74f9 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ 
b/clang/test/CodeGen/builtins-x86.c @@ -341,8 +341,6 @@ void f0() { #endif tmp_V2i = __builtin_ia32_cvttps2pi(tmp_V4f); (void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp); - (void) __builtin_ia32_storehps(tmp_V2ip, tmp_V4f); - (void) __builtin_ia32_storelps(tmp_V2ip, tmp_V4f); tmp_i = __builtin_ia32_movmskps(tmp_V4f); tmp_i = __builtin_ia32_pmovmskb(tmp_V8c); (void) __builtin_ia32_movntq(tmp_V1LLip, tmp_V1LLi); diff --git a/clang/test/CodeGen/sse-builtins.c b/clang/test/CodeGen/sse-builtins.c index eb47c190b956..4179341fadfc 100644 --- a/clang/test/CodeGen/sse-builtins.c +++ b/clang/test/CodeGen/sse-builtins.c @@ -688,17 +688,15 @@ void test_mm_store1_ps(float* x, __m128 y) { void test_mm_storeh_pi(__m64* x, __m128 y) { // CHECK-LABEL: test_mm_storeh_pi - // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64> - // CHECK: extractelement <2 x i64> %{{.*}}, i64 1 - // CHECK: store i64 %{{.*}}, i64* {{.*}} + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 2, i32 3> + // CHECK: store <2 x float> %{{.*}}, <2 x float>* %{{.*}}, align 1{{$}} _mm_storeh_pi(x, y); } void test_mm_storel_pi(__m64* x, __m128 y) { // CHECK-LABEL: test_mm_storel_pi - // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64> - // CHECK: extractelement <2 x i64> %{{.*}}, i64 0 - // CHECK: store i64 %{{.*}}, i64* {{.*}} + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1> + // CHECK: store <2 x float> %{{.*}}, <2 x float>* %{{.*}}, align 1{{$}} _mm_storel_pi(x, y); }