From 3c19a3950c8a716ed0f6e8eb8fd6f81f1696719c Mon Sep 17 00:00:00 2001 From: Scott Michel Date: Wed, 5 Mar 2008 23:00:19 +0000 Subject: [PATCH] - Expand tabs to spaces. - select_bits.ll now fully functional now that PR1993 is closed. It was previously broken by refactoring in SPUInstrInfo.td and using multiclasses. - Same for eqv.ll llvm-svn: 47972 --- test/CodeGen/CellSPU/and_ops.ll | 87 +-- test/CodeGen/CellSPU/call_indirect.ll | 34 +- test/CodeGen/CellSPU/ctpop.ll | 10 +- test/CodeGen/CellSPU/dp_farith.ll | 80 +-- test/CodeGen/CellSPU/eqv.ll | 168 ++--- test/CodeGen/CellSPU/fcmp.ll | 12 +- test/CodeGen/CellSPU/fdiv.ll | 8 +- test/CodeGen/CellSPU/fneg-fabs.ll | 24 +- test/CodeGen/CellSPU/immed16.ll | 10 +- test/CodeGen/CellSPU/immed32.ll | 22 +- test/CodeGen/CellSPU/immed64.ll | 5 + test/CodeGen/CellSPU/int2fp.ll | 24 +- test/CodeGen/CellSPU/intrinsics_branch.ll | 114 ++-- test/CodeGen/CellSPU/intrinsics_float.ll | 60 +- test/CodeGen/CellSPU/intrinsics_logical.ll | 18 +- test/CodeGen/CellSPU/mul_ops.ll | 24 +- test/CodeGen/CellSPU/or_ops.ll | 78 +-- test/CodeGen/CellSPU/rotate_ops.ll | 52 +- test/CodeGen/CellSPU/select_bits.ll | 731 ++++++++++++++------- test/CodeGen/CellSPU/shift_ops.ll | 144 ++-- test/CodeGen/CellSPU/sp_farith.ll | 70 +- test/CodeGen/CellSPU/struct_1.ll | 64 +- test/CodeGen/CellSPU/vec_const.ll | 42 +- test/CodeGen/CellSPU/vecinsert.ll | 32 +- 24 files changed, 1096 insertions(+), 817 deletions(-) diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll index 6858dbabe64..46396ebb1f5 100644 --- a/test/CodeGen/CellSPU/and_ops.ll +++ b/test/CodeGen/CellSPU/and_ops.ll @@ -4,6 +4,7 @@ ; RUN: grep andi %t1.s | count 36 ; RUN: grep andhi %t1.s | count 30 ; RUN: grep andbi %t1.s | count 4 + target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" @@ -39,33 +40,33 @@ define <16 x i8> @and_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { } define i32 @and_i32_1(i32 %arg1, i32 %arg2) { - %A = and i32 %arg2, %arg1 - ret i32 %A + %A = and i32 %arg2, %arg1 + ret i32 %A } define i32 @and_i32_2(i32 %arg1, i32 %arg2) { - %A = and i32 %arg1, %arg2 - ret i32 %A + %A = and i32 %arg1, %arg2 + ret i32 %A } define i16 @and_i16_1(i16 %arg1, i16 %arg2) { - %A = and i16 %arg2, %arg1 - ret i16 %A + %A = and i16 %arg2, %arg1 + ret i16 %A } define i16 @and_i16_2(i16 %arg1, i16 %arg2) { - %A = and i16 %arg1, %arg2 - ret i16 %A + %A = and i16 %arg1, %arg2 + ret i16 %A } define i8 @and_i8_1(i8 %arg1, i8 %arg2) { - %A = and i8 %arg2, %arg1 - ret i8 %A + %A = and i8 %arg2, %arg1 + ret i8 %A } define i8 @and_i8_2(i8 %arg1, i8 %arg2) { - %A = and i8 %arg1, %arg2 - ret i8 %A + %A = and i8 %arg1, %arg2 + ret i8 %A } ; ANDC instruction generation: @@ -126,57 +127,57 @@ define <16 x i8> @andc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) { } define i32 @andc_i32_1(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg2, -1 - %B = and i32 %A, %arg1 - ret i32 %B + %A = xor i32 %arg2, -1 + %B = and i32 %A, %arg1 + ret i32 %B } define i32 @andc_i32_2(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg1, -1 - %B = and i32 %A, %arg2 - ret i32 %B + %A = xor i32 %arg1, -1 + %B = and i32 %A, %arg2 + ret i32 %B } define i32 @andc_i32_3(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg2, -1 - %B = and i32 %arg1, %A - ret i32 %B + %A = xor i32 %arg2, -1 + %B = and i32 %arg1, %A + ret i32 %B } define i16 @andc_i16_1(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg2, -1 - %B = and i16 %A, %arg1 - ret i16 %B + %A = xor i16 %arg2, -1 + %B = and i16 %A, %arg1 + ret i16 %B } define i16 @andc_i16_2(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg1, -1 - %B = and i16 %A, %arg2 - ret i16 %B + %A = xor i16 %arg1, -1 + %B = and i16 %A, %arg2 + ret i16 %B } define i16 @andc_i16_3(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg2, -1 - %B = and i16 %arg1, %A - ret i16 %B + %A = xor i16 %arg2, -1 + %B = and i16 %arg1, %A + ret i16 %B } define i8 @andc_i8_1(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg2, -1 - %B = and i8 %A, %arg1 - ret i8 %B + %A = xor i8 %arg2, -1 + %B = and i8 %A, %arg1 + ret i8 %B } define i8 @andc_i8_2(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg1, -1 - %B = and i8 %A, %arg2 - ret i8 %B + %A = xor i8 %arg1, -1 + %B = and i8 %A, %arg2 + ret i8 %B } define i8 @andc_i8_3(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg2, -1 - %B = and i8 %arg1, %A - ret i8 %B + %A = xor i8 %arg2, -1 + %B = and i8 %arg1, %A + ret i8 %B } ; ANDI instruction generation (i32 data type): @@ -252,7 +253,7 @@ define i16 @andhi_i16(i16 signext %in) signext { ; i8 data type (s/b ANDBI if 8-bit registers were supported): define <16 x i8> @and_v16i8(<16 x i8> %in) { - ; ANDBI generated for vector types + ; ANDBI generated for vector types %tmp2 = and <16 x i8> %in, < i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42 > @@ -260,19 +261,19 @@ define <16 x i8> @and_v16i8(<16 x i8> %in) { } define i8 @and_u8(i8 zeroext %in) zeroext { - ; ANDBI generated: + ; ANDBI generated: %tmp37 = and i8 %in, 37 ret i8 %tmp37 } define i8 @and_sext8(i8 signext %in) signext { - ; ANDBI generated + ; ANDBI generated %tmp38 = and i8 %in, 37 ret i8 %tmp38 } define i8 @and_i8(i8 %in) { - ; ANDBI generated + ; ANDBI generated %tmp38 = and i8 %in, 205 ret i8 %tmp38 } diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll index 11481edc12f..b3437b89095 100644 --- a/test/CodeGen/CellSPU/call_indirect.ll +++ b/test/CodeGen/CellSPU/call_indirect.ll @@ -23,27 +23,27 @@ target triple = "spu-unknown-elf" define void @dispatcher(i32 %i_arg, float %f_arg) { entry: - %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16 - tail call void %tmp2( i32 %i_arg, float %f_arg ) - %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4 - tail call void %tmp2.1( i32 %i_arg, float %f_arg ) - %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4 - tail call void %tmp2.2( i32 %i_arg, float %f_arg ) - %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4 - tail call void %tmp2.3( i32 %i_arg, float %f_arg ) - %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4 - tail call void %tmp2.4( i32 %i_arg, float %f_arg ) - %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4 - tail call void %tmp2.5( i32 %i_arg, float %f_arg ) - ret void + %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16 + tail call void %tmp2( i32 %i_arg, float %f_arg ) + %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4 + tail call void %tmp2.1( i32 %i_arg, float %f_arg ) + %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4 + tail call void %tmp2.2( i32 %i_arg, float %f_arg ) + %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4 + tail call void %tmp2.3( i32 %i_arg, float %f_arg ) + %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4 + tail call void %tmp2.4( i32 %i_arg, float %f_arg ) + %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4 + tail call void %tmp2.5( i32 %i_arg, float %f_arg ) + ret void } @ptr_list = internal global [1 x void ()*] [ void ()* inttoptr (i64 4294967295 to void ()*) ], align 4 @ptr.a = internal global void ()** getelementptr ([1 x void ()*]* @ptr_list, i32 0, i32 1), align 16 define void @double_indirect_call() { - %a = load void ()*** @ptr.a, align 16 - %b = load void ()** %a, align 4 - tail call void %b() - ret void + %a = load void ()*** @ptr.a, align 16 + %b = load void ()** %a, align 4 + tail call void %b() + ret void } diff --git a/test/CodeGen/CellSPU/ctpop.ll b/test/CodeGen/CellSPU/ctpop.ll index 5665596dc3c..79bb611fe20 100644 --- a/test/CodeGen/CellSPU/ctpop.ll +++ b/test/CodeGen/CellSPU/ctpop.ll @@ -11,20 +11,20 @@ declare i32 @llvm.ctpop.i16(i16) declare i32 @llvm.ctpop.i32(i32) define i32 @test_i8(i8 %X) { - call i32 @llvm.ctpop.i8(i8 %X) - %Y = bitcast i32 %1 to i32 - ret i32 %Y + call i32 @llvm.ctpop.i8(i8 %X) + %Y = bitcast i32 %1 to i32 + ret i32 %Y } define i32 @test_i16(i16 %X) { call i32 @llvm.ctpop.i16(i16 %X) - %Y = bitcast i32 %1 to i32 + %Y = bitcast i32 %1 to i32 ret i32 %Y } define i32 @test_i32(i32 %X) { call i32 @llvm.ctpop.i32(i32 %X) - %Y = bitcast i32 %1 to i32 + %Y = bitcast i32 %1 to i32 ret i32 %Y } diff --git a/test/CodeGen/CellSPU/dp_farith.ll b/test/CodeGen/CellSPU/dp_farith.ll index 7cec5192f30..2579a404eea 100644 --- a/test/CodeGen/CellSPU/dp_farith.ll +++ b/test/CodeGen/CellSPU/dp_farith.ll @@ -11,92 +11,92 @@ target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i target triple = "spu" define double @fadd(double %arg1, double %arg2) { - %A = add double %arg1, %arg2 - ret double %A + %A = add double %arg1, %arg2 + ret double %A } define <2 x double> @fadd_vec(<2 x double> %arg1, <2 x double> %arg2) { - %A = add <2 x double> %arg1, %arg2 - ret <2 x double> %A + %A = add <2 x double> %arg1, %arg2 + ret <2 x double> %A } define double @fsub(double %arg1, double %arg2) { - %A = sub double %arg1, %arg2 - ret double %A + %A = sub double %arg1, %arg2 + ret double %A } define <2 x double> @fsub_vec(<2 x double> %arg1, <2 x double> %arg2) { - %A = sub <2 x double> %arg1, %arg2 - ret <2 x double> %A + %A = sub <2 x double> %arg1, %arg2 + ret <2 x double> %A } define double @fmul(double %arg1, double %arg2) { - %A = mul double %arg1, %arg2 - ret double %A + %A = mul double %arg1, %arg2 + ret double %A } define <2 x double> @fmul_vec(<2 x double> %arg1, <2 x double> %arg2) { - %A = mul <2 x double> %arg1, %arg2 - ret <2 x double> %A + %A = mul <2 x double> %arg1, %arg2 + ret <2 x double> %A } define double @fma(double %arg1, double %arg2, double %arg3) { - %A = mul double %arg1, %arg2 - %B = add double %A, %arg3 - ret double %B + %A = mul double %arg1, %arg2 + %B = add double %A, %arg3 + ret double %B } define <2 x double> @fma_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { - %A = mul <2 x double> %arg1, %arg2 - %B = add <2 x double> %A, %arg3 - ret <2 x double> %B + %A = mul <2 x double> %arg1, %arg2 + %B = add <2 x double> %A, %arg3 + ret <2 x double> %B } define double @fms(double %arg1, double %arg2, double %arg3) { - %A = mul double %arg1, %arg2 - %B = sub double %A, %arg3 - ret double %B + %A = mul double %arg1, %arg2 + %B = sub double %A, %arg3 + ret double %B } define <2 x double> @fms_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { - %A = mul <2 x double> %arg1, %arg2 - %B = sub <2 x double> %A, %arg3 - ret <2 x double> %B + %A = mul <2 x double> %arg1, %arg2 + %B = sub <2 x double> %A, %arg3 + ret <2 x double> %B } ; - (a * b - c) define double @d_fnms_1(double %arg1, double %arg2, double %arg3) { - %A = mul double %arg1, %arg2 - %B = sub double %A, %arg3 - %C = sub double -0.000000e+00, %B ; [#uses=1] - ret double %C + %A = mul double %arg1, %arg2 + %B = sub double %A, %arg3 + %C = sub double -0.000000e+00, %B ; [#uses=1] + ret double %C } ; Annother way of getting fnms ; - ( a * b ) + c => c - (a * b) define double @d_fnms_2(double %arg1, double %arg2, double %arg3) { - %A = mul double %arg1, %arg2 - %B = sub double %arg3, %A - ret double %B + %A = mul double %arg1, %arg2 + %B = sub double %arg3, %A + ret double %B } ; FNMS: - (a * b - c) => c - (a * b) define <2 x double> @d_fnms_vec_1(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { - %A = mul <2 x double> %arg1, %arg2 - %B = sub <2 x double> %arg3, %A ; - ret <2 x double> %B + %A = mul <2 x double> %arg1, %arg2 + %B = sub <2 x double> %arg3, %A ; + ret <2 x double> %B } ; Another way to get fnms using a constant vector ; - ( a * b - c) define <2 x double> @d_fnms_vec_2(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) { - %A = mul <2 x double> %arg1, %arg2 ; <<2 x double>> [#uses=1] - %B = sub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1] - %C = sub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B - ret <2 x double> %C + %A = mul <2 x double> %arg1, %arg2 ; <<2 x double>> [#uses=1] + %B = sub <2 x double> %A, %arg3 ; <<2 x double>> [#uses=1] + %C = sub <2 x double> < double -0.00000e+00, double -0.00000e+00 >, %B + ret <2 x double> %C } ;define double @fdiv_1(double %arg1, double %arg2) { -; %A = fdiv double %arg1, %arg2 ; [#uses=1] -; ret double %A +; %A = fdiv double %arg1, %arg2 ; [#uses=1] +; ret double %A ;} diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll index b8a9d59801a..54069567720 100644 --- a/test/CodeGen/CellSPU/eqv.ll +++ b/test/CodeGen/CellSPU/eqv.ll @@ -14,139 +14,139 @@ target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i target triple = "spu" define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) { - %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] - ret <4 x i32> %C + %A = and <4 x i32> %arg1, %arg2 + %B = or <4 x i32> %arg1, %arg2 + %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > + %C = or <4 x i32> %A, %Bnot + ret <4 x i32> %C } define <4 x i32> @equiv_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) { - %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] - ret <4 x i32> %C + %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] + ret <4 x i32> %C } define <4 x i32> @equiv_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) { - %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] - %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] - ret <4 x i32> %C + %B = or <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %A = and <4 x i32> %arg1, %arg2 ; <<4 x i32>> [#uses=1] + %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] + %C = or <4 x i32> %A, %Bnot ; <<4 x i32>> [#uses=1] + ret <4 x i32> %C } define <4 x i32> @equiv_v4i32_4(<4 x i32> %arg1, <4 x i32> %arg2) { - %arg2not = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %C = xor <4 x i32> %arg1, %arg2not - ret <4 x i32> %C + %arg2not = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 > + %C = xor <4 x i32> %arg1, %arg2not + ret <4 x i32> %C } define i32 @equiv_i32_1(i32 %arg1, i32 %arg2) { - %A = and i32 %arg1, %arg2 ; [#uses=1] - %B = or i32 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i32 %B, -1 ; [#uses=1] - %C = or i32 %A, %Bnot ; [#uses=1] - ret i32 %C + %A = and i32 %arg1, %arg2 ; [#uses=1] + %B = or i32 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i32 %B, -1 ; [#uses=1] + %C = or i32 %A, %Bnot ; [#uses=1] + ret i32 %C } define i32 @equiv_i32_2(i32 %arg1, i32 %arg2) { - %B = or i32 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i32 %B, -1 ; [#uses=1] - %A = and i32 %arg1, %arg2 ; [#uses=1] - %C = or i32 %A, %Bnot ; [#uses=1] - ret i32 %C + %B = or i32 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i32 %B, -1 ; [#uses=1] + %A = and i32 %arg1, %arg2 ; [#uses=1] + %C = or i32 %A, %Bnot ; [#uses=1] + ret i32 %C } define i32 @equiv_i32_3(i32 %arg1, i32 %arg2) { - %B = or i32 %arg1, %arg2 ; [#uses=1] - %A = and i32 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i32 %B, -1 ; [#uses=1] - %C = or i32 %A, %Bnot ; [#uses=1] - ret i32 %C + %B = or i32 %arg1, %arg2 ; [#uses=1] + %A = and i32 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i32 %B, -1 ; [#uses=1] + %C = or i32 %A, %Bnot ; [#uses=1] + ret i32 %C } define i32 @equiv_i32_4(i32 %arg1, i32 %arg2) { - %arg2not = xor i32 %arg2, -1 - %C = xor i32 %arg1, %arg2not - ret i32 %C + %arg2not = xor i32 %arg2, -1 + %C = xor i32 %arg1, %arg2not + ret i32 %C } define i32 @equiv_i32_5(i32 %arg1, i32 %arg2) { - %arg1not = xor i32 %arg1, -1 - %C = xor i32 %arg2, %arg1not - ret i32 %C + %arg1not = xor i32 %arg1, -1 + %C = xor i32 %arg2, %arg1not + ret i32 %C } define i16 @equiv_i16_1(i16 signext %arg1, i16 signext %arg2) signext { - %A = and i16 %arg1, %arg2 ; [#uses=1] - %B = or i16 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i16 %B, -1 ; [#uses=1] - %C = or i16 %A, %Bnot ; [#uses=1] - ret i16 %C + %A = and i16 %arg1, %arg2 ; [#uses=1] + %B = or i16 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i16 %B, -1 ; [#uses=1] + %C = or i16 %A, %Bnot ; [#uses=1] + ret i16 %C } define i16 @equiv_i16_2(i16 signext %arg1, i16 signext %arg2) signext { - %B = or i16 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i16 %B, -1 ; [#uses=1] - %A = and i16 %arg1, %arg2 ; [#uses=1] - %C = or i16 %A, %Bnot ; [#uses=1] - ret i16 %C + %B = or i16 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i16 %B, -1 ; [#uses=1] + %A = and i16 %arg1, %arg2 ; [#uses=1] + %C = or i16 %A, %Bnot ; [#uses=1] + ret i16 %C } define i16 @equiv_i16_3(i16 signext %arg1, i16 signext %arg2) signext { - %B = or i16 %arg1, %arg2 ; [#uses=1] - %A = and i16 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i16 %B, -1 ; [#uses=1] - %C = or i16 %A, %Bnot ; [#uses=1] - ret i16 %C + %B = or i16 %arg1, %arg2 ; [#uses=1] + %A = and i16 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i16 %B, -1 ; [#uses=1] + %C = or i16 %A, %Bnot ; [#uses=1] + ret i16 %C } define i8 @equiv_i8_1(i8 signext %arg1, i8 signext %arg2) signext { - %A = and i8 %arg1, %arg2 ; [#uses=1] - %B = or i8 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i8 %B, -1 ; [#uses=1] - %C = or i8 %A, %Bnot ; [#uses=1] - ret i8 %C + %A = and i8 %arg1, %arg2 ; [#uses=1] + %B = or i8 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i8 %B, -1 ; [#uses=1] + %C = or i8 %A, %Bnot ; [#uses=1] + ret i8 %C } define i8 @equiv_i8_2(i8 signext %arg1, i8 signext %arg2) signext { - %B = or i8 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i8 %B, -1 ; [#uses=1] - %A = and i8 %arg1, %arg2 ; [#uses=1] - %C = or i8 %A, %Bnot ; [#uses=1] - ret i8 %C + %B = or i8 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i8 %B, -1 ; [#uses=1] + %A = and i8 %arg1, %arg2 ; [#uses=1] + %C = or i8 %A, %Bnot ; [#uses=1] + ret i8 %C } define i8 @equiv_i8_3(i8 signext %arg1, i8 signext %arg2) signext { - %B = or i8 %arg1, %arg2 ; [#uses=1] - %A = and i8 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i8 %B, -1 ; [#uses=1] - %C = or i8 %A, %Bnot ; [#uses=1] - ret i8 %C + %B = or i8 %arg1, %arg2 ; [#uses=1] + %A = and i8 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i8 %B, -1 ; [#uses=1] + %C = or i8 %A, %Bnot ; [#uses=1] + ret i8 %C } define i8 @equiv_u8_1(i8 zeroext %arg1, i8 zeroext %arg2) zeroext { - %A = and i8 %arg1, %arg2 ; [#uses=1] - %B = or i8 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i8 %B, -1 ; [#uses=1] - %C = or i8 %A, %Bnot ; [#uses=1] - ret i8 %C + %A = and i8 %arg1, %arg2 ; [#uses=1] + %B = or i8 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i8 %B, -1 ; [#uses=1] + %C = or i8 %A, %Bnot ; [#uses=1] + ret i8 %C } define i8 @equiv_u8_2(i8 zeroext %arg1, i8 zeroext %arg2) zeroext { - %B = or i8 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i8 %B, -1 ; [#uses=1] - %A = and i8 %arg1, %arg2 ; [#uses=1] - %C = or i8 %A, %Bnot ; [#uses=1] - ret i8 %C + %B = or i8 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i8 %B, -1 ; [#uses=1] + %A = and i8 %arg1, %arg2 ; [#uses=1] + %C = or i8 %A, %Bnot ; [#uses=1] + ret i8 %C } define i8 @equiv_u8_3(i8 zeroext %arg1, i8 zeroext %arg2) zeroext { - %B = or i8 %arg1, %arg2 ; [#uses=1] - %A = and i8 %arg1, %arg2 ; [#uses=1] - %Bnot = xor i8 %B, -1 ; [#uses=1] - %C = or i8 %A, %Bnot ; [#uses=1] - ret i8 %C + %B = or i8 %arg1, %arg2 ; [#uses=1] + %A = and i8 %arg1, %arg2 ; [#uses=1] + %Bnot = xor i8 %B, -1 ; [#uses=1] + %C = or i8 %A, %Bnot ; [#uses=1] + ret i8 %C } diff --git a/test/CodeGen/CellSPU/fcmp.ll b/test/CodeGen/CellSPU/fcmp.ll index d212bd51e40..aad77175d16 100644 --- a/test/CodeGen/CellSPU/fcmp.ll +++ b/test/CodeGen/CellSPU/fcmp.ll @@ -10,13 +10,13 @@ declare double @fabs(double) declare float @fabsf(float) define i1 @fcmp_eq(float %arg1, float %arg2) { - %A = fcmp oeq float %arg1, %arg2 ; [#uses=1] - ret i1 %A + %A = fcmp oeq float %arg1, %arg2 ; [#uses=1] + ret i1 %A } define i1 @fcmp_mag_eq(float %arg1, float %arg2) { - %A = call float @fabsf(float %arg1) ; [#uses=1] - %B = call float @fabsf(float %arg2) ; [#uses=1] - %C = fcmp oeq float %A, %B ; [#uses=1] - ret i1 %C + %A = call float @fabsf(float %arg1) ; [#uses=1] + %B = call float @fabsf(float %arg2) ; [#uses=1] + %C = fcmp oeq float %A, %B ; [#uses=1] + ret i1 %C } diff --git a/test/CodeGen/CellSPU/fdiv.ll b/test/CodeGen/CellSPU/fdiv.ll index 7d562625344..75af90e23f8 100644 --- a/test/CodeGen/CellSPU/fdiv.ll +++ b/test/CodeGen/CellSPU/fdiv.ll @@ -10,11 +10,11 @@ target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i target triple = "spu" define float @fdiv32(float %arg1, float %arg2) { - %A = fdiv float %arg1, %arg2 - ret float %A + %A = fdiv float %arg1, %arg2 + ret float %A } define <4 x float> @fdiv_v4f32(<4 x float> %arg1, <4 x float> %arg2) { - %A = fdiv <4 x float> %arg1, %arg2 - ret <4 x float> %A + %A = fdiv <4 x float> %arg1, %arg2 + ret <4 x float> %A } diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll index 9a7a7b5c37e..045bb052989 100644 --- a/test/CodeGen/CellSPU/fneg-fabs.ll +++ b/test/CodeGen/CellSPU/fneg-fabs.ll @@ -8,24 +8,24 @@ target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i target triple = "spu" define double @fneg_dp(double %X) { - %Y = sub double -0.000000e+00, %X - ret double %Y + %Y = sub double -0.000000e+00, %X + ret double %Y } define <2 x double> @fneg_dp_vec(<2 x double> %X) { - %Y = sub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X - ret <2 x double> %Y + %Y = sub <2 x double> < double -0.0000e+00, double -0.0000e+00 >, %X + ret <2 x double> %Y } define float @fneg_sp(float %X) { - %Y = sub float -0.000000e+00, %X - ret float %Y + %Y = sub float -0.000000e+00, %X + ret float %Y } define <4 x float> @fneg_sp_vec(<4 x float> %X) { - %Y = sub <4 x float> , %X - ret <4 x float> %Y + ret <4 x float> %Y } declare double @fabs(double) @@ -33,11 +33,11 @@ declare double @fabs(double) declare float @fabsf(float) define double @fabs_dp(double %X) { - %Y = call double @fabs( double %X ) ; [#uses=1] - ret double %Y + %Y = call double @fabs( double %X ) ; [#uses=1] + ret double %Y } define float @fabs_sp(float %X) { - %Y = call float @fabsf( float %X ) ; [#uses=1] - ret float %Y + %Y = call float @fabsf( float %X ) ; [#uses=1] + ret float %Y } diff --git a/test/CodeGen/CellSPU/immed16.ll b/test/CodeGen/CellSPU/immed16.ll index 603ec058e97..684305bd0c9 100644 --- a/test/CodeGen/CellSPU/immed16.ll +++ b/test/CodeGen/CellSPU/immed16.ll @@ -5,31 +5,31 @@ target triple = "spu" define i16 @test_1() { %x = alloca i16, align 16 - store i16 419, i16* %x ;; ILH via pattern + store i16 419, i16* %x ;; ILH via pattern ret i16 0 } define i16 @test_2() { %x = alloca i16, align 16 - store i16 1023, i16* %x ;; ILH via pattern + store i16 1023, i16* %x ;; ILH via pattern ret i16 0 } define i16 @test_3() { %x = alloca i16, align 16 - store i16 -1023, i16* %x ;; ILH via pattern + store i16 -1023, i16* %x ;; ILH via pattern ret i16 0 } define i16 @test_4() { %x = alloca i16, align 16 - store i16 32767, i16* %x ;; ILH via pattern + store i16 32767, i16* %x ;; ILH via pattern ret i16 0 } define i16 @test_5() { %x = alloca i16, align 16 - store i16 -32768, i16* %x ;; ILH via pattern + store i16 -32768, i16* %x ;; ILH via pattern ret i16 0 } diff --git a/test/CodeGen/CellSPU/immed32.ll b/test/CodeGen/CellSPU/immed32.ll index d269a45f5b5..2cadfff9dca 100644 --- a/test/CodeGen/CellSPU/immed32.ll +++ b/test/CodeGen/CellSPU/immed32.ll @@ -16,57 +16,57 @@ target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i target triple = "spu" define i32 @test_1() { - ret i32 4784128 ;; ILHU via pattern (0x49000) + ret i32 4784128 ;; ILHU via pattern (0x49000) } define i32 @test_2() { - ret i32 5308431 ;; ILHU/IOHL via pattern (0x5100f) + ret i32 5308431 ;; ILHU/IOHL via pattern (0x5100f) } define i32 @test_3() { - ret i32 511 ;; IL via pattern + ret i32 511 ;; IL via pattern } define i32 @test_4() { - ret i32 -512 ;; IL via pattern + ret i32 -512 ;; IL via pattern } ;; double float floatval ;; 0x4005bf0a80000000 0x402d|f854 2.718282 define float @float_const_1() { - ret float 0x4005BF0A80000000 ;; ILHU/IOHL + ret float 0x4005BF0A80000000 ;; ILHU/IOHL } ;; double float floatval ;; 0x3810000000000000 0x0080|0000 0.000000 define float @float_const_2() { - ret float 0x3810000000000000 ;; IL 128 + ret float 0x3810000000000000 ;; IL 128 } ;; double float floatval ;; 0x47efffffe0000000 0x7f7f|ffff NaN define float @float_const_3() { - ret float 0x47EFFFFFE0000000 ;; ILHU/IOHL via pattern + ret float 0x47EFFFFFE0000000 ;; ILHU/IOHL via pattern } ;; double float floatval ;; 0x400921fb60000000 0x4049|0fdb 3.141593 define float @float_const_4() { - ret float 0x400921FB60000000 ;; ILHU/IOHL via pattern + ret float 0x400921FB60000000 ;; ILHU/IOHL via pattern } ;; double float floatval ;; 0xbff6a09e60000000 0xbfb5|04f3 -1.414214 define float @float_const_5() { - ret float 0xBFF6A09E60000000 ;; ILHU/IOHL via pattern + ret float 0xBFF6A09E60000000 ;; ILHU/IOHL via pattern } ;; double float floatval ;; 0x3ff6a09e60000000 0x3fb5|04f3 1.414214 define float @float_const_6() { - ret float 0x3FF6A09E60000000 ;; ILHU/IOHL via pattern + ret float 0x3FF6A09E60000000 ;; ILHU/IOHL via pattern } define float @float_const_7() { - ret float 0.000000e+00 ;; IL 0 via pattern + ret float 0.000000e+00 ;; IL 0 via pattern } diff --git a/test/CodeGen/CellSPU/immed64.ll b/test/CodeGen/CellSPU/immed64.ll index ecb2107f966..f92cdf434bb 100644 --- a/test/CodeGen/CellSPU/immed64.ll +++ b/test/CodeGen/CellSPU/immed64.ll @@ -1,5 +1,6 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s ; RUN: grep lqa %t1.s | count 13 +; RUN: grep il %t1.s | count 21 ; RUN: grep shufb %t1.s | count 13 ; RUN: grep 65520 %t1.s | count 1 ; RUN: grep 43981 %t1.s | count 1 @@ -52,6 +53,10 @@ define i64 @i64_const_8() { ret i64 0 ;; IL } +define i64 @i64_const_9() { + ret i64 -1 ;; IL +} + ; 0x4005bf0a8b145769 -> ; (ILHU 0x4005 [16389]/IOHL 0xbf0a [48906]) ; (ILHU 0x8b14 [35604]/IOHL 0x5769 [22377]) diff --git a/test/CodeGen/CellSPU/int2fp.ll b/test/CodeGen/CellSPU/int2fp.ll index 009229ea7bd..ee3076594ad 100644 --- a/test/CodeGen/CellSPU/int2fp.ll +++ b/test/CodeGen/CellSPU/int2fp.ll @@ -11,31 +11,31 @@ target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i target triple = "spu" define float @sitofp_i32(i32 %arg1) { - %A = sitofp i32 %arg1 to float ; [#uses=1] - ret float %A + %A = sitofp i32 %arg1 to float ; [#uses=1] + ret float %A } define float @uitofp_u32(i32 %arg1) { - %A = uitofp i32 %arg1 to float ; [#uses=1] - ret float %A + %A = uitofp i32 %arg1 to float ; [#uses=1] + ret float %A } define float @sitofp_i16(i16 %arg1) { - %A = sitofp i16 %arg1 to float ; [#uses=1] - ret float %A + %A = sitofp i16 %arg1 to float ; [#uses=1] + ret float %A } define float @uitofp_i16(i16 %arg1) { - %A = uitofp i16 %arg1 to float ; [#uses=1] - ret float %A + %A = uitofp i16 %arg1 to float ; [#uses=1] + ret float %A } define float @sitofp_i8(i8 %arg1) { - %A = sitofp i8 %arg1 to float ; [#uses=1] - ret float %A + %A = sitofp i8 %arg1 to float ; [#uses=1] + ret float %A } define float @uitofp_i8(i8 %arg1) { - %A = uitofp i8 %arg1 to float ; [#uses=1] - ret float %A + %A = uitofp i8 %arg1 to float ; [#uses=1] + ret float %A } diff --git a/test/CodeGen/CellSPU/intrinsics_branch.ll b/test/CodeGen/CellSPU/intrinsics_branch.ll index ead235bf521..87ad18211a2 100644 --- a/test/CodeGen/CellSPU/intrinsics_branch.ll +++ b/test/CodeGen/CellSPU/intrinsics_branch.ll @@ -36,115 +36,115 @@ declare <16 x i8> @llvm.spu.si.clgtbi(<16 x i8>, i8) define <4 x i32> @test(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y + call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y } define <4 x i32> @ceqtest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y + call <4 x i32> @llvm.spu.si.ceq(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y } define <8 x i16> @ceqhtest(<8 x i16> %A, <8 x i16> %B) { - call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y + call <8 x i16> @llvm.spu.si.ceqh(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y } define <16 x i8> @ceqbtest(<16 x i8> %A, <16 x i8> %B) { - call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y + call <16 x i8> @llvm.spu.si.ceqb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y } define <4 x i32> @ceqitest(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y + call <4 x i32> @llvm.spu.si.ceqi(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y } define <8 x i16> @ceqhitest(<8 x i16> %A) { - call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y + call <8 x i16> @llvm.spu.si.ceqhi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y } define <16 x i8> @ceqbitest(<16 x i8> %A) { - call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y + call <16 x i8> @llvm.spu.si.ceqbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y } define <4 x i32> @cgttest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y + call <4 x i32> @llvm.spu.si.cgt(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y } define <8 x i16> @cgthtest(<8 x i16> %A, <8 x i16> %B) { - call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y + call <8 x i16> @llvm.spu.si.cgth(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y } define <16 x i8> @cgtbtest(<16 x i8> %A, <16 x i8> %B) { - call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y + call <16 x i8> @llvm.spu.si.cgtb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y } define <4 x i32> @cgtitest(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y + call <4 x i32> @llvm.spu.si.cgti(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y } define <8 x i16> @cgthitest(<8 x i16> %A) { - call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y + call <8 x i16> @llvm.spu.si.cgthi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y } define <16 x i8> @cgtbitest(<16 x i8> %A) { - call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y + call <16 x i8> @llvm.spu.si.cgtbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y } define <4 x i32> @clgttest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y + call <4 x i32> @llvm.spu.si.clgt(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y } define <8 x i16> @clgthtest(<8 x i16> %A, <8 x i16> %B) { - call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y + call <8 x i16> @llvm.spu.si.clgth(<8 x i16> %A, <8 x i16> %B) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y } define <16 x i8> @clgtbtest(<16 x i8> %A, <16 x i8> %B) { - call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y + call <16 x i8> @llvm.spu.si.clgtb(<16 x i8> %A, <16 x i8> %B) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y } define <4 x i32> @clgtitest(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y + call <4 x i32> @llvm.spu.si.clgti(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y } define <8 x i16> @clgthitest(<8 x i16> %A) { - call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y + call <8 x i16> @llvm.spu.si.clgthi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y } define <16 x i8> @clgtbitest(<16 x i8> %A) { - call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65) - %Y = bitcast <16 x i8> %1 to <16 x i8> - ret <16 x i8> %Y + call <16 x i8> @llvm.spu.si.clgtbi(<16 x i8> %A, i8 65) + %Y = bitcast <16 x i8> %1 to <16 x i8> + ret <16 x i8> %Y } diff --git a/test/CodeGen/CellSPU/intrinsics_float.ll b/test/CodeGen/CellSPU/intrinsics_float.ll index 64a5b73374d..c18f8deb385 100644 --- a/test/CodeGen/CellSPU/intrinsics_float.ll +++ b/test/CodeGen/CellSPU/intrinsics_float.ll @@ -28,15 +28,15 @@ declare <4 x float> @llvm.spu.si.fnms(<4 x float>, <4 x float>, <4 x float>) declare <4 x float> @llvm.spu.si.fms(<4 x float>, <4 x float>, <4 x float>) define <4 x i32> @test(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y + call <4 x i32> @llvm.spu.si.shli(<4 x i32> %A, i8 3) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y } define <4 x float> @fatest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y + call <4 x float> @llvm.spu.si.fa(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y } define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) { @@ -46,49 +46,49 @@ define <4 x float> @fstest(<4 x float> %A, <4 x float> %B) { } define <4 x float> @fmtest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y + call <4 x float> @llvm.spu.si.fm(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y } define <4 x float> @fceqtest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y + call <4 x float> @llvm.spu.si.fceq(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y } define <4 x float> @fcmeqtest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y + call <4 x float> @llvm.spu.si.fcmeq(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y } define <4 x float> @fcgttest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y + call <4 x float> @llvm.spu.si.fcgt(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y } define <4 x float> @fcmgttest(<4 x float> %A, <4 x float> %B) { - call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y + call <4 x float> @llvm.spu.si.fcmgt(<4 x float> %A, <4 x float> %B) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y } define <4 x float> @fmatest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { - call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y + call <4 x float> @llvm.spu.si.fma(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y } define <4 x float> @fnmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { - call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y + call <4 x float> @llvm.spu.si.fnms(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y } define <4 x float> @fmstest(<4 x float> %A, <4 x float> %B, <4 x float> %C) { - call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C) - %Y = bitcast <4 x float> %1 to <4 x float> - ret <4 x float> %Y + call <4 x float> @llvm.spu.si.fms(<4 x float> %A, <4 x float> %B, <4 x float> %C) + %Y = bitcast <4 x float> %1 to <4 x float> + ret <4 x float> %Y } diff --git a/test/CodeGen/CellSPU/intrinsics_logical.ll b/test/CodeGen/CellSPU/intrinsics_logical.ll index b8af8adb2b7..843340b7454 100644 --- a/test/CodeGen/CellSPU/intrinsics_logical.ll +++ b/test/CodeGen/CellSPU/intrinsics_logical.ll @@ -25,9 +25,9 @@ declare <4 x i32> @llvm.spu.si.nand(<4 x i32>, <4 x i32>) declare <4 x i32> @llvm.spu.si.nor(<4 x i32>, <4 x i32>) define <4 x i32> @andtest(<4 x i32> %A, <4 x i32> %B) { - call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y + call <4 x i32> @llvm.spu.si.and(<4 x i32> %A, <4 x i32> %B) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y } define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) { @@ -37,13 +37,13 @@ define <4 x i32> @andctest(<4 x i32> %A, <4 x i32> %B) { } define <4 x i32> @anditest(<4 x i32> %A) { - call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65) - %Y = bitcast <4 x i32> %1 to <4 x i32> - ret <4 x i32> %Y + call <4 x i32> @llvm.spu.si.andi(<4 x i32> %A, i16 65) + %Y = bitcast <4 x i32> %1 to <4 x i32> + ret <4 x i32> %Y } define <8 x i16> @andhitest(<8 x i16> %A) { - call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65) - %Y = bitcast <8 x i16> %1 to <8 x i16> - ret <8 x i16> %Y + call <8 x i16> @llvm.spu.si.andhi(<8 x i16> %A, i16 65) + %Y = bitcast <8 x i16> %1 to <8 x i16> + ret <8 x i16> %Y } diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll index a67c572a7c6..843505f1359 100644 --- a/test/CodeGen/CellSPU/mul_ops.ll +++ b/test/CodeGen/CellSPU/mul_ops.ll @@ -54,36 +54,36 @@ entry: define i32 @mul_i32_1(i32 %arg1, i32 %arg2) { entry: - %A = mul i32 %arg2, %arg1 - ret i32 %A + %A = mul i32 %arg2, %arg1 + ret i32 %A } define i32 @mul_i32_2(i32 %arg1, i32 %arg2) { entry: - %A = mul i32 %arg1, %arg2 - ret i32 %A + %A = mul i32 %arg1, %arg2 + ret i32 %A } define i16 @mul_i16_1(i16 %arg1, i16 %arg2) { entry: - %A = mul i16 %arg2, %arg1 - ret i16 %A + %A = mul i16 %arg2, %arg1 + ret i16 %A } define i16 @mul_i16_2(i16 %arg1, i16 %arg2) { entry: - %A = mul i16 %arg1, %arg2 - ret i16 %A + %A = mul i16 %arg1, %arg2 + ret i16 %A } define i8 @mul_i8_1(i8 %arg1, i8 %arg2) { entry: - %A = mul i8 %arg2, %arg1 - ret i8 %A + %A = mul i8 %arg2, %arg1 + ret i8 %A } define i8 @mul_i8_2(i8 %arg1, i8 %arg2) { entry: - %A = mul i8 %arg1, %arg2 - ret i8 %A + %A = mul i8 %arg1, %arg2 + ret i8 %A } diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll index 91e3e2145ab..4e9da8f1297 100644 --- a/test/CodeGen/CellSPU/or_ops.ll +++ b/test/CodeGen/CellSPU/or_ops.ll @@ -39,33 +39,33 @@ define <16 x i8> @or_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2) { } define i32 @or_i32_1(i32 %arg1, i32 %arg2) { - %A = or i32 %arg2, %arg1 - ret i32 %A + %A = or i32 %arg2, %arg1 + ret i32 %A } define i32 @or_i32_2(i32 %arg1, i32 %arg2) { - %A = or i32 %arg1, %arg2 - ret i32 %A + %A = or i32 %arg1, %arg2 + ret i32 %A } define i16 @or_i16_1(i16 %arg1, i16 %arg2) { - %A = or i16 %arg2, %arg1 - ret i16 %A + %A = or i16 %arg2, %arg1 + ret i16 %A } define i16 @or_i16_2(i16 %arg1, i16 %arg2) { - %A = or i16 %arg1, %arg2 - ret i16 %A + %A = or i16 %arg1, %arg2 + ret i16 %A } define i8 @or_i8_1(i8 %arg1, i8 %arg2) { - %A = or i8 %arg2, %arg1 - ret i8 %A + %A = or i8 %arg2, %arg1 + ret i8 %A } define i8 @or_i8_2(i8 %arg1, i8 %arg2) { - %A = or i8 %arg1, %arg2 - ret i8 %A + %A = or i8 %arg1, %arg2 + ret i8 %A } ; ORC instruction generation: @@ -126,57 +126,57 @@ define <16 x i8> @orc_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2) { } define i32 @orc_i32_1(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg2, -1 - %B = or i32 %A, %arg1 - ret i32 %B + %A = xor i32 %arg2, -1 + %B = or i32 %A, %arg1 + ret i32 %B } define i32 @orc_i32_2(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg1, -1 - %B = or i32 %A, %arg2 - ret i32 %B + %A = xor i32 %arg1, -1 + %B = or i32 %A, %arg2 + ret i32 %B } define i32 @orc_i32_3(i32 %arg1, i32 %arg2) { - %A = xor i32 %arg2, -1 - %B = or i32 %arg1, %A - ret i32 %B + %A = xor i32 %arg2, -1 + %B = or i32 %arg1, %A + ret i32 %B } define i16 @orc_i16_1(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg2, -1 - %B = or i16 %A, %arg1 - ret i16 %B + %A = xor i16 %arg2, -1 + %B = or i16 %A, %arg1 + ret i16 %B } define i16 @orc_i16_2(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg1, -1 - %B = or i16 %A, %arg2 - ret i16 %B + %A = xor i16 %arg1, -1 + %B = or i16 %A, %arg2 + ret i16 %B } define i16 @orc_i16_3(i16 %arg1, i16 %arg2) { - %A = xor i16 %arg2, -1 - %B = or i16 %arg1, %A - ret i16 %B + %A = xor i16 %arg2, -1 + %B = or i16 %arg1, %A + ret i16 %B } define i8 @orc_i8_1(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg2, -1 - %B = or i8 %A, %arg1 - ret i8 %B + %A = xor i8 %arg2, -1 + %B = or i8 %A, %arg1 + ret i8 %B } define i8 @orc_i8_2(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg1, -1 - %B = or i8 %A, %arg2 - ret i8 %B + %A = xor i8 %arg1, -1 + %B = or i8 %A, %arg2 + ret i8 %B } define i8 @orc_i8_3(i8 %arg1, i8 %arg2) { - %A = xor i8 %arg2, -1 - %B = or i8 %arg1, %A - ret i8 %B + %A = xor i8 %arg2, -1 + %B = or i8 %arg1, %A + ret i8 %B } ; ORI instruction generation (i32 data type): diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll index 0386838a555..55104a4ceb7 100644 --- a/test/CodeGen/CellSPU/rotate_ops.ll +++ b/test/CodeGen/CellSPU/rotate_ops.ll @@ -115,45 +115,45 @@ define i16 @rotr16_2(i16 %arg1, i16 %arg) { } define i16 @rotli16(i16 %A) { - %B = shl i16 %A, 5 ; [#uses=1] - %C = lshr i16 %A, 11 ; [#uses=1] - %D = or i16 %B, %C ; [#uses=1] - ret i16 %D + %B = shl i16 %A, 5 ; [#uses=1] + %C = lshr i16 %A, 11 ; [#uses=1] + %D = or i16 %B, %C ; [#uses=1] + ret i16 %D } define i16 @rotri16(i16 %A) { - %B = lshr i16 %A, 5 ; [#uses=1] - %C = shl i16 %A, 11 ; [#uses=1] - %D = or i16 %B, %C ; [#uses=1] - ret i16 %D + %B = lshr i16 %A, 5 ; [#uses=1] + %C = shl i16 %A, 11 ; [#uses=1] + %D = or i16 %B, %C ; [#uses=1] + ret i16 %D } define i8 @rotl8(i8 %A, i8 %Amt) { - %B = shl i8 %A, %Amt ; [#uses=1] - %Amt2 = sub i8 8, %Amt ; [#uses=1] - %C = lshr i8 %A, %Amt2 ; [#uses=1] - %D = or i8 %B, %C ; [#uses=1] - ret i8 %D + %B = shl i8 %A, %Amt ; [#uses=1] + %Amt2 = sub i8 8, %Amt ; [#uses=1] + %C = lshr i8 %A, %Amt2 ; [#uses=1] + %D = or i8 %B, %C ; [#uses=1] + ret i8 %D } define i8 @rotr8(i8 %A, i8 %Amt) { - %B = lshr i8 %A, %Amt ; [#uses=1] - %Amt2 = sub i8 8, %Amt ; [#uses=1] - %C = shl i8 %A, %Amt2 ; [#uses=1] - %D = or i8 %B, %C ; [#uses=1] - ret i8 %D + %B = lshr i8 %A, %Amt ; [#uses=1] + %Amt2 = sub i8 8, %Amt ; [#uses=1] + %C = shl i8 %A, %Amt2 ; [#uses=1] + %D = or i8 %B, %C ; [#uses=1] + ret i8 %D } define i8 @rotli8(i8 %A) { - %B = shl i8 %A, 5 ; [#uses=1] - %C = lshr i8 %A, 3 ; [#uses=1] - %D = or i8 %B, %C ; [#uses=1] - ret i8 %D + %B = shl i8 %A, 5 ; [#uses=1] + %C = lshr i8 %A, 3 ; [#uses=1] + %D = or i8 %B, %C ; [#uses=1] + ret i8 %D } define i8 @rotri8(i8 %A) { - %B = lshr i8 %A, 5 ; [#uses=1] - %C = shl i8 %A, 3 ; [#uses=1] - %D = or i8 %B, %C ; [#uses=1] - ret i8 %D + %B = lshr i8 %A, 5 ; [#uses=1] + %C = shl i8 %A, 3 ; [#uses=1] + %D = or i8 %B, %C ; [#uses=1] + ret i8 %D } diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll index b1600bf8f2b..3a7334d808c 100644 --- a/test/CodeGen/CellSPU/select_bits.ll +++ b/test/CodeGen/CellSPU/select_bits.ll @@ -1,296 +1,569 @@ ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s -; RUN: grep selb %t1.s | count 160 -; RUN: grep and %t1.s | count 2 -; RUN: grep xsbh %t1.s | count 1 -; RUN: grep xshw %t1.s | count 2 +; RUN: grep selb %t1.s | count 280 + target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" -define <16 x i8> @selb_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { - %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %arg1 ; <<16 x i8>> [#uses=1] - %C = and <16 x i8> %arg2, %arg3 ; <<16 x i8>> [#uses=1] - %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] - ret <16 x i8> %D +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; v2i64 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define <2 x i64> @selb_v2i64_01(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %C = and <2 x i64> %rC, %rB + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %A, %rA + %D = or <2 x i64> %C, %B + ret <2 x i64> %D } -define <16 x i8> @selb_v16i8_11(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { - %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %arg1, %A ; <<16 x i8>> [#uses=1] - %C = and <16 x i8> %arg3, %arg2 ; <<16 x i8>> [#uses=1] - %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] - ret <16 x i8> %D +; (or (and rB, rC), (and (not rC), rA)) +define <2 x i64> @selb_v2i64_02(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %C = and <2 x i64> %rB, %rC + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %A, %rA + %D = or <2 x i64> %C, %B + ret <2 x i64> %D } -define <16 x i8> @selb_v16i8_12(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { - %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %arg1, %A ; <<16 x i8>> [#uses=1] - %C = and <16 x i8> %arg2, %arg3 ; <<16 x i8>> [#uses=1] - %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] - ret <16 x i8> %D +; (or (and (not rC), rA), (and rB, rC)) +define <2 x i64> @selb_v2i64_03(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %A, %rA + %C = and <2 x i64> %rB, %rC + %D = or <2 x i64> %C, %B + ret <2 x i64> %D } -define <16 x i8> @selb_v16i8_13(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { - %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %arg1 ; <<16 x i8>> [#uses=1] - %C = and <16 x i8> %arg2, %arg3 ; <<16 x i8>> [#uses=1] - %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] - ret <16 x i8> %D +; (or (and (not rC), rA), (and rC, rB)) +define <2 x i64> @selb_v2i64_04(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %A, %rA + %C = and <2 x i64> %rC, %rB + %D = or <2 x i64> %C, %B + ret <2 x i64> %D } -define <16 x i8> @selb_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { - %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %arg2 ; <<16 x i8>> [#uses=1] - %C = and <16 x i8> %arg3, %arg1 ; <<16 x i8>> [#uses=1] - %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] - ret <16 x i8> %D +; (or (and rC, rB), (and rA, (not rC))) +define <2 x i64> @selb_v2i64_05(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %C = and <2 x i64> %rC, %rB + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %rA, %A + %D = or <2 x i64> %C, %B + ret <2 x i64> %D } -define <16 x i8> @selb_v16i8_21(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { - %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %arg2, %A ; <<16 x i8>> [#uses=1] - %C = and <16 x i8> %arg3, %arg1 ; <<16 x i8>> [#uses=1] - %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] - ret <16 x i8> %D +; (or (and rB, rC), (and rA, (not rC))) +define <2 x i64> @selb_v2i64_06(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %C = and <2 x i64> %rB, %rC + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %rA, %A + %D = or <2 x i64> %C, %B + ret <2 x i64> %D } -define <16 x i8> @selb_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { - %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %arg1 ; <<16 x i8>> [#uses=1] - %C = and <16 x i8> %arg3, %arg2 ; <<16 x i8>> [#uses=1] - %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] - ret <16 x i8> %D +; (or (and rA, (not rC)), (and rB, rC)) +define <2 x i64> @selb_v2i64_07(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %rA, %A + %C = and <2 x i64> %rB, %rC + %D = or <2 x i64> %C, %B + ret <2 x i64> %D } -define <16 x i8> @selb_v16i8_4(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { - %C = and <16 x i8> %arg3, %arg2 ; <<16 x i8>> [#uses=1] - %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %arg1 ; <<16 x i8>> [#uses=1] - %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] - ret <16 x i8> %D +; (or (and rA, (not rC)), (and rC, rB)) +define <2 x i64> @selb_v2i64_08(<2 x i64> %rA, <2 x i64> %rB, <2 x i64> %rC) { + %A = xor <2 x i64> %rC, < i64 -1, i64 -1 > + %B = and <2 x i64> %rA, %A + %C = and <2 x i64> %rC, %rB + %D = or <2 x i64> %C, %B + ret <2 x i64> %D } -define <16 x i8> @selb_v16i8_41(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { - %C = and <16 x i8> %arg2, %arg3 ; <<16 x i8>> [#uses=1] - %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %arg1, %A ; <<16 x i8>> [#uses=1] - %D = or <16 x i8> %C, %B ; <<16 x i8>> [#uses=1] - ret <16 x i8> %D +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; v4i32 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define <4 x i32> @selb_v4i32_01(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %C = and <4 x i32> %rC, %rB + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %A, %rA + %D = or <4 x i32> %C, %B + ret <4 x i32> %D } -define <16 x i8> @selb_v16i8_42(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { - %C = and <16 x i8> %arg2, %arg3 ; <<16 x i8>> [#uses=1] - %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %arg1 ; <<16 x i8>> [#uses=1] - %D = or <16 x i8> %C, %B ; <<16 x i8>> [#uses=1] - ret <16 x i8> %D +; (or (and rB, rC), (and (not rC), rA)) +define <4 x i32> @selb_v4i32_02(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %C = and <4 x i32> %rB, %rC + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %A, %rA + %D = or <4 x i32> %C, %B + ret <4 x i32> %D } -define <16 x i8> @selb_v16i8_5(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) { - %C = and <16 x i8> %arg2, %arg1 ; <<16 x i8>> [#uses=1] - %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, - i8 -1, i8 -1, i8 -1, i8 -1 > - %B = and <16 x i8> %A, %arg3 ; <<16 x i8>> [#uses=1] - %D = or <16 x i8> %B, %C ; <<16 x i8>> [#uses=1] - ret <16 x i8> %D +; (or (and (not rC), rA), (and rB, rC)) +define <4 x i32> @selb_v4i32_03(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1 > + %B = and <4 x i32> %A, %rA + %C = and <4 x i32> %rB, %rC + %D = or <4 x i32> %C, %B + ret <4 x i32> %D } -define <8 x i16> @selb_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { - %A = xor <8 x i16> %arg3, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1 > - %B = and <8 x i16> %A, %arg1 ; <<8 x i16>> [#uses=1] - %C = and <8 x i16> %arg2, %arg3 ; <<8 x i16>> [#uses=1] - %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] - ret <8 x i16> %D +; (or (and (not rC), rA), (and rC, rB)) +define <4 x i32> @selb_v4i32_04(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %A, %rA + %C = and <4 x i32> %rC, %rB + %D = or <4 x i32> %C, %B + ret <4 x i32> %D } -define <8 x i16> @selb_v8i16_11(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { - %A = xor <8 x i16> %arg3, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1 > - %B = and <8 x i16> %arg1, %A ; <<8 x i16>> [#uses=1] - %C = and <8 x i16> %arg3, %arg2 ; <<8 x i16>> [#uses=1] - %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] - ret <8 x i16> %D +; (or (and rC, rB), (and rA, (not rC))) +define <4 x i32> @selb_v4i32_05(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %C = and <4 x i32> %rC, %rB + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %rA, %A + %D = or <4 x i32> %C, %B + ret <4 x i32> %D } -define <8 x i16> @selb_v8i16_12(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { - %A = xor <8 x i16> %arg3, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1 > - %B = and <8 x i16> %arg1, %A ; <<8 x i16>> [#uses=1] - %C = and <8 x i16> %arg2, %arg3 ; <<8 x i16>> [#uses=1] - %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] - ret <8 x i16> %D +; (or (and rB, rC), (and rA, (not rC))) +define <4 x i32> @selb_v4i32_06(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %C = and <4 x i32> %rB, %rC + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %rA, %A + %D = or <4 x i32> %C, %B + ret <4 x i32> %D } -define <8 x i16> @selb_v8i16_13(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { - %A = xor <8 x i16> %arg3, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1 > - %B = and <8 x i16> %A, %arg1 ; <<8 x i16>> [#uses=1] - %C = and <8 x i16> %arg2, %arg3 ; <<8 x i16>> [#uses=1] - %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] - ret <8 x i16> %D +; (or (and rA, (not rC)), (and rB, rC)) +define <4 x i32> @selb_v4i32_07(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %rA, %A + %C = and <4 x i32> %rB, %rC + %D = or <4 x i32> %C, %B + ret <4 x i32> %D } -define <8 x i16> @selb_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { - %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1 > - %B = and <8 x i16> %A, %arg2 ; <<8 x i16>> [#uses=1] - %C = and <8 x i16> %arg3, %arg1 ; <<8 x i16>> [#uses=1] - %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] - ret <8 x i16> %D +; (or (and rA, (not rC)), (and rC, rB)) +define <4 x i32> @selb_v4i32_08(<4 x i32> %rA, <4 x i32> %rB, <4 x i32> %rC) { + %A = xor <4 x i32> %rC, < i32 -1, i32 -1, i32 -1, i32 -1> + %B = and <4 x i32> %rA, %A + %C = and <4 x i32> %rC, %rB + %D = or <4 x i32> %C, %B + ret <4 x i32> %D } -define <8 x i16> @selb_v8i16_21(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { - %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1 > - %B = and <8 x i16> %arg2, %A ; <<8 x i16>> [#uses=1] - %C = and <8 x i16> %arg3, %arg1 ; <<8 x i16>> [#uses=1] - %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] - ret <8 x i16> %D +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; v8i16 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define <8 x i16> @selb_v8i16_01(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %C = and <8 x i16> %rC, %rB + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %A, %rA + %D = or <8 x i16> %C, %B + ret <8 x i16> %D } -define <8 x i16> @selb_v8i16_3(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { - %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1 > - %B = and <8 x i16> %A, %arg1 ; <<8 x i16>> [#uses=1] - %C = and <8 x i16> %arg3, %arg2 ; <<8 x i16>> [#uses=1] - %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] - ret <8 x i16> %D +; (or (and rB, rC), (and (not rC), rA)) +define <8 x i16> @selb_v8i16_02(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %C = and <8 x i16> %rB, %rC + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %A, %rA + %D = or <8 x i16> %C, %B + ret <8 x i16> %D } -define <8 x i16> @selb_v8i16_4(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { - %C = and <8 x i16> %arg3, %arg2 ; <<8 x i16>> [#uses=1] - %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1 > - %B = and <8 x i16> %A, %arg1 ; <<8 x i16>> [#uses=1] - %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] - ret <8 x i16> %D +; (or (and (not rC), rA), (and rB, rC)) +define <8 x i16> @selb_v8i16_03(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %A, %rA + %C = and <8 x i16> %rB, %rC + %D = or <8 x i16> %C, %B + ret <8 x i16> %D } -define <8 x i16> @selb_v8i16_41(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { - %C = and <8 x i16> %arg2, %arg3 ; <<8 x i16>> [#uses=1] - %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1 > - %B = and <8 x i16> %arg1, %A ; <<8 x i16>> [#uses=1] - %D = or <8 x i16> %C, %B ; <<8 x i16>> [#uses=1] - ret <8 x i16> %D +; (or (and (not rC), rA), (and rC, rB)) +define <8 x i16> @selb_v8i16_04(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %A, %rA + %C = and <8 x i16> %rC, %rB + %D = or <8 x i16> %C, %B + ret <8 x i16> %D } -define <8 x i16> @selb_v8i16_42(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { - %C = and <8 x i16> %arg2, %arg3 ; <<8 x i16>> [#uses=1] - %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1 > - %B = and <8 x i16> %A, %arg1 ; <<8 x i16>> [#uses=1] - %D = or <8 x i16> %C, %B ; <<8 x i16>> [#uses=1] - ret <8 x i16> %D +; (or (and rC, rB), (and rA, (not rC))) +define <8 x i16> @selb_v8i16_05(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %C = and <8 x i16> %rC, %rB + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %rA, %A + %D = or <8 x i16> %C, %B + ret <8 x i16> %D } -define <8 x i16> @selb_v8i16_5(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) { - %C = and <8 x i16> %arg2, %arg1 ; <<8 x i16>> [#uses=1] - %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, - i16 -1, i16 -1 > - %B = and <8 x i16> %A, %arg3 ; <<8 x i16>> [#uses=1] - %D = or <8 x i16> %B, %C ; <<8 x i16>> [#uses=1] - ret <8 x i16> %D +; (or (and rB, rC), (and rA, (not rC))) +define <8 x i16> @selb_v8i16_06(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %C = and <8 x i16> %rB, %rC + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %rA, %A + %D = or <8 x i16> %C, %B + ret <8 x i16> %D } -define <4 x i32> @selb_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) { - %tmpnot = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %tmp2 = and <4 x i32> %tmpnot, %arg1 ; <<4 x i32>> [#uses=1] - %tmp5 = and <4 x i32> %arg2, %arg3 ; <<4 x i32>> [#uses=1] - %tmp6 = or <4 x i32> %tmp2, %tmp5 ; <<4 x i32>> [#uses=1] - ret <4 x i32> %tmp6 +; (or (and rA, (not rC)), (and rB, rC)) +define <8 x i16> @selb_v8i16_07(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %rA, %A + %C = and <8 x i16> %rB, %rC + %D = or <8 x i16> %C, %B + ret <8 x i16> %D } -define <4 x i32> @selb_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) { - %tmpnot = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %tmp2 = and <4 x i32> %tmpnot, %arg1 ; <<4 x i32>> [#uses=1] - %tmp5 = and <4 x i32> %arg2, %arg3 ; <<4 x i32>> [#uses=1] - %tmp6 = or <4 x i32> %tmp2, %tmp5 ; <<4 x i32>> [#uses=1] - ret <4 x i32> %tmp6 +; (or (and rA, (not rC)), (and rC, rB)) +define <8 x i16> @selb_v8i16_08(<8 x i16> %rA, <8 x i16> %rB, <8 x i16> %rC) { + %A = xor <8 x i16> %rC, < i16 -1, i16 -1, i16 -1, i16 -1, + i16 -1, i16 -1, i16 -1, i16 -1 > + %B = and <8 x i16> %rA, %A + %C = and <8 x i16> %rC, %rB + %D = or <8 x i16> %C, %B + ret <8 x i16> %D } -define <4 x i32> @selb_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) { - %tmpnot = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %tmp2 = and <4 x i32> %tmpnot, %arg1 ; <<4 x i32>> [#uses=1] - %tmp5 = and <4 x i32> %arg3, %arg2 ; <<4 x i32>> [#uses=1] - %tmp6 = or <4 x i32> %tmp2, %tmp5 ; <<4 x i32>> [#uses=1] - ret <4 x i32> %tmp6 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; v16i8 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define <16 x i8> @selb_v16i8_01(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %C = and <16 x i8> %rC, %rB + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %rA + %D = or <16 x i8> %C, %B + ret <16 x i8> %D } -define <4 x i32> @selb_v4i32_4(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) { - %tmp2 = and <4 x i32> %arg3, %arg2 ; <<4 x i32>> [#uses=1] - %tmp3not = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %tmp5 = and <4 x i32> %tmp3not, %arg1 ; <<4 x i32>> [#uses=1] - %tmp6 = or <4 x i32> %tmp2, %tmp5 ; <<4 x i32>> [#uses=1] - ret <4 x i32> %tmp6 +; (or (and rB, rC), (and (not rC), rA)) +define <16 x i8> @selb_v16i8_02(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %C = and <16 x i8> %rB, %rC + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %rA + %D = or <16 x i8> %C, %B + ret <16 x i8> %D } -define <4 x i32> @selb_v4i32_5(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) { - %tmp2 = and <4 x i32> %arg3, %arg2 ; <<4 x i32>> [#uses=1] - %tmp3not = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] - %tmp5 = and <4 x i32> %tmp3not, %arg1 ; <<4 x i32>> [#uses=1] - %tmp6 = or <4 x i32> %tmp2, %tmp5 ; <<4 x i32>> [#uses=1] - ret <4 x i32> %tmp6 +; (or (and (not rC), rA), (and rB, rC)) +define <16 x i8> @selb_v16i8_03(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %rA + %C = and <16 x i8> %rB, %rC + %D = or <16 x i8> %C, %B + ret <16 x i8> %D } -define i32 @selb_i32(i32 %arg1, i32 %arg2, i32 %arg3) { - %tmp1not = xor i32 %arg3, -1 ; [#uses=1] - %tmp3 = and i32 %tmp1not, %arg1 ; [#uses=1] - %tmp6 = and i32 %arg3, %arg2 ; [#uses=1] - %tmp7 = or i32 %tmp3, %tmp6 ; [#uses=1] - ret i32 %tmp7 +; (or (and (not rC), rA), (and rC, rB)) +define <16 x i8> @selb_v16i8_04(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %A, %rA + %C = and <16 x i8> %rC, %rB + %D = or <16 x i8> %C, %B + ret <16 x i8> %D } -define i16 @selb_i16(i16 signext %arg1, i16 signext %arg2, i16 signext %arg3) signext { - %tmp3 = and i16 %arg3, %arg1 ; [#uses=1] - %tmp4not = xor i16 %arg3, -1 ; [#uses=1] - %tmp6 = and i16 %tmp4not, %arg2 ; [#uses=1] - %retval1011 = or i16 %tmp3, %tmp6 ; [#uses=1] - ret i16 %retval1011 +; (or (and rC, rB), (and rA, (not rC))) +define <16 x i8> @selb_v16i8_05(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %C = and <16 x i8> %rC, %rB + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %rA, %A + %D = or <16 x i8> %C, %B + ret <16 x i8> %D } -define i16 @selb_i16u(i16 zeroext %arg1, i16 zeroext %arg2, i16 zeroext %arg3) zeroext { - %tmp3 = and i16 %arg3, %arg1 ; [#uses=1] - %tmp4not = xor i16 %arg3, -1 ; [#uses=1] - %tmp6 = and i16 %tmp4not, %arg2 ; [#uses=1] - %retval1011 = or i16 %tmp3, %tmp6 ; [#uses=1] - ret i16 %retval1011 +; (or (and rB, rC), (and rA, (not rC))) +define <16 x i8> @selb_v16i8_06(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %C = and <16 x i8> %rB, %rC + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %rA, %A + %D = or <16 x i8> %C, %B + ret <16 x i8> %D } -define i8 @selb_i8u(i8 zeroext %arg1, i8 zeroext %arg2, i8 zeroext %arg3) zeroext { - %tmp3 = and i8 %arg3, %arg1 ; [#uses=1] - %tmp4not = xor i8 %arg3, -1 ; [#uses=1] - %tmp6 = and i8 %tmp4not, %arg2 ; [#uses=1] - %retval1011 = or i8 %tmp3, %tmp6 ; [#uses=1] - ret i8 %retval1011 +; (or (and rA, (not rC)), (and rB, rC)) +define <16 x i8> @selb_v16i8_07(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %rA, %A + %C = and <16 x i8> %rB, %rC + %D = or <16 x i8> %C, %B + ret <16 x i8> %D } -define i8 @selb_i8(i8 signext %arg1, i8 signext %arg2, i8 signext %arg3) signext { - %tmp3 = and i8 %arg3, %arg1 ; [#uses=1] - %tmp4not = xor i8 %arg3, -1 ; [#uses=1] - %tmp6 = and i8 %tmp4not, %arg2 ; [#uses=1] - %retval1011 = or i8 %tmp3, %tmp6 ; [#uses=1] - ret i8 %retval1011 +; (or (and rA, (not rC)), (and rC, rB)) +define <16 x i8> @selb_v16i8_08(<16 x i8> %rA, <16 x i8> %rB, <16 x i8> %rC) { + %A = xor <16 x i8> %rC, < i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1, + i8 -1, i8 -1, i8 -1, i8 -1 > + %B = and <16 x i8> %rA, %A + %C = and <16 x i8> %rC, %rB + %D = or <16 x i8> %C, %B + ret <16 x i8> %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; i32 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define i32 @selb_i32_01(i32 %rA, i32 %rB, i32 %rC) { + %C = and i32 %rC, %rB + %A = xor i32 %rC, -1 + %B = and i32 %A, %rA + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define i32 @selb_i32_02(i32 %rA, i32 %rB, i32 %rC) { + %C = and i32 %rB, %rC + %A = xor i32 %rC, -1 + %B = and i32 %A, %rA + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define i32 @selb_i32_03(i32 %rA, i32 %rB, i32 %rC) { + %A = xor i32 %rC, -1 + %B = and i32 %A, %rA + %C = and i32 %rB, %rC + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define i32 @selb_i32_04(i32 %rA, i32 %rB, i32 %rC) { + %A = xor i32 %rC, -1 + %B = and i32 %A, %rA + %C = and i32 %rC, %rB + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define i32 @selb_i32_05(i32 %rA, i32 %rB, i32 %rC) { + %C = and i32 %rC, %rB + %A = xor i32 %rC, -1 + %B = and i32 %rA, %A + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define i32 @selb_i32_06(i32 %rA, i32 %rB, i32 %rC) { + %C = and i32 %rB, %rC + %A = xor i32 %rC, -1 + %B = and i32 %rA, %A + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define i32 @selb_i32_07(i32 %rA, i32 %rB, i32 %rC) { + %A = xor i32 %rC, -1 + %B = and i32 %rA, %A + %C = and i32 %rB, %rC + %D = or i32 %C, %B + ret i32 %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define i32 @selb_i32_08(i32 %rA, i32 %rB, i32 %rC) { + %A = xor i32 %rC, -1 + %B = and i32 %rA, %A + %C = and i32 %rC, %rB + %D = or i32 %C, %B + ret i32 %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; i16 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define i16 @selb_i16_01(i16 %rA, i16 %rB, i16 %rC) { + %C = and i16 %rC, %rB + %A = xor i16 %rC, -1 + %B = and i16 %A, %rA + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define i16 @selb_i16_02(i16 %rA, i16 %rB, i16 %rC) { + %C = and i16 %rB, %rC + %A = xor i16 %rC, -1 + %B = and i16 %A, %rA + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define i16 @selb_i16_03(i16 %rA, i16 %rB, i16 %rC) { + %A = xor i16 %rC, -1 + %B = and i16 %A, %rA + %C = and i16 %rB, %rC + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define i16 @selb_i16_04(i16 %rA, i16 %rB, i16 %rC) { + %A = xor i16 %rC, -1 + %B = and i16 %A, %rA + %C = and i16 %rC, %rB + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define i16 @selb_i16_05(i16 %rA, i16 %rB, i16 %rC) { + %C = and i16 %rC, %rB + %A = xor i16 %rC, -1 + %B = and i16 %rA, %A + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define i16 @selb_i16_06(i16 %rA, i16 %rB, i16 %rC) { + %C = and i16 %rB, %rC + %A = xor i16 %rC, -1 + %B = and i16 %rA, %A + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define i16 @selb_i16_07(i16 %rA, i16 %rB, i16 %rC) { + %A = xor i16 %rC, -1 + %B = and i16 %rA, %A + %C = and i16 %rB, %rC + %D = or i16 %C, %B + ret i16 %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define i16 @selb_i16_08(i16 %rA, i16 %rB, i16 %rC) { + %A = xor i16 %rC, -1 + %B = and i16 %rA, %A + %C = and i16 %rC, %rB + %D = or i16 %C, %B + ret i16 %D +} + +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +; i8 +;-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +; (or (and rC, rB), (and (not rC), rA)) +define i8 @selb_i8_01(i8 %rA, i8 %rB, i8 %rC) { + %C = and i8 %rC, %rB + %A = xor i8 %rC, -1 + %B = and i8 %A, %rA + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rB, rC), (and (not rC), rA)) +define i8 @selb_i8_02(i8 %rA, i8 %rB, i8 %rC) { + %C = and i8 %rB, %rC + %A = xor i8 %rC, -1 + %B = and i8 %A, %rA + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and (not rC), rA), (and rB, rC)) +define i8 @selb_i8_03(i8 %rA, i8 %rB, i8 %rC) { + %A = xor i8 %rC, -1 + %B = and i8 %A, %rA + %C = and i8 %rB, %rC + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and (not rC), rA), (and rC, rB)) +define i8 @selb_i8_04(i8 %rA, i8 %rB, i8 %rC) { + %A = xor i8 %rC, -1 + %B = and i8 %A, %rA + %C = and i8 %rC, %rB + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rC, rB), (and rA, (not rC))) +define i8 @selb_i8_05(i8 %rA, i8 %rB, i8 %rC) { + %C = and i8 %rC, %rB + %A = xor i8 %rC, -1 + %B = and i8 %rA, %A + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rB, rC), (and rA, (not rC))) +define i8 @selb_i8_06(i8 %rA, i8 %rB, i8 %rC) { + %C = and i8 %rB, %rC + %A = xor i8 %rC, -1 + %B = and i8 %rA, %A + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rA, (not rC)), (and rB, rC)) +define i8 @selb_i8_07(i8 %rA, i8 %rB, i8 %rC) { + %A = xor i8 %rC, -1 + %B = and i8 %rA, %A + %C = and i8 %rB, %rC + %D = or i8 %C, %B + ret i8 %D +} + +; (or (and rA, (not rC)), (and rC, rB)) +define i8 @selb_i8_08(i8 %rA, i8 %rB, i8 %rC) { + %A = xor i8 %rC, -1 + %B = and i8 %rA, %A + %C = and i8 %rC, %rB + %D = or i8 %C, %B + ret i8 %D } diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll index 4256d91fdb3..b6629cac2a1 100644 --- a/test/CodeGen/CellSPU/shift_ops.ll +++ b/test/CodeGen/CellSPU/shift_ops.ll @@ -15,198 +15,198 @@ target triple = "spu" ; to a 32-bit type: define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) { - %A = shl i16 %arg1, %arg2 - ret i16 %A + %A = shl i16 %arg1, %arg2 + ret i16 %A } define i16 @shlh_i16_2(i16 %arg1, i16 %arg2) { - %A = shl i16 %arg2, %arg1 - ret i16 %A + %A = shl i16 %arg2, %arg1 + ret i16 %A } define i16 @shlh_i16_3(i16 signext %arg1, i16 signext %arg2) signext { - %A = shl i16 %arg1, %arg2 - ret i16 %A + %A = shl i16 %arg1, %arg2 + ret i16 %A } define i16 @shlh_i16_4(i16 signext %arg1, i16 signext %arg2) signext { - %A = shl i16 %arg2, %arg1 - ret i16 %A + %A = shl i16 %arg2, %arg1 + ret i16 %A } define i16 @shlh_i16_5(i16 zeroext %arg1, i16 zeroext %arg2) zeroext { - %A = shl i16 %arg1, %arg2 - ret i16 %A + %A = shl i16 %arg1, %arg2 + ret i16 %A } define i16 @shlh_i16_6(i16 zeroext %arg1, i16 zeroext %arg2) zeroext { - %A = shl i16 %arg2, %arg1 - ret i16 %A + %A = shl i16 %arg2, %arg1 + ret i16 %A } ; Shift left i16 with immediate: define i16 @shlhi_i16_1(i16 %arg1) { - %A = shl i16 %arg1, 12 - ret i16 %A + %A = shl i16 %arg1, 12 + ret i16 %A } ; Should not generate anything other than the return, arg1 << 0 = arg1 define i16 @shlhi_i16_2(i16 %arg1) { - %A = shl i16 %arg1, 0 - ret i16 %A + %A = shl i16 %arg1, 0 + ret i16 %A } define i16 @shlhi_i16_3(i16 %arg1) { - %A = shl i16 16383, %arg1 - ret i16 %A + %A = shl i16 16383, %arg1 + ret i16 %A } ; Should generate 0, 0 << arg1 = 0 define i16 @shlhi_i16_4(i16 %arg1) { - %A = shl i16 0, %arg1 - ret i16 %A + %A = shl i16 0, %arg1 + ret i16 %A } define i16 @shlhi_i16_5(i16 signext %arg1) signext { - %A = shl i16 %arg1, 12 - ret i16 %A + %A = shl i16 %arg1, 12 + ret i16 %A } ; Should not generate anything other than the return, arg1 << 0 = arg1 define i16 @shlhi_i16_6(i16 signext %arg1) signext { - %A = shl i16 %arg1, 0 - ret i16 %A + %A = shl i16 %arg1, 0 + ret i16 %A } define i16 @shlhi_i16_7(i16 signext %arg1) signext { - %A = shl i16 16383, %arg1 - ret i16 %A + %A = shl i16 16383, %arg1 + ret i16 %A } ; Should generate 0, 0 << arg1 = 0 define i16 @shlhi_i16_8(i16 signext %arg1) signext { - %A = shl i16 0, %arg1 - ret i16 %A + %A = shl i16 0, %arg1 + ret i16 %A } define i16 @shlhi_i16_9(i16 zeroext %arg1) zeroext { - %A = shl i16 %arg1, 12 - ret i16 %A + %A = shl i16 %arg1, 12 + ret i16 %A } ; Should not generate anything other than the return, arg1 << 0 = arg1 define i16 @shlhi_i16_10(i16 zeroext %arg1) zeroext { - %A = shl i16 %arg1, 0 - ret i16 %A + %A = shl i16 %arg1, 0 + ret i16 %A } define i16 @shlhi_i16_11(i16 zeroext %arg1) zeroext { - %A = shl i16 16383, %arg1 - ret i16 %A + %A = shl i16 16383, %arg1 + ret i16 %A } ; Should generate 0, 0 << arg1 = 0 define i16 @shlhi_i16_12(i16 zeroext %arg1) zeroext { - %A = shl i16 0, %arg1 - ret i16 %A + %A = shl i16 0, %arg1 + ret i16 %A } ; Shift left i32 via register, note that the second operand to shl is promoted ; to a 32-bit type: define i32 @shl_i32_1(i32 %arg1, i32 %arg2) { - %A = shl i32 %arg1, %arg2 - ret i32 %A + %A = shl i32 %arg1, %arg2 + ret i32 %A } define i32 @shl_i32_2(i32 %arg1, i32 %arg2) { - %A = shl i32 %arg2, %arg1 - ret i32 %A + %A = shl i32 %arg2, %arg1 + ret i32 %A } define i32 @shl_i32_3(i32 signext %arg1, i32 signext %arg2) signext { - %A = shl i32 %arg1, %arg2 - ret i32 %A + %A = shl i32 %arg1, %arg2 + ret i32 %A } define i32 @shl_i32_4(i32 signext %arg1, i32 signext %arg2) signext { - %A = shl i32 %arg2, %arg1 - ret i32 %A + %A = shl i32 %arg2, %arg1 + ret i32 %A } define i32 @shl_i32_5(i32 zeroext %arg1, i32 zeroext %arg2) zeroext { - %A = shl i32 %arg1, %arg2 - ret i32 %A + %A = shl i32 %arg1, %arg2 + ret i32 %A } define i32 @shl_i32_6(i32 zeroext %arg1, i32 zeroext %arg2) zeroext { - %A = shl i32 %arg2, %arg1 - ret i32 %A + %A = shl i32 %arg2, %arg1 + ret i32 %A } ; Shift left i32 with immediate: define i32 @shli_i32_1(i32 %arg1) { - %A = shl i32 %arg1, 12 - ret i32 %A + %A = shl i32 %arg1, 12 + ret i32 %A } ; Should not generate anything other than the return, arg1 << 0 = arg1 define i32 @shli_i32_2(i32 %arg1) { - %A = shl i32 %arg1, 0 - ret i32 %A + %A = shl i32 %arg1, 0 + ret i32 %A } define i32 @shli_i32_3(i32 %arg1) { - %A = shl i32 16383, %arg1 - ret i32 %A + %A = shl i32 16383, %arg1 + ret i32 %A } ; Should generate 0, 0 << arg1 = 0 define i32 @shli_i32_4(i32 %arg1) { - %A = shl i32 0, %arg1 - ret i32 %A + %A = shl i32 0, %arg1 + ret i32 %A } define i32 @shli_i32_5(i32 signext %arg1) signext { - %A = shl i32 %arg1, 12 - ret i32 %A + %A = shl i32 %arg1, 12 + ret i32 %A } ; Should not generate anything other than the return, arg1 << 0 = arg1 define i32 @shli_i32_6(i32 signext %arg1) signext { - %A = shl i32 %arg1, 0 - ret i32 %A + %A = shl i32 %arg1, 0 + ret i32 %A } define i32 @shli_i32_7(i32 signext %arg1) signext { - %A = shl i32 16383, %arg1 - ret i32 %A + %A = shl i32 16383, %arg1 + ret i32 %A } ; Should generate 0, 0 << arg1 = 0 define i32 @shli_i32_8(i32 signext %arg1) signext { - %A = shl i32 0, %arg1 - ret i32 %A + %A = shl i32 0, %arg1 + ret i32 %A } define i32 @shli_i32_9(i32 zeroext %arg1) zeroext { - %A = shl i32 %arg1, 12 - ret i32 %A + %A = shl i32 %arg1, 12 + ret i32 %A } ; Should not generate anything other than the return, arg1 << 0 = arg1 define i32 @shli_i32_10(i32 zeroext %arg1) zeroext { - %A = shl i32 %arg1, 0 - ret i32 %A + %A = shl i32 %arg1, 0 + ret i32 %A } define i32 @shli_i32_11(i32 zeroext %arg1) zeroext { - %A = shl i32 16383, %arg1 - ret i32 %A + %A = shl i32 16383, %arg1 + ret i32 %A } ; Should generate 0, 0 << arg1 = 0 define i32 @shli_i32_12(i32 zeroext %arg1) zeroext { - %A = shl i32 0, %arg1 - ret i32 %A + %A = shl i32 0, %arg1 + ret i32 %A } diff --git a/test/CodeGen/CellSPU/sp_farith.ll b/test/CodeGen/CellSPU/sp_farith.ll index eb3fbd94a1b..949e69a5a3d 100644 --- a/test/CodeGen/CellSPU/sp_farith.ll +++ b/test/CodeGen/CellSPU/sp_farith.ll @@ -12,79 +12,79 @@ target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i target triple = "spu" define float @fp_add(float %arg1, float %arg2) { - %A = add float %arg1, %arg2 ; [#uses=1] - ret float %A + %A = add float %arg1, %arg2 ; [#uses=1] + ret float %A } define <4 x float> @fp_add_vec(<4 x float> %arg1, <4 x float> %arg2) { - %A = add <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - ret <4 x float> %A + %A = add <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + ret <4 x float> %A } define float @fp_sub(float %arg1, float %arg2) { - %A = sub float %arg1, %arg2 ; [#uses=1] - ret float %A + %A = sub float %arg1, %arg2 ; [#uses=1] + ret float %A } define <4 x float> @fp_sub_vec(<4 x float> %arg1, <4 x float> %arg2) { - %A = sub <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - ret <4 x float> %A + %A = sub <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + ret <4 x float> %A } define float @fp_mul(float %arg1, float %arg2) { - %A = mul float %arg1, %arg2 ; [#uses=1] - ret float %A + %A = mul float %arg1, %arg2 ; [#uses=1] + ret float %A } define <4 x float> @fp_mul_vec(<4 x float> %arg1, <4 x float> %arg2) { - %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - ret <4 x float> %A + %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + ret <4 x float> %A } define float @fp_mul_add(float %arg1, float %arg2, float %arg3) { - %A = mul float %arg1, %arg2 ; [#uses=1] - %B = add float %A, %arg3 ; [#uses=1] - ret float %B + %A = mul float %arg1, %arg2 ; [#uses=1] + %B = add float %A, %arg3 ; [#uses=1] + ret float %B } define <4 x float> @fp_mul_add_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { - %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - %B = add <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] - ret <4 x float> %B + %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + %B = add <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] + ret <4 x float> %B } define float @fp_mul_sub(float %arg1, float %arg2, float %arg3) { - %A = mul float %arg1, %arg2 ; [#uses=1] - %B = sub float %A, %arg3 ; [#uses=1] - ret float %B + %A = mul float %arg1, %arg2 ; [#uses=1] + %B = sub float %A, %arg3 ; [#uses=1] + ret float %B } define <4 x float> @fp_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { - %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] - %B = sub <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] - ret <4 x float> %B + %A = mul <4 x float> %arg1, %arg2 ; <<4 x float>> [#uses=1] + %B = sub <4 x float> %A, %arg3 ; <<4 x float>> [#uses=1] + ret <4 x float> %B } ; Test the straightforward way of getting fnms ; c - a * b define float @fp_neg_mul_sub_1(float %arg1, float %arg2, float %arg3) { - %A = mul float %arg1, %arg2 - %B = sub float %arg3, %A - ret float %B + %A = mul float %arg1, %arg2 + %B = sub float %arg3, %A + ret float %B } ; Test another way of getting fnms ; - ( a *b -c ) = c - a * b define float @fp_neg_mul_sub_2(float %arg1, float %arg2, float %arg3) { - %A = mul float %arg1, %arg2 - %B = sub float %A, %arg3 - %C = sub float -0.0, %B - ret float %C + %A = mul float %arg1, %arg2 + %B = sub float %A, %arg3 + %C = sub float -0.0, %B + ret float %C } define <4 x float> @fp_neg_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) { - %A = mul <4 x float> %arg1, %arg2 - %B = sub <4 x float> %A, %arg3 - %D = sub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B - ret <4 x float> %D + %A = mul <4 x float> %arg1, %arg2 + %B = sub <4 x float> %A, %arg3 + %D = sub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B + ret <4 x float> %D } diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll index 5d6daa2ddfe..3df7267ff27 100644 --- a/test/CodeGen/CellSPU/struct_1.ll +++ b/test/CodeGen/CellSPU/struct_1.ll @@ -49,96 +49,96 @@ target triple = "spu" define i8 @get_hackstate_c1() zeroext nounwind { entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 - ret i8 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret i8 %tmp2 } define i8 @get_hackstate_c2() zeroext nounwind { entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 - ret i8 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret i8 %tmp2 } define i8 @get_hackstate_c3() zeroext nounwind { entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 - ret i8 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret i8 %tmp2 } define i32 @get_hackstate_i1() nounwind { entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 - ret i32 %tmp2 + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 + ret i32 %tmp2 } define i16 @get_hackstate_s1() signext nounwind { entry: - %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 - ret i16 %tmp2 + %tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret i16 %tmp2 } define i8 @get_hackstate_c6() zeroext nounwind { entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 8), align 16 - ret i8 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 8), align 16 + ret i8 %tmp2 } define i8 @get_hackstate_c7() zeroext nounwind { entry: - %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16 - ret i8 %tmp2 + %tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16 + ret i8 %tmp2 } define i32 @get_hackstate_i3() nounwind { entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 - ret i32 %tmp2 + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 + ret i32 %tmp2 } define i32 @get_hackstate_i6() nounwind { entry: - %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 - ret i32 %tmp2 + %tmp2 = load i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 + ret i32 %tmp2 } define void @set_hackstate_c1(i8 zeroext %c) nounwind { entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 - ret void + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16 + ret void } define void @set_hackstate_c2(i8 zeroext %c) nounwind { entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 - ret void + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16 + ret void } define void @set_hackstate_c3(i8 zeroext %c) nounwind { entry: - store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 - ret void + store i8 %c, i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16 + ret void } define void @set_hackstate_i1(i32 %i) nounwind { entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 - ret void + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 3), align 16 + ret void } define void @set_hackstate_s1(i16 signext %s) nounwind { entry: - store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 - ret void + store i16 %s, i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16 + ret void } define void @set_hackstate_i3(i32 %i) nounwind { entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 - ret void + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 10), align 16 + ret void } define void @set_hackstate_i6(i32 %i) nounwind { entry: - store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 - ret void + store i32 %i, i32* getelementptr (%struct.hackstate* @state, i32 0, i32 13), align 16 + ret void } diff --git a/test/CodeGen/CellSPU/vec_const.ll b/test/CodeGen/CellSPU/vec_const.ll index 3f7eb626cbe..f604100c4c8 100644 --- a/test/CodeGen/CellSPU/vec_const.ll +++ b/test/CodeGen/CellSPU/vec_const.ll @@ -26,30 +26,30 @@ target triple = "spu-unknown-elf" ; IL , 2 define <4 x i32> @v4i32_constvec() { - ret <4 x i32> < i32 2, i32 2, i32 2, i32 2 > + ret <4 x i32> < i32 2, i32 2, i32 2, i32 2 > } ; Spill to constant pool define <4 x i32> @v4i32_constpool() { - ret <4 x i32> < i32 2, i32 1, i32 1, i32 2 > + ret <4 x i32> < i32 2, i32 1, i32 1, i32 2 > } ; Max negative range for IL define <4 x i32> @v4i32_constvec_2() { - ret <4 x i32> < i32 -32768, i32 -32768, i32 -32768, i32 -32768 > + ret <4 x i32> < i32 -32768, i32 -32768, i32 -32768, i32 -32768 > } ; ILHU , 73 (0x49) ; 4784128 = 0x490000 define <4 x i32> @v4i32_constvec_3() { - ret <4 x i32> < i32 4784128, i32 4784128, + ret <4 x i32> < i32 4784128, i32 4784128, i32 4784128, i32 4784128 > } ; ILHU , 61 (0x3d) ; IOHL , 15395 (0x3c23) define <4 x i32> @v4i32_constvec_4() { - ret <4 x i32> < i32 4013091, i32 4013091, + ret <4 x i32> < i32 4013091, i32 4013091, i32 4013091, i32 4013091 > } @@ -58,25 +58,25 @@ define <4 x i32> @v4i32_constvec_4() { ; Tests for whether we expand the size of the bit pattern properly, because ; this could be interpreted as an i8 pattern (0x50) define <4 x i32> @v4i32_constvec_5() { - ret <4 x i32> < i32 1347440720, i32 1347440720, + ret <4 x i32> < i32 1347440720, i32 1347440720, i32 1347440720, i32 1347440720 > } ; ILH define <8 x i16> @v8i16_constvec_1() { - ret <8 x i16> < i16 32767, i16 32767, i16 32767, i16 32767, + ret <8 x i16> < i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767 > } ; ILH define <8 x i16> @v8i16_constvec_2() { - ret <8 x i16> < i16 511, i16 511, i16 511, i16 511, i16 511, + ret <8 x i16> < i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511 > } ; ILH define <8 x i16> @v8i16_constvec_3() { - ret <8 x i16> < i16 -512, i16 -512, i16 -512, i16 -512, i16 -512, + ret <8 x i16> < i16 -512, i16 -512, i16 -512, i16 -512, i16 -512, i16 -512, i16 -512, i16 -512 > } @@ -84,7 +84,7 @@ define <8 x i16> @v8i16_constvec_3() { ; Tests whether we expand the size of the bit pattern properly, because ; this could be interpreted as an i8 pattern (0x60) define <8 x i16> @v8i16_constvec_4() { - ret <8 x i16> < i16 24672, i16 24672, i16 24672, i16 24672, i16 24672, + ret <8 x i16> < i16 24672, i16 24672, i16 24672, i16 24672, i16 24672, i16 24672, i16 24672, i16 24672 > } @@ -93,7 +93,7 @@ define <8 x i16> @v8i16_constvec_4() { ; this is an i8 pattern but has to be expanded out to i16 to load it ; properly into the vector register. define <16 x i8> @v16i8_constvec_1() { - ret <16 x i8> < i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, + ret <16 x i8> < i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96, i8 96 > } @@ -101,16 +101,16 @@ define <4 x float> @v4f32_constvec_1() { entry: ret <4 x float> < float 0x4005BF0A80000000, float 0x4005BF0A80000000, - float 0x4005BF0A80000000, - float 0x4005BF0A80000000 > + float 0x4005BF0A80000000, + float 0x4005BF0A80000000 > } define <4 x float> @v4f32_constvec_2() { entry: ret <4 x float> < float 0.000000e+00, float 0.000000e+00, - float 0.000000e+00, - float 0.000000e+00 > + float 0.000000e+00, + float 0.000000e+00 > } @@ -118,8 +118,8 @@ define <4 x float> @v4f32_constvec_3() { entry: ret <4 x float> < float 0x4005BF0A80000000, float 0x3810000000000000, - float 0x47EFFFFFE0000000, - float 0x400921FB60000000 > + float 0x47EFFFFFE0000000, + float 0x400921FB60000000 > } ; 1311768467750121234 => 0x 12345678 abcdef12 @@ -129,13 +129,13 @@ entry: ; LO32_lo: 61202 define <2 x i64> @i64_constvec_1() { entry: - ret <2 x i64> < i64 1311768467750121234, - i64 1311768467750121234 > + ret <2 x i64> < i64 1311768467750121234, + i64 1311768467750121234 > } define <2 x i64> @i64_constvec_2() { entry: - ret <2 x i64> < i64 1, i64 1311768467750121234 > + ret <2 x i64> < i64 1, i64 1311768467750121234 > } define <2 x double> @f64_constvec_1() { @@ -150,5 +150,5 @@ entry: define <2 x double> @f64_constvec_2() { entry: ret <2 x double> < double 0x400921fb54442d18, - double 0x400921fb54442d18 > + double 0x400921fb54442d18 > } diff --git a/test/CodeGen/CellSPU/vecinsert.ll b/test/CodeGen/CellSPU/vecinsert.ll index 3d28e602f6d..9864c538493 100644 --- a/test/CodeGen/CellSPU/vecinsert.ll +++ b/test/CodeGen/CellSPU/vecinsert.ll @@ -19,35 +19,35 @@ target triple = "spu-unknown-elf" ; 67 -> 0x43, as 8-bit vector constant load = 0x4343 (17219)0x4343 define <16 x i8> @test_v16i8(<16 x i8> %P, i8 %x) { entry: - %tmp1 = insertelement <16 x i8> %P, i8 %x, i32 10 - %tmp1.1 = insertelement <16 x i8> %tmp1, i8 67, i32 7 - %tmp1.2 = insertelement <16 x i8> %tmp1.1, i8 %x, i32 15 - ret <16 x i8> %tmp1.2 + %tmp1 = insertelement <16 x i8> %P, i8 %x, i32 10 + %tmp1.1 = insertelement <16 x i8> %tmp1, i8 67, i32 7 + %tmp1.2 = insertelement <16 x i8> %tmp1.1, i8 %x, i32 15 + ret <16 x i8> %tmp1.2 } ; 22598 -> 0x5846 define <8 x i16> @test_v8i16(<8 x i16> %P, i16 %x) { entry: - %tmp1 = insertelement <8 x i16> %P, i16 %x, i32 5 - %tmp1.1 = insertelement <8 x i16> %tmp1, i16 22598, i32 7 - %tmp1.2 = insertelement <8 x i16> %tmp1.1, i16 %x, i32 2 - ret <8 x i16> %tmp1.2 + %tmp1 = insertelement <8 x i16> %P, i16 %x, i32 5 + %tmp1.1 = insertelement <8 x i16> %tmp1, i16 22598, i32 7 + %tmp1.2 = insertelement <8 x i16> %tmp1.1, i16 %x, i32 2 + ret <8 x i16> %tmp1.2 } ; 1574023 -> 0x180487 (ILHU 24/IOHL 1159) define <4 x i32> @test_v4i32_1(<4 x i32> %P, i32 %x) { entry: - %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2 - %tmp1.1 = insertelement <4 x i32> %tmp1, i32 1574023, i32 1 - %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3 - ret <4 x i32> %tmp1.2 + %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2 + %tmp1.1 = insertelement <4 x i32> %tmp1, i32 1574023, i32 1 + %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3 + ret <4 x i32> %tmp1.2 } ; Should generate IL for the load define <4 x i32> @test_v4i32_2(<4 x i32> %P, i32 %x) { entry: - %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2 - %tmp1.1 = insertelement <4 x i32> %tmp1, i32 -39, i32 1 - %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3 - ret <4 x i32> %tmp1.2 + %tmp1 = insertelement <4 x i32> %P, i32 %x, i32 2 + %tmp1.1 = insertelement <4 x i32> %tmp1, i32 -39, i32 1 + %tmp1.2 = insertelement <4 x i32> %tmp1.1, i32 %x, i32 3 + ret <4 x i32> %tmp1.2 }