diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a1ddb02563e3..ba76a4696146 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1277,7 +1277,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) { SDValue Hi = GetPromotedInteger(N->getOperand(0)); SDValue Lo = GetPromotedInteger(N->getOperand(1)); - SDValue Amt = GetPromotedInteger(N->getOperand(2)); + SDValue Amt = ZExtPromotedInteger(N->getOperand(2)); SDLoc DL(N); EVT OldVT = N->getOperand(0).getValueType(); diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll index fbf00a59f3cf..6dfc61046c5e 100644 --- a/llvm/test/CodeGen/AArch64/funnel-shift.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -69,13 +69,14 @@ declare i37 @llvm.fshl.i37(i37, i37, i37) define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-LABEL: fshl_i37: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #31883 -; CHECK-NEXT: mov w9, #37 -; CHECK-NEXT: movk x8, #3542, lsl #16 +; CHECK-NEXT: mov x9, #31883 +; CHECK-NEXT: and x8, x2, #0x1fffffffff +; CHECK-NEXT: movk x9, #3542, lsl #16 ; CHECK-NEXT: ubfiz x10, x1, #26, #37 -; CHECK-NEXT: movk x8, #51366, lsl #32 -; CHECK-NEXT: movk x8, #56679, lsl #48 -; CHECK-NEXT: umulh x8, x2, x8 +; CHECK-NEXT: movk x9, #51366, lsl #32 +; CHECK-NEXT: movk x9, #56679, lsl #48 +; CHECK-NEXT: umulh x8, x8, x9 +; CHECK-NEXT: mov w9, #37 ; CHECK-NEXT: ubfx x8, x8, #5, #27 ; CHECK-NEXT: msub w8, w8, w9, w2 ; CHECK-NEXT: mvn w9, w8 @@ -206,14 +207,15 @@ declare i37 @llvm.fshr.i37(i37, i37, i37) define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-LABEL: fshr_i37: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #31883 -; CHECK-NEXT: mov w9, #37 -; CHECK-NEXT: movk x8, #3542, lsl #16 +; CHECK-NEXT: mov x9, #31883 +; CHECK-NEXT: and x8, x2, #0x1fffffffff +; CHECK-NEXT: movk x9, #3542, lsl #16 ; CHECK-NEXT: lsl x10, x1, #27 -; CHECK-NEXT: movk x8, #51366, lsl #32 +; CHECK-NEXT: movk x9, #51366, lsl #32 ; CHECK-NEXT: lsl x11, x0, #1 -; CHECK-NEXT: movk x8, #56679, lsl #48 -; CHECK-NEXT: umulh x8, x2, x8 +; CHECK-NEXT: movk x9, #56679, lsl #48 +; CHECK-NEXT: umulh x8, x8, x9 +; CHECK-NEXT: mov w9, #37 ; CHECK-NEXT: lsr x8, x8, #5 ; CHECK-NEXT: msub w8, w8, w9, w2 ; CHECK-NEXT: add w8, w8, #27 diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll index 3eeec671fbc4..994cc78ec791 100644 --- a/llvm/test/CodeGen/AMDGPU/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/fshr.ll @@ -1101,11 +1101,12 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) { ; SI-LABEL: v_fshr_i24: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_and_b32_e32 v3, 0xffffff, v2 ; SI-NEXT: s_mov_b32 s4, 0xaaaaaaab -; SI-NEXT: v_mul_hi_u32 v3, v2, s4 +; SI-NEXT: v_mul_hi_u32 v3, v3, s4 ; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; SI-NEXT: v_lshrrev_b32_e32 v3, 4, v3 -; SI-NEXT: v_mul_lo_u32 v3, v3, 24 +; SI-NEXT: v_mul_u32_u24_e32 v3, 24, v3 ; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 ; SI-NEXT: v_add_i32_e32 v2, vcc, 8, v2 ; SI-NEXT: v_alignbit_b32 v0, v0, v1, v2 @@ -1114,11 +1115,12 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) { ; VI-LABEL: v_fshr_i24: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_and_b32_e32 v3, 0xffffff, v2 ; VI-NEXT: s_mov_b32 s4, 0xaaaaaaab -; VI-NEXT: v_mul_hi_u32 v3, v2, s4 +; VI-NEXT: v_mul_hi_u32 v3, v3, s4 ; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; VI-NEXT: v_lshrrev_b32_e32 v3, 4, v3 -; VI-NEXT: v_mul_lo_u32 v3, v3, 24 +; VI-NEXT: v_mul_u32_u24_e32 v3, 24, v3 ; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 ; VI-NEXT: v_add_u32_e32 v2, vcc, 8, v2 ; VI-NEXT: v_alignbit_b32 v0, v0, v1, v2 @@ -1127,11 +1129,12 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) { ; GFX9-LABEL: v_fshr_i24: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v2 ; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab -; GFX9-NEXT: v_mul_hi_u32 v3, v2, s4 +; GFX9-NEXT: v_mul_hi_u32 v3, v3, s4 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 4, v3 -; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24 +; GFX9-NEXT: v_mul_u32_u24_e32 v3, 24, v3 ; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 ; GFX9-NEXT: v_add_u32_e32 v2, 8, v2 ; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2 @@ -1146,10 +1149,11 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mul_hi_u32 v3, 0xaaaaaaab, v2 +; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX10-NEXT: v_mul_hi_u32 v3, 0xaaaaaaab, v3 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 4, v3 -; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24 +; GFX10-NEXT: v_mul_u32_u24_e32 v3, 24, v3 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 8, v2 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2 @@ -1162,19 +1166,22 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2 ; SI-LABEL: v_fshr_v2i24: ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: s_mov_b32 s4, 0xaaaaaaab -; SI-NEXT: v_mul_hi_u32 v6, v4, s4 -; SI-NEXT: v_mul_hi_u32 v7, v5, s4 +; SI-NEXT: s_mov_b32 s4, 0xffffff +; SI-NEXT: v_and_b32_e32 v6, s4, v4 +; SI-NEXT: s_mov_b32 s5, 0xaaaaaaab +; SI-NEXT: v_mul_hi_u32 v6, v6, s5 +; SI-NEXT: v_and_b32_e32 v7, s4, v5 ; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; SI-NEXT: v_lshrrev_b32_e32 v6, 4, v6 -; SI-NEXT: v_mul_lo_u32 v6, v6, 24 +; SI-NEXT: v_mul_u32_u24_e32 v6, 24, v6 ; SI-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 -; SI-NEXT: v_lshrrev_b32_e32 v6, 4, v7 -; SI-NEXT: v_mul_lo_u32 v6, v6, 24 +; SI-NEXT: v_mul_hi_u32 v6, v7, s5 ; SI-NEXT: v_add_i32_e32 v4, vcc, 8, v4 ; SI-NEXT: v_alignbit_b32 v0, v0, v2, v4 ; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v3 -; SI-NEXT: v_sub_i32_e32 v3, vcc, v5, v6 +; SI-NEXT: v_lshrrev_b32_e32 v3, 4, v6 +; SI-NEXT: v_mul_u32_u24_e32 v3, 24, v3 +; SI-NEXT: v_sub_i32_e32 v3, vcc, v5, v3 ; SI-NEXT: v_add_i32_e32 v3, vcc, 8, v3 ; SI-NEXT: v_alignbit_b32 v1, v1, v2, v3 ; SI-NEXT: s_setpc_b64 s[30:31] @@ -1182,19 +1189,22 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2 ; VI-LABEL: v_fshr_v2i24: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: s_mov_b32 s4, 0xaaaaaaab -; VI-NEXT: v_mul_hi_u32 v6, v4, s4 -; VI-NEXT: v_mul_hi_u32 v7, v5, s4 +; VI-NEXT: s_mov_b32 s4, 0xffffff +; VI-NEXT: v_and_b32_e32 v6, s4, v4 +; VI-NEXT: s_mov_b32 s5, 0xaaaaaaab +; VI-NEXT: v_mul_hi_u32 v6, v6, s5 +; VI-NEXT: v_and_b32_e32 v7, s4, v5 ; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; VI-NEXT: v_lshrrev_b32_e32 v6, 4, v6 -; VI-NEXT: v_mul_lo_u32 v6, v6, 24 +; VI-NEXT: v_mul_u32_u24_e32 v6, 24, v6 ; VI-NEXT: v_sub_u32_e32 v4, vcc, v4, v6 -; VI-NEXT: v_lshrrev_b32_e32 v6, 4, v7 -; VI-NEXT: v_mul_lo_u32 v6, v6, 24 +; VI-NEXT: v_mul_hi_u32 v6, v7, s5 ; VI-NEXT: v_add_u32_e32 v4, vcc, 8, v4 ; VI-NEXT: v_alignbit_b32 v0, v0, v2, v4 ; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v3 -; VI-NEXT: v_sub_u32_e32 v3, vcc, v5, v6 +; VI-NEXT: v_lshrrev_b32_e32 v3, 4, v6 +; VI-NEXT: v_mul_u32_u24_e32 v3, 24, v3 +; VI-NEXT: v_sub_u32_e32 v3, vcc, v5, v3 ; VI-NEXT: v_add_u32_e32 v3, vcc, 8, v3 ; VI-NEXT: v_alignbit_b32 v1, v1, v2, v3 ; VI-NEXT: s_setpc_b64 s[30:31] @@ -1202,19 +1212,22 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2 ; GFX9-LABEL: v_fshr_v2i24: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab -; GFX9-NEXT: v_mul_hi_u32 v6, v4, s4 -; GFX9-NEXT: v_mul_hi_u32 v7, v5, s4 +; GFX9-NEXT: s_mov_b32 s4, 0xffffff +; GFX9-NEXT: v_and_b32_e32 v6, s4, v4 +; GFX9-NEXT: s_mov_b32 s5, 0xaaaaaaab +; GFX9-NEXT: v_mul_hi_u32 v6, v6, s5 +; GFX9-NEXT: v_and_b32_e32 v7, s4, v5 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6 -; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX9-NEXT: v_mul_u32_u24_e32 v6, 24, v6 ; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v7 -; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX9-NEXT: v_mul_hi_u32 v6, v7, s5 ; GFX9-NEXT: v_add_u32_e32 v4, 8, v4 ; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v3 -; GFX9-NEXT: v_sub_u32_e32 v3, v5, v6 +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 4, v6 +; GFX9-NEXT: v_mul_u32_u24_e32 v3, 24, v3 +; GFX9-NEXT: v_sub_u32_e32 v3, v5, v3 ; GFX9-NEXT: v_add_u32_e32 v3, 8, v3 ; GFX9-NEXT: v_alignbit_b32 v1, v1, v2, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -1228,15 +1241,18 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2 ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b32 s4, 0xaaaaaaab +; GFX10-NEXT: s_mov_b32 s4, 0xffffff ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX10-NEXT: v_mul_hi_u32 v6, v4, s4 -; GFX10-NEXT: v_mul_hi_u32 v7, v5, s4 +; GFX10-NEXT: v_and_b32_e32 v6, s4, v4 +; GFX10-NEXT: v_and_b32_e32 v7, s4, v5 +; GFX10-NEXT: s_mov_b32 s4, 0xaaaaaaab ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX10-NEXT: v_mul_hi_u32 v6, v6, s4 +; GFX10-NEXT: v_mul_hi_u32 v7, v7, s4 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 4, v6 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 4, v7 -; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX10-NEXT: v_mul_u32_u24_e32 v6, 24, v6 +; GFX10-NEXT: v_mul_u32_u24_e32 v7, 24, v7 ; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6 ; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7 ; GFX10-NEXT: v_add_nc_u32_e32 v4, 8, v4 diff --git a/llvm/test/CodeGen/ARM/funnel-shift.ll b/llvm/test/CodeGen/ARM/funnel-shift.ll index 25e210d81914..5a7c4384428e 100644 --- a/llvm/test/CodeGen/ARM/funnel-shift.ll +++ b/llvm/test/CodeGen/ARM/funnel-shift.ll @@ -47,64 +47,66 @@ declare i37 @llvm.fshl.i37(i37, i37, i37) define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; SCALAR-LABEL: fshl_i37: ; SCALAR: @ %bb.0: -; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr} -; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr} -; SCALAR-NEXT: mov r4, r1 +; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; SCALAR-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; SCALAR-NEXT: mov r8, r0 -; SCALAR-NEXT: ldr r0, [sp, #24] -; SCALAR-NEXT: mov r5, r3 -; SCALAR-NEXT: ldr r1, [sp, #28] -; SCALAR-NEXT: mov r6, r2 +; SCALAR-NEXT: ldr r0, [sp, #36] +; SCALAR-NEXT: mov r4, r1 +; SCALAR-NEXT: mov r6, r3 +; SCALAR-NEXT: and r1, r0, #31 +; SCALAR-NEXT: ldr r0, [sp, #32] +; SCALAR-NEXT: mov r9, r2 ; SCALAR-NEXT: mov r2, #37 ; SCALAR-NEXT: mov r3, #0 ; SCALAR-NEXT: bl __aeabi_uldivmod -; SCALAR-NEXT: lsl r1, r5, #27 -; SCALAR-NEXT: ands r12, r2, #32 -; SCALAR-NEXT: orr r1, r1, r6, lsr #5 +; SCALAR-NEXT: lsl r1, r6, #27 +; SCALAR-NEXT: ands r0, r2, #32 +; SCALAR-NEXT: orr r1, r1, r9, lsr #5 ; SCALAR-NEXT: mov r3, r8 -; SCALAR-NEXT: and r5, r2, #31 -; SCALAR-NEXT: mov r0, #31 +; SCALAR-NEXT: and r6, r2, #31 +; SCALAR-NEXT: mov r7, #31 ; SCALAR-NEXT: movne r3, r1 -; SCALAR-NEXT: cmp r12, #0 -; SCALAR-NEXT: bic r2, r0, r2 -; SCALAR-NEXT: lslne r1, r6, #27 +; SCALAR-NEXT: cmp r0, #0 +; SCALAR-NEXT: lslne r1, r9, #27 +; SCALAR-NEXT: bic r2, r7, r2 ; SCALAR-NEXT: movne r4, r8 -; SCALAR-NEXT: lsl r7, r3, r5 +; SCALAR-NEXT: lsl r5, r3, r6 ; SCALAR-NEXT: lsr r0, r1, #1 -; SCALAR-NEXT: lsl r1, r4, r5 +; SCALAR-NEXT: lsl r1, r4, r6 ; SCALAR-NEXT: lsr r3, r3, #1 -; SCALAR-NEXT: orr r0, r7, r0, lsr r2 +; SCALAR-NEXT: orr r0, r5, r0, lsr r2 ; SCALAR-NEXT: orr r1, r1, r3, lsr r2 -; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc} +; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; ; NEON-LABEL: fshl_i37: ; NEON: @ %bb.0: ; NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} ; NEON-NEXT: push {r4, r5, r6, r7, r11, lr} ; NEON-NEXT: mov r4, r1 -; NEON-NEXT: mov r5, r0 -; NEON-NEXT: ldr r0, [sp, #24] -; NEON-NEXT: mov r7, r3 ; NEON-NEXT: ldr r1, [sp, #28] -; NEON-NEXT: mov r6, r2 +; NEON-NEXT: mov r6, r0 +; NEON-NEXT: ldr r0, [sp, #24] +; NEON-NEXT: and r1, r1, #31 +; NEON-NEXT: mov r5, r3 +; NEON-NEXT: mov r7, r2 ; NEON-NEXT: mov r2, #37 ; NEON-NEXT: mov r3, #0 ; NEON-NEXT: bl __aeabi_uldivmod ; NEON-NEXT: mov r0, #31 ; NEON-NEXT: bic r1, r0, r2 -; NEON-NEXT: lsl r0, r7, #27 +; NEON-NEXT: lsl r0, r5, #27 ; NEON-NEXT: ands r12, r2, #32 -; NEON-NEXT: orr r0, r0, r6, lsr #5 -; NEON-NEXT: mov r7, r5 +; NEON-NEXT: orr r0, r0, r7, lsr #5 +; NEON-NEXT: mov r5, r6 ; NEON-NEXT: and r2, r2, #31 -; NEON-NEXT: movne r7, r0 -; NEON-NEXT: lslne r0, r6, #27 +; NEON-NEXT: movne r5, r0 +; NEON-NEXT: lslne r0, r7, #27 ; NEON-NEXT: cmp r12, #0 -; NEON-NEXT: lsl r3, r7, r2 +; NEON-NEXT: lsl r3, r5, r2 ; NEON-NEXT: lsr r0, r0, #1 -; NEON-NEXT: movne r4, r5 +; NEON-NEXT: movne r4, r6 ; NEON-NEXT: orr r0, r3, r0, lsr r1 -; NEON-NEXT: lsr r3, r7, #1 +; NEON-NEXT: lsr r3, r5, #1 ; NEON-NEXT: lsl r2, r4, r2 ; NEON-NEXT: orr r1, r2, r3, lsr r1 ; NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} @@ -233,38 +235,73 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) { ; Verify that weird types are minimally supported. declare i37 @llvm.fshr.i37(i37, i37, i37) define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { -; CHECK-LABEL: fshr_i37: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: ldr r0, [sp, #24] -; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: ldr r1, [sp, #28] -; CHECK-NEXT: mov r7, r2 -; CHECK-NEXT: mov r2, #37 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: bl __aeabi_uldivmod -; CHECK-NEXT: lsl r3, r5, #27 -; CHECK-NEXT: add r0, r2, #27 -; CHECK-NEXT: orr r3, r3, r7, lsr #5 -; CHECK-NEXT: mov r1, #31 -; CHECK-NEXT: ands r12, r0, #32 -; CHECK-NEXT: mov r5, r6 -; CHECK-NEXT: moveq r5, r3 -; CHECK-NEXT: bic r1, r1, r0 -; CHECK-NEXT: lsl r2, r5, #1 -; CHECK-NEXT: lsleq r3, r7, #27 -; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: and r7, r0, #31 -; CHECK-NEXT: lsl r2, r2, r1 -; CHECK-NEXT: moveq r4, r6 -; CHECK-NEXT: orr r0, r2, r3, lsr r7 -; CHECK-NEXT: lsl r2, r4, #1 -; CHECK-NEXT: lsl r1, r2, r1 -; CHECK-NEXT: orr r1, r1, r5, lsr r7 -; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} +; SCALAR-LABEL: fshr_i37: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr} +; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr} +; SCALAR-NEXT: mov r8, r0 +; SCALAR-NEXT: ldr r0, [sp, #28] +; SCALAR-NEXT: mov r4, r1 +; SCALAR-NEXT: mov r5, r3 +; SCALAR-NEXT: and r1, r0, #31 +; SCALAR-NEXT: ldr r0, [sp, #24] +; SCALAR-NEXT: mov r7, r2 +; SCALAR-NEXT: mov r2, #37 +; SCALAR-NEXT: mov r3, #0 +; SCALAR-NEXT: bl __aeabi_uldivmod +; SCALAR-NEXT: lsl r3, r5, #27 +; SCALAR-NEXT: add r0, r2, #27 +; SCALAR-NEXT: orr r3, r3, r7, lsr #5 +; SCALAR-NEXT: ands r2, r0, #32 +; SCALAR-NEXT: mov r5, r8 +; SCALAR-NEXT: mov r1, #31 +; SCALAR-NEXT: moveq r5, r3 +; SCALAR-NEXT: lsleq r3, r7, #27 +; SCALAR-NEXT: cmp r2, #0 +; SCALAR-NEXT: bic r1, r1, r0 +; SCALAR-NEXT: moveq r4, r8 +; SCALAR-NEXT: lsl r6, r5, #1 +; SCALAR-NEXT: and r7, r0, #31 +; SCALAR-NEXT: lsl r2, r4, #1 +; SCALAR-NEXT: lsl r6, r6, r1 +; SCALAR-NEXT: lsl r1, r2, r1 +; SCALAR-NEXT: orr r0, r6, r3, lsr r7 +; SCALAR-NEXT: orr r1, r1, r5, lsr r7 +; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; NEON-LABEL: fshr_i37: +; NEON: @ %bb.0: +; NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} +; NEON-NEXT: push {r4, r5, r6, r7, r8, lr} +; NEON-NEXT: mov r4, r1 +; NEON-NEXT: ldr r1, [sp, #28] +; NEON-NEXT: mov r8, r0 +; NEON-NEXT: ldr r0, [sp, #24] +; NEON-NEXT: and r1, r1, #31 +; NEON-NEXT: mov r5, r3 +; NEON-NEXT: mov r7, r2 +; NEON-NEXT: mov r2, #37 +; NEON-NEXT: mov r3, #0 +; NEON-NEXT: bl __aeabi_uldivmod +; NEON-NEXT: lsl r3, r5, #27 +; NEON-NEXT: add r0, r2, #27 +; NEON-NEXT: orr r3, r3, r7, lsr #5 +; NEON-NEXT: ands r2, r0, #32 +; NEON-NEXT: mov r5, r8 +; NEON-NEXT: mov r1, #31 +; NEON-NEXT: moveq r5, r3 +; NEON-NEXT: lsleq r3, r7, #27 +; NEON-NEXT: cmp r2, #0 +; NEON-NEXT: bic r1, r1, r0 +; NEON-NEXT: moveq r4, r8 +; NEON-NEXT: lsl r6, r5, #1 +; NEON-NEXT: and r7, r0, #31 +; NEON-NEXT: lsl r2, r4, #1 +; NEON-NEXT: lsl r6, r6, r1 +; NEON-NEXT: lsl r1, r2, r1 +; NEON-NEXT: orr r0, r6, r3, lsr r7 +; NEON-NEXT: orr r1, r1, r5, lsr r7 +; NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z) ret i37 %f } diff --git a/llvm/test/CodeGen/Mips/funnel-shift.ll b/llvm/test/CodeGen/Mips/funnel-shift.ll index d4f47318ebb1..737e95c8262a 100644 --- a/llvm/test/CodeGen/Mips/funnel-shift.ll +++ b/llvm/test/CodeGen/Mips/funnel-shift.ll @@ -66,7 +66,8 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-BE-NEXT: move $17, $6 ; CHECK-BE-NEXT: move $18, $5 ; CHECK-BE-NEXT: move $19, $4 -; CHECK-BE-NEXT: lw $4, 56($sp) +; CHECK-BE-NEXT: lw $1, 56($sp) +; CHECK-BE-NEXT: andi $4, $1, 31 ; CHECK-BE-NEXT: lw $5, 60($sp) ; CHECK-BE-NEXT: addiu $6, $zero, 0 ; CHECK-BE-NEXT: jal __umoddi3 @@ -117,8 +118,9 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-LE-NEXT: move $17, $6 ; CHECK-LE-NEXT: move $18, $5 ; CHECK-LE-NEXT: move $19, $4 +; CHECK-LE-NEXT: lw $1, 60($sp) +; CHECK-LE-NEXT: andi $5, $1, 31 ; CHECK-LE-NEXT: lw $4, 56($sp) -; CHECK-LE-NEXT: lw $5, 60($sp) ; CHECK-LE-NEXT: addiu $6, $zero, 37 ; CHECK-LE-NEXT: jal __umoddi3 ; CHECK-LE-NEXT: addiu $7, $zero, 0 @@ -309,7 +311,8 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-BE-NEXT: move $17, $6 ; CHECK-BE-NEXT: move $18, $5 ; CHECK-BE-NEXT: move $19, $4 -; CHECK-BE-NEXT: lw $4, 56($sp) +; CHECK-BE-NEXT: lw $1, 56($sp) +; CHECK-BE-NEXT: andi $4, $1, 31 ; CHECK-BE-NEXT: lw $5, 60($sp) ; CHECK-BE-NEXT: addiu $6, $zero, 0 ; CHECK-BE-NEXT: jal __umoddi3 @@ -327,9 +330,9 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-BE-NEXT: andi $1, $1, 31 ; CHECK-BE-NEXT: sll $6, $19, 1 ; CHECK-BE-NEXT: sllv $6, $6, $1 +; CHECK-BE-NEXT: sll $7, $16, 27 ; CHECK-BE-NEXT: or $2, $6, $2 -; CHECK-BE-NEXT: sll $6, $16, 27 -; CHECK-BE-NEXT: movz $4, $6, $3 +; CHECK-BE-NEXT: movz $4, $7, $3 ; CHECK-BE-NEXT: srlv $3, $4, $5 ; CHECK-BE-NEXT: sll $4, $18, 1 ; CHECK-BE-NEXT: sllv $1, $4, $1 @@ -360,8 +363,9 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-LE-NEXT: move $17, $6 ; CHECK-LE-NEXT: move $18, $5 ; CHECK-LE-NEXT: move $19, $4 +; CHECK-LE-NEXT: lw $1, 60($sp) +; CHECK-LE-NEXT: andi $5, $1, 31 ; CHECK-LE-NEXT: lw $4, 56($sp) -; CHECK-LE-NEXT: lw $5, 60($sp) ; CHECK-LE-NEXT: addiu $6, $zero, 37 ; CHECK-LE-NEXT: jal __umoddi3 ; CHECK-LE-NEXT: addiu $7, $zero, 0 diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll index 62b68e0b2cad..10e2fc0326f9 100644 --- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll +++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll @@ -250,7 +250,7 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; CHECK32_32-NEXT: mr 29, 5 ; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill ; CHECK32_32-NEXT: mr 30, 6 -; CHECK32_32-NEXT: mr 3, 7 +; CHECK32_32-NEXT: clrlwi 3, 7, 27 ; CHECK32_32-NEXT: mr 4, 8 ; CHECK32_32-NEXT: li 5, 0 ; CHECK32_32-NEXT: li 6, 37 @@ -299,7 +299,7 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; CHECK32_64-NEXT: .cfi_offset r30, -8 ; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill ; CHECK32_64-NEXT: mr 27, 3 -; CHECK32_64-NEXT: mr 3, 7 +; CHECK32_64-NEXT: clrlwi 3, 7, 27 ; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill ; CHECK32_64-NEXT: mr 28, 4 ; CHECK32_64-NEXT: mr 4, 8 @@ -353,12 +353,13 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; CHECK64-LABEL: fshl_i37: ; CHECK64: # %bb.0: ; CHECK64-NEXT: lis 6, 28339 -; CHECK64-NEXT: sldi 4, 4, 27 +; CHECK64-NEXT: clrldi 7, 5, 27 ; CHECK64-NEXT: ori 6, 6, 58451 +; CHECK64-NEXT: sldi 4, 4, 27 ; CHECK64-NEXT: rldic 6, 6, 33, 0 ; CHECK64-NEXT: oris 6, 6, 3542 ; CHECK64-NEXT: ori 6, 6, 31883 -; CHECK64-NEXT: mulhdu 6, 5, 6 +; CHECK64-NEXT: mulhdu 6, 7, 6 ; CHECK64-NEXT: rldicl 6, 6, 59, 5 ; CHECK64-NEXT: mulli 6, 6, 37 ; CHECK64-NEXT: sub 5, 5, 6 @@ -549,7 +550,7 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; CHECK32_32-NEXT: mr 29, 5 ; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill ; CHECK32_32-NEXT: mr 30, 6 -; CHECK32_32-NEXT: mr 3, 7 +; CHECK32_32-NEXT: clrlwi 3, 7, 27 ; CHECK32_32-NEXT: mr 4, 8 ; CHECK32_32-NEXT: li 5, 0 ; CHECK32_32-NEXT: li 6, 37 @@ -599,7 +600,7 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; CHECK32_64-NEXT: .cfi_offset r30, -8 ; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill ; CHECK32_64-NEXT: mr 27, 3 -; CHECK32_64-NEXT: mr 3, 7 +; CHECK32_64-NEXT: clrlwi 3, 7, 27 ; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill ; CHECK32_64-NEXT: mr 28, 4 ; CHECK32_64-NEXT: mr 4, 8 @@ -649,12 +650,13 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; CHECK64-LABEL: fshr_i37: ; CHECK64: # %bb.0: ; CHECK64-NEXT: lis 6, 28339 -; CHECK64-NEXT: sldi 4, 4, 27 +; CHECK64-NEXT: clrldi 7, 5, 27 ; CHECK64-NEXT: ori 6, 6, 58451 +; CHECK64-NEXT: sldi 4, 4, 27 ; CHECK64-NEXT: rldic 6, 6, 33, 0 ; CHECK64-NEXT: oris 6, 6, 3542 ; CHECK64-NEXT: ori 6, 6, 31883 -; CHECK64-NEXT: mulhdu 6, 5, 6 +; CHECK64-NEXT: mulhdu 6, 7, 6 ; CHECK64-NEXT: rldicl 6, 6, 59, 5 ; CHECK64-NEXT: mulli 6, 6, 37 ; CHECK64-NEXT: sub 5, 5, 6 diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll index 2e9deb51dbe0..820b66f6179c 100644 --- a/llvm/test/CodeGen/X86/funnel-shift.ll +++ b/llvm/test/CodeGen/X86/funnel-shift.ll @@ -140,13 +140,15 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind { ; X86-SSE2-NEXT: pushl %ebx ; X86-SSE2-NEXT: pushl %edi ; X86-SSE2-NEXT: pushl %esi +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: andl $31, %eax ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-SSE2-NEXT: shldl $27, %ebx, %edi ; X86-SSE2-NEXT: pushl $0 ; X86-SSE2-NEXT: pushl $37 -; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: pushl %eax ; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp) ; X86-SSE2-NEXT: calll __umoddi3 ; X86-SSE2-NEXT: addl $16, %esp @@ -174,8 +176,9 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind { ; X64-AVX2-LABEL: fshl_i37: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: movq %rdx, %rcx +; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF +; X64-AVX2-NEXT: andq %rdx, %rax ; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B -; X64-AVX2-NEXT: movq %rcx, %rax ; X64-AVX2-NEXT: mulq %rdx ; X64-AVX2-NEXT: shrq $5, %rdx ; X64-AVX2-NEXT: leal (%rdx,%rdx,8), %eax @@ -304,13 +307,15 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind { ; X86-SSE2-NEXT: pushl %ebx ; X86-SSE2-NEXT: pushl %edi ; X86-SSE2-NEXT: pushl %esi +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: andl $31, %eax ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-SSE2-NEXT: shldl $27, %ebx, %esi ; X86-SSE2-NEXT: pushl $0 ; X86-SSE2-NEXT: pushl $37 -; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp) +; X86-SSE2-NEXT: pushl %eax ; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp) ; X86-SSE2-NEXT: calll __umoddi3 ; X86-SSE2-NEXT: addl $16, %esp @@ -339,8 +344,9 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind { ; X64-AVX2-LABEL: fshr_i37: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: movq %rdx, %rcx +; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF +; X64-AVX2-NEXT: andq %rdx, %rax ; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B -; X64-AVX2-NEXT: movq %rcx, %rax ; X64-AVX2-NEXT: mulq %rdx ; X64-AVX2-NEXT: shrq $5, %rdx ; X64-AVX2-NEXT: leal (%rdx,%rdx,8), %eax