[SelectionDAG] Clear promoted bits before UREM on shift amount in PromoteIntRes_FunnelShift.

Otherwise the promoted shift amount has garbage in its upper bits, which
can affect the result of the UREM.

Fixes PR55296.

Differential Revision: https://reviews.llvm.org/D125076
This commit is contained in:
Craig Topper 2022-05-06 00:04:43 -07:00
parent 324d696c15
commit 76f90a9d71
7 changed files with 195 additions and 128 deletions

View File

@ -1277,7 +1277,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
SDValue Hi = GetPromotedInteger(N->getOperand(0));
SDValue Lo = GetPromotedInteger(N->getOperand(1));
SDValue Amt = GetPromotedInteger(N->getOperand(2));
SDValue Amt = ZExtPromotedInteger(N->getOperand(2));
SDLoc DL(N);
EVT OldVT = N->getOperand(0).getValueType();

View File

@ -69,13 +69,14 @@ declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshl_i37:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #31883
; CHECK-NEXT: mov w9, #37
; CHECK-NEXT: movk x8, #3542, lsl #16
; CHECK-NEXT: mov x9, #31883
; CHECK-NEXT: and x8, x2, #0x1fffffffff
; CHECK-NEXT: movk x9, #3542, lsl #16
; CHECK-NEXT: ubfiz x10, x1, #26, #37
; CHECK-NEXT: movk x8, #51366, lsl #32
; CHECK-NEXT: movk x8, #56679, lsl #48
; CHECK-NEXT: umulh x8, x2, x8
; CHECK-NEXT: movk x9, #51366, lsl #32
; CHECK-NEXT: movk x9, #56679, lsl #48
; CHECK-NEXT: umulh x8, x8, x9
; CHECK-NEXT: mov w9, #37
; CHECK-NEXT: ubfx x8, x8, #5, #27
; CHECK-NEXT: msub w8, w8, w9, w2
; CHECK-NEXT: mvn w9, w8
@ -206,14 +207,15 @@ declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshr_i37:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #31883
; CHECK-NEXT: mov w9, #37
; CHECK-NEXT: movk x8, #3542, lsl #16
; CHECK-NEXT: mov x9, #31883
; CHECK-NEXT: and x8, x2, #0x1fffffffff
; CHECK-NEXT: movk x9, #3542, lsl #16
; CHECK-NEXT: lsl x10, x1, #27
; CHECK-NEXT: movk x8, #51366, lsl #32
; CHECK-NEXT: movk x9, #51366, lsl #32
; CHECK-NEXT: lsl x11, x0, #1
; CHECK-NEXT: movk x8, #56679, lsl #48
; CHECK-NEXT: umulh x8, x2, x8
; CHECK-NEXT: movk x9, #56679, lsl #48
; CHECK-NEXT: umulh x8, x8, x9
; CHECK-NEXT: mov w9, #37
; CHECK-NEXT: lsr x8, x8, #5
; CHECK-NEXT: msub w8, w8, w9, w2
; CHECK-NEXT: add w8, w8, #27

View File

@ -1101,11 +1101,12 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) {
; SI-LABEL: v_fshr_i24:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_and_b32_e32 v3, 0xffffff, v2
; SI-NEXT: s_mov_b32 s4, 0xaaaaaaab
; SI-NEXT: v_mul_hi_u32 v3, v2, s4
; SI-NEXT: v_mul_hi_u32 v3, v3, s4
; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; SI-NEXT: v_lshrrev_b32_e32 v3, 4, v3
; SI-NEXT: v_mul_lo_u32 v3, v3, 24
; SI-NEXT: v_mul_u32_u24_e32 v3, 24, v3
; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
; SI-NEXT: v_add_i32_e32 v2, vcc, 8, v2
; SI-NEXT: v_alignbit_b32 v0, v0, v1, v2
@ -1114,11 +1115,12 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) {
; VI-LABEL: v_fshr_i24:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_and_b32_e32 v3, 0xffffff, v2
; VI-NEXT: s_mov_b32 s4, 0xaaaaaaab
; VI-NEXT: v_mul_hi_u32 v3, v2, s4
; VI-NEXT: v_mul_hi_u32 v3, v3, s4
; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; VI-NEXT: v_lshrrev_b32_e32 v3, 4, v3
; VI-NEXT: v_mul_lo_u32 v3, v3, 24
; VI-NEXT: v_mul_u32_u24_e32 v3, 24, v3
; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
; VI-NEXT: v_add_u32_e32 v2, vcc, 8, v2
; VI-NEXT: v_alignbit_b32 v0, v0, v1, v2
@ -1127,11 +1129,12 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) {
; GFX9-LABEL: v_fshr_i24:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v2
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
; GFX9-NEXT: v_mul_hi_u32 v3, v2, s4
; GFX9-NEXT: v_mul_hi_u32 v3, v3, s4
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 4, v3
; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX9-NEXT: v_mul_u32_u24_e32 v3, 24, v3
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
; GFX9-NEXT: v_add_u32_e32 v2, 8, v2
; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2
@ -1146,10 +1149,11 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mul_hi_u32 v3, 0xaaaaaaab, v2
; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX10-NEXT: v_mul_hi_u32 v3, 0xaaaaaaab, v3
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 4, v3
; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24
; GFX10-NEXT: v_mul_u32_u24_e32 v3, 24, v3
; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
; GFX10-NEXT: v_add_nc_u32_e32 v2, 8, v2
; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2
@ -1162,19 +1166,22 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
; SI-LABEL: v_fshr_v2i24:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, 0xaaaaaaab
; SI-NEXT: v_mul_hi_u32 v6, v4, s4
; SI-NEXT: v_mul_hi_u32 v7, v5, s4
; SI-NEXT: s_mov_b32 s4, 0xffffff
; SI-NEXT: v_and_b32_e32 v6, s4, v4
; SI-NEXT: s_mov_b32 s5, 0xaaaaaaab
; SI-NEXT: v_mul_hi_u32 v6, v6, s5
; SI-NEXT: v_and_b32_e32 v7, s4, v5
; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; SI-NEXT: v_lshrrev_b32_e32 v6, 4, v6
; SI-NEXT: v_mul_lo_u32 v6, v6, 24
; SI-NEXT: v_mul_u32_u24_e32 v6, 24, v6
; SI-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
; SI-NEXT: v_lshrrev_b32_e32 v6, 4, v7
; SI-NEXT: v_mul_lo_u32 v6, v6, 24
; SI-NEXT: v_mul_hi_u32 v6, v7, s5
; SI-NEXT: v_add_i32_e32 v4, vcc, 8, v4
; SI-NEXT: v_alignbit_b32 v0, v0, v2, v4
; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v3
; SI-NEXT: v_sub_i32_e32 v3, vcc, v5, v6
; SI-NEXT: v_lshrrev_b32_e32 v3, 4, v6
; SI-NEXT: v_mul_u32_u24_e32 v3, 24, v3
; SI-NEXT: v_sub_i32_e32 v3, vcc, v5, v3
; SI-NEXT: v_add_i32_e32 v3, vcc, 8, v3
; SI-NEXT: v_alignbit_b32 v1, v1, v2, v3
; SI-NEXT: s_setpc_b64 s[30:31]
@ -1182,19 +1189,22 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
; VI-LABEL: v_fshr_v2i24:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_mov_b32 s4, 0xaaaaaaab
; VI-NEXT: v_mul_hi_u32 v6, v4, s4
; VI-NEXT: v_mul_hi_u32 v7, v5, s4
; VI-NEXT: s_mov_b32 s4, 0xffffff
; VI-NEXT: v_and_b32_e32 v6, s4, v4
; VI-NEXT: s_mov_b32 s5, 0xaaaaaaab
; VI-NEXT: v_mul_hi_u32 v6, v6, s5
; VI-NEXT: v_and_b32_e32 v7, s4, v5
; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; VI-NEXT: v_lshrrev_b32_e32 v6, 4, v6
; VI-NEXT: v_mul_lo_u32 v6, v6, 24
; VI-NEXT: v_mul_u32_u24_e32 v6, 24, v6
; VI-NEXT: v_sub_u32_e32 v4, vcc, v4, v6
; VI-NEXT: v_lshrrev_b32_e32 v6, 4, v7
; VI-NEXT: v_mul_lo_u32 v6, v6, 24
; VI-NEXT: v_mul_hi_u32 v6, v7, s5
; VI-NEXT: v_add_u32_e32 v4, vcc, 8, v4
; VI-NEXT: v_alignbit_b32 v0, v0, v2, v4
; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v3
; VI-NEXT: v_sub_u32_e32 v3, vcc, v5, v6
; VI-NEXT: v_lshrrev_b32_e32 v3, 4, v6
; VI-NEXT: v_mul_u32_u24_e32 v3, 24, v3
; VI-NEXT: v_sub_u32_e32 v3, vcc, v5, v3
; VI-NEXT: v_add_u32_e32 v3, vcc, 8, v3
; VI-NEXT: v_alignbit_b32 v1, v1, v2, v3
; VI-NEXT: s_setpc_b64 s[30:31]
@ -1202,19 +1212,22 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
; GFX9-LABEL: v_fshr_v2i24:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
; GFX9-NEXT: v_mul_hi_u32 v6, v4, s4
; GFX9-NEXT: v_mul_hi_u32 v7, v5, s4
; GFX9-NEXT: s_mov_b32 s4, 0xffffff
; GFX9-NEXT: v_and_b32_e32 v6, s4, v4
; GFX9-NEXT: s_mov_b32 s5, 0xaaaaaaab
; GFX9-NEXT: v_mul_hi_u32 v6, v6, s5
; GFX9-NEXT: v_and_b32_e32 v7, s4, v5
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6
; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX9-NEXT: v_mul_u32_u24_e32 v6, 24, v6
; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v7
; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX9-NEXT: v_mul_hi_u32 v6, v7, s5
; GFX9-NEXT: v_add_u32_e32 v4, 8, v4
; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v3
; GFX9-NEXT: v_sub_u32_e32 v3, v5, v6
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 4, v6
; GFX9-NEXT: v_mul_u32_u24_e32 v3, 24, v3
; GFX9-NEXT: v_sub_u32_e32 v3, v5, v3
; GFX9-NEXT: v_add_u32_e32 v3, 8, v3
; GFX9-NEXT: v_alignbit_b32 v1, v1, v2, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
@ -1228,15 +1241,18 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s4, 0xaaaaaaab
; GFX10-NEXT: s_mov_b32 s4, 0xffffff
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2
; GFX10-NEXT: v_mul_hi_u32 v6, v4, s4
; GFX10-NEXT: v_mul_hi_u32 v7, v5, s4
; GFX10-NEXT: v_and_b32_e32 v6, s4, v4
; GFX10-NEXT: v_and_b32_e32 v7, s4, v5
; GFX10-NEXT: s_mov_b32 s4, 0xaaaaaaab
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 8, v3
; GFX10-NEXT: v_mul_hi_u32 v6, v6, s4
; GFX10-NEXT: v_mul_hi_u32 v7, v7, s4
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 4, v6
; GFX10-NEXT: v_lshrrev_b32_e32 v7, 4, v7
; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24
; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24
; GFX10-NEXT: v_mul_u32_u24_e32 v6, 24, v6
; GFX10-NEXT: v_mul_u32_u24_e32 v7, 24, v7
; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6
; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7
; GFX10-NEXT: v_add_nc_u32_e32 v4, 8, v4

View File

@ -47,64 +47,66 @@ declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; SCALAR-LABEL: fshl_i37:
; SCALAR: @ %bb.0:
; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr}
; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr}
; SCALAR-NEXT: mov r4, r1
; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; SCALAR-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; SCALAR-NEXT: mov r8, r0
; SCALAR-NEXT: ldr r0, [sp, #24]
; SCALAR-NEXT: mov r5, r3
; SCALAR-NEXT: ldr r1, [sp, #28]
; SCALAR-NEXT: mov r6, r2
; SCALAR-NEXT: ldr r0, [sp, #36]
; SCALAR-NEXT: mov r4, r1
; SCALAR-NEXT: mov r6, r3
; SCALAR-NEXT: and r1, r0, #31
; SCALAR-NEXT: ldr r0, [sp, #32]
; SCALAR-NEXT: mov r9, r2
; SCALAR-NEXT: mov r2, #37
; SCALAR-NEXT: mov r3, #0
; SCALAR-NEXT: bl __aeabi_uldivmod
; SCALAR-NEXT: lsl r1, r5, #27
; SCALAR-NEXT: ands r12, r2, #32
; SCALAR-NEXT: orr r1, r1, r6, lsr #5
; SCALAR-NEXT: lsl r1, r6, #27
; SCALAR-NEXT: ands r0, r2, #32
; SCALAR-NEXT: orr r1, r1, r9, lsr #5
; SCALAR-NEXT: mov r3, r8
; SCALAR-NEXT: and r5, r2, #31
; SCALAR-NEXT: mov r0, #31
; SCALAR-NEXT: and r6, r2, #31
; SCALAR-NEXT: mov r7, #31
; SCALAR-NEXT: movne r3, r1
; SCALAR-NEXT: cmp r12, #0
; SCALAR-NEXT: bic r2, r0, r2
; SCALAR-NEXT: lslne r1, r6, #27
; SCALAR-NEXT: cmp r0, #0
; SCALAR-NEXT: lslne r1, r9, #27
; SCALAR-NEXT: bic r2, r7, r2
; SCALAR-NEXT: movne r4, r8
; SCALAR-NEXT: lsl r7, r3, r5
; SCALAR-NEXT: lsl r5, r3, r6
; SCALAR-NEXT: lsr r0, r1, #1
; SCALAR-NEXT: lsl r1, r4, r5
; SCALAR-NEXT: lsl r1, r4, r6
; SCALAR-NEXT: lsr r3, r3, #1
; SCALAR-NEXT: orr r0, r7, r0, lsr r2
; SCALAR-NEXT: orr r0, r5, r0, lsr r2
; SCALAR-NEXT: orr r1, r1, r3, lsr r2
; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc}
; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
; NEON-LABEL: fshl_i37:
; NEON: @ %bb.0:
; NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; NEON-NEXT: mov r4, r1
; NEON-NEXT: mov r5, r0
; NEON-NEXT: ldr r0, [sp, #24]
; NEON-NEXT: mov r7, r3
; NEON-NEXT: ldr r1, [sp, #28]
; NEON-NEXT: mov r6, r2
; NEON-NEXT: mov r6, r0
; NEON-NEXT: ldr r0, [sp, #24]
; NEON-NEXT: and r1, r1, #31
; NEON-NEXT: mov r5, r3
; NEON-NEXT: mov r7, r2
; NEON-NEXT: mov r2, #37
; NEON-NEXT: mov r3, #0
; NEON-NEXT: bl __aeabi_uldivmod
; NEON-NEXT: mov r0, #31
; NEON-NEXT: bic r1, r0, r2
; NEON-NEXT: lsl r0, r7, #27
; NEON-NEXT: lsl r0, r5, #27
; NEON-NEXT: ands r12, r2, #32
; NEON-NEXT: orr r0, r0, r6, lsr #5
; NEON-NEXT: mov r7, r5
; NEON-NEXT: orr r0, r0, r7, lsr #5
; NEON-NEXT: mov r5, r6
; NEON-NEXT: and r2, r2, #31
; NEON-NEXT: movne r7, r0
; NEON-NEXT: lslne r0, r6, #27
; NEON-NEXT: movne r5, r0
; NEON-NEXT: lslne r0, r7, #27
; NEON-NEXT: cmp r12, #0
; NEON-NEXT: lsl r3, r7, r2
; NEON-NEXT: lsl r3, r5, r2
; NEON-NEXT: lsr r0, r0, #1
; NEON-NEXT: movne r4, r5
; NEON-NEXT: movne r4, r6
; NEON-NEXT: orr r0, r3, r0, lsr r1
; NEON-NEXT: lsr r3, r7, #1
; NEON-NEXT: lsr r3, r5, #1
; NEON-NEXT: lsl r2, r4, r2
; NEON-NEXT: orr r1, r2, r3, lsr r1
; NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
@ -233,38 +235,73 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshr_i37:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: ldr r0, [sp, #24]
; CHECK-NEXT: mov r5, r3
; CHECK-NEXT: ldr r1, [sp, #28]
; CHECK-NEXT: mov r7, r2
; CHECK-NEXT: mov r2, #37
; CHECK-NEXT: mov r3, #0
; CHECK-NEXT: bl __aeabi_uldivmod
; CHECK-NEXT: lsl r3, r5, #27
; CHECK-NEXT: add r0, r2, #27
; CHECK-NEXT: orr r3, r3, r7, lsr #5
; CHECK-NEXT: mov r1, #31
; CHECK-NEXT: ands r12, r0, #32
; CHECK-NEXT: mov r5, r6
; CHECK-NEXT: moveq r5, r3
; CHECK-NEXT: bic r1, r1, r0
; CHECK-NEXT: lsl r2, r5, #1
; CHECK-NEXT: lsleq r3, r7, #27
; CHECK-NEXT: cmp r12, #0
; CHECK-NEXT: and r7, r0, #31
; CHECK-NEXT: lsl r2, r2, r1
; CHECK-NEXT: moveq r4, r6
; CHECK-NEXT: orr r0, r2, r3, lsr r7
; CHECK-NEXT: lsl r2, r4, #1
; CHECK-NEXT: lsl r1, r2, r1
; CHECK-NEXT: orr r1, r1, r5, lsr r7
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
; SCALAR-LABEL: fshr_i37:
; SCALAR: @ %bb.0:
; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr}
; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr}
; SCALAR-NEXT: mov r8, r0
; SCALAR-NEXT: ldr r0, [sp, #28]
; SCALAR-NEXT: mov r4, r1
; SCALAR-NEXT: mov r5, r3
; SCALAR-NEXT: and r1, r0, #31
; SCALAR-NEXT: ldr r0, [sp, #24]
; SCALAR-NEXT: mov r7, r2
; SCALAR-NEXT: mov r2, #37
; SCALAR-NEXT: mov r3, #0
; SCALAR-NEXT: bl __aeabi_uldivmod
; SCALAR-NEXT: lsl r3, r5, #27
; SCALAR-NEXT: add r0, r2, #27
; SCALAR-NEXT: orr r3, r3, r7, lsr #5
; SCALAR-NEXT: ands r2, r0, #32
; SCALAR-NEXT: mov r5, r8
; SCALAR-NEXT: mov r1, #31
; SCALAR-NEXT: moveq r5, r3
; SCALAR-NEXT: lsleq r3, r7, #27
; SCALAR-NEXT: cmp r2, #0
; SCALAR-NEXT: bic r1, r1, r0
; SCALAR-NEXT: moveq r4, r8
; SCALAR-NEXT: lsl r6, r5, #1
; SCALAR-NEXT: and r7, r0, #31
; SCALAR-NEXT: lsl r2, r4, #1
; SCALAR-NEXT: lsl r6, r6, r1
; SCALAR-NEXT: lsl r1, r2, r1
; SCALAR-NEXT: orr r0, r6, r3, lsr r7
; SCALAR-NEXT: orr r1, r1, r5, lsr r7
; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc}
;
; NEON-LABEL: fshr_i37:
; NEON: @ %bb.0:
; NEON-NEXT: .save {r4, r5, r6, r7, r8, lr}
; NEON-NEXT: push {r4, r5, r6, r7, r8, lr}
; NEON-NEXT: mov r4, r1
; NEON-NEXT: ldr r1, [sp, #28]
; NEON-NEXT: mov r8, r0
; NEON-NEXT: ldr r0, [sp, #24]
; NEON-NEXT: and r1, r1, #31
; NEON-NEXT: mov r5, r3
; NEON-NEXT: mov r7, r2
; NEON-NEXT: mov r2, #37
; NEON-NEXT: mov r3, #0
; NEON-NEXT: bl __aeabi_uldivmod
; NEON-NEXT: lsl r3, r5, #27
; NEON-NEXT: add r0, r2, #27
; NEON-NEXT: orr r3, r3, r7, lsr #5
; NEON-NEXT: ands r2, r0, #32
; NEON-NEXT: mov r5, r8
; NEON-NEXT: mov r1, #31
; NEON-NEXT: moveq r5, r3
; NEON-NEXT: lsleq r3, r7, #27
; NEON-NEXT: cmp r2, #0
; NEON-NEXT: bic r1, r1, r0
; NEON-NEXT: moveq r4, r8
; NEON-NEXT: lsl r6, r5, #1
; NEON-NEXT: and r7, r0, #31
; NEON-NEXT: lsl r2, r4, #1
; NEON-NEXT: lsl r6, r6, r1
; NEON-NEXT: lsl r1, r2, r1
; NEON-NEXT: orr r0, r6, r3, lsr r7
; NEON-NEXT: orr r1, r1, r5, lsr r7
; NEON-NEXT: pop {r4, r5, r6, r7, r8, pc}
%f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
ret i37 %f
}

View File

@ -66,7 +66,8 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-BE-NEXT: move $17, $6
; CHECK-BE-NEXT: move $18, $5
; CHECK-BE-NEXT: move $19, $4
; CHECK-BE-NEXT: lw $4, 56($sp)
; CHECK-BE-NEXT: lw $1, 56($sp)
; CHECK-BE-NEXT: andi $4, $1, 31
; CHECK-BE-NEXT: lw $5, 60($sp)
; CHECK-BE-NEXT: addiu $6, $zero, 0
; CHECK-BE-NEXT: jal __umoddi3
@ -117,8 +118,9 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LE-NEXT: move $17, $6
; CHECK-LE-NEXT: move $18, $5
; CHECK-LE-NEXT: move $19, $4
; CHECK-LE-NEXT: lw $1, 60($sp)
; CHECK-LE-NEXT: andi $5, $1, 31
; CHECK-LE-NEXT: lw $4, 56($sp)
; CHECK-LE-NEXT: lw $5, 60($sp)
; CHECK-LE-NEXT: addiu $6, $zero, 37
; CHECK-LE-NEXT: jal __umoddi3
; CHECK-LE-NEXT: addiu $7, $zero, 0
@ -309,7 +311,8 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-BE-NEXT: move $17, $6
; CHECK-BE-NEXT: move $18, $5
; CHECK-BE-NEXT: move $19, $4
; CHECK-BE-NEXT: lw $4, 56($sp)
; CHECK-BE-NEXT: lw $1, 56($sp)
; CHECK-BE-NEXT: andi $4, $1, 31
; CHECK-BE-NEXT: lw $5, 60($sp)
; CHECK-BE-NEXT: addiu $6, $zero, 0
; CHECK-BE-NEXT: jal __umoddi3
@ -327,9 +330,9 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-BE-NEXT: andi $1, $1, 31
; CHECK-BE-NEXT: sll $6, $19, 1
; CHECK-BE-NEXT: sllv $6, $6, $1
; CHECK-BE-NEXT: sll $7, $16, 27
; CHECK-BE-NEXT: or $2, $6, $2
; CHECK-BE-NEXT: sll $6, $16, 27
; CHECK-BE-NEXT: movz $4, $6, $3
; CHECK-BE-NEXT: movz $4, $7, $3
; CHECK-BE-NEXT: srlv $3, $4, $5
; CHECK-BE-NEXT: sll $4, $18, 1
; CHECK-BE-NEXT: sllv $1, $4, $1
@ -360,8 +363,9 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LE-NEXT: move $17, $6
; CHECK-LE-NEXT: move $18, $5
; CHECK-LE-NEXT: move $19, $4
; CHECK-LE-NEXT: lw $1, 60($sp)
; CHECK-LE-NEXT: andi $5, $1, 31
; CHECK-LE-NEXT: lw $4, 56($sp)
; CHECK-LE-NEXT: lw $5, 60($sp)
; CHECK-LE-NEXT: addiu $6, $zero, 37
; CHECK-LE-NEXT: jal __umoddi3
; CHECK-LE-NEXT: addiu $7, $zero, 0

View File

@ -250,7 +250,7 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-NEXT: mr 29, 5
; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: mr 30, 6
; CHECK32_32-NEXT: mr 3, 7
; CHECK32_32-NEXT: clrlwi 3, 7, 27
; CHECK32_32-NEXT: mr 4, 8
; CHECK32_32-NEXT: li 5, 0
; CHECK32_32-NEXT: li 6, 37
@ -299,7 +299,7 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_64-NEXT: .cfi_offset r30, -8
; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 27, 3
; CHECK32_64-NEXT: mr 3, 7
; CHECK32_64-NEXT: clrlwi 3, 7, 27
; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 28, 4
; CHECK32_64-NEXT: mr 4, 8
@ -353,12 +353,13 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK64-LABEL: fshl_i37:
; CHECK64: # %bb.0:
; CHECK64-NEXT: lis 6, 28339
; CHECK64-NEXT: sldi 4, 4, 27
; CHECK64-NEXT: clrldi 7, 5, 27
; CHECK64-NEXT: ori 6, 6, 58451
; CHECK64-NEXT: sldi 4, 4, 27
; CHECK64-NEXT: rldic 6, 6, 33, 0
; CHECK64-NEXT: oris 6, 6, 3542
; CHECK64-NEXT: ori 6, 6, 31883
; CHECK64-NEXT: mulhdu 6, 5, 6
; CHECK64-NEXT: mulhdu 6, 7, 6
; CHECK64-NEXT: rldicl 6, 6, 59, 5
; CHECK64-NEXT: mulli 6, 6, 37
; CHECK64-NEXT: sub 5, 5, 6
@ -549,7 +550,7 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-NEXT: mr 29, 5
; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT: mr 30, 6
; CHECK32_32-NEXT: mr 3, 7
; CHECK32_32-NEXT: clrlwi 3, 7, 27
; CHECK32_32-NEXT: mr 4, 8
; CHECK32_32-NEXT: li 5, 0
; CHECK32_32-NEXT: li 6, 37
@ -599,7 +600,7 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_64-NEXT: .cfi_offset r30, -8
; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 27, 3
; CHECK32_64-NEXT: mr 3, 7
; CHECK32_64-NEXT: clrlwi 3, 7, 27
; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT: mr 28, 4
; CHECK32_64-NEXT: mr 4, 8
@ -649,12 +650,13 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK64-LABEL: fshr_i37:
; CHECK64: # %bb.0:
; CHECK64-NEXT: lis 6, 28339
; CHECK64-NEXT: sldi 4, 4, 27
; CHECK64-NEXT: clrldi 7, 5, 27
; CHECK64-NEXT: ori 6, 6, 58451
; CHECK64-NEXT: sldi 4, 4, 27
; CHECK64-NEXT: rldic 6, 6, 33, 0
; CHECK64-NEXT: oris 6, 6, 3542
; CHECK64-NEXT: ori 6, 6, 31883
; CHECK64-NEXT: mulhdu 6, 5, 6
; CHECK64-NEXT: mulhdu 6, 7, 6
; CHECK64-NEXT: rldicl 6, 6, 59, 5
; CHECK64-NEXT: mulli 6, 6, 37
; CHECK64-NEXT: sub 5, 5, 6

View File

@ -140,13 +140,15 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: andl $31, %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT: shldl $27, %ebx, %edi
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $37
; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: calll __umoddi3
; X86-SSE2-NEXT: addl $16, %esp
@ -174,8 +176,9 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X64-AVX2-LABEL: fshl_i37:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movq %rdx, %rcx
; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
; X64-AVX2-NEXT: andq %rdx, %rax
; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B
; X64-AVX2-NEXT: movq %rcx, %rax
; X64-AVX2-NEXT: mulq %rdx
; X64-AVX2-NEXT: shrq $5, %rdx
; X64-AVX2-NEXT: leal (%rdx,%rdx,8), %eax
@ -304,13 +307,15 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: andl $31, %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT: shldl $27, %ebx, %esi
; X86-SSE2-NEXT: pushl $0
; X86-SSE2-NEXT: pushl $37
; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: calll __umoddi3
; X86-SSE2-NEXT: addl $16, %esp
@ -339,8 +344,9 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X64-AVX2-LABEL: fshr_i37:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movq %rdx, %rcx
; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
; X64-AVX2-NEXT: andq %rdx, %rax
; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B
; X64-AVX2-NEXT: movq %rcx, %rax
; X64-AVX2-NEXT: mulq %rdx
; X64-AVX2-NEXT: shrq $5, %rdx
; X64-AVX2-NEXT: leal (%rdx,%rdx,8), %eax