mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-15 12:09:51 +00:00
[SelectionDAG] Clear promoted bits before UREM on shift amount in PromoteIntRes_FunnelShift.
Otherwise we have garbage in the upper bits that can affect the results of the UREM. Fixes PR55296. Differential Revision: https://reviews.llvm.org/D125076
This commit is contained in:
parent
324d696c15
commit
76f90a9d71
@ -1277,7 +1277,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
|
||||
SDValue Hi = GetPromotedInteger(N->getOperand(0));
|
||||
SDValue Lo = GetPromotedInteger(N->getOperand(1));
|
||||
-SDValue Amt = GetPromotedInteger(N->getOperand(2));
|
||||
+SDValue Amt = ZExtPromotedInteger(N->getOperand(2));
|
||||
|
||||
SDLoc DL(N);
|
||||
EVT OldVT = N->getOperand(0).getValueType();
|
||||
|
@ -69,13 +69,14 @@ declare i37 @llvm.fshl.i37(i37, i37, i37)
|
||||
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK-LABEL: fshl_i37:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #31883
|
||||
; CHECK-NEXT: mov w9, #37
|
||||
; CHECK-NEXT: movk x8, #3542, lsl #16
|
||||
; CHECK-NEXT: mov x9, #31883
|
||||
; CHECK-NEXT: and x8, x2, #0x1fffffffff
|
||||
; CHECK-NEXT: movk x9, #3542, lsl #16
|
||||
; CHECK-NEXT: ubfiz x10, x1, #26, #37
|
||||
; CHECK-NEXT: movk x8, #51366, lsl #32
|
||||
; CHECK-NEXT: movk x8, #56679, lsl #48
|
||||
; CHECK-NEXT: umulh x8, x2, x8
|
||||
; CHECK-NEXT: movk x9, #51366, lsl #32
|
||||
; CHECK-NEXT: movk x9, #56679, lsl #48
|
||||
; CHECK-NEXT: umulh x8, x8, x9
|
||||
; CHECK-NEXT: mov w9, #37
|
||||
; CHECK-NEXT: ubfx x8, x8, #5, #27
|
||||
; CHECK-NEXT: msub w8, w8, w9, w2
|
||||
; CHECK-NEXT: mvn w9, w8
|
||||
@ -206,14 +207,15 @@ declare i37 @llvm.fshr.i37(i37, i37, i37)
|
||||
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK-LABEL: fshr_i37:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #31883
|
||||
; CHECK-NEXT: mov w9, #37
|
||||
; CHECK-NEXT: movk x8, #3542, lsl #16
|
||||
; CHECK-NEXT: mov x9, #31883
|
||||
; CHECK-NEXT: and x8, x2, #0x1fffffffff
|
||||
; CHECK-NEXT: movk x9, #3542, lsl #16
|
||||
; CHECK-NEXT: lsl x10, x1, #27
|
||||
; CHECK-NEXT: movk x8, #51366, lsl #32
|
||||
; CHECK-NEXT: movk x9, #51366, lsl #32
|
||||
; CHECK-NEXT: lsl x11, x0, #1
|
||||
; CHECK-NEXT: movk x8, #56679, lsl #48
|
||||
; CHECK-NEXT: umulh x8, x2, x8
|
||||
; CHECK-NEXT: movk x9, #56679, lsl #48
|
||||
; CHECK-NEXT: umulh x8, x8, x9
|
||||
; CHECK-NEXT: mov w9, #37
|
||||
; CHECK-NEXT: lsr x8, x8, #5
|
||||
; CHECK-NEXT: msub w8, w8, w9, w2
|
||||
; CHECK-NEXT: add w8, w8, #27
|
||||
|
@ -1101,11 +1101,12 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) {
|
||||
; SI-LABEL: v_fshr_i24:
|
||||
; SI: ; %bb.0:
|
||||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SI-NEXT: v_and_b32_e32 v3, 0xffffff, v2
|
||||
; SI-NEXT: s_mov_b32 s4, 0xaaaaaaab
|
||||
; SI-NEXT: v_mul_hi_u32 v3, v2, s4
|
||||
; SI-NEXT: v_mul_hi_u32 v3, v3, s4
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v3, 4, v3
|
||||
; SI-NEXT: v_mul_lo_u32 v3, v3, 24
|
||||
; SI-NEXT: v_mul_u32_u24_e32 v3, 24, v3
|
||||
; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
|
||||
; SI-NEXT: v_add_i32_e32 v2, vcc, 8, v2
|
||||
; SI-NEXT: v_alignbit_b32 v0, v0, v1, v2
|
||||
@ -1114,11 +1115,12 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) {
|
||||
; VI-LABEL: v_fshr_i24:
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; VI-NEXT: v_and_b32_e32 v3, 0xffffff, v2
|
||||
; VI-NEXT: s_mov_b32 s4, 0xaaaaaaab
|
||||
; VI-NEXT: v_mul_hi_u32 v3, v2, s4
|
||||
; VI-NEXT: v_mul_hi_u32 v3, v3, s4
|
||||
; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v3, 4, v3
|
||||
; VI-NEXT: v_mul_lo_u32 v3, v3, 24
|
||||
; VI-NEXT: v_mul_u32_u24_e32 v3, 24, v3
|
||||
; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
|
||||
; VI-NEXT: v_add_u32_e32 v2, vcc, 8, v2
|
||||
; VI-NEXT: v_alignbit_b32 v0, v0, v1, v2
|
||||
@ -1127,11 +1129,12 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) {
|
||||
; GFX9-LABEL: v_fshr_i24:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v2
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
|
||||
; GFX9-NEXT: v_mul_hi_u32 v3, v2, s4
|
||||
; GFX9-NEXT: v_mul_hi_u32 v3, v3, s4
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 4, v3
|
||||
; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24
|
||||
; GFX9-NEXT: v_mul_u32_u24_e32 v3, 24, v3
|
||||
; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
|
||||
; GFX9-NEXT: v_add_u32_e32 v2, 8, v2
|
||||
; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2
|
||||
@ -1146,10 +1149,11 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) {
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: v_mul_hi_u32 v3, 0xaaaaaaab, v2
|
||||
; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v2
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 8, v1
|
||||
; GFX10-NEXT: v_mul_hi_u32 v3, 0xaaaaaaab, v3
|
||||
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 4, v3
|
||||
; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24
|
||||
; GFX10-NEXT: v_mul_u32_u24_e32 v3, 24, v3
|
||||
; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
|
||||
; GFX10-NEXT: v_add_nc_u32_e32 v2, 8, v2
|
||||
; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2
|
||||
@ -1162,19 +1166,22 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
|
||||
; SI-LABEL: v_fshr_v2i24:
|
||||
; SI: ; %bb.0:
|
||||
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SI-NEXT: s_mov_b32 s4, 0xaaaaaaab
|
||||
; SI-NEXT: v_mul_hi_u32 v6, v4, s4
|
||||
; SI-NEXT: v_mul_hi_u32 v7, v5, s4
|
||||
; SI-NEXT: s_mov_b32 s4, 0xffffff
|
||||
; SI-NEXT: v_and_b32_e32 v6, s4, v4
|
||||
; SI-NEXT: s_mov_b32 s5, 0xaaaaaaab
|
||||
; SI-NEXT: v_mul_hi_u32 v6, v6, s5
|
||||
; SI-NEXT: v_and_b32_e32 v7, s4, v5
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v2
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v6, 4, v6
|
||||
; SI-NEXT: v_mul_lo_u32 v6, v6, 24
|
||||
; SI-NEXT: v_mul_u32_u24_e32 v6, 24, v6
|
||||
; SI-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v6, 4, v7
|
||||
; SI-NEXT: v_mul_lo_u32 v6, v6, 24
|
||||
; SI-NEXT: v_mul_hi_u32 v6, v7, s5
|
||||
; SI-NEXT: v_add_i32_e32 v4, vcc, 8, v4
|
||||
; SI-NEXT: v_alignbit_b32 v0, v0, v2, v4
|
||||
; SI-NEXT: v_lshlrev_b32_e32 v2, 8, v3
|
||||
; SI-NEXT: v_sub_i32_e32 v3, vcc, v5, v6
|
||||
; SI-NEXT: v_lshrrev_b32_e32 v3, 4, v6
|
||||
; SI-NEXT: v_mul_u32_u24_e32 v3, 24, v3
|
||||
; SI-NEXT: v_sub_i32_e32 v3, vcc, v5, v3
|
||||
; SI-NEXT: v_add_i32_e32 v3, vcc, 8, v3
|
||||
; SI-NEXT: v_alignbit_b32 v1, v1, v2, v3
|
||||
; SI-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1182,19 +1189,22 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
|
||||
; VI-LABEL: v_fshr_v2i24:
|
||||
; VI: ; %bb.0:
|
||||
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; VI-NEXT: s_mov_b32 s4, 0xaaaaaaab
|
||||
; VI-NEXT: v_mul_hi_u32 v6, v4, s4
|
||||
; VI-NEXT: v_mul_hi_u32 v7, v5, s4
|
||||
; VI-NEXT: s_mov_b32 s4, 0xffffff
|
||||
; VI-NEXT: v_and_b32_e32 v6, s4, v4
|
||||
; VI-NEXT: s_mov_b32 s5, 0xaaaaaaab
|
||||
; VI-NEXT: v_mul_hi_u32 v6, v6, s5
|
||||
; VI-NEXT: v_and_b32_e32 v7, s4, v5
|
||||
; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v2
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v6, 4, v6
|
||||
; VI-NEXT: v_mul_lo_u32 v6, v6, 24
|
||||
; VI-NEXT: v_mul_u32_u24_e32 v6, 24, v6
|
||||
; VI-NEXT: v_sub_u32_e32 v4, vcc, v4, v6
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v6, 4, v7
|
||||
; VI-NEXT: v_mul_lo_u32 v6, v6, 24
|
||||
; VI-NEXT: v_mul_hi_u32 v6, v7, s5
|
||||
; VI-NEXT: v_add_u32_e32 v4, vcc, 8, v4
|
||||
; VI-NEXT: v_alignbit_b32 v0, v0, v2, v4
|
||||
; VI-NEXT: v_lshlrev_b32_e32 v2, 8, v3
|
||||
; VI-NEXT: v_sub_u32_e32 v3, vcc, v5, v6
|
||||
; VI-NEXT: v_lshrrev_b32_e32 v3, 4, v6
|
||||
; VI-NEXT: v_mul_u32_u24_e32 v3, 24, v3
|
||||
; VI-NEXT: v_sub_u32_e32 v3, vcc, v5, v3
|
||||
; VI-NEXT: v_add_u32_e32 v3, vcc, 8, v3
|
||||
; VI-NEXT: v_alignbit_b32 v1, v1, v2, v3
|
||||
; VI-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1202,19 +1212,22 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
|
||||
; GFX9-LABEL: v_fshr_v2i24:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab
|
||||
; GFX9-NEXT: v_mul_hi_u32 v6, v4, s4
|
||||
; GFX9-NEXT: v_mul_hi_u32 v7, v5, s4
|
||||
; GFX9-NEXT: s_mov_b32 s4, 0xffffff
|
||||
; GFX9-NEXT: v_and_b32_e32 v6, s4, v4
|
||||
; GFX9-NEXT: s_mov_b32 s5, 0xaaaaaaab
|
||||
; GFX9-NEXT: v_mul_hi_u32 v6, v6, s5
|
||||
; GFX9-NEXT: v_and_b32_e32 v7, s4, v5
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v2
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6
|
||||
; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
|
||||
; GFX9-NEXT: v_mul_u32_u24_e32 v6, 24, v6
|
||||
; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v7
|
||||
; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
|
||||
; GFX9-NEXT: v_mul_hi_u32 v6, v7, s5
|
||||
; GFX9-NEXT: v_add_u32_e32 v4, 8, v4
|
||||
; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 8, v3
|
||||
; GFX9-NEXT: v_sub_u32_e32 v3, v5, v6
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 4, v6
|
||||
; GFX9-NEXT: v_mul_u32_u24_e32 v3, 24, v3
|
||||
; GFX9-NEXT: v_sub_u32_e32 v3, v5, v3
|
||||
; GFX9-NEXT: v_add_u32_e32 v3, 8, v3
|
||||
; GFX9-NEXT: v_alignbit_b32 v1, v1, v2, v3
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
@ -1228,15 +1241,18 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: s_mov_b32 s4, 0xaaaaaaab
|
||||
; GFX10-NEXT: s_mov_b32 s4, 0xffffff
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 8, v2
|
||||
; GFX10-NEXT: v_mul_hi_u32 v6, v4, s4
|
||||
; GFX10-NEXT: v_mul_hi_u32 v7, v5, s4
|
||||
; GFX10-NEXT: v_and_b32_e32 v6, s4, v4
|
||||
; GFX10-NEXT: v_and_b32_e32 v7, s4, v5
|
||||
; GFX10-NEXT: s_mov_b32 s4, 0xaaaaaaab
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 8, v3
|
||||
; GFX10-NEXT: v_mul_hi_u32 v6, v6, s4
|
||||
; GFX10-NEXT: v_mul_hi_u32 v7, v7, s4
|
||||
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 4, v6
|
||||
; GFX10-NEXT: v_lshrrev_b32_e32 v7, 4, v7
|
||||
; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24
|
||||
; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24
|
||||
; GFX10-NEXT: v_mul_u32_u24_e32 v6, 24, v6
|
||||
; GFX10-NEXT: v_mul_u32_u24_e32 v7, 24, v7
|
||||
; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6
|
||||
; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7
|
||||
; GFX10-NEXT: v_add_nc_u32_e32 v4, 8, v4
|
||||
|
@ -47,64 +47,66 @@ declare i37 @llvm.fshl.i37(i37, i37, i37)
|
||||
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; SCALAR-LABEL: fshl_i37:
|
||||
; SCALAR: @ %bb.0:
|
||||
; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr}
|
||||
; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr}
|
||||
; SCALAR-NEXT: mov r4, r1
|
||||
; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
|
||||
; SCALAR-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
|
||||
; SCALAR-NEXT: mov r8, r0
|
||||
; SCALAR-NEXT: ldr r0, [sp, #24]
|
||||
; SCALAR-NEXT: mov r5, r3
|
||||
; SCALAR-NEXT: ldr r1, [sp, #28]
|
||||
; SCALAR-NEXT: mov r6, r2
|
||||
; SCALAR-NEXT: ldr r0, [sp, #36]
|
||||
; SCALAR-NEXT: mov r4, r1
|
||||
; SCALAR-NEXT: mov r6, r3
|
||||
; SCALAR-NEXT: and r1, r0, #31
|
||||
; SCALAR-NEXT: ldr r0, [sp, #32]
|
||||
; SCALAR-NEXT: mov r9, r2
|
||||
; SCALAR-NEXT: mov r2, #37
|
||||
; SCALAR-NEXT: mov r3, #0
|
||||
; SCALAR-NEXT: bl __aeabi_uldivmod
|
||||
; SCALAR-NEXT: lsl r1, r5, #27
|
||||
; SCALAR-NEXT: ands r12, r2, #32
|
||||
; SCALAR-NEXT: orr r1, r1, r6, lsr #5
|
||||
; SCALAR-NEXT: lsl r1, r6, #27
|
||||
; SCALAR-NEXT: ands r0, r2, #32
|
||||
; SCALAR-NEXT: orr r1, r1, r9, lsr #5
|
||||
; SCALAR-NEXT: mov r3, r8
|
||||
; SCALAR-NEXT: and r5, r2, #31
|
||||
; SCALAR-NEXT: mov r0, #31
|
||||
; SCALAR-NEXT: and r6, r2, #31
|
||||
; SCALAR-NEXT: mov r7, #31
|
||||
; SCALAR-NEXT: movne r3, r1
|
||||
; SCALAR-NEXT: cmp r12, #0
|
||||
; SCALAR-NEXT: bic r2, r0, r2
|
||||
; SCALAR-NEXT: lslne r1, r6, #27
|
||||
; SCALAR-NEXT: cmp r0, #0
|
||||
; SCALAR-NEXT: lslne r1, r9, #27
|
||||
; SCALAR-NEXT: bic r2, r7, r2
|
||||
; SCALAR-NEXT: movne r4, r8
|
||||
; SCALAR-NEXT: lsl r7, r3, r5
|
||||
; SCALAR-NEXT: lsl r5, r3, r6
|
||||
; SCALAR-NEXT: lsr r0, r1, #1
|
||||
; SCALAR-NEXT: lsl r1, r4, r5
|
||||
; SCALAR-NEXT: lsl r1, r4, r6
|
||||
; SCALAR-NEXT: lsr r3, r3, #1
|
||||
; SCALAR-NEXT: orr r0, r7, r0, lsr r2
|
||||
; SCALAR-NEXT: orr r0, r5, r0, lsr r2
|
||||
; SCALAR-NEXT: orr r1, r1, r3, lsr r2
|
||||
; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc}
|
||||
; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
|
||||
;
|
||||
; NEON-LABEL: fshl_i37:
|
||||
; NEON: @ %bb.0:
|
||||
; NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
|
||||
; NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
|
||||
; NEON-NEXT: mov r4, r1
|
||||
; NEON-NEXT: mov r5, r0
|
||||
; NEON-NEXT: ldr r0, [sp, #24]
|
||||
; NEON-NEXT: mov r7, r3
|
||||
; NEON-NEXT: ldr r1, [sp, #28]
|
||||
; NEON-NEXT: mov r6, r2
|
||||
; NEON-NEXT: mov r6, r0
|
||||
; NEON-NEXT: ldr r0, [sp, #24]
|
||||
; NEON-NEXT: and r1, r1, #31
|
||||
; NEON-NEXT: mov r5, r3
|
||||
; NEON-NEXT: mov r7, r2
|
||||
; NEON-NEXT: mov r2, #37
|
||||
; NEON-NEXT: mov r3, #0
|
||||
; NEON-NEXT: bl __aeabi_uldivmod
|
||||
; NEON-NEXT: mov r0, #31
|
||||
; NEON-NEXT: bic r1, r0, r2
|
||||
; NEON-NEXT: lsl r0, r7, #27
|
||||
; NEON-NEXT: lsl r0, r5, #27
|
||||
; NEON-NEXT: ands r12, r2, #32
|
||||
; NEON-NEXT: orr r0, r0, r6, lsr #5
|
||||
; NEON-NEXT: mov r7, r5
|
||||
; NEON-NEXT: orr r0, r0, r7, lsr #5
|
||||
; NEON-NEXT: mov r5, r6
|
||||
; NEON-NEXT: and r2, r2, #31
|
||||
; NEON-NEXT: movne r7, r0
|
||||
; NEON-NEXT: lslne r0, r6, #27
|
||||
; NEON-NEXT: movne r5, r0
|
||||
; NEON-NEXT: lslne r0, r7, #27
|
||||
; NEON-NEXT: cmp r12, #0
|
||||
; NEON-NEXT: lsl r3, r7, r2
|
||||
; NEON-NEXT: lsl r3, r5, r2
|
||||
; NEON-NEXT: lsr r0, r0, #1
|
||||
; NEON-NEXT: movne r4, r5
|
||||
; NEON-NEXT: movne r4, r6
|
||||
; NEON-NEXT: orr r0, r3, r0, lsr r1
|
||||
; NEON-NEXT: lsr r3, r7, #1
|
||||
; NEON-NEXT: lsr r3, r5, #1
|
||||
; NEON-NEXT: lsl r2, r4, r2
|
||||
; NEON-NEXT: orr r1, r2, r3, lsr r1
|
||||
; NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
|
||||
@ -233,38 +235,73 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
|
||||
; Verify that weird types are minimally supported.
|
||||
declare i37 @llvm.fshr.i37(i37, i37, i37)
|
||||
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK-LABEL: fshr_i37:
|
||||
; CHECK: @ %bb.0:
|
||||
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
|
||||
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
|
||||
; CHECK-NEXT: mov r4, r1
|
||||
; CHECK-NEXT: mov r6, r0
|
||||
; CHECK-NEXT: ldr r0, [sp, #24]
|
||||
; CHECK-NEXT: mov r5, r3
|
||||
; CHECK-NEXT: ldr r1, [sp, #28]
|
||||
; CHECK-NEXT: mov r7, r2
|
||||
; CHECK-NEXT: mov r2, #37
|
||||
; CHECK-NEXT: mov r3, #0
|
||||
; CHECK-NEXT: bl __aeabi_uldivmod
|
||||
; CHECK-NEXT: lsl r3, r5, #27
|
||||
; CHECK-NEXT: add r0, r2, #27
|
||||
; CHECK-NEXT: orr r3, r3, r7, lsr #5
|
||||
; CHECK-NEXT: mov r1, #31
|
||||
; CHECK-NEXT: ands r12, r0, #32
|
||||
; CHECK-NEXT: mov r5, r6
|
||||
; CHECK-NEXT: moveq r5, r3
|
||||
; CHECK-NEXT: bic r1, r1, r0
|
||||
; CHECK-NEXT: lsl r2, r5, #1
|
||||
; CHECK-NEXT: lsleq r3, r7, #27
|
||||
; CHECK-NEXT: cmp r12, #0
|
||||
; CHECK-NEXT: and r7, r0, #31
|
||||
; CHECK-NEXT: lsl r2, r2, r1
|
||||
; CHECK-NEXT: moveq r4, r6
|
||||
; CHECK-NEXT: orr r0, r2, r3, lsr r7
|
||||
; CHECK-NEXT: lsl r2, r4, #1
|
||||
; CHECK-NEXT: lsl r1, r2, r1
|
||||
; CHECK-NEXT: orr r1, r1, r5, lsr r7
|
||||
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
|
||||
; SCALAR-LABEL: fshr_i37:
|
||||
; SCALAR: @ %bb.0:
|
||||
; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr}
|
||||
; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr}
|
||||
; SCALAR-NEXT: mov r8, r0
|
||||
; SCALAR-NEXT: ldr r0, [sp, #28]
|
||||
; SCALAR-NEXT: mov r4, r1
|
||||
; SCALAR-NEXT: mov r5, r3
|
||||
; SCALAR-NEXT: and r1, r0, #31
|
||||
; SCALAR-NEXT: ldr r0, [sp, #24]
|
||||
; SCALAR-NEXT: mov r7, r2
|
||||
; SCALAR-NEXT: mov r2, #37
|
||||
; SCALAR-NEXT: mov r3, #0
|
||||
; SCALAR-NEXT: bl __aeabi_uldivmod
|
||||
; SCALAR-NEXT: lsl r3, r5, #27
|
||||
; SCALAR-NEXT: add r0, r2, #27
|
||||
; SCALAR-NEXT: orr r3, r3, r7, lsr #5
|
||||
; SCALAR-NEXT: ands r2, r0, #32
|
||||
; SCALAR-NEXT: mov r5, r8
|
||||
; SCALAR-NEXT: mov r1, #31
|
||||
; SCALAR-NEXT: moveq r5, r3
|
||||
; SCALAR-NEXT: lsleq r3, r7, #27
|
||||
; SCALAR-NEXT: cmp r2, #0
|
||||
; SCALAR-NEXT: bic r1, r1, r0
|
||||
; SCALAR-NEXT: moveq r4, r8
|
||||
; SCALAR-NEXT: lsl r6, r5, #1
|
||||
; SCALAR-NEXT: and r7, r0, #31
|
||||
; SCALAR-NEXT: lsl r2, r4, #1
|
||||
; SCALAR-NEXT: lsl r6, r6, r1
|
||||
; SCALAR-NEXT: lsl r1, r2, r1
|
||||
; SCALAR-NEXT: orr r0, r6, r3, lsr r7
|
||||
; SCALAR-NEXT: orr r1, r1, r5, lsr r7
|
||||
; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc}
|
||||
;
|
||||
; NEON-LABEL: fshr_i37:
|
||||
; NEON: @ %bb.0:
|
||||
; NEON-NEXT: .save {r4, r5, r6, r7, r8, lr}
|
||||
; NEON-NEXT: push {r4, r5, r6, r7, r8, lr}
|
||||
; NEON-NEXT: mov r4, r1
|
||||
; NEON-NEXT: ldr r1, [sp, #28]
|
||||
; NEON-NEXT: mov r8, r0
|
||||
; NEON-NEXT: ldr r0, [sp, #24]
|
||||
; NEON-NEXT: and r1, r1, #31
|
||||
; NEON-NEXT: mov r5, r3
|
||||
; NEON-NEXT: mov r7, r2
|
||||
; NEON-NEXT: mov r2, #37
|
||||
; NEON-NEXT: mov r3, #0
|
||||
; NEON-NEXT: bl __aeabi_uldivmod
|
||||
; NEON-NEXT: lsl r3, r5, #27
|
||||
; NEON-NEXT: add r0, r2, #27
|
||||
; NEON-NEXT: orr r3, r3, r7, lsr #5
|
||||
; NEON-NEXT: ands r2, r0, #32
|
||||
; NEON-NEXT: mov r5, r8
|
||||
; NEON-NEXT: mov r1, #31
|
||||
; NEON-NEXT: moveq r5, r3
|
||||
; NEON-NEXT: lsleq r3, r7, #27
|
||||
; NEON-NEXT: cmp r2, #0
|
||||
; NEON-NEXT: bic r1, r1, r0
|
||||
; NEON-NEXT: moveq r4, r8
|
||||
; NEON-NEXT: lsl r6, r5, #1
|
||||
; NEON-NEXT: and r7, r0, #31
|
||||
; NEON-NEXT: lsl r2, r4, #1
|
||||
; NEON-NEXT: lsl r6, r6, r1
|
||||
; NEON-NEXT: lsl r1, r2, r1
|
||||
; NEON-NEXT: orr r0, r6, r3, lsr r7
|
||||
; NEON-NEXT: orr r1, r1, r5, lsr r7
|
||||
; NEON-NEXT: pop {r4, r5, r6, r7, r8, pc}
|
||||
%f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
|
||||
ret i37 %f
|
||||
}
|
||||
|
@ -66,7 +66,8 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK-BE-NEXT: move $17, $6
|
||||
; CHECK-BE-NEXT: move $18, $5
|
||||
; CHECK-BE-NEXT: move $19, $4
|
||||
; CHECK-BE-NEXT: lw $4, 56($sp)
|
||||
; CHECK-BE-NEXT: lw $1, 56($sp)
|
||||
; CHECK-BE-NEXT: andi $4, $1, 31
|
||||
; CHECK-BE-NEXT: lw $5, 60($sp)
|
||||
; CHECK-BE-NEXT: addiu $6, $zero, 0
|
||||
; CHECK-BE-NEXT: jal __umoddi3
|
||||
@ -117,8 +118,9 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK-LE-NEXT: move $17, $6
|
||||
; CHECK-LE-NEXT: move $18, $5
|
||||
; CHECK-LE-NEXT: move $19, $4
|
||||
; CHECK-LE-NEXT: lw $1, 60($sp)
|
||||
; CHECK-LE-NEXT: andi $5, $1, 31
|
||||
; CHECK-LE-NEXT: lw $4, 56($sp)
|
||||
; CHECK-LE-NEXT: lw $5, 60($sp)
|
||||
; CHECK-LE-NEXT: addiu $6, $zero, 37
|
||||
; CHECK-LE-NEXT: jal __umoddi3
|
||||
; CHECK-LE-NEXT: addiu $7, $zero, 0
|
||||
@ -309,7 +311,8 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK-BE-NEXT: move $17, $6
|
||||
; CHECK-BE-NEXT: move $18, $5
|
||||
; CHECK-BE-NEXT: move $19, $4
|
||||
; CHECK-BE-NEXT: lw $4, 56($sp)
|
||||
; CHECK-BE-NEXT: lw $1, 56($sp)
|
||||
; CHECK-BE-NEXT: andi $4, $1, 31
|
||||
; CHECK-BE-NEXT: lw $5, 60($sp)
|
||||
; CHECK-BE-NEXT: addiu $6, $zero, 0
|
||||
; CHECK-BE-NEXT: jal __umoddi3
|
||||
@ -327,9 +330,9 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK-BE-NEXT: andi $1, $1, 31
|
||||
; CHECK-BE-NEXT: sll $6, $19, 1
|
||||
; CHECK-BE-NEXT: sllv $6, $6, $1
|
||||
; CHECK-BE-NEXT: sll $7, $16, 27
|
||||
; CHECK-BE-NEXT: or $2, $6, $2
|
||||
; CHECK-BE-NEXT: sll $6, $16, 27
|
||||
; CHECK-BE-NEXT: movz $4, $6, $3
|
||||
; CHECK-BE-NEXT: movz $4, $7, $3
|
||||
; CHECK-BE-NEXT: srlv $3, $4, $5
|
||||
; CHECK-BE-NEXT: sll $4, $18, 1
|
||||
; CHECK-BE-NEXT: sllv $1, $4, $1
|
||||
@ -360,8 +363,9 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK-LE-NEXT: move $17, $6
|
||||
; CHECK-LE-NEXT: move $18, $5
|
||||
; CHECK-LE-NEXT: move $19, $4
|
||||
; CHECK-LE-NEXT: lw $1, 60($sp)
|
||||
; CHECK-LE-NEXT: andi $5, $1, 31
|
||||
; CHECK-LE-NEXT: lw $4, 56($sp)
|
||||
; CHECK-LE-NEXT: lw $5, 60($sp)
|
||||
; CHECK-LE-NEXT: addiu $6, $zero, 37
|
||||
; CHECK-LE-NEXT: jal __umoddi3
|
||||
; CHECK-LE-NEXT: addiu $7, $zero, 0
|
||||
|
@ -250,7 +250,7 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK32_32-NEXT: mr 29, 5
|
||||
; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
|
||||
; CHECK32_32-NEXT: mr 30, 6
|
||||
; CHECK32_32-NEXT: mr 3, 7
|
||||
; CHECK32_32-NEXT: clrlwi 3, 7, 27
|
||||
; CHECK32_32-NEXT: mr 4, 8
|
||||
; CHECK32_32-NEXT: li 5, 0
|
||||
; CHECK32_32-NEXT: li 6, 37
|
||||
@ -299,7 +299,7 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK32_64-NEXT: .cfi_offset r30, -8
|
||||
; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill
|
||||
; CHECK32_64-NEXT: mr 27, 3
|
||||
; CHECK32_64-NEXT: mr 3, 7
|
||||
; CHECK32_64-NEXT: clrlwi 3, 7, 27
|
||||
; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill
|
||||
; CHECK32_64-NEXT: mr 28, 4
|
||||
; CHECK32_64-NEXT: mr 4, 8
|
||||
@ -353,12 +353,13 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK64-LABEL: fshl_i37:
|
||||
; CHECK64: # %bb.0:
|
||||
; CHECK64-NEXT: lis 6, 28339
|
||||
; CHECK64-NEXT: sldi 4, 4, 27
|
||||
; CHECK64-NEXT: clrldi 7, 5, 27
|
||||
; CHECK64-NEXT: ori 6, 6, 58451
|
||||
; CHECK64-NEXT: sldi 4, 4, 27
|
||||
; CHECK64-NEXT: rldic 6, 6, 33, 0
|
||||
; CHECK64-NEXT: oris 6, 6, 3542
|
||||
; CHECK64-NEXT: ori 6, 6, 31883
|
||||
; CHECK64-NEXT: mulhdu 6, 5, 6
|
||||
; CHECK64-NEXT: mulhdu 6, 7, 6
|
||||
; CHECK64-NEXT: rldicl 6, 6, 59, 5
|
||||
; CHECK64-NEXT: mulli 6, 6, 37
|
||||
; CHECK64-NEXT: sub 5, 5, 6
|
||||
@ -549,7 +550,7 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK32_32-NEXT: mr 29, 5
|
||||
; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
|
||||
; CHECK32_32-NEXT: mr 30, 6
|
||||
; CHECK32_32-NEXT: mr 3, 7
|
||||
; CHECK32_32-NEXT: clrlwi 3, 7, 27
|
||||
; CHECK32_32-NEXT: mr 4, 8
|
||||
; CHECK32_32-NEXT: li 5, 0
|
||||
; CHECK32_32-NEXT: li 6, 37
|
||||
@ -599,7 +600,7 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK32_64-NEXT: .cfi_offset r30, -8
|
||||
; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill
|
||||
; CHECK32_64-NEXT: mr 27, 3
|
||||
; CHECK32_64-NEXT: mr 3, 7
|
||||
; CHECK32_64-NEXT: clrlwi 3, 7, 27
|
||||
; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill
|
||||
; CHECK32_64-NEXT: mr 28, 4
|
||||
; CHECK32_64-NEXT: mr 4, 8
|
||||
@ -649,12 +650,13 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
|
||||
; CHECK64-LABEL: fshr_i37:
|
||||
; CHECK64: # %bb.0:
|
||||
; CHECK64-NEXT: lis 6, 28339
|
||||
; CHECK64-NEXT: sldi 4, 4, 27
|
||||
; CHECK64-NEXT: clrldi 7, 5, 27
|
||||
; CHECK64-NEXT: ori 6, 6, 58451
|
||||
; CHECK64-NEXT: sldi 4, 4, 27
|
||||
; CHECK64-NEXT: rldic 6, 6, 33, 0
|
||||
; CHECK64-NEXT: oris 6, 6, 3542
|
||||
; CHECK64-NEXT: ori 6, 6, 31883
|
||||
; CHECK64-NEXT: mulhdu 6, 5, 6
|
||||
; CHECK64-NEXT: mulhdu 6, 7, 6
|
||||
; CHECK64-NEXT: rldicl 6, 6, 59, 5
|
||||
; CHECK64-NEXT: mulli 6, 6, 37
|
||||
; CHECK64-NEXT: sub 5, 5, 6
|
||||
|
@ -140,13 +140,15 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
|
||||
; X86-SSE2-NEXT: pushl %ebx
|
||||
; X86-SSE2-NEXT: pushl %edi
|
||||
; X86-SSE2-NEXT: pushl %esi
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE2-NEXT: andl $31, %eax
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-SSE2-NEXT: shldl $27, %ebx, %edi
|
||||
; X86-SSE2-NEXT: pushl $0
|
||||
; X86-SSE2-NEXT: pushl $37
|
||||
; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-SSE2-NEXT: pushl %eax
|
||||
; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-SSE2-NEXT: calll __umoddi3
|
||||
; X86-SSE2-NEXT: addl $16, %esp
|
||||
@ -174,8 +176,9 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
|
||||
; X64-AVX2-LABEL: fshl_i37:
|
||||
; X64-AVX2: # %bb.0:
|
||||
; X64-AVX2-NEXT: movq %rdx, %rcx
|
||||
; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
|
||||
; X64-AVX2-NEXT: andq %rdx, %rax
|
||||
; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B
|
||||
; X64-AVX2-NEXT: movq %rcx, %rax
|
||||
; X64-AVX2-NEXT: mulq %rdx
|
||||
; X64-AVX2-NEXT: shrq $5, %rdx
|
||||
; X64-AVX2-NEXT: leal (%rdx,%rdx,8), %eax
|
||||
@ -304,13 +307,15 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
|
||||
; X86-SSE2-NEXT: pushl %ebx
|
||||
; X86-SSE2-NEXT: pushl %edi
|
||||
; X86-SSE2-NEXT: pushl %esi
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-SSE2-NEXT: andl $31, %eax
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-SSE2-NEXT: shldl $27, %ebx, %esi
|
||||
; X86-SSE2-NEXT: pushl $0
|
||||
; X86-SSE2-NEXT: pushl $37
|
||||
; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-SSE2-NEXT: pushl %eax
|
||||
; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
|
||||
; X86-SSE2-NEXT: calll __umoddi3
|
||||
; X86-SSE2-NEXT: addl $16, %esp
|
||||
@ -339,8 +344,9 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
|
||||
; X64-AVX2-LABEL: fshr_i37:
|
||||
; X64-AVX2: # %bb.0:
|
||||
; X64-AVX2-NEXT: movq %rdx, %rcx
|
||||
; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
|
||||
; X64-AVX2-NEXT: andq %rdx, %rax
|
||||
; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rdx # imm = 0xDD67C8A60DD67C8B
|
||||
; X64-AVX2-NEXT: movq %rcx, %rax
|
||||
; X64-AVX2-NEXT: mulq %rdx
|
||||
; X64-AVX2-NEXT: shrq $5, %rdx
|
||||
; X64-AVX2-NEXT: leal (%rdx,%rdx,8), %eax
|
||||
|
Loading…
Reference in New Issue
Block a user