From 9568e5c3c3f1e25288d2ff375dba0fddbf161fd6 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 21 Jun 2011 06:01:08 +0000 Subject: [PATCH] Teach dag combine to match halfword byteswap patterns. 1. (((x) & 0xFF00) >> 8) | (((x) & 0x00FF) << 8) => (bswap x) >> 16 2. ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0xff000000)>>8)|((x&0x00ff0000)<<8)) => (rotl (bswap x) 16) This allows us to eliminate most of the def : Pat patterns for ARM rev16 revsh instructions. It catches many more cases for ARM and x86. rdar://9609108 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133503 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 266 ++++++++++++++++++++++- lib/Target/ARM/ARMInstrInfo.td | 27 +-- lib/Target/ARM/ARMInstrThumb.td | 19 +- lib/Target/ARM/ARMInstrThumb2.td | 25 +-- test/CodeGen/ARM/rev.ll | 40 ++++ test/CodeGen/X86/bswap.ll | 38 +++- 6 files changed, 348 insertions(+), 67 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4ac590af57a..443fb3271d7 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -238,6 +238,9 @@ namespace { SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildUDIV(SDNode *N); + SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, + bool DemandHighBits = true); + SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); @@ -2512,6 +2515,244 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(); } +/// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16 +/// +SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, + bool DemandHighBits) { + if (!LegalOperations) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i64 && VT != MVT::i32 && VT != 
MVT::i16) + return SDValue(); + if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + return SDValue(); + + // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff) + bool LookPassAnd0 = false; + bool LookPassAnd1 = false; + if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) + std::swap(N0, N1); + if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) + std::swap(N0, N1); + if (N0.getOpcode() == ISD::AND) { + if (!N0.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N01C = dyn_cast(N0.getOperand(1)); + if (!N01C || N01C->getZExtValue() != 0xFF00) + return SDValue(); + N0 = N0.getOperand(0); + LookPassAnd0 = true; + } + + if (N1.getOpcode() == ISD::AND) { + if (!N1.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N11C = dyn_cast(N1.getOperand(1)); + if (!N11C || N11C->getZExtValue() != 0xFF) + return SDValue(); + N1 = N1.getOperand(0); + LookPassAnd1 = true; + } + + if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) + std::swap(N0, N1); + if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) + return SDValue(); + if (!N0.getNode()->hasOneUse() || + !N1.getNode()->hasOneUse()) + return SDValue(); + + ConstantSDNode *N01C = dyn_cast(N0.getOperand(1)); + ConstantSDNode *N11C = dyn_cast(N1.getOperand(1)); + if (!N01C || !N11C) + return SDValue(); + if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8) + return SDValue(); + + // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) + SDValue N00 = N0->getOperand(0); + if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { + if (!N00.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N001C = dyn_cast(N00.getOperand(1)); + if (!N001C || N001C->getZExtValue() != 0xFF) + return SDValue(); + N00 = N00.getOperand(0); + LookPassAnd0 = true; + } + + SDValue N10 = N1->getOperand(0); + if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { + if (!N10.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N101C = 
dyn_cast(N10.getOperand(1)); + if (!N101C || N101C->getZExtValue() != 0xFF00) + return SDValue(); + N10 = N10.getOperand(0); + LookPassAnd1 = true; + } + + if (N00 != N10) + return SDValue(); + + // Make sure everything beyond the low halfword is zero since the SRL 16 + // will clear the top bits. + unsigned OpSizeInBits = VT.getSizeInBits(); + if (DemandHighBits && OpSizeInBits > 16 && + (!LookPassAnd0 || !LookPassAnd1) && + !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16))) + return SDValue(); + + SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00); + if (OpSizeInBits > 16) + Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res, + DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); + return Res; +} + +/// isBSwapHWordElement - Return true if the specified node is an element +/// that makes up a 32-bit packed halfword byteswap. i.e. +/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) +static bool isBSwapHWordElement(SDValue N, SmallVector &Parts) { + if (!N.getNode()->hasOneUse()) + return false; + + unsigned Opc = N.getOpcode(); + if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) + return false; + + ConstantSDNode *N1C = dyn_cast(N.getOperand(1)); + if (!N1C) + return false; + + unsigned Num; + switch (N1C->getZExtValue()) { + default: + return false; + case 0xFF: Num = 0; break; + case 0xFF00: Num = 1; break; + case 0xFF0000: Num = 2; break; + case 0xFF000000: Num = 3; break; + } + + // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 
+ SDValue N0 = N.getOperand(0); + if (Opc == ISD::AND) { + if (Num == 0 || Num == 2) { + // (x >> 8) & 0xff + // (x >> 8) & 0xff0000 + if (N0.getOpcode() != ISD::SRL) + return false; + ConstantSDNode *C = dyn_cast(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { + // (x << 8) & 0xff00 + // (x << 8) & 0xff000000 + if (N0.getOpcode() != ISD::SHL) + return false; + ConstantSDNode *C = dyn_cast(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + } else if (Opc == ISD::SHL) { + // (x & 0xff) << 8 + // (x & 0xff0000) << 8 + if (Num != 0 && Num != 2) + return false; + ConstantSDNode *C = dyn_cast(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { // Opc == ISD::SRL + // (x & 0xff00) >> 8 + // (x & 0xff000000) >> 8 + if (Num != 1 && Num != 3) + return false; + ConstantSDNode *C = dyn_cast(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + + if (Parts[Num]) + return false; + + Parts[Num] = N0.getOperand(0).getNode(); + return true; +} + +/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. 
That is +/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) +/// => (rotl (bswap x), 16) +SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { + if (!LegalOperations) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32) + return SDValue(); + if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + return SDValue(); + + SmallVector Parts(4, (SDNode*)0); + // Look for either + // (or (or (and), (and)), (or (and), (and))) + // (or (or (or (and), (and)), (and)), (and)) + if (N0.getOpcode() != ISD::OR) + return SDValue(); + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + + if (N1.getOpcode() == ISD::OR) { + // (or (or (and), (and)), (or (and), (and))) + SDValue N000 = N00.getOperand(0); + if (!isBSwapHWordElement(N000, Parts)) + return SDValue(); + + SDValue N001 = N00.getOperand(1); + if (!isBSwapHWordElement(N001, Parts)) + return SDValue(); + SDValue N010 = N01.getOperand(0); + if (!isBSwapHWordElement(N010, Parts)) + return SDValue(); + SDValue N011 = N01.getOperand(1); + if (!isBSwapHWordElement(N011, Parts)) + return SDValue(); + } else { + // (or (or (or (and), (and)), (and)), (and)) + if (!isBSwapHWordElement(N1, Parts)) + return SDValue(); + if (!isBSwapHWordElement(N01, Parts)) + return SDValue(); + if (N00.getOpcode() != ISD::OR) + return SDValue(); + SDValue N000 = N00.getOperand(0); + if (!isBSwapHWordElement(N000, Parts)) + return SDValue(); + SDValue N001 = N00.getOperand(1); + if (!isBSwapHWordElement(N001, Parts)) + return SDValue(); + } + + // Make sure the parts are all coming from the same node. + if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) + return SDValue(); + + SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, + SDValue(Parts[0],0)); + + // Result of the bswap should be rotated by 16. If it's not legal, then + // do (x << 16) | (x >> 16). 
+ SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) + return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt); + else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt)); +} + SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2547,6 +2788,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or x, c) -> c iff (x & ~c) == 0 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; + + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + srl 16) + SDValue BSwap = MatchBSwapHWord(N, N0, N1); + if (BSwap.getNode() != 0) + return BSwap; + BSwap = MatchBSwapHWordLow(N, N0, N1); + if (BSwap.getNode() != 0) + return BSwap; + // reassociate or SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1); if (ROR.getNode() != 0) @@ -4606,6 +4856,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} + + // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) + if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { + SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false); + if (BSwap.getNode() != 0) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + BSwap, N1); + } + return SDValue(); } @@ -5231,7 +5491,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + (Level == llvm::Unrestricted || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, @@ -5255,7 +5516,8 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + (Level == llvm::Unrestricted || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index e2bbcfb12c9..5c013de238e 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -3008,41 +3008,22 @@ def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "rev", "\t$Rd, $Rm", [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>; +let AddedComplexity = 5 in def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "rev16", "\t$Rd, $Rm", - [(set GPR:$Rd, - (or (and (srl GPR:$Rm, (i32 8)), 0xFF), - 
(or (and (shl GPR:$Rm, (i32 8)), 0xFF00), - (or (and (srl GPR:$Rm, (i32 8)), 0xFF0000), - (and (shl GPR:$Rm, (i32 8)), 0xFF000000)))))]>, + [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>, Requires<[IsARM, HasV6]>; -def : ARMV6Pat<(or (or (or (and (srl GPR:$Rm, (i32 8)), 0xFF0000), - (and (shl GPR:$Rm, (i32 8)), 0xFF000000)), - (and (srl GPR:$Rm, (i32 8)), 0xFF)), - (and (shl GPR:$Rm, (i32 8)), 0xFF00)), - (REV16 GPR:$Rm)>; - +let AddedComplexity = 5 in def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "revsh", "\t$Rd, $Rm", - [(set GPR:$Rd, - (sext_inreg - (or (srl GPR:$Rm, (i32 8)), - (shl GPR:$Rm, (i32 8))), i16))]>, + [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>, Requires<[IsARM, HasV6]>; -def : ARMV6Pat<(sext_inreg (or (srl (and GPR:$Rm, 0xFF00), (i32 8)), - (shl GPR:$Rm, (i32 8))), i16), - (REVSH GPR:$Rm)>; - def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)), (and (srl GPR:$Rm, (i32 8)), 0xFF)), (REVSH GPR:$Rm)>; -// Need the AddedComplexity or else MOVs + REV would be chosen. 
-let AddedComplexity = 5 in -def : ARMV6Pat<(sra (bswap GPR:$Rm), (i32 16)), (REVSH GPR:$Rm)>; - def lsl_shift_imm : SDNodeXFormgetZExtValue()); return CurDAG->getTargetConstant(Sh, MVT::i32); diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 8430aa31e78..44fbc021d86 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -1176,31 +1176,16 @@ def tREV16 : // A8.6.135 T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iUNAr, "rev16", "\t$Rd, $Rm", - [(set tGPR:$Rd, - (or (and (srl tGPR:$Rm, (i32 8)), 0xFF), - (or (and (shl tGPR:$Rm, (i32 8)), 0xFF00), - (or (and (srl tGPR:$Rm, (i32 8)), 0xFF0000), - (and (shl tGPR:$Rm, (i32 8)), 0xFF000000)))))]>, + [(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>, Requires<[IsThumb, IsThumb1Only, HasV6]>; def tREVSH : // A8.6.136 T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iUNAr, "revsh", "\t$Rd, $Rm", - [(set tGPR:$Rd, - (sext_inreg - (or (srl tGPR:$Rm, (i32 8)), - (shl tGPR:$Rm, (i32 8))), i16))]>, + [(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sext_inreg (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)), - (shl tGPR:$Rm, (i32 8))), i16), - (tREVSH tGPR:$Rm)>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; - -def : T1Pat<(sra (bswap tGPR:$Rm), (i32 16)), (tREVSH tGPR:$Rm)>, - Requires<[IsThumb, IsThumb1Only, HasV6]>; - // Rotate right register def tROR : // A8.6.139 T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index cd077a86e99..60fff05520d 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2587,35 +2587,16 @@ def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "rev16", ".w\t$Rd, $Rm", - [(set rGPR:$Rd, - (or (and (srl rGPR:$Rm, 
(i32 8)), 0xFF), - (or (and (shl rGPR:$Rm, (i32 8)), 0xFF00), - (or (and (srl rGPR:$Rm, (i32 8)), 0xFF0000), - (and (shl rGPR:$Rm, (i32 8)), 0xFF000000)))))]>; - -def : T2Pat<(or (or (or (and (srl rGPR:$Rm, (i32 8)), 0xFF0000), - (and (shl rGPR:$Rm, (i32 8)), 0xFF000000)), - (and (srl rGPR:$Rm, (i32 8)), 0xFF)), - (and (shl rGPR:$Rm, (i32 8)), 0xFF00)), - (t2REV16 rGPR:$Rm)>; + [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>; def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "revsh", ".w\t$Rd, $Rm", - [(set rGPR:$Rd, - (sext_inreg - (or (srl rGPR:$Rm, (i32 8)), - (shl rGPR:$Rm, (i32 8))), i16))]>; - -def : T2Pat<(sext_inreg (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)), - (shl rGPR:$Rm, (i32 8))), i16), - (t2REVSH rGPR:$Rm)>; + [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>; def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)), - (and (srl rGPR:$Rm, (i32 8)), 0xFF)), + (and (srl rGPR:$Rm, (i32 8)), 0xFF)), (t2REVSH rGPR:$Rm)>; -def : T2Pat<(sra (bswap rGPR:$Rm), (i32 16)), (t2REVSH rGPR:$Rm)>; - def t2PKHBT : T2ThreeReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh), IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh", diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll index c210a55b66f..ea44c28fb70 100644 --- a/test/CodeGen/ARM/rev.ll +++ b/test/CodeGen/ARM/rev.ll @@ -84,3 +84,43 @@ entry: %or10 = or i32 %or6, %shl ret i32 %or10 } + +; rdar://9164521 +define i32 @test7(i32 %a) nounwind readnone { +entry: +; CHECK: test7 +; CHECK: rev r0, r0 +; CHECK: lsr r0, r0, #16 + %and = lshr i32 %a, 8 + %shr3 = and i32 %and, 255 + %and2 = shl i32 %a, 8 + %shl = and i32 %and2, 65280 + %or = or i32 %shr3, %shl + ret i32 %or +} + +define i32 @test8(i32 %a) nounwind readnone { +entry: +; CHECK: test8 +; CHECK: revsh r0, r0 + %and = lshr i32 %a, 8 + %shr4 = and i32 %and, 255 + %and2 = shl i32 %a, 8 + %or = or i32 %shr4, %and2 + %sext = shl i32 %or, 16 + %conv3 = ashr exact i32 %sext, 16 + ret i32 %conv3 +} + +define zeroext i16 
@test9(i16 zeroext %v) nounwind readnone { +entry: +; CHECK: test9 +; CHECK: rev r0, r0 +; CHECK: lsr r0, r0, #16 + %conv = zext i16 %v to i32 + %shr4 = lshr i32 %conv, 8 + %shl = shl nuw nsw i32 %conv, 8 + %or = or i32 %shr4, %shl + %conv3 = trunc i32 %or to i16 + ret i16 %conv3 +} diff --git a/test/CodeGen/X86/bswap.ll b/test/CodeGen/X86/bswap.ll index 0a72c1c4784..a7540aafa9b 100644 --- a/test/CodeGen/X86/bswap.ll +++ b/test/CodeGen/X86/bswap.ll @@ -1,8 +1,6 @@ ; bswap should be constant folded when it is passed a constant argument -; RUN: llc < %s -march=x86 | \ -; RUN: grep bswapl | count 3 -; RUN: llc < %s -march=x86 | grep rolw | count 1 +; RUN: llc < %s -march=x86 | FileCheck %s declare i16 @llvm.bswap.i16(i16) @@ -11,17 +9,51 @@ declare i32 @llvm.bswap.i32(i32) declare i64 @llvm.bswap.i64(i64) define i16 @W(i16 %A) { +; CHECK: W: +; CHECK: rolw $8, %ax %Z = call i16 @llvm.bswap.i16( i16 %A ) ; [#uses=1] ret i16 %Z } define i32 @X(i32 %A) { +; CHECK: X: +; CHECK: bswapl %eax %Z = call i32 @llvm.bswap.i32( i32 %A ) ; [#uses=1] ret i32 %Z } define i64 @Y(i64 %A) { +; CHECK: Y: +; CHECK: bswapl %eax +; CHECK: bswapl %edx %Z = call i64 @llvm.bswap.i64( i64 %A ) ; [#uses=1] ret i64 %Z } +; rdar://9164521 +define i32 @test1(i32 %a) nounwind readnone { +entry: +; CHECK: test1 +; CHECK: bswapl %eax +; CHECK: shrl $16, %eax + %and = lshr i32 %a, 8 + %shr3 = and i32 %and, 255 + %and2 = shl i32 %a, 8 + %shl = and i32 %and2, 65280 + %or = or i32 %shr3, %shl + ret i32 %or +} + +define i32 @test2(i32 %a) nounwind readnone { +entry: +; CHECK: test2 +; CHECK: bswapl %eax +; CHECK: sarl $16, %eax + %and = lshr i32 %a, 8 + %shr4 = and i32 %and, 255 + %and2 = shl i32 %a, 8 + %or = or i32 %shr4, %and2 + %sext = shl i32 %or, 16 + %conv3 = ashr exact i32 %sext, 16 + ret i32 %conv3 +}