diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 8d7bddbea95..bfbf10a1ea1 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -1759,59 +1759,53 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
     return false;
   bool LHSIsKill = hasTrivialKill(LHS);
 
-  unsigned RHSReg = 0;
-  bool RHSIsKill = false;
-  bool UseImm = true;
-  if (!isa<ConstantInt>(RHS)) {
-    RHSReg = getRegForValue(RHS);
-    if (!RHSReg)
-      return false;
-    RHSIsKill = hasTrivialKill(RHS);
-    UseImm = false;
+  // Check if the immediate can be encoded in the instruction and if we should
+  // invert the instruction (adds -> subs) to handle negative immediates.
+  bool UseImm = false;
+  bool UseInverse = false;
+  uint64_t Imm = 0;
+  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
+    if (C->isNegative()) {
+      UseInverse = true;
+      Imm = -(C->getSExtValue());
+    } else
+      Imm = C->getZExtValue();
+
+    if (isUInt<12>(Imm))
+      UseImm = true;
+
+    UseInverse = UseImm && UseInverse;
   }
 
+  static const unsigned OpcTable[2][2][2] = {
+    { {AArch64::ADDSWrr, AArch64::ADDSXrr},
+      {AArch64::ADDSWri, AArch64::ADDSXri} },
+    { {AArch64::SUBSWrr, AArch64::SUBSXrr},
+      {AArch64::SUBSWri, AArch64::SUBSXri} }
+  };
   unsigned Opc = 0;
   unsigned MulReg = 0;
+  unsigned RHSReg = 0;
+  bool RHSIsKill = false;
   AArch64CC::CondCode CC = AArch64CC::Invalid;
   bool Is64Bit = VT == MVT::i64;
   switch (II->getIntrinsicID()) {
   default: llvm_unreachable("Unexpected intrinsic!");
   case Intrinsic::sadd_with_overflow:
-    if (UseImm)
-      Opc = Is64Bit ? AArch64::ADDSXri : AArch64::ADDSWri;
-    else
-      Opc = Is64Bit ? AArch64::ADDSXrr : AArch64::ADDSWrr;
-    CC = AArch64CC::VS;
-    break;
+    Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
   case Intrinsic::uadd_with_overflow:
-    if (UseImm)
-      Opc = Is64Bit ? AArch64::ADDSXri : AArch64::ADDSWri;
-    else
-      Opc = Is64Bit ? AArch64::ADDSXrr : AArch64::ADDSWrr;
-    CC = AArch64CC::HS;
-    break;
+    Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
   case Intrinsic::ssub_with_overflow:
-    if (UseImm)
-      Opc = Is64Bit ? AArch64::SUBSXri : AArch64::SUBSWri;
-    else
-      Opc = Is64Bit ? AArch64::SUBSXrr : AArch64::SUBSWrr;
-    CC = AArch64CC::VS;
-    break;
+    Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
   case Intrinsic::usub_with_overflow:
-    if (UseImm)
-      Opc = Is64Bit ? AArch64::SUBSXri : AArch64::SUBSWri;
-    else
-      Opc = Is64Bit ? AArch64::SUBSXrr : AArch64::SUBSWrr;
-    CC = AArch64CC::LO;
-    break;
+    Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
   case Intrinsic::smul_with_overflow: {
     CC = AArch64CC::NE;
-    if (UseImm) {
-      RHSReg = getRegForValue(RHS);
-      if (!RHSReg)
-        return false;
-      RHSIsKill = hasTrivialKill(RHS);
-    }
+    RHSReg = getRegForValue(RHS);
+    if (!RHSReg)
+      return false;
+    RHSIsKill = hasTrivialKill(RHS);
+
     if (VT == MVT::i32) {
       MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
       unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
@@ -1841,12 +1835,11 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
   }
   case Intrinsic::umul_with_overflow: {
     CC = AArch64CC::NE;
-    if (UseImm) {
-      RHSReg = getRegForValue(RHS);
-      if (!RHSReg)
-        return false;
-      RHSIsKill = hasTrivialKill(RHS);
-    }
+    RHSReg = getRegForValue(RHS);
+    if (!RHSReg)
+      return false;
+    RHSIsKill = hasTrivialKill(RHS);
+
     if (VT == MVT::i32) {
       MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
       unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
@@ -1872,15 +1865,23 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
   }
   }
 
+  if (!UseImm) {
+    RHSReg = getRegForValue(RHS);
+    if (!RHSReg)
+      return false;
+    RHSIsKill = hasTrivialKill(RHS);
+  }
+
   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
   if (Opc) {
     MachineInstrBuilder MIB;
     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                   ResultReg)
             .addReg(LHSReg, getKillRegState(LHSIsKill));
-    if (UseImm)
-      MIB.addImm(cast<ConstantInt>(RHS)->getZExtValue());
-    else
+    if (UseImm) {
+      MIB.addImm(Imm);
+      MIB.addImm(0);
+    } else
       MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
   } else
diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll
index efc63dbbecf..fe81d8d8deb 100644
--- a/test/CodeGen/AArch64/arm64-xaluo.ll
+++ b/test/CodeGen/AArch64/arm64-xaluo.ll
@@ -4,9 +4,9 @@
 ;
 ; Get the actual value of the overflow bit.
 ;
-define zeroext i1 @saddo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @saddo1.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
-; CHECK-LABEL: saddo.i32
+; CHECK-LABEL: saddo1.i32
 ; CHECK: adds {{w[0-9]+}}, w0, w1
 ; CHECK-NEXT: cset {{w[0-9]+}}, vs
   %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -16,9 +16,48 @@ entry:
   ret i1 %obit
 }
 
-define zeroext i1 @saddo.i64(i64 %v1, i64 %v2, i64* %res) {
+; Test the immediate version.
+define zeroext i1 @saddo2.i32(i32 %v1, i32* %res) {
 entry:
-; CHECK-LABEL: saddo.i64
+; CHECK-LABEL: saddo2.i32
+; CHECK: adds {{w[0-9]+}}, w0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+; Test negative immediates.
+define zeroext i1 @saddo3.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: saddo3.i32
+; CHECK: subs {{w[0-9]+}}, w0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 -4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+; Test immediates that are too large to be encoded.
+define zeroext i1 @saddo4.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: saddo4.i32
+; CHECK: adds {{w[0-9]+}}, w0, {{w[0-9]+}}
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 16777215)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; CHECK-LABEL: saddo1.i64
 ; CHECK: adds {{x[0-9]+}}, x0, x1
 ; CHECK-NEXT: cset {{w[0-9]+}}, vs
   %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
@@ -28,6 +67,30 @@ entry:
   ret i1 %obit
 }
 
+define zeroext i1 @saddo2.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL: saddo2.i64
+; CHECK: adds {{x[0-9]+}}, x0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 4)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo3.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL: saddo3.i64
+; CHECK: subs {{x[0-9]+}}, x0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -4)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
 define zeroext i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
 ; CHECK-LABEL: uaddo.i32
@@ -52,9 +115,9 @@ entry:
   ret i1 %obit
 }
 
-define zeroext i1 @ssubo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @ssubo1.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
-; CHECK-LABEL: ssubo.i32
+; CHECK-LABEL: ssubo1.i32
 ; CHECK: subs {{w[0-9]+}}, w0, w1
 ; CHECK-NEXT: cset {{w[0-9]+}}, vs
   %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
@@ -64,6 +127,18 @@ entry:
   ret i1 %obit
 }
 
+define zeroext i1 @ssubo2.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: ssubo2.i32
+; CHECK: adds {{w[0-9]+}}, w0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 -4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
 define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) {
 entry:
 ; CHECK-LABEL: ssubo.i64