diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 432abb500ce..81a93b1ed3b 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -870,12 +870,17 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
     case MVT::i8:
     case MVT::i16:
     case MVT::i32:
-      if (!useAM3)
+      if (!useAM3) {
         // Integer loads/stores handle 12-bit offsets.
         needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
-      else
+        // Handle negative offsets.
+        if (needsLowering && isThumb2)
+          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
+                            Addr.Offset > -256);
+      } else {
         // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
         needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
+      }
       break;
     case MVT::f32:
     case MVT::f64:
@@ -967,24 +972,42 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
     default: return false;
     case MVT::i1:
     case MVT::i8:
-      if (isZExt) {
-        Opc = isThumb2 ? ARM::t2LDRBi12 : ARM::LDRBi12;
+      if (isThumb2) {
+        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
+        else
+          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
       } else {
-        Opc = isThumb2 ? ARM::t2LDRSBi12 : ARM::LDRSB;
-        if (!isThumb2) useAM3 = true;
+        if (isZExt) {
+          Opc = ARM::LDRBi12;
+        } else {
+          Opc = ARM::LDRSB;
+          useAM3 = true;
+        }
       }
       RC = ARM::GPRRegisterClass;
       break;
     case MVT::i16:
-      if (isZExt)
-        Opc = isThumb2 ? ARM::t2LDRHi12 : ARM::LDRH;
-      else
-        Opc = isThumb2 ? ARM::t2LDRSHi12 : ARM::LDRSH;
-      if (!isThumb2) useAM3 = true;
+      if (isThumb2) {
+        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
+        else
+          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
+      } else {
+        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
+        useAM3 = true;
+      }
       RC = ARM::GPRRegisterClass;
       break;
     case MVT::i32:
-      Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+      if (isThumb2) {
+        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+          Opc = ARM::t2LDRi8;
+        else
+          Opc = ARM::t2LDRi12;
+      } else {
+        Opc = ARM::LDRi12;
+      }
       RC = ARM::GPRRegisterClass;
       break;
     case MVT::f32:
@@ -1045,14 +1068,35 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
       SrcReg = Res;
     } // Fallthrough here.
    case MVT::i8:
-      StrOpc = isThumb2 ? ARM::t2STRBi12 : ARM::STRBi12;
+      if (isThumb2) {
+        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+          StrOpc = ARM::t2STRBi8;
+        else
+          StrOpc = ARM::t2STRBi12;
+      } else {
+        StrOpc = ARM::STRBi12;
+      }
       break;
     case MVT::i16:
-      StrOpc = isThumb2 ? ARM::t2STRHi12 : ARM::STRH;
-      if (!isThumb2) useAM3 = true;
+      if (isThumb2) {
+        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+          StrOpc = ARM::t2STRHi8;
+        else
+          StrOpc = ARM::t2STRHi12;
+      } else {
+        StrOpc = ARM::STRH;
+        useAM3 = true;
+      }
       break;
     case MVT::i32:
-      StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
+      if (isThumb2) {
+        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+          StrOpc = ARM::t2STRi8;
+        else
+          StrOpc = ARM::t2STRi12;
+      } else {
+        StrOpc = ARM::STRi12;
+      }
       break;
     case MVT::f32:
       if (!Subtarget->hasVFP2()) return false;
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
new file mode 100644
index 00000000000..0b8a768b87c
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
@@ -0,0 +1,168 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+
+define i32 @t1(i32* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t1
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -1
+  %0 = load i32* %add.ptr, align 4, !tbaa !0
+; THUMB: ldr r{{[0-9]}}, [r0, #-4]
+  ret i32 %0
+}
+
+define i32 @t2(i32* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t2
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -63
+  %0 = load i32* %add.ptr, align 4, !tbaa !0
+; THUMB: ldr r{{[0-9]}}, [r0, #-252]
+  ret i32 %0
+}
+
+define i32 @t3(i32* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t3
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -64
+  %0 = load i32* %add.ptr, align 4, !tbaa !0
+; THUMB: ldr r{{[0-9]}}, [r0]
+  ret i32 %0
+}
+
+define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t4
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -1
+  %0 = load i16* %add.ptr, align 2, !tbaa !3
+; THUMB: ldrh r{{[0-9]}}, [r0, #-2]
+  ret i16 %0
+}
+
+define zeroext i16 @t5(i16* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t5
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -127
+  %0 = load i16* %add.ptr, align 2, !tbaa !3
+; THUMB: ldrh r{{[0-9]}}, [r0, #-254]
+  ret i16 %0
+}
+
+define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t6
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -128
+  %0 = load i16* %add.ptr, align 2, !tbaa !3
+; THUMB: ldrh r{{[0-9]}}, [r0]
+  ret i16 %0
+}
+
+define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t7
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -1
+  %0 = load i8* %add.ptr, align 1, !tbaa !1
+; THUMB: ldrb r{{[0-9]}}, [r0, #-1]
+  ret i8 %0
+}
+
+define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t8
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -255
+  %0 = load i8* %add.ptr, align 1, !tbaa !1
+; THUMB: ldrb r{{[0-9]}}, [r0, #-255]
+  ret i8 %0
+}
+
+define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t9
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -256
+  %0 = load i8* %add.ptr, align 1, !tbaa !1
+; THUMB: ldrb r{{[0-9]}}, [r0]
+  ret i8 %0
+}
+
+define void @t10(i32* nocapture %ptr) nounwind {
+entry:
+; THUMB: t10
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -1
+  store i32 0, i32* %add.ptr, align 4, !tbaa !0
+; THUMB: str r{{[0-9]}}, [r0, #-4]
+  ret void
+}
+
+define void @t11(i32* nocapture %ptr) nounwind {
+entry:
+; THUMB: t11
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -63
+  store i32 0, i32* %add.ptr, align 4, !tbaa !0
+; THUMB: str r{{[0-9]}}, [r0, #-252]
+  ret void
+}
+
+define void @t12(i32* nocapture %ptr) nounwind {
+entry:
+; THUMB: t12
+  %add.ptr = getelementptr inbounds i32* %ptr, i32 -64
+  store i32 0, i32* %add.ptr, align 4, !tbaa !0
+; THUMB: str r{{[0-9]}}, [r0]
+  ret void
+}
+
+define void @t13(i16* nocapture %ptr) nounwind {
+entry:
+; THUMB: t13
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -1
+  store i16 0, i16* %add.ptr, align 2, !tbaa !3
+; THUMB: strh r{{[0-9]}}, [r0, #-2]
+  ret void
+}
+
+define void @t14(i16* nocapture %ptr) nounwind {
+entry:
+; THUMB: t14
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -127
+  store i16 0, i16* %add.ptr, align 2, !tbaa !3
+; THUMB: strh r{{[0-9]}}, [r0, #-254]
+  ret void
+}
+
+define void @t15(i16* nocapture %ptr) nounwind {
+entry:
+; THUMB: t15
+  %add.ptr = getelementptr inbounds i16* %ptr, i32 -128
+  store i16 0, i16* %add.ptr, align 2, !tbaa !3
+; THUMB: strh r{{[0-9]}}, [r0]
+  ret void
+}
+
+define void @t16(i8* nocapture %ptr) nounwind {
+entry:
+; THUMB: t16
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -1
+  store i8 0, i8* %add.ptr, align 1, !tbaa !1
+; THUMB: strb r{{[0-9]}}, [r0, #-1]
+  ret void
+}
+
+define void @t17(i8* nocapture %ptr) nounwind {
+entry:
+; THUMB: t17
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -255
+  store i8 0, i8* %add.ptr, align 1, !tbaa !1
+; THUMB: strb r{{[0-9]}}, [r0, #-255]
+  ret void
+}
+
+define void @t18(i8* nocapture %ptr) nounwind {
+entry:
+; THUMB: t18
+  %add.ptr = getelementptr inbounds i8* %ptr, i32 -256
+  store i8 0, i8* %add.ptr, align 1, !tbaa !1
+; THUMB: strb r{{[0-9]}}, [r0]
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"short", metadata !1}
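
Note (illustrative sketch, not part of the patch): every hunk above picks between the Thumb-2 12-bit unsigned immediate encodings (t2LDRi12, t2STRi12, and friends) and the 8-bit encodings that accept negative offsets (t2LDRi8, t2STRi8, and friends) with the same predicate. The standalone C++ sketch below restates that predicate; the names Thumb2Form and classifyThumb2Offset are hypothetical, introduced here only for illustration.

// Hypothetical restatement of the offset classification the patch performs;
// not LLVM code. A Thumb-2 immediate-offset load/store can encode offsets
// [0, 4095] in the i12 form and [-255, -1] in the i8 form (given v6T2);
// anything else must be folded into a separate address computation.
#include <cstdio>

enum class Thumb2Form { Imm12, Imm8, NeedsLowering };

// 'hasV6T2Ops' mirrors Subtarget->hasV6T2Ops() in the patch.
static Thumb2Form classifyThumb2Offset(int offset, bool hasV6T2Ops) {
  if (offset >= 0 && offset <= 4095)
    return Thumb2Form::Imm12;          // e.g. t2LDRi12 / t2STRi12
  if (hasV6T2Ops && offset < 0 && offset > -256)
    return Thumb2Form::Imm8;           // e.g. t2LDRi8 / t2STRi8
  return Thumb2Form::NeedsLowering;    // ARMSimplifyAddress lowers the offset
}

int main() {
  // Mirrors the boundary the tests pin down: -252 still fits the i8 form
  // (t2/t11), while -256 forces address lowering (t3/t12).
  std::printf("-252 -> Imm8: %d, -256 -> NeedsLowering: %d\n",
              classifyThumb2Offset(-252, true) == Thumb2Form::Imm8,
              classifyThumb2Offset(-256, true) == Thumb2Form::NeedsLowering);
  return 0;
}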