mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-04-01 07:11:45 +00:00
Resubmit r360436 "[X86] Avoid SFB - Fix inconsistent codegen with/without debug info"
Fixes https://bugs.llvm.org/show_bug.cgi?id=40969 The functions findPotentiallyBlockedCopies and buildCopy are currently not accounting for the presence of debug instructions. In the former this results in the optimization not being trigerred, and in the latter results in inconsistent codegen. This patch enables the optimization to be performed in a debug build and ensures the codegen is consistent with non-debug builds. Patch by Chris Dawson. Differential Revision: https://reviews.llvm.org/D61680 llvm-svn: 361527
This commit is contained in:
parent
784f34da60
commit
a43ec506ab
@ -406,7 +406,10 @@ void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode,
|
||||
// If the load and store are consecutive, use the loadInst location to
|
||||
// reduce register pressure.
|
||||
MachineInstr *StInst = StoreInst;
|
||||
if (StoreInst->getPrevNode() == LoadInst)
|
||||
auto PrevInstrIt = skipDebugInstructionsBackward(
|
||||
std::prev(MachineBasicBlock::instr_iterator(StoreInst)),
|
||||
MBB->instr_begin());
|
||||
if (PrevInstrIt.getNodePtr() == LoadInst)
|
||||
StInst = LoadInst;
|
||||
MachineInstr *NewStore =
|
||||
BuildMI(*MBB, StInst, StInst->getDebugLoc(), TII->get(NStoreOpcode))
|
||||
@ -491,19 +494,22 @@ void X86AvoidSFBPass::buildCopies(int Size, MachineInstr *LoadInst,
|
||||
static void updateKillStatus(MachineInstr *LoadInst, MachineInstr *StoreInst) {
|
||||
MachineOperand &LoadBase = getBaseOperand(LoadInst);
|
||||
MachineOperand &StoreBase = getBaseOperand(StoreInst);
|
||||
auto StorePrevNonDbgInstr = skipDebugInstructionsBackward(
|
||||
std::prev(MachineBasicBlock::instr_iterator(StoreInst)),
|
||||
LoadInst->getParent()->instr_begin()).getNodePtr();
|
||||
if (LoadBase.isReg()) {
|
||||
MachineInstr *LastLoad = LoadInst->getPrevNode();
|
||||
// If the original load and store to xmm/ymm were consecutive
|
||||
// then the partial copies were also created in
|
||||
// a consecutive order to reduce register pressure,
|
||||
// and the location of the last load is before the last store.
|
||||
if (StoreInst->getPrevNode() == LoadInst)
|
||||
if (StorePrevNonDbgInstr == LoadInst)
|
||||
LastLoad = LoadInst->getPrevNode()->getPrevNode();
|
||||
getBaseOperand(LastLoad).setIsKill(LoadBase.isKill());
|
||||
}
|
||||
if (StoreBase.isReg()) {
|
||||
MachineInstr *StInst = StoreInst;
|
||||
if (StoreInst->getPrevNode() == LoadInst)
|
||||
if (StorePrevNonDbgInstr == LoadInst)
|
||||
StInst = LoadInst;
|
||||
getBaseOperand(StInst->getPrevNode()).setIsKill(StoreBase.isKill());
|
||||
}
|
||||
@ -530,7 +536,7 @@ void X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) {
|
||||
if (!isPotentialBlockedMemCpyLd(MI.getOpcode()))
|
||||
continue;
|
||||
int DefVR = MI.getOperand(0).getReg();
|
||||
if (!MRI->hasOneUse(DefVR))
|
||||
if (!MRI->hasOneNonDBGUse(DefVR))
|
||||
continue;
|
||||
for (auto UI = MRI->use_nodbg_begin(DefVR), UE = MRI->use_nodbg_end();
|
||||
UI != UE;) {
|
||||
|
222
test/CodeGen/X86/avoid-sfb-g-no-change.mir
Normal file
222
test/CodeGen/X86/avoid-sfb-g-no-change.mir
Normal file
@ -0,0 +1,222 @@
|
||||
# RUN: llc %s -run-pass x86-avoid-SFB -mtriple=x86_64-unknown-linux-gnu -o - | FileCheck %s -check-prefixes DEBUG-LABEL,CHECK
|
||||
# RUN: llc %s -run-pass x86-avoid-SFB -mtriple=x86_64-unknown-linux-gnu -o - | FileCheck %s -check-prefixes NODEBUG-LABEL,CHECK
|
||||
#
|
||||
# This was generated from:
|
||||
#
|
||||
# using alpha = float __attribute__((ext_vector_type(4)));
|
||||
#
|
||||
# void bravo(alpha * __restrict__ p1, alpha * __restrict__ p2) {
|
||||
# char *p3 = (char *)p1;
|
||||
# *p3 = 0;
|
||||
# alpha t = *p1;
|
||||
# *p2 = t;
|
||||
# }
|
||||
#
|
||||
# Using the command line:
|
||||
# clang -g -c 1.cpp -O2 -S -emit-llvm -fno-strict-aliasing --target=x86_64-unknown-unknown -o test.ll
|
||||
# llc -stop-before=x86-avoid-SFB test.ll -o before.mir
|
||||
|
||||
--- |
|
||||
; ModuleID = 'test.ll'
|
||||
source_filename = "1.cpp"
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
|
||||
; Function Attrs: norecurse nounwind uwtable
|
||||
define dso_local void @debug(<4 x float>* noalias nocapture %p1, <4 x float>* noalias nocapture %p2) local_unnamed_addr #0 !dbg !10 {
|
||||
entry:
|
||||
call void @llvm.dbg.value(metadata <4 x float>* %p1, metadata !21, metadata !DIExpression()), !dbg !25
|
||||
call void @llvm.dbg.value(metadata <4 x float>* %p2, metadata !22, metadata !DIExpression()), !dbg !25
|
||||
%0 = bitcast <4 x float>* %p1 to i8*, !dbg !26
|
||||
call void @llvm.dbg.value(metadata i8* %0, metadata !23, metadata !DIExpression()), !dbg !25
|
||||
store i8 0, i8* %0, align 1, !dbg !27
|
||||
%1 = load <4 x float>, <4 x float>* %p1, align 16, !dbg !28
|
||||
call void @llvm.dbg.value(metadata <4 x float> %1, metadata !24, metadata !DIExpression()), !dbg !25
|
||||
store <4 x float> %1, <4 x float>* %p2, align 16, !dbg !29
|
||||
ret void, !dbg !30
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind uwtable
|
||||
define dso_local void @nodebug(<4 x float>* noalias nocapture %p1, <4 x float>* noalias nocapture %p2) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%0 = bitcast <4 x float>* %p1 to i8*
|
||||
store i8 0, i8* %0, align 1
|
||||
%1 = load <4 x float>, <4 x float>* %p1, align 16
|
||||
store <4 x float> %1, <4 x float>* %p2, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone speculatable
|
||||
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.stackprotector(i8*, i8**) #2
|
||||
|
||||
attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind readnone speculatable }
|
||||
attributes #2 = { nounwind }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!6, !7, !8}
|
||||
!llvm.ident = !{!9}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0 (https://github.com/llvm/llvm-project.git 9afc4764dd24bd2f23c44e51ad33f8e58234a8b6)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None)
|
||||
!1 = !DIFile(filename: "1.cpp", directory: "C:\5CUsers\5Cgbdawsoc\5CDocuments\5Cllvm\5Cbg40969")
|
||||
!2 = !{}
|
||||
!3 = !{!4}
|
||||
!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 64)
|
||||
!5 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
|
||||
!6 = !{i32 2, !"Dwarf Version", i32 4}
|
||||
!7 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!8 = !{i32 1, !"wchar_size", i32 4}
|
||||
!9 = !{!"clang version 9.0.0 (https://github.com/llvm/llvm-project.git 9afc4764dd24bd2f23c44e51ad33f8e58234a8b6)"}
|
||||
!10 = distinct !DISubprogram(name: "bravo", linkageName: "_Z5bravoPDv4_fS0_", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !20)
|
||||
!11 = !DISubroutineType(types: !12)
|
||||
!12 = !{null, !13, !13}
|
||||
!13 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !14)
|
||||
!14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64)
|
||||
!15 = !DIDerivedType(tag: DW_TAG_typedef, name: "alpha", file: !1, line: 2, baseType: !16)
|
||||
!16 = !DICompositeType(tag: DW_TAG_array_type, baseType: !17, size: 128, flags: DIFlagVector, elements: !18)
|
||||
!17 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
|
||||
!18 = !{!19}
|
||||
!19 = !DISubrange(count: 4)
|
||||
!20 = !{!21, !22, !23, !24}
|
||||
!21 = !DILocalVariable(name: "p1", arg: 1, scope: !10, file: !1, line: 4, type: !13)
|
||||
!22 = !DILocalVariable(name: "p2", arg: 2, scope: !10, file: !1, line: 4, type: !13)
|
||||
!23 = !DILocalVariable(name: "p3", scope: !10, file: !1, line: 5, type: !4)
|
||||
!24 = !DILocalVariable(name: "t", scope: !10, file: !1, line: 7, type: !15)
|
||||
!25 = !DILocation(line: 0, scope: !10)
|
||||
!26 = !DILocation(line: 5, column: 14, scope: !10)
|
||||
!27 = !DILocation(line: 6, column: 7, scope: !10)
|
||||
!28 = !DILocation(line: 7, column: 13, scope: !10)
|
||||
!29 = !DILocation(line: 8, column: 7, scope: !10)
|
||||
!30 = !DILocation(line: 9, column: 1, scope: !10)
|
||||
|
||||
...
|
||||
---
|
||||
name: debug
|
||||
alignment: 4
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers:
|
||||
- { id: 0, class: gr64, preferred-register: '' }
|
||||
- { id: 1, class: gr64, preferred-register: '' }
|
||||
- { id: 2, class: vr128, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$rdi', virtual-reg: '%0' }
|
||||
- { reg: '$rsi', virtual-reg: '%1' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $rdi, $rsi
|
||||
|
||||
DBG_VALUE $rdi, $noreg, !21, !DIExpression(), debug-location !25
|
||||
DBG_VALUE $rsi, $noreg, !22, !DIExpression(), debug-location !25
|
||||
%1:gr64 = COPY $rsi
|
||||
DBG_VALUE %1, $noreg, !22, !DIExpression(), debug-location !25
|
||||
%0:gr64 = COPY $rdi
|
||||
DBG_VALUE %0, $noreg, !21, !DIExpression(), debug-location !25
|
||||
DBG_VALUE %0, $noreg, !23, !DIExpression(), debug-location !25
|
||||
MOV8mi %0, 1, $noreg, 0, $noreg, 0, debug-location !27 :: (store 1 into %ir.0)
|
||||
%2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg, debug-location !28 :: (load 16 from %ir.p1)
|
||||
DBG_VALUE %2, $noreg, !24, !DIExpression(), debug-location !25
|
||||
MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2, debug-location !29 :: (store 16 into %ir.p2)
|
||||
RET 0, debug-location !30
|
||||
|
||||
...
|
||||
---
|
||||
name: nodebug
|
||||
alignment: 4
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
registers:
|
||||
- { id: 0, class: gr64, preferred-register: '' }
|
||||
- { id: 1, class: gr64, preferred-register: '' }
|
||||
- { id: 2, class: vr128, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$rdi', virtual-reg: '%0' }
|
||||
- { reg: '$rsi', virtual-reg: '%1' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 4294967295
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $rdi, $rsi
|
||||
|
||||
%1:gr64 = COPY $rsi
|
||||
%0:gr64 = COPY $rdi
|
||||
MOV8mi %0, 1, $noreg, 0, $noreg, 0 :: (store 1 into %ir.0)
|
||||
%2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.p1)
|
||||
MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2 :: (store 16 into %ir.p2)
|
||||
RET 0
|
||||
|
||||
; DEBUG-LABEL: name: debug
|
||||
; NODEBUG-LABEL: name: nodebug
|
||||
; CHECK: %1:gr64 = COPY
|
||||
; CHECK: %0:gr64 = COPY
|
||||
; CHECK: MOV8mi
|
||||
; CHECK: %3:gr8 = MOV8rm
|
||||
; CHECK: MOV8mr
|
||||
; CHECK: %4:gr64 = MOV64rm
|
||||
; CHECK: MOV64mr
|
||||
; CHECK: %5:gr32 = MOV32rm
|
||||
; CHECK: MOV32mr
|
||||
; CHECK: %6:gr16 = MOV16rm
|
||||
; CHECK: MOV16mr
|
||||
; CHECK: %7:gr8 = MOV8rm
|
||||
; CHECK: MOV8mr
|
||||
; CHECK: RET 0
|
||||
; DEBUG-LABEL: name: nodebug
|
||||
...
|
Loading…
x
Reference in New Issue
Block a user