Resubmit r360436 "[X86] Avoid SFB - Fix inconsistent codegen with/without debug info"

Fixes https://bugs.llvm.org/show_bug.cgi?id=40969

The functions findPotentiallyBlockedCopies and buildCopy are currently not
accounting for the presence of debug instructions. In the former this results
in the optimization not being trigerred, and in the latter results in
inconsistent codegen.

This patch enables the optimization to be performed in a debug build and
ensures the codegen is consistent with non-debug builds.

Patch by Chris Dawson.

Differential Revision: https://reviews.llvm.org/D61680

llvm-svn: 361527
This commit is contained in:
Robert Lougher 2019-05-23 18:15:12 +00:00
parent 784f34da60
commit a43ec506ab
2 changed files with 232 additions and 4 deletions

View File

@ -406,7 +406,10 @@ void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode,
// If the load and store are consecutive, use the loadInst location to
// reduce register pressure.
MachineInstr *StInst = StoreInst;
if (StoreInst->getPrevNode() == LoadInst)
auto PrevInstrIt = skipDebugInstructionsBackward(
std::prev(MachineBasicBlock::instr_iterator(StoreInst)),
MBB->instr_begin());
if (PrevInstrIt.getNodePtr() == LoadInst)
StInst = LoadInst;
MachineInstr *NewStore =
BuildMI(*MBB, StInst, StInst->getDebugLoc(), TII->get(NStoreOpcode))
@ -491,19 +494,22 @@ void X86AvoidSFBPass::buildCopies(int Size, MachineInstr *LoadInst,
static void updateKillStatus(MachineInstr *LoadInst, MachineInstr *StoreInst) {
MachineOperand &LoadBase = getBaseOperand(LoadInst);
MachineOperand &StoreBase = getBaseOperand(StoreInst);
auto StorePrevNonDbgInstr = skipDebugInstructionsBackward(
std::prev(MachineBasicBlock::instr_iterator(StoreInst)),
LoadInst->getParent()->instr_begin()).getNodePtr();
if (LoadBase.isReg()) {
MachineInstr *LastLoad = LoadInst->getPrevNode();
// If the original load and store to xmm/ymm were consecutive
// then the partial copies were also created in
// a consecutive order to reduce register pressure,
// and the location of the last load is before the last store.
if (StoreInst->getPrevNode() == LoadInst)
if (StorePrevNonDbgInstr == LoadInst)
LastLoad = LoadInst->getPrevNode()->getPrevNode();
getBaseOperand(LastLoad).setIsKill(LoadBase.isKill());
}
if (StoreBase.isReg()) {
MachineInstr *StInst = StoreInst;
if (StoreInst->getPrevNode() == LoadInst)
if (StorePrevNonDbgInstr == LoadInst)
StInst = LoadInst;
getBaseOperand(StInst->getPrevNode()).setIsKill(StoreBase.isKill());
}
@ -530,7 +536,7 @@ void X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) {
if (!isPotentialBlockedMemCpyLd(MI.getOpcode()))
continue;
int DefVR = MI.getOperand(0).getReg();
if (!MRI->hasOneUse(DefVR))
if (!MRI->hasOneNonDBGUse(DefVR))
continue;
for (auto UI = MRI->use_nodbg_begin(DefVR), UE = MRI->use_nodbg_end();
UI != UE;) {

View File

@ -0,0 +1,222 @@
# RUN: llc %s -run-pass x86-avoid-SFB -mtriple=x86_64-unknown-linux-gnu -o - | FileCheck %s -check-prefixes DEBUG-LABEL,CHECK
# RUN: llc %s -run-pass x86-avoid-SFB -mtriple=x86_64-unknown-linux-gnu -o - | FileCheck %s -check-prefixes NODEBUG-LABEL,CHECK
#
# This was generated from:
#
# using alpha = float __attribute__((ext_vector_type(4)));
#
# void bravo(alpha * __restrict__ p1, alpha * __restrict__ p2) {
# char *p3 = (char *)p1;
# *p3 = 0;
# alpha t = *p1;
# *p2 = t;
# }
#
# Using the command line:
# clang -g -c 1.cpp -O2 -S -emit-llvm -fno-strict-aliasing --target=x86_64-unknown-unknown -o test.ll
# llc -stop-before=x86-avoid-SFB test.ll -o before.mir
--- |
; ModuleID = 'test.ll'
source_filename = "1.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
; Function Attrs: norecurse nounwind uwtable
define dso_local void @debug(<4 x float>* noalias nocapture %p1, <4 x float>* noalias nocapture %p2) local_unnamed_addr #0 !dbg !10 {
entry:
call void @llvm.dbg.value(metadata <4 x float>* %p1, metadata !21, metadata !DIExpression()), !dbg !25
call void @llvm.dbg.value(metadata <4 x float>* %p2, metadata !22, metadata !DIExpression()), !dbg !25
%0 = bitcast <4 x float>* %p1 to i8*, !dbg !26
call void @llvm.dbg.value(metadata i8* %0, metadata !23, metadata !DIExpression()), !dbg !25
store i8 0, i8* %0, align 1, !dbg !27
%1 = load <4 x float>, <4 x float>* %p1, align 16, !dbg !28
call void @llvm.dbg.value(metadata <4 x float> %1, metadata !24, metadata !DIExpression()), !dbg !25
store <4 x float> %1, <4 x float>* %p2, align 16, !dbg !29
ret void, !dbg !30
}
; Function Attrs: norecurse nounwind uwtable
define dso_local void @nodebug(<4 x float>* noalias nocapture %p1, <4 x float>* noalias nocapture %p2) local_unnamed_addr #0 {
entry:
%0 = bitcast <4 x float>* %p1 to i8*
store i8 0, i8* %0, align 1
%1 = load <4 x float>, <4 x float>* %p1, align 16
store <4 x float> %1, <4 x float>* %p2, align 16
ret void
}
; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #2
attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!6, !7, !8}
!llvm.ident = !{!9}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0 (https://github.com/llvm/llvm-project.git 9afc4764dd24bd2f23c44e51ad33f8e58234a8b6)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None)
!1 = !DIFile(filename: "1.cpp", directory: "C:\5CUsers\5Cgbdawsoc\5CDocuments\5Cllvm\5Cbg40969")
!2 = !{}
!3 = !{!4}
!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 64)
!5 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
!6 = !{i32 2, !"Dwarf Version", i32 4}
!7 = !{i32 2, !"Debug Info Version", i32 3}
!8 = !{i32 1, !"wchar_size", i32 4}
!9 = !{!"clang version 9.0.0 (https://github.com/llvm/llvm-project.git 9afc4764dd24bd2f23c44e51ad33f8e58234a8b6)"}
!10 = distinct !DISubprogram(name: "bravo", linkageName: "_Z5bravoPDv4_fS0_", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !20)
!11 = !DISubroutineType(types: !12)
!12 = !{null, !13, !13}
!13 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !14)
!14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64)
!15 = !DIDerivedType(tag: DW_TAG_typedef, name: "alpha", file: !1, line: 2, baseType: !16)
!16 = !DICompositeType(tag: DW_TAG_array_type, baseType: !17, size: 128, flags: DIFlagVector, elements: !18)
!17 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
!18 = !{!19}
!19 = !DISubrange(count: 4)
!20 = !{!21, !22, !23, !24}
!21 = !DILocalVariable(name: "p1", arg: 1, scope: !10, file: !1, line: 4, type: !13)
!22 = !DILocalVariable(name: "p2", arg: 2, scope: !10, file: !1, line: 4, type: !13)
!23 = !DILocalVariable(name: "p3", scope: !10, file: !1, line: 5, type: !4)
!24 = !DILocalVariable(name: "t", scope: !10, file: !1, line: 7, type: !15)
!25 = !DILocation(line: 0, scope: !10)
!26 = !DILocation(line: 5, column: 14, scope: !10)
!27 = !DILocation(line: 6, column: 7, scope: !10)
!28 = !DILocation(line: 7, column: 13, scope: !10)
!29 = !DILocation(line: 8, column: 7, scope: !10)
!30 = !DILocation(line: 9, column: 1, scope: !10)
...
---
name: debug
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: gr64, preferred-register: '' }
- { id: 1, class: gr64, preferred-register: '' }
- { id: 2, class: vr128, preferred-register: '' }
liveins:
- { reg: '$rdi', virtual-reg: '%0' }
- { reg: '$rsi', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $rdi, $rsi
DBG_VALUE $rdi, $noreg, !21, !DIExpression(), debug-location !25
DBG_VALUE $rsi, $noreg, !22, !DIExpression(), debug-location !25
%1:gr64 = COPY $rsi
DBG_VALUE %1, $noreg, !22, !DIExpression(), debug-location !25
%0:gr64 = COPY $rdi
DBG_VALUE %0, $noreg, !21, !DIExpression(), debug-location !25
DBG_VALUE %0, $noreg, !23, !DIExpression(), debug-location !25
MOV8mi %0, 1, $noreg, 0, $noreg, 0, debug-location !27 :: (store 1 into %ir.0)
%2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg, debug-location !28 :: (load 16 from %ir.p1)
DBG_VALUE %2, $noreg, !24, !DIExpression(), debug-location !25
MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2, debug-location !29 :: (store 16 into %ir.p2)
RET 0, debug-location !30
...
---
name: nodebug
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: gr64, preferred-register: '' }
- { id: 1, class: gr64, preferred-register: '' }
- { id: 2, class: vr128, preferred-register: '' }
liveins:
- { reg: '$rdi', virtual-reg: '%0' }
- { reg: '$rsi', virtual-reg: '%1' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $rdi, $rsi
%1:gr64 = COPY $rsi
%0:gr64 = COPY $rdi
MOV8mi %0, 1, $noreg, 0, $noreg, 0 :: (store 1 into %ir.0)
%2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.p1)
MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2 :: (store 16 into %ir.p2)
RET 0
; DEBUG-LABEL: name: debug
; NODEBUG-LABEL: name: nodebug
; CHECK: %1:gr64 = COPY
; CHECK: %0:gr64 = COPY
; CHECK: MOV8mi
; CHECK: %3:gr8 = MOV8rm
; CHECK: MOV8mr
; CHECK: %4:gr64 = MOV64rm
; CHECK: MOV64mr
; CHECK: %5:gr32 = MOV32rm
; CHECK: MOV32mr
; CHECK: %6:gr16 = MOV16rm
; CHECK: MOV16mr
; CHECK: %7:gr8 = MOV8rm
; CHECK: MOV8mr
; CHECK: RET 0
; DEBUG-LABEL: name: nodebug
...