mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-15 09:51:00 +00:00
Fix MO's analyzePhysReg, it was confusing sub- and super-registers. Problem pointed out by Michael Hordijk.
llvm-svn: 244120
This commit is contained in:
parent
49042becff
commit
e3b11e6e69
@ -310,7 +310,7 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
|
||||
if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg))
|
||||
continue;
|
||||
|
||||
bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg);
|
||||
bool IsRegOrSuperReg = MOReg == Reg || TRI->isSuperRegister(MOReg, Reg);
|
||||
bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg);
|
||||
|
||||
if (IsRegOrSuperReg && MO.readsReg()) {
|
||||
|
@ -3903,34 +3903,56 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
return;
|
||||
}
|
||||
|
||||
// Moving EFLAGS to / from another register requires a push and a pop.
|
||||
// Notice that we have to adjust the stack if we don't want to clobber the
|
||||
// first frame index. See X86FrameLowering.cpp - clobbersTheStack.
|
||||
if (SrcReg == X86::EFLAGS) {
|
||||
if (X86::GR64RegClass.contains(DestReg)) {
|
||||
BuildMI(MBB, MI, DL, get(X86::PUSHF64));
|
||||
BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
|
||||
return;
|
||||
bool FromEFLAGS = SrcReg == X86::EFLAGS;
|
||||
bool ToEFLAGS = DestReg == X86::EFLAGS;
|
||||
int Reg = FromEFLAGS ? DestReg : SrcReg;
|
||||
bool is32 = X86::GR32RegClass.contains(Reg);
|
||||
bool is64 = X86::GR64RegClass.contains(Reg);
|
||||
if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) {
|
||||
// The flags need to be saved, but saving EFLAGS with PUSHF/POPF is
|
||||
// inefficient. Instead:
|
||||
// - Save the overflow flag OF into AL using SETO, and restore it using a
|
||||
// signed 8-bit addition of AL and INT8_MAX.
|
||||
// - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH
|
||||
// using LAHF/SAHF.
|
||||
// - When RAX/EAX is live and isn't the destination register, make sure it
|
||||
// isn't clobbered by PUSH/POP'ing it before and after saving/restoring
|
||||
// the flags.
|
||||
// This approach is ~2.25x faster than using PUSHF/POPF.
|
||||
//
|
||||
// This is still somewhat inefficient because we don't know which flags are
|
||||
// actually live inside EFLAGS. Were we able to do a single SETcc instead of
|
||||
// SETO+LAHF / ADDB+SAHF the code could be 1.02x faster.
|
||||
//
|
||||
// Notice that we have to adjust the stack if we don't want to clobber the
|
||||
// first frame index. See X86FrameLowering.cpp - clobbersTheStack.
|
||||
|
||||
int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
|
||||
int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
|
||||
int Pop = is64 ? X86::POP64r : X86::POP32r;
|
||||
int AX = is64 ? X86::RAX : X86::EAX;
|
||||
|
||||
bool AXDead = (Reg == AX) ||
|
||||
(MachineBasicBlock::LQR_Dead ==
|
||||
MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI));
|
||||
|
||||
if (!AXDead)
|
||||
BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true));
|
||||
if (FromEFLAGS) {
|
||||
BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL);
|
||||
BuildMI(MBB, MI, DL, get(X86::LAHF));
|
||||
BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX);
|
||||
}
|
||||
if (X86::GR32RegClass.contains(DestReg)) {
|
||||
BuildMI(MBB, MI, DL, get(X86::PUSHF32));
|
||||
BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (DestReg == X86::EFLAGS) {
|
||||
if (X86::GR64RegClass.contains(SrcReg)) {
|
||||
BuildMI(MBB, MI, DL, get(X86::PUSH64r))
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
BuildMI(MBB, MI, DL, get(X86::POPF64));
|
||||
return;
|
||||
}
|
||||
if (X86::GR32RegClass.contains(SrcReg)) {
|
||||
BuildMI(MBB, MI, DL, get(X86::PUSH32r))
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
BuildMI(MBB, MI, DL, get(X86::POPF32));
|
||||
return;
|
||||
if (ToEFLAGS) {
|
||||
BuildMI(MBB, MI, DL, get(Mov), AX).addReg(Reg, getKillRegState(KillSrc));
|
||||
BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL)
|
||||
.addReg(X86::AL)
|
||||
.addImm(INT8_MAX);
|
||||
BuildMI(MBB, MI, DL, get(X86::SAHF));
|
||||
}
|
||||
if (!AXDead)
|
||||
BuildMI(MBB, MI, DL, get(Pop), AX);
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
|
||||
|
@ -1,24 +1,58 @@
|
||||
; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386
|
||||
; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f
|
||||
; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664
|
||||
; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664
|
||||
|
||||
declare i32 @bar()
|
||||
declare i32 @foo()
|
||||
declare i32 @bar(i64)
|
||||
|
||||
define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) {
|
||||
; CHECK-LABEL: test_intervening_call:
|
||||
; CHECK: cmpxchg
|
||||
; CHECK: pushf[[LQ:[lq]]]
|
||||
; CHECK-NEXT: pop[[LQ]] [[FLAGS:%.*]]
|
||||
; i386-LABEL: test_intervening_call:
|
||||
; i386: cmpxchg8b
|
||||
; i386-NEXT: pushl %eax
|
||||
; i386-NEXT: seto %al
|
||||
; i386-NEXT: lahf
|
||||
; i386-NEXT: movl %eax, [[FLAGS:%.*]]
|
||||
; i386-NEXT: popl %eax
|
||||
; i386-NEXT: movl %edx, 4(%esp)
|
||||
; i386-NEXT: movl %eax, (%esp)
|
||||
; i386-NEXT: calll bar
|
||||
; i386-NEXT: movl [[FLAGS]], %eax
|
||||
; i386-NEXT: addb $127, %al
|
||||
; i386-NEXT: sahf
|
||||
; i386-NEXT: jne
|
||||
|
||||
; CHECK-NEXT: call[[LQ]] bar
|
||||
; i386f-LABEL: test_intervening_call:
|
||||
; i386f: cmpxchg8b
|
||||
; i386f-NEXT: movl %eax, (%esp)
|
||||
; i386f-NEXT: movl %edx, 4(%esp)
|
||||
; i386f-NEXT: seto %al
|
||||
; i386f-NEXT: lahf
|
||||
; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
|
||||
; i386f-NEXT: calll bar
|
||||
; i386f-NEXT: movl [[FLAGS]], %eax
|
||||
; i386f-NEXT: addb $127, %al
|
||||
; i386f-NEXT: sahf
|
||||
; i386f-NEXT: jne
|
||||
|
||||
; x8664-LABEL: test_intervening_call:
|
||||
; x8664: cmpxchgq
|
||||
; x8664: pushq %rax
|
||||
; x8664-NEXT: seto %al
|
||||
; x8664-NEXT: lahf
|
||||
; x8664-NEXT: movq %rax, [[FLAGS:%.*]]
|
||||
; x8664-NEXT: popq %rax
|
||||
; x8664-NEXT: movq %rax, %rdi
|
||||
; x8664-NEXT: callq bar
|
||||
; x8664-NEXT: movq [[FLAGS]], %rax
|
||||
; x8664-NEXT: addb $127, %al
|
||||
; x8664-NEXT: sahf
|
||||
; x8664-NEXT: jne
|
||||
|
||||
; CHECK-NEXT: push[[LQ]] [[FLAGS]]
|
||||
; CHECK-NEXT: popf[[LQ]]
|
||||
; CHECK-NEXT: jne
|
||||
%cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
|
||||
%v = extractvalue { i64, i1 } %cx, 0
|
||||
%p = extractvalue { i64, i1 } %cx, 1
|
||||
call i32 @bar()
|
||||
call i32 @bar(i64 %v)
|
||||
br i1 %p, label %t, label %f
|
||||
|
||||
t:
|
||||
@ -30,10 +64,18 @@ f:
|
||||
|
||||
; Interesting in producing a clobber without any function calls.
|
||||
define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) {
|
||||
; CHECK-LABEL: test_control_flow:
|
||||
; i386-LABEL: test_control_flow:
|
||||
; i386: cmpxchg
|
||||
; i386-NEXT: jne
|
||||
|
||||
; i386f-LABEL: test_control_flow:
|
||||
; i386f: cmpxchg
|
||||
; i386f-NEXT: jne
|
||||
|
||||
; x8664-LABEL: test_control_flow:
|
||||
; x8664: cmpxchg
|
||||
; x8664-NEXT: jne
|
||||
|
||||
; CHECK: cmpxchg
|
||||
; CHECK-NEXT: jne
|
||||
entry:
|
||||
%cmp = icmp sgt i32 %i, %j
|
||||
br i1 %cmp, label %loop_start, label %cond.end
|
||||
@ -67,20 +109,46 @@ cond.end:
|
||||
; This one is an interesting case because CMOV doesn't have a chain
|
||||
; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
|
||||
define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) {
|
||||
; CHECK-LABEL: test_feed_cmov:
|
||||
; i386-LABEL: test_feed_cmov:
|
||||
; i386: cmpxchgl
|
||||
; i386-NEXT: seto %al
|
||||
; i386-NEXT: lahf
|
||||
; i386-NEXT: movl %eax, [[FLAGS:%.*]]
|
||||
; i386-NEXT: calll foo
|
||||
; i386-NEXT: pushl %eax
|
||||
; i386-NEXT: movl [[FLAGS]], %eax
|
||||
; i386-NEXT: addb $127, %al
|
||||
; i386-NEXT: sahf
|
||||
; i386-NEXT: popl %eax
|
||||
|
||||
; CHECK: cmpxchg
|
||||
; CHECK: pushf[[LQ:[lq]]]
|
||||
; CHECK-NEXT: pop[[LQ]] [[FLAGS:%.*]]
|
||||
; i386f-LABEL: test_feed_cmov:
|
||||
; i386f: cmpxchgl
|
||||
; i386f-NEXT: seto %al
|
||||
; i386f-NEXT: lahf
|
||||
; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
|
||||
; i386f-NEXT: calll foo
|
||||
; i386f-NEXT: pushl %eax
|
||||
; i386f-NEXT: movl [[FLAGS]], %eax
|
||||
; i386f-NEXT: addb $127, %al
|
||||
; i386f-NEXT: sahf
|
||||
; i386f-NEXT: popl %eax
|
||||
|
||||
; CHECK-NEXT: call[[LQ]] bar
|
||||
; x8664-LABEL: test_feed_cmov:
|
||||
; x8664: cmpxchgl
|
||||
; x8664: seto %al
|
||||
; x8664-NEXT: lahf
|
||||
; x8664-NEXT: movq %rax, [[FLAGS:%.*]]
|
||||
; x8664-NEXT: callq foo
|
||||
; x8664-NEXT: pushq %rax
|
||||
; x8664-NEXT: movq [[FLAGS]], %rax
|
||||
; x8664-NEXT: addb $127, %al
|
||||
; x8664-NEXT: sahf
|
||||
; x8664-NEXT: popq %rax
|
||||
|
||||
; CHECK-NEXT: push[[LQ]] [[FLAGS]]
|
||||
; CHECK-NEXT: popf[[LQ]]
|
||||
%res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
|
||||
%success = extractvalue { i32, i1 } %res, 1
|
||||
|
||||
%rhs = call i32 @bar()
|
||||
%rhs = call i32 @foo()
|
||||
|
||||
%ret = select i1 %success, i32 %new, i32 %rhs
|
||||
ret i32 %ret
|
||||
|
Loading…
x
Reference in New Issue
Block a user