Cache the Function-dependent subtarget on the MachineFunction.

In preparation for removing the getSubtargetImpl() call from
TargetMachine, flip the switch on caching the Function-dependent
subtarget and remove the bare getSubtargetImpl() call from the
X86 port. As part of this, add a few tests showing that we can
generate code and assemble on X86 based on the features/cpu set
on the Function.

llvm-svn: 232879
This commit is contained in:
Eric Christopher 2015-03-21 03:13:10 +00:00
parent 3e8e6fb933
commit 3d3373d3e2
4 changed files with 108 additions and 2 deletions

View File

@ -54,7 +54,7 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
unsigned FunctionNum, MachineModuleInfo &mmi)
: Fn(F), Target(TM), STI(TM.getSubtargetImpl()), Ctx(mmi.getContext()),
: Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()),
MMI(mmi) {
if (STI->getRegisterInfo())
RegInfo = new (Allocator) MachineRegisterInfo(this);

View File

@ -33,7 +33,6 @@ public:
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~X86TargetMachine() override;
const X86Subtarget *getSubtargetImpl() const override { return &Subtarget; }
const X86Subtarget *getSubtargetImpl(const Function &F) const override;
TargetIRAnalysis getTargetIRAnalysis() override;

View File

@ -0,0 +1,26 @@
; RUN: llc < %s -march=x86-64 -filetype=obj -o - | llvm-objdump -d - | FileCheck %s
; This test verifies that we assemble code for different architectures
; based on target-cpu and target-features attributes.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; @foo uses attribute group #0 and consists solely of an inline-asm
; "aeskeygenassist" instruction. The test assembles to an object file and
; disassembles it, so the mnemonic must survive assembly for this function.
define void @foo() #0 {
entry:
; Inline asm with side effects; clobbers dirflag, fpsr and flags.
call void asm sideeffect "aeskeygenassist $$0x4, %xmm0, %xmm1", "~{dirflag},~{fpsr},~{flags}"()
ret void
}
; CHECK: foo
; CHECK: aeskeygenassist
; @bar uses attribute group #2 and consists solely of an inline-asm "crc32b"
; instruction, which must assemble under that function's cpu/feature set.
define void @bar() #2 {
entry:
; Inline asm with side effects; clobbers dirflag, fpsr and flags.
call void asm sideeffect "crc32b 4(%rbx), %eax", "~{dirflag},~{fpsr},~{flags}"()
ret void
}
; CHECK: bar
; CHECK: crc32b
attributes #0 = { "target-cpu"="x86-64" "target-features"="+avx2" }
attributes #2 = { "target-cpu"="corei7" "target-features"="+sse4.2" }

View File

@ -0,0 +1,81 @@
; RUN: llc < %s -march=x86-64 -o - | FileCheck %s
; This test verifies that we produce different code for different architectures
; based on target-cpu and target-features attributes.
; In this case avx has a vmovss instruction and otherwise we should be using movss
; to materialize constants.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Returns the float constant 4.0 under attribute group #0 (which enables
; +avx2); the test expects the constant to be materialized with vmovss.
define float @_Z3barv() #0 {
entry:
ret float 4.000000e+00
}
; CHECK: barv
; CHECK: vmovss
; Returns the float constant 1.0 under attribute group #1 (target-cpu only,
; no extra target-features); the test expects a movss here.
define float @_Z4testv() #1 {
entry:
ret float 1.000000e+00
}
; CHECK: testv
; CHECK: movss
; Returns the float constant 4.0 under attribute group #2 (corei7 with
; +sse4.2); the test expects a movss here.
define float @_Z3foov() #2 {
entry:
ret float 4.000000e+00
}
; CHECK: foov
; CHECK: movss
; Same body and attribute group (#0) as @_Z3barv, placed after functions that
; use other attribute groups; the test again expects vmovss.
define float @_Z3bazv() #0 {
entry:
ret float 4.000000e+00
}
; CHECK: bazv
; CHECK: vmovss
; Calls the AES key-generation-assist intrinsic on %a with immediate 4, under
; attribute group #3 (which enables +avx2,+aes).
define <2 x i64> @foo(<2 x i64> %a) #3 {
entry:
; Store the argument to a stack slot and reload it before use.
%a.addr = alloca <2 x i64>, align 16
store <2 x i64> %a, <2 x i64>* %a.addr, align 16
%0 = load <2 x i64>, <2 x i64>* %a.addr, align 16
; The intrinsic call whose selected instruction the test looks for.
%1 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %0, i8 4)
ret <2 x i64> %1
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8)
; CHECK: foo
; CHECK: aeskeygenassist
; Function Attrs: nounwind uwtable
; Feeds the byte pointed to by %a into the 8-bit CRC32 intrinsic together with
; %crc and returns the result, under attribute group #3.
define i32 @bar(i32 %crc, i8* %a) #3 {
entry:
; Store both arguments to stack slots and reload them.
%crc.addr = alloca i32, align 4
%a.addr = alloca i8*, align 8
store i32 %crc, i32* %crc.addr, align 4
store i8* %a, i8** %a.addr, align 8
%0 = load i32, i32* %crc.addr, align 4
%1 = load i8*, i8** %a.addr, align 8
; Advance the stored pointer by one byte; the incremented value is written
; back to the slot but not read again in this function.
%incdec.ptr = getelementptr inbounds i8, i8* %1, i32 1
store i8* %incdec.ptr, i8** %a.addr, align 8
; Load the byte at the original (pre-increment) pointer.
%2 = load i8, i8* %1, align 1
; The intrinsic call whose selected instruction the test looks for.
%3 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %0, i8 %2)
ret i32 %3
}
; Function Attrs: nounwind readnone
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8)
; CHECK: bar
; CHECK: crc32b
attributes #0 = { "target-cpu"="x86-64" "target-features"="+avx2" }
attributes #1 = { "target-cpu"="x86-64" }
attributes #2 = { "target-cpu"="corei7" "target-features"="+sse4.2" }
attributes #3 = { "target-cpu"="x86-64" "target-features"="+avx2,+aes" }