Add PPC 440 scheduler and some associated tests (new files)

llvm-svn: 142171
This commit is contained in:
Hal Finkel 2011-10-17 04:03:55 +00:00
parent 74543873a4
commit b128cda81b
3 changed files with 623 additions and 0 deletions

View File

@ -0,0 +1,568 @@
//===- PPCSchedule440.td - PPC 440 Scheduling Definitions ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Primary reference:
// PowerPC 440x6 Embedded Processor Core Users Manual.
// IBM (as updated in) 2010.
// The basic PPC 440 does not include a floating-point unit; the pipeline
// timings here are constructed to match the FP2 unit shipped with the
// PPC-440- and PPC-450-based Blue Gene (L and P) supercomputers.
// References:
// S. Chatterjee, et al. Design and exploitation of a high-performance
// SIMD floating-point unit for Blue Gene/L.
// IBM J. Res. & Dev. 49 (2/3) March/May 2005.
// also:
// Carlos Sosa and Brant Knudson. IBM System Blue Gene Solution:
// Blue Gene/P Application Development.
// IBM (as updated in) 2009.
//===----------------------------------------------------------------------===//
// Functional units on the PowerPC 440/450 chip sets
//
// Front-end stages. Each is modelled as a pair of units; the itineraries
// below list both members of a pair as alternatives for most instructions.
def IFTH1 : FuncUnit; // Fetch unit 1
def IFTH2 : FuncUnit; // Fetch unit 2
def PDCD1 : FuncUnit; // Decode unit 1
def PDCD2 : FuncUnit; // Decode unit 2
def DISS1 : FuncUnit; // Issue unit 1
def DISS2 : FuncUnit; // Issue unit 2
// Register-access/dispatch stages. Note that LRACC is shared by the
// J-pipe and the L-pipe, while the I-pipe and F-pipe have their own.
def LRACC : FuncUnit; // Register access and dispatch for
// the simple integer (J-pipe) and
// load/store (L-pipe) pipelines
def IRACC : FuncUnit; // Register access and dispatch for
// the complex integer (I-pipe) pipeline
def FRACC : FuncUnit; // Register access and dispatch for
// the floating-point execution (F-pipe) pipeline
// I-pipe (complex integer) stages.
def IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline
def IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline
def IWB : FuncUnit; // Write-back unit for the I pipeline
// J-pipe (simple integer) stages.
def JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline
def JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline
def JWB : FuncUnit; // Write-back unit for the J pipeline
// L-pipe (load/store) stages.
def AGEN : FuncUnit; // Address generation for the L pipeline
def CRD : FuncUnit; // D-cache access for the L pipeline
def LWB : FuncUnit; // Write-back unit for the L pipeline
// F-pipe (floating-point) stages.
def FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline
def FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline
def FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline
def FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline
def FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline
def FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline
def FWB : FuncUnit; // Write-back unit for the F pipeline
// Not a hardware stage: the LdStLWARX and LdStSTWCX itineraries reserve
// this unit for 4 cycles so that two such instructions cannot overlap.
def LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used
// to make sure that no lwarx/stwcx.
// instructions are issued while another
// lwarx/stwcx. is in the L pipe.
// Bypass networks referenced from the per-operand bypass lists of the
// itineraries below.
def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
def FPR_Bypass : Bypass; // The bypass for floating-point regs.
// Notes:
// Instructions are held in the FRACC, LRACC and IRACC pipeline
// stages until their source operands become ready. Exceptions:
// - Store instructions will hold in the AGEN stage
// - The integer multiply-accumulate instruction will hold in
// the IEXE1 stage
//
// For most I-pipe operations, the result is available at the end of
// the IEXE1 stage. Operations such as multiply and divide must
// continue to execute in IEXE2 and IWB. Divide resides in IWB for
// 33 cycles (multiply also calculates its result in IWB). For all
// J-pipe instructions, the result is available
// at the end of the JEXE1 stage. Loads have a 3-cycle latency
// (data is not available until after the LWB stage).
//
// The L1 cache hit latency is four cycles for floating point loads
// and three cycles for integer loads.
//
// The stwcx. instruction requires both the LRACC and the IRACC
// dispatch stages. It must be issued from the first issue unit
// (named DISS1 in this file).
//
// All lwarx/stwcx. instructions hold in LRACC if another
// uncommitted lwarx/stwcx. is in AGEN, CRD, or LWB.
//
// msync (a.k.a. sync) and mbar will hold in LWB until all load/store
// resources are empty. AGEN and CRD are held empty until the msync/mbar
// commits.
//
// Most floating-point instructions, computational and move,
// have a 5-cycle latency. Divide takes longer (30 cycles). Instructions that
// update the CR take 2 cycles. Stores take 3 cycles and, as mentioned above,
// loads take 4 cycles (for L1 hit).
//
// This file defines the itinerary class data for the PPC 440 processor.
//
//===----------------------------------------------------------------------===//
// Itinerary table for the PPC 440.
//
// The three arguments to ProcessorItineraries are, in order: the list of
// functional units, the list of bypasses, and one InstrItinData record per
// itinerary class.
//
// Reading an InstrItinData record (semantics per LLVM's TargetItinerary.td):
//  - The first list is the sequence of pipeline stages. InstrStage<N, [U...]>
//    occupies one of the listed units for N cycles. An optional third
//    argument is the time increment to the next stage; 0 means the next
//    listed stage starts in the same cycle, i.e. the units are reserved
//    together.
//  - The integer list gives per-operand cycle counts, result operand first.
//  - The last list names the bypass associated with each operand
//    (NoBypass where none applies).
// Records for LdStSync, SprISYNC and SprTLBSYNC give stages only.
def PPC440Itineraries : ProcessorItineraries<
[IFTH1, IFTH2, PDCD1, PDCD2, DISS1, DISS2, FRACC,
IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB,
FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold],
[GPR_Bypass, FPR_Bypass], [
// ---- Integer operations ----
// IntGeneral, IntCompare, IntRotate and IntShift may use either the
// I-pipe units or the LRACC/J-pipe units at each stage; the remaining
// integer classes are restricted to the I-pipe.
InstrItinData<IntGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC, LRACC]>,
InstrStage<1, [IEXE1, JEXE1]>,
InstrStage<1, [IEXE2, JEXE2]>,
InstrStage<1, [IWB, JWB]>],
[6, 4, 4],
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntCompare , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC, LRACC]>,
InstrStage<1, [IEXE1, JEXE1]>,
InstrStage<1, [IEXE2, JEXE2]>,
InstrStage<1, [IWB, JWB]>],
[6, 4, 4],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
// Divide keeps IWB busy for 33 cycles.
InstrItinData<IntDivW , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<33, [IWB]>],
[40, 4, 4],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntMFFS , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[7, 4, 4],
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntMTFSB0 , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[7, 4, 4],
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
// The three multiply classes share identical stages and operand cycles.
InstrItinData<IntMulHW , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[8, 4, 4],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntMulHWU , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[8, 4, 4],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntMulLI , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[8, 4, 4],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntRotate , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC, LRACC]>,
InstrStage<1, [IEXE1, JEXE1]>,
InstrStage<1, [IEXE2, JEXE2]>,
InstrStage<1, [IWB, JWB]>],
[6, 4, 4],
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntShift , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC, LRACC]>,
InstrStage<1, [IEXE1, JEXE1]>,
InstrStage<1, [IEXE2, JEXE2]>,
InstrStage<1, [IWB, JWB]>],
[6, 4, 4],
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntTrapW , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[6, 4],
[GPR_Bypass, GPR_Bypass]>,
// ---- Branch and condition-register operations (I-pipe only) ----
InstrItinData<BrB , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[8, 4],
[NoBypass, GPR_Bypass]>,
InstrItinData<BrCR , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[8, 4, 4],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<BrMCR , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[8, 4, 4],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<BrMCRX , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[8, 4, 4],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
// ---- Load/store and cache-management operations (L-pipe) ----
// All of these pass through LRACC, AGEN, CRD and LWB in that order.
InstrItinData<LdStDCBA , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
InstrStage<1, [AGEN]>,
InstrStage<1, [CRD]>,
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
InstrItinData<LdStDCBF , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
InstrStage<1, [AGEN]>,
InstrStage<1, [CRD]>,
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
InstrItinData<LdStDCBI , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
InstrStage<1, [AGEN]>,
InstrStage<1, [CRD]>,
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
// LdStGeneral covers both loads and stores, so a single set of operand
// cycles is used for both (see the FIXME below).
InstrItinData<LdStGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
InstrStage<1, [AGEN]>,
InstrStage<1, [CRD]>,
InstrStage<2, [LWB]>],
[9, 5], // FIXME: should be [9, 5] for loads and
// [8, 5] for stores.
[NoBypass, GPR_Bypass]>,
InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
InstrStage<1, [AGEN]>,
InstrStage<1, [CRD]>,
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
InstrStage<1, [AGEN]>,
InstrStage<1, [CRD]>,
InstrStage<1, [LWB]>],
[8, 5, 5],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
// NOTE(review): LdStLFD holds LWB for 2 cycles whereas LdStLFDU holds it
// for 1, although both use operand cycles [9, 5, 5] - confirm this
// difference is intended.
InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
InstrStage<1, [AGEN]>,
InstrStage<1, [CRD]>,
InstrStage<2, [LWB]>],
[9, 5, 5],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStLFDU , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
InstrStage<1, [AGEN]>,
InstrStage<1, [CRD]>,
InstrStage<1, [LWB]>],
[9, 5, 5],
[NoBypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStLHA , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
InstrStage<1, [AGEN]>,
InstrStage<1, [CRD]>,
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
InstrStage<1, [AGEN]>,
InstrStage<1, [CRD]>,
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
// lwarx and stwcx. are restricted to DISS1. IRACC and the LWARX_Hold
// pseudo-unit (4 cycles) are reserved with time increment 0, i.e. starting
// in the same cycle as LRACC, which keeps a second lwarx/stwcx. out of the
// L-pipe while this one is in flight.
InstrItinData<LdStLWARX , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1]>,
InstrStage<1, [IRACC], 0>,
InstrStage<4, [LWARX_Hold], 0>,
InstrStage<1, [LRACC]>,
InstrStage<1, [AGEN]>,
InstrStage<1, [CRD]>,
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1]>,
InstrStage<1, [IRACC], 0>,
InstrStage<4, [LWARX_Hold], 0>,
InstrStage<1, [LRACC]>,
InstrStage<1, [AGEN]>,
InstrStage<1, [CRD]>,
InstrStage<1, [LWB]>],
[8, 5],
[NoBypass, GPR_Bypass]>,
// Sync reserves AGEN for 3 cycles and CRD for 2, each with a time
// increment of 1, so the reservations overlap the stages that follow.
// No operand cycles or bypasses are given.
InstrItinData<LdStSync , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [LRACC]>,
InstrStage<3, [AGEN], 1>,
InstrStage<2, [CRD], 1>,
InstrStage<1, [LWB]>]>,
// ---- Special-purpose register and system operations ----
// isync reserves units in the F, L, J and I pipelines together at each
// step (time increment 0 within a group), ending with a 6-cycle hold on
// FEXE3, LWB, JWB and IWB.
InstrItinData<SprISYNC , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [FRACC], 0>,
InstrStage<1, [LRACC], 0>,
InstrStage<1, [IRACC]>,
InstrStage<1, [FEXE1], 0>,
InstrStage<1, [AGEN], 0>,
InstrStage<1, [JEXE1], 0>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [FEXE2], 0>,
InstrStage<1, [CRD], 0>,
InstrStage<1, [JEXE2], 0>,
InstrStage<1, [IEXE2]>,
InstrStage<6, [FEXE3], 0>,
InstrStage<6, [LWB], 0>,
InstrStage<6, [JWB], 0>,
InstrStage<6, [IWB]>]>,
// The remaining Spr* classes use the I-pipe only; several hold IWB for
// 3 cycles instead of 1.
InstrItinData<SprMFSR , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[6, 4],
[GPR_Bypass, GPR_Bypass]>,
InstrItinData<SprMTMSR , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[6, 4],
[GPR_Bypass, GPR_Bypass]>,
InstrItinData<SprMTSR , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<3, [IWB]>],
[9, 4],
[NoBypass, GPR_Bypass]>,
InstrItinData<SprTLBSYNC , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>]>,
InstrItinData<SprMFCR , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[8, 4],
[NoBypass, GPR_Bypass]>,
InstrItinData<SprMFMSR , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[7, 4],
[GPR_Bypass, GPR_Bypass]>,
InstrItinData<SprMFSPR , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<3, [IWB]>],
[10, 4],
[NoBypass, GPR_Bypass]>,
InstrItinData<SprMFTB , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<3, [IWB]>],
[10, 4],
[NoBypass, GPR_Bypass]>,
InstrItinData<SprMTSPR , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<3, [IWB]>],
[10, 4],
[NoBypass, GPR_Bypass]>,
InstrItinData<SprMTSRIN , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<3, [IWB]>],
[10, 4],
[NoBypass, GPR_Bypass]>,
InstrItinData<SprRFI , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[8, 4],
[NoBypass, GPR_Bypass]>,
InstrItinData<SprSC , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [IRACC]>,
InstrStage<1, [IEXE1]>,
InstrStage<1, [IEXE2]>,
InstrStage<1, [IWB]>],
[8, 4],
[NoBypass, GPR_Bypass]>,
// ---- Floating-point operations (F-pipe) ----
// Every FP class passes through FRACC, FEXE1..FEXE6 and FWB; only the
// divides differ, by holding FWB for many cycles.
InstrItinData<FPGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [FRACC]>,
InstrStage<1, [FEXE1]>,
InstrStage<1, [FEXE2]>,
InstrStage<1, [FEXE3]>,
InstrStage<1, [FEXE4]>,
InstrStage<1, [FEXE5]>,
InstrStage<1, [FEXE6]>,
InstrStage<1, [FWB]>],
[10, 4, 4],
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [FRACC]>,
InstrStage<1, [FEXE1]>,
InstrStage<1, [FEXE2]>,
InstrStage<1, [FEXE3]>,
InstrStage<1, [FEXE4]>,
InstrStage<1, [FEXE5]>,
InstrStage<1, [FEXE6]>,
InstrStage<1, [FWB]>],
[10, 4, 4],
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
// Double-precision divide: FWB is held for 25 cycles.
InstrItinData<FPDivD , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [FRACC]>,
InstrStage<1, [FEXE1]>,
InstrStage<1, [FEXE2]>,
InstrStage<1, [FEXE3]>,
InstrStage<1, [FEXE4]>,
InstrStage<1, [FEXE5]>,
InstrStage<1, [FEXE6]>,
InstrStage<25, [FWB]>],
[35, 4, 4],
[NoBypass, FPR_Bypass, FPR_Bypass]>,
// Single-precision divide: FWB is held for 13 cycles.
InstrItinData<FPDivS , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [FRACC]>,
InstrStage<1, [FEXE1]>,
InstrStage<1, [FEXE2]>,
InstrStage<1, [FEXE3]>,
InstrStage<1, [FEXE4]>,
InstrStage<1, [FEXE5]>,
InstrStage<1, [FEXE6]>,
InstrStage<13, [FWB]>],
[23, 4, 4],
[NoBypass, FPR_Bypass, FPR_Bypass]>,
// Fused multiply-add: one result plus three source operands.
InstrItinData<FPFused , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [FRACC]>,
InstrStage<1, [FEXE1]>,
InstrStage<1, [FEXE2]>,
InstrStage<1, [FEXE3]>,
InstrStage<1, [FEXE4]>,
InstrStage<1, [FEXE5]>,
InstrStage<1, [FEXE6]>,
InstrStage<1, [FWB]>],
[10, 4, 4, 4],
[FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPRes , [InstrStage<1, [IFTH1, IFTH2]>,
InstrStage<1, [PDCD1, PDCD2]>,
InstrStage<1, [DISS1, DISS2]>,
InstrStage<1, [FRACC]>,
InstrStage<1, [FEXE1]>,
InstrStage<1, [FEXE2]>,
InstrStage<1, [FEXE3]>,
InstrStage<1, [FEXE4]>,
InstrStage<1, [FEXE5]>,
InstrStage<1, [FEXE6]>,
InstrStage<1, [FWB]>],
[10, 4],
[FPR_Bypass, FPR_Bypass]>
]>;

View File

@ -0,0 +1,32 @@
; RUN: llc < %s -march=ppc32 -mcpu=440 | grep fmadd
; Test input: a complex multiply-add, result = a * b + c, where each value is
; a { real, imag } pair of doubles passed by value and the result is returned
; through the sret pointer. The multiply/add and multiply/subtract chains
; below are what the test's llc invocation (with -mcpu=440) is expected to
; turn into at least one fmadd instruction.
%0 = type { double, double }
define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
entry:
; Load the real and imaginary parts of a and b.
%a.realp = getelementptr inbounds %0* %a, i32 0, i32 0
%a.real = load double* %a.realp
%a.imagp = getelementptr inbounds %0* %a, i32 0, i32 1
%a.imag = load double* %a.imagp
%b.realp = getelementptr inbounds %0* %b, i32 0, i32 0
%b.real = load double* %b.realp
%b.imagp = getelementptr inbounds %0* %b, i32 0, i32 1
%b.imag = load double* %b.imagp
; Real part of a*b: a.real*b.real - a.imag*b.imag.
%mul.rl = fmul double %a.real, %b.real
%mul.rr = fmul double %a.imag, %b.imag
%mul.r = fsub double %mul.rl, %mul.rr
; Imaginary part of a*b: a.imag*b.real + a.real*b.imag.
%mul.il = fmul double %a.imag, %b.real
%mul.ir = fmul double %a.real, %b.imag
%mul.i = fadd double %mul.il, %mul.ir
; Add c component-wise.
%c.realp = getelementptr inbounds %0* %c, i32 0, i32 0
%c.real = load double* %c.realp
%c.imagp = getelementptr inbounds %0* %c, i32 0, i32 1
%c.imag = load double* %c.imagp
%add.r = fadd double %mul.r, %c.real
%add.i = fadd double %mul.i, %c.imag
; Store both components into the sret result.
%real = getelementptr inbounds %0* %agg.result, i32 0, i32 0
%imag = getelementptr inbounds %0* %agg.result, i32 0, i32 1
store double %add.r, double* %real
store double %add.i, double* %imag
ret void
}

View File

@ -0,0 +1,23 @@
; RUN: llc < %s -march=ppc32 -o %t
; RUN: grep sync %t
; RUN: not grep msync %t
; RUN: llc < %s -march=ppc32 -mcpu=440 | grep msync
; Test input: a function containing an acquire fence on entry and a release
; fence on each side of a branch. The test's llc invocations check that the
; default ppc32 output contains "sync" but never "msync", and that the
; -mcpu=440 output contains "msync".
define i32 @has_a_fence(i32 %a, i32 %b) nounwind {
entry:
fence acquire
%cond = icmp eq i32 %a, %b
br i1 %cond, label %IfEqual, label %IfUnequal
IfEqual:
; Equal path: fence, then fall through to the common exit returning 1.
fence release
br label %end
IfUnequal:
; Unequal path: fence, then return 0 directly.
fence release
ret i32 0
end:
ret i32 1
}