llvm-mirror/lib/Target/CellSPU/SPUMathInstr.td
Kalle Raiskila 8b6f5df4ae Remove all traces of v2[i,f]32 on SPU.
The "half vectors" are now widened to full size by the legalizer.
The only exception is in parameter passing, where half vectors are 
expanded. This causes changes to some dejagnu tests.

llvm-svn: 111360
2010-08-18 10:04:39 +00:00

98 lines
4.4 KiB
TableGen

//======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
//
// Cell SPU math operations
//
// This target description file contains instruction sequences for various
// math operations, such as vector multiplies, i32 multiply, etc., for the
// SPU's i32, i16 i8 and corresponding vector types.
//
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
// purely and completely coincidental.
//===----------------------------------------------------------------------===//
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v16i8 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
(ORv4i32
(ANDv4i32
(SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
(SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
(ROTMAHIv8i16 VECREG:$rB, 8)), 8),
(FSMBIv8i16 0x2222)),
(ILAv4i32 0x0000ffff)),
(SHLIv4i32
(SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
(ROTMAIv4i32_i32 VECREG:$rB, 16)),
(SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
(ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
(FSMBIv8i16 0x2222)), 16))>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v8i16 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
(SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
(SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
(FSMBIv8i16 0xcccc))>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v4i32, i32 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def MPYv4i32:
Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
(Av4i32
(v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
(v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
(v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
def MPYi32:
Pat<(mul R32C:$rA, R32C:$rB),
(Ar32
(Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
(MPYHr32 R32C:$rB, R32C:$rA)),
(MPYUr32 R32C:$rA, R32C:$rB))>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f32, v4f32 divide instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Reciprocal estimate and interpolation
def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
// Division estimate
def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
// Newton-Raphson iteration
def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
Interpf32.Fragment,
DivEstf32.Fragment)>;
// Epsilon addition
def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
(SELBf32_cond NRaphf32.Fragment,
Epsilonf32.Fragment,
(CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
// Reciprocal estimate and interpolation
def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
// Division estimate
def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
// Newton-Raphson iteration
def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
(v4f32 VECREG:$rB),
(v4f32 VECREG:$rA)),
Interpv4f32.Fragment,
DivEstv4f32.Fragment)>;
// Epsilon addition
def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
(SELBv4f32_cond NRaphv4f32.Fragment,
Epsilonv4f32.Fragment,
(CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
Epsilonv4f32.Fragment,
(v4f32 VECREG:$rA)), -1))>;