mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-13 22:58:50 +00:00
[NVPTX] Add more surface/texture intrinsics, including CUDA unified texture fetch
This also uses TSFlags to mark machine instructions that are surface/texture accesses, and to record the vector width for surface operations. These flags are used to simplify some of the switch statements that need to detect surface/texture instructions. llvm-svn: 213256
This commit is contained in:
parent
48ae22e14a
commit
9c3e284e16
File diff suppressed because it is too large
Load Diff
@ -84,6 +84,17 @@ __attribute__((unused))
|
||||
#endif
|
||||
static const char *NamedMDForAnnotations = "nvvm.annotations";
|
||||
|
||||
namespace NVPTXII {
|
||||
enum {
|
||||
// These must be kept in sync with TSFlags in NVPTXInstrFormats.td
|
||||
IsTexFlag = 0x80,
|
||||
IsSuldMask = 0x300,
|
||||
IsSuldShift = 8,
|
||||
IsSustFlag = 0x400,
|
||||
IsSurfTexQueryFlag = 0x800,
|
||||
IsTexModeUnifiedFlag = 0x1000
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -330,253 +330,51 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
|
||||
unsigned OpNo, MCOperand &MCOp) {
|
||||
const MachineOperand &MO = MI->getOperand(OpNo);
|
||||
const MCInstrDesc &MCID = MI->getDesc();
|
||||
|
||||
switch (MI->getOpcode()) {
|
||||
default: return false;
|
||||
case NVPTX::TEX_1D_F32_I32:
|
||||
case NVPTX::TEX_1D_F32_F32:
|
||||
case NVPTX::TEX_1D_F32_F32_LEVEL:
|
||||
case NVPTX::TEX_1D_F32_F32_GRAD:
|
||||
case NVPTX::TEX_1D_I32_I32:
|
||||
case NVPTX::TEX_1D_I32_F32:
|
||||
case NVPTX::TEX_1D_I32_F32_LEVEL:
|
||||
case NVPTX::TEX_1D_I32_F32_GRAD:
|
||||
case NVPTX::TEX_1D_ARRAY_F32_I32:
|
||||
case NVPTX::TEX_1D_ARRAY_F32_F32:
|
||||
case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
|
||||
case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
|
||||
case NVPTX::TEX_1D_ARRAY_I32_I32:
|
||||
case NVPTX::TEX_1D_ARRAY_I32_F32:
|
||||
case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
|
||||
case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
|
||||
case NVPTX::TEX_2D_F32_I32:
|
||||
case NVPTX::TEX_2D_F32_F32:
|
||||
case NVPTX::TEX_2D_F32_F32_LEVEL:
|
||||
case NVPTX::TEX_2D_F32_F32_GRAD:
|
||||
case NVPTX::TEX_2D_I32_I32:
|
||||
case NVPTX::TEX_2D_I32_F32:
|
||||
case NVPTX::TEX_2D_I32_F32_LEVEL:
|
||||
case NVPTX::TEX_2D_I32_F32_GRAD:
|
||||
case NVPTX::TEX_2D_ARRAY_F32_I32:
|
||||
case NVPTX::TEX_2D_ARRAY_F32_F32:
|
||||
case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
|
||||
case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
|
||||
case NVPTX::TEX_2D_ARRAY_I32_I32:
|
||||
case NVPTX::TEX_2D_ARRAY_I32_F32:
|
||||
case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
|
||||
case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
|
||||
case NVPTX::TEX_3D_F32_I32:
|
||||
case NVPTX::TEX_3D_F32_F32:
|
||||
case NVPTX::TEX_3D_F32_F32_LEVEL:
|
||||
case NVPTX::TEX_3D_F32_F32_GRAD:
|
||||
case NVPTX::TEX_3D_I32_I32:
|
||||
case NVPTX::TEX_3D_I32_F32:
|
||||
case NVPTX::TEX_3D_I32_F32_LEVEL:
|
||||
case NVPTX::TEX_3D_I32_F32_GRAD:
|
||||
{
|
||||
if (MCID.TSFlags & NVPTXII::IsTexFlag) {
|
||||
// This is a texture fetch, so operand 4 is a texref and operand 5 is
|
||||
// a samplerref (operand 5 is only lowered when the unified-mode flag is clear)
|
||||
if (OpNo == 4) {
|
||||
if (OpNo == 4 && MO.isImm()) {
|
||||
lowerImageHandleSymbol(MO.getImm(), MCOp);
|
||||
return true;
|
||||
}
|
||||
if (OpNo == 5) {
|
||||
if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
|
||||
lowerImageHandleSymbol(MO.getImm(), MCOp);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
case NVPTX::SULD_1D_I8_TRAP:
|
||||
case NVPTX::SULD_1D_I16_TRAP:
|
||||
case NVPTX::SULD_1D_I32_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_I8_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_I16_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_I32_TRAP:
|
||||
case NVPTX::SULD_2D_I8_TRAP:
|
||||
case NVPTX::SULD_2D_I16_TRAP:
|
||||
case NVPTX::SULD_2D_I32_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_I8_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_I16_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_I32_TRAP:
|
||||
case NVPTX::SULD_3D_I8_TRAP:
|
||||
case NVPTX::SULD_3D_I16_TRAP:
|
||||
case NVPTX::SULD_3D_I32_TRAP: {
|
||||
// This is a V1 surface load, so operand 1 is a surfref
|
||||
if (OpNo == 1) {
|
||||
} else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
|
||||
unsigned VecSize =
|
||||
1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
|
||||
|
||||
// For a surface load of vector size N, the Nth operand will be the surfref
|
||||
if (OpNo == VecSize && MO.isImm()) {
|
||||
lowerImageHandleSymbol(MO.getImm(), MCOp);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
case NVPTX::SULD_1D_V2I8_TRAP:
|
||||
case NVPTX::SULD_1D_V2I16_TRAP:
|
||||
case NVPTX::SULD_1D_V2I32_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
|
||||
case NVPTX::SULD_2D_V2I8_TRAP:
|
||||
case NVPTX::SULD_2D_V2I16_TRAP:
|
||||
case NVPTX::SULD_2D_V2I32_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
|
||||
case NVPTX::SULD_3D_V2I8_TRAP:
|
||||
case NVPTX::SULD_3D_V2I16_TRAP:
|
||||
case NVPTX::SULD_3D_V2I32_TRAP: {
|
||||
// This is a V2 surface load, so operand 2 is a surfref
|
||||
if (OpNo == 2) {
|
||||
lowerImageHandleSymbol(MO.getImm(), MCOp);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
case NVPTX::SULD_1D_V4I8_TRAP:
|
||||
case NVPTX::SULD_1D_V4I16_TRAP:
|
||||
case NVPTX::SULD_1D_V4I32_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
|
||||
case NVPTX::SULD_2D_V4I8_TRAP:
|
||||
case NVPTX::SULD_2D_V4I16_TRAP:
|
||||
case NVPTX::SULD_2D_V4I32_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
|
||||
case NVPTX::SULD_3D_V4I8_TRAP:
|
||||
case NVPTX::SULD_3D_V4I16_TRAP:
|
||||
case NVPTX::SULD_3D_V4I32_TRAP: {
|
||||
// This is a V4 surface load, so operand 4 is a surfref
|
||||
if (OpNo == 4) {
|
||||
lowerImageHandleSymbol(MO.getImm(), MCOp);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
case NVPTX::SUST_B_1D_B8_TRAP:
|
||||
case NVPTX::SUST_B_1D_B16_TRAP:
|
||||
case NVPTX::SUST_B_1D_B32_TRAP:
|
||||
case NVPTX::SUST_B_1D_V2B8_TRAP:
|
||||
case NVPTX::SUST_B_1D_V2B16_TRAP:
|
||||
case NVPTX::SUST_B_1D_V2B32_TRAP:
|
||||
case NVPTX::SUST_B_1D_V4B8_TRAP:
|
||||
case NVPTX::SUST_B_1D_V4B16_TRAP:
|
||||
case NVPTX::SUST_B_1D_V4B32_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
|
||||
case NVPTX::SUST_B_2D_B8_TRAP:
|
||||
case NVPTX::SUST_B_2D_B16_TRAP:
|
||||
case NVPTX::SUST_B_2D_B32_TRAP:
|
||||
case NVPTX::SUST_B_2D_V2B8_TRAP:
|
||||
case NVPTX::SUST_B_2D_V2B16_TRAP:
|
||||
case NVPTX::SUST_B_2D_V2B32_TRAP:
|
||||
case NVPTX::SUST_B_2D_V4B8_TRAP:
|
||||
case NVPTX::SUST_B_2D_V4B16_TRAP:
|
||||
case NVPTX::SUST_B_2D_V4B32_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
|
||||
case NVPTX::SUST_B_3D_B8_TRAP:
|
||||
case NVPTX::SUST_B_3D_B16_TRAP:
|
||||
case NVPTX::SUST_B_3D_B32_TRAP:
|
||||
case NVPTX::SUST_B_3D_V2B8_TRAP:
|
||||
case NVPTX::SUST_B_3D_V2B16_TRAP:
|
||||
case NVPTX::SUST_B_3D_V2B32_TRAP:
|
||||
case NVPTX::SUST_B_3D_V4B8_TRAP:
|
||||
case NVPTX::SUST_B_3D_V4B16_TRAP:
|
||||
case NVPTX::SUST_B_3D_V4B32_TRAP:
|
||||
case NVPTX::SUST_P_1D_B8_TRAP:
|
||||
case NVPTX::SUST_P_1D_B16_TRAP:
|
||||
case NVPTX::SUST_P_1D_B32_TRAP:
|
||||
case NVPTX::SUST_P_1D_V2B8_TRAP:
|
||||
case NVPTX::SUST_P_1D_V2B16_TRAP:
|
||||
case NVPTX::SUST_P_1D_V2B32_TRAP:
|
||||
case NVPTX::SUST_P_1D_V4B8_TRAP:
|
||||
case NVPTX::SUST_P_1D_V4B16_TRAP:
|
||||
case NVPTX::SUST_P_1D_V4B32_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
|
||||
case NVPTX::SUST_P_2D_B8_TRAP:
|
||||
case NVPTX::SUST_P_2D_B16_TRAP:
|
||||
case NVPTX::SUST_P_2D_B32_TRAP:
|
||||
case NVPTX::SUST_P_2D_V2B8_TRAP:
|
||||
case NVPTX::SUST_P_2D_V2B16_TRAP:
|
||||
case NVPTX::SUST_P_2D_V2B32_TRAP:
|
||||
case NVPTX::SUST_P_2D_V4B8_TRAP:
|
||||
case NVPTX::SUST_P_2D_V4B16_TRAP:
|
||||
case NVPTX::SUST_P_2D_V4B32_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
|
||||
case NVPTX::SUST_P_3D_B8_TRAP:
|
||||
case NVPTX::SUST_P_3D_B16_TRAP:
|
||||
case NVPTX::SUST_P_3D_B32_TRAP:
|
||||
case NVPTX::SUST_P_3D_V2B8_TRAP:
|
||||
case NVPTX::SUST_P_3D_V2B16_TRAP:
|
||||
case NVPTX::SUST_P_3D_V2B32_TRAP:
|
||||
case NVPTX::SUST_P_3D_V4B8_TRAP:
|
||||
case NVPTX::SUST_P_3D_V4B16_TRAP:
|
||||
case NVPTX::SUST_P_3D_V4B32_TRAP: {
|
||||
} else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
|
||||
// This is a surface store, so operand 0 is a surfref
|
||||
if (OpNo == 0) {
|
||||
if (OpNo == 0 && MO.isImm()) {
|
||||
lowerImageHandleSymbol(MO.getImm(), MCOp);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
case NVPTX::TXQ_CHANNEL_ORDER:
|
||||
case NVPTX::TXQ_CHANNEL_DATA_TYPE:
|
||||
case NVPTX::TXQ_WIDTH:
|
||||
case NVPTX::TXQ_HEIGHT:
|
||||
case NVPTX::TXQ_DEPTH:
|
||||
case NVPTX::TXQ_ARRAY_SIZE:
|
||||
case NVPTX::TXQ_NUM_SAMPLES:
|
||||
case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
|
||||
case NVPTX::SUQ_CHANNEL_ORDER:
|
||||
case NVPTX::SUQ_CHANNEL_DATA_TYPE:
|
||||
case NVPTX::SUQ_WIDTH:
|
||||
case NVPTX::SUQ_HEIGHT:
|
||||
case NVPTX::SUQ_DEPTH:
|
||||
case NVPTX::SUQ_ARRAY_SIZE: {
|
||||
} else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
|
||||
// This is a query, so operand 1 is a surfref/texref
|
||||
if (OpNo == 1) {
|
||||
if (OpNo == 1 && MO.isImm()) {
|
||||
lowerImageHandleSymbol(MO.getImm(), MCOp);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -77,54 +77,244 @@ enum NodeType {
|
||||
StoreRetvalV4,
|
||||
|
||||
// Texture intrinsics
|
||||
Tex1DFloatI32,
|
||||
Tex1DFloatS32,
|
||||
Tex1DFloatFloat,
|
||||
Tex1DFloatFloatLevel,
|
||||
Tex1DFloatFloatGrad,
|
||||
Tex1DI32I32,
|
||||
Tex1DI32Float,
|
||||
Tex1DI32FloatLevel,
|
||||
Tex1DI32FloatGrad,
|
||||
Tex1DArrayFloatI32,
|
||||
Tex1DS32S32,
|
||||
Tex1DS32Float,
|
||||
Tex1DS32FloatLevel,
|
||||
Tex1DS32FloatGrad,
|
||||
Tex1DU32S32,
|
||||
Tex1DU32Float,
|
||||
Tex1DU32FloatLevel,
|
||||
Tex1DU32FloatGrad,
|
||||
Tex1DArrayFloatS32,
|
||||
Tex1DArrayFloatFloat,
|
||||
Tex1DArrayFloatFloatLevel,
|
||||
Tex1DArrayFloatFloatGrad,
|
||||
Tex1DArrayI32I32,
|
||||
Tex1DArrayI32Float,
|
||||
Tex1DArrayI32FloatLevel,
|
||||
Tex1DArrayI32FloatGrad,
|
||||
Tex2DFloatI32,
|
||||
Tex1DArrayS32S32,
|
||||
Tex1DArrayS32Float,
|
||||
Tex1DArrayS32FloatLevel,
|
||||
Tex1DArrayS32FloatGrad,
|
||||
Tex1DArrayU32S32,
|
||||
Tex1DArrayU32Float,
|
||||
Tex1DArrayU32FloatLevel,
|
||||
Tex1DArrayU32FloatGrad,
|
||||
Tex2DFloatS32,
|
||||
Tex2DFloatFloat,
|
||||
Tex2DFloatFloatLevel,
|
||||
Tex2DFloatFloatGrad,
|
||||
Tex2DI32I32,
|
||||
Tex2DI32Float,
|
||||
Tex2DI32FloatLevel,
|
||||
Tex2DI32FloatGrad,
|
||||
Tex2DArrayFloatI32,
|
||||
Tex2DS32S32,
|
||||
Tex2DS32Float,
|
||||
Tex2DS32FloatLevel,
|
||||
Tex2DS32FloatGrad,
|
||||
Tex2DU32S32,
|
||||
Tex2DU32Float,
|
||||
Tex2DU32FloatLevel,
|
||||
Tex2DU32FloatGrad,
|
||||
Tex2DArrayFloatS32,
|
||||
Tex2DArrayFloatFloat,
|
||||
Tex2DArrayFloatFloatLevel,
|
||||
Tex2DArrayFloatFloatGrad,
|
||||
Tex2DArrayI32I32,
|
||||
Tex2DArrayI32Float,
|
||||
Tex2DArrayI32FloatLevel,
|
||||
Tex2DArrayI32FloatGrad,
|
||||
Tex3DFloatI32,
|
||||
Tex2DArrayS32S32,
|
||||
Tex2DArrayS32Float,
|
||||
Tex2DArrayS32FloatLevel,
|
||||
Tex2DArrayS32FloatGrad,
|
||||
Tex2DArrayU32S32,
|
||||
Tex2DArrayU32Float,
|
||||
Tex2DArrayU32FloatLevel,
|
||||
Tex2DArrayU32FloatGrad,
|
||||
Tex3DFloatS32,
|
||||
Tex3DFloatFloat,
|
||||
Tex3DFloatFloatLevel,
|
||||
Tex3DFloatFloatGrad,
|
||||
Tex3DI32I32,
|
||||
Tex3DI32Float,
|
||||
Tex3DI32FloatLevel,
|
||||
Tex3DI32FloatGrad,
|
||||
Tex3DS32S32,
|
||||
Tex3DS32Float,
|
||||
Tex3DS32FloatLevel,
|
||||
Tex3DS32FloatGrad,
|
||||
Tex3DU32S32,
|
||||
Tex3DU32Float,
|
||||
Tex3DU32FloatLevel,
|
||||
Tex3DU32FloatGrad,
|
||||
TexCubeFloatFloat,
|
||||
TexCubeFloatFloatLevel,
|
||||
TexCubeS32Float,
|
||||
TexCubeS32FloatLevel,
|
||||
TexCubeU32Float,
|
||||
TexCubeU32FloatLevel,
|
||||
TexCubeArrayFloatFloat,
|
||||
TexCubeArrayFloatFloatLevel,
|
||||
TexCubeArrayS32Float,
|
||||
TexCubeArrayS32FloatLevel,
|
||||
TexCubeArrayU32Float,
|
||||
TexCubeArrayU32FloatLevel,
|
||||
Tld4R2DFloatFloat,
|
||||
Tld4G2DFloatFloat,
|
||||
Tld4B2DFloatFloat,
|
||||
Tld4A2DFloatFloat,
|
||||
Tld4R2DS64Float,
|
||||
Tld4G2DS64Float,
|
||||
Tld4B2DS64Float,
|
||||
Tld4A2DS64Float,
|
||||
Tld4R2DU64Float,
|
||||
Tld4G2DU64Float,
|
||||
Tld4B2DU64Float,
|
||||
Tld4A2DU64Float,
|
||||
TexUnified1DFloatS32,
|
||||
TexUnified1DFloatFloat,
|
||||
TexUnified1DFloatFloatLevel,
|
||||
TexUnified1DFloatFloatGrad,
|
||||
TexUnified1DS32S32,
|
||||
TexUnified1DS32Float,
|
||||
TexUnified1DS32FloatLevel,
|
||||
TexUnified1DS32FloatGrad,
|
||||
TexUnified1DU32S32,
|
||||
TexUnified1DU32Float,
|
||||
TexUnified1DU32FloatLevel,
|
||||
TexUnified1DU32FloatGrad,
|
||||
TexUnified1DArrayFloatS32,
|
||||
TexUnified1DArrayFloatFloat,
|
||||
TexUnified1DArrayFloatFloatLevel,
|
||||
TexUnified1DArrayFloatFloatGrad,
|
||||
TexUnified1DArrayS32S32,
|
||||
TexUnified1DArrayS32Float,
|
||||
TexUnified1DArrayS32FloatLevel,
|
||||
TexUnified1DArrayS32FloatGrad,
|
||||
TexUnified1DArrayU32S32,
|
||||
TexUnified1DArrayU32Float,
|
||||
TexUnified1DArrayU32FloatLevel,
|
||||
TexUnified1DArrayU32FloatGrad,
|
||||
TexUnified2DFloatS32,
|
||||
TexUnified2DFloatFloat,
|
||||
TexUnified2DFloatFloatLevel,
|
||||
TexUnified2DFloatFloatGrad,
|
||||
TexUnified2DS32S32,
|
||||
TexUnified2DS32Float,
|
||||
TexUnified2DS32FloatLevel,
|
||||
TexUnified2DS32FloatGrad,
|
||||
TexUnified2DU32S32,
|
||||
TexUnified2DU32Float,
|
||||
TexUnified2DU32FloatLevel,
|
||||
TexUnified2DU32FloatGrad,
|
||||
TexUnified2DArrayFloatS32,
|
||||
TexUnified2DArrayFloatFloat,
|
||||
TexUnified2DArrayFloatFloatLevel,
|
||||
TexUnified2DArrayFloatFloatGrad,
|
||||
TexUnified2DArrayS32S32,
|
||||
TexUnified2DArrayS32Float,
|
||||
TexUnified2DArrayS32FloatLevel,
|
||||
TexUnified2DArrayS32FloatGrad,
|
||||
TexUnified2DArrayU32S32,
|
||||
TexUnified2DArrayU32Float,
|
||||
TexUnified2DArrayU32FloatLevel,
|
||||
TexUnified2DArrayU32FloatGrad,
|
||||
TexUnified3DFloatS32,
|
||||
TexUnified3DFloatFloat,
|
||||
TexUnified3DFloatFloatLevel,
|
||||
TexUnified3DFloatFloatGrad,
|
||||
TexUnified3DS32S32,
|
||||
TexUnified3DS32Float,
|
||||
TexUnified3DS32FloatLevel,
|
||||
TexUnified3DS32FloatGrad,
|
||||
TexUnified3DU32S32,
|
||||
TexUnified3DU32Float,
|
||||
TexUnified3DU32FloatLevel,
|
||||
TexUnified3DU32FloatGrad,
|
||||
TexUnifiedCubeFloatFloat,
|
||||
TexUnifiedCubeFloatFloatLevel,
|
||||
TexUnifiedCubeS32Float,
|
||||
TexUnifiedCubeS32FloatLevel,
|
||||
TexUnifiedCubeU32Float,
|
||||
TexUnifiedCubeU32FloatLevel,
|
||||
TexUnifiedCubeArrayFloatFloat,
|
||||
TexUnifiedCubeArrayFloatFloatLevel,
|
||||
TexUnifiedCubeArrayS32Float,
|
||||
TexUnifiedCubeArrayS32FloatLevel,
|
||||
TexUnifiedCubeArrayU32Float,
|
||||
TexUnifiedCubeArrayU32FloatLevel,
|
||||
Tld4UnifiedR2DFloatFloat,
|
||||
Tld4UnifiedG2DFloatFloat,
|
||||
Tld4UnifiedB2DFloatFloat,
|
||||
Tld4UnifiedA2DFloatFloat,
|
||||
Tld4UnifiedR2DS64Float,
|
||||
Tld4UnifiedG2DS64Float,
|
||||
Tld4UnifiedB2DS64Float,
|
||||
Tld4UnifiedA2DS64Float,
|
||||
Tld4UnifiedR2DU64Float,
|
||||
Tld4UnifiedG2DU64Float,
|
||||
Tld4UnifiedB2DU64Float,
|
||||
Tld4UnifiedA2DU64Float,
|
||||
|
||||
// Surface intrinsics
|
||||
Suld1DI8Clamp,
|
||||
Suld1DI16Clamp,
|
||||
Suld1DI32Clamp,
|
||||
Suld1DI64Clamp,
|
||||
Suld1DV2I8Clamp,
|
||||
Suld1DV2I16Clamp,
|
||||
Suld1DV2I32Clamp,
|
||||
Suld1DV2I64Clamp,
|
||||
Suld1DV4I8Clamp,
|
||||
Suld1DV4I16Clamp,
|
||||
Suld1DV4I32Clamp,
|
||||
|
||||
Suld1DArrayI8Clamp,
|
||||
Suld1DArrayI16Clamp,
|
||||
Suld1DArrayI32Clamp,
|
||||
Suld1DArrayI64Clamp,
|
||||
Suld1DArrayV2I8Clamp,
|
||||
Suld1DArrayV2I16Clamp,
|
||||
Suld1DArrayV2I32Clamp,
|
||||
Suld1DArrayV2I64Clamp,
|
||||
Suld1DArrayV4I8Clamp,
|
||||
Suld1DArrayV4I16Clamp,
|
||||
Suld1DArrayV4I32Clamp,
|
||||
|
||||
Suld2DI8Clamp,
|
||||
Suld2DI16Clamp,
|
||||
Suld2DI32Clamp,
|
||||
Suld2DI64Clamp,
|
||||
Suld2DV2I8Clamp,
|
||||
Suld2DV2I16Clamp,
|
||||
Suld2DV2I32Clamp,
|
||||
Suld2DV2I64Clamp,
|
||||
Suld2DV4I8Clamp,
|
||||
Suld2DV4I16Clamp,
|
||||
Suld2DV4I32Clamp,
|
||||
|
||||
Suld2DArrayI8Clamp,
|
||||
Suld2DArrayI16Clamp,
|
||||
Suld2DArrayI32Clamp,
|
||||
Suld2DArrayI64Clamp,
|
||||
Suld2DArrayV2I8Clamp,
|
||||
Suld2DArrayV2I16Clamp,
|
||||
Suld2DArrayV2I32Clamp,
|
||||
Suld2DArrayV2I64Clamp,
|
||||
Suld2DArrayV4I8Clamp,
|
||||
Suld2DArrayV4I16Clamp,
|
||||
Suld2DArrayV4I32Clamp,
|
||||
|
||||
Suld3DI8Clamp,
|
||||
Suld3DI16Clamp,
|
||||
Suld3DI32Clamp,
|
||||
Suld3DI64Clamp,
|
||||
Suld3DV2I8Clamp,
|
||||
Suld3DV2I16Clamp,
|
||||
Suld3DV2I32Clamp,
|
||||
Suld3DV2I64Clamp,
|
||||
Suld3DV4I8Clamp,
|
||||
Suld3DV4I16Clamp,
|
||||
Suld3DV4I32Clamp,
|
||||
|
||||
Suld1DI8Trap,
|
||||
Suld1DI16Trap,
|
||||
Suld1DI32Trap,
|
||||
Suld1DI64Trap,
|
||||
Suld1DV2I8Trap,
|
||||
Suld1DV2I16Trap,
|
||||
Suld1DV2I32Trap,
|
||||
Suld1DV2I64Trap,
|
||||
Suld1DV4I8Trap,
|
||||
Suld1DV4I16Trap,
|
||||
Suld1DV4I32Trap,
|
||||
@ -132,9 +322,11 @@ enum NodeType {
|
||||
Suld1DArrayI8Trap,
|
||||
Suld1DArrayI16Trap,
|
||||
Suld1DArrayI32Trap,
|
||||
Suld1DArrayI64Trap,
|
||||
Suld1DArrayV2I8Trap,
|
||||
Suld1DArrayV2I16Trap,
|
||||
Suld1DArrayV2I32Trap,
|
||||
Suld1DArrayV2I64Trap,
|
||||
Suld1DArrayV4I8Trap,
|
||||
Suld1DArrayV4I16Trap,
|
||||
Suld1DArrayV4I32Trap,
|
||||
@ -142,9 +334,11 @@ enum NodeType {
|
||||
Suld2DI8Trap,
|
||||
Suld2DI16Trap,
|
||||
Suld2DI32Trap,
|
||||
Suld2DI64Trap,
|
||||
Suld2DV2I8Trap,
|
||||
Suld2DV2I16Trap,
|
||||
Suld2DV2I32Trap,
|
||||
Suld2DV2I64Trap,
|
||||
Suld2DV4I8Trap,
|
||||
Suld2DV4I16Trap,
|
||||
Suld2DV4I32Trap,
|
||||
@ -152,9 +346,11 @@ enum NodeType {
|
||||
Suld2DArrayI8Trap,
|
||||
Suld2DArrayI16Trap,
|
||||
Suld2DArrayI32Trap,
|
||||
Suld2DArrayI64Trap,
|
||||
Suld2DArrayV2I8Trap,
|
||||
Suld2DArrayV2I16Trap,
|
||||
Suld2DArrayV2I32Trap,
|
||||
Suld2DArrayV2I64Trap,
|
||||
Suld2DArrayV4I8Trap,
|
||||
Suld2DArrayV4I16Trap,
|
||||
Suld2DArrayV4I32Trap,
|
||||
@ -162,12 +358,74 @@ enum NodeType {
|
||||
Suld3DI8Trap,
|
||||
Suld3DI16Trap,
|
||||
Suld3DI32Trap,
|
||||
Suld3DI64Trap,
|
||||
Suld3DV2I8Trap,
|
||||
Suld3DV2I16Trap,
|
||||
Suld3DV2I32Trap,
|
||||
Suld3DV2I64Trap,
|
||||
Suld3DV4I8Trap,
|
||||
Suld3DV4I16Trap,
|
||||
Suld3DV4I32Trap
|
||||
Suld3DV4I32Trap,
|
||||
|
||||
Suld1DI8Zero,
|
||||
Suld1DI16Zero,
|
||||
Suld1DI32Zero,
|
||||
Suld1DI64Zero,
|
||||
Suld1DV2I8Zero,
|
||||
Suld1DV2I16Zero,
|
||||
Suld1DV2I32Zero,
|
||||
Suld1DV2I64Zero,
|
||||
Suld1DV4I8Zero,
|
||||
Suld1DV4I16Zero,
|
||||
Suld1DV4I32Zero,
|
||||
|
||||
Suld1DArrayI8Zero,
|
||||
Suld1DArrayI16Zero,
|
||||
Suld1DArrayI32Zero,
|
||||
Suld1DArrayI64Zero,
|
||||
Suld1DArrayV2I8Zero,
|
||||
Suld1DArrayV2I16Zero,
|
||||
Suld1DArrayV2I32Zero,
|
||||
Suld1DArrayV2I64Zero,
|
||||
Suld1DArrayV4I8Zero,
|
||||
Suld1DArrayV4I16Zero,
|
||||
Suld1DArrayV4I32Zero,
|
||||
|
||||
Suld2DI8Zero,
|
||||
Suld2DI16Zero,
|
||||
Suld2DI32Zero,
|
||||
Suld2DI64Zero,
|
||||
Suld2DV2I8Zero,
|
||||
Suld2DV2I16Zero,
|
||||
Suld2DV2I32Zero,
|
||||
Suld2DV2I64Zero,
|
||||
Suld2DV4I8Zero,
|
||||
Suld2DV4I16Zero,
|
||||
Suld2DV4I32Zero,
|
||||
|
||||
Suld2DArrayI8Zero,
|
||||
Suld2DArrayI16Zero,
|
||||
Suld2DArrayI32Zero,
|
||||
Suld2DArrayI64Zero,
|
||||
Suld2DArrayV2I8Zero,
|
||||
Suld2DArrayV2I16Zero,
|
||||
Suld2DArrayV2I32Zero,
|
||||
Suld2DArrayV2I64Zero,
|
||||
Suld2DArrayV4I8Zero,
|
||||
Suld2DArrayV4I16Zero,
|
||||
Suld2DArrayV4I32Zero,
|
||||
|
||||
Suld3DI8Zero,
|
||||
Suld3DI16Zero,
|
||||
Suld3DI32Zero,
|
||||
Suld3DI64Zero,
|
||||
Suld3DV2I8Zero,
|
||||
Suld3DV2I16Zero,
|
||||
Suld3DV2I32Zero,
|
||||
Suld3DV2I64Zero,
|
||||
Suld3DV4I8Zero,
|
||||
Suld3DV4I16Zero,
|
||||
Suld3DV4I32Zero
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -36,8 +36,24 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
|
||||
bit IsLoad = 0;
|
||||
bit IsStore = 0;
|
||||
|
||||
let TSFlags{3-0} = VecInstType;
|
||||
let TSFlags{4-4} = IsSimpleMove;
|
||||
let TSFlags{5-5} = IsLoad;
|
||||
let TSFlags{6-6} = IsStore;
|
||||
bit IsTex = 0;
|
||||
bit IsSust = 0;
|
||||
bit IsSurfTexQuery = 0;
|
||||
bit IsTexModeUnified = 0;
|
||||
|
||||
// The following field is encoded as log2 of the vector size, plus one,
|
||||
// with 0 meaning the operation is not a surface load (suld) instruction. For example,
|
||||
// if IsSuld == 2, then the instruction is a suld instruction with vector size
|
||||
// 2**(2-1) = 2.
|
||||
bits<2> IsSuld = 0;
|
||||
|
||||
let TSFlags{3-0} = VecInstType;
|
||||
let TSFlags{4-4} = IsSimpleMove;
|
||||
let TSFlags{5-5} = IsLoad;
|
||||
let TSFlags{6-6} = IsStore;
|
||||
let TSFlags{7} = IsTex;
|
||||
let TSFlags{9-8} = IsSuld;
|
||||
let TSFlags{10} = IsSust;
|
||||
let TSFlags{11} = IsSurfTexQuery;
|
||||
let TSFlags{12} = IsTexModeUnified;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -15,6 +15,7 @@
|
||||
|
||||
#include "NVPTX.h"
|
||||
#include "NVPTXMachineFunctionInfo.h"
|
||||
#include "NVPTXSubtarget.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
@ -32,10 +33,16 @@ private:
|
||||
public:
|
||||
NVPTXReplaceImageHandles();
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "NVPTX Replace Image Handles";
|
||||
}
|
||||
private:
|
||||
bool processInstr(MachineInstr &MI);
|
||||
void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
|
||||
bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF,
|
||||
unsigned &Idx);
|
||||
};
|
||||
}
|
||||
|
||||
@ -65,242 +72,43 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
|
||||
E = InstrsToRemove.end(); I != E; ++I) {
|
||||
(*I)->eraseFromParent();
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
|
||||
MachineFunction &MF = *MI.getParent()->getParent();
|
||||
// Check if we have a surface/texture instruction
|
||||
switch (MI.getOpcode()) {
|
||||
default: return false;
|
||||
case NVPTX::TEX_1D_F32_I32:
|
||||
case NVPTX::TEX_1D_F32_F32:
|
||||
case NVPTX::TEX_1D_F32_F32_LEVEL:
|
||||
case NVPTX::TEX_1D_F32_F32_GRAD:
|
||||
case NVPTX::TEX_1D_I32_I32:
|
||||
case NVPTX::TEX_1D_I32_F32:
|
||||
case NVPTX::TEX_1D_I32_F32_LEVEL:
|
||||
case NVPTX::TEX_1D_I32_F32_GRAD:
|
||||
case NVPTX::TEX_1D_ARRAY_F32_I32:
|
||||
case NVPTX::TEX_1D_ARRAY_F32_F32:
|
||||
case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
|
||||
case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
|
||||
case NVPTX::TEX_1D_ARRAY_I32_I32:
|
||||
case NVPTX::TEX_1D_ARRAY_I32_F32:
|
||||
case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
|
||||
case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
|
||||
case NVPTX::TEX_2D_F32_I32:
|
||||
case NVPTX::TEX_2D_F32_F32:
|
||||
case NVPTX::TEX_2D_F32_F32_LEVEL:
|
||||
case NVPTX::TEX_2D_F32_F32_GRAD:
|
||||
case NVPTX::TEX_2D_I32_I32:
|
||||
case NVPTX::TEX_2D_I32_F32:
|
||||
case NVPTX::TEX_2D_I32_F32_LEVEL:
|
||||
case NVPTX::TEX_2D_I32_F32_GRAD:
|
||||
case NVPTX::TEX_2D_ARRAY_F32_I32:
|
||||
case NVPTX::TEX_2D_ARRAY_F32_F32:
|
||||
case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
|
||||
case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
|
||||
case NVPTX::TEX_2D_ARRAY_I32_I32:
|
||||
case NVPTX::TEX_2D_ARRAY_I32_F32:
|
||||
case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
|
||||
case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
|
||||
case NVPTX::TEX_3D_F32_I32:
|
||||
case NVPTX::TEX_3D_F32_F32:
|
||||
case NVPTX::TEX_3D_F32_F32_LEVEL:
|
||||
case NVPTX::TEX_3D_F32_F32_GRAD:
|
||||
case NVPTX::TEX_3D_I32_I32:
|
||||
case NVPTX::TEX_3D_I32_F32:
|
||||
case NVPTX::TEX_3D_I32_F32_LEVEL:
|
||||
case NVPTX::TEX_3D_I32_F32_GRAD: {
|
||||
const MCInstrDesc &MCID = MI.getDesc();
|
||||
|
||||
if (MCID.TSFlags & NVPTXII::IsTexFlag) {
|
||||
// This is a texture fetch, so operand 4 is a texref and operand 5 is
|
||||
// a samplerref (operand 5 is only replaced when the unified-mode flag is clear)
|
||||
MachineOperand &TexHandle = MI.getOperand(4);
|
||||
MachineOperand &SampHandle = MI.getOperand(5);
|
||||
|
||||
replaceImageHandle(TexHandle, MF);
|
||||
replaceImageHandle(SampHandle, MF);
|
||||
|
||||
if (!(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
|
||||
MachineOperand &SampHandle = MI.getOperand(5);
|
||||
replaceImageHandle(SampHandle, MF);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
case NVPTX::SULD_1D_I8_TRAP:
|
||||
case NVPTX::SULD_1D_I16_TRAP:
|
||||
case NVPTX::SULD_1D_I32_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_I8_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_I16_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_I32_TRAP:
|
||||
case NVPTX::SULD_2D_I8_TRAP:
|
||||
case NVPTX::SULD_2D_I16_TRAP:
|
||||
case NVPTX::SULD_2D_I32_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_I8_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_I16_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_I32_TRAP:
|
||||
case NVPTX::SULD_3D_I8_TRAP:
|
||||
case NVPTX::SULD_3D_I16_TRAP:
|
||||
case NVPTX::SULD_3D_I32_TRAP: {
|
||||
// This is a V1 surface load, so operand 1 is a surfref
|
||||
MachineOperand &SurfHandle = MI.getOperand(1);
|
||||
} else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
|
||||
unsigned VecSize =
|
||||
1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
|
||||
|
||||
// For a surface load of vector size N, the Nth operand will be the surfref
|
||||
MachineOperand &SurfHandle = MI.getOperand(VecSize);
|
||||
|
||||
replaceImageHandle(SurfHandle, MF);
|
||||
|
||||
return true;
|
||||
}
|
||||
case NVPTX::SULD_1D_V2I8_TRAP:
|
||||
case NVPTX::SULD_1D_V2I16_TRAP:
|
||||
case NVPTX::SULD_1D_V2I32_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
|
||||
case NVPTX::SULD_2D_V2I8_TRAP:
|
||||
case NVPTX::SULD_2D_V2I16_TRAP:
|
||||
case NVPTX::SULD_2D_V2I32_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
|
||||
case NVPTX::SULD_3D_V2I8_TRAP:
|
||||
case NVPTX::SULD_3D_V2I16_TRAP:
|
||||
case NVPTX::SULD_3D_V2I32_TRAP: {
|
||||
// This is a V2 surface load, so operand 2 is a surfref
|
||||
MachineOperand &SurfHandle = MI.getOperand(2);
|
||||
|
||||
replaceImageHandle(SurfHandle, MF);
|
||||
|
||||
return true;
|
||||
}
|
||||
case NVPTX::SULD_1D_V4I8_TRAP:
|
||||
case NVPTX::SULD_1D_V4I16_TRAP:
|
||||
case NVPTX::SULD_1D_V4I32_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
|
||||
case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
|
||||
case NVPTX::SULD_2D_V4I8_TRAP:
|
||||
case NVPTX::SULD_2D_V4I16_TRAP:
|
||||
case NVPTX::SULD_2D_V4I32_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
|
||||
case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
|
||||
case NVPTX::SULD_3D_V4I8_TRAP:
|
||||
case NVPTX::SULD_3D_V4I16_TRAP:
|
||||
case NVPTX::SULD_3D_V4I32_TRAP: {
|
||||
// This is a V4 surface load, so operand 4 is a surfref
|
||||
MachineOperand &SurfHandle = MI.getOperand(4);
|
||||
|
||||
replaceImageHandle(SurfHandle, MF);
|
||||
|
||||
return true;
|
||||
}
|
||||
case NVPTX::SUST_B_1D_B8_TRAP:
|
||||
case NVPTX::SUST_B_1D_B16_TRAP:
|
||||
case NVPTX::SUST_B_1D_B32_TRAP:
|
||||
case NVPTX::SUST_B_1D_V2B8_TRAP:
|
||||
case NVPTX::SUST_B_1D_V2B16_TRAP:
|
||||
case NVPTX::SUST_B_1D_V2B32_TRAP:
|
||||
case NVPTX::SUST_B_1D_V4B8_TRAP:
|
||||
case NVPTX::SUST_B_1D_V4B16_TRAP:
|
||||
case NVPTX::SUST_B_1D_V4B32_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
|
||||
case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
|
||||
case NVPTX::SUST_B_2D_B8_TRAP:
|
||||
case NVPTX::SUST_B_2D_B16_TRAP:
|
||||
case NVPTX::SUST_B_2D_B32_TRAP:
|
||||
case NVPTX::SUST_B_2D_V2B8_TRAP:
|
||||
case NVPTX::SUST_B_2D_V2B16_TRAP:
|
||||
case NVPTX::SUST_B_2D_V2B32_TRAP:
|
||||
case NVPTX::SUST_B_2D_V4B8_TRAP:
|
||||
case NVPTX::SUST_B_2D_V4B16_TRAP:
|
||||
case NVPTX::SUST_B_2D_V4B32_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
|
||||
case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
|
||||
case NVPTX::SUST_B_3D_B8_TRAP:
|
||||
case NVPTX::SUST_B_3D_B16_TRAP:
|
||||
case NVPTX::SUST_B_3D_B32_TRAP:
|
||||
case NVPTX::SUST_B_3D_V2B8_TRAP:
|
||||
case NVPTX::SUST_B_3D_V2B16_TRAP:
|
||||
case NVPTX::SUST_B_3D_V2B32_TRAP:
|
||||
case NVPTX::SUST_B_3D_V4B8_TRAP:
|
||||
case NVPTX::SUST_B_3D_V4B16_TRAP:
|
||||
case NVPTX::SUST_B_3D_V4B32_TRAP:
|
||||
case NVPTX::SUST_P_1D_B8_TRAP:
|
||||
case NVPTX::SUST_P_1D_B16_TRAP:
|
||||
case NVPTX::SUST_P_1D_B32_TRAP:
|
||||
case NVPTX::SUST_P_1D_V2B8_TRAP:
|
||||
case NVPTX::SUST_P_1D_V2B16_TRAP:
|
||||
case NVPTX::SUST_P_1D_V2B32_TRAP:
|
||||
case NVPTX::SUST_P_1D_V4B8_TRAP:
|
||||
case NVPTX::SUST_P_1D_V4B16_TRAP:
|
||||
case NVPTX::SUST_P_1D_V4B32_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
|
||||
case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
|
||||
case NVPTX::SUST_P_2D_B8_TRAP:
|
||||
case NVPTX::SUST_P_2D_B16_TRAP:
|
||||
case NVPTX::SUST_P_2D_B32_TRAP:
|
||||
case NVPTX::SUST_P_2D_V2B8_TRAP:
|
||||
case NVPTX::SUST_P_2D_V2B16_TRAP:
|
||||
case NVPTX::SUST_P_2D_V2B32_TRAP:
|
||||
case NVPTX::SUST_P_2D_V4B8_TRAP:
|
||||
case NVPTX::SUST_P_2D_V4B16_TRAP:
|
||||
case NVPTX::SUST_P_2D_V4B32_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
|
||||
case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
|
||||
case NVPTX::SUST_P_3D_B8_TRAP:
|
||||
case NVPTX::SUST_P_3D_B16_TRAP:
|
||||
case NVPTX::SUST_P_3D_B32_TRAP:
|
||||
case NVPTX::SUST_P_3D_V2B8_TRAP:
|
||||
case NVPTX::SUST_P_3D_V2B16_TRAP:
|
||||
case NVPTX::SUST_P_3D_V2B32_TRAP:
|
||||
case NVPTX::SUST_P_3D_V4B8_TRAP:
|
||||
case NVPTX::SUST_P_3D_V4B16_TRAP:
|
||||
case NVPTX::SUST_P_3D_V4B32_TRAP: {
|
||||
} else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
|
||||
// This is a surface store, so operand 0 is a surfref
|
||||
MachineOperand &SurfHandle = MI.getOperand(0);
|
||||
|
||||
replaceImageHandle(SurfHandle, MF);
|
||||
|
||||
return true;
|
||||
}
|
||||
case NVPTX::TXQ_CHANNEL_ORDER:
|
||||
case NVPTX::TXQ_CHANNEL_DATA_TYPE:
|
||||
case NVPTX::TXQ_WIDTH:
|
||||
case NVPTX::TXQ_HEIGHT:
|
||||
case NVPTX::TXQ_DEPTH:
|
||||
case NVPTX::TXQ_ARRAY_SIZE:
|
||||
case NVPTX::TXQ_NUM_SAMPLES:
|
||||
case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
|
||||
case NVPTX::SUQ_CHANNEL_ORDER:
|
||||
case NVPTX::SUQ_CHANNEL_DATA_TYPE:
|
||||
case NVPTX::SUQ_WIDTH:
|
||||
case NVPTX::SUQ_HEIGHT:
|
||||
case NVPTX::SUQ_DEPTH:
|
||||
case NVPTX::SUQ_ARRAY_SIZE: {
|
||||
} else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
|
||||
// This is a query, so operand 1 is a surfref/texref
|
||||
MachineOperand &Handle = MI.getOperand(1);
|
||||
|
||||
@ -308,22 +116,38 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void NVPTXReplaceImageHandles::
|
||||
replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
|
||||
unsigned Idx;
|
||||
if (findIndexForHandle(Op, MF, Idx)) {
|
||||
Op.ChangeToImmediate(Idx);
|
||||
}
|
||||
}
|
||||
|
||||
bool NVPTXReplaceImageHandles::
|
||||
findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
|
||||
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>();
|
||||
|
||||
assert(Op.isReg() && "Handle is not in a reg?");
|
||||
|
||||
// Which instruction defines the handle?
|
||||
MachineInstr *MI = MRI.getVRegDef(Op.getReg());
|
||||
assert(MI && "No def for image handle vreg?");
|
||||
MachineInstr &TexHandleDef = *MI;
|
||||
MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg());
|
||||
|
||||
switch (TexHandleDef.getOpcode()) {
|
||||
case NVPTX::LD_i64_avar: {
|
||||
// The handle is a parameter value being loaded, replace with the
|
||||
// parameter symbol
|
||||
const NVPTXSubtarget &ST = MF.getTarget().getSubtarget<NVPTXSubtarget>();
|
||||
if (ST.getDrvInterface() == NVPTX::CUDA) {
|
||||
// For CUDA, we preserve the param loads coming from function arguments
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
|
||||
StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
|
||||
std::string ParamBaseName = MF.getName();
|
||||
@ -333,19 +157,27 @@ replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
|
||||
std::string NewSym;
|
||||
raw_string_ostream NewSymStr(NewSym);
|
||||
NewSymStr << MF.getFunction()->getName() << "_param_" << Param;
|
||||
Op.ChangeToImmediate(
|
||||
MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()));
|
||||
|
||||
InstrsToRemove.insert(&TexHandleDef);
|
||||
break;
|
||||
Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str());
|
||||
return true;
|
||||
}
|
||||
case NVPTX::texsurf_handles: {
|
||||
// The handle is a global variable, replace with the global variable name
|
||||
assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!");
|
||||
const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
|
||||
assert(GV->hasName() && "Global sampler must be named!");
|
||||
Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data()));
|
||||
InstrsToRemove.insert(&TexHandleDef);
|
||||
break;
|
||||
Idx = MFI->getImageHandleSymbolIndex(GV->getName().data());
|
||||
return true;
|
||||
}
|
||||
case NVPTX::nvvm_move_i64:
|
||||
case TargetOpcode::COPY: {
|
||||
bool Res = findIndexForHandle(TexHandleDef.getOperand(1), MF, Idx);
|
||||
if (Res) {
|
||||
InstrsToRemove.insert(&TexHandleDef);
|
||||
}
|
||||
return Res;
|
||||
}
|
||||
default:
|
||||
llvm_unreachable("Unknown instruction operating on handle");
|
||||
|
@ -91,7 +91,12 @@ public:
|
||||
inline bool hasROT64() const { return SmVersion >= 20; }
|
||||
|
||||
bool hasImageHandles() const {
|
||||
// Currently disabled
|
||||
// Enable handles for Kepler+, where CUDA supports indirect surfaces and
|
||||
// textures
|
||||
if (getDrvInterface() == NVPTX::CUDA)
|
||||
return (SmVersion >= 30);
|
||||
|
||||
// Disabled, otherwise
|
||||
return false;
|
||||
}
|
||||
bool is64Bit() const { return Is64Bit; }
|
||||
|
53
test/CodeGen/NVPTX/surf-read-cuda.ll
Normal file
53
test/CodeGen/NVPTX/surf-read-cuda.ll
Normal file
@ -0,0 +1,53 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30
|
||||
|
||||
target triple = "nvptx-unknown-cuda"
|
||||
|
||||
declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
|
||||
declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
|
||||
|
||||
|
||||
; SM20-LABEL: .entry foo
|
||||
; SM30-LABEL: .entry foo
|
||||
define void @foo(i64 %img, float* %red, i32 %idx) {
|
||||
; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
|
||||
; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
|
||||
; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
|
||||
; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
|
||||
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
|
||||
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
|
||||
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
|
||||
%ret = sitofp i32 %val to float
|
||||
; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
|
||||
; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
|
||||
store float %ret, float* %red
|
||||
ret void
|
||||
}
|
||||
|
||||
@surf0 = internal addrspace(1) global i64 0, align 8
|
||||
|
||||
; SM20-LABEL: .entry bar
|
||||
; SM30-LABEL: .entry bar
|
||||
define void @bar(float* %red, i32 %idx) {
|
||||
; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
|
||||
%surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
|
||||
; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [surf0, {%r{{[0-9]+}}}]
|
||||
; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}]
|
||||
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %surfHandle, i32 %idx)
|
||||
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
|
||||
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
|
||||
%ret = sitofp i32 %val to float
|
||||
; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
|
||||
; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
|
||||
store float %ret, float* %red
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
!nvvm.annotations = !{!1, !2, !3}
|
||||
!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
|
||||
!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1}
|
||||
!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}
|
||||
|
42
test/CodeGen/NVPTX/surf-write-cuda.ll
Normal file
42
test/CodeGen/NVPTX/surf-write-cuda.ll
Normal file
@ -0,0 +1,42 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30
|
||||
|
||||
target triple = "nvptx-unknown-cuda"
|
||||
|
||||
declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)
|
||||
declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
|
||||
|
||||
|
||||
; SM20-LABEL: .entry foo
|
||||
; SM30-LABEL: .entry foo
|
||||
define void @foo(i64 %img, i32 %val, i32 %idx) {
|
||||
; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
|
||||
; SM20: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
|
||||
; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
|
||||
; SM30: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
|
||||
tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@surf0 = internal addrspace(1) global i64 0, align 8
|
||||
|
||||
|
||||
|
||||
; SM20-LABEL: .entry bar
|
||||
; SM30-LABEL: .entry bar
|
||||
define void @bar(i32 %val, i32 %idx) {
|
||||
; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
|
||||
%surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
|
||||
; SM20: sust.b.1d.b32.trap [surf0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
|
||||
; SM30: sust.b.1d.b32.trap [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
|
||||
tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %surfHandle, i32 %idx, i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
!nvvm.annotations = !{!1, !2, !3}
|
||||
!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1}
|
||||
!2 = metadata !{void (i32, i32)* @bar, metadata !"kernel", i32 1}
|
||||
!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}
|
||||
|
46
test/CodeGen/NVPTX/tex-read-cuda.ll
Normal file
46
test/CodeGen/NVPTX/tex-read-cuda.ll
Normal file
@ -0,0 +1,46 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30
|
||||
|
||||
|
||||
target triple = "nvptx-unknown-cuda"
|
||||
|
||||
declare { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32)
|
||||
declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)
|
||||
|
||||
; SM20-LABEL: .entry foo
|
||||
; SM30-LABEL: .entry foo
|
||||
define void @foo(i64 %img, float* %red, i32 %idx) {
|
||||
; SM20: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0];
|
||||
; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
|
||||
; SM30: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0];
|
||||
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
|
||||
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %idx)
|
||||
%ret = extractvalue { float, float, float, float } %val, 0
|
||||
; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]]
|
||||
; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]]
|
||||
store float %ret, float* %red
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@tex0 = internal addrspace(1) global i64 0, align 8
|
||||
|
||||
; SM20-LABEL: .entry bar
|
||||
; SM30-LABEL: .entry bar
|
||||
define void @bar(float* %red, i32 %idx) {
|
||||
; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
|
||||
%texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0)
|
||||
; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
|
||||
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
|
||||
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
|
||||
%ret = extractvalue { float, float, float, float } %val, 0
|
||||
; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]]
|
||||
; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]]
|
||||
store float %ret, float* %red
|
||||
ret void
|
||||
}
|
||||
|
||||
!nvvm.annotations = !{!1, !2, !3}
|
||||
!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
|
||||
!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1}
|
||||
!3 = metadata !{i64 addrspace(1)* @tex0, metadata !"texture", i32 1}
|
@ -2,12 +2,12 @@
|
||||
|
||||
target triple = "nvptx-unknown-nvcl"
|
||||
|
||||
declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64, i64, i32)
|
||||
declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64, i64, i32)
|
||||
|
||||
; CHECK: .entry foo
|
||||
define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) {
|
||||
; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}]
|
||||
%val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64 %img, i64 %sampler, i32 %idx)
|
||||
%val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64 %img, i64 %sampler, i32 %idx)
|
||||
%ret = extractvalue { float, float, float, float } %val, 0
|
||||
; CHECK: st.f32 [%r{{[0-9]+}}], %f[[RED]]
|
||||
store float %ret, float* %red
|
||||
|
Loading…
Reference in New Issue
Block a user