[AMDGPU][MC] Enabled constant expressions as operands of s_getreg/s_setreg

See bug 40820: https://bugs.llvm.org/show_bug.cgi?id=40820

Reviewers: artem.tamazov, arsenm

Differential Revision: https://reviews.llvm.org/D61125

llvm-svn: 363255
This commit is contained in:
Dmitry Preobrazhensky 2019-06-13 12:46:37 +00:00
parent f9e2013962
commit 9a76ad9693
7 changed files with 255 additions and 138 deletions

View File

@ -1137,7 +1137,11 @@ private:
};
bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
void validateHwreg(const OperandInfoTy &HwReg,
const int64_t Offset,
const int64_t Width,
const SMLoc Loc);
void errorExpTgt();
OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
@ -4496,124 +4500,95 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
return MatchOperand_Success;
}
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
int64_t &Width) {
// Predicate for s_waitcnt operands: forwards the result of isImm().
bool AMDGPUOperand::isSWaitCnt() const { return isImm(); }
//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//
bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
int64_t &Offset,
int64_t &Width) {
using namespace llvm::AMDGPU::Hwreg;
if (Parser.getTok().getString() != "hwreg")
return true;
Parser.Lex();
if (getLexer().isNot(AsmToken::LParen))
return true;
Parser.Lex();
if (getLexer().is(AsmToken::Identifier)) {
// The register may be specified by name or using a numeric code
if (isToken(AsmToken::Identifier) &&
(HwReg.Id = getHwregId(getTokenStr())) >= 0) {
HwReg.IsSymbolic = true;
HwReg.Id = ID_UNKNOWN_;
const StringRef tok = Parser.getTok().getString();
int Last = ID_SYMBOLIC_LAST_;
if (isSI() || isCI() || isVI())
Last = ID_SYMBOLIC_FIRST_GFX9_;
else if (isGFX9())
Last = ID_SYMBOLIC_FIRST_GFX10_;
for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
if (tok == IdSymbolic[i]) {
HwReg.Id = i;
break;
}
}
Parser.Lex();
} else {
HwReg.IsSymbolic = false;
if (getLexer().isNot(AsmToken::Integer))
return true;
if (getParser().parseAbsoluteExpression(HwReg.Id))
return true;
}
if (getLexer().is(AsmToken::RParen)) {
Parser.Lex();
lex(); // skip register name
} else if (!parseExpr(HwReg.Id)) {
return false;
}
// optional params
if (getLexer().isNot(AsmToken::Comma))
return true;
Parser.Lex();
if (getLexer().isNot(AsmToken::Integer))
return true;
if (getParser().parseAbsoluteExpression(Offset))
if (trySkipToken(AsmToken::RParen))
return true;
if (getLexer().isNot(AsmToken::Comma))
return true;
Parser.Lex();
if (getLexer().isNot(AsmToken::Integer))
return true;
if (getParser().parseAbsoluteExpression(Width))
return true;
if (getLexer().isNot(AsmToken::RParen))
return true;
Parser.Lex();
return false;
// parse optional params
return
skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
parseExpr(Offset) &&
skipToken(AsmToken::Comma, "expected a comma") &&
parseExpr(Width) &&
skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
void
AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
const int64_t Offset,
const int64_t Width,
const SMLoc Loc) {
using namespace llvm::AMDGPU::Hwreg;
int64_t Imm16Val = 0;
SMLoc S = Parser.getTok().getLoc();
switch(getLexer().getKind()) {
default: return MatchOperand_NoMatch;
case AsmToken::Integer:
// The operand can be an integer value.
if (getParser().parseAbsoluteExpression(Imm16Val))
return MatchOperand_NoMatch;
if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
Error(S, "invalid immediate: only 16-bit values are legal");
// Do not return error code, but create an imm operand anyway and proceed
// to the next operand, if any. That avoids unnecessary error messages.
}
break;
case AsmToken::Identifier: {
OperandInfoTy HwReg(ID_UNKNOWN_);
int64_t Offset = OFFSET_DEFAULT_;
int64_t Width = WIDTH_M1_DEFAULT_ + 1;
if (parseHwregConstruct(HwReg, Offset, Width))
return MatchOperand_ParseFail;
if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
if (HwReg.IsSymbolic)
Error(S, "invalid symbolic name of hardware register");
else
Error(S, "invalid code of hardware register: only 6-bit values are legal");
}
if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
Error(S, "invalid bit offset: only 5-bit values are legal");
if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
}
break;
if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
Error(Loc, "specified hardware register is not supported on this GPU");
} else if (!isValidHwreg(HwReg.Id)) {
Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
} else if (!isValidHwregOffset(Offset)) {
Error(Loc, "invalid bit offset: only 5-bit values are legal");
} else if (!isValidHwregWidth(Width)) {
Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
}
Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
return MatchOperand_Success;
}
bool AMDGPUOperand::isSWaitCnt() const {
return isImm();
// Parses the hwreg operand of s_getreg/s_setreg. The operand is either a
// hwreg(...) macro or an expression giving the raw immediate.
//
// This function always pushes an ImmTyHwreg immediate and returns
// MatchOperand_Success, even when parsing or validation failed: problems are
// reported through Error() only, so that a single bad operand does not
// trigger further error messages.
OperandMatchResultTy
AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  SMLoc Loc = getLoc();
  int64_t ImmVal = 0;

  if (!trySkipId("hwreg", AsmToken::LParen)) {
    // Not a hwreg(...) macro: the operand is an expression that must
    // evaluate to a 16-bit immediate.
    if (parseExpr(ImmVal) && (ImmVal < 0 || !isUInt<16>(ImmVal)))
      Error(Loc, "invalid immediate: only 16-bit values are legal");
  } else {
    // hwreg(...) macro: start from the default offset/width, let the body
    // parser overwrite whatever was specified, then validate and encode.
    OperandInfoTy HwReg(ID_UNKNOWN_);
    int64_t Offset = OFFSET_DEFAULT_;
    int64_t Width = WIDTH_DEFAULT_;
    if (parseHwregBody(HwReg, Offset, Width)) {
      validateHwreg(HwReg, Offset, Width, Loc);
      ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
    }
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}
// Predicate for hwreg operands: true when this operand is an immediate of
// type ImmTyHwreg, as reported by isImmTy().
bool
AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
//===----------------------------------------------------------------------===//
// sendmsg
//===----------------------------------------------------------------------===//
bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
using namespace llvm::AMDGPU::SendMsg;

View File

@ -1405,25 +1405,22 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
using namespace llvm::AMDGPU::Hwreg;
unsigned Id;
unsigned Offset;
unsigned Width;
unsigned SImm16 = MI->getOperand(OpNo).getImm();
const unsigned Id = (SImm16 & ID_MASK_) >> ID_SHIFT_;
const unsigned Offset = (SImm16 & OFFSET_MASK_) >> OFFSET_SHIFT_;
const unsigned Width = ((SImm16 & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
using namespace llvm::AMDGPU::Hwreg;
unsigned Val = MI->getOperand(OpNo).getImm();
decodeHwreg(Val, Id, Offset, Width);
StringRef HwRegName = getHwreg(Id, STI);
O << "hwreg(";
unsigned Last = ID_SYMBOLIC_LAST_;
if (AMDGPU::isSI(STI) || AMDGPU::isCI(STI) || AMDGPU::isVI(STI))
Last = ID_SYMBOLIC_FIRST_GFX9_;
else if (AMDGPU::isGFX9(STI))
Last = ID_SYMBOLIC_FIRST_GFX10_;
if (ID_SYMBOLIC_FIRST_ <= Id && Id < Last && IdSymbolic[Id]) {
O << IdSymbolic[Id];
if (!HwRegName.empty()) {
O << HwRegName;
} else {
O << Id;
}
if (Width != WIDTH_M1_DEFAULT_ + 1 || Offset != OFFSET_DEFAULT_) {
if (Width != WIDTH_DEFAULT_ || Offset != OFFSET_DEFAULT_) {
O << ", " << Offset << ", " << Width;
}
O << ')';

View File

@ -344,6 +344,11 @@ enum WidthMinusOne : unsigned { // WidthMinusOne, (5) [15:11]
WIDTH_M1_SRC_PRIVATE_BASE = 15
};
// Width-domain counterparts of selected WidthMinusOne values: each entry is
// the corresponding WIDTH_M1_* value plus one.
enum Width : unsigned { WIDTH_DEFAULT_ = WIDTH_M1_DEFAULT_ + 1 };
} // namespace Hwreg
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.

View File

@ -10,6 +10,7 @@
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "AMDGPUAsmUtils.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
@ -640,6 +641,68 @@ unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
}
//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//
namespace Hwreg {
// Maps a symbolic hardware register name to its id by scanning IdSymbolic
// over [ID_SYMBOLIC_FIRST_, ID_SYMBOLIC_LAST_). Returns ID_UNKNOWN_ when no
// entry matches \p Name.
int64_t getHwregId(const StringRef Name) {
  for (int Candidate = ID_SYMBOLIC_FIRST_; Candidate < ID_SYMBOLIC_LAST_;
       ++Candidate) {
    // Table slots without a name can never match.
    if (!IdSymbolic[Candidate])
      continue;
    if (Name == IdSymbolic[Candidate])
      return Candidate;
  }
  return ID_UNKNOWN_;
}
// Returns the exclusive upper bound of the symbolic hwreg id range for the
// given subtarget: ID_SYMBOLIC_FIRST_GFX9_ for SI/CI/VI,
// ID_SYMBOLIC_FIRST_GFX10_ for GFX9, and ID_SYMBOLIC_LAST_ otherwise.
static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
  const bool IsSICIVI = isSI(STI) || isCI(STI) || isVI(STI);
  if (IsSICIVI)
    return ID_SYMBOLIC_FIRST_GFX9_;
  if (isGFX9(STI))
    return ID_SYMBOLIC_FIRST_GFX10_;
  return ID_SYMBOLIC_LAST_;
}
// Returns true when \p Id falls inside the symbolic id range available on
// subtarget \p STI and the IdSymbolic table has a name for it.
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
  if (Id < ID_SYMBOLIC_FIRST_)
    return false;
  if (Id >= getLastSymbolicHwreg(STI))
    return false;
  // Only reached for in-range ids, so the table access is bounded.
  return IdSymbolic[Id] ? true : false;
}
// Returns true when \p Id is a legal numeric hwreg code: non-negative and
// accepted by the isUInt<ID_WIDTH_> range check.
bool isValidHwreg(int64_t Id) {
  if (Id < 0)
    return false;
  return isUInt<ID_WIDTH_>(Id);
}
// Returns true when \p Offset is a legal hwreg bit offset: non-negative and
// accepted by the isUInt<OFFSET_WIDTH_> range check.
bool isValidHwregOffset(int64_t Offset) {
  if (Offset < 0)
    return false;
  return isUInt<OFFSET_WIDTH_>(Offset);
}
// Returns true when \p Width is a legal hwreg bitfield width, i.e. when
// Width - 1 is non-negative and accepted by the isUInt<WIDTH_M1_WIDTH_>
// range check.
bool isValidHwregWidth(int64_t Width) {
  // Test Width against 1 before subtracting: computing Width - 1 up front
  // would be a signed overflow (undefined behavior) for the minimum int64_t
  // value. For every other input this is equivalent to 0 <= Width - 1.
  return 1 <= Width && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
}
// Packs a hwreg operand into a single value: \p Id, \p Offset and
// \p Width - 1 are shifted to ID_SHIFT_, OFFSET_SHIFT_ and WIDTH_M1_SHIFT_
// respectively and OR-ed together. No validation is performed here.
int64_t encodeHwreg(int64_t Id, int64_t Offset, int64_t Width) {
  const int64_t IdField = Id << ID_SHIFT_;
  const int64_t OffsetField = Offset << OFFSET_SHIFT_;
  const int64_t WidthM1Field = (Width - 1) << WIDTH_M1_SHIFT_;
  return IdField | OffsetField | WidthM1Field;
}
// Returns the symbolic name of hwreg \p Id on subtarget \p STI, or an empty
// string when isValidHwreg(Id, STI) rejects the id.
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
  if (isValidHwreg(Id, STI))
    return IdSymbolic[Id];
  return "";
}
// Splits an encoded hwreg value \p Val into its fields. \p Id and \p Offset
// receive the masked and shifted fields as stored; \p Width receives the
// stored width-minus-one field plus one.
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
  // Isolates one bitfield of Val given its mask and its shift amount.
  const auto ExtractField = [Val](unsigned Mask, unsigned Shift) {
    return (Val & Mask) >> Shift;
  };

  Id = ExtractField(ID_MASK_, ID_SHIFT_);
  Offset = ExtractField(OFFSET_MASK_, OFFSET_SHIFT_);
  Width = ExtractField(WIDTH_M1_MASK_, WIDTH_M1_SHIFT_) + 1;
}
} // namespace Hwreg
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
unsigned getInitialPSInputAddr(const Function &F) {
return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

View File

@ -406,6 +406,33 @@ unsigned encodeWaitcnt(const IsaVersion &Version,
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
// Helpers for parsing, validating, encoding and printing the hwreg operand
// of s_getreg/s_setreg.
namespace Hwreg {
// Returns the id of the symbolic hardware register called Name, or
// ID_UNKNOWN_ when the symbolic name table has no such entry.
LLVM_READONLY
int64_t getHwregId(const StringRef Name);
// Returns true when Id lies in the symbolic id range available on subtarget
// STI and has a symbolic name.
LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
// Returns true when Id is non-negative and passes the isUInt<ID_WIDTH_>
// range check, regardless of subtarget.
LLVM_READNONE
bool isValidHwreg(int64_t Id);
// Returns true when Offset is non-negative and passes the
// isUInt<OFFSET_WIDTH_> range check.
LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);
// Returns true when Width - 1 is non-negative and passes the
// isUInt<WIDTH_M1_WIDTH_> range check.
LLVM_READNONE
bool isValidHwregWidth(int64_t Width);
// Packs Id, Offset and Width - 1 into one value using ID_SHIFT_,
// OFFSET_SHIFT_ and WIDTH_M1_SHIFT_. Performs no validation.
LLVM_READNONE
int64_t encodeHwreg(int64_t Id, int64_t Offset, int64_t Width);
// Returns the symbolic name of Id on subtarget STI, or an empty string when
// isValidHwreg(Id, STI) is false.
LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
// Inverse of encodeHwreg: extracts Id, Offset and Width from Val, where
// Width is the stored width-minus-one field plus one.
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
} // namespace Hwreg
unsigned getInitialPSInputAddr(const Function &F);
LLVM_READNONE

View File

@ -8,11 +8,23 @@
s_setreg_b32 0x1f803, s2
// GCN: error: invalid immediate: only 16-bit values are legal
s_setreg_b32 typo(0x40), s2
// GCN: error: expected absolute expression
s_setreg_b32 hwreg(0x40), s2
// GCN: error: invalid code of hardware register: only 6-bit values are legal
s_setreg_b32 hwreg(HW_REG_WRONG), s2
// GCN: error: invalid symbolic name of hardware register
// GCN: error: expected absolute expression
s_setreg_b32 hwreg(1 2,3), s2
// GCN: error: expected a comma or a closing parenthesis
s_setreg_b32 hwreg(1,2 3), s2
// GCN: error: expected a comma
s_setreg_b32 hwreg(1,2,3, s2
// GCN: error: expected a closing parenthesis
s_setreg_b32 hwreg(3,32,32), s2
// GCN: error: invalid bit offset: only 5-bit values are legal
@ -33,57 +45,57 @@ s_cbranch_i_fork s[2:3], 0x6
// GFX10: error: instruction not supported on this GPU
s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES)
// SI-ERR: error: invalid symbolic name of hardware register
// VI-ERR: error: invalid symbolic name of hardware register
// SI-ERR: specified hardware register is not supported on this GPU
// VI-ERR: specified hardware register is not supported on this GPU
// GFX9: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES) ; encoding: [0x0f,0xf8,0x82,0xb8]
// GFX10: s_getreg_b32 s2, hwreg(HW_REG_SH_MEM_BASES) ; encoding: [0x0f,0xf8,0x02,0xb9]
s_getreg_b32 s2, hwreg(HW_REG_TBA_LO)
// SI-ERR: error: invalid symbolic name of hardware register
// VI-ERR: error: invalid symbolic name of hardware register
// GFX9-ERR: error: invalid symbolic name of hardware register
// SI-ERR: specified hardware register is not supported on this GPU
// VI-ERR: specified hardware register is not supported on this GPU
// GFX9-ERR: specified hardware register is not supported on this GPU
// GFX10: s_getreg_b32 s2, hwreg(HW_REG_TBA_LO) ; encoding: [0x10,0xf8,0x02,0xb9]
s_getreg_b32 s2, hwreg(HW_REG_TBA_HI)
// SI-ERR: error: invalid symbolic name of hardware register
// VI-ERR: error: invalid symbolic name of hardware register
// GFX9-ERR: error: invalid symbolic name of hardware register
// SI-ERR: specified hardware register is not supported on this GPU
// VI-ERR: specified hardware register is not supported on this GPU
// GFX9-ERR: specified hardware register is not supported on this GPU
// GFX10: s_getreg_b32 s2, hwreg(HW_REG_TBA_HI) ; encoding: [0x11,0xf8,0x02,0xb9]
s_getreg_b32 s2, hwreg(HW_REG_TMA_LO)
// SI-ERR: error: invalid symbolic name of hardware register
// VI-ERR: error: invalid symbolic name of hardware register
// GFX9-ERR: error: invalid symbolic name of hardware register
// SI-ERR: specified hardware register is not supported on this GPU
// VI-ERR: specified hardware register is not supported on this GPU
// GFX9-ERR: specified hardware register is not supported on this GPU
// GFX10: s_getreg_b32 s2, hwreg(HW_REG_TMA_LO) ; encoding: [0x12,0xf8,0x02,0xb9]
s_getreg_b32 s2, hwreg(HW_REG_TMA_HI)
// SI-ERR: error: invalid symbolic name of hardware register
// VI-ERR: error: invalid symbolic name of hardware register
// GFX9-ERR: error: invalid symbolic name of hardware register
// SI-ERR: specified hardware register is not supported on this GPU
// VI-ERR: specified hardware register is not supported on this GPU
// GFX9-ERR: specified hardware register is not supported on this GPU
// GFX10: s_getreg_b32 s2, hwreg(HW_REG_TMA_HI) ; encoding: [0x13,0xf8,0x02,0xb9]
s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_LO)
// SI-ERR: error: invalid symbolic name of hardware register
// VI-ERR: error: invalid symbolic name of hardware register
// GFX9-ERR: error: invalid symbolic name of hardware register
// SI-ERR: specified hardware register is not supported on this GPU
// VI-ERR: specified hardware register is not supported on this GPU
// GFX9-ERR: specified hardware register is not supported on this GPU
// GFX10: s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_LO) ; encoding: [0x14,0xf8,0x02,0xb9]
s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_HI)
// SI-ERR: error: invalid symbolic name of hardware register
// VI-ERR: error: invalid symbolic name of hardware register
// GFX9-ERR: error: invalid symbolic name of hardware register
// SI-ERR: specified hardware register is not supported on this GPU
// VI-ERR: specified hardware register is not supported on this GPU
// GFX9-ERR: specified hardware register is not supported on this GPU
// GFX10: s_getreg_b32 s2, hwreg(HW_REG_FLAT_SCR_HI) ; encoding: [0x15,0xf8,0x02,0xb9]
s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK)
// SI-ERR: error: invalid symbolic name of hardware register
// VI-ERR: error: invalid symbolic name of hardware register
// GFX9-ERR: error: invalid symbolic name of hardware register
// SI-ERR: specified hardware register is not supported on this GPU
// VI-ERR: specified hardware register is not supported on this GPU
// GFX9-ERR: specified hardware register is not supported on this GPU
// GFX10: s_getreg_b32 s2, hwreg(HW_REG_XNACK_MASK) ; encoding: [0x16,0xf8,0x02,0xb9]
s_getreg_b32 s2, hwreg(HW_REG_POPS_PACKER)
// SI-ERR: error: invalid symbolic name of hardware register
// VI-ERR: error: invalid symbolic name of hardware register
// GFX9-ERR: error: invalid symbolic name of hardware register
// SI-ERR: specified hardware register is not supported on this GPU
// VI-ERR: specified hardware register is not supported on this GPU
// GFX9-ERR: specified hardware register is not supported on this GPU
// GFX10: s_getreg_b32 s2, hwreg(HW_REG_POPS_PACKER) ; encoding: [0x19,0xf8,0x02,0xb9]
s_cmpk_le_u32 s2, -1

View File

@ -91,6 +91,10 @@ s_cbranch_i_fork s[2:3], 0x6
// SICI: s_cbranch_i_fork s[2:3], 6 ; encoding: [0x06,0x00,0x82,0xb8]
// VI9: s_cbranch_i_fork s[2:3], 6 ; encoding: [0x06,0x00,0x02,0xb8]
//===----------------------------------------------------------------------===//
// getreg/setreg and hwreg macro
//===----------------------------------------------------------------------===//
// raw number mapped to known HW register
s_getreg_b32 s2, 0x6
// SICI: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9]
@ -277,6 +281,40 @@ s_setreg_imm32_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), 0xff
// SICI: s_setreg_imm32_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), 0xff ; encoding: [0x45,0xf0,0x80,0xba,0xff,0x00,0x00,0x00]
// VI9: s_setreg_imm32_b32 hwreg(HW_REG_GPR_ALLOC, 1, 31), 0xff ; encoding: [0x45,0xf0,0x00,0xba,0xff,0x00,0x00,0x00]
//===----------------------------------------------------------------------===//
// expressions and hwreg macro
//===----------------------------------------------------------------------===//
hwreg=6
s_getreg_b32 s2, hwreg
// SICI: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9]
// VI9: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x82,0xb8]
x=5
s_getreg_b32 s2, x+1
// SICI: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9]
// VI9: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x82,0xb8]
x=5
s_getreg_b32 s2, 1+x
// SICI: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x02,0xb9]
// VI9: s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) ; encoding: [0x06,0x00,0x82,0xb8]
reg=50
offset=2
width=30
s_getreg_b32 s2, hwreg(reg + 1, offset - 1, width + 1)
// SICI: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9]
// VI9: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x82,0xb8]
s_getreg_b32 s2, hwreg(1 + reg, -1 + offset, 1 + width)
// SICI: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x02,0xb9]
// VI9: s_getreg_b32 s2, hwreg(51, 1, 31) ; encoding: [0x73,0xf0,0x82,0xb8]
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
s_endpgm_ordered_ps_done
// GFX9: s_endpgm_ordered_ps_done ; encoding: [0x00,0x00,0x9e,0xbf]
// NOSICIVI: error: instruction not supported on this GPU