mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-28 14:10:41 +00:00
Enable support for vector sext and trunc:
Limit the folding of any_ext and sext into the load operation to scalars. Limit the active-bits trunc optimization to scalars. Document vector trunc and vector sext in LangRef. Similar to commit 126080 (for enabling zext). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126424 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0412d5b40a
commit
8c20ec54d9
@ -4575,12 +4575,12 @@ entry:
|
||||
type <tt>ty2</tt>.</p>
|
||||
|
||||
<h5>Arguments:</h5>
|
||||
<p>The '<tt>trunc</tt>' instruction takes a <tt>value</tt> to trunc, which must
|
||||
be an <a href="#t_integer">integer</a> type, and a type that specifies the
|
||||
size and type of the result, which must be
|
||||
an <a href="#t_integer">integer</a> type. The bit size of <tt>value</tt> must
|
||||
be larger than the bit size of <tt>ty2</tt>. Equal sized types are not
|
||||
allowed.</p>
|
||||
<p>The '<tt>trunc</tt>' instruction takes a value to trunc, and a type to trunc it to.
|
||||
Both types must be <a href="#t_integer">integer</a> types, or vectors
|
||||
of the same number of integers.
|
||||
The bit size of the <tt>value</tt> must be larger than
|
||||
the bit size of the destination type, <tt>ty2</tt>.
|
||||
Equal sized types are not allowed.</p>
|
||||
|
||||
<h5>Semantics:</h5>
|
||||
<p>The '<tt>trunc</tt>' instruction truncates the high order bits
|
||||
@ -4590,9 +4590,10 @@ entry:
|
||||
|
||||
<h5>Example:</h5>
|
||||
<pre>
|
||||
%X = trunc i32 257 to i8 <i>; yields i8:1</i>
|
||||
%Y = trunc i32 123 to i1 <i>; yields i1:true</i>
|
||||
%Z = trunc i32 122 to i1 <i>; yields i1:false</i>
|
||||
%X = trunc i32 257 to i8 <i>; yields i8:1</i>
|
||||
%Y = trunc i32 123 to i1 <i>; yields i1:true</i>
|
||||
%Z = trunc i32 122 to i1 <i>; yields i1:false</i>
|
||||
%W = trunc <2 x i16> <i16 8, i16 7> to <2 x i8> <i>; yields <i8 8, i8 7></i>
|
||||
</pre>
|
||||
|
||||
</div>
|
||||
@ -4651,10 +4652,11 @@ entry:
|
||||
<p>The '<tt>sext</tt>' sign extends <tt>value</tt> to the type <tt>ty2</tt>.</p>
|
||||
|
||||
<h5>Arguments:</h5>
|
||||
<p>The '<tt>sext</tt>' instruction takes a value to cast, which must be of
|
||||
<a href="#t_integer">integer</a> type, and a type to cast it to, which must
|
||||
also be of <a href="#t_integer">integer</a> type. The bit size of the
|
||||
<tt>value</tt> must be smaller than the bit size of the destination type,
|
||||
<p>The '<tt>sext</tt>' instruction takes a value to cast, and a type to cast it to.
|
||||
Both types must be <a href="#t_integer">integer</a> types, or vectors
|
||||
of the same number of integers.
|
||||
The bit size of the <tt>value</tt> must be smaller than
|
||||
the bit size of the destination type,
|
||||
<tt>ty2</tt>.</p>
|
||||
|
||||
<h5>Semantics:</h5>
|
||||
@ -4668,6 +4670,7 @@ entry:
|
||||
<pre>
|
||||
%X = sext i8 -1 to i16 <i>; yields i16:65535</i>
|
||||
%Y = sext i1 true to i32 <i>; yields i32:-1</i>
|
||||
%Z = sext <2 x i16> <i16 8, i16 7> to <2 x i32> <i>; yields <i32 8, i32 7></i>
|
||||
</pre>
|
||||
|
||||
</div>
|
||||
|
@ -3685,7 +3685,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
|
||||
}
|
||||
|
||||
// fold (sext (load x)) -> (sext (truncate (sextload x)))
|
||||
if (ISD::isNON_EXTLoad(N0.getNode()) &&
|
||||
// None of the supported targets knows how to perform load and sign extend on vectors
|
||||
// in one instruction. We only perform this transformation on scalars.
|
||||
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
|
||||
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
|
||||
TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
|
||||
bool DoXform = true;
|
||||
@ -4096,7 +4098,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
|
||||
}
|
||||
|
||||
// fold (aext (load x)) -> (aext (truncate (extload x)))
|
||||
if (ISD::isNON_EXTLoad(N0.getNode()) &&
|
||||
// None of the supported targets knows how to perform load and any_ext on vectors
|
||||
// in one instruction. We only perform this transformation on scalars.
|
||||
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
|
||||
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
|
||||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
|
||||
bool DoXform = true;
|
||||
@ -4506,14 +4510,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
|
||||
}
|
||||
|
||||
// See if we can simplify the input to this truncate through knowledge that
|
||||
// only the low bits are being used. For example "trunc (or (shl x, 8), y)"
|
||||
// -> trunc y
|
||||
SDValue Shorter =
|
||||
GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
|
||||
VT.getSizeInBits()));
|
||||
if (Shorter.getNode())
|
||||
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
|
||||
|
||||
// only the low bits are being used.
|
||||
// For example "trunc (or (shl x, 8), y)" -> trunc y
|
||||
// Currently we only perform this optimization on scalars because vectors
|
||||
// may have different active low bits.
|
||||
if (!VT.isVector()) {
|
||||
SDValue Shorter =
|
||||
GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
|
||||
VT.getSizeInBits()));
|
||||
if (Shorter.getNode())
|
||||
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
|
||||
}
|
||||
// fold (truncate (load x)) -> (smaller load x)
|
||||
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
|
||||
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
|
||||
|
77
test/CodeGen/X86/vec_anyext.ll
Normal file
77
test/CodeGen/X86/vec_anyext.ll
Normal file
@ -0,0 +1,77 @@
|
||||
; RUN: llc < %s -march=x86-64
|
||||
; PR 9267
|
||||
|
||||
define<4 x i16> @func_16_32() {
|
||||
%F = load <4 x i32>* undef
|
||||
%G = trunc <4 x i32> %F to <4 x i16>
|
||||
%H = load <4 x i32>* undef
|
||||
%Y = trunc <4 x i32> %H to <4 x i16>
|
||||
%T = add <4 x i16> %Y, %G
|
||||
store <4 x i16>%T , <4 x i16>* undef
|
||||
ret <4 x i16> %T
|
||||
}
|
||||
|
||||
define<4 x i16> @func_16_64() {
|
||||
%F = load <4 x i64>* undef
|
||||
%G = trunc <4 x i64> %F to <4 x i16>
|
||||
%H = load <4 x i64>* undef
|
||||
%Y = trunc <4 x i64> %H to <4 x i16>
|
||||
%T = xor <4 x i16> %Y, %G
|
||||
store <4 x i16>%T , <4 x i16>* undef
|
||||
ret <4 x i16> %T
|
||||
}
|
||||
|
||||
define<4 x i32> @func_32_64() {
|
||||
%F = load <4 x i64>* undef
|
||||
%G = trunc <4 x i64> %F to <4 x i32>
|
||||
%H = load <4 x i64>* undef
|
||||
%Y = trunc <4 x i64> %H to <4 x i32>
|
||||
%T = or <4 x i32> %Y, %G
|
||||
ret <4 x i32> %T
|
||||
}
|
||||
|
||||
define<4 x i8> @func_8_16() {
|
||||
%F = load <4 x i16>* undef
|
||||
%G = trunc <4 x i16> %F to <4 x i8>
|
||||
%H = load <4 x i16>* undef
|
||||
%Y = trunc <4 x i16> %H to <4 x i8>
|
||||
%T = add <4 x i8> %Y, %G
|
||||
ret <4 x i8> %T
|
||||
}
|
||||
|
||||
define<4 x i8> @func_8_32() {
|
||||
%F = load <4 x i32>* undef
|
||||
%G = trunc <4 x i32> %F to <4 x i8>
|
||||
%H = load <4 x i32>* undef
|
||||
%Y = trunc <4 x i32> %H to <4 x i8>
|
||||
%T = sub <4 x i8> %Y, %G
|
||||
ret <4 x i8> %T
|
||||
}
|
||||
|
||||
define<4 x i8> @func_8_64() {
|
||||
%F = load <4 x i64>* undef
|
||||
%G = trunc <4 x i64> %F to <4 x i8>
|
||||
%H = load <4 x i64>* undef
|
||||
%Y = trunc <4 x i64> %H to <4 x i8>
|
||||
%T = add <4 x i8> %Y, %G
|
||||
ret <4 x i8> %T
|
||||
}
|
||||
|
||||
define<4 x i16> @const_16_32() {
|
||||
%G = trunc <4 x i32> <i32 0, i32 3, i32 8, i32 7> to <4 x i16>
|
||||
ret <4 x i16> %G
|
||||
}
|
||||
|
||||
define<4 x i16> @const_16_64() {
|
||||
%G = trunc <4 x i64> <i64 0, i64 3, i64 8, i64 7> to <4 x i16>
|
||||
ret <4 x i16> %G
|
||||
}
|
||||
|
||||
define void @bugOnTruncBitwidthReduce() nounwind {
|
||||
meh:
|
||||
%0 = xor <4 x i64> zeroinitializer, zeroinitializer
|
||||
%1 = trunc <4 x i64> %0 to <4 x i32>
|
||||
%2 = lshr <4 x i32> %1, <i32 18, i32 18, i32 18, i32 18>
|
||||
%3 = xor <4 x i32> %2, %1
|
||||
ret void
|
||||
}
|
69
test/CodeGen/X86/vec_sext.ll
Normal file
69
test/CodeGen/X86/vec_sext.ll
Normal file
@ -0,0 +1,69 @@
|
||||
; RUN: llc < %s -march=x86-64
|
||||
; PR 9267
|
||||
|
||||
define<4 x i32> @func_16_32() {
|
||||
%F = load <4 x i16>* undef
|
||||
%G = sext <4 x i16> %F to <4 x i32>
|
||||
%H = load <4 x i16>* undef
|
||||
%Y = sext <4 x i16> %H to <4 x i32>
|
||||
%T = add <4 x i32> %Y, %G
|
||||
store <4 x i32>%T , <4 x i32>* undef
|
||||
ret <4 x i32> %T
|
||||
}
|
||||
|
||||
define<4 x i64> @func_16_64() {
|
||||
%F = load <4 x i16>* undef
|
||||
%G = sext <4 x i16> %F to <4 x i64>
|
||||
%H = load <4 x i16>* undef
|
||||
%Y = sext <4 x i16> %H to <4 x i64>
|
||||
%T = xor <4 x i64> %Y, %G
|
||||
store <4 x i64>%T , <4 x i64>* undef
|
||||
ret <4 x i64> %T
|
||||
}
|
||||
|
||||
define<4 x i64> @func_32_64() {
|
||||
%F = load <4 x i32>* undef
|
||||
%G = sext <4 x i32> %F to <4 x i64>
|
||||
%H = load <4 x i32>* undef
|
||||
%Y = sext <4 x i32> %H to <4 x i64>
|
||||
%T = or <4 x i64> %Y, %G
|
||||
ret <4 x i64> %T
|
||||
}
|
||||
|
||||
define<4 x i16> @func_8_16() {
|
||||
%F = load <4 x i8>* undef
|
||||
%G = sext <4 x i8> %F to <4 x i16>
|
||||
%H = load <4 x i8>* undef
|
||||
%Y = sext <4 x i8> %H to <4 x i16>
|
||||
%T = add <4 x i16> %Y, %G
|
||||
ret <4 x i16> %T
|
||||
}
|
||||
|
||||
define<4 x i32> @func_8_32() {
|
||||
%F = load <4 x i8>* undef
|
||||
%G = sext <4 x i8> %F to <4 x i32>
|
||||
%H = load <4 x i8>* undef
|
||||
%Y = sext <4 x i8> %H to <4 x i32>
|
||||
%T = sub <4 x i32> %Y, %G
|
||||
ret <4 x i32> %T
|
||||
}
|
||||
|
||||
define<4 x i64> @func_8_64() {
|
||||
%F = load <4 x i8>* undef
|
||||
%G = sext <4 x i8> %F to <4 x i64>
|
||||
%H = load <4 x i8>* undef
|
||||
%Y = sext <4 x i8> %H to <4 x i64>
|
||||
%T = add <4 x i64> %Y, %G
|
||||
ret <4 x i64> %T
|
||||
}
|
||||
|
||||
define<4 x i32> @const_16_32() {
|
||||
%G = sext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i32>
|
||||
ret <4 x i32> %G
|
||||
}
|
||||
|
||||
define<4 x i64> @const_16_64() {
|
||||
%G = sext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i64>
|
||||
ret <4 x i64> %G
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user