mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-13 23:18:58 +00:00
edfeeb6d70
The PowerPC backend, somewhat embarrassingly, did not generate an optimal-length sequence of instructions for a 32-bit bswap. While adding a pattern for the bswap intrinsic to fix this would not have been terribly difficult, doing so would not have addressed the real problem: we had been generating poor code for many bit-permuting operations (by which I mean things like byte-swap that permute the bits of one or more inputs around in various ways). Here are some initial steps toward solving this deficiency.

Bit-permuting operations are represented, at the SDAG level, using ISD::ROTL, SHL, SRL, AND and OR (mostly with constant second operands). Looking back through these operations, we can build up a description of the bits in the resulting value in terms of bits of one or more input values (and constant zeros). For each bit, we compute the rotation amount from the original value, and then group consecutive (value, rotation factor) bits into groups. Groups sharing these attributes are then collected and sorted, and we can then instruction-select the entire permutation using a combination of masked rotations (rlwinm), immediate ANDs (andi/andis), and masked rotation inserts (rlwimi).

The result is that instead of lowering an i32 bswap as:

    rlwinm 5, 3, 24, 16, 23
    rlwinm 4, 3, 24, 0, 7
    rlwimi 4, 3, 8, 8, 15
    rlwimi 5, 3, 8, 24, 31
    rlwimi 4, 5, 0, 16, 31

we now produce:

    rlwinm 4, 3, 8, 0, 31
    rlwimi 4, 3, 24, 16, 23
    rlwimi 4, 3, 24, 0, 7

and for the 'test6' example in the PowerPC/README.txt file:

    unsigned test6(unsigned x) {
      return ((x & 0x00FF0000) >> 16) | ((x & 0x000000FF) << 16);
    }

we used to produce:

    lis 4, 255
    rlwinm 3, 3, 16, 0, 31
    ori 4, 4, 255
    and 3, 3, 4

and now we produce:

    rlwinm 4, 3, 16, 24, 31
    rlwimi 4, 3, 16, 8, 15

and, as a nice bonus, this fixes the FIXME in test/CodeGen/PowerPC/rlwimi-and.ll.

This commit does not include instruction selection for i64 operations; those will come later.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224318 91177308-0d34-0410-b5e6-96231b3b80d8
30 lines
1.2 KiB
LLVM
30 lines
1.2 KiB
LLVM
; All of these ands and shifts should be folded into rlwimi instructions.
;
; The RUN lines below compile this file for 32-bit PowerPC and then check the
; generated assembly: exactly four lines must mention rlwimi, and no srwi or
; slwi (standalone shift) mnemonics may appear.
; RUN: llc < %s -march=ppc32 -o %t
; RUN: grep rlwimi %t | count 4
; RUN: not grep srwi %t
; RUN: not grep slwi %t

; test1(srcA, srcB, alpha) -> i16
;
; Spreads selected bit fields of each source word apart, forms
;   spread(srcA) * alpha + spread(srcB) * (32 - alpha)
; and then gathers the fields of that sum back into a 16-bit result.
;
; Masks used (decimal as written in the IR = hex):
;   32505856 = 0x01F00000   bits 20..24
;   31775    = 0x7C1F       bits 0..4 and 10..14
;   992      = 0x03E0       bits 5..9
;
; NOTE(review): the field layout looks like a 5-5-5 packed-pixel blend with
; alpha expected in 0..32 -- that is an inference from the constants, not
; something this file states; confirm before relying on it.
define i16 @test1(i32 %srcA, i32 %srcB, i32 %alpha) nounwind {
entry:
  ; Spread srcA: source bits 5..9 move up to bits 20..24, while bits 0..4 and
  ; 10..14 stay in place.
  %tmp.1 = shl i32 %srcA, 15              ; <i32> [#uses=1]
  %tmp.4 = and i32 %tmp.1, 32505856       ; <i32> [#uses=1]
  %tmp.6 = and i32 %srcA, 31775           ; <i32> [#uses=1]
  %tmp.7 = or i32 %tmp.4, %tmp.6          ; <i32> [#uses=1]

  ; Spread srcB with the same shift and masks.
  %tmp.9 = shl i32 %srcB, 15              ; <i32> [#uses=1]
  %tmp.12 = and i32 %tmp.9, 32505856      ; <i32> [#uses=1]
  %tmp.14 = and i32 %srcB, 31775          ; <i32> [#uses=1]
  %tmp.15 = or i32 %tmp.12, %tmp.14       ; <i32> [#uses=1]

  ; Weighted sum: spread(srcA) * alpha + spread(srcB) * (32 - alpha).
  %tmp.18 = mul i32 %tmp.7, %alpha        ; <i32> [#uses=1]
  %tmp.20 = sub i32 32, %alpha            ; <i32> [#uses=1]
  %tmp.22 = mul i32 %tmp.15, %tmp.20      ; <i32> [#uses=1]
  %tmp.23 = add i32 %tmp.22, %tmp.18      ; <i32> [#uses=2]

  ; Gather, part 1: shift the sum right by 5 and keep result bits 0..4 and
  ; 10..14 (i.e. sum bits 5..9 and 15..19).
  %tmp.27 = lshr i32 %tmp.23, 5           ; <i32> [#uses=1]
  %tmp.28 = trunc i32 %tmp.27 to i16      ; <i16> [#uses=1]
  %tmp.29 = and i16 %tmp.28, 31775        ; <i16> [#uses=1]

  ; Gather, part 2: shift the sum right by 20 and keep result bits 5..9
  ; (i.e. sum bits 25..29).
  %tmp.33 = lshr i32 %tmp.23, 20          ; <i32> [#uses=1]
  %tmp.34 = trunc i32 %tmp.33 to i16      ; <i16> [#uses=1]
  %tmp.35 = and i16 %tmp.34, 992          ; <i16> [#uses=1]

  ; Merge the two gathered halves into the final 16-bit value.
  %tmp.36 = or i16 %tmp.29, %tmp.35       ; <i16> [#uses=1]
  ret i16 %tmp.36
}