llvm/test/CodeGen/X86/avx-shift.ll

139 lines
4.1 KiB
LLVM

; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
;;; Shift left
; CHECK: vpslld
; CHECK: vpslld
define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
%s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
2>
ret <8 x i32> %s
}
; CHECK: vpsllw
; CHECK: vpsllw
define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
%s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
ret <16 x i16> %s
}
; CHECK: vpsllq
; CHECK: vpsllq
define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
%s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
ret <4 x i64> %s
}
;;; Logical Shift right
; CHECK: vpsrld
; CHECK: vpsrld
define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
%s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
2>
ret <8 x i32> %s
}
; CHECK: vpsrlw
; CHECK: vpsrlw
define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
%s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
ret <16 x i16> %s
}
; CHECK: vpsrlq
; CHECK: vpsrlq
define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
%s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
ret <4 x i64> %s
}
;;; Arithmetic Shift right
; CHECK: vpsrad
; CHECK: vpsrad
define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
%s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32
2>
ret <8 x i32> %s
}
; CHECK: vpsraw
; CHECK: vpsraw
define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
%s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
ret <16 x i16> %s
}
; CHECK: vpsrlw
; CHECK: pand
; CHECK: pxor
; CHECK: psubb
; CHECK: vpsrlw
; CHECK: pand
; CHECK: pxor
; CHECK: psubb
define <32 x i8> @vshift09(<32 x i8> %a) nounwind readnone {
%s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
ret <32 x i8> %s
}
; CHECK: pxor
; CHECK: pcmpgtb
; CHECK: pcmpgtb
define <32 x i8> @vshift10(<32 x i8> %a) nounwind readnone {
%s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
ret <32 x i8> %s
}
; CHECK: vpsrlw
; CHECK: pand
; CHECK: vpsrlw
; CHECK: pand
define <32 x i8> @vshift11(<32 x i8> %a) nounwind readnone {
%s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
ret <32 x i8> %s
}
; CHECK: vpsllw
; CHECK: pand
; CHECK: vpsllw
; CHECK: pand
define <32 x i8> @vshift12(<32 x i8> %a) nounwind readnone {
%s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
ret <32 x i8> %s
}
;;; Support variable shifts
; CHECK: _vshift08
; CHECK: vextractf128 $1
; CHECK: vpslld $23
; CHECK: vextractf128 $1
; CHECK: vpslld $23
define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
%bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
ret <8 x i32> %bitop
}
;;; Uses shifts for sign extension
; CHECK: _sext_v16i16
; CHECK: vpsllw
; CHECK: vpsraw
; CHECK: vpsllw
; CHECK: vpsraw
; CHECK: vinsertf128
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
%b = trunc <16 x i16> %a to <16 x i8>
%c = sext <16 x i8> %b to <16 x i16>
ret <16 x i16> %c
}
; CHECK: _sext_v8i32
; CHECK: vpslld
; CHECK: vpsrad
; CHECK: vpslld
; CHECK: vpsrad
; CHECK: vinsertf128
define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
%b = trunc <8 x i32> %a to <8 x i16>
%c = sext <8 x i16> %b to <8 x i32>
ret <8 x i32> %c
}