133 lines
3.9 KiB
C

/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
// sse2
#if _M_SSE >= 0x200
#include <xmmintrin.h>
#include <emmintrin.h>
// Denormals-are-zero (DAZ) control bit of the MXCSR register; defined here
// only when the intrinsic headers included above do not already provide it.
#ifndef _MM_DENORMALS_ARE_ZERO
#define _MM_DENORMALS_ARE_ZERO 0x0040
#endif
// Combined MXCSR control word: denormals-are-zero, all SSE floating-point
// exceptions masked, round-to-nearest, flush-to-zero enabled.
// NOTE(review): presumably loaded with _mm_setcsr() by users of this header --
// no use is visible in this file, confirm against callers.
#define MXCSR (_MM_DENORMALS_ARE_ZERO | _MM_MASK_MASK | _MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON)
// Fallback definitions of the SSE2 bit-cast intrinsics for MSVC releases older
// than _MSC_VER 1500 (Visual Studio 2008). Each one reinterprets the 128 bits
// of its argument as another vector type without changing any bit.
//
// The guard tests defined(_MSC_VER) explicitly: an undefined macro evaluates
// to 0 inside #if, so a bare "_MSC_VER < 1500" would also enable these
// definitions on every non-MSVC compiler, where they collide with the versions
// already supplied by <emmintrin.h>.
#if defined(_MSC_VER) && _MSC_VER < 1500
__forceinline __m128i _mm_castps_si128(__m128 a) {return *(__m128i*)&a;}
__forceinline __m128 _mm_castsi128_ps(__m128i a) {return *(__m128*)&a;}
__forceinline __m128i _mm_castpd_si128(__m128d a) {return *(__m128i*)&a;}
__forceinline __m128d _mm_castsi128_pd(__m128i a) {return *(__m128d*)&a;}
__forceinline __m128d _mm_castps_pd(__m128 a) {return *(__m128d*)&a;}
__forceinline __m128 _mm_castpd_ps(__m128d a) {return *(__m128*)&a;}
#endif
// Bit-pattern constants: each 32-bit pattern is broadcast to all four lanes and
// then viewed as packed floats. The name of each constant is its hex pattern.
const __m128 ps_3f800000 = _mm_castsi128_ps(_mm_set1_epi32((int)0x3f800000)); // 1.0f
const __m128 ps_4b000000 = _mm_castsi128_ps(_mm_set1_epi32((int)0x4b000000)); // 8388608.0f (2^23)
const __m128 ps_7fffffff = _mm_castsi128_ps(_mm_set1_epi32((int)0x7fffffff)); // every bit except the sign bit
const __m128 ps_80000000 = _mm_castsi128_ps(_mm_set1_epi32((int)0x80000000)); // sign bit only
const __m128 ps_ffffffff = _mm_castsi128_ps(_mm_set1_epi32((int)0xffffffff)); // all bits set
// Negates all four float lanes of r by toggling the sign bit of each lane.
__forceinline __m128 _mm_neg_ps(__m128 r)
{
	const __m128 sign_bits = ps_80000000;
	return _mm_xor_ps(r, sign_bits);
}
// Returns the absolute value of all four float lanes of r by clearing the
// sign bit of each lane.
__forceinline __m128 _mm_abs_ps(__m128 r)
{
	const __m128 magnitude_bits = ps_7fffffff;
	return _mm_and_ps(r, magnitude_bits);
}
// In-place transpose of four __m128i rows treated as a 4x4 matrix of 32-bit
// lanes: afterwards rowN holds lane N of the original row0..row3, in that order.
// The data is only shuffled (through the float shuffle unit); no arithmetic is
// performed on the lane values.
//
// Each argument must be a modifiable __m128i lvalue and is expanded three
// times. The temporaries carry a mm_tr4_ prefix and trailing underscore so that
// arguments with common names such as tmp0..tmp3 are not captured by the
// macro's own locals. The expansion is a bare brace block, so call sites work
// with or without a trailing semicolon.
#define _MM_TRANSPOSE4_SI128(row0, row1, row2, row3) \
{ \
	__m128 mm_tr4_lo01_ = _mm_shuffle_ps(_mm_castsi128_ps(row0), _mm_castsi128_ps(row1), 0x44); \
	__m128 mm_tr4_hi01_ = _mm_shuffle_ps(_mm_castsi128_ps(row0), _mm_castsi128_ps(row1), 0xEE); \
	__m128 mm_tr4_lo23_ = _mm_shuffle_ps(_mm_castsi128_ps(row2), _mm_castsi128_ps(row3), 0x44); \
	__m128 mm_tr4_hi23_ = _mm_shuffle_ps(_mm_castsi128_ps(row2), _mm_castsi128_ps(row3), 0xEE); \
	(row0) = _mm_castps_si128(_mm_shuffle_ps(mm_tr4_lo01_, mm_tr4_lo23_, 0x88)); \
	(row1) = _mm_castps_si128(_mm_shuffle_ps(mm_tr4_lo01_, mm_tr4_lo23_, 0xDD)); \
	(row2) = _mm_castps_si128(_mm_shuffle_ps(mm_tr4_hi01_, mm_tr4_hi23_, 0x88)); \
	(row3) = _mm_castps_si128(_mm_shuffle_ps(mm_tr4_hi01_, mm_tr4_hi23_, 0xDD)); \
}
// Approximate per-lane reciprocal of r: starts from the hardware estimate
// t = _mm_rcp_ps(r) and refines it with one Newton-Raphson step,
// returning (t + t) - (t * t) * r.
__forceinline __m128 _mm_rcpnr_ps(__m128 r)
{
	const __m128 estimate = _mm_rcp_ps(r);
	const __m128 doubled = _mm_add_ps(estimate, estimate);
	const __m128 squared = _mm_mul_ps(estimate, estimate);
	const __m128 correction = _mm_mul_ps(squared, r);
	return _mm_sub_ps(doubled, correction);
}
#else
#error TODO: GSVector4 and GSRasterizer needs SSE2
#endif
// ssse3
#if _M_SSE >= 0x301
#include <tmmintrin.h>
#endif
// sse4
#if _M_SSE >= 0x401
#include <smmintrin.h>
#else
// SSE2 stand-ins for the SSE4.1 variable blends. As with the hardware
// instructions, only the most significant bit of each mask element (32-bit
// lane for _ps, byte for _epi8) decides the result: bit clear selects a, bit
// set selects b. That bit is first broadcast across the whole element so the
// and/andnot/or select gives the same answer for any mask value, not just for
// all-ones / all-zeros masks.
// Macro caveat: mask is expanded twice, so it must be free of side effects.
#define _mm_blendv_ps(a, b, mask) \
	_mm_or_ps( \
		_mm_andnot_ps(_mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(mask), 31)), a), \
		_mm_and_ps(_mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(mask), 31)), b))
#define _mm_blendv_epi8(a, b, mask) \
	_mm_or_si128( \
		_mm_andnot_si128(_mm_cmpgt_epi8(_mm_setzero_si128(), mask), a), \
		_mm_and_si128(_mm_cmpgt_epi8(_mm_setzero_si128(), mask), b))
// SSE2 fallback that rounds each lane of x to the nearest integral value.
// Unlike the SSE4.1 intrinsic of the same name it takes no rounding-mode
// argument.
//
// Adding and then subtracting 2^23 (carrying x's sign) pushes the fraction bits
// out of the 24-bit significand, so the addition's own rounding does the work.
// This assumes round-to-nearest is the active SSE rounding mode.
//
// Lanes with |x| >= 2^23 are already integral, and for them the addition can
// itself be inexact (8388609.0f + 8388608.0f is not representable and rounds
// to 16777216.0f), which would corrupt the value. Those lanes, and NaN lanes,
// are therefore returned unchanged.
__forceinline __m128 _mm_round_ps(__m128 x)
{
	const __m128 magic = _mm_or_ps(_mm_and_ps(ps_80000000, x), ps_4b000000);
	const __m128 rounded = _mm_sub_ps(_mm_add_ps(x, magic), magic);
	// All-ones where |x| < 2^23; the comparison is false for NaN.
	const __m128 has_fraction = _mm_cmplt_ps(_mm_andnot_ps(ps_80000000, x), ps_4b000000);
	return _mm_or_ps(_mm_and_ps(has_fraction, rounded), _mm_andnot_ps(has_fraction, x));
}
// SSE2 fallback for per-lane floor: takes the result of _mm_round_ps(x) and
// subtracts 1.0f in every lane where x is less than that result.
__forceinline __m128 _mm_floor_ps(__m128 x)
{
	const __m128 nearest = _mm_round_ps(x);
	const __m128 went_up = _mm_cmplt_ps(x, nearest);
	const __m128 step = _mm_and_ps(went_up, ps_3f800000);
	return _mm_sub_ps(nearest, step);
}
// SSE2 fallback for per-lane ceil: takes the result of _mm_round_ps(x) and
// adds 1.0f in every lane where x is greater than that result.
__forceinline __m128 _mm_ceil_ps(__m128 x)
{
	const __m128 nearest = _mm_round_ps(x);
	const __m128 went_down = _mm_cmpgt_ps(x, nearest);
	const __m128 step = _mm_and_ps(went_down, ps_3f800000);
	return _mm_add_ps(nearest, step);
}
#endif