mirror of
https://github.com/stenzek/duckstation.git
synced 2024-11-27 08:00:36 +00:00
Misc: More iNN => sNN
This commit is contained in:
parent
986e207cff
commit
e1c876671a
@ -93,17 +93,17 @@ public:
|
||||
|
||||
ALWAYS_INLINE operator int32x2_t() const { return v2s; }
|
||||
|
||||
ALWAYS_INLINE GSVector2i sat_i8(const GSVector2i& min, const GSVector2i& max) const
|
||||
ALWAYS_INLINE GSVector2i sat_s8(const GSVector2i& min, const GSVector2i& max) const
|
||||
{
|
||||
return max_i8(min).min_i8(max);
|
||||
return max_s8(min).min_s8(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector2i sat_i16(const GSVector2i& min, const GSVector2i& max) const
|
||||
ALWAYS_INLINE GSVector2i sat_s16(const GSVector2i& min, const GSVector2i& max) const
|
||||
{
|
||||
return max_i16(min).min_i16(max);
|
||||
return max_s16(min).min_s16(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector2i sat_i32(const GSVector2i& min, const GSVector2i& max) const
|
||||
ALWAYS_INLINE GSVector2i sat_s32(const GSVector2i& min, const GSVector2i& max) const
|
||||
{
|
||||
return max_i32(min).min_i32(max);
|
||||
return max_s32(min).min_s32(max);
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector2i sat_u8(const GSVector2i& min, const GSVector2i& max) const
|
||||
@ -119,29 +119,29 @@ public:
|
||||
return max_u32(min).min_u32(max);
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector2i min_i8(const GSVector2i& v) const
|
||||
ALWAYS_INLINE GSVector2i min_s8(const GSVector2i& v) const
|
||||
{
|
||||
return GSVector2i(vreinterpret_s32_s8(vmin_s8(vreinterpret_s8_s32(v2s), vreinterpret_s8_s32(v.v2s))));
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector2i max_i8(const GSVector2i& v) const
|
||||
ALWAYS_INLINE GSVector2i max_s8(const GSVector2i& v) const
|
||||
{
|
||||
return GSVector2i(vreinterpret_s32_s8(vmax_s8(vreinterpret_s8_s32(v2s), vreinterpret_s8_s32(v.v2s))));
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector2i min_i16(const GSVector2i& v) const
|
||||
ALWAYS_INLINE GSVector2i min_s16(const GSVector2i& v) const
|
||||
{
|
||||
return GSVector2i(vreinterpret_s32_s16(vmin_s16(vreinterpret_s16_s32(v2s), vreinterpret_s16_s32(v.v2s))));
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector2i max_i16(const GSVector2i& v) const
|
||||
ALWAYS_INLINE GSVector2i max_s16(const GSVector2i& v) const
|
||||
{
|
||||
return GSVector2i(vreinterpret_s32_s16(vmax_s16(vreinterpret_s16_s32(v2s), vreinterpret_s16_s32(v.v2s))));
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector2i min_i32(const GSVector2i& v) const { return GSVector2i(vmin_s32(v2s, v.v2s)); }
|
||||
ALWAYS_INLINE GSVector2i min_s32(const GSVector2i& v) const { return GSVector2i(vmin_s32(v2s, v.v2s)); }
|
||||
|
||||
ALWAYS_INLINE GSVector2i max_i32(const GSVector2i& v) const { return GSVector2i(vmax_s32(v2s, v.v2s)); }
|
||||
ALWAYS_INLINE GSVector2i max_s32(const GSVector2i& v) const { return GSVector2i(vmax_s32(v2s, v.v2s)); }
|
||||
|
||||
ALWAYS_INLINE GSVector2i min_u8(const GSVector2i& v) const
|
||||
{
|
||||
@ -1136,37 +1136,37 @@ public:
|
||||
#endif
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector4i runion(const GSVector4i& a) const { return min_i32(a).upl64(max_i32(a).srl<8>()); }
|
||||
ALWAYS_INLINE GSVector4i runion(const GSVector4i& a) const { return min_s32(a).upl64(max_s32(a).srl<8>()); }
|
||||
|
||||
ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& a) const { return sat_i32(a); }
|
||||
ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& a) const { return sat_s32(a); }
|
||||
ALWAYS_INLINE bool rintersects(const GSVector4i& v) const { return !rintersect(v).rempty(); }
|
||||
ALWAYS_INLINE bool rcontains(const GSVector4i& v) const { return rintersect(v).eq(v); }
|
||||
|
||||
ALWAYS_INLINE u32 rgba32() const { return static_cast<u32>(ps32().pu16().extract32<0>()); }
|
||||
|
||||
ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& min, const GSVector4i& max) const
|
||||
ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& min, const GSVector4i& max) const
|
||||
{
|
||||
return max_i8(min).min_i8(max);
|
||||
return max_s8(min).min_s8(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& minmax) const
|
||||
ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& minmax) const
|
||||
{
|
||||
return max_i8(minmax.xyxy()).min_i8(minmax.zwzw());
|
||||
return max_s8(minmax.xyxy()).min_s8(minmax.zwzw());
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& min, const GSVector4i& max) const
|
||||
ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& min, const GSVector4i& max) const
|
||||
{
|
||||
return max_i16(min).min_i16(max);
|
||||
return max_s16(min).min_s16(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& minmax) const
|
||||
ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& minmax) const
|
||||
{
|
||||
return max_i16(minmax.xyxy()).min_i16(minmax.zwzw());
|
||||
return max_s16(minmax.xyxy()).min_s16(minmax.zwzw());
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i32(const GSVector4i& min, const GSVector4i& max) const
|
||||
ALWAYS_INLINE GSVector4i sat_s32(const GSVector4i& min, const GSVector4i& max) const
|
||||
{
|
||||
return max_i32(min).min_i32(max);
|
||||
return max_s32(min).min_s32(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i32(const GSVector4i& minmax) const
|
||||
ALWAYS_INLINE GSVector4i sat_s32(const GSVector4i& minmax) const
|
||||
{
|
||||
return max_i32(minmax.xyxy()).min_i32(minmax.zwzw());
|
||||
return max_s32(minmax.xyxy()).min_s32(minmax.zwzw());
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector4i sat_u8(const GSVector4i& min, const GSVector4i& max) const
|
||||
@ -1194,29 +1194,29 @@ public:
|
||||
return max_u32(minmax.xyxy()).min_u32(minmax.zwzw());
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector4i min_i8(const GSVector4i& v) const
|
||||
ALWAYS_INLINE GSVector4i min_s8(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(vreinterpretq_s32_s8(vminq_s8(vreinterpretq_s8_s32(v4s), vreinterpretq_s8_s32(v.v4s))));
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector4i max_i8(const GSVector4i& v) const
|
||||
ALWAYS_INLINE GSVector4i max_s8(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(vreinterpretq_s32_s8(vmaxq_s8(vreinterpretq_s8_s32(v4s), vreinterpretq_s8_s32(v.v4s))));
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector4i min_i16(const GSVector4i& v) const
|
||||
ALWAYS_INLINE GSVector4i min_s16(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(vreinterpretq_s32_s16(vminq_s16(vreinterpretq_s16_s32(v4s), vreinterpretq_s16_s32(v.v4s))));
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector4i max_i16(const GSVector4i& v) const
|
||||
ALWAYS_INLINE GSVector4i max_s16(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(vreinterpretq_s32_s16(vmaxq_s16(vreinterpretq_s16_s32(v4s), vreinterpretq_s16_s32(v.v4s))));
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector4i min_i32(const GSVector4i& v) const { return GSVector4i(vminq_s32(v4s, v.v4s)); }
|
||||
ALWAYS_INLINE GSVector4i min_s32(const GSVector4i& v) const { return GSVector4i(vminq_s32(v4s, v.v4s)); }
|
||||
|
||||
ALWAYS_INLINE GSVector4i max_i32(const GSVector4i& v) const { return GSVector4i(vmaxq_s32(v4s, v.v4s)); }
|
||||
ALWAYS_INLINE GSVector4i max_s32(const GSVector4i& v) const { return GSVector4i(vmaxq_s32(v4s, v.v4s)); }
|
||||
|
||||
ALWAYS_INLINE GSVector4i min_u8(const GSVector4i& v) const
|
||||
{
|
||||
|
@ -130,17 +130,17 @@ public:
|
||||
y = i;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector2i sat_i8(const GSVector2i& min, const GSVector2i& max) const
|
||||
ALWAYS_INLINE GSVector2i sat_s8(const GSVector2i& min, const GSVector2i& max) const
|
||||
{
|
||||
return max_i8(min).min_i8(max);
|
||||
return max_s8(min).min_s8(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector2i sat_i16(const GSVector2i& min, const GSVector2i& max) const
|
||||
ALWAYS_INLINE GSVector2i sat_s16(const GSVector2i& min, const GSVector2i& max) const
|
||||
{
|
||||
return max_i16(min).min_i16(max);
|
||||
return max_s16(min).min_s16(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector2i sat_i32(const GSVector2i& min, const GSVector2i& max) const
|
||||
ALWAYS_INLINE GSVector2i sat_s32(const GSVector2i& min, const GSVector2i& max) const
|
||||
{
|
||||
return max_i32(min).min_i32(max);
|
||||
return max_s32(min).min_s32(max);
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector2i sat_u8(const GSVector2i& min, const GSVector2i& max) const
|
||||
@ -156,12 +156,12 @@ public:
|
||||
return max_u32(min).min_u32(max);
|
||||
}
|
||||
|
||||
GSVector2i min_i8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = std::min(S8[i], v.S8[i])); }
|
||||
GSVector2i max_i8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = std::max(S8[i], v.S8[i])); }
|
||||
GSVector2i min_i16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = std::min(S16[i], v.S16[i])); }
|
||||
GSVector2i max_i16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = std::max(S16[i], v.S16[i])); }
|
||||
GSVector2i min_i32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = std::min(S32[i], v.S32[i])); }
|
||||
GSVector2i max_i32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = std::max(S32[i], v.S32[i])); }
|
||||
GSVector2i min_s8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = std::min(S8[i], v.S8[i])); }
|
||||
GSVector2i max_s8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = std::max(S8[i], v.S8[i])); }
|
||||
GSVector2i min_s16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = std::min(S16[i], v.S16[i])); }
|
||||
GSVector2i max_s16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = std::max(S16[i], v.S16[i])); }
|
||||
GSVector2i min_s32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = std::min(S32[i], v.S32[i])); }
|
||||
GSVector2i max_s32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = std::max(S32[i], v.S32[i])); }
|
||||
|
||||
GSVector2i min_u8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = std::min(U8[i], v.U8[i])); }
|
||||
GSVector2i max_u8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = std::max(U8[i], v.U8[i])); }
|
||||
@ -952,37 +952,37 @@ public:
|
||||
ALWAYS_INLINE bool rempty() const { return lt32(zwzw()).mask() != 0x00ff; }
|
||||
|
||||
// TODO: Optimize for no-simd, this generates crap code.
|
||||
ALWAYS_INLINE GSVector4i runion(const GSVector4i& v) const { return min_i32(v).upl64(max_i32(v).srl<8>()); }
|
||||
ALWAYS_INLINE GSVector4i runion(const GSVector4i& v) const { return min_s32(v).upl64(max_s32(v).srl<8>()); }
|
||||
|
||||
ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& v) const { return sat_i32(v); }
|
||||
ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& v) const { return sat_s32(v); }
|
||||
ALWAYS_INLINE bool rintersects(const GSVector4i& v) const { return !rintersect(v).rempty(); }
|
||||
ALWAYS_INLINE bool rcontains(const GSVector4i& v) const { return rintersect(v).eq(v); }
|
||||
|
||||
ALWAYS_INLINE u32 rgba32() const { return static_cast<u32>(ps32().pu16().extract32<0>()); }
|
||||
|
||||
ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& min, const GSVector4i& max) const
|
||||
ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& min, const GSVector4i& max) const
|
||||
{
|
||||
return max_i8(min).min_i8(max);
|
||||
return max_s8(min).min_s8(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& minmax) const
|
||||
ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& minmax) const
|
||||
{
|
||||
return max_i8(minmax.xyxy()).min_i8(minmax.zwzw());
|
||||
return max_s8(minmax.xyxy()).min_s8(minmax.zwzw());
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& min, const GSVector4i& max) const
|
||||
ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& min, const GSVector4i& max) const
|
||||
{
|
||||
return max_i16(min).min_i16(max);
|
||||
return max_s16(min).min_s16(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& minmax) const
|
||||
ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& minmax) const
|
||||
{
|
||||
return max_i16(minmax.xyxy()).min_i16(minmax.zwzw());
|
||||
return max_s16(minmax.xyxy()).min_s16(minmax.zwzw());
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i32(const GSVector4i& min, const GSVector4i& max) const
|
||||
ALWAYS_INLINE GSVector4i sat_s32(const GSVector4i& min, const GSVector4i& max) const
|
||||
{
|
||||
return max_i32(min).min_i32(max);
|
||||
return max_s32(min).min_s32(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i32(const GSVector4i& minmax) const
|
||||
ALWAYS_INLINE GSVector4i sat_s32(const GSVector4i& minmax) const
|
||||
{
|
||||
return max_i32(minmax.xyxy()).min_i32(minmax.zwzw());
|
||||
return max_s32(minmax.xyxy()).min_s32(minmax.zwzw());
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector4i sat_u8(const GSVector4i& min, const GSVector4i& max) const
|
||||
@ -1010,12 +1010,12 @@ public:
|
||||
return max_u32(minmax.xyxy()).min_u32(minmax.zwzw());
|
||||
}
|
||||
|
||||
GSVector4i min_i8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = std::min(S8[i], v.S8[i])); }
|
||||
GSVector4i max_i8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = std::max(S8[i], v.S8[i])); }
|
||||
GSVector4i min_i16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = std::min(S16[i], v.S16[i])); }
|
||||
GSVector4i max_i16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = std::max(S16[i], v.S16[i])); }
|
||||
GSVector4i min_i32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = std::min(S32[i], v.S32[i])); }
|
||||
GSVector4i max_i32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = std::max(S32[i], v.S32[i])); }
|
||||
GSVector4i min_s8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = std::min(S8[i], v.S8[i])); }
|
||||
GSVector4i max_s8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = std::max(S8[i], v.S8[i])); }
|
||||
GSVector4i min_s16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = std::min(S16[i], v.S16[i])); }
|
||||
GSVector4i max_s16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = std::max(S16[i], v.S16[i])); }
|
||||
GSVector4i min_s32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = std::min(S32[i], v.S32[i])); }
|
||||
GSVector4i max_s32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = std::max(S32[i], v.S32[i])); }
|
||||
|
||||
GSVector4i min_u8(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = std::min(U8[i], v.U8[i])); }
|
||||
GSVector4i max_u8(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = std::max(U8[i], v.U8[i])); }
|
||||
|
@ -102,17 +102,17 @@ public:
|
||||
|
||||
ALWAYS_INLINE operator __m128i() const { return m; }
|
||||
|
||||
ALWAYS_INLINE GSVector2i sat_i8(const GSVector2i& min, const GSVector2i& max) const
|
||||
ALWAYS_INLINE GSVector2i sat_s8(const GSVector2i& min, const GSVector2i& max) const
|
||||
{
|
||||
return max_i8(min).min_i8(max);
|
||||
return max_s8(min).min_s8(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector2i sat_i16(const GSVector2i& min, const GSVector2i& max) const
|
||||
ALWAYS_INLINE GSVector2i sat_s16(const GSVector2i& min, const GSVector2i& max) const
|
||||
{
|
||||
return max_i16(min).min_i16(max);
|
||||
return max_s16(min).min_s16(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector2i sat_i32(const GSVector2i& min, const GSVector2i& max) const
|
||||
ALWAYS_INLINE GSVector2i sat_s32(const GSVector2i& min, const GSVector2i& max) const
|
||||
{
|
||||
return max_i32(min).min_i32(max);
|
||||
return max_s32(min).min_s32(max);
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector2i sat_u8(const GSVector2i& min, const GSVector2i& max) const
|
||||
@ -128,12 +128,12 @@ public:
|
||||
return max_u32(min).min_u32(max);
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector2i min_i8(const GSVector2i& v) const { return GSVector2i(_mm_min_epi8(m, v)); }
|
||||
ALWAYS_INLINE GSVector2i max_i8(const GSVector2i& v) const { return GSVector2i(_mm_max_epi8(m, v)); }
|
||||
ALWAYS_INLINE GSVector2i min_i16(const GSVector2i& v) const { return GSVector2i(_mm_min_epi16(m, v)); }
|
||||
ALWAYS_INLINE GSVector2i max_i16(const GSVector2i& v) const { return GSVector2i(_mm_max_epi16(m, v)); }
|
||||
ALWAYS_INLINE GSVector2i min_i32(const GSVector2i& v) const { return GSVector2i(_mm_min_epi32(m, v)); }
|
||||
ALWAYS_INLINE GSVector2i max_i32(const GSVector2i& v) const { return GSVector2i(_mm_max_epi32(m, v)); }
|
||||
ALWAYS_INLINE GSVector2i min_s8(const GSVector2i& v) const { return GSVector2i(_mm_min_epi8(m, v)); }
|
||||
ALWAYS_INLINE GSVector2i max_s8(const GSVector2i& v) const { return GSVector2i(_mm_max_epi8(m, v)); }
|
||||
ALWAYS_INLINE GSVector2i min_s16(const GSVector2i& v) const { return GSVector2i(_mm_min_epi16(m, v)); }
|
||||
ALWAYS_INLINE GSVector2i max_s16(const GSVector2i& v) const { return GSVector2i(_mm_max_epi16(m, v)); }
|
||||
ALWAYS_INLINE GSVector2i min_s32(const GSVector2i& v) const { return GSVector2i(_mm_min_epi32(m, v)); }
|
||||
ALWAYS_INLINE GSVector2i max_s32(const GSVector2i& v) const { return GSVector2i(_mm_max_epi32(m, v)); }
|
||||
|
||||
ALWAYS_INLINE GSVector2i min_u8(const GSVector2i& v) const { return GSVector2i(_mm_min_epu8(m, v)); }
|
||||
ALWAYS_INLINE GSVector2i max_u8(const GSVector2i& v) const { return GSVector2i(_mm_max_epu8(m, v)); }
|
||||
@ -873,37 +873,37 @@ public:
|
||||
|
||||
ALWAYS_INLINE bool rempty() const { return lt32(zwzw()).mask() != 0x00ff; }
|
||||
|
||||
ALWAYS_INLINE GSVector4i runion(const GSVector4i& v) const { return min_i32(v).blend32<0xc>(max_i32(v)); }
|
||||
ALWAYS_INLINE GSVector4i runion(const GSVector4i& v) const { return min_s32(v).blend32<0xc>(max_s32(v)); }
|
||||
|
||||
ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& v) const { return sat_i32(v); }
|
||||
ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& v) const { return sat_s32(v); }
|
||||
ALWAYS_INLINE bool rintersects(const GSVector4i& v) const { return !rintersect(v).rempty(); }
|
||||
ALWAYS_INLINE bool rcontains(const GSVector4i& v) const { return rintersect(v).eq(v); }
|
||||
|
||||
ALWAYS_INLINE u32 rgba32() const { return static_cast<u32>(ps32().pu16().extract32<0>()); }
|
||||
|
||||
ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& min, const GSVector4i& max) const
|
||||
ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& min, const GSVector4i& max) const
|
||||
{
|
||||
return max_i8(min).min_i8(max);
|
||||
return max_s8(min).min_s8(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& minmax) const
|
||||
ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& minmax) const
|
||||
{
|
||||
return max_i8(minmax.xyxy()).min_i8(minmax.zwzw());
|
||||
return max_s8(minmax.xyxy()).min_s8(minmax.zwzw());
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& min, const GSVector4i& max) const
|
||||
ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& min, const GSVector4i& max) const
|
||||
{
|
||||
return max_i16(min).min_i16(max);
|
||||
return max_s16(min).min_s16(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& minmax) const
|
||||
ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& minmax) const
|
||||
{
|
||||
return max_i16(minmax.xyxy()).min_i16(minmax.zwzw());
|
||||
return max_s16(minmax.xyxy()).min_s16(minmax.zwzw());
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i32(const GSVector4i& min, const GSVector4i& max) const
|
||||
ALWAYS_INLINE GSVector4i sat_s32(const GSVector4i& min, const GSVector4i& max) const
|
||||
{
|
||||
return max_i32(min).min_i32(max);
|
||||
return max_s32(min).min_s32(max);
|
||||
}
|
||||
ALWAYS_INLINE GSVector4i sat_i32(const GSVector4i& minmax) const
|
||||
ALWAYS_INLINE GSVector4i sat_s32(const GSVector4i& minmax) const
|
||||
{
|
||||
return max_i32(minmax.xyxy()).min_i32(minmax.zwzw());
|
||||
return max_s32(minmax.xyxy()).min_s32(minmax.zwzw());
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector4i sat_u8(const GSVector4i& min, const GSVector4i& max) const
|
||||
@ -931,12 +931,12 @@ public:
|
||||
return max_u32(minmax.xyxy()).min_u32(minmax.zwzw());
|
||||
}
|
||||
|
||||
ALWAYS_INLINE GSVector4i min_i8(const GSVector4i& v) const { return GSVector4i(_mm_min_epi8(m, v)); }
|
||||
ALWAYS_INLINE GSVector4i max_i8(const GSVector4i& v) const { return GSVector4i(_mm_max_epi8(m, v)); }
|
||||
ALWAYS_INLINE GSVector4i min_i16(const GSVector4i& v) const { return GSVector4i(_mm_min_epi16(m, v)); }
|
||||
ALWAYS_INLINE GSVector4i max_i16(const GSVector4i& v) const { return GSVector4i(_mm_max_epi16(m, v)); }
|
||||
ALWAYS_INLINE GSVector4i min_i32(const GSVector4i& v) const { return GSVector4i(_mm_min_epi32(m, v)); }
|
||||
ALWAYS_INLINE GSVector4i max_i32(const GSVector4i& v) const { return GSVector4i(_mm_max_epi32(m, v)); }
|
||||
ALWAYS_INLINE GSVector4i min_s8(const GSVector4i& v) const { return GSVector4i(_mm_min_epi8(m, v)); }
|
||||
ALWAYS_INLINE GSVector4i max_s8(const GSVector4i& v) const { return GSVector4i(_mm_max_epi8(m, v)); }
|
||||
ALWAYS_INLINE GSVector4i min_s16(const GSVector4i& v) const { return GSVector4i(_mm_min_epi16(m, v)); }
|
||||
ALWAYS_INLINE GSVector4i max_s16(const GSVector4i& v) const { return GSVector4i(_mm_max_epi16(m, v)); }
|
||||
ALWAYS_INLINE GSVector4i min_s32(const GSVector4i& v) const { return GSVector4i(_mm_min_epi32(m, v)); }
|
||||
ALWAYS_INLINE GSVector4i max_s32(const GSVector4i& v) const { return GSVector4i(_mm_max_epi32(m, v)); }
|
||||
|
||||
ALWAYS_INLINE GSVector4i min_u8(const GSVector4i& v) const { return GSVector4i(_mm_min_epu8(m, v)); }
|
||||
ALWAYS_INLINE GSVector4i max_u8(const GSVector4i& v) const { return GSVector4i(_mm_max_epu8(m, v)); }
|
||||
|
@ -3525,7 +3525,7 @@ static s16 GetPeakVolume(const u8* raw_sector, u8 channel)
|
||||
GSVector4i v_peak = GSVector4i::zero();
|
||||
for (u32 i = 0; i < NUM_SAMPLES; i += 8)
|
||||
{
|
||||
v_peak = v_peak.max_i16(GSVector4i::load<false>(current_ptr));
|
||||
v_peak = v_peak.max_s16(GSVector4i::load<false>(current_ptr));
|
||||
current_ptr += sizeof(v_peak);
|
||||
}
|
||||
|
||||
|
@ -341,9 +341,9 @@ protected:
|
||||
// TODO: Coordinates are exclusive, so off by one here...
|
||||
const GSVector2i clamp_min = GSVector2i::load(&m_clamped_drawing_area.x);
|
||||
const GSVector2i clamp_max = GSVector2i::load(&m_clamped_drawing_area.z);
|
||||
v1 = v1.sat_i32(clamp_min, clamp_max);
|
||||
v2 = v2.sat_i32(clamp_min, clamp_max);
|
||||
v3 = v3.sat_i32(clamp_min, clamp_max);
|
||||
v1 = v1.sat_s32(clamp_min, clamp_max);
|
||||
v2 = v2.sat_s32(clamp_min, clamp_max);
|
||||
v3 = v3.sat_s32(clamp_min, clamp_max);
|
||||
|
||||
TickCount pixels =
|
||||
std::abs((v1.x * v2.y + v2.x * v3.y + v3.x * v1.y - v1.x * v3.y - v2.x * v1.y - v3.x * v2.y) / 2);
|
||||
|
@ -2713,7 +2713,7 @@ void GPU_HW::LoadVertices()
|
||||
const GSVector2i vstart_pos = GSVector2i(start_pos.x + m_drawing_offset.x, start_pos.y + m_drawing_offset.y);
|
||||
const GSVector2i vend_pos = GSVector2i(end_pos.x + m_drawing_offset.x, end_pos.y + m_drawing_offset.y);
|
||||
const GSVector4i bounds = GSVector4i::xyxy(vstart_pos, vend_pos);
|
||||
const GSVector4i rect = GSVector4i::xyxy(vstart_pos.min_i32(vend_pos), vstart_pos.max_i32(vend_pos))
|
||||
const GSVector4i rect = GSVector4i::xyxy(vstart_pos.min_s32(vend_pos), vstart_pos.max_s32(vend_pos))
|
||||
.add32(GSVector4i::cxpr(0, 0, 1, 1));
|
||||
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
|
||||
|
||||
@ -2773,7 +2773,7 @@ void GPU_HW::LoadVertices()
|
||||
const GSVector2i end_pos = GSVector2i(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y);
|
||||
const GSVector4i bounds = GSVector4i::xyxy(start_pos, end_pos);
|
||||
const GSVector4i rect =
|
||||
GSVector4i::xyxy(start_pos.min_i32(end_pos), start_pos.max_i32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1));
|
||||
GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1));
|
||||
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
|
||||
if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty())
|
||||
{
|
||||
@ -2861,7 +2861,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect)
|
||||
uv_rect = ((uv_rect & twin.xyxy()) | twin.zwzw());
|
||||
|
||||
// Min could be greater than max after applying window, correct for it.
|
||||
uv_rect = uv_rect.min_i32(uv_rect.zwzw()).max_i32(uv_rect.xyxy());
|
||||
uv_rect = uv_rect.min_s32(uv_rect.zwzw()).max_s32(uv_rect.xyxy());
|
||||
}
|
||||
|
||||
const GPUTextureMode tmode = m_draw_mode.mode_reg.texture_mode;
|
||||
|
@ -533,10 +533,10 @@ void GPU_SW::DispatchRenderCommand()
|
||||
}
|
||||
|
||||
// Cull polygons which are too large.
|
||||
const GSVector2i min_pos_12 = positions[1].min_i32(positions[2]);
|
||||
const GSVector2i max_pos_12 = positions[1].max_i32(positions[2]);
|
||||
const GSVector4i draw_rect_012 = GSVector4i(min_pos_12.min_i32(positions[0]))
|
||||
.upl64(GSVector4i(max_pos_12.max_i32(positions[0])))
|
||||
const GSVector2i min_pos_12 = positions[1].min_s32(positions[2]);
|
||||
const GSVector2i max_pos_12 = positions[1].max_s32(positions[2]);
|
||||
const GSVector4i draw_rect_012 = GSVector4i(min_pos_12.min_s32(positions[0]))
|
||||
.upl64(GSVector4i(max_pos_12.max_s32(positions[0])))
|
||||
.add32(GSVector4i::cxpr(0, 0, 1, 1));
|
||||
const bool first_tri_culled =
|
||||
(draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT ||
|
||||
@ -558,8 +558,8 @@ void GPU_SW::DispatchRenderCommand()
|
||||
// quads
|
||||
if (rc.quad_polygon)
|
||||
{
|
||||
const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_i32(positions[3]))
|
||||
.upl64(GSVector4i(max_pos_12.max_i32(positions[3])))
|
||||
const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_s32(positions[3]))
|
||||
.upl64(GSVector4i(max_pos_12.max_s32(positions[3])))
|
||||
.add32(GSVector4i::cxpr(0, 0, 1, 1));
|
||||
|
||||
// Cull polygons which are too large.
|
||||
@ -680,7 +680,7 @@ void GPU_SW::DispatchRenderCommand()
|
||||
|
||||
const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[0].x);
|
||||
const GSVector4i v1 = GSVector4i::loadl(&cmd->vertices[1].x);
|
||||
const GSVector4i rect = v0.min_i32(v1).xyxy(v0.max_i32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1));
|
||||
const GSVector4i rect = v0.min_s32(v1).xyxy(v0.max_s32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1));
|
||||
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
|
||||
|
||||
if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty())
|
||||
@ -718,7 +718,7 @@ void GPU_SW::DispatchRenderCommand()
|
||||
|
||||
const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[0].x);
|
||||
const GSVector4i v1 = GSVector4i::loadl(&cmd->vertices[1].x);
|
||||
const GSVector4i rect = v0.min_i32(v1).xyxy(v0.max_i32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1));
|
||||
const GSVector4i rect = v0.min_s32(v1).xyxy(v0.max_s32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1));
|
||||
const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area);
|
||||
|
||||
if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty())
|
||||
|
@ -580,8 +580,8 @@ ShadePixel(const PixelVectors<texture_enable>& pv, GPUTextureMode texture_mode,
|
||||
// Convert to 5bit.
|
||||
if constexpr (dithering_enable)
|
||||
{
|
||||
rg = rg.sra16<4>().add16(dither).max_i16(GSVectorNi::zero()).sra16<3>();
|
||||
ba = ba.sra16<4>().add16(dither).max_i16(GSVectorNi::zero()).sra16<3>();
|
||||
rg = rg.sra16<4>().add16(dither).max_s16(GSVectorNi::zero()).sra16<3>();
|
||||
ba = ba.sra16<4>().add16(dither).max_s16(GSVectorNi::zero()).sra16<3>();
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -606,8 +606,8 @@ ShadePixel(const PixelVectors<texture_enable>& pv, GPUTextureMode texture_mode,
|
||||
// Non-textured transparent polygons don't set bit 15, but are treated as transparent.
|
||||
if constexpr (dithering_enable)
|
||||
{
|
||||
GSVectorNi rg = vertex_color_rg.add16(dither).max_i16(GSVectorNi::zero()).sra16<3>();
|
||||
GSVectorNi ba = vertex_color_ba.add16(dither).max_i16(GSVectorNi::zero()).sra16<3>();
|
||||
GSVectorNi rg = vertex_color_rg.add16(dither).max_s16(GSVectorNi::zero()).sra16<3>();
|
||||
GSVectorNi ba = vertex_color_ba.add16(dither).max_s16(GSVectorNi::zero()).sra16<3>();
|
||||
|
||||
// Clamp to 5bit. We use 32bit for BA to set a to zero.
|
||||
rg = rg.min_u16(GSVectorNi::cxpr16(0x1F));
|
||||
|
Loading…
Reference in New Issue
Block a user