ppsspp/GPU/Common/IndexGenerator.cpp

433 lines
14 KiB
C++
Raw Normal View History

2013-08-17 09:23:51 +00:00
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
2017-06-02 10:03:46 +00:00
#include <cstring>
2013-08-17 09:23:51 +00:00
#include "ppsspp_config.h"
#include "CPUDetect.h"
#include "Common.h"
#ifdef _M_SSE
#include <emmintrin.h>
#endif
#if PPSSPP_ARCH(ARM_NEON)
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#endif
2020-09-14 22:34:45 +00:00
#include "IndexGenerator.h"
2013-08-17 09:23:51 +00:00
// Points don't need indexing...
2017-01-28 11:03:45 +00:00
// Primitive type that is actually drawn once indices have been generated.
// NOTE(review): presumably indexed by the incoming GE_PRIM_* value in enum order
// (points, lines, line strip, triangles, triangle strip, triangle fan, rectangles) - confirm.
const u8 IndexGenerator::indexedPrimitiveType[7] = {
	GE_PRIM_POINTS,      // entry 0
	GE_PRIM_LINES,       // entry 1
	GE_PRIM_LINES,       // entry 2
	GE_PRIM_TRIANGLES,   // entry 3
	GE_PRIM_TRIANGLES,   // entry 4
	GE_PRIM_TRIANGLES,   // entry 5
	GE_PRIM_RECTANGLES,  // entry 6
};
// Remembers `inds` as the base of the output index buffer, then calls Reset().
void IndexGenerator::Setup(u16 *inds) {
	indsBase_ = inds;
	Reset();
}
2018-06-03 14:29:50 +00:00
void IndexGenerator::AddPrim(int prim, int vertexCount, bool clockwise) {
2013-08-17 09:23:51 +00:00
switch (prim) {
case GE_PRIM_POINTS: AddPoints(vertexCount); break;
case GE_PRIM_LINES: AddLineList(vertexCount); break;
case GE_PRIM_LINE_STRIP: AddLineStrip(vertexCount); break;
2018-06-05 02:39:40 +00:00
case GE_PRIM_TRIANGLES: AddList(vertexCount, clockwise); break;
2018-06-03 14:29:50 +00:00
case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount, clockwise); break;
2013-08-17 09:23:51 +00:00
case GE_PRIM_RECTANGLES: AddRectangles(vertexCount); break; // Same
}
}
void IndexGenerator::AddPoints(int numVerts) {
u16 *outInds = inds_;
const int startIndex = index_;
2013-08-17 09:23:51 +00:00
for (int i = 0; i < numVerts; i++)
*outInds++ = startIndex + i;
inds_ = outInds;
2013-08-17 09:23:51 +00:00
// ignore overflow verts
index_ += numVerts;
count_ += numVerts;
prim_ = GE_PRIM_POINTS;
seenPrims_ |= 1 << GE_PRIM_POINTS;
}
2018-06-05 02:39:40 +00:00
void IndexGenerator::AddList(int numVerts, bool clockwise) {
u16 *outInds = inds_;
const int startIndex = index_;
2018-06-05 02:39:40 +00:00
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
2013-10-01 16:02:09 +00:00
for (int i = 0; i < numVerts; i += 3) {
*outInds++ = startIndex + i;
2018-06-05 02:39:40 +00:00
*outInds++ = startIndex + i + v1;
*outInds++ = startIndex + i + v2;
2013-08-17 09:23:51 +00:00
}
inds_ = outInds;
2013-08-17 09:23:51 +00:00
// ignore overflow verts
index_ += numVerts;
2013-10-01 16:02:09 +00:00
count_ += numVerts;
2013-08-17 09:23:51 +00:00
prim_ = GE_PRIM_TRIANGLES;
seenPrims_ |= 1 << GE_PRIM_TRIANGLES;
if (!clockwise) {
// Make sure we don't treat this as pure.
seenPrims_ |= 1 << GE_PRIM_TRIANGLE_STRIP;
}
2013-08-17 09:23:51 +00:00
}
// Per-index vertex offsets for eight consecutive strip triangles (24 indices), clockwise
// variant. Triangle k uses vertices k, k+1, k+2; every odd triangle has its last two
// corners swapped. 16-byte aligned because AddStrip loads it with aligned SSE loads.
alignas(16) static const u16 offsets_clockwise[24] = {
	0, 1, 2,
	1, 3, 2,
	2, 3, 4,
	3, 5, 4,
	4, 5, 6,
	5, 7, 6,
	6, 7, 8,
	7, 9, 8,
};
// Per-index vertex offsets for eight consecutive strip triangles (24 indices),
// counter-clockwise variant. Triangle k uses vertices k, k+1, k+2; every even triangle has
// its last two corners swapped. 16-byte aligned because AddStrip loads it with aligned SSE loads.
alignas(16) static const uint16_t offsets_counter_clockwise[24] = {
	0, 2, 1,
	1, 2, 3,
	2, 4, 3,
	3, 4, 5,
	4, 6, 5,
	5, 6, 7,
	6, 8, 7,
	7, 8, 9,
};
2018-06-03 14:29:50 +00:00
void IndexGenerator::AddStrip(int numVerts, bool clockwise) {
int numTris = numVerts - 2;
#ifdef _M_SSE
// In an SSE2 register we can fit 8 16-bit integers.
// However, we need to output a multiple of 3 indices.
// The first such multiple is 24, which means we'll generate 24 indices per cycle,
// which corresponds to 8 triangles. That's pretty cool.
// We allow ourselves to write some extra indices to avoid the fallback loop.
// That's alright as we're appending to a buffer - they will get overwritten anyway.
int numChunks = (numTris + 7) / 8;
2020-09-24 08:03:07 +00:00
__m128i ibase8 = _mm_set1_epi16(index_);
__m128i increment = _mm_set1_epi16(8);
const __m128i *offsets = (const __m128i *)(clockwise ? offsets_clockwise : offsets_counter_clockwise);
__m128i offsets0 = _mm_load_si128(offsets);
__m128i offsets1 = _mm_load_si128(offsets + 1);
__m128i offsets2 = _mm_load_si128(offsets + 2);
__m128i *dst = (__m128i *)inds_;
for (int i = 0; i < numChunks; i++) {
_mm_storeu_si128(dst, _mm_add_epi16(ibase8, offsets0));
_mm_storeu_si128(dst + 1, _mm_add_epi16(ibase8, offsets1));
_mm_storeu_si128(dst + 2, _mm_add_epi16(ibase8, offsets2));
ibase8 = _mm_add_epi16(ibase8, increment);
dst += 3;
}
2020-09-24 08:03:07 +00:00
inds_ += numTris * 3;
// wind doesn't need to be updated, an even number of triangles have been drawn.
#elif PPSSPP_ARCH(ARM_NEON)
int numChunks = (numTris + 7) / 8;
2020-09-24 08:03:07 +00:00
uint16x8_t ibase8 = vdupq_n_u16(index_);
uint16x8_t increment = vdupq_n_u16(8);
const u16 *offsets = clockwise ? offsets_clockwise : offsets_counter_clockwise;
uint16x8_t offsets0 = vld1q_u16(offsets);
uint16x8_t offsets1 = vld1q_u16(offsets + 8);
uint16x8_t offsets2 = vld1q_u16(offsets + 16);
u16 *dst = inds_;
for (int i = 0; i < numChunks; i++) {
vst1q_u16(dst, vaddq_u16(ibase8, offsets0));
vst1q_u16(dst + 8, vaddq_u16(ibase8, offsets1));
vst1q_u16(dst + 16, vaddq_u16(ibase8, offsets2));
ibase8 = vaddq_u16(ibase8, increment);
dst += 3 * 8;
}
2020-09-24 08:03:07 +00:00
inds_ += numTris * 3;
#else
// Slow fallback loop.
2020-09-24 08:03:07 +00:00
int wind = clockwise ? 1 : 2;
int ibase = index_;
size_t numPairs = numTris / 2;
u16 *outInds = inds_;
while (numPairs > 0) {
*outInds++ = ibase;
*outInds++ = ibase + wind;
*outInds++ = ibase + (wind ^ 3);
*outInds++ = ibase + 1;
*outInds++ = ibase + 1 + (wind ^ 3);
*outInds++ = ibase + 1 + wind;
ibase += 2;
numPairs--;
}
if (numTris & 1) {
*outInds++ = ibase;
*outInds++ = ibase + wind;
wind ^= 3; // toggle between 1 and 2
*outInds++ = ibase + wind;
2013-08-17 09:23:51 +00:00
}
2020-09-24 08:03:07 +00:00
inds_ = outInds;
#endif
2013-08-17 09:23:51 +00:00
index_ += numVerts;
if (numTris > 0)
count_ += numTris * 3;
2013-08-17 09:23:51 +00:00
// This is so we can detect one single strip by just looking at seenPrims_.
if (!seenPrims_ && clockwise) {
2013-08-17 09:23:51 +00:00
seenPrims_ = 1 << GE_PRIM_TRIANGLE_STRIP;
prim_ = GE_PRIM_TRIANGLE_STRIP;
pureCount_ = numVerts;
} else {
seenPrims_ |= (1 << GE_PRIM_TRIANGLE_STRIP) | (1 << GE_PRIM_TRIANGLES);
2013-08-17 09:23:51 +00:00
prim_ = GE_PRIM_TRIANGLES;
pureCount_ = 0;
}
}
2018-06-03 14:29:50 +00:00
// Appends triangle-list indices for a non-indexed triangle fan of `numVerts` consecutive
// vertices starting at index_. Every triangle shares the first vertex; when `clockwise`
// is false the other two corners are swapped to flip the winding.
//
// Fans with fewer than three vertices emit no indices and leave count_ untouched, but
// still advance index_.
void IndexGenerator::AddFan(int numVerts, bool clockwise) {
	// Clamp so that a degenerate fan cannot make count_ go backwards.
	const int numTris = numVerts > 2 ? numVerts - 2 : 0;
	u16 *outInds = inds_;
	const int startIndex = index_;
	const int v1 = clockwise ? 1 : 2;
	const int v2 = clockwise ? 2 : 1;
	for (int i = 0; i < numTris; i++) {
		*outInds++ = startIndex;
		*outInds++ = startIndex + i + v1;
		*outInds++ = startIndex + i + v2;
	}
	inds_ = outInds;

	index_ += numVerts;
	count_ += numTris * 3;
	prim_ = GE_PRIM_TRIANGLES;
	seenPrims_ |= 1 << GE_PRIM_TRIANGLE_FAN;
	if (!clockwise) {
		// Make sure we don't treat this as pure.
		seenPrims_ |= 1 << GE_PRIM_TRIANGLE_STRIP;
	}
}
//Lines
void IndexGenerator::AddLineList(int numVerts) {
u16 *outInds = inds_;
const int startIndex = index_;
2013-10-01 16:02:09 +00:00
for (int i = 0; i < numVerts; i += 2) {
*outInds++ = startIndex + i;
*outInds++ = startIndex + i + 1;
2013-08-17 09:23:51 +00:00
}
inds_ = outInds;
2013-08-17 09:23:51 +00:00
index_ += numVerts;
2013-10-01 16:02:09 +00:00
count_ += numVerts;
2013-08-17 09:23:51 +00:00
prim_ = GE_PRIM_LINES;
seenPrims_ |= 1 << prim_;
}
// Appends line-list indices for a non-indexed line strip of `numVerts` consecutive
// vertices starting at index_: N vertices become N - 1 lines.
//
// Strips with fewer than two vertices emit no indices and leave count_ untouched, but
// still advance index_.
void IndexGenerator::AddLineStrip(int numVerts) {
	// Clamp so that an empty strip cannot make count_ go backwards.
	const int numLines = numVerts > 1 ? numVerts - 1 : 0;
	u16 *outInds = inds_;
	const int startIndex = index_;
	for (int i = 0; i < numLines; i++) {
		*outInds++ = startIndex + i;
		*outInds++ = startIndex + i + 1;
	}
	inds_ = outInds;

	index_ += numVerts;
	count_ += numLines * 2;
	prim_ = GE_PRIM_LINES;
	seenPrims_ |= 1 << GE_PRIM_LINE_STRIP;
}
void IndexGenerator::AddRectangles(int numVerts) {
u16 *outInds = inds_;
const int startIndex = index_;
//rectangles always need 2 vertices, disregard the last one if there's an odd number
numVerts = numVerts & ~1;
for (int i = 0; i < numVerts; i += 2) {
*outInds++ = startIndex + i;
*outInds++ = startIndex + i + 1;
2013-08-17 09:23:51 +00:00
}
inds_ = outInds;
2013-08-17 09:23:51 +00:00
index_ += numVerts;
2013-10-01 16:02:09 +00:00
count_ += numVerts;
2013-08-17 09:23:51 +00:00
prim_ = GE_PRIM_RECTANGLES;
seenPrims_ |= 1 << GE_PRIM_RECTANGLES;
}
template <class ITypeLE, int flag>
void IndexGenerator::TranslatePoints(int numInds, const ITypeLE *inds, int indexOffset) {
indexOffset = index_ - indexOffset;
u16 *outInds = inds_;
2013-08-17 09:23:51 +00:00
for (int i = 0; i < numInds; i++)
*outInds++ = indexOffset + inds[i];
inds_ = outInds;
2013-08-17 09:23:51 +00:00
count_ += numInds;
prim_ = GE_PRIM_POINTS;
seenPrims_ |= (1 << GE_PRIM_POINTS) | flag;
2013-08-17 09:23:51 +00:00
}
template <class ITypeLE, int flag>
void IndexGenerator::TranslateLineList(int numInds, const ITypeLE *inds, int indexOffset) {
indexOffset = index_ - indexOffset;
u16 *outInds = inds_;
numInds = numInds & ~1;
for (int i = 0; i < numInds; i += 2) {
*outInds++ = indexOffset + inds[i];
*outInds++ = indexOffset + inds[i + 1];
2013-08-17 09:23:51 +00:00
}
inds_ = outInds;
2013-10-01 16:02:09 +00:00
count_ += numInds;
prim_ = GE_PRIM_LINES;
seenPrims_ |= (1 << GE_PRIM_LINES) | flag;
2013-08-17 09:23:51 +00:00
}
// Expands an indexed line strip from `inds` into line-list indices, rebasing every index
// by (index_ - indexOffset): N input indices become N - 1 lines. `flag` is OR'd into
// seenPrims_ together with the line-strip bit. index_ is not modified.
//
// Strips with fewer than two indices emit nothing and leave count_ untouched.
template <class ITypeLE, int flag>
void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int indexOffset) {
	indexOffset = index_ - indexOffset;
	// Clamp so that an empty strip cannot make count_ go backwards.
	const int numLines = numInds > 1 ? numInds - 1 : 0;
	u16 *outInds = inds_;
	for (int i = 0; i < numLines; i++) {
		*outInds++ = indexOffset + inds[i];
		*outInds++ = indexOffset + inds[i + 1];
	}
	inds_ = outInds;

	count_ += numLines * 2;
	prim_ = GE_PRIM_LINES;
	seenPrims_ |= (1 << GE_PRIM_LINE_STRIP) | flag;
}
template <class ITypeLE, int flag>
2018-06-05 02:39:40 +00:00
void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
indexOffset = index_ - indexOffset;
2017-06-02 10:03:46 +00:00
// We only bother doing this minor optimization in triangle list, since it's by far the most
// common operation that can benefit.
if (sizeof(ITypeLE) == sizeof(inds_[0]) && indexOffset == 0 && clockwise) {
2017-06-02 10:03:46 +00:00
memcpy(inds_, inds, numInds * sizeof(ITypeLE));
inds_ += numInds;
count_ += numInds;
} else {
u16 *outInds = inds_;
int numTris = numInds / 3; // Round to whole triangles
numInds = numTris * 3;
2018-06-05 02:39:40 +00:00
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
2017-06-02 10:03:46 +00:00
for (int i = 0; i < numInds; i += 3) {
*outInds++ = indexOffset + inds[i];
2018-06-05 02:39:40 +00:00
*outInds++ = indexOffset + inds[i + v1];
*outInds++ = indexOffset + inds[i + v2];
2017-06-02 10:03:46 +00:00
}
inds_ = outInds;
count_ += numInds;
2013-08-17 09:23:51 +00:00
}
prim_ = GE_PRIM_TRIANGLES;
seenPrims_ |= (1 << GE_PRIM_TRIANGLES) | flag;
2013-08-17 09:23:51 +00:00
}
template <class ITypeLE, int flag>
2018-04-27 13:49:43 +00:00
void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
int wind = clockwise ? 1 : 2;
indexOffset = index_ - indexOffset;
2013-08-17 09:23:51 +00:00
int numTris = numInds - 2;
u16 *outInds = inds_;
2013-08-17 09:23:51 +00:00
for (int i = 0; i < numTris; i++) {
*outInds++ = indexOffset + inds[i];
*outInds++ = indexOffset + inds[i + wind];
wind ^= 3; // Toggle between 1 and 2
*outInds++ = indexOffset + inds[i + wind];
2013-08-17 09:23:51 +00:00
}
inds_ = outInds;
2013-08-17 09:23:51 +00:00
count_ += numTris * 3;
prim_ = GE_PRIM_TRIANGLES;
seenPrims_ |= (1 << GE_PRIM_TRIANGLE_STRIP) | flag;
2013-08-17 09:23:51 +00:00
}
template <class ITypeLE, int flag>
2018-06-03 14:29:50 +00:00
void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
2013-08-17 09:23:51 +00:00
if (numInds <= 0) return;
indexOffset = index_ - indexOffset;
2013-08-17 09:23:51 +00:00
int numTris = numInds - 2;
u16 *outInds = inds_;
2018-06-03 14:29:50 +00:00
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
2013-08-17 09:23:51 +00:00
for (int i = 0; i < numTris; i++) {
*outInds++ = indexOffset + inds[0];
2018-06-03 14:50:35 +00:00
*outInds++ = indexOffset + inds[i + v1];
*outInds++ = indexOffset + inds[i + v2];
2013-08-17 09:23:51 +00:00
}
inds_ = outInds;
2013-08-17 09:23:51 +00:00
count_ += numTris * 3;
prim_ = GE_PRIM_TRIANGLES;
seenPrims_ |= (1 << GE_PRIM_TRIANGLE_FAN) | flag;
2013-08-17 09:23:51 +00:00
}
template <class ITypeLE, int flag>
inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds, int indexOffset) {
indexOffset = index_ - indexOffset;
u16 *outInds = inds_;
//rectangles always need 2 vertices, disregard the last one if there's an odd number
numInds = numInds & ~1;
2013-10-01 16:02:09 +00:00
for (int i = 0; i < numInds; i += 2) {
*outInds++ = indexOffset + inds[i];
*outInds++ = indexOffset + inds[i+1];
2013-08-17 09:23:51 +00:00
}
inds_ = outInds;
count_ += numInds;
prim_ = GE_PRIM_RECTANGLES;
seenPrims_ |= (1 << GE_PRIM_RECTANGLES) | flag;
2013-08-17 09:23:51 +00:00
}
// Could template this too, but would have to define in header.
2018-04-27 13:49:43 +00:00
void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break;
2018-06-05 02:39:40 +00:00
case GE_PRIM_TRIANGLES: TranslateList<u8, SEEN_INDEX8>(numInds, inds, indexOffset, clockwise); break;
2018-04-27 13:49:43 +00:00
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u8, SEEN_INDEX8>(numInds, inds, indexOffset, clockwise); break;
2018-06-03 14:29:50 +00:00
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u8, SEEN_INDEX8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u8, SEEN_INDEX8>(numInds, inds, indexOffset); break; // Same
2013-08-17 09:23:51 +00:00
}
}
2018-04-27 13:49:43 +00:00
void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise) {
switch (prim) {
2016-04-10 08:52:51 +00:00
case GE_PRIM_POINTS: TranslatePoints<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break;
2018-06-05 02:39:40 +00:00
case GE_PRIM_TRIANGLES: TranslateList<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset, clockwise); break;
2018-04-27 13:49:43 +00:00
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset, clockwise); break;
2018-06-03 14:29:50 +00:00
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset, clockwise); break;
2016-04-10 08:52:51 +00:00
case GE_PRIM_RECTANGLES: TranslateRectangles<u16_le, SEEN_INDEX16>(numInds, inds, indexOffset); break; // Same
2013-08-17 09:23:51 +00:00
}
}
2018-04-27 13:49:43 +00:00
void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise) {
switch (prim) {
2016-04-10 08:52:51 +00:00
case GE_PRIM_POINTS: TranslatePoints<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break;
2018-06-05 02:39:40 +00:00
case GE_PRIM_TRIANGLES: TranslateList<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset, clockwise); break;
2018-04-27 13:49:43 +00:00
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset, clockwise); break;
2018-06-03 14:29:50 +00:00
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset, clockwise); break;
2016-04-10 08:52:51 +00:00
case GE_PRIM_RECTANGLES: TranslateRectangles<u32_le, SEEN_INDEX32>(numInds, inds, indexOffset); break; // Same
2013-08-17 09:23:51 +00:00
}
}