LCMS patch to set up aligned space for SSE2 assembly. Part of bug 445552. r=vlad

This commit is contained in:
Bobby Holley 2008-08-17 13:48:10 -07:00
parent f48fd6d7c3
commit efdd8a7ef4
3 changed files with 34 additions and 18 deletions

View File

@ -1524,9 +1524,17 @@ typedef struct { // Float vector
} FVEC3, FAR* LPFVEC3;
typedef struct { // Matrix (Float)
FVEC3 v[3];
FVEC3 v[4]; // We secretly pad to 4 vectors so that we have an extra 16-byte-aligned
// 16 byte buffer to use later on
} FMAT3, FAR* LPFMAT3;
// Structure for giving us alignment with our FMAT3's.
// _Buffer is raw backing storage: room for one FMAT3 plus 16 spare bytes.
// F is the pointer through which the matrix is actually accessed; it is
// assigned by FMAT3ASetup() to an address inside _Buffer and must not be
// dereferenced before that call has been made.
typedef struct {
BYTE _Buffer[sizeof(FMAT3) + 16];
LPFMAT3 F;
} FMAT3A, FAR* LPFMAT3A;
// Points m->F at a location inside m->_Buffer; call before using m->F.
void cdecl FMAT3ASetup(LPFMAT3A m);
void cdecl VEC3init(LPVEC3 r, double x, double y, double z); // double version
void cdecl VEC3initF(LPWVEC3 r, double x, double y, double z); // Fix32 version
@ -1853,7 +1861,7 @@ typedef struct {
union {
WMAT3 W;
FMAT3 F;
FMAT3A FA; // This is not a matrix proper - use FA.F to access the matrix pointer
} Matrix;
L16PARAMS p16; // Primary curve

View File

@ -102,8 +102,9 @@ LPMATSHAPER cmsAllocMatShaper2(LPMAT3 Matrix, LPGAMMATABLE In[], LPLCMSPRECACHE
// Fill matrix part
if (Behaviour & MATSHAPER_FLOATMAT) {
MAT3toFloat(&NewMatShaper -> Matrix.F, Matrix);
if (!FMAT3isIdentity(&NewMatShaper -> Matrix.F, 0.00001f))
FMAT3ASetup(&NewMatShaper->Matrix.FA);
MAT3toFloat(NewMatShaper -> Matrix.FA.F, Matrix);
if (!FMAT3isIdentity(NewMatShaper -> Matrix.FA.F, 0.00001f))
NewMatShaper -> dwFlags |= MATSHAPER_HASMATRIX;
}
else {
@ -399,41 +400,42 @@ void OutputBehaviour(LPMATSHAPER MatShaper, WORD In[], WORD Out[])
void cmsEvalMatShaperFloat(LPMATSHAPER MatShaper, BYTE In[], BYTE Out[])
{
WORD tmp[3];
FVEC3 InVect, OutVect;
FVEC3 OutVect;
LPFVEC3 FloatVals = &MatShaper -> Matrix.FA.F->v[3]; // Access our secret aligned temp buffer
if (MatShaper -> dwFlags & MATSHAPER_HASINPSHAPER)
{
if (MatShaper->L2_Precache != NULL)
{
InVect.n[VX] = MatShaper->L2_Precache->Impl.LI16F_FORWARD.Cache[0][In[0]];
InVect.n[VY] = MatShaper->L2_Precache->Impl.LI16F_FORWARD.Cache[1][In[1]];
InVect.n[VZ] = MatShaper->L2_Precache->Impl.LI16F_FORWARD.Cache[2][In[2]];
FloatVals->n[VX] = MatShaper->L2_Precache->Impl.LI16F_FORWARD.Cache[0][In[0]];
FloatVals->n[VY] = MatShaper->L2_Precache->Impl.LI16F_FORWARD.Cache[1][In[1]];
FloatVals->n[VZ] = MatShaper->L2_Precache->Impl.LI16F_FORWARD.Cache[2][In[2]];
}
else
{
InVect.n[VX] = ToFloatDomain(cmsLinearInterpLUT16(RGB_8_TO_16(In[0]), MatShaper -> L2[0], &MatShaper -> p2_16));
InVect.n[VY] = ToFloatDomain(cmsLinearInterpLUT16(RGB_8_TO_16(In[1]), MatShaper -> L2[1], &MatShaper -> p2_16));
InVect.n[VZ] = ToFloatDomain(cmsLinearInterpLUT16(RGB_8_TO_16(In[2]), MatShaper -> L2[2], &MatShaper -> p2_16));
FloatVals->n[VX] = ToFloatDomain(cmsLinearInterpLUT16(RGB_8_TO_16(In[0]), MatShaper -> L2[0], &MatShaper -> p2_16));
FloatVals->n[VY] = ToFloatDomain(cmsLinearInterpLUT16(RGB_8_TO_16(In[1]), MatShaper -> L2[1], &MatShaper -> p2_16));
FloatVals->n[VZ] = ToFloatDomain(cmsLinearInterpLUT16(RGB_8_TO_16(In[2]), MatShaper -> L2[2], &MatShaper -> p2_16));
}
}
else
{
InVect.n[VX] = ToFloatDomain(In[0]);
InVect.n[VY] = ToFloatDomain(In[1]);
InVect.n[VZ] = ToFloatDomain(In[2]);
FloatVals->n[VX] = ToFloatDomain(In[0]);
FloatVals->n[VY] = ToFloatDomain(In[1]);
FloatVals->n[VZ] = ToFloatDomain(In[2]);
}
if (MatShaper -> dwFlags & MATSHAPER_HASMATRIX)
{
MAT3evalF(&OutVect, &MatShaper -> Matrix.F, &InVect);
MAT3evalF(&OutVect, MatShaper -> Matrix.FA.F, FloatVals);
}
else
{
OutVect.n[VX] = InVect.n[VX];
OutVect.n[VY] = InVect.n[VY];
OutVect.n[VZ] = InVect.n[VZ];
OutVect.n[VX] = FloatVals->n[VX];
OutVect.n[VY] = FloatVals->n[VY];
OutVect.n[VZ] = FloatVals->n[VZ];
}

View File

@ -238,6 +238,12 @@ int FromFloatDomain(Float a)
#endif
// Alignment helper for LPFMAT3A: makes m->F point into m->_Buffer at the
// next address whose value is a multiple of 16.
//
// The skip is always in the range 1..16 bytes: when _Buffer already sits on
// a 16-byte boundary a full 16 bytes are skipped rather than 0. _Buffer is
// declared with 16 bytes of slack beyond sizeof(FMAT3), which covers the
// largest skip.
//
// NOTE(review): the address is converted to 'unsigned' before the modulo.
// Only the low 4 bits matter for the result, but on targets where pointers
// are wider than 'unsigned' this cast truncates - confirm that is acceptable
// for the compilers in use.
void FMAT3ASetup(LPFMAT3A m)
{
       unsigned misalignment = ((unsigned) m -> _Buffer) % 16;
       unsigned skip = 16 - misalignment;

       m -> F = (LPFMAT3) (m -> _Buffer + skip);
}
// Initiate a vector (double version)