mirror of
https://foundry.openuru.org/gitblit/r/CWE-ou-minkata.git
synced 2025-07-13 18:17:49 -04:00
SSE3 hsMatrix44 multiplication
This commit is contained in:
@ -47,6 +47,10 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
|
||||
#include "hsStream.h"
|
||||
#include <math.h>
|
||||
|
||||
#ifdef HAVE_SSE
|
||||
# include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
static hsMatrix44 myIdent = hsMatrix44().Reset();
|
||||
const hsMatrix44& hsMatrix44::IdentityMatrix() { return myIdent; }
|
||||
|
||||
@ -92,6 +96,18 @@ void hsMatrix44::DecompRigid(hsScalarTriple &translate, hsQuat &rotate) const
|
||||
rotate.QuatFromMatrix44(*this);
|
||||
}
|
||||
|
||||
#ifdef HAVE_SSE
|
||||
# define MULTBEGIN(i) \
|
||||
xmm[0] = _mm_loadu_ps(fMap[i]);
|
||||
# define MULTCELL(i, j) \
|
||||
xmm[1] = _mm_set_ps(b.fMap[3][j], b.fMap[2][j], b.fMap[1][j], b.fMap[0][j]); \
|
||||
xmm[j+2] = _mm_mul_ps(xmm[0], xmm[1]);
|
||||
# define MULTFINISH(i) \
|
||||
xmm[6] = _mm_hadd_ps(xmm[2], xmm[3]); \
|
||||
xmm[7] = _mm_hadd_ps(xmm[4], xmm[5]); \
|
||||
xmm[1] = _mm_hadd_ps(xmm[6], xmm[7]); \
|
||||
_mm_storeu_ps(c.fMap[i], xmm[1]);
|
||||
#endif
|
||||
|
||||
hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const
|
||||
{
|
||||
@ -108,6 +124,37 @@ hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const
|
||||
if( b.fFlags & hsMatrix44::kIsIdent )
|
||||
return *this;
|
||||
|
||||
#ifdef HAVE_SSE
|
||||
__m128 xmm[8];
|
||||
|
||||
MULTBEGIN(0);
|
||||
MULTCELL(0, 0);
|
||||
MULTCELL(0, 1);
|
||||
MULTCELL(0, 2);
|
||||
MULTCELL(0, 3);
|
||||
MULTFINISH(0);
|
||||
|
||||
MULTBEGIN(1);
|
||||
MULTCELL(1, 0);
|
||||
MULTCELL(1, 1);
|
||||
MULTCELL(1, 2);
|
||||
MULTCELL(1, 3);
|
||||
MULTFINISH(1);
|
||||
|
||||
MULTBEGIN(2);
|
||||
MULTCELL(2, 0);
|
||||
MULTCELL(2, 1);
|
||||
MULTCELL(2, 2);
|
||||
MULTCELL(2, 3);
|
||||
MULTFINISH(2);
|
||||
|
||||
MULTBEGIN(3);
|
||||
MULTCELL(3, 0);
|
||||
MULTCELL(3, 1);
|
||||
MULTCELL(3, 2);
|
||||
MULTCELL(3, 3);
|
||||
MULTFINISH(3);
|
||||
#else
|
||||
c.fMap[0][0] = (fMap[0][0] * b.fMap[0][0]) + (fMap[0][1] * b.fMap[1][0]) + (fMap[0][2] * b.fMap[2][0]) + (fMap[0][3] * b.fMap[3][0]);
|
||||
c.fMap[0][1] = (fMap[0][0] * b.fMap[0][1]) + (fMap[0][1] * b.fMap[1][1]) + (fMap[0][2] * b.fMap[2][1]) + (fMap[0][3] * b.fMap[3][1]);
|
||||
c.fMap[0][2] = (fMap[0][0] * b.fMap[0][2]) + (fMap[0][1] * b.fMap[1][2]) + (fMap[0][2] * b.fMap[2][2]) + (fMap[0][3] * b.fMap[3][2]);
|
||||
@ -127,6 +174,7 @@ hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const
|
||||
c.fMap[3][1] = (fMap[3][0] * b.fMap[0][1]) + (fMap[3][1] * b.fMap[1][1]) + (fMap[3][2] * b.fMap[2][1]) + (fMap[3][3] * b.fMap[3][1]);
|
||||
c.fMap[3][2] = (fMap[3][0] * b.fMap[0][2]) + (fMap[3][1] * b.fMap[1][2]) + (fMap[3][2] * b.fMap[2][2]) + (fMap[3][3] * b.fMap[3][2]);
|
||||
c.fMap[3][3] = (fMap[3][0] * b.fMap[0][3]) + (fMap[3][1] * b.fMap[1][3]) + (fMap[3][2] * b.fMap[2][3]) + (fMap[3][3] * b.fMap[3][3]);
|
||||
#endif
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -380,6 +380,7 @@ plProfile_CreateTimer(" CIRecalcT", "Object", CIRecalcT);
|
||||
plProfile_CreateTimer(" CIDirtyT", "Object", CIDirtyT);
|
||||
plProfile_CreateTimer(" CISetT", "Object", CISetT);
|
||||
|
||||
#ifndef HAVE_SSE
|
||||
static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs)
|
||||
{
|
||||
hsMatrix44 ret;
|
||||
@ -440,6 +441,7 @@ static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& r
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif // HAVE_SSE
|
||||
|
||||
void plCoordinateInterface::IRecalcTransforms()
|
||||
{
|
||||
@ -447,7 +449,7 @@ void plCoordinateInterface::IRecalcTransforms()
|
||||
plProfile_BeginTiming(CIRecalcT);
|
||||
if( fParent )
|
||||
{
|
||||
#if 0
|
||||
#ifdef HAVE_SSE
|
||||
fLocalToWorld = fParent->GetLocalToWorld() * fLocalToParent;
|
||||
fWorldToLocal = fParentToLocal * fParent->GetWorldToLocal();
|
||||
#else
|
||||
|
@ -416,7 +416,7 @@ hsBool plDrawableSpans::IBoundsInvalid(const hsBounds3Ext& bnd) const
|
||||
}
|
||||
|
||||
//// SetTransform ////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HAVE_SSE
|
||||
static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs)
|
||||
{
|
||||
hsMatrix44 ret;
|
||||
@ -477,6 +477,7 @@ static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& r
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef MF_TEST_UPDATE
|
||||
plProfile_CreateCounter("DSSetTrans", "Update", DSSetTrans);
|
||||
@ -520,13 +521,13 @@ plDrawable& plDrawableSpans::SetTransform( uint32_t index, const hsMatrix44& l2w
|
||||
#endif // MF_TEST_UPDATE
|
||||
for( i = 0; i < spans->GetCount(); i++ )
|
||||
{
|
||||
#if 0
|
||||
#ifdef HAVE_SSE
|
||||
fLocalToWorlds[ (*spans)[ i ] ] = l2w * fLocalToBones[ (*spans)[ i ] ];
|
||||
fWorldToLocals[ (*spans)[ i ] ] = fBoneToLocals[ (*spans)[ i ] ] * w2l;
|
||||
#else
|
||||
fLocalToWorlds[ (*spans)[ i ] ] = IMatrixMul34(l2w, fLocalToBones[ (*spans)[ i ] ]);
|
||||
fWorldToLocals[ (*spans)[ i ] ] = IMatrixMul34(fBoneToLocals[ (*spans)[ i ] ], w2l);
|
||||
#endif
|
||||
#endif // HAVE_SSE
|
||||
}
|
||||
#ifdef MF_TEST_UPDATE
|
||||
plProfile_EndTiming(DSMatTransT);
|
||||
|
Reference in New Issue
Block a user