Browse Source

SSE3 hsMatrix44 multiplication

Adam Johnson 13 years ago
parent
commit
062cb15b44
  1. 48
      Sources/Plasma/CoreLib/hsMatrix44.cpp
  2. 4
      Sources/Plasma/NucleusLib/pnSceneObject/plCoordinateInterface.cpp
  3. 7
      Sources/Plasma/PubUtilLib/plDrawable/plDrawableSpans.cpp

48
Sources/Plasma/CoreLib/hsMatrix44.cpp

@ -47,6 +47,10 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
#include "hsStream.h" #include "hsStream.h"
#include <math.h> #include <math.h>
#ifdef HAVE_SSE
# include <smmintrin.h>
#endif
static hsMatrix44 myIdent = hsMatrix44().Reset(); static hsMatrix44 myIdent = hsMatrix44().Reset();
const hsMatrix44& hsMatrix44::IdentityMatrix() { return myIdent; } const hsMatrix44& hsMatrix44::IdentityMatrix() { return myIdent; }
@ -92,6 +96,18 @@ void hsMatrix44::DecompRigid(hsScalarTriple &translate, hsQuat &rotate) const
rotate.QuatFromMatrix44(*this); rotate.QuatFromMatrix44(*this);
} }
#ifdef HAVE_SSE
// Helper macros for the SSE path of hsMatrix44::operator*.
//
// They are not self-contained: each expansion expects the following names to
// be in scope at the point of use:
//   xmm  - an array of at least eight __m128 scratch values
//   fMap - the left-hand matrix (this object), indexed [row][column]
//   b    - the right-hand matrix
//   c    - the result matrix
//
// Intended sequence for one result row i:
//   MULTBEGIN(i); MULTCELL(i, 0..3); MULTFINISH(i);
//
// Every macro body already ends in ';', so writing "MULTBEGIN(0);" leaves a
// harmless empty statement behind.
//
// NOTE(review): _mm_hadd_ps is an SSE3 intrinsic -- confirm that HAVE_SSE is
// only defined when the compiler is actually targeting SSE3 or later.

// Load the four floats of row i of the left-hand matrix into xmm[0].
// Uses the unaligned load, so fMap needs no particular alignment.
# define MULTBEGIN(i) \
xmm[0] = _mm_loadu_ps(fMap[i]);
// Gather column j of b into xmm[1] (_mm_set_ps takes its arguments from the
// highest lane to the lowest, so lane 0 holds b.fMap[0][j]), then store the
// lane-wise product with the row held in xmm[0] into xmm[j+2].
// j must be 0..3 so the products land in xmm[2]..xmm[5].
// The parameter i is not referenced by the macro body.
# define MULTCELL(i, j) \
xmm[1] = _mm_set_ps(b.fMap[3][j], b.fMap[2][j], b.fMap[1][j], b.fMap[0][j]); \
xmm[j+2] = _mm_mul_ps(xmm[0], xmm[1]);
// Reduce the four product vectors xmm[2]..xmm[5] to their four lane sums with
// two levels of horizontal adds: lane k of the final xmm[1] is the sum of all
// lanes of xmm[k+2], i.e. the dot product of row i with column k of b.
// The result is written (unaligned store) to row i of c.
// Clobbers xmm[1], xmm[6] and xmm[7].
# define MULTFINISH(i) \
xmm[6] = _mm_hadd_ps(xmm[2], xmm[3]); \
xmm[7] = _mm_hadd_ps(xmm[4], xmm[5]); \
xmm[1] = _mm_hadd_ps(xmm[6], xmm[7]); \
_mm_storeu_ps(c.fMap[i], xmm[1]);
#endif
hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const
{ {
@ -108,6 +124,37 @@ hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const
if( b.fFlags & hsMatrix44::kIsIdent ) if( b.fFlags & hsMatrix44::kIsIdent )
return *this; return *this;
#ifdef HAVE_SSE
__m128 xmm[8];
MULTBEGIN(0);
MULTCELL(0, 0);
MULTCELL(0, 1);
MULTCELL(0, 2);
MULTCELL(0, 3);
MULTFINISH(0);
MULTBEGIN(1);
MULTCELL(1, 0);
MULTCELL(1, 1);
MULTCELL(1, 2);
MULTCELL(1, 3);
MULTFINISH(1);
MULTBEGIN(2);
MULTCELL(2, 0);
MULTCELL(2, 1);
MULTCELL(2, 2);
MULTCELL(2, 3);
MULTFINISH(2);
MULTBEGIN(3);
MULTCELL(3, 0);
MULTCELL(3, 1);
MULTCELL(3, 2);
MULTCELL(3, 3);
MULTFINISH(3);
#else
c.fMap[0][0] = (fMap[0][0] * b.fMap[0][0]) + (fMap[0][1] * b.fMap[1][0]) + (fMap[0][2] * b.fMap[2][0]) + (fMap[0][3] * b.fMap[3][0]); c.fMap[0][0] = (fMap[0][0] * b.fMap[0][0]) + (fMap[0][1] * b.fMap[1][0]) + (fMap[0][2] * b.fMap[2][0]) + (fMap[0][3] * b.fMap[3][0]);
c.fMap[0][1] = (fMap[0][0] * b.fMap[0][1]) + (fMap[0][1] * b.fMap[1][1]) + (fMap[0][2] * b.fMap[2][1]) + (fMap[0][3] * b.fMap[3][1]); c.fMap[0][1] = (fMap[0][0] * b.fMap[0][1]) + (fMap[0][1] * b.fMap[1][1]) + (fMap[0][2] * b.fMap[2][1]) + (fMap[0][3] * b.fMap[3][1]);
c.fMap[0][2] = (fMap[0][0] * b.fMap[0][2]) + (fMap[0][1] * b.fMap[1][2]) + (fMap[0][2] * b.fMap[2][2]) + (fMap[0][3] * b.fMap[3][2]); c.fMap[0][2] = (fMap[0][0] * b.fMap[0][2]) + (fMap[0][1] * b.fMap[1][2]) + (fMap[0][2] * b.fMap[2][2]) + (fMap[0][3] * b.fMap[3][2]);
@ -127,6 +174,7 @@ hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const
c.fMap[3][1] = (fMap[3][0] * b.fMap[0][1]) + (fMap[3][1] * b.fMap[1][1]) + (fMap[3][2] * b.fMap[2][1]) + (fMap[3][3] * b.fMap[3][1]); c.fMap[3][1] = (fMap[3][0] * b.fMap[0][1]) + (fMap[3][1] * b.fMap[1][1]) + (fMap[3][2] * b.fMap[2][1]) + (fMap[3][3] * b.fMap[3][1]);
c.fMap[3][2] = (fMap[3][0] * b.fMap[0][2]) + (fMap[3][1] * b.fMap[1][2]) + (fMap[3][2] * b.fMap[2][2]) + (fMap[3][3] * b.fMap[3][2]); c.fMap[3][2] = (fMap[3][0] * b.fMap[0][2]) + (fMap[3][1] * b.fMap[1][2]) + (fMap[3][2] * b.fMap[2][2]) + (fMap[3][3] * b.fMap[3][2]);
c.fMap[3][3] = (fMap[3][0] * b.fMap[0][3]) + (fMap[3][1] * b.fMap[1][3]) + (fMap[3][2] * b.fMap[2][3]) + (fMap[3][3] * b.fMap[3][3]); c.fMap[3][3] = (fMap[3][0] * b.fMap[0][3]) + (fMap[3][1] * b.fMap[1][3]) + (fMap[3][2] * b.fMap[2][3]) + (fMap[3][3] * b.fMap[3][3]);
#endif
return c; return c;
} }

4
Sources/Plasma/NucleusLib/pnSceneObject/plCoordinateInterface.cpp

@ -380,6 +380,7 @@ plProfile_CreateTimer(" CIRecalcT", "Object", CIRecalcT);
plProfile_CreateTimer(" CIDirtyT", "Object", CIDirtyT); plProfile_CreateTimer(" CIDirtyT", "Object", CIDirtyT);
plProfile_CreateTimer(" CISetT", "Object", CISetT); plProfile_CreateTimer(" CISetT", "Object", CISetT);
#ifndef HAVE_SSE
static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs) static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs)
{ {
hsMatrix44 ret; hsMatrix44 ret;
@ -440,6 +441,7 @@ static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& r
return ret; return ret;
} }
#endif // HAVE_SSE
void plCoordinateInterface::IRecalcTransforms() void plCoordinateInterface::IRecalcTransforms()
{ {
@ -447,7 +449,7 @@ void plCoordinateInterface::IRecalcTransforms()
plProfile_BeginTiming(CIRecalcT); plProfile_BeginTiming(CIRecalcT);
if( fParent ) if( fParent )
{ {
#if 0 #ifdef HAVE_SSE
fLocalToWorld = fParent->GetLocalToWorld() * fLocalToParent; fLocalToWorld = fParent->GetLocalToWorld() * fLocalToParent;
fWorldToLocal = fParentToLocal * fParent->GetWorldToLocal(); fWorldToLocal = fParentToLocal * fParent->GetWorldToLocal();
#else #else

7
Sources/Plasma/PubUtilLib/plDrawable/plDrawableSpans.cpp

@ -416,7 +416,7 @@ hsBool plDrawableSpans::IBoundsInvalid(const hsBounds3Ext& bnd) const
} }
//// SetTransform //////////////////////////////////////////////////////////// //// SetTransform ////////////////////////////////////////////////////////////
#ifndef HAVE_SSE
static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs) static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs)
{ {
hsMatrix44 ret; hsMatrix44 ret;
@ -477,6 +477,7 @@ static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& r
return ret; return ret;
} }
#endif
#ifdef MF_TEST_UPDATE #ifdef MF_TEST_UPDATE
plProfile_CreateCounter("DSSetTrans", "Update", DSSetTrans); plProfile_CreateCounter("DSSetTrans", "Update", DSSetTrans);
@ -520,13 +521,13 @@ plDrawable& plDrawableSpans::SetTransform( uint32_t index, const hsMatrix44& l2w
#endif // MF_TEST_UPDATE #endif // MF_TEST_UPDATE
for( i = 0; i < spans->GetCount(); i++ ) for( i = 0; i < spans->GetCount(); i++ )
{ {
#if 0 #ifdef HAVE_SSE
fLocalToWorlds[ (*spans)[ i ] ] = l2w * fLocalToBones[ (*spans)[ i ] ]; fLocalToWorlds[ (*spans)[ i ] ] = l2w * fLocalToBones[ (*spans)[ i ] ];
fWorldToLocals[ (*spans)[ i ] ] = fBoneToLocals[ (*spans)[ i ] ] * w2l; fWorldToLocals[ (*spans)[ i ] ] = fBoneToLocals[ (*spans)[ i ] ] * w2l;
#else #else
fLocalToWorlds[ (*spans)[ i ] ] = IMatrixMul34(l2w, fLocalToBones[ (*spans)[ i ] ]); fLocalToWorlds[ (*spans)[ i ] ] = IMatrixMul34(l2w, fLocalToBones[ (*spans)[ i ] ]);
fWorldToLocals[ (*spans)[ i ] ] = IMatrixMul34(fBoneToLocals[ (*spans)[ i ] ], w2l); fWorldToLocals[ (*spans)[ i ] ] = IMatrixMul34(fBoneToLocals[ (*spans)[ i ] ], w2l);
#endif #endif // HAVE_SSE
} }
#ifdef MF_TEST_UPDATE #ifdef MF_TEST_UPDATE
plProfile_EndTiming(DSMatTransT); plProfile_EndTiming(DSMatTransT);

Loading…
Cancel
Save