// NOTE(review): reviewer annotations only; everything from "diff --git" onward is the patch text, unchanged.
//
// Scope: one patch over three files (hsMatrix44.cpp, plCoordinateInterface.cpp, plDrawableSpans.cpp).
//
// hsMatrix44.cpp
//   * Adds a HAVE_SSE-guarded "# include"; the header name is missing in this text, as is the one on the
//     preceding "#include" context line -- presumably stripped "<...>" names, TODO restore from the real commit.
//   * MULTBEGIN(i)   : xmm[0] = unaligned load of the four floats at fMap[i] (row i of the left operand).
//   * MULTCELL(i, j) : xmm[1] = column j of b, lane 0 = b.fMap[0][j] (_mm_set_ps takes the highest lane first);
//                      xmm[j+2] = lane-wise product xmm[0] * xmm[1]. The `i` argument is not used.
//   * MULTFINISH(i)  : three _mm_hadd_ps calls reduce xmm[2..5] so that lane j is the sum of xmm[j+2], i.e. the
//                      dot product of row i with column j; the result is stored unaligned to c.fMap[i].
//   * operator* expands BEGIN / 4x CELL / FINISH once per row when HAVE_SSE is defined; otherwise the existing
//     scalar expressions are compiled.
//   * NOTE(review): _mm_hadd_ps is an SSE3 intrinsic (<pmmintrin.h>); confirm that HAVE_SSE is only defined for
//     SSE3-capable targets.
//   * NOTE(review): the hadd reduction adds (p0 + p1) + (p2 + p3), while the scalar expressions add left to
//     right, so the two builds may differ in the last bits of a result.
//   * NOTE(review): no #undef of the MULT* macros is visible in these hunks.
//
// plCoordinateInterface.cpp / plDrawableSpans.cpp
//   * The file-local IMatrixMul34 helpers are wrapped in "#ifndef HAVE_SSE".
//   * The former "#if 0" branches become "#ifdef HAVE_SSE", so with HAVE_SSE the transforms are built with
//     hsMatrix44::operator*; in plDrawableSpans.cpp the #else branch keeps calling IMatrixMul34 (the #else body in
//     plCoordinateInterface.cpp is outside the hunk).
//   * NOTE(review): the IMatrixMul34 bodies are not part of the hunks, so equivalence with operator* for the
//     matrices used here cannot be checked from this patch -- TODO confirm.
diff --git a/Sources/Plasma/CoreLib/hsMatrix44.cpp b/Sources/Plasma/CoreLib/hsMatrix44.cpp index 81eba463..9c4053f3 100644 --- a/Sources/Plasma/CoreLib/hsMatrix44.cpp +++ b/Sources/Plasma/CoreLib/hsMatrix44.cpp @@ -47,6 +47,10 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include "hsStream.h" #include +#ifdef HAVE_SSE +# include +#endif + static hsMatrix44 myIdent = hsMatrix44().Reset(); const hsMatrix44& hsMatrix44::IdentityMatrix() { return myIdent; } @@ -92,6 +96,18 @@ void hsMatrix44::DecompRigid(hsScalarTriple &translate, hsQuat &rotate) const rotate.QuatFromMatrix44(*this); } +#ifdef HAVE_SSE +# define MULTBEGIN(i) \ + xmm[0] = _mm_loadu_ps(fMap[i]); +# define MULTCELL(i, j) \ + xmm[1] = _mm_set_ps(b.fMap[3][j], b.fMap[2][j], b.fMap[1][j], b.fMap[0][j]); \ + xmm[j+2] = _mm_mul_ps(xmm[0], xmm[1]); +# define MULTFINISH(i) \ + xmm[6] = _mm_hadd_ps(xmm[2], xmm[3]); \ + xmm[7] = _mm_hadd_ps(xmm[4], xmm[5]); \ + xmm[1] = _mm_hadd_ps(xmm[6], xmm[7]); \ + _mm_storeu_ps(c.fMap[i], xmm[1]); +#endif hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const { @@ -108,6 +124,37 @@ hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const if( b.fFlags & hsMatrix44::kIsIdent ) return *this; +#ifdef HAVE_SSE + __m128 xmm[8]; + + MULTBEGIN(0); + MULTCELL(0, 0); + MULTCELL(0, 1); + MULTCELL(0, 2); + MULTCELL(0, 3); + MULTFINISH(0); + + MULTBEGIN(1); + MULTCELL(1, 0); + MULTCELL(1, 1); + MULTCELL(1, 2); + MULTCELL(1, 3); + MULTFINISH(1); + + MULTBEGIN(2); + MULTCELL(2, 0); + MULTCELL(2, 1); + MULTCELL(2, 2); + MULTCELL(2, 3); + MULTFINISH(2); + + MULTBEGIN(3); + MULTCELL(3, 0); + MULTCELL(3, 1); + MULTCELL(3, 2); + MULTCELL(3, 3); + MULTFINISH(3); +#else c.fMap[0][0] = (fMap[0][0] * b.fMap[0][0]) + (fMap[0][1] * b.fMap[1][0]) + (fMap[0][2] * b.fMap[2][0]) + (fMap[0][3] * b.fMap[3][0]); c.fMap[0][1] = (fMap[0][0] * b.fMap[0][1]) + (fMap[0][1] * b.fMap[1][1]) + (fMap[0][2] * b.fMap[2][1]) + (fMap[0][3] * b.fMap[3][1]); c.fMap[0][2] = (fMap[0][0] * 
b.fMap[0][2]) + (fMap[0][1] * b.fMap[1][2]) + (fMap[0][2] * b.fMap[2][2]) + (fMap[0][3] * b.fMap[3][2]); @@ -127,6 +174,7 @@ hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const c.fMap[3][1] = (fMap[3][0] * b.fMap[0][1]) + (fMap[3][1] * b.fMap[1][1]) + (fMap[3][2] * b.fMap[2][1]) + (fMap[3][3] * b.fMap[3][1]); c.fMap[3][2] = (fMap[3][0] * b.fMap[0][2]) + (fMap[3][1] * b.fMap[1][2]) + (fMap[3][2] * b.fMap[2][2]) + (fMap[3][3] * b.fMap[3][2]); c.fMap[3][3] = (fMap[3][0] * b.fMap[0][3]) + (fMap[3][1] * b.fMap[1][3]) + (fMap[3][2] * b.fMap[2][3]) + (fMap[3][3] * b.fMap[3][3]); +#endif return c; } diff --git a/Sources/Plasma/NucleusLib/pnSceneObject/plCoordinateInterface.cpp b/Sources/Plasma/NucleusLib/pnSceneObject/plCoordinateInterface.cpp index 643834be..1c64314d 100644 --- a/Sources/Plasma/NucleusLib/pnSceneObject/plCoordinateInterface.cpp +++ b/Sources/Plasma/NucleusLib/pnSceneObject/plCoordinateInterface.cpp @@ -380,6 +380,7 @@ plProfile_CreateTimer(" CIRecalcT", "Object", CIRecalcT); plProfile_CreateTimer(" CIDirtyT", "Object", CIDirtyT); plProfile_CreateTimer(" CISetT", "Object", CISetT); +#ifndef HAVE_SSE static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs) { hsMatrix44 ret; @@ -440,6 +441,7 @@ static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& r return ret; } +#endif // HAVE_SSE void plCoordinateInterface::IRecalcTransforms() { @@ -447,7 +449,7 @@ void plCoordinateInterface::IRecalcTransforms() plProfile_BeginTiming(CIRecalcT); if( fParent ) { -#if 0 +#ifdef HAVE_SSE fLocalToWorld = fParent->GetLocalToWorld() * fLocalToParent; fWorldToLocal = fParentToLocal * fParent->GetWorldToLocal(); #else diff --git a/Sources/Plasma/PubUtilLib/plDrawable/plDrawableSpans.cpp b/Sources/Plasma/PubUtilLib/plDrawable/plDrawableSpans.cpp index a3199627..3fc19b2a 100644 --- a/Sources/Plasma/PubUtilLib/plDrawable/plDrawableSpans.cpp +++ b/Sources/Plasma/PubUtilLib/plDrawable/plDrawableSpans.cpp @@ -416,7 +416,7 @@ 
hsBool plDrawableSpans::IBoundsInvalid(const hsBounds3Ext& bnd) const } //// SetTransform //////////////////////////////////////////////////////////// - +#ifndef HAVE_SSE static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs) { hsMatrix44 ret; @@ -477,6 +477,7 @@ static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& r return ret; } +#endif #ifdef MF_TEST_UPDATE plProfile_CreateCounter("DSSetTrans", "Update", DSSetTrans); @@ -520,13 +521,13 @@ plDrawable& plDrawableSpans::SetTransform( uint32_t index, const hsMatrix44& l2w #endif // MF_TEST_UPDATE for( i = 0; i < spans->GetCount(); i++ ) { -#if 0 +#ifdef HAVE_SSE fLocalToWorlds[ (*spans)[ i ] ] = l2w * fLocalToBones[ (*spans)[ i ] ]; fWorldToLocals[ (*spans)[ i ] ] = fBoneToLocals[ (*spans)[ i ] ] * w2l; #else fLocalToWorlds[ (*spans)[ i ] ] = IMatrixMul34(l2w, fLocalToBones[ (*spans)[ i ] ]); fWorldToLocals[ (*spans)[ i ] ] = IMatrixMul34(fBoneToLocals[ (*spans)[ i ] ], w2l); -#endif +#endif // HAVE_SSE } #ifdef MF_TEST_UPDATE plProfile_EndTiming(DSMatTransT);