mirror of
https://foundry.openuru.org/gitblit/r/CWE-ou-minkata.git
synced 2025-07-19 11:49:09 +00:00
Update hsMatrix44 to use hsCpuID dispatcher.
This commit is contained in:
@ -47,13 +47,16 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
|
|||||||
#include "hsStream.h"
|
#include "hsStream.h"
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
#ifdef HAVE_SSE
|
#ifdef HS_SIMD_INCLUDE
|
||||||
# include <smmintrin.h>
|
# include HS_SIMD_INCLUDE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static hsMatrix44 myIdent = hsMatrix44().Reset();
|
static hsMatrix44 myIdent = hsMatrix44().Reset();
|
||||||
const hsMatrix44& hsMatrix44::IdentityMatrix() { return myIdent; }
|
const hsMatrix44& hsMatrix44::IdentityMatrix() { return myIdent; }
|
||||||
|
|
||||||
|
// CPU-optimized functions requiring dispatch
|
||||||
|
// Out-of-class definition of the static matrix-multiply dispatcher.
// It is constructed with mat_mult_fpu as the first argument, 0 for the
// second and third arguments, and mat_mult_sse3 as the fourth.
// NOTE(review): what each positional argument stands for is defined by
// hsFunctionDispatcher (hsCpuID.h), which is not visible here -- confirm
// before adding or reordering implementations.
hsFunctionDispatcher<hsMatrix44::mat_mult_ptr> hsMatrix44::mat_mult(hsMatrix44::mat_mult_fpu, 0, 0, hsMatrix44::mat_mult_sse3);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
For the rotation:
|
For the rotation:
|
||||||
| 2 2 |
|
| 2 2 |
|
||||||
@ -96,9 +99,47 @@ void hsMatrix44::DecompRigid(hsScalarTriple &translate, hsQuat &rotate) const
|
|||||||
rotate.QuatFromMatrix44(*this);
|
rotate.QuatFromMatrix44(*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_SSE
|
// Scalar 4x4 matrix product: returns a * b using plain arithmetic only.
//
// Operands flagged kIsIdent are short-circuited:
//   - both flagged -> a freshly Reset() matrix is returned,
//   - only a       -> a copy of b is returned,
//   - only b       -> a copy of a is returned.
// Otherwise every element of the result is the dot product of the matching
// row of a with the matching column of b.
hsMatrix44 hsMatrix44::mat_mult_fpu(const hsMatrix44 &a, const hsMatrix44 &b)
{
    hsMatrix44 product;

    // Both operands carry the identity flag: nothing to multiply.
    if (a.fFlags & b.fFlags & hsMatrix44::kIsIdent)
    {
        product.Reset();
        return product;
    }

    // Exactly one operand is the identity: the other one is the answer.
    if (a.fFlags & hsMatrix44::kIsIdent)
    {
        return b;
    }
    if (b.fFlags & hsMatrix44::kIsIdent)
    {
        return a;
    }

    // General case. The four products are summed left to right, exactly as
    // in the unrolled form, so the floating-point evaluation order is kept.
    for (int row = 0; row < 4; ++row)
    {
        for (int col = 0; col < 4; ++col)
        {
            product.fMap[row][col] = (a.fMap[row][0] * b.fMap[0][col])
                                   + (a.fMap[row][1] * b.fMap[1][col])
                                   + (a.fMap[row][2] * b.fMap[2][col])
                                   + (a.fMap[row][3] * b.fMap[3][col]);
        }
    }

    return product;
}
|
||||||
|
|
||||||
|
#ifdef HS_SSE3
|
||||||
# define MULTBEGIN(i) \
|
# define MULTBEGIN(i) \
|
||||||
xmm[0] = _mm_loadu_ps(fMap[i]);
|
xmm[0] = _mm_loadu_ps(a.fMap[i]);
|
||||||
# define MULTCELL(i, j) \
|
# define MULTCELL(i, j) \
|
||||||
xmm[1] = _mm_set_ps(b.fMap[3][j], b.fMap[2][j], b.fMap[1][j], b.fMap[0][j]); \
|
xmm[1] = _mm_set_ps(b.fMap[3][j], b.fMap[2][j], b.fMap[1][j], b.fMap[0][j]); \
|
||||||
xmm[j+2] = _mm_mul_ps(xmm[0], xmm[1]);
|
xmm[j+2] = _mm_mul_ps(xmm[0], xmm[1]);
|
||||||
@ -107,24 +148,23 @@ void hsMatrix44::DecompRigid(hsScalarTriple &translate, hsQuat &rotate) const
|
|||||||
xmm[7] = _mm_hadd_ps(xmm[4], xmm[5]); \
|
xmm[7] = _mm_hadd_ps(xmm[4], xmm[5]); \
|
||||||
xmm[1] = _mm_hadd_ps(xmm[6], xmm[7]); \
|
xmm[1] = _mm_hadd_ps(xmm[6], xmm[7]); \
|
||||||
_mm_storeu_ps(c.fMap[i], xmm[1]);
|
_mm_storeu_ps(c.fMap[i], xmm[1]);
|
||||||
#endif
|
#endif // HS_SSE3
|
||||||
|
|
||||||
hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const
|
hsMatrix44 hsMatrix44::mat_mult_sse3(const hsMatrix44 &a, const hsMatrix44 &b)
|
||||||
{
|
{
|
||||||
hsMatrix44 c;
|
hsMatrix44 c;
|
||||||
|
#ifdef HS_SSE3
|
||||||
if( fFlags & b.fFlags & hsMatrix44::kIsIdent )
|
if( a.fFlags & b.fFlags & hsMatrix44::kIsIdent )
|
||||||
{
|
{
|
||||||
c.Reset();
|
c.Reset();
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( fFlags & hsMatrix44::kIsIdent )
|
if( a.fFlags & hsMatrix44::kIsIdent )
|
||||||
return b;
|
return b;
|
||||||
if( b.fFlags & hsMatrix44::kIsIdent )
|
if( b.fFlags & hsMatrix44::kIsIdent )
|
||||||
return *this;
|
return a;
|
||||||
|
|
||||||
#ifdef HAVE_SSE
|
|
||||||
__m128 xmm[8];
|
__m128 xmm[8];
|
||||||
|
|
||||||
MULTBEGIN(0);
|
MULTBEGIN(0);
|
||||||
@ -154,28 +194,7 @@ hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const
|
|||||||
MULTCELL(3, 2);
|
MULTCELL(3, 2);
|
||||||
MULTCELL(3, 3);
|
MULTCELL(3, 3);
|
||||||
MULTFINISH(3);
|
MULTFINISH(3);
|
||||||
#else
|
#endif // HS_SSE3
|
||||||
c.fMap[0][0] = (fMap[0][0] * b.fMap[0][0]) + (fMap[0][1] * b.fMap[1][0]) + (fMap[0][2] * b.fMap[2][0]) + (fMap[0][3] * b.fMap[3][0]);
|
|
||||||
c.fMap[0][1] = (fMap[0][0] * b.fMap[0][1]) + (fMap[0][1] * b.fMap[1][1]) + (fMap[0][2] * b.fMap[2][1]) + (fMap[0][3] * b.fMap[3][1]);
|
|
||||||
c.fMap[0][2] = (fMap[0][0] * b.fMap[0][2]) + (fMap[0][1] * b.fMap[1][2]) + (fMap[0][2] * b.fMap[2][2]) + (fMap[0][3] * b.fMap[3][2]);
|
|
||||||
c.fMap[0][3] = (fMap[0][0] * b.fMap[0][3]) + (fMap[0][1] * b.fMap[1][3]) + (fMap[0][2] * b.fMap[2][3]) + (fMap[0][3] * b.fMap[3][3]);
|
|
||||||
|
|
||||||
c.fMap[1][0] = (fMap[1][0] * b.fMap[0][0]) + (fMap[1][1] * b.fMap[1][0]) + (fMap[1][2] * b.fMap[2][0]) + (fMap[1][3] * b.fMap[3][0]);
|
|
||||||
c.fMap[1][1] = (fMap[1][0] * b.fMap[0][1]) + (fMap[1][1] * b.fMap[1][1]) + (fMap[1][2] * b.fMap[2][1]) + (fMap[1][3] * b.fMap[3][1]);
|
|
||||||
c.fMap[1][2] = (fMap[1][0] * b.fMap[0][2]) + (fMap[1][1] * b.fMap[1][2]) + (fMap[1][2] * b.fMap[2][2]) + (fMap[1][3] * b.fMap[3][2]);
|
|
||||||
c.fMap[1][3] = (fMap[1][0] * b.fMap[0][3]) + (fMap[1][1] * b.fMap[1][3]) + (fMap[1][2] * b.fMap[2][3]) + (fMap[1][3] * b.fMap[3][3]);
|
|
||||||
|
|
||||||
c.fMap[2][0] = (fMap[2][0] * b.fMap[0][0]) + (fMap[2][1] * b.fMap[1][0]) + (fMap[2][2] * b.fMap[2][0]) + (fMap[2][3] * b.fMap[3][0]);
|
|
||||||
c.fMap[2][1] = (fMap[2][0] * b.fMap[0][1]) + (fMap[2][1] * b.fMap[1][1]) + (fMap[2][2] * b.fMap[2][1]) + (fMap[2][3] * b.fMap[3][1]);
|
|
||||||
c.fMap[2][2] = (fMap[2][0] * b.fMap[0][2]) + (fMap[2][1] * b.fMap[1][2]) + (fMap[2][2] * b.fMap[2][2]) + (fMap[2][3] * b.fMap[3][2]);
|
|
||||||
c.fMap[2][3] = (fMap[2][0] * b.fMap[0][3]) + (fMap[2][1] * b.fMap[1][3]) + (fMap[2][2] * b.fMap[2][3]) + (fMap[2][3] * b.fMap[3][3]);
|
|
||||||
|
|
||||||
c.fMap[3][0] = (fMap[3][0] * b.fMap[0][0]) + (fMap[3][1] * b.fMap[1][0]) + (fMap[3][2] * b.fMap[2][0]) + (fMap[3][3] * b.fMap[3][0]);
|
|
||||||
c.fMap[3][1] = (fMap[3][0] * b.fMap[0][1]) + (fMap[3][1] * b.fMap[1][1]) + (fMap[3][2] * b.fMap[2][1]) + (fMap[3][3] * b.fMap[3][1]);
|
|
||||||
c.fMap[3][2] = (fMap[3][0] * b.fMap[0][2]) + (fMap[3][1] * b.fMap[1][2]) + (fMap[3][2] * b.fMap[2][2]) + (fMap[3][3] * b.fMap[3][2]);
|
|
||||||
c.fMap[3][3] = (fMap[3][0] * b.fMap[0][3]) + (fMap[3][1] * b.fMap[1][3]) + (fMap[3][2] * b.fMap[2][3]) + (fMap[3][3] * b.fMap[3][3]);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,6 +44,7 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
|
|||||||
|
|
||||||
#include "HeadSpin.h"
|
#include "HeadSpin.h"
|
||||||
#include "hsGeometry3.h"
|
#include "hsGeometry3.h"
|
||||||
|
#include "hsCpuID.h"
|
||||||
|
|
||||||
class hsQuat;
|
class hsQuat;
|
||||||
|
|
||||||
@ -104,7 +105,7 @@ struct hsMatrix44 {
|
|||||||
const hsVector3* up);
|
const hsVector3* up);
|
||||||
|
|
||||||
hsBool GetParity() const;
|
hsBool GetParity() const;
|
||||||
float GetDeterminant() const;
|
float GetDeterminant() const;
|
||||||
hsMatrix44* GetInverse(hsMatrix44* inverse) const;
|
hsMatrix44* GetInverse(hsMatrix44* inverse) const;
|
||||||
hsMatrix44* GetTranspose(hsMatrix44* inverse) const;
|
hsMatrix44* GetTranspose(hsMatrix44* inverse) const;
|
||||||
hsMatrix44* GetAdjoint(hsMatrix44* adjoint) const;
|
hsMatrix44* GetAdjoint(hsMatrix44* adjoint) const;
|
||||||
@ -140,7 +141,7 @@ struct hsMatrix44 {
|
|||||||
return rVal;
|
return rVal;
|
||||||
}
|
}
|
||||||
hsVector3 operator*(const hsVector3& p) const;
|
hsVector3 operator*(const hsVector3& p) const;
|
||||||
hsMatrix44 operator*(const hsMatrix44& b) const;
|
// Matrix product (*this) * other. Forwards both operands to mat_mult.call(),
// so the routine that actually runs is whichever one the dispatcher invokes.
hsMatrix44 operator *(const hsMatrix44& other) const { return mat_mult.call(*this, other); }
|
||||||
|
|
||||||
hsPoint3* MapPoints(long count, hsPoint3 points[]) const;
|
hsPoint3* MapPoints(long count, hsPoint3 points[]) const;
|
||||||
|
|
||||||
@ -152,6 +153,12 @@ struct hsMatrix44 {
|
|||||||
|
|
||||||
void Read(hsStream *stream);
|
void Read(hsStream *stream);
|
||||||
void Write(hsStream *stream);
|
void Write(hsStream *stream);
|
||||||
|
|
||||||
|
// CPU-optimized functions
// Matrix multiplication is routed through a dispatcher object: operator*
// calls mat_mult.call(lhs, rhs) instead of a fixed implementation.
|
||||||
|
// Signature shared by the multiply implementations: takes the left and right
// operands by const reference and returns the product by value.
typedef hsMatrix44(*mat_mult_ptr)(const hsMatrix44&, const hsMatrix44&);
|
||||||
|
// Multiply written with plain scalar arithmetic; also short-circuits
// operands whose fFlags carry kIsIdent.
static hsMatrix44 mat_mult_fpu(const hsMatrix44&, const hsMatrix44&);
|
||||||
|
// Multiply written with _mm_* SIMD intrinsics, compiled under #ifdef HS_SSE3.
// NOTE(review): from the visible part of the definition, a build without
// HS_SSE3 appears to leave only "hsMatrix44 c; return c;" -- confirm the
// dispatcher can never select this entry in such a build.
static hsMatrix44 mat_mult_sse3(const hsMatrix44&, const hsMatrix44&);
|
||||||
|
// Dispatcher holding the implementations above; defined out of line.
static hsFunctionDispatcher<mat_mult_ptr> mat_mult;
|
||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
|
Reference in New Issue
Block a user