mirror of
https://foundry.openuru.org/gitblit/r/CWE-ou-minkata.git
synced 2025-07-17 18:59:09 +00:00
@ -84,6 +84,12 @@ if(MSVC)
|
||||
add_definitions(-D_SCL_SECURE_NO_WARNINGS)
|
||||
endif(MSVC)
|
||||
|
||||
# TODO: Maybe some kind of automated test here?
# Build-time switch for the hand-written SSE code paths (ON by default).
# When enabled, HAVE_SSE is added to the compile definitions so the sources
# can select their intrinsic implementations with #ifdef HAVE_SSE.
option(PLASMA_USE_SSE "Enable SSE optimizations?" ON)
if(PLASMA_USE_SSE)
    add_definitions(-DHAVE_SSE)
endif()
|
||||
|
||||
#TODO: Make the OpenSSL includes less promiscuous so this isn't needed
|
||||
include_directories(${OPENSSL_INCLUDE_DIR})
|
||||
|
||||
|
@ -49,6 +49,10 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
|
||||
#include <dmdfm.h> // Windows Load EXE into memory suff
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_SSE
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include "HeadSpin.h"
|
||||
@ -1440,11 +1444,35 @@ LONG WINAPI plCustomUnhandledExceptionFilter( struct _EXCEPTION_POINTERS *Except
|
||||
return EXCEPTION_EXECUTE_HANDLER;
|
||||
}
|
||||
|
||||
/**
 * Startup check that the host CPU supports every instruction-set extension
 * this build was compiled to use.
 *
 * @return true when all required features are present (always true when no
 *         optional extension is compiled in), false when one is missing.
 */
bool CheckCPU()
{
#ifdef HAVE_SSE
    // <intrin.h> (which declares __cpuid) is only included when HAVE_SSE is
    // defined, so the query itself has to live inside the same guard.
    const unsigned int sse3_flag = 0x00000001;
    // (any other CPU features...)

    // CPUID function 1: feature flags; cpu_info[2] is tested against the
    // SSE3 bit below.
    int cpu_info[4];
    __cpuid(cpu_info, 1);

    // The parentheses matter: `==` binds tighter than `&`, so the previous
    // `cpu_info[2] & sse3_flag == 0` evaluated `cpu_info[2] & 0` and the
    // check could never fail.
    if ((cpu_info[2] & sse3_flag) == 0)
        return false;
#endif
    // Insert additional feature checks here

    return true;
}
|
||||
|
||||
#include "pfConsoleCore/pfConsoleEngine.h"
|
||||
PF_CONSOLE_LINK_ALL()
|
||||
|
||||
int WINAPI WinMain(HINSTANCE hInst, HINSTANCE hPrevInst, LPSTR lpCmdLine, int nCmdShow)
|
||||
{
|
||||
// Check to make sure we have a good CPU before getting started
|
||||
if (!CheckCPU())
|
||||
{
|
||||
plString msg = plString::Format("Your processor does not support all of the features required to play %S", ProductLongName());
|
||||
hsMessageBox(msg.c_str(), "Error", hsMessageBoxNormal, hsMessageBoxIconError);
|
||||
return PARABLE_NORMAL_EXIT;
|
||||
}
|
||||
|
||||
PF_CONSOLE_INIT_ALL()
|
||||
|
||||
// Set global handle
|
||||
|
@ -47,6 +47,10 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
|
||||
#include "hsStream.h"
|
||||
#include <math.h>
|
||||
|
||||
#ifdef HAVE_SSE
|
||||
# include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
static hsMatrix44 myIdent = hsMatrix44().Reset();
|
||||
const hsMatrix44& hsMatrix44::IdentityMatrix() { return myIdent; }
|
||||
|
||||
@ -92,6 +96,18 @@ void hsMatrix44::DecompRigid(hsScalarTriple &translate, hsQuat &rotate) const
|
||||
rotate.QuatFromMatrix44(*this);
|
||||
}
|
||||
|
||||
#ifdef HAVE_SSE
// Statement macros for the SSE path of hsMatrix44::operator*.
// They expand to bare statements and rely on these names being visible at
// the point of use:
//   xmm  - scratch array of (at least) eight __m128
//   fMap - rows of the left-hand matrix
//   b    - the right-hand matrix
//   c    - the result matrix
// Per output row i: MULTBEGIN(i); MULTCELL(i, 0) .. MULTCELL(i, 3); MULTFINISH(i);

// Load row i of the left-hand matrix into xmm[0].
#   define MULTBEGIN(i) \
        xmm[0] = _mm_loadu_ps(fMap[i]);

// Gather column j of b into xmm[1] and keep the element-wise product with
// the current row in xmm[j + 2].  `i` is unused; it only mirrors the other
// two macros at the call site.
#   define MULTCELL(i, j) \
        xmm[1] = _mm_set_ps(b.fMap[3][j], b.fMap[2][j], b.fMap[1][j], b.fMap[0][j]); \
        xmm[(j) + 2] = _mm_mul_ps(xmm[0], xmm[1]);

// Reduce the four products in xmm[2..5] with horizontal adds so that lane j
// holds the sum for column j, then store the four sums as row i of c.
#   define MULTFINISH(i) \
        xmm[6] = _mm_hadd_ps(xmm[2], xmm[3]); \
        xmm[7] = _mm_hadd_ps(xmm[4], xmm[5]); \
        xmm[1] = _mm_hadd_ps(xmm[6], xmm[7]); \
        _mm_storeu_ps(c.fMap[i], xmm[1]);
#endif
|
||||
|
||||
hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const
|
||||
{
|
||||
@ -108,6 +124,37 @@ hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const
|
||||
if( b.fFlags & hsMatrix44::kIsIdent )
|
||||
return *this;
|
||||
|
||||
#ifdef HAVE_SSE
|
||||
__m128 xmm[8];
|
||||
|
||||
MULTBEGIN(0);
|
||||
MULTCELL(0, 0);
|
||||
MULTCELL(0, 1);
|
||||
MULTCELL(0, 2);
|
||||
MULTCELL(0, 3);
|
||||
MULTFINISH(0);
|
||||
|
||||
MULTBEGIN(1);
|
||||
MULTCELL(1, 0);
|
||||
MULTCELL(1, 1);
|
||||
MULTCELL(1, 2);
|
||||
MULTCELL(1, 3);
|
||||
MULTFINISH(1);
|
||||
|
||||
MULTBEGIN(2);
|
||||
MULTCELL(2, 0);
|
||||
MULTCELL(2, 1);
|
||||
MULTCELL(2, 2);
|
||||
MULTCELL(2, 3);
|
||||
MULTFINISH(2);
|
||||
|
||||
MULTBEGIN(3);
|
||||
MULTCELL(3, 0);
|
||||
MULTCELL(3, 1);
|
||||
MULTCELL(3, 2);
|
||||
MULTCELL(3, 3);
|
||||
MULTFINISH(3);
|
||||
#else
|
||||
c.fMap[0][0] = (fMap[0][0] * b.fMap[0][0]) + (fMap[0][1] * b.fMap[1][0]) + (fMap[0][2] * b.fMap[2][0]) + (fMap[0][3] * b.fMap[3][0]);
|
||||
c.fMap[0][1] = (fMap[0][0] * b.fMap[0][1]) + (fMap[0][1] * b.fMap[1][1]) + (fMap[0][2] * b.fMap[2][1]) + (fMap[0][3] * b.fMap[3][1]);
|
||||
c.fMap[0][2] = (fMap[0][0] * b.fMap[0][2]) + (fMap[0][1] * b.fMap[1][2]) + (fMap[0][2] * b.fMap[2][2]) + (fMap[0][3] * b.fMap[3][2]);
|
||||
@ -127,6 +174,7 @@ hsMatrix44 hsMatrix44::operator*(const hsMatrix44& b) const
|
||||
c.fMap[3][1] = (fMap[3][0] * b.fMap[0][1]) + (fMap[3][1] * b.fMap[1][1]) + (fMap[3][2] * b.fMap[2][1]) + (fMap[3][3] * b.fMap[3][1]);
|
||||
c.fMap[3][2] = (fMap[3][0] * b.fMap[0][2]) + (fMap[3][1] * b.fMap[1][2]) + (fMap[3][2] * b.fMap[2][2]) + (fMap[3][3] * b.fMap[3][2]);
|
||||
c.fMap[3][3] = (fMap[3][0] * b.fMap[0][3]) + (fMap[3][1] * b.fMap[1][3]) + (fMap[3][2] * b.fMap[2][3]) + (fMap[3][3] * b.fMap[3][3]);
|
||||
#endif
|
||||
|
||||
return c;
|
||||
}
|
||||
|
@ -177,6 +177,12 @@ inline float hsRadiansToDegrees(float rad) { return float(rad * (180 / M_PI)); }
|
||||
#include <new>
|
||||
#define NEWZERO(t) new(calloc(sizeof(t), 1)) t
|
||||
|
||||
// ALIGN(n): compiler-specific prefix that requests n-byte alignment for the
// declaration it precedes, e.g.  ALIGN(16) float buf[4];
#ifdef _MSC_VER
#   define ALIGN(n) __declspec(align(n))
#else
    // GCC/Clang spelling: the keyword is __attribute__ and the attribute
    // list must be wrapped in a second pair of parentheses.
#   define ALIGN(n) __attribute__((aligned(n)))
#endif
|
||||
|
||||
/////////////////////////////
|
||||
// Physical memory functions
|
||||
/////////////////////////////
|
||||
|
@ -380,6 +380,7 @@ plProfile_CreateTimer(" CIRecalcT", "Object", CIRecalcT);
|
||||
plProfile_CreateTimer(" CIDirtyT", "Object", CIDirtyT);
|
||||
plProfile_CreateTimer(" CISetT", "Object", CISetT);
|
||||
|
||||
#ifndef HAVE_SSE
|
||||
static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs)
|
||||
{
|
||||
hsMatrix44 ret;
|
||||
@ -440,6 +441,7 @@ static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& r
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif // HAVE_SSE
|
||||
|
||||
void plCoordinateInterface::IRecalcTransforms()
|
||||
{
|
||||
@ -447,7 +449,7 @@ void plCoordinateInterface::IRecalcTransforms()
|
||||
plProfile_BeginTiming(CIRecalcT);
|
||||
if( fParent )
|
||||
{
|
||||
#if 0
|
||||
#ifdef HAVE_SSE
|
||||
fLocalToWorld = fParent->GetLocalToWorld() * fLocalToParent;
|
||||
fWorldToLocal = fParentToLocal * fParent->GetWorldToLocal();
|
||||
#else
|
||||
|
@ -416,7 +416,7 @@ hsBool plDrawableSpans::IBoundsInvalid(const hsBounds3Ext& bnd) const
|
||||
}
|
||||
|
||||
//// SetTransform ////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef HAVE_SSE
|
||||
static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs)
|
||||
{
|
||||
hsMatrix44 ret;
|
||||
@ -477,6 +477,7 @@ static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& r
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef MF_TEST_UPDATE
|
||||
plProfile_CreateCounter("DSSetTrans", "Update", DSSetTrans);
|
||||
@ -520,13 +521,13 @@ plDrawable& plDrawableSpans::SetTransform( uint32_t index, const hsMatrix44& l2w
|
||||
#endif // MF_TEST_UPDATE
|
||||
for( i = 0; i < spans->GetCount(); i++ )
|
||||
{
|
||||
#if 0
|
||||
#ifdef HAVE_SSE
|
||||
fLocalToWorlds[ (*spans)[ i ] ] = l2w * fLocalToBones[ (*spans)[ i ] ];
|
||||
fWorldToLocals[ (*spans)[ i ] ] = fBoneToLocals[ (*spans)[ i ] ] * w2l;
|
||||
#else
|
||||
fLocalToWorlds[ (*spans)[ i ] ] = IMatrixMul34(l2w, fLocalToBones[ (*spans)[ i ] ]);
|
||||
fWorldToLocals[ (*spans)[ i ] ] = IMatrixMul34(fBoneToLocals[ (*spans)[ i ] ], w2l);
|
||||
#endif
|
||||
#endif // HAVE_SSE
|
||||
}
|
||||
#ifdef MF_TEST_UPDATE
|
||||
plProfile_EndTiming(DSMatTransT);
|
||||
|
@ -163,6 +163,10 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#ifdef HAVE_SSE
|
||||
# include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
//#define MF_TOSSER
|
||||
|
||||
int mfCurrentTest = 100;
|
||||
@ -10523,39 +10527,77 @@ void plDXPipeline::LoadResources()
|
||||
|
||||
// Sorry about this, but it really did speed up the skinning.
// Just some macros for the inner loop of IBlendVertsIntoBuffer.
//
//   MATRIXMULTBEGIN(xfm, wgt)      Declares locals in the enclosing scope that
//                                  cache rows 0-2 of xfm and the weight wgt.
//                                  Must precede the other two macros.
//   MATRIXMULTPOINTADD(dst, src)   dst.fX/fY/fZ += (rows 0-2 of xfm applied to
//                                  src, including the fourth-column term) * wgt
//   MATRIXMULTVECTORADD(dst, src)  Same, but without the fourth-column term.
//
// Each macro is defined exactly once per configuration: an intrinsic version
// when HAVE_SSE is set, a scalar version otherwise.  (Previously a third,
// unconditional set using the `register` keyword preceded this block and was
// then redefined by it.)
#ifdef HAVE_SSE
    // `hack` is 16-byte aligned because it is written with _mm_store_ps.
#   define MATRIXMULTBEGIN(xfm, wgt) \
        __m128 mc0, mc1, mc2, mwt, msr, _x, _y, _z, hbuf; \
        ALIGN(16) float hack[4]; \
        mc0 = _mm_loadu_ps((xfm).fMap[0]); \
        mc1 = _mm_loadu_ps((xfm).fMap[1]); \
        mc2 = _mm_loadu_ps((xfm).fMap[2]); \
        mwt = _mm_set_ps1(wgt);

    // The top lane of msr is 1 so the fourth matrix column is added in.
    // Two rounds of horizontal adds collapse each product to a single sum.
#   define MATRIXMULTPOINTADD(dst, src) \
        msr = _mm_set_ps(1.f, (src).fZ, (src).fY, (src).fX); \
        _x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \
        _y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \
        _z = _mm_mul_ps(_mm_mul_ps(mc2, msr), mwt); \
        \
        hbuf = _mm_hadd_ps(_x, _y); \
        hbuf = _mm_hadd_ps(hbuf, hbuf); \
        _mm_store_ps(hack, hbuf); \
        (dst).fX += hack[0]; \
        (dst).fY += hack[1]; \
        hbuf = _mm_hadd_ps(_z, _z); \
        hbuf = _mm_hadd_ps(hbuf, hbuf); \
        _mm_store_ps(hack, hbuf); \
        (dst).fZ += hack[0];

    // Identical to the point version except the top lane of msr is 0, which
    // drops the fourth matrix column from the sums.
#   define MATRIXMULTVECTORADD(dst, src) \
        msr = _mm_set_ps(0.f, (src).fZ, (src).fY, (src).fX); \
        _x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \
        _y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \
        _z = _mm_mul_ps(_mm_mul_ps(mc2, msr), mwt); \
        \
        hbuf = _mm_hadd_ps(_x, _y); \
        hbuf = _mm_hadd_ps(hbuf, hbuf); \
        _mm_store_ps(hack, hbuf); \
        (dst).fX += hack[0]; \
        (dst).fY += hack[1]; \
        hbuf = _mm_hadd_ps(_z, _z); \
        hbuf = _mm_hadd_ps(hbuf, hbuf); \
        _mm_store_ps(hack, hbuf); \
        (dst).fZ += hack[0];
#else
    // Scalar fallback: cache the twelve matrix entries and the weight in
    // plain locals.
#   define MATRIXMULTBEGIN(xfm, wgt) \
        float m00 = (xfm).fMap[0][0]; \
        float m01 = (xfm).fMap[0][1]; \
        float m02 = (xfm).fMap[0][2]; \
        float m03 = (xfm).fMap[0][3]; \
        float m10 = (xfm).fMap[1][0]; \
        float m11 = (xfm).fMap[1][1]; \
        float m12 = (xfm).fMap[1][2]; \
        float m13 = (xfm).fMap[1][3]; \
        float m20 = (xfm).fMap[2][0]; \
        float m21 = (xfm).fMap[2][1]; \
        float m22 = (xfm).fMap[2][2]; \
        float m23 = (xfm).fMap[2][3]; \
        float m_wgt = (wgt); \
        float srcX, srcY, srcZ;

#   define MATRIXMULTPOINTADD(dst, src) \
        srcX = (src).fX; \
        srcY = (src).fY; \
        srcZ = (src).fZ; \
        \
        (dst).fX += (srcX * m00 + srcY * m01 + srcZ * m02 + m03) * m_wgt; \
        (dst).fY += (srcX * m10 + srcY * m11 + srcZ * m12 + m13) * m_wgt; \
        (dst).fZ += (srcX * m20 + srcY * m21 + srcZ * m22 + m23) * m_wgt;

#   define MATRIXMULTVECTORADD(dst, src) \
        srcX = (src).fX; \
        srcY = (src).fY; \
        srcZ = (src).fZ; \
        \
        (dst).fX += (srcX * m00 + srcY * m01 + srcZ * m02) * m_wgt; \
        (dst).fY += (srcX * m10 + srcY * m11 + srcZ * m12) * m_wgt; \
        (dst).fZ += (srcX * m20 + srcY * m21 + srcZ * m22) * m_wgt;
#endif // HAVE_SSE
|
||||
|
||||
// inlTESTPOINT /////////////////////////////////////////
|
||||
// Update mins and maxs if destP is outside.
|
||||
|
Reference in New Issue
Block a user