Browse Source

Remove remaining pre-hsCpuID SSE special-casing.

Joseph Davies 13 years ago
parent
commit
9ee5c4d040
  1. 6
      CMakeLists.txt
  2. 28
      Sources/Plasma/Apps/plClient/winmain.cpp
  3. 7
      Sources/Plasma/NucleusLib/pnSceneObject/plCoordinateInterface.cpp
  4. 8
      Sources/Plasma/PubUtilLib/plDrawable/plDrawableSpans.cpp
  5. 32
      Sources/Plasma/PubUtilLib/plPipeline/plDXPipeline.cpp

6
CMakeLists.txt

@ -84,12 +84,6 @@ if(MSVC)
add_definitions(-D_SCL_SECURE_NO_WARNINGS) add_definitions(-D_SCL_SECURE_NO_WARNINGS)
endif(MSVC) endif(MSVC)
# TODO: Maybe some kind of automated test here?
option(PLASMA_USE_SSE "Enable SSE optimizations?" ON)
if(PLASMA_USE_SSE)
add_definitions(-DHAVE_SSE)
endif(PLASMA_USE_SSE)
#TODO: Make the OpenSSL includes less promiscuous so this isn't needed #TODO: Make the OpenSSL includes less promiscuous so this isn't needed
include_directories(${OPENSSL_INCLUDE_DIR}) include_directories(${OPENSSL_INCLUDE_DIR})

28
Sources/Plasma/Apps/plClient/winmain.cpp

@ -49,10 +49,6 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
#include <dmdfm.h> // Windows Load EXE into memory suff #include <dmdfm.h> // Windows Load EXE into memory suff
#endif #endif
#ifdef HAVE_SSE
# include <intrin.h>
#endif
#include <curl/curl.h> #include <curl/curl.h>
#include "HeadSpin.h" #include "HeadSpin.h"
@ -1388,35 +1384,11 @@ LONG WINAPI plCustomUnhandledExceptionFilter( struct _EXCEPTION_POINTERS *Except
} }
#endif #endif
// Reports whether the host CPU provides the features this build requires.
// Queries CPUID leaf 1 and, only when HAVE_SSE is defined, returns false if
// bit 0 of cpu_info[2] is clear. In every other case it returns true.
bool CheckCPU()
{
// Mask for bit 0 of cpu_info[2] (ECX per the MSVC __cpuid documentation);
// going by the name, this is the SSE3 feature flag.
const unsigned int sse3_flag = 0x00000001;
// (any other CPU features...)
int cpu_info[4];
// Fill cpu_info with the four result words of CPUID function 1.
__cpuid(cpu_info, 1);
#ifdef HAVE_SSE
// NOTE(review): the guard is HAVE_SSE but the bit tested is named SSE3 --
// confirm that requiring SSE3 (not just SSE) is intended.
if((cpu_info[2] & sse3_flag) == 0)
return false;
#endif
// Insert additional feature checks here
return true;
}
#include "pfConsoleCore/pfConsoleEngine.h" #include "pfConsoleCore/pfConsoleEngine.h"
PF_CONSOLE_LINK_ALL() PF_CONSOLE_LINK_ALL()
int WINAPI WinMain(HINSTANCE hInst, HINSTANCE hPrevInst, LPSTR lpCmdLine, int nCmdShow) int WINAPI WinMain(HINSTANCE hInst, HINSTANCE hPrevInst, LPSTR lpCmdLine, int nCmdShow)
{ {
// Check to make sure we have a good CPU before getting started
if (!CheckCPU())
{
plString msg = plString::Format("Your processor does not support all of the features required to play %S.", ProductLongName());
hsMessageBox(msg.c_str(), "Error", hsMessageBoxNormal, hsMessageBoxIconError);
return PARABLE_NORMAL_EXIT;
}
PF_CONSOLE_INIT_ALL() PF_CONSOLE_INIT_ALL()
// Set global handle // Set global handle

7
Sources/Plasma/NucleusLib/pnSceneObject/plCoordinateInterface.cpp

@ -380,7 +380,6 @@ plProfile_CreateTimer(" CIRecalcT", "Object", CIRecalcT);
plProfile_CreateTimer(" CIDirtyT", "Object", CIDirtyT); plProfile_CreateTimer(" CIDirtyT", "Object", CIDirtyT);
plProfile_CreateTimer(" CISetT", "Object", CISetT); plProfile_CreateTimer(" CISetT", "Object", CISetT);
#ifndef HAVE_SSE
static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs) static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs)
{ {
hsMatrix44 ret; hsMatrix44 ret;
@ -441,7 +440,6 @@ static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& r
return ret; return ret;
} }
#endif // HAVE_SSE
void plCoordinateInterface::IRecalcTransforms() void plCoordinateInterface::IRecalcTransforms()
{ {
@ -449,13 +447,8 @@ void plCoordinateInterface::IRecalcTransforms()
plProfile_BeginTiming(CIRecalcT); plProfile_BeginTiming(CIRecalcT);
if( fParent ) if( fParent )
{ {
#ifdef HAVE_SSE
fLocalToWorld = fParent->GetLocalToWorld() * fLocalToParent;
fWorldToLocal = fParentToLocal * fParent->GetWorldToLocal();
#else
fLocalToWorld = IMatrixMul34(fParent->GetLocalToWorld(), fLocalToParent); fLocalToWorld = IMatrixMul34(fParent->GetLocalToWorld(), fLocalToParent);
fWorldToLocal = IMatrixMul34(fParentToLocal, fParent->GetWorldToLocal()); fWorldToLocal = IMatrixMul34(fParentToLocal, fParent->GetWorldToLocal());
#endif
} }
else else
{ {

8
Sources/Plasma/PubUtilLib/plDrawable/plDrawableSpans.cpp

@ -416,7 +416,6 @@ hsBool plDrawableSpans::IBoundsInvalid(const hsBounds3Ext& bnd) const
} }
//// SetTransform //////////////////////////////////////////////////////////// //// SetTransform ////////////////////////////////////////////////////////////
#ifndef HAVE_SSE
static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs) static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs)
{ {
hsMatrix44 ret; hsMatrix44 ret;
@ -477,7 +476,6 @@ static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& r
return ret; return ret;
} }
#endif
#ifdef MF_TEST_UPDATE #ifdef MF_TEST_UPDATE
plProfile_CreateCounter("DSSetTrans", "Update", DSSetTrans); plProfile_CreateCounter("DSSetTrans", "Update", DSSetTrans);
@ -521,13 +519,9 @@ plDrawable& plDrawableSpans::SetTransform( uint32_t index, const hsMatrix44& l2w
#endif // MF_TEST_UPDATE #endif // MF_TEST_UPDATE
for( i = 0; i < spans->GetCount(); i++ ) for( i = 0; i < spans->GetCount(); i++ )
{ {
#ifdef HAVE_SSE
fLocalToWorlds[ (*spans)[ i ] ] = l2w * fLocalToBones[ (*spans)[ i ] ];
fWorldToLocals[ (*spans)[ i ] ] = fBoneToLocals[ (*spans)[ i ] ] * w2l;
#else
fLocalToWorlds[ (*spans)[ i ] ] = IMatrixMul34(l2w, fLocalToBones[ (*spans)[ i ] ]); fLocalToWorlds[ (*spans)[ i ] ] = IMatrixMul34(l2w, fLocalToBones[ (*spans)[ i ] ]);
fWorldToLocals[ (*spans)[ i ] ] = IMatrixMul34(fBoneToLocals[ (*spans)[ i ] ], w2l); fWorldToLocals[ (*spans)[ i ] ] = IMatrixMul34(fBoneToLocals[ (*spans)[ i ] ], w2l);
#endif // HAVE_SSE
} }
#ifdef MF_TEST_UPDATE #ifdef MF_TEST_UPDATE
plProfile_EndTiming(DSMatTransT); plProfile_EndTiming(DSMatTransT);

32
Sources/Plasma/PubUtilLib/plPipeline/plDXPipeline.cpp

@ -163,8 +163,8 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
#include <algorithm> #include <algorithm>
#ifdef HAVE_SSE #ifdef HS_SIMD_INCLUDE
# include <smmintrin.h> # include HS_SIMD_INCLUDE
#endif #endif
//#define MF_TOSSER //#define MF_TOSSER
@ -10527,9 +10527,9 @@ void plDXPipeline::LoadResources()
// Sorry about this, but it really did speed up the skinning. // Sorry about this, but it really did speed up the skinning.
// Just some macros for the inner loop of IBlendVertsIntoBuffer. // Just some macros for the inner loop of IBlendVertsIntoBuffer.
#ifdef HAVE_SSE #ifdef HS_SSE3
# define MATRIXMULTBEGIN(xfm, wgt) \ # define MATRIXMULTBEGIN(xfm, wgt) \
__m128 mc0, mc1, mc2, mwt, msr, _x, _y, _z, hbuf; \ __m128 mc0, mc1, mc2, mwt, msr, _x, _y, _z, hbuf1, hbuf2; \
ALIGN(16) float hack[4]; \ ALIGN(16) float hack[4]; \
mc0 = _mm_loadu_ps(xfm.fMap[0]); \ mc0 = _mm_loadu_ps(xfm.fMap[0]); \
mc1 = _mm_loadu_ps(xfm.fMap[1]); \ mc1 = _mm_loadu_ps(xfm.fMap[1]); \
@ -10541,30 +10541,26 @@ void plDXPipeline::LoadResources()
_y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \ _y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \
_z = _mm_mul_ps(_mm_mul_ps(mc2, msr), mwt); \ _z = _mm_mul_ps(_mm_mul_ps(mc2, msr), mwt); \
\ \
hbuf = _mm_hadd_ps(_x, _y); \ hbuf1 = _mm_hadd_ps(_x, _y); \
hbuf = _mm_hadd_ps(hbuf, hbuf); \ hbuf2 = _mm_hadd_ps(_z, _z); \
_mm_store_ps(hack, hbuf); \ hbuf1 = _mm_hadd_ps(hbuf1, hbuf2); \
_mm_store_ps(hack, hbuf1); \
dst.fX += hack[0]; \ dst.fX += hack[0]; \
dst.fY += hack[1]; \ dst.fY += hack[1]; \
hbuf = _mm_hadd_ps(_z, _z); \ dst.fZ += hack[2];
hbuf = _mm_hadd_ps(hbuf, hbuf); \
_mm_store_ps(hack, hbuf); \
dst.fZ += hack[0];
# define MATRIXMULTVECTORADD(dst, src) \ # define MATRIXMULTVECTORADD(dst, src) \
msr = _mm_set_ps(0.f, src.fZ, src.fY, src.fX); \ msr = _mm_set_ps(0.f, src.fZ, src.fY, src.fX); \
_x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \ _x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \
_y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \ _y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \
_z = _mm_mul_ps(_mm_mul_ps(mc2, msr), mwt); \ _z = _mm_mul_ps(_mm_mul_ps(mc2, msr), mwt); \
\ \
hbuf = _mm_hadd_ps(_x, _y); \ hbuf1 = _mm_hadd_ps(_x, _y); \
hbuf = _mm_hadd_ps(hbuf, hbuf); \ hbuf2 = _mm_hadd_ps(_z, _z); \
_mm_store_ps(hack, hbuf); \ hbuf1 = _mm_hadd_ps(hbuf1, hbuf2); \
_mm_store_ps(hack, hbuf1); \
dst.fX += hack[0]; \ dst.fX += hack[0]; \
dst.fY += hack[1]; \ dst.fY += hack[1]; \
hbuf = _mm_hadd_ps(_z, _z); \ dst.fZ += hack[2];
hbuf = _mm_hadd_ps(hbuf, hbuf); \
_mm_store_ps(hack, hbuf); \
dst.fZ += hack[0];
#else #else
# define MATRIXMULTBEGIN(xfm, wgt) \ # define MATRIXMULTBEGIN(xfm, wgt) \
float m00 = xfm.fMap[0][0]; \ float m00 = xfm.fMap[0][0]; \

Loading…
Cancel
Save