diff --git a/CMakeLists.txt b/CMakeLists.txt index c042e895..11abb4c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,12 +84,6 @@ if(MSVC) add_definitions(-D_SCL_SECURE_NO_WARNINGS) endif(MSVC) -# TODO: Maybe some kind of automated test here? -option(PLASMA_USE_SSE "Enable SSE optimizations?" ON) -if(PLASMA_USE_SSE) - add_definitions(-DHAVE_SSE) -endif(PLASMA_USE_SSE) - #TODO: Make the OpenSSL includes less promiscuous so this isn't needed include_directories(${OPENSSL_INCLUDE_DIR}) diff --git a/Sources/Plasma/Apps/plClient/winmain.cpp b/Sources/Plasma/Apps/plClient/winmain.cpp index 9870afde..985a73f0 100644 --- a/Sources/Plasma/Apps/plClient/winmain.cpp +++ b/Sources/Plasma/Apps/plClient/winmain.cpp @@ -49,10 +49,6 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include // Windows Load EXE into memory suff #endif -#ifdef HAVE_SSE -# include -#endif - #include #include "HeadSpin.h" @@ -1388,35 +1384,11 @@ LONG WINAPI plCustomUnhandledExceptionFilter( struct _EXCEPTION_POINTERS *Except } #endif -bool CheckCPU() -{ - const unsigned int sse3_flag = 0x00000001; - // (any other CPU features...) - - int cpu_info[4]; - __cpuid(cpu_info, 1); -#ifdef HAVE_SSE - if((cpu_info[2] & sse3_flag) == 0) - return false; -#endif - // Insert additional feature checks here - - return true; -} - #include "pfConsoleCore/pfConsoleEngine.h" PF_CONSOLE_LINK_ALL() int WINAPI WinMain(HINSTANCE hInst, HINSTANCE hPrevInst, LPSTR lpCmdLine, int nCmdShow) { - // Check to make sure we have a good CPU before getting started - if (!CheckCPU()) - { - plString msg = plString::Format("Your processor does not support all of the features required to play %S.", ProductLongName()); - hsMessageBox(msg.c_str(), "Error", hsMessageBoxNormal, hsMessageBoxIconError); - return PARABLE_NORMAL_EXIT; - } - PF_CONSOLE_INIT_ALL() // Set global handle diff --git a/Sources/Plasma/NucleusLib/pnSceneObject/plCoordinateInterface.cpp b/Sources/Plasma/NucleusLib/pnSceneObject/plCoordinateInterface.cpp index 1c64314d..878acdc0 100644 --- a/Sources/Plasma/NucleusLib/pnSceneObject/plCoordinateInterface.cpp +++ b/Sources/Plasma/NucleusLib/pnSceneObject/plCoordinateInterface.cpp @@ -380,7 +380,6 @@ plProfile_CreateTimer(" CIRecalcT", "Object", CIRecalcT); plProfile_CreateTimer(" CIDirtyT", "Object", CIDirtyT); plProfile_CreateTimer(" CISetT", "Object", CISetT); -#ifndef HAVE_SSE static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs) { hsMatrix44 ret; @@ -441,7 +440,6 @@ static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& r return ret; } -#endif // HAVE_SSE void plCoordinateInterface::IRecalcTransforms() { @@ -449,13 +447,8 @@ void plCoordinateInterface::IRecalcTransforms() plProfile_BeginTiming(CIRecalcT); if( fParent ) { -#ifdef HAVE_SSE - fLocalToWorld = fParent->GetLocalToWorld() * fLocalToParent; - fWorldToLocal = fParentToLocal * fParent->GetWorldToLocal(); -#else fLocalToWorld = IMatrixMul34(fParent->GetLocalToWorld(), fLocalToParent); fWorldToLocal = IMatrixMul34(fParentToLocal, fParent->GetWorldToLocal()); -#endif } else { diff --git a/Sources/Plasma/PubUtilLib/plDrawable/plDrawableSpans.cpp b/Sources/Plasma/PubUtilLib/plDrawable/plDrawableSpans.cpp index 3fc19b2a..369dd94e 100644 --- a/Sources/Plasma/PubUtilLib/plDrawable/plDrawableSpans.cpp +++ b/Sources/Plasma/PubUtilLib/plDrawable/plDrawableSpans.cpp @@ -416,7 +416,6 @@ hsBool plDrawableSpans::IBoundsInvalid(const hsBounds3Ext& bnd) const } //// SetTransform //////////////////////////////////////////////////////////// -#ifndef HAVE_SSE static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& rhs) { hsMatrix44 ret; @@ -477,7 +476,6 @@ static inline hsMatrix44 IMatrixMul34(const hsMatrix44& lhs, const hsMatrix44& r return ret; } -#endif #ifdef MF_TEST_UPDATE plProfile_CreateCounter("DSSetTrans", "Update", DSSetTrans); @@ -521,13 +519,9 @@ plDrawable& plDrawableSpans::SetTransform( uint32_t index, const hsMatrix44& l2w #endif // MF_TEST_UPDATE for( i = 0; i < spans->GetCount(); i++ ) { -#ifdef HAVE_SSE - fLocalToWorlds[ (*spans)[ i ] ] = l2w * fLocalToBones[ (*spans)[ i ] ]; - fWorldToLocals[ (*spans)[ i ] ] = fBoneToLocals[ (*spans)[ i ] ] * w2l; -#else fLocalToWorlds[ (*spans)[ i ] ] = IMatrixMul34(l2w, fLocalToBones[ (*spans)[ i ] ]); fWorldToLocals[ (*spans)[ i ] ] = IMatrixMul34(fBoneToLocals[ (*spans)[ i ] ], w2l); -#endif // HAVE_SSE + } #ifdef MF_TEST_UPDATE plProfile_EndTiming(DSMatTransT); diff --git a/Sources/Plasma/PubUtilLib/plPipeline/plDXPipeline.cpp b/Sources/Plasma/PubUtilLib/plPipeline/plDXPipeline.cpp index 6c93dbbd..413e98db 100644 --- a/Sources/Plasma/PubUtilLib/plPipeline/plDXPipeline.cpp +++ b/Sources/Plasma/PubUtilLib/plPipeline/plDXPipeline.cpp @@ -163,8 +163,8 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include -#ifdef HAVE_SSE -# include +#ifdef HS_SIMD_INCLUDE +# include HS_SIMD_INCLUDE #endif //#define MF_TOSSER @@ -10527,9 +10527,9 @@ void plDXPipeline::LoadResources() // Sorry about this, but it really did speed up the skinning. // Just some macros for the inner loop of IBlendVertsIntoBuffer. -#ifdef HAVE_SSE +#ifdef HS_SSE3 # define MATRIXMULTBEGIN(xfm, wgt) \ - __m128 mc0, mc1, mc2, mwt, msr, _x, _y, _z, hbuf; \ + __m128 mc0, mc1, mc2, mwt, msr, _x, _y, _z, hbuf1, hbuf2; \ ALIGN(16) float hack[4]; \ mc0 = _mm_loadu_ps(xfm.fMap[0]); \ mc1 = _mm_loadu_ps(xfm.fMap[1]); \ @@ -10541,30 +10541,26 @@ void plDXPipeline::LoadResources() _y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \ _z = _mm_mul_ps(_mm_mul_ps(mc2, msr), mwt); \ \ - hbuf = _mm_hadd_ps(_x, _y); \ - hbuf = _mm_hadd_ps(hbuf, hbuf); \ - _mm_store_ps(hack, hbuf); \ + hbuf1 = _mm_hadd_ps(_x, _y); \ + hbuf2 = _mm_hadd_ps(_z, _z); \ + hbuf1 = _mm_hadd_ps(hbuf1, hbuf2); \ + _mm_store_ps(hack, hbuf1); \ dst.fX += hack[0]; \ dst.fY += hack[1]; \ - hbuf = _mm_hadd_ps(_z, _z); \ - hbuf = _mm_hadd_ps(hbuf, hbuf); \ - _mm_store_ps(hack, hbuf); \ - dst.fZ += hack[0]; + dst.fZ += hack[2]; # define MATRIXMULTVECTORADD(dst, src) \ msr = _mm_set_ps(0.f, src.fZ, src.fY, src.fX); \ _x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \ _y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \ _z = _mm_mul_ps(_mm_mul_ps(mc2, msr), mwt); \ \ - hbuf = _mm_hadd_ps(_x, _y); \ - hbuf = _mm_hadd_ps(hbuf, hbuf); \ - _mm_store_ps(hack, hbuf); \ + hbuf1 = _mm_hadd_ps(_x, _y); \ + hbuf2 = _mm_hadd_ps(_z, _z); \ + hbuf1 = _mm_hadd_ps(hbuf1, hbuf2); \ + _mm_store_ps(hack, hbuf1); \ dst.fX += hack[0]; \ dst.fY += hack[1]; \ - hbuf = _mm_hadd_ps(_z, _z); \ - hbuf = _mm_hadd_ps(hbuf, hbuf); \ - _mm_store_ps(hack, hbuf); \ - dst.fZ += hack[0]; + dst.fZ += hack[2]; #else # define MATRIXMULTBEGIN(xfm, wgt) \ float m00 = xfm.fMap[0][0]; \