mirror of
https://foundry.openuru.org/gitblit/r/CWE-ou-minkata.git
synced 2025-07-18 19:29:09 +00:00
SSE 4.1 Skinning
This appears to offer no benefit on my Wolfdale CPU, but I expect it will be useful on more recent ones.
This commit is contained in:
@ -10699,6 +10699,38 @@ static inline void ISkinVertexSSE3(const hsMatrix44& xfm, float wgt,
|
||||
#endif // HS_SSE3
|
||||
}
|
||||
|
||||
#ifdef HS_SSE41
// Transforms one 4-float input by three 4-float matrix entries using SSE4.1
// dot products, scales the result by a weight and accumulates it into dst.
//
//   src  - four floats to transform; read with an aligned load, so it must be
//          16-byte aligned.
//   dst  - four floats that are read, incremented and written back with
//          aligned load/store; must be 16-byte aligned as well.
//   mc0, mc1, mc2 - the 4-float vectors dotted with src to produce output
//          lanes 0, 1 and 2 respectively.
//   mwt  - per-lane weight multiplied into the result before accumulation.
//
// Output lane 3 of the transformed value is always zero, so dst[3] receives
// 0 * (lane 3 of mwt).
static inline void ISkinDpSSE41(const float* src, float* dst, const __m128& mc0,
                                const __m128& mc1, const __m128& mc2, const __m128& mwt)
{
    // _mm_dp_ps immediates: the high nibble (0xF) includes all four products in
    // the sum; the low nibble selects the one output lane that receives the sum
    // while every other lane is zeroed.
    enum { kDotIntoLane0 = 0xF1, kDotIntoLane1 = 0xF2, kDotIntoLane2 = 0xF4 };

    const __m128 input = _mm_load_ps(src);

    // One dot product per output component, each landing in its own lane.
    const __m128 lane0 = _mm_dp_ps(input, mc0, kDotIntoLane0);
    const __m128 lane1 = _mm_dp_ps(input, mc1, kDotIntoLane1);
    const __m128 lane2 = _mm_dp_ps(input, mc2, kDotIntoLane2);

    // The unused lanes of each partial result are zero, so a bitwise OR merges
    // the three results into one vector.
    const __m128 transformed = _mm_or_ps(_mm_or_ps(lane0, lane1), lane2);

    // dst += transformed * weight
    const __m128 accumulated = _mm_add_ps(_mm_load_ps(dst), _mm_mul_ps(transformed, mwt));
    _mm_store_ps(dst, accumulated);
}
#endif // HS_SSE41
|
||||
|
||||
// SSE4.1 flavour of the per-vertex skinning routine.
//
// Loads the first three 4-float entries of xfm.fMap (aligned loads), broadcasts
// wgt to all four lanes, and then accumulates the weighted transform of pt_src
// into pt_dst and of vec_src into vec_dst via ISkinDpSSE41. All four float
// pointers are accessed with aligned loads/stores by that helper.
//
// NOTE(review): presumably pt_src carries w = 1 and vec_src carries w = 0 so
// that only the point picks up the fourth matrix component - confirm against
// the caller.
//
// When HS_SSE41 is not defined the body is empty and the call does nothing.
static inline void ISkinVertexSSE41(const hsMatrix44& xfm, float wgt,
                                    const float* pt_src, float* pt_dst,
                                    const float* vec_src, float* vec_dst)
{
#ifdef HS_SSE41
    const __m128 weight = _mm_set_ps1(wgt);

    const __m128 map0 = _mm_load_ps(xfm.fMap[0]);
    const __m128 map1 = _mm_load_ps(xfm.fMap[1]);
    const __m128 map2 = _mm_load_ps(xfm.fMap[2]);

    // Same matrix entries and weight for both the point and the vector.
    ISkinDpSSE41(pt_src, pt_dst, map0, map1, map2, weight);
    ISkinDpSSE41(vec_src, vec_dst, map0, map1, map2, weight);
#endif // HS_SSE41
}
|
||||
|
||||
// Pointer to a per-vertex skinning routine. The parameter list matches
// ISkinVertexSSE41: matrix, weight, point source, point destination,
// vector source, vector destination. Used below as a non-type template
// parameter.
typedef void(*skin_vert_ptr)(const hsMatrix44&, float, const float*, float*, const float*, float*);
|
||||
|
||||
template<skin_vert_ptr T>
|
||||
@ -10763,7 +10795,8 @@ static void IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, int numMat
|
||||
|
||||
// CPU-optimized functions requiring dispatch
|
||||
// Dispatcher holding the available IBlendVertBuffer instantiations: the FPU
// version, the SSE3 version and the SSE4.1 version. The zero arguments leave
// the slots in between unset.
// NOTE(review): the slot order is assumed to be FPU, SSE1, SSE2, SSE3, SSSE3,
// SSE4.1 - confirm against hsFunctionDispatcher's constructor.
hsFunctionDispatcher<plDXPipeline::blend_vert_buffer_ptr> plDXPipeline::blend_vert_buffer(
    IBlendVertBuffer<ISkinVertexFPU>, 0, 0, IBlendVertBuffer<ISkinVertexSSE3>, 0,
    IBlendVertBuffer<ISkinVertexSSE41>);
|
||||
|
||||
// ISetPipeConsts //////////////////////////////////////////////////////////////////
|
||||
// A shader can request that the pipeline fill in certain constants that are indeterminate
|
||||
|
Reference in New Issue
Block a user