Browse Source

Aligned point/vector loads

Adam Johnson 12 years ago
parent
commit
342dd5fe14
  1. 34
      Sources/Plasma/PubUtilLib/plPipeline/plDXPipeline.cpp

34
Sources/Plasma/PubUtilLib/plPipeline/plDXPipeline.cpp

@ -217,10 +217,10 @@ inline uint8_t* inlStuffUInt32( uint8_t* ptr, const uint32_t uint )
*(uint32_t*)ptr = uint;
return ptr + sizeof(uint);
}
inline uint8_t* inlExtractPoint( const uint8_t* ptr, const hsScalarTriple& pt )
inline uint8_t* inlExtractPoint( const uint8_t* ptr, hsScalarTriple* pt )
{
register const float* src = (float*)ptr;
register float* dst = (float*)&pt.fX;
register float* dst = (float*)&pt->fX;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
@ -10623,8 +10623,8 @@ inline void inlTESTPOINT(const hsPoint3& destP,
mc1 = _mm_load_ps(xfm.fMap[1]); \
mc2 = _mm_load_ps(xfm.fMap[2]); \
mwt = _mm_set_ps1(wgt);
#define MATRIXMULTPOINTADD_SSE3(dst, src) \
msr = _mm_set_ps(1.f, src.fZ, src.fY, src.fX); \
#define MATRIXMULTBUFADD_SSE3(dst, src) \
msr = _mm_load_ps(src); \
_x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \
_y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \
_z = _mm_mul_ps(_mm_mul_ps(mc2, msr), mwt); \
@ -10659,8 +10659,12 @@ hsFunctionDispatcher<plDXPipeline::blend_vert_buffer_ptr> plDXPipeline::blend_ve
uint8_t numUVs, numWeights; \
uint32_t i, j, indices, color, specColor, uvChanSize; \
float weights[ 4 ], weightSum; \
hsPoint3 pt, tempPt, destPt; \
hsVector3 vec, tempNorm, destNorm; \
ALIGN(16) float pt_buf[] = { 0.f, 0.f, 0.f, 1.f }; \
ALIGN(16) float vec_buf[] = { 0.f, 0.f, 0.f, 0.f }; \
hsPoint3* pt = reinterpret_cast<hsPoint3*>(pt_buf); \
hsVector3* vec = reinterpret_cast<hsVector3*>(vec_buf); \
hsPoint3 destPt; \
hsVector3 destNorm; \
\
/* Get some counts */\
switch( format & plGBufferGroup::kSkinWeightMask ) \
@ -10772,7 +10776,7 @@ hsFunctionDispatcher<plDXPipeline::blend_vert_buffer_ptr> plDXPipeline::blend_ve
uint8_t k; \
for( k = 0; k < numUVs; k++ ) \
{ \
src = inlExtractPoint( src, srcUVWs[k] ); \
src = inlExtractPoint( src, &srcUVWs[k] ); \
} \
memcpy( dstUVWs, srcUVWs, uvChanSize); \
dstUVWs[loChan].Set(0,0,0); \
@ -10823,13 +10827,13 @@ void plDXPipeline::blend_vert_buffer_fpu( plSpan* span,
BLENDVERTSTART
MATRIXMULTBEGIN_FPU(matrixPalette[indices & 0xff], weights[j]);
MATRIXMULTPOINTADD_FPU(destPt, pt);
MATRIXMULTVECTORADD_FPU(destNorm, vec);
MATRIXMULTPOINTADD_FPU(destPt, (*pt));
MATRIXMULTVECTORADD_FPU(destNorm, (*vec));
BLENDVERTMID
MATRIXMULTBEGIN_FPU(matrixPalette[indices & 0xff], weights[j]);
MATRIXMULTPOINTADD_FPU(destPt, pt);
MATRIXMULTVECTORADD_FPU(destNorm, vec);
MATRIXMULTPOINTADD_FPU(destPt, (*pt));
MATRIXMULTVECTORADD_FPU(destNorm, (*vec));
MATRIXMULTVECTORADD_FPU(dstUVWs[loChan], srcUVWs[loChan]);
MATRIXMULTVECTORADD_FPU(dstUVWs[hiChan], srcUVWs[hiChan]);
@ -10846,13 +10850,13 @@ void plDXPipeline::blend_vert_buffer_sse3( plSpan* span,
BLENDVERTSTART
MATRIXMULTBEGIN_SSE3(matrixPalette[indices & 0xff], weights[j]);
MATRIXMULTPOINTADD_SSE3(destPt, pt);
MATRIXMULTVECTORADD_SSE3(destNorm, vec);
MATRIXMULTBUFADD_SSE3(destPt, pt_buf);
MATRIXMULTBUFADD_SSE3(destNorm, vec_buf);
BLENDVERTMID
MATRIXMULTBEGIN_SSE3(matrixPalette[indices & 0xff], weights[j]);
MATRIXMULTPOINTADD_SSE3(destPt, pt);
MATRIXMULTVECTORADD_SSE3(destNorm, vec);
MATRIXMULTBUFADD_SSE3(destPt, pt_buf);
MATRIXMULTBUFADD_SSE3(destNorm, vec_buf);
MATRIXMULTVECTORADD_SSE3(dstUVWs[loChan], srcUVWs[loChan]);
MATRIXMULTVECTORADD_SSE3(dstUVWs[hiChan], srcUVWs[hiChan]);
BLENDVERTEND

Loading…
Cancel
Save