mirror of
https://foundry.openuru.org/gitblit/r/CWE-ou-minkata.git
synced 2025-07-19 11:49:09 +00:00
Fix support in plDXPipeline for SSE using temporary macros.
Re-enables FPU/SSE3 code using the FunctionDispatcher and some quick hacky macros to template out the two nearly-identical functions, awaiting branan's deep-voodoo template-specialization functor-dispatcher patch.
This commit is contained in:
@ -10525,80 +10525,10 @@ void plDXPipeline::LoadResources()
|
|||||||
plNetClientApp::StaticDebugMsg("End Device Reload");
|
plNetClientApp::StaticDebugMsg("End Device Reload");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sorry about this, but it really did speed up the skinning.
|
|
||||||
// Just some macros for the inner loop of IBlendVertsIntoBuffer.
|
|
||||||
#ifdef HS_SSE3
|
|
||||||
# define MATRIXMULTBEGIN(xfm, wgt) \
|
|
||||||
__m128 mc0, mc1, mc2, mwt, msr, _x, _y, _z, hbuf1, hbuf2; \
|
|
||||||
ALIGN(16) float hack[4]; \
|
|
||||||
mc0 = _mm_loadu_ps(xfm.fMap[0]); \
|
|
||||||
mc1 = _mm_loadu_ps(xfm.fMap[1]); \
|
|
||||||
mc2 = _mm_loadu_ps(xfm.fMap[2]); \
|
|
||||||
mwt = _mm_set_ps1(wgt);
|
|
||||||
# define MATRIXMULTPOINTADD(dst, src) \
|
|
||||||
msr = _mm_set_ps(1.f, src.fZ, src.fY, src.fX); \
|
|
||||||
_x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \
|
|
||||||
_y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \
|
|
||||||
_z = _mm_mul_ps(_mm_mul_ps(mc2, msr), mwt); \
|
|
||||||
\
|
|
||||||
hbuf1 = _mm_hadd_ps(_x, _y); \
|
|
||||||
hbuf2 = _mm_hadd_ps(_z, _z); \
|
|
||||||
hbuf1 = _mm_hadd_ps(hbuf1, hbuf2); \
|
|
||||||
_mm_store_ps(hack, hbuf1); \
|
|
||||||
dst.fX += hack[0]; \
|
|
||||||
dst.fY += hack[1]; \
|
|
||||||
dst.fZ += hack[2];
|
|
||||||
# define MATRIXMULTVECTORADD(dst, src) \
|
|
||||||
msr = _mm_set_ps(0.f, src.fZ, src.fY, src.fX); \
|
|
||||||
_x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \
|
|
||||||
_y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \
|
|
||||||
_z = _mm_mul_ps(_mm_mul_ps(mc2, msr), mwt); \
|
|
||||||
\
|
|
||||||
hbuf1 = _mm_hadd_ps(_x, _y); \
|
|
||||||
hbuf2 = _mm_hadd_ps(_z, _z); \
|
|
||||||
hbuf1 = _mm_hadd_ps(hbuf1, hbuf2); \
|
|
||||||
_mm_store_ps(hack, hbuf1); \
|
|
||||||
dst.fX += hack[0]; \
|
|
||||||
dst.fY += hack[1]; \
|
|
||||||
dst.fZ += hack[2];
|
|
||||||
#else
|
|
||||||
# define MATRIXMULTBEGIN(xfm, wgt) \
|
|
||||||
float m00 = xfm.fMap[0][0]; \
|
|
||||||
float m01 = xfm.fMap[0][1]; \
|
|
||||||
float m02 = xfm.fMap[0][2]; \
|
|
||||||
float m03 = xfm.fMap[0][3]; \
|
|
||||||
float m10 = xfm.fMap[1][0]; \
|
|
||||||
float m11 = xfm.fMap[1][1]; \
|
|
||||||
float m12 = xfm.fMap[1][2]; \
|
|
||||||
float m13 = xfm.fMap[1][3]; \
|
|
||||||
float m20 = xfm.fMap[2][0]; \
|
|
||||||
float m21 = xfm.fMap[2][1]; \
|
|
||||||
float m22 = xfm.fMap[2][2]; \
|
|
||||||
float m23 = xfm.fMap[2][3]; \
|
|
||||||
float m_wgt = wgt; \
|
|
||||||
float srcX, srcY, srcZ;
|
|
||||||
# define MATRIXMULTPOINTADD(dst, src) \
|
|
||||||
srcX = src.fX; \
|
|
||||||
srcY = src.fY; \
|
|
||||||
srcZ = src.fZ; \
|
|
||||||
\
|
|
||||||
dst.fX += (srcX * m00 + srcY * m01 + srcZ * m02 + m03) * m_wgt; \
|
|
||||||
dst.fY += (srcX * m10 + srcY * m11 + srcZ * m12 + m13) * m_wgt; \
|
|
||||||
dst.fZ += (srcX * m20 + srcY * m21 + srcZ * m22 + m23) * m_wgt;
|
|
||||||
# define MATRIXMULTVECTORADD(dst, src) \
|
|
||||||
srcX = src.fX; \
|
|
||||||
srcY = src.fY; \
|
|
||||||
srcZ = src.fZ; \
|
|
||||||
\
|
|
||||||
dst.fX += (srcX * m00 + srcY * m01 + srcZ * m02) * m_wgt; \
|
|
||||||
dst.fY += (srcX * m10 + srcY * m11 + srcZ * m12) * m_wgt; \
|
|
||||||
dst.fZ += (srcX * m20 + srcY * m21 + srcZ * m22) * m_wgt;
|
|
||||||
#endif // HS_SSE3
|
|
||||||
|
|
||||||
// inlTESTPOINT /////////////////////////////////////////
|
// inlTESTPOINT /////////////////////////////////////////
|
||||||
// Update mins and maxs if destP is outside.
|
// Update mins and maxs if destP is outside.
|
||||||
inline void inlTESTPOINT(const hsPoint3& destP,
|
inline void inlTESTPOINT(const hsPoint3& destP,
|
||||||
float& minX, float& minY, float& minZ,
|
float& minX, float& minY, float& minZ,
|
||||||
float& maxX, float& maxY, float& maxZ)
|
float& maxX, float& maxY, float& maxZ)
|
||||||
{
|
{
|
||||||
if( destP.fX < minX )
|
if( destP.fX < minX )
|
||||||
@ -10622,189 +10552,282 @@ inline void inlTESTPOINT(const hsPoint3& destP,
|
|||||||
// format, blends them into the destination buffer given without the blending
|
// format, blends them into the destination buffer given without the blending
|
||||||
// info.
|
// info.
|
||||||
|
|
||||||
void plDXPipeline::IBlendVertsIntoBuffer( plSpan* span,
|
// FPU version
|
||||||
hsMatrix44* matrixPalette, int numMatrices,
|
#define MATRIXMULTBEGIN_FPU(xfm, wgt) \
|
||||||
const uint8_t *src, uint8_t format, uint32_t srcStride,
|
float m00 = xfm.fMap[0][0]; \
|
||||||
uint8_t *dest, uint32_t destStride, uint32_t count,
|
float m01 = xfm.fMap[0][1]; \
|
||||||
uint16_t localUVWChans )
|
float m02 = xfm.fMap[0][2]; \
|
||||||
{
|
float m03 = xfm.fMap[0][3]; \
|
||||||
uint8_t numUVs, numWeights;
|
float m10 = xfm.fMap[1][0]; \
|
||||||
uint32_t i, j, indices, color, specColor, uvChanSize;
|
float m11 = xfm.fMap[1][1]; \
|
||||||
float weights[ 4 ], weightSum;
|
float m12 = xfm.fMap[1][2]; \
|
||||||
hsPoint3 pt, tempPt, destPt;
|
float m13 = xfm.fMap[1][3]; \
|
||||||
hsVector3 vec, tempNorm, destNorm;
|
float m20 = xfm.fMap[2][0]; \
|
||||||
|
float m21 = xfm.fMap[2][1]; \
|
||||||
|
float m22 = xfm.fMap[2][2]; \
|
||||||
|
float m23 = xfm.fMap[2][3]; \
|
||||||
|
float m_wgt = wgt; \
|
||||||
|
float srcX, srcY, srcZ;
|
||||||
|
#define MATRIXMULTPOINTADD_FPU(dst, src) \
|
||||||
|
srcX = src.fX; \
|
||||||
|
srcY = src.fY; \
|
||||||
|
srcZ = src.fZ; \
|
||||||
|
\
|
||||||
|
dst.fX += (srcX * m00 + srcY * m01 + srcZ * m02 + m03) * m_wgt; \
|
||||||
|
dst.fY += (srcX * m10 + srcY * m11 + srcZ * m12 + m13) * m_wgt; \
|
||||||
|
dst.fZ += (srcX * m20 + srcY * m21 + srcZ * m22 + m23) * m_wgt;
|
||||||
|
#define MATRIXMULTVECTORADD_FPU(dst, src) \
|
||||||
|
srcX = src.fX; \
|
||||||
|
srcY = src.fY; \
|
||||||
|
srcZ = src.fZ; \
|
||||||
|
\
|
||||||
|
dst.fX += (srcX * m00 + srcY * m01 + srcZ * m02) * m_wgt; \
|
||||||
|
dst.fY += (srcX * m10 + srcY * m11 + srcZ * m12) * m_wgt; \
|
||||||
|
dst.fZ += (srcX * m20 + srcY * m21 + srcZ * m22) * m_wgt;
|
||||||
|
|
||||||
|
// SSE3 version
|
||||||
|
#ifdef HS_SSE3
|
||||||
|
#define MATRIXMULTBEGIN_SSE3(xfm, wgt) \
|
||||||
|
__m128 mc0, mc1, mc2, mwt, msr, _x, _y, _z, hbuf1, hbuf2; \
|
||||||
|
ALIGN(16) float hack[4]; \
|
||||||
|
mc0 = _mm_loadu_ps(xfm.fMap[0]); \
|
||||||
|
mc1 = _mm_loadu_ps(xfm.fMap[1]); \
|
||||||
|
mc2 = _mm_loadu_ps(xfm.fMap[2]); \
|
||||||
|
mwt = _mm_set_ps1(wgt);
|
||||||
|
#define MATRIXMULTPOINTADD_SSE3(dst, src) \
|
||||||
|
msr = _mm_set_ps(1.f, src.fZ, src.fY, src.fX); \
|
||||||
|
_x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \
|
||||||
|
_y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \
|
||||||
|
_z = _mm_mul_ps(_mm_mul_ps(mc2, msr), mwt); \
|
||||||
|
\
|
||||||
|
hbuf1 = _mm_hadd_ps(_x, _y); \
|
||||||
|
hbuf2 = _mm_hadd_ps(_z, _z); \
|
||||||
|
hbuf1 = _mm_hadd_ps(hbuf1, hbuf2); \
|
||||||
|
_mm_store_ps(hack, hbuf1); \
|
||||||
|
dst.fX += hack[0]; \
|
||||||
|
dst.fY += hack[1]; \
|
||||||
|
dst.fZ += hack[2];
|
||||||
|
#define MATRIXMULTVECTORADD_SSE3(dst, src) \
|
||||||
|
msr = _mm_set_ps(0.f, src.fZ, src.fY, src.fX); \
|
||||||
|
_x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \
|
||||||
|
_y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \
|
||||||
|
_z = _mm_mul_ps(_mm_mul_ps(mc2, msr), mwt); \
|
||||||
|
\
|
||||||
|
hbuf1 = _mm_hadd_ps(_x, _y); \
|
||||||
|
hbuf2 = _mm_hadd_ps(_z, _z); \
|
||||||
|
hbuf1 = _mm_hadd_ps(hbuf1, hbuf2); \
|
||||||
|
_mm_store_ps(hack, hbuf1); \
|
||||||
|
dst.fX += hack[0]; \
|
||||||
|
dst.fY += hack[1]; \
|
||||||
|
dst.fZ += hack[2];
|
||||||
|
#endif
|
||||||
|
|
||||||
/// Get some counts
|
// CPU-optimized functions requiring dispatch
|
||||||
switch( format & plGBufferGroup::kSkinWeightMask )
|
hsFunctionDispatcher<plDXPipeline::blend_vert_buffer_ptr> plDXPipeline::blend_vert_buffer(plDXPipeline::blend_vert_buffer_fpu, 0, 0, plDXPipeline::blend_vert_buffer_sse3);
|
||||||
{
|
|
||||||
case plGBufferGroup::kSkin1Weight: numWeights = 1; break;
|
|
||||||
case plGBufferGroup::kSkin2Weights: numWeights = 2; break;
|
|
||||||
case plGBufferGroup::kSkin3Weights: numWeights = 3; break;
|
|
||||||
default: hsAssert( false, "Invalid weight count in IBlendVertsIntoBuffer()" );
|
|
||||||
}
|
|
||||||
|
|
||||||
numUVs = plGBufferGroup::CalcNumUVs( format );
|
// Temporary macros for IBlendVertsIntoBuffer dispatch code de-duplication
|
||||||
uvChanSize = numUVs * sizeof( float ) * 3;
|
#define BLENDVERTSTART \
|
||||||
|
uint8_t numUVs, numWeights; \
|
||||||
//#define MF_RECALC_BOUNDS
|
uint32_t i, j, indices, color, specColor, uvChanSize; \
|
||||||
#ifdef MF_RECALC_BOUNDS
|
float weights[ 4 ], weightSum; \
|
||||||
float minX = 1.e33f;
|
hsPoint3 pt, tempPt, destPt; \
|
||||||
float minY = 1.e33f;
|
hsVector3 vec, tempNorm, destNorm; \
|
||||||
float minZ = 1.e33f;
|
\
|
||||||
|
/* Get some counts */\
|
||||||
float maxX = -1.e33f;
|
switch( format & plGBufferGroup::kSkinWeightMask ) \
|
||||||
float maxY = -1.e33f;
|
{ \
|
||||||
float maxZ = -1.e33f;
|
case plGBufferGroup::kSkin1Weight: numWeights = 1; break; \
|
||||||
#endif // MF_RECALC_BOUNDS
|
case plGBufferGroup::kSkin2Weights: numWeights = 2; break; \
|
||||||
|
case plGBufferGroup::kSkin3Weights: numWeights = 3; break; \
|
||||||
// localUVWChans is bump mapping tangent space vectors, which need to
|
default: hsAssert( false, "Invalid weight count in IBlendVertsIntoBuffer()" ); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
numUVs = plGBufferGroup::CalcNumUVs( format ); \
|
||||||
|
uvChanSize = numUVs * sizeof( float ) * 3; \
|
||||||
|
\
|
||||||
|
/* localUVWChans is bump mapping tangent space vectors, which need to
|
||||||
// be skinned like the normal, as opposed to passed through like
|
// be skinned like the normal, as opposed to passed through like
|
||||||
// garden variety UVW coordinates.
|
// garden variety UVW coordinates.
|
||||||
// There are no localUVWChans that I know of in production assets (i.e.
|
// There are no localUVWChans that I know of in production assets (i.e.
|
||||||
// the avatar is not skinned).
|
// the avatar is not skinned).*/\
|
||||||
if( !localUVWChans )
|
if( !localUVWChans ) \
|
||||||
{
|
{ \
|
||||||
/// Copy whilst blending
|
/* Copy whilst blending */\
|
||||||
for( i = 0; i < count; i++ )
|
for( i = 0; i < count; i++ ) \
|
||||||
{
|
{ \
|
||||||
// Extract data
|
/* Extract data */\
|
||||||
src = inlExtractPoint( src, pt );
|
src = inlExtractPoint( src, pt ); \
|
||||||
for( j = 0, weightSum = 0; j < numWeights; j++ )
|
for( j = 0, weightSum = 0; j < numWeights; j++ ) \
|
||||||
{
|
{ \
|
||||||
src = inlExtractFloat( src, weights[ j ] );
|
src = inlExtractFloat( src, weights[ j ] ); \
|
||||||
weightSum += weights[ j ];
|
weightSum += weights[ j ]; \
|
||||||
}
|
} \
|
||||||
weights[ j ] = 1 - weightSum;
|
weights[ j ] = 1 - weightSum; \
|
||||||
|
\
|
||||||
if( format & plGBufferGroup::kSkinIndices )
|
if( format & plGBufferGroup::kSkinIndices ) \
|
||||||
{
|
{ \
|
||||||
src = inlExtractUInt32( src, indices );
|
src = inlExtractUInt32( src, indices ); \
|
||||||
}
|
} \
|
||||||
else
|
else \
|
||||||
{
|
{ \
|
||||||
indices = 1 << 8;
|
indices = 1 << 8; \
|
||||||
}
|
} \
|
||||||
src = inlExtractPoint( src, vec );
|
src = inlExtractPoint( src, vec ); \
|
||||||
src = inlExtractUInt32( src, color );
|
src = inlExtractUInt32( src, color ); \
|
||||||
src = inlExtractUInt32( src, specColor );
|
src = inlExtractUInt32( src, specColor ); \
|
||||||
|
\
|
||||||
// Blend
|
/* Blend */\
|
||||||
destPt.Set( 0, 0, 0 );
|
destPt.Set( 0, 0, 0 ); \
|
||||||
destNorm.Set( 0, 0, 0 );
|
destNorm.Set( 0, 0, 0 ); \
|
||||||
for( j = 0; j < numWeights + 1; j++ )
|
for( j = 0; j < numWeights + 1; j++ ) \
|
||||||
{
|
{ \
|
||||||
if( weights[ j ] )
|
if( weights[ j ] ) \
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
MATRIXMULTBEGIN(matrixPalette[indices & 0xff], weights[j]);
|
MATRIXMULTBEGIN(matrixPalette[indices & 0xff], weights[j]);
|
||||||
|
|
||||||
MATRIXMULTPOINTADD(destPt, pt);
|
MATRIXMULTPOINTADD(destPt, pt);
|
||||||
MATRIXMULTVECTORADD(destNorm, vec);
|
MATRIXMULTVECTORADD(destNorm, vec);
|
||||||
}
|
*/
|
||||||
|
#define BLENDVERTMID \
|
||||||
indices >>= 8;
|
} \
|
||||||
}
|
\
|
||||||
// Probably don't really need to renormalize this. The errors are
|
indices >>= 8; \
|
||||||
// going to be subtle and "smooth".
|
} \
|
||||||
// hsFastMath::NormalizeAppr(destNorm);
|
/* Probably don't really need to renormalize this. The errors are
|
||||||
|
// going to be subtle and "smooth".*/\
|
||||||
#ifdef MF_RECALC_BOUNDS
|
/* hsFastMath::NormalizeAppr(destNorm);*/ \
|
||||||
inlTESTPOINT(destPt, minX, minY, minZ, maxX, maxY, maxZ);
|
\
|
||||||
#endif // MF_RECALC_BOUNDS
|
/* Slam data into position now */\
|
||||||
|
dest = inlStuffPoint( dest, destPt ); \
|
||||||
// Slam data into position now
|
dest = inlStuffPoint( dest, destNorm ); \
|
||||||
dest = inlStuffPoint( dest, destPt );
|
dest = inlStuffUInt32( dest, color ); \
|
||||||
dest = inlStuffPoint( dest, destNorm );
|
dest = inlStuffUInt32( dest, specColor ); \
|
||||||
dest = inlStuffUInt32( dest, color );
|
memcpy( dest, src, uvChanSize ); \
|
||||||
dest = inlStuffUInt32( dest, specColor );
|
src += uvChanSize; \
|
||||||
memcpy( dest, src, uvChanSize );
|
dest += uvChanSize; \
|
||||||
src += uvChanSize;
|
} \
|
||||||
dest += uvChanSize;
|
} \
|
||||||
}
|
else \
|
||||||
}
|
{ \
|
||||||
else
|
uint8_t hiChan = localUVWChans >> 8; \
|
||||||
{
|
uint8_t loChan = localUVWChans & 0xff; \
|
||||||
uint8_t hiChan = localUVWChans >> 8;
|
/* Copy whilst blending */\
|
||||||
uint8_t loChan = localUVWChans & 0xff;
|
for( i = 0; i < count; i++ ) \
|
||||||
/// Copy whilst blending
|
{ \
|
||||||
for( i = 0; i < count; i++ )
|
hsVector3 srcUVWs[plGeometrySpan::kMaxNumUVChannels]; \
|
||||||
{
|
hsVector3 dstUVWs[plGeometrySpan::kMaxNumUVChannels]; \
|
||||||
hsVector3 srcUVWs[plGeometrySpan::kMaxNumUVChannels];
|
\
|
||||||
hsVector3 dstUVWs[plGeometrySpan::kMaxNumUVChannels];
|
/* Extract data */\
|
||||||
|
src = inlExtractPoint( src, pt ); \
|
||||||
// Extract data
|
for( j = 0, weightSum = 0; j < numWeights; j++ ) \
|
||||||
src = inlExtractPoint( src, pt );
|
{ \
|
||||||
for( j = 0, weightSum = 0; j < numWeights; j++ )
|
src = inlExtractFloat( src, weights[ j ] ); \
|
||||||
{
|
weightSum += weights[ j ]; \
|
||||||
src = inlExtractFloat( src, weights[ j ] );
|
} \
|
||||||
weightSum += weights[ j ];
|
weights[ j ] = 1 - weightSum; \
|
||||||
}
|
\
|
||||||
weights[ j ] = 1 - weightSum;
|
if( format & plGBufferGroup::kSkinIndices ) \
|
||||||
|
{ \
|
||||||
if( format & plGBufferGroup::kSkinIndices )
|
src = inlExtractUInt32( src, indices ); \
|
||||||
{
|
} \
|
||||||
src = inlExtractUInt32( src, indices );
|
else \
|
||||||
}
|
{ \
|
||||||
else
|
indices = 1 << 8; \
|
||||||
{
|
} \
|
||||||
indices = 1 << 8;
|
\
|
||||||
}
|
src = inlExtractPoint( src, vec ); \
|
||||||
|
src = inlExtractUInt32( src, color ); \
|
||||||
src = inlExtractPoint( src, vec );
|
src = inlExtractUInt32( src, specColor ); \
|
||||||
src = inlExtractUInt32( src, color );
|
\
|
||||||
src = inlExtractUInt32( src, specColor );
|
uint8_t k; \
|
||||||
|
for( k = 0; k < numUVs; k++ ) \
|
||||||
uint8_t k;
|
{ \
|
||||||
for( k = 0; k < numUVs; k++ )
|
src = inlExtractPoint( src, srcUVWs[k] ); \
|
||||||
{
|
} \
|
||||||
src = inlExtractPoint( src, srcUVWs[k] );
|
memcpy( dstUVWs, srcUVWs, uvChanSize); \
|
||||||
}
|
dstUVWs[loChan].Set(0,0,0); \
|
||||||
memcpy( dstUVWs, srcUVWs, uvChanSize);
|
dstUVWs[hiChan].Set(0,0,0); \
|
||||||
dstUVWs[loChan].Set(0,0,0);
|
\
|
||||||
dstUVWs[hiChan].Set(0,0,0);
|
/* Blend */\
|
||||||
|
destPt.Set( 0, 0, 0 ); \
|
||||||
// Blend
|
destNorm.Set( 0, 0, 0 ); \
|
||||||
destPt.Set( 0, 0, 0 );
|
for( j = 0; j < numWeights + 1; j++ ) \
|
||||||
destNorm.Set( 0, 0, 0 );
|
{ \
|
||||||
for( j = 0; j < numWeights + 1; j++ )
|
if( weights[ j ] ) \
|
||||||
{
|
{ \
|
||||||
if( weights[ j ] )
|
/*
|
||||||
{
|
|
||||||
MATRIXMULTBEGIN(matrixPalette[indices & 0xff], weights[j]);
|
MATRIXMULTBEGIN(matrixPalette[indices & 0xff], weights[j]);
|
||||||
|
|
||||||
MATRIXMULTPOINTADD(destPt, pt);
|
MATRIXMULTPOINTADD(destPt, pt);
|
||||||
MATRIXMULTVECTORADD(destNorm, vec);
|
MATRIXMULTVECTORADD(destNorm, vec);
|
||||||
MATRIXMULTVECTORADD(dstUVWs[loChan], srcUVWs[loChan]);
|
MATRIXMULTVECTORADD(dstUVWs[loChan], srcUVWs[loChan]);
|
||||||
MATRIXMULTVECTORADD(dstUVWs[hiChan], srcUVWs[hiChan]);
|
MATRIXMULTVECTORADD(dstUVWs[hiChan], srcUVWs[hiChan]);
|
||||||
}
|
*/
|
||||||
|
#define BLENDVERTEND \
|
||||||
indices >>= 8;
|
} \
|
||||||
}
|
\
|
||||||
// Probably don't really need to renormalize this. The errors are
|
indices >>= 8; \
|
||||||
// going to be subtle and "smooth".
|
} \
|
||||||
// hsFastMath::NormalizeAppr(destNorm);
|
/* Probably don't really need to renormalize this. The errors are
|
||||||
// hsFastMath::NormalizeAppr(dstUVWs[loChan]);
|
// going to be subtle and "smooth". */\
|
||||||
// hsFastMath::NormalizeAppr(dstUVWs[hiChan]);
|
/* hsFastMath::NormalizeAppr(destNorm); */\
|
||||||
|
/* hsFastMath::NormalizeAppr(dstUVWs[loChan]); */\
|
||||||
#ifdef MF_RECALC_BOUNDS
|
/* hsFastMath::NormalizeAppr(dstUVWs[hiChan]); */\
|
||||||
inlTESTPOINT(destPt, minX, minY, minZ, maxX, maxY, maxZ);
|
\
|
||||||
#endif // MF_RECALC_BOUNDS
|
/* Slam data into position now */\
|
||||||
|
dest = inlStuffPoint( dest, destPt ); \
|
||||||
// Slam data into position now
|
dest = inlStuffPoint( dest, destNorm ); \
|
||||||
dest = inlStuffPoint( dest, destPt );
|
dest = inlStuffUInt32( dest, color ); \
|
||||||
dest = inlStuffPoint( dest, destNorm );
|
dest = inlStuffUInt32( dest, specColor ); \
|
||||||
dest = inlStuffUInt32( dest, color );
|
memcpy( dest, dstUVWs, uvChanSize ); \
|
||||||
dest = inlStuffUInt32( dest, specColor );
|
dest += uvChanSize; \
|
||||||
memcpy( dest, dstUVWs, uvChanSize );
|
} \
|
||||||
dest += uvChanSize;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#ifdef MF_RECALC_BOUNDS
|
|
||||||
hsBounds3Ext wBnd;
|
void plDXPipeline::blend_vert_buffer_fpu( plSpan* span,
|
||||||
wBnd.Reset(&hsPoint3(minX, minY, minZ));
|
hsMatrix44* matrixPalette, int numMatrices,
|
||||||
wBnd.Union(&hsPoint3(maxX, maxY, maxZ));
|
const uint8_t *src, uint8_t format, uint32_t srcStride,
|
||||||
span->fWorldBounds = wBnd;
|
uint8_t *dest, uint32_t destStride, uint32_t count,
|
||||||
#endif // MF_RECALC_BOUNDS
|
uint16_t localUVWChans )
|
||||||
|
{
|
||||||
|
BLENDVERTSTART
|
||||||
|
MATRIXMULTBEGIN_FPU(matrixPalette[indices & 0xff], weights[j]);
|
||||||
|
|
||||||
|
MATRIXMULTPOINTADD_FPU(destPt, pt);
|
||||||
|
MATRIXMULTVECTORADD_FPU(destNorm, vec);
|
||||||
|
BLENDVERTMID
|
||||||
|
MATRIXMULTBEGIN_FPU(matrixPalette[indices & 0xff], weights[j]);
|
||||||
|
|
||||||
|
MATRIXMULTPOINTADD_FPU(destPt, pt);
|
||||||
|
MATRIXMULTVECTORADD_FPU(destNorm, vec);
|
||||||
|
MATRIXMULTVECTORADD_FPU(dstUVWs[loChan], srcUVWs[loChan]);
|
||||||
|
MATRIXMULTVECTORADD_FPU(dstUVWs[hiChan], srcUVWs[hiChan]);
|
||||||
|
|
||||||
|
BLENDVERTEND
|
||||||
|
}
|
||||||
|
|
||||||
|
void plDXPipeline::blend_vert_buffer_sse3( plSpan* span,
|
||||||
|
hsMatrix44* matrixPalette, int numMatrices,
|
||||||
|
const uint8_t *src, uint8_t format, uint32_t srcStride,
|
||||||
|
uint8_t *dest, uint32_t destStride, uint32_t count,
|
||||||
|
uint16_t localUVWChans )
|
||||||
|
{
|
||||||
|
#ifdef HS_SSE3
|
||||||
|
BLENDVERTSTART
|
||||||
|
MATRIXMULTBEGIN_SSE3(matrixPalette[indices & 0xff], weights[j]);
|
||||||
|
|
||||||
|
MATRIXMULTPOINTADD_SSE3(destPt, pt);
|
||||||
|
MATRIXMULTVECTORADD_SSE3(destNorm, vec);
|
||||||
|
BLENDVERTMID
|
||||||
|
MATRIXMULTBEGIN_SSE3(matrixPalette[indices & 0xff], weights[j]);
|
||||||
|
|
||||||
|
MATRIXMULTPOINTADD_SSE3(destPt, pt);
|
||||||
|
MATRIXMULTVECTORADD_SSE3(destNorm, vec);
|
||||||
|
MATRIXMULTVECTORADD_SSE3(dstUVWs[loChan], srcUVWs[loChan]);
|
||||||
|
MATRIXMULTVECTORADD_SSE3(dstUVWs[hiChan], srcUVWs[hiChan]);
|
||||||
|
BLENDVERTEND
|
||||||
|
#endif // HS_SSE3
|
||||||
}
|
}
|
||||||
|
|
||||||
// ISetPipeConsts //////////////////////////////////////////////////////////////////
|
// ISetPipeConsts //////////////////////////////////////////////////////////////////
|
||||||
|
@ -465,7 +465,8 @@ protected:
|
|||||||
void IBlendVertsIntoBuffer( plSpan* span,
|
void IBlendVertsIntoBuffer( plSpan* span,
|
||||||
hsMatrix44* matrixPalette, int numMatrices,
|
hsMatrix44* matrixPalette, int numMatrices,
|
||||||
const uint8_t *src, uint8_t format, uint32_t srcStride,
|
const uint8_t *src, uint8_t format, uint32_t srcStride,
|
||||||
uint8_t *dest, uint32_t destStride, uint32_t count, uint16_t localUVWChans );
|
uint8_t *dest, uint32_t destStride, uint32_t count, uint16_t localUVWChans )
|
||||||
|
{ blend_vert_buffer.call(span, matrixPalette, numMatrices, src, format, srcStride, dest, destStride, count, localUVWChans); };
|
||||||
hsBool ISoftwareVertexBlend( plDrawableSpans* drawable, const hsTArray<int16_t>& visList );
|
hsBool ISoftwareVertexBlend( plDrawableSpans* drawable, const hsTArray<int16_t>& visList );
|
||||||
|
|
||||||
|
|
||||||
@ -734,7 +735,7 @@ public:
|
|||||||
virtual void GetDepth(float& hither, float& yon) const;
|
virtual void GetDepth(float& hither, float& yon) const;
|
||||||
virtual void SetDepth(float hither, float yon);
|
virtual void SetDepth(float hither, float yon);
|
||||||
|
|
||||||
virtual float GetZBiasScale() const;
|
virtual float GetZBiasScale() const;
|
||||||
virtual void SetZBiasScale(float scale);
|
virtual void SetZBiasScale(float scale);
|
||||||
|
|
||||||
virtual const hsMatrix44& GetWorldToCamera() const;
|
virtual const hsMatrix44& GetWorldToCamera() const;
|
||||||
@ -798,6 +799,13 @@ public:
|
|||||||
virtual int GetMaxAnisotropicSamples();
|
virtual int GetMaxAnisotropicSamples();
|
||||||
virtual int GetMaxAntiAlias(int Width, int Height, int ColorDepth);
|
virtual int GetMaxAntiAlias(int Width, int Height, int ColorDepth);
|
||||||
|
|
||||||
|
|
||||||
|
// CPU-optimized functions
|
||||||
|
protected:
|
||||||
|
typedef void(*blend_vert_buffer_ptr)(plSpan*, hsMatrix44*, int, const uint8_t *, uint8_t , uint32_t, uint8_t *, uint32_t, uint32_t, uint16_t);
|
||||||
|
static void blend_vert_buffer_fpu(plSpan*, hsMatrix44*, int, const uint8_t *, uint8_t , uint32_t, uint8_t *, uint32_t, uint32_t, uint16_t);
|
||||||
|
static void blend_vert_buffer_sse3(plSpan*, hsMatrix44*, int, const uint8_t *, uint8_t , uint32_t, uint8_t *, uint32_t, uint32_t, uint16_t);
|
||||||
|
static hsFunctionDispatcher<blend_vert_buffer_ptr> blend_vert_buffer;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user