Browse Source

Fix support in plDXPipeline for SSE using temporary macros.

Re-enables FPU/SSE3 code using the FunctionDispatcher and some quick
hacky macros to template out the two nearly-identical functions,
awaiting branan's deep-voodoo template-specialization functor-dispatcher
patch.
Joseph Davies 13 years ago
parent
commit
072bf3570c
  1. 491
      Sources/Plasma/PubUtilLib/plPipeline/plDXPipeline.cpp
  2. 12
      Sources/Plasma/PubUtilLib/plPipeline/plDXPipeline.h

491
Sources/Plasma/PubUtilLib/plPipeline/plDXPipeline.cpp

@ -10525,17 +10525,76 @@ void plDXPipeline::LoadResources()
plNetClientApp::StaticDebugMsg("End Device Reload"); plNetClientApp::StaticDebugMsg("End Device Reload");
} }
// Sorry about this, but it really did speed up the skinning. // inlTESTPOINT /////////////////////////////////////////
// Just some macros for the inner loop of IBlendVertsIntoBuffer. // Update mins and maxs if destP is outside.
// Expands the running axis-aligned bounds (minX..maxZ) so they contain destP.
//   destP      - point to fold into the bounds.
//   minX/Y/Z   - in/out lower bounds, lowered when destP lies below them.
//   maxX/Y/Z   - in/out upper bounds, raised when destP lies above them.
// The lower and upper bound of each axis are tested independently: when the
// bounds start out inverted (min = 1.e33f, max = -1.e33f) a single point has
// to update both of them, which an if / else-if chain would not do.
inline void inlTESTPOINT(const hsPoint3& destP,
                         float& minX, float& minY, float& minZ,
                         float& maxX, float& maxY, float& maxZ)
{
    if( destP.fX < minX )
        minX = destP.fX;
    if( destP.fX > maxX )
        maxX = destP.fX;

    if( destP.fY < minY )
        minY = destP.fY;
    if( destP.fY > maxY )
        maxY = destP.fY;

    if( destP.fZ < minZ )
        minZ = destP.fZ;
    if( destP.fZ > maxZ )
        maxZ = destP.fZ;
}
//// IBlendVertsIntoBuffer ////////////////////////////////////////////////////
// Given a pointer into a buffer of verts that have blending data in the D3D
// format, blends them into the destination buffer given without the blending
// info.
// FPU version
// MATRIXMULTBEGIN_FPU(xfm, wgt)
// Copies rows 0-2 of xfm.fMap into the locals m00..m23, copies wgt into
// m_wgt, and declares the srcX/srcY/srcZ scratch floats that the two macros
// below read and write. It declares variables, so it can be expanded only
// once per scope. Arguments are parenthesized so that expressions such as
// *ptr or a + b expand with the intended precedence.
#define MATRIXMULTBEGIN_FPU(xfm, wgt) \
    float m00 = (xfm).fMap[0][0]; \
    float m01 = (xfm).fMap[0][1]; \
    float m02 = (xfm).fMap[0][2]; \
    float m03 = (xfm).fMap[0][3]; \
    float m10 = (xfm).fMap[1][0]; \
    float m11 = (xfm).fMap[1][1]; \
    float m12 = (xfm).fMap[1][2]; \
    float m13 = (xfm).fMap[1][3]; \
    float m20 = (xfm).fMap[2][0]; \
    float m21 = (xfm).fMap[2][1]; \
    float m22 = (xfm).fMap[2][2]; \
    float m23 = (xfm).fMap[2][3]; \
    float m_wgt = (wgt); \
    float srcX, srcY, srcZ;
// MATRIXMULTPOINTADD_FPU(dst, src)
// Accumulates into dst the weighted product of the cached matrix rows with
// src, including the fourth-column terms (m03, m13, m23).
// Requires MATRIXMULTBEGIN_FPU earlier in the same scope.
#define MATRIXMULTPOINTADD_FPU(dst, src) \
    srcX = (src).fX; \
    srcY = (src).fY; \
    srcZ = (src).fZ; \
    \
    (dst).fX += (srcX * m00 + srcY * m01 + srcZ * m02 + m03) * m_wgt; \
    (dst).fY += (srcX * m10 + srcY * m11 + srcZ * m12 + m13) * m_wgt; \
    (dst).fZ += (srcX * m20 + srcY * m21 + srcZ * m22 + m23) * m_wgt;
// MATRIXMULTVECTORADD_FPU(dst, src)
// Same accumulation as MATRIXMULTPOINTADD_FPU but without the fourth-column
// terms. Requires MATRIXMULTBEGIN_FPU earlier in the same scope.
#define MATRIXMULTVECTORADD_FPU(dst, src) \
    srcX = (src).fX; \
    srcY = (src).fY; \
    srcZ = (src).fZ; \
    \
    (dst).fX += (srcX * m00 + srcY * m01 + srcZ * m02) * m_wgt; \
    (dst).fY += (srcX * m10 + srcY * m11 + srcZ * m12) * m_wgt; \
    (dst).fZ += (srcX * m20 + srcY * m21 + srcZ * m22) * m_wgt;
// SSE3 version
#ifdef HS_SSE3 #ifdef HS_SSE3
# define MATRIXMULTBEGIN(xfm, wgt) \ #define MATRIXMULTBEGIN_SSE3(xfm, wgt) \
__m128 mc0, mc1, mc2, mwt, msr, _x, _y, _z, hbuf1, hbuf2; \ __m128 mc0, mc1, mc2, mwt, msr, _x, _y, _z, hbuf1, hbuf2; \
ALIGN(16) float hack[4]; \ ALIGN(16) float hack[4]; \
mc0 = _mm_loadu_ps(xfm.fMap[0]); \ mc0 = _mm_loadu_ps(xfm.fMap[0]); \
mc1 = _mm_loadu_ps(xfm.fMap[1]); \ mc1 = _mm_loadu_ps(xfm.fMap[1]); \
mc2 = _mm_loadu_ps(xfm.fMap[2]); \ mc2 = _mm_loadu_ps(xfm.fMap[2]); \
mwt = _mm_set_ps1(wgt); mwt = _mm_set_ps1(wgt);
# define MATRIXMULTPOINTADD(dst, src) \ #define MATRIXMULTPOINTADD_SSE3(dst, src) \
msr = _mm_set_ps(1.f, src.fZ, src.fY, src.fX); \ msr = _mm_set_ps(1.f, src.fZ, src.fY, src.fX); \
_x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \ _x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \
_y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \ _y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \
@ -10548,7 +10607,7 @@ void plDXPipeline::LoadResources()
dst.fX += hack[0]; \ dst.fX += hack[0]; \
dst.fY += hack[1]; \ dst.fY += hack[1]; \
dst.fZ += hack[2]; dst.fZ += hack[2];
# define MATRIXMULTVECTORADD(dst, src) \ #define MATRIXMULTVECTORADD_SSE3(dst, src) \
msr = _mm_set_ps(0.f, src.fZ, src.fY, src.fX); \ msr = _mm_set_ps(0.f, src.fZ, src.fY, src.fX); \
_x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \ _x = _mm_mul_ps(_mm_mul_ps(mc0, msr), mwt); \
_y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \ _y = _mm_mul_ps(_mm_mul_ps(mc1, msr), mwt); \
@ -10561,250 +10620,214 @@ void plDXPipeline::LoadResources()
dst.fX += hack[0]; \ dst.fX += hack[0]; \
dst.fY += hack[1]; \ dst.fY += hack[1]; \
dst.fZ += hack[2]; dst.fZ += hack[2];
#else #endif
// MATRIXMULTBEGIN(xfm, wgt)
// Copies rows 0-2 of xfm.fMap into the locals m00..m23, copies wgt into
// m_wgt, and declares the srcX/srcY/srcZ scratch floats that the two macros
// below read and write. It declares variables, so it can be expanded only
// once per scope. Arguments are parenthesized so that expressions such as
// *ptr or a + b expand with the intended precedence.
#   define MATRIXMULTBEGIN(xfm, wgt) \
        float m00 = (xfm).fMap[0][0]; \
        float m01 = (xfm).fMap[0][1]; \
        float m02 = (xfm).fMap[0][2]; \
        float m03 = (xfm).fMap[0][3]; \
        float m10 = (xfm).fMap[1][0]; \
        float m11 = (xfm).fMap[1][1]; \
        float m12 = (xfm).fMap[1][2]; \
        float m13 = (xfm).fMap[1][3]; \
        float m20 = (xfm).fMap[2][0]; \
        float m21 = (xfm).fMap[2][1]; \
        float m22 = (xfm).fMap[2][2]; \
        float m23 = (xfm).fMap[2][3]; \
        float m_wgt = (wgt); \
        float srcX, srcY, srcZ;
// MATRIXMULTPOINTADD(dst, src)
// Accumulates into dst the weighted product of the cached matrix rows with
// src, including the fourth-column terms (m03, m13, m23).
// Requires MATRIXMULTBEGIN earlier in the same scope.
#   define MATRIXMULTPOINTADD(dst, src) \
        srcX = (src).fX; \
        srcY = (src).fY; \
        srcZ = (src).fZ; \
        \
        (dst).fX += (srcX * m00 + srcY * m01 + srcZ * m02 + m03) * m_wgt; \
        (dst).fY += (srcX * m10 + srcY * m11 + srcZ * m12 + m13) * m_wgt; \
        (dst).fZ += (srcX * m20 + srcY * m21 + srcZ * m22 + m23) * m_wgt;
// MATRIXMULTVECTORADD(dst, src)
// Same accumulation as MATRIXMULTPOINTADD but without the fourth-column
// terms. Requires MATRIXMULTBEGIN earlier in the same scope.
#   define MATRIXMULTVECTORADD(dst, src) \
        srcX = (src).fX; \
        srcY = (src).fY; \
        srcZ = (src).fZ; \
        \
        (dst).fX += (srcX * m00 + srcY * m01 + srcZ * m02) * m_wgt; \
        (dst).fY += (srcX * m10 + srcY * m11 + srcZ * m12) * m_wgt; \
        (dst).fZ += (srcX * m20 + srcY * m21 + srcZ * m22) * m_wgt;
#endif // HS_SSE3
// inlTESTPOINT /////////////////////////////////////////
// Update mins and maxs if destP is outside.
//   destP      - point to fold into the bounds.
//   minX/Y/Z   - in/out lower bounds, lowered when destP lies below them.
//   maxX/Y/Z   - in/out upper bounds, raised when destP lies above them.
// The lower and upper bound of each axis are tested independently: when the
// bounds start out inverted (min = 1.e33f, max = -1.e33f) a single point has
// to update both of them, which an if / else-if chain would not do.
inline void inlTESTPOINT(const hsPoint3& destP,
                         float& minX, float& minY, float& minZ,
                         float& maxX, float& maxY, float& maxZ)
{
    if( destP.fX < minX )
        minX = destP.fX;
    if( destP.fX > maxX )
        maxX = destP.fX;

    if( destP.fY < minY )
        minY = destP.fY;
    if( destP.fY > maxY )
        maxY = destP.fY;

    if( destP.fZ < minZ )
        minZ = destP.fZ;
    if( destP.fZ > maxZ )
        maxZ = destP.fZ;
}
//// IBlendVertsIntoBuffer ////////////////////////////////////////////////////
// Given a pointer into a buffer of verts that have blending data in the D3D
// format, blends them into the destination buffer given without the blending
// info.
void plDXPipeline::IBlendVertsIntoBuffer( plSpan* span,
hsMatrix44* matrixPalette, int numMatrices,
const uint8_t *src, uint8_t format, uint32_t srcStride,
uint8_t *dest, uint32_t destStride, uint32_t count,
uint16_t localUVWChans )
{
uint8_t numUVs, numWeights;
uint32_t i, j, indices, color, specColor, uvChanSize;
float weights[ 4 ], weightSum;
hsPoint3 pt, tempPt, destPt;
hsVector3 vec, tempNorm, destNorm;
/// Get some counts
switch( format & plGBufferGroup::kSkinWeightMask )
{
case plGBufferGroup::kSkin1Weight: numWeights = 1; break;
case plGBufferGroup::kSkin2Weights: numWeights = 2; break;
case plGBufferGroup::kSkin3Weights: numWeights = 3; break;
default: hsAssert( false, "Invalid weight count in IBlendVertsIntoBuffer()" );
}
numUVs = plGBufferGroup::CalcNumUVs( format );
uvChanSize = numUVs * sizeof( float ) * 3;
//#define MF_RECALC_BOUNDS
#ifdef MF_RECALC_BOUNDS
float minX = 1.e33f;
float minY = 1.e33f;
float minZ = 1.e33f;
float maxX = -1.e33f;
float maxY = -1.e33f;
float maxZ = -1.e33f;
#endif // MF_RECALC_BOUNDS
// localUVWChans is bump mapping tangent space vectors, which need to // CPU-optimized functions requiring dispatch
// Definition of the static dispatcher used by IBlendVertsIntoBuffer.
// blend_vert_buffer_sse3 only has a body when HS_SSE3 is defined, so it is
// registered only in that configuration; otherwise the SSE3 slot is left null
// so an SSE3-capable CPU cannot be routed to an empty function.
// NOTE(review): assumes hsFunctionDispatcher treats a null slot as "not
// available" and falls back to the FPU entry, as the null second and third
// arguments suggest - confirm against hsFunctionDispatcher.
#ifdef HS_SSE3
hsFunctionDispatcher<plDXPipeline::blend_vert_buffer_ptr> plDXPipeline::blend_vert_buffer(plDXPipeline::blend_vert_buffer_fpu, 0, 0, plDXPipeline::blend_vert_buffer_sse3);
#else
hsFunctionDispatcher<plDXPipeline::blend_vert_buffer_ptr> plDXPipeline::blend_vert_buffer(plDXPipeline::blend_vert_buffer_fpu, 0, 0, 0);
#endif // HS_SSE3
// Temporary macros for IBlendVertsIntoBuffer dispatch code de-duplication
#define BLENDVERTSTART \
uint8_t numUVs, numWeights; \
uint32_t i, j, indices, color, specColor, uvChanSize; \
float weights[ 4 ], weightSum; \
hsPoint3 pt, tempPt, destPt; \
hsVector3 vec, tempNorm, destNorm; \
\
/* Get some counts */\
switch( format & plGBufferGroup::kSkinWeightMask ) \
{ \
case plGBufferGroup::kSkin1Weight: numWeights = 1; break; \
case plGBufferGroup::kSkin2Weights: numWeights = 2; break; \
case plGBufferGroup::kSkin3Weights: numWeights = 3; break; \
default: hsAssert( false, "Invalid weight count in IBlendVertsIntoBuffer()" ); \
} \
\
numUVs = plGBufferGroup::CalcNumUVs( format ); \
uvChanSize = numUVs * sizeof( float ) * 3; \
\
/* localUVWChans is bump mapping tangent space vectors, which need to
// be skinned like the normal, as opposed to passed through like // be skinned like the normal, as opposed to passed through like
// garden variety UVW coordinates. // garden variety UVW coordinates.
// There are no localUVWChans that I know of in production assets (i.e. // There are no localUVWChans that I know of in production assets (i.e.
// the avatar is not skinned). // the avatar is not skinned).*/\
if( !localUVWChans ) if( !localUVWChans ) \
{ { \
/// Copy whilst blending /* Copy whilst blending */\
for( i = 0; i < count; i++ ) for( i = 0; i < count; i++ ) \
{ { \
// Extract data /* Extract data */\
src = inlExtractPoint( src, pt ); src = inlExtractPoint( src, pt ); \
for( j = 0, weightSum = 0; j < numWeights; j++ ) for( j = 0, weightSum = 0; j < numWeights; j++ ) \
{ { \
src = inlExtractFloat( src, weights[ j ] ); src = inlExtractFloat( src, weights[ j ] ); \
weightSum += weights[ j ]; weightSum += weights[ j ]; \
} } \
weights[ j ] = 1 - weightSum; weights[ j ] = 1 - weightSum; \
\
if( format & plGBufferGroup::kSkinIndices ) if( format & plGBufferGroup::kSkinIndices ) \
{ { \
src = inlExtractUInt32( src, indices ); src = inlExtractUInt32( src, indices ); \
} } \
else else \
{ { \
indices = 1 << 8; indices = 1 << 8; \
} } \
src = inlExtractPoint( src, vec ); src = inlExtractPoint( src, vec ); \
src = inlExtractUInt32( src, color ); src = inlExtractUInt32( src, color ); \
src = inlExtractUInt32( src, specColor ); src = inlExtractUInt32( src, specColor ); \
\
// Blend /* Blend */\
destPt.Set( 0, 0, 0 ); destPt.Set( 0, 0, 0 ); \
destNorm.Set( 0, 0, 0 ); destNorm.Set( 0, 0, 0 ); \
for( j = 0; j < numWeights + 1; j++ ) for( j = 0; j < numWeights + 1; j++ ) \
{ { \
if( weights[ j ] ) if( weights[ j ] ) \
{ {
/*
MATRIXMULTBEGIN(matrixPalette[indices & 0xff], weights[j]); MATRIXMULTBEGIN(matrixPalette[indices & 0xff], weights[j]);
MATRIXMULTPOINTADD(destPt, pt); MATRIXMULTPOINTADD(destPt, pt);
MATRIXMULTVECTORADD(destNorm, vec); MATRIXMULTVECTORADD(destNorm, vec);
} */
#define BLENDVERTMID \
indices >>= 8; } \
} \
// Probably don't really need to renormalize this. There errors are indices >>= 8; \
// going to be subtle and "smooth". } \
// hsFastMath::NormalizeAppr(destNorm); /* Probably don't really need to renormalize this. There errors are
// going to be subtle and "smooth".*/\
#ifdef MF_RECALC_BOUNDS /* hsFastMath::NormalizeAppr(destNorm);*/ \
inlTESTPOINT(destPt, minX, minY, minZ, maxX, maxY, maxZ); \
#endif // MF_RECALC_BOUNDS /* Slam data into position now */\
dest = inlStuffPoint( dest, destPt ); \
// Slam data into position now dest = inlStuffPoint( dest, destNorm ); \
dest = inlStuffPoint( dest, destPt ); dest = inlStuffUInt32( dest, color ); \
dest = inlStuffPoint( dest, destNorm ); dest = inlStuffUInt32( dest, specColor ); \
dest = inlStuffUInt32( dest, color ); memcpy( dest, src, uvChanSize ); \
dest = inlStuffUInt32( dest, specColor ); src += uvChanSize; \
memcpy( dest, src, uvChanSize ); dest += uvChanSize; \
src += uvChanSize; } \
dest += uvChanSize; } \
} else \
} { \
else uint8_t hiChan = localUVWChans >> 8; \
{ uint8_t loChan = localUVWChans & 0xff; \
uint8_t hiChan = localUVWChans >> 8; /* Copy whilst blending */\
uint8_t loChan = localUVWChans & 0xff; for( i = 0; i < count; i++ ) \
/// Copy whilst blending { \
for( i = 0; i < count; i++ ) hsVector3 srcUVWs[plGeometrySpan::kMaxNumUVChannels]; \
{ hsVector3 dstUVWs[plGeometrySpan::kMaxNumUVChannels]; \
hsVector3 srcUVWs[plGeometrySpan::kMaxNumUVChannels]; \
hsVector3 dstUVWs[plGeometrySpan::kMaxNumUVChannels]; /* Extract data */\
src = inlExtractPoint( src, pt ); \
// Extract data for( j = 0, weightSum = 0; j < numWeights; j++ ) \
src = inlExtractPoint( src, pt ); { \
for( j = 0, weightSum = 0; j < numWeights; j++ ) src = inlExtractFloat( src, weights[ j ] ); \
{ weightSum += weights[ j ]; \
src = inlExtractFloat( src, weights[ j ] ); } \
weightSum += weights[ j ]; weights[ j ] = 1 - weightSum; \
} \
weights[ j ] = 1 - weightSum; if( format & plGBufferGroup::kSkinIndices ) \
{ \
if( format & plGBufferGroup::kSkinIndices ) src = inlExtractUInt32( src, indices ); \
{ } \
src = inlExtractUInt32( src, indices ); else \
} { \
else indices = 1 << 8; \
{ } \
indices = 1 << 8; \
} src = inlExtractPoint( src, vec ); \
src = inlExtractUInt32( src, color ); \
src = inlExtractPoint( src, vec ); src = inlExtractUInt32( src, specColor ); \
src = inlExtractUInt32( src, color ); \
src = inlExtractUInt32( src, specColor ); uint8_t k; \
for( k = 0; k < numUVs; k++ ) \
uint8_t k; { \
for( k = 0; k < numUVs; k++ ) src = inlExtractPoint( src, srcUVWs[k] ); \
{ } \
src = inlExtractPoint( src, srcUVWs[k] ); memcpy( dstUVWs, srcUVWs, uvChanSize); \
} dstUVWs[loChan].Set(0,0,0); \
memcpy( dstUVWs, srcUVWs, uvChanSize); dstUVWs[hiChan].Set(0,0,0); \
dstUVWs[loChan].Set(0,0,0); \
dstUVWs[hiChan].Set(0,0,0); /* Blend */\
destPt.Set( 0, 0, 0 ); \
// Blend destNorm.Set( 0, 0, 0 ); \
destPt.Set( 0, 0, 0 ); for( j = 0; j < numWeights + 1; j++ ) \
destNorm.Set( 0, 0, 0 ); { \
for( j = 0; j < numWeights + 1; j++ ) if( weights[ j ] ) \
{ { \
if( weights[ j ] ) /*
{
MATRIXMULTBEGIN(matrixPalette[indices & 0xff], weights[j]); MATRIXMULTBEGIN(matrixPalette[indices & 0xff], weights[j]);
MATRIXMULTPOINTADD(destPt, pt); MATRIXMULTPOINTADD(destPt, pt);
MATRIXMULTVECTORADD(destNorm, vec); MATRIXMULTVECTORADD(destNorm, vec);
MATRIXMULTVECTORADD(dstUVWs[loChan], srcUVWs[loChan]); MATRIXMULTVECTORADD(dstUVWs[loChan], srcUVWs[loChan]);
MATRIXMULTVECTORADD(dstUVWs[hiChan], srcUVWs[hiChan]); MATRIXMULTVECTORADD(dstUVWs[hiChan], srcUVWs[hiChan]);
} */
#define BLENDVERTEND \
indices >>= 8; } \
} \
// Probably don't really need to renormalize this. There errors are indices >>= 8; \
// going to be subtle and "smooth". } \
// hsFastMath::NormalizeAppr(destNorm); /* Probably don't really need to renormalize this. There errors are
// hsFastMath::NormalizeAppr(dstUVWs[loChan]); // going to be subtle and "smooth". */\
// hsFastMath::NormalizeAppr(dstUVWs[hiChan]); /* hsFastMath::NormalizeAppr(destNorm); */\
/* hsFastMath::NormalizeAppr(dstUVWs[loChan]); */\
#ifdef MF_RECALC_BOUNDS /* hsFastMath::NormalizeAppr(dstUVWs[hiChan]); */\
inlTESTPOINT(destPt, minX, minY, minZ, maxX, maxY, maxZ); \
#endif // MF_RECALC_BOUNDS /* Slam data into position now */\
dest = inlStuffPoint( dest, destPt ); \
// Slam data into position now dest = inlStuffPoint( dest, destNorm ); \
dest = inlStuffPoint( dest, destPt ); dest = inlStuffUInt32( dest, color ); \
dest = inlStuffPoint( dest, destNorm ); dest = inlStuffUInt32( dest, specColor ); \
dest = inlStuffUInt32( dest, color ); memcpy( dest, dstUVWs, uvChanSize ); \
dest = inlStuffUInt32( dest, specColor ); dest += uvChanSize; \
memcpy( dest, dstUVWs, uvChanSize ); } \
dest += uvChanSize; }
}
} void plDXPipeline::blend_vert_buffer_fpu( plSpan* span,
#ifdef MF_RECALC_BOUNDS hsMatrix44* matrixPalette, int numMatrices,
hsBounds3Ext wBnd; const uint8_t *src, uint8_t format, uint32_t srcStride,
wBnd.Reset(&hsPoint3(minX, minY, minZ)); uint8_t *dest, uint32_t destStride, uint32_t count,
wBnd.Union(&hsPoint3(maxX, maxY, maxZ)); uint16_t localUVWChans )
span->fWorldBounds = wBnd; {
#endif // MF_RECALC_BOUNDS BLENDVERTSTART
MATRIXMULTBEGIN_FPU(matrixPalette[indices & 0xff], weights[j]);
MATRIXMULTPOINTADD_FPU(destPt, pt);
MATRIXMULTVECTORADD_FPU(destNorm, vec);
BLENDVERTMID
MATRIXMULTBEGIN_FPU(matrixPalette[indices & 0xff], weights[j]);
MATRIXMULTPOINTADD_FPU(destPt, pt);
MATRIXMULTVECTORADD_FPU(destNorm, vec);
MATRIXMULTVECTORADD_FPU(dstUVWs[loChan], srcUVWs[loChan]);
MATRIXMULTVECTORADD_FPU(dstUVWs[hiChan], srcUVWs[hiChan]);
BLENDVERTEND
}
void plDXPipeline::blend_vert_buffer_sse3( plSpan* span,
hsMatrix44* matrixPalette, int numMatrices,
const uint8_t *src, uint8_t format, uint32_t srcStride,
uint8_t *dest, uint32_t destStride, uint32_t count,
uint16_t localUVWChans )
{
#ifdef HS_SSE3
BLENDVERTSTART
MATRIXMULTBEGIN_SSE3(matrixPalette[indices & 0xff], weights[j]);
MATRIXMULTPOINTADD_SSE3(destPt, pt);
MATRIXMULTVECTORADD_SSE3(destNorm, vec);
BLENDVERTMID
MATRIXMULTBEGIN_SSE3(matrixPalette[indices & 0xff], weights[j]);
MATRIXMULTPOINTADD_SSE3(destPt, pt);
MATRIXMULTVECTORADD_SSE3(destNorm, vec);
MATRIXMULTVECTORADD_SSE3(dstUVWs[loChan], srcUVWs[loChan]);
MATRIXMULTVECTORADD_SSE3(dstUVWs[hiChan], srcUVWs[hiChan]);
BLENDVERTEND
#endif // HS_SSE3
} }
// ISetPipeConsts ////////////////////////////////////////////////////////////////// // ISetPipeConsts //////////////////////////////////////////////////////////////////

12
Sources/Plasma/PubUtilLib/plPipeline/plDXPipeline.h

@ -465,7 +465,8 @@ protected:
void IBlendVertsIntoBuffer( plSpan* span, void IBlendVertsIntoBuffer( plSpan* span,
hsMatrix44* matrixPalette, int numMatrices, hsMatrix44* matrixPalette, int numMatrices,
const uint8_t *src, uint8_t format, uint32_t srcStride, const uint8_t *src, uint8_t format, uint32_t srcStride,
uint8_t *dest, uint32_t destStride, uint32_t count, uint16_t localUVWChans ); uint8_t *dest, uint32_t destStride, uint32_t count, uint16_t localUVWChans )
{ blend_vert_buffer.call(span, matrixPalette, numMatrices, src, format, srcStride, dest, destStride, count, localUVWChans); };
hsBool ISoftwareVertexBlend( plDrawableSpans* drawable, const hsTArray<int16_t>& visList ); hsBool ISoftwareVertexBlend( plDrawableSpans* drawable, const hsTArray<int16_t>& visList );
@ -734,7 +735,7 @@ public:
virtual void GetDepth(float& hither, float& yon) const; virtual void GetDepth(float& hither, float& yon) const;
virtual void SetDepth(float hither, float yon); virtual void SetDepth(float hither, float yon);
virtual float GetZBiasScale() const; virtual float GetZBiasScale() const;
virtual void SetZBiasScale(float scale); virtual void SetZBiasScale(float scale);
virtual const hsMatrix44& GetWorldToCamera() const; virtual const hsMatrix44& GetWorldToCamera() const;
@ -798,6 +799,13 @@ public:
virtual int GetMaxAnisotropicSamples(); virtual int GetMaxAnisotropicSamples();
virtual int GetMaxAntiAlias(int Width, int Height, int ColorDepth); virtual int GetMaxAntiAlias(int Width, int Height, int ColorDepth);
// CPU-optimized functions
protected:
// Function-pointer type shared by the vertex-blend implementations below. The
// parameter list matches IBlendVertsIntoBuffer: span, matrixPalette,
// numMatrices, src, format, srcStride, dest, destStride, count, localUVWChans.
typedef void(*blend_vert_buffer_ptr)(plSpan*, hsMatrix44*, int, const uint8_t *, uint8_t , uint32_t, uint8_t *, uint32_t, uint32_t, uint16_t);
// Implementation built from the *_FPU matrix macros.
static void blend_vert_buffer_fpu(plSpan*, hsMatrix44*, int, const uint8_t *, uint8_t , uint32_t, uint8_t *, uint32_t, uint32_t, uint16_t);
// Implementation built from the *_SSE3 matrix macros; its body is compiled
// only when HS_SSE3 is defined.
static void blend_vert_buffer_sse3(plSpan*, hsMatrix44*, int, const uint8_t *, uint8_t , uint32_t, uint8_t *, uint32_t, uint32_t, uint16_t);
// Dispatcher that IBlendVertsIntoBuffer forwards to via blend_vert_buffer.call(...).
static hsFunctionDispatcher<blend_vert_buffer_ptr> blend_vert_buffer;
}; };

Loading…
Cancel
Save