mirror of
https://foundry.openuru.org/gitblit/r/CWE-ou-minkata.git
synced 2025-07-18 11:19:10 +00:00
@ -59,16 +59,31 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
|
|||||||
#include "hsCpuID.h"
|
#include "hsCpuID.h"
|
||||||
|
|
||||||
hsCpuId::hsCpuId() {
|
hsCpuId::hsCpuId() {
|
||||||
const unsigned int sse1_flag = 1<<25;
|
enum : unsigned int {
|
||||||
const unsigned int sse2_flag = 1<<26;
|
// EAX=1; EDX=:
|
||||||
const unsigned int sse3_flag = 1<<0;
|
sse1_flag = 1U<<25,
|
||||||
const unsigned int ssse3_flag = 1<<9;
|
sse2_flag = 1U<<26,
|
||||||
const unsigned int sse41_flag = 1<<19;
|
|
||||||
const unsigned int sse42_flag = 1<<20;
|
|
||||||
const unsigned int avx_flag = 1<<28;
|
|
||||||
|
|
||||||
unsigned int ax = 0, bx = 0, cx = 0, dx = 0;
|
// EAX=1; ECX=:
|
||||||
|
sse3_flag = 1U<<0,
|
||||||
|
ssse3_flag = 1U<<9,
|
||||||
|
sse41_flag = 1U<<19,
|
||||||
|
sse42_flag = 1U<<20,
|
||||||
|
avx_flag = 1U<<28,
|
||||||
|
|
||||||
|
// EAX=7; ECX=0; EBX=:
|
||||||
|
avx2_flag = 1U<<5
|
||||||
|
};
|
||||||
|
|
||||||
|
union RegSet {
|
||||||
|
struct {
|
||||||
|
unsigned int eax, ebx, ecx, edx;
|
||||||
|
};
|
||||||
|
int array[4];
|
||||||
|
};
|
||||||
|
|
||||||
|
RegSet CPUInfo_Features = { 0, 0, 0, 0 };
|
||||||
|
RegSet CPUInfo_Ext = { 0, 0, 0, 0 };
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Portable implementation of CPUID, successfully tested with:
|
* Portable implementation of CPUID, successfully tested with:
|
||||||
@ -80,33 +95,33 @@ hsCpuId::hsCpuId() {
|
|||||||
*
|
*
|
||||||
* Ref: http://primesieve.googlecode.com/svn-history/r388/trunk/soe/cpuid.h
|
* Ref: http://primesieve.googlecode.com/svn-history/r388/trunk/soe/cpuid.h
|
||||||
*/
|
*/
|
||||||
#if defined(MSC_COMPATIBLE)
|
#if defined(MSC_COMPATIBLE)
|
||||||
int CPUInfo[4] = {ax, bx, cx, dx};
|
__cpuid(CPUInfo_Features.array, 0);
|
||||||
__cpuid(CPUInfo, 0);
|
|
||||||
|
|
||||||
// check if the CPU supports the cpuid instruction.
|
// check if the CPU supports the cpuid instruction.
|
||||||
if (CPUInfo[0] != 0) {
|
if (CPUInfo_Features.eax != 0) {
|
||||||
__cpuid(CPUInfo, 1);
|
__cpuid(CPUInfo_Features.array, 1);
|
||||||
ax = CPUInfo[0];
|
__cpuid(CPUInfo_Ext.array, 7);
|
||||||
bx = CPUInfo[1];
|
|
||||||
cx = CPUInfo[2];
|
|
||||||
dx = CPUInfo[3];
|
|
||||||
}
|
}
|
||||||
#elif defined(GCC_COMPATIBLE)
|
#elif defined(GCC_COMPATIBLE)
|
||||||
__get_cpuid(1, &ax, &bx, &cx, &dx);
|
__get_cpuid(1, &CPUInfo_Features.eax, &CPUInfo_Features.ebx,
|
||||||
#endif
|
&CPUInfo_Features.ecx, &CPUInfo_Features.edx);
|
||||||
|
__get_cpuid(7, &CPUInfo_Ext.eax, &CPUInfo_Ext.ebx,
|
||||||
|
&CPUInfo_Ext.ecx, &CPUInfo_Ext.edx);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
has_sse1 = (dx & sse1_flag) || false;
|
has_sse1 = (CPUInfo_Features.edx & sse1_flag) || false;
|
||||||
has_sse2 = (dx & sse2_flag) || false;
|
has_sse2 = (CPUInfo_Features.edx & sse2_flag) || false;
|
||||||
has_sse3 = (cx & sse3_flag) || false;
|
has_sse3 = (CPUInfo_Features.ecx & sse3_flag) || false;
|
||||||
has_ssse3 = (cx & ssse3_flag) || false;
|
has_ssse3 = (CPUInfo_Features.ecx & ssse3_flag) || false;
|
||||||
has_sse41 = (cx & sse41_flag) || false;
|
has_sse41 = (CPUInfo_Features.ecx & sse41_flag) || false;
|
||||||
has_sse42 = (cx & sse42_flag) || false;
|
has_sse42 = (CPUInfo_Features.ecx & sse42_flag) || false;
|
||||||
has_avx = (cx & avx_flag) || false;
|
has_avx = (CPUInfo_Features.ecx & avx_flag) || false;
|
||||||
|
has_avx2 = (CPUInfo_Ext.ebx & avx2_flag) || false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const hsCpuId& hsCpuId::instance()
|
const hsCpuId& hsCpuId::Instance()
|
||||||
{
|
{
|
||||||
static hsCpuId self;
|
static hsCpuId self;
|
||||||
return self;
|
return self;
|
||||||
|
@ -65,7 +65,8 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
|
|||||||
// typedef float(*func_ptr)();
|
// typedef float(*func_ptr)();
|
||||||
// static hsFunctionDispatcher<func_ptr> my_func;
|
// static hsFunctionDispatcher<func_ptr> my_func;
|
||||||
//
|
//
|
||||||
// hsFunctionDispatcher<float::func_ptr> float::my_func(float::my_func_fpu, 0, 0, 0, 0, 0, 0, float::my_func_avx);
|
// hsFunctionDispatcher<func_ptr> my_func(my_func_fpu, 0, 0, 0, 0, 0, 0, my_func_avx);
|
||||||
|
// my_func();
|
||||||
//
|
//
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
@ -74,6 +75,12 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
|
|||||||
#ifndef hsCpuID_inc
|
#ifndef hsCpuID_inc
|
||||||
#define hsCpuID_inc
|
#define hsCpuID_inc
|
||||||
|
|
||||||
|
#if defined __AVX2__ || _MSC_VER >= 1600
|
||||||
|
#define HS_AVX2
|
||||||
|
#ifndef HS_SIMD_INCLUDE
|
||||||
|
# define HS_SIMD_INCLUDE "immintrin.h"
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
#if defined __AVX__ || _MSC_VER >= 1600
|
#if defined __AVX__ || _MSC_VER >= 1600
|
||||||
#define HS_AVX
|
#define HS_AVX
|
||||||
#ifndef HS_SIMD_INCLUDE
|
#ifndef HS_SIMD_INCLUDE
|
||||||
@ -126,16 +133,31 @@ struct hsCpuId {
|
|||||||
bool has_sse41;
|
bool has_sse41;
|
||||||
bool has_sse42;
|
bool has_sse42;
|
||||||
bool has_avx;
|
bool has_avx;
|
||||||
|
bool has_avx2;
|
||||||
|
|
||||||
hsCpuId();
|
hsCpuId();
|
||||||
static const hsCpuId& instance();
|
static const hsCpuId& Instance();
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename func_ptr>
|
template <typename func_ptr>
|
||||||
struct hsFunctionDispatcher {
|
struct hsCpuFunctionDispatcher {
|
||||||
hsFunctionDispatcher(func_ptr fpu, func_ptr sse1=0, func_ptr sse2=0, func_ptr sse3=0, func_ptr ssse3=0, func_ptr sse41=0, func_ptr sse42=0, func_ptr avx=0) {
|
hsCpuFunctionDispatcher(func_ptr fpu,
|
||||||
|
func_ptr sse1 = nullptr,
|
||||||
|
func_ptr sse2 = nullptr,
|
||||||
|
func_ptr sse3 = nullptr,
|
||||||
|
func_ptr ssse3 = nullptr,
|
||||||
|
func_ptr sse41 = nullptr,
|
||||||
|
func_ptr sse42 = nullptr,
|
||||||
|
func_ptr avx = nullptr,
|
||||||
|
func_ptr avx2 = nullptr)
|
||||||
|
{
|
||||||
hsAssert(fpu, "FPU fallback function required.");
|
hsAssert(fpu, "FPU fallback function required.");
|
||||||
const hsCpuId& cpu = hsCpuId::instance();
|
const hsCpuId& cpu = hsCpuId::Instance();
|
||||||
|
#ifdef HS_AVX2
|
||||||
|
if (cpu.has_avx2 && avx2) {
|
||||||
|
call = avx2;
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
#ifdef HS_AVX
|
#ifdef HS_AVX
|
||||||
if (cpu.has_avx && avx) {
|
if (cpu.has_avx && avx) {
|
||||||
call = avx;
|
call = avx;
|
||||||
|
@ -57,9 +57,6 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
|
|||||||
static hsMatrix44 myIdent = hsMatrix44().Reset();
|
static hsMatrix44 myIdent = hsMatrix44().Reset();
|
||||||
const hsMatrix44& hsMatrix44::IdentityMatrix() { return myIdent; }
|
const hsMatrix44& hsMatrix44::IdentityMatrix() { return myIdent; }
|
||||||
|
|
||||||
// CPU-optimized functions requiring dispatch
|
|
||||||
hsFunctionDispatcher<hsMatrix44::mat_mult_ptr> hsMatrix44::mat_mult(hsMatrix44::mat_mult_fpu, 0, 0, hsMatrix44::mat_mult_sse3);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
For the rotation:
|
For the rotation:
|
||||||
<20> 2 2 <20>
|
<20> 2 2 <20>
|
||||||
@ -102,7 +99,7 @@ void hsMatrix44::DecompRigid(hsScalarTriple &translate, hsQuat &rotate) const
|
|||||||
rotate.QuatFromMatrix44(*this);
|
rotate.QuatFromMatrix44(*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
hsMatrix44 hsMatrix44::mat_mult_fpu(const hsMatrix44 &a, const hsMatrix44 &b)
|
static hsMatrix44 mat_mult_fpu(const hsMatrix44 &a, const hsMatrix44 &b)
|
||||||
{
|
{
|
||||||
hsMatrix44 c;
|
hsMatrix44 c;
|
||||||
|
|
||||||
@ -153,7 +150,7 @@ hsMatrix44 hsMatrix44::mat_mult_fpu(const hsMatrix44 &a, const hsMatrix44 &b)
|
|||||||
_mm_storeu_ps(c.fMap[i], xmm[1]);
|
_mm_storeu_ps(c.fMap[i], xmm[1]);
|
||||||
#endif // HS_SSE3
|
#endif // HS_SSE3
|
||||||
|
|
||||||
hsMatrix44 hsMatrix44::mat_mult_sse3(const hsMatrix44 &a, const hsMatrix44 &b)
|
static hsMatrix44 mat_mult_sse3(const hsMatrix44 &a, const hsMatrix44 &b)
|
||||||
{
|
{
|
||||||
hsMatrix44 c;
|
hsMatrix44 c;
|
||||||
#ifdef HS_SSE3
|
#ifdef HS_SSE3
|
||||||
@ -201,6 +198,26 @@ hsMatrix44 hsMatrix44::mat_mult_sse3(const hsMatrix44 &a, const hsMatrix44 &b)
|
|||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CPU-optimized functions requiring dispatch
|
||||||
|
hsCpuFunctionDispatcher<hsMatrix44::mat_mult_ptr> hsMatrix44::mat_mult {
|
||||||
|
&mat_mult_fpu,
|
||||||
|
nullptr, // SSE1
|
||||||
|
nullptr, // SSE2
|
||||||
|
&mat_mult_sse3
|
||||||
|
};
|
||||||
|
|
||||||
|
hsPoint3 hsMatrix44::operator*(const hsPoint3& p) const
|
||||||
|
{
|
||||||
|
if (fFlags & hsMatrix44::kIsIdent)
|
||||||
|
return p;
|
||||||
|
|
||||||
|
hsPoint3 rVal;
|
||||||
|
rVal.fX = (p.fX * fMap[0][0]) + (p.fY * fMap[0][1]) + (p.fZ * fMap[0][2]) + fMap[0][3];
|
||||||
|
rVal.fY = (p.fX * fMap[1][0]) + (p.fY * fMap[1][1]) + (p.fZ * fMap[1][2]) + fMap[1][3];
|
||||||
|
rVal.fZ = (p.fX * fMap[2][0]) + (p.fY * fMap[2][1]) + (p.fZ * fMap[2][2]) + fMap[2][3];
|
||||||
|
return rVal;
|
||||||
|
}
|
||||||
|
|
||||||
hsVector3 hsMatrix44::operator*(const hsVector3& p) const
|
hsVector3 hsMatrix44::operator*(const hsVector3& p) const
|
||||||
{
|
{
|
||||||
if( fFlags & hsMatrix44::kIsIdent )
|
if( fFlags & hsMatrix44::kIsIdent )
|
||||||
|
@ -133,17 +133,7 @@ struct hsMatrix44 {
|
|||||||
void MakeZRotation(float radians);
|
void MakeZRotation(float radians);
|
||||||
|
|
||||||
|
|
||||||
hsPoint3 operator*(const hsPoint3& p) const
|
hsPoint3 operator*(const hsPoint3& p) const;
|
||||||
{
|
|
||||||
if( fFlags & hsMatrix44::kIsIdent )
|
|
||||||
return p;
|
|
||||||
|
|
||||||
hsPoint3 rVal;
|
|
||||||
rVal.fX = (p.fX * fMap[0][0]) + (p.fY * fMap[0][1]) + (p.fZ * fMap[0][2]) + fMap[0][3];
|
|
||||||
rVal.fY = (p.fX * fMap[1][0]) + (p.fY * fMap[1][1]) + (p.fZ * fMap[1][2]) + fMap[1][3];
|
|
||||||
rVal.fZ = (p.fX * fMap[2][0]) + (p.fY * fMap[2][1]) + (p.fZ * fMap[2][2]) + fMap[2][3];
|
|
||||||
return rVal;
|
|
||||||
}
|
|
||||||
hsVector3 operator*(const hsVector3& p) const;
|
hsVector3 operator*(const hsVector3& p) const;
|
||||||
hsMatrix44 operator *(const hsMatrix44& other) const { return mat_mult.call(*this, other); }
|
hsMatrix44 operator *(const hsMatrix44& other) const { return mat_mult.call(*this, other); }
|
||||||
|
|
||||||
@ -160,9 +150,7 @@ struct hsMatrix44 {
|
|||||||
|
|
||||||
// CPU-optimized functions
|
// CPU-optimized functions
|
||||||
typedef hsMatrix44(*mat_mult_ptr)(const hsMatrix44&, const hsMatrix44&);
|
typedef hsMatrix44(*mat_mult_ptr)(const hsMatrix44&, const hsMatrix44&);
|
||||||
static hsMatrix44 mat_mult_fpu(const hsMatrix44&, const hsMatrix44&);
|
static hsCpuFunctionDispatcher<mat_mult_ptr> mat_mult;
|
||||||
static hsMatrix44 mat_mult_sse3(const hsMatrix44&, const hsMatrix44&);
|
|
||||||
static hsFunctionDispatcher<mat_mult_ptr> mat_mult;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -10714,9 +10714,14 @@ static void IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, int numMat
|
|||||||
}
|
}
|
||||||
|
|
||||||
// CPU-optimized functions requiring dispatch
|
// CPU-optimized functions requiring dispatch
|
||||||
hsFunctionDispatcher<plDXPipeline::blend_vert_buffer_ptr> plDXPipeline::blend_vert_buffer(
|
hsCpuFunctionDispatcher<plDXPipeline::blend_vert_buffer_ptr> plDXPipeline::blend_vert_buffer {
|
||||||
IBlendVertBuffer<ISkinVertexFPU>, 0, 0, IBlendVertBuffer<ISkinVertexSSE3>, 0,
|
&IBlendVertBuffer<ISkinVertexFPU>,
|
||||||
IBlendVertBuffer<ISkinVertexSSE41>);
|
nullptr, // SSE1
|
||||||
|
nullptr, // SSE2
|
||||||
|
&IBlendVertBuffer<ISkinVertexSSE3>,
|
||||||
|
nullptr, // SSSE3
|
||||||
|
&IBlendVertBuffer<ISkinVertexSSE41>
|
||||||
|
};
|
||||||
|
|
||||||
// ISetPipeConsts //////////////////////////////////////////////////////////////////
|
// ISetPipeConsts //////////////////////////////////////////////////////////////////
|
||||||
// A shader can request that the pipeline fill in certain constants that are indeterminate
|
// A shader can request that the pipeline fill in certain constants that are indeterminate
|
||||||
|
@ -798,8 +798,10 @@ public:
|
|||||||
|
|
||||||
// CPU-optimized functions
|
// CPU-optimized functions
|
||||||
protected:
|
protected:
|
||||||
typedef void(*blend_vert_buffer_ptr)(plSpan*, hsMatrix44*, int, const uint8_t *, uint8_t , uint32_t, uint8_t *, uint32_t, uint32_t, uint16_t);
|
typedef void(*blend_vert_buffer_ptr)(plSpan*, hsMatrix44*, int, const uint8_t *,
|
||||||
static hsFunctionDispatcher<blend_vert_buffer_ptr> blend_vert_buffer;
|
uint8_t , uint32_t, uint8_t *, uint32_t,
|
||||||
|
uint32_t, uint16_t);
|
||||||
|
static hsCpuFunctionDispatcher<blend_vert_buffer_ptr> blend_vert_buffer;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user