You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
306 lines
9.1 KiB
306 lines
9.1 KiB
/*==LICENSE==* |
|
|
|
CyanWorlds.com Engine - MMOG client, server and tools |
|
Copyright (C) 2011 Cyan Worlds, Inc. |
|
|
|
This program is free software: you can redistribute it and/or modify |
|
it under the terms of the GNU General Public License as published by |
|
the Free Software Foundation, either version 3 of the License, or |
|
(at your option) any later version. |
|
|
|
This program is distributed in the hope that it will be useful, |
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
GNU General Public License for more details. |
|
|
|
You should have received a copy of the GNU General Public License |
|
along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
|
|
Additional permissions under GNU GPL version 3 section 7 |
|
|
|
If you modify this Program, or any covered work, by linking or |
|
combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, |
|
NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent |
|
JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK |
|
(or a modified version of those libraries), |
|
containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, |
|
PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG |
|
JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the |
|
licensors of this Program grant you additional |
|
permission to convey the resulting work. Corresponding Source for a |
|
non-source form of such a combination shall include the source code for |
|
the parts of OpenSSL and IJG JPEG Library used as well as that of the covered |
|
work. |
|
|
|
You can contact Cyan Worlds, Inc. by email legal@cyan.com |
|
or by snail mail at: |
|
Cyan Worlds, Inc. |
|
14617 N Newport Hwy |
|
Mead, WA 99021 |
|
|
|
*==LICENSE==*/ |
|
|
|
#ifndef hsFastMath_inc |
|
#define hsFastMath_inc |
|
|
|
#include <stdint.h>
#include <string.h>

#include "hsPoint2.h"
#include "hsGeometry3.h"
|
|
|
class hsFastMath { |
|
protected: |
|
static const hsPoint2* fCosSinTable; |
|
|
|
public: |
|
static const hsScalar kSqrtTwo; |
|
static const hsScalar kInvSqrtTwo; |
|
static const hsScalar kTwoPI; |
|
|
|
static hsScalar IATan2OverTwoPi(hsScalar y, hsScalar x); |
|
|
|
static inline hsScalar InvSqrtAppr(hsScalar x); |
|
static inline hsScalar InvSqrt(hsScalar x); |
|
static inline hsVector3& Normalize(hsVector3& v) { return (v *= InvSqrt(v.MagnitudeSquared())); } |
|
static inline hsVector3& NormalizeAppr(hsVector3& v) { return (v *= InvSqrtAppr(v.MagnitudeSquared())); } |
|
|
|
static inline void SinCosAppr(hsScalar rads, hsScalar& sinRads, hsScalar& cosRads); |
|
static inline void SinCosInRangeAppr(hsScalar rads, hsScalar& sinRads, hsScalar& cosRads); |
|
|
|
static inline void SinCos(hsScalar rads, hsScalar& sinRads, hsScalar& cosRads); |
|
static inline void SinCosInRange(hsScalar ang, hsScalar& sinRads, hsScalar& cosRads); |
|
|
|
static inline hsScalar Sin(hsScalar rads); |
|
static inline hsScalar Cos(hsScalar rads); |
|
static inline hsScalar SinInRange(hsScalar rads); |
|
static inline hsScalar CosInRange(hsScalar rads); |
|
}; |
|
|
|
|
|
// One over square root - from Graphics Gems
// Interesting combinations are
//  NUM_ITER    LOOKUP_BITS     err frac    us per call
//  0           8               5e-3        0.045
//  1           8               3e-5        0.082
//  0           6               1e-2        0.045
//  1           6               1e-4        0.082
//  2           6               1e-7        0.11
//  1           4               2e-3        0.082
//  2           4               5e-6        0.11
//  2           3               8e-5        0.11
// Tested on 5000 random numbers from [1.e-6..1.e3] over several runs.
// These timings come from tight loops, though, so they don't account for
// a bigger table thrashing the cache.
|
// Configuration and bit-field helpers for the table-seeded inverse square
// root (InvSqrtAppr / InvSqrt). The helpers operate on the raw 32-bit
// pattern of a float whose exponent field starts at bit EXP_POS.

#define NUM_ITER        0       // Newton-Raphson steps applied by InvSqrtAppr (0, 1 or 2)
#define LOOKUP_BITS     8       // mantissa bits used to index the seed table
#define EXP_POS         23      // bit position of the exponent field
#define EXP_BIAS        127     // exponent bias

#define LOOKUP_POS      (EXP_POS - LOOKUP_BITS)     // lowest bit of the table index
#define SEED_POS        (EXP_POS - 8)               // where an 8-bit table seed lands in the mantissa
// 2 << LOOKUP_BITS, i.e. 2^(LOOKUP_BITS + 1): the index is LOOKUP_BITS mantissa
// bits plus the lowest exponent bit (GET_EMANT keeps one bit above EXP_POS - 1).
#define TABLE_SIZE      (2 << LOOKUP_BITS)
#define LOOKUP_MASK     (TABLE_SIZE - 1)
#define GET_EXP(a)      (((a) >> EXP_POS) & 0xff)               // extract the 8-bit exponent field
#define SET_EXP(a)      ((a) << EXP_POS)                        // place a value in the exponent field
#define GET_EMANT(a)    (((a) >> LOOKUP_POS) & LOOKUP_MASK)     // table index ("extended mantissa")

#define SET_MANTSEED(a) (((unsigned long) (a)) << SEED_POS)     // place a table seed in the mantissa
|
|
|
// Approximate 1/sqrt(x).
//
// A seed is assembled directly as a float bit pattern: the exponent is
// derived arithmetically from the input's exponent field, and the top
// mantissa bits come from statSeedTable, indexed by GET_EMANT of the input.
// The seed is then refined by NUM_ITER Newton-Raphson steps (none when
// NUM_ITER is 0, in which case the seed itself is returned).
//
// The sign bit of x is never examined and no special cases (zero, negative,
// infinity, NaN) are handled; x is expected to be a positive finite value.
// Assumes a 32-bit float with the exponent at bit EXP_POS and bias EXP_BIAS.
//
// The float's bits are moved through a 32-bit integer with memcpy. The older
// `*(long*)&x` form read sizeof(long) bytes from a 4-byte object (8 bytes on
// LP64 targets) and broke the aliasing rules; `register` has also been
// dropped because it is no longer valid in C++17.
inline hsScalar hsFastMath::InvSqrtAppr(hsScalar x)
{
    extern unsigned char statSeedTable[];

    const float arg = x;

    uint32_t bits;
    memcpy(&bits, &arg, sizeof(bits));

    const uint32_t seedBits = static_cast<uint32_t>(
        SET_EXP(((3*EXP_BIAS - 1) - GET_EXP(bits)) >> 1)
        | SET_MANTSEED(statSeedTable[GET_EMANT(bits)]));

    float r;
    memcpy(&r, &seedBits, sizeof(r));

#if NUM_ITER > 0
    // Newton-Raphson step for f(r) = 1/r^2 - arg.
    r = (3.0f - r * r * arg) * r * 0.5f;

#if NUM_ITER > 1
    r = (3.0f - r * r * arg) * r * 0.5f;
#endif
#endif

    return r;
}
|
|
|
// 1/sqrt(x) from a table seed refined by two Newton-Raphson steps.
//
// The seed is assembled directly as a float bit pattern: the exponent is
// derived arithmetically from the input's exponent field, and the top
// mantissa bits come from statSeedTable, indexed by GET_EMANT of the input.
// Unlike InvSqrtAppr, the two refinement steps are unconditional.
//
// The sign bit of x is never examined and no special cases (zero, negative,
// infinity, NaN) are handled; x is expected to be a positive finite value.
// Assumes a 32-bit float with the exponent at bit EXP_POS and bias EXP_BIAS.
//
// The float's bits are moved through a 32-bit integer with memcpy. The older
// `*(long*)&x` form read sizeof(long) bytes from a 4-byte object (8 bytes on
// LP64 targets) and broke the aliasing rules; `register` has also been
// dropped because it is no longer valid in C++17.
inline hsScalar hsFastMath::InvSqrt(hsScalar x)
{
    extern unsigned char statSeedTable[];

    const float arg = x;

    uint32_t bits;
    memcpy(&bits, &arg, sizeof(bits));

    const uint32_t seedBits = static_cast<uint32_t>(
        SET_EXP(((3*EXP_BIAS - 1) - GET_EXP(bits)) >> 1)
        | SET_MANTSEED(statSeedTable[GET_EMANT(bits)]));

    float r;
    memcpy(&r, &seedBits, sizeof(r));

    // Two Newton-Raphson steps for f(r) = 1/r^2 - arg.
    r = (3.0f - r * r * arg) * r * 0.5f;
    r = (3.0f - r * r * arg) * r * 0.5f;

    return r;
}
|
|
|
|
|
// Table-based approximate sine and cosine of an arbitrary angle.
//
// rads     angle in radians, any finite value
// sinRads  receives the approximate sine
// cosRads  receives the approximate cosine
//
// The angle is reduced to [0, kTwoPI) and handed to SinCosInRangeAppr.
inline void hsFastMath::SinCosAppr(hsScalar rads, hsScalar& sinRads, hsScalar& cosRads)
{
    // fmodf keeps the sign of its first argument, so negative angles come
    // back in (-kTwoPI, 0] and need shifting up by one period.
    rads = fmodf(rads, kTwoPI);
    if( rads < 0 )
    {
        rads += kTwoPI;

        // A very small negative remainder rounds up to exactly kTwoPI here.
        // SinCosInRangeAppr would then compute a table index one entry
        // beyond the largest index any angle below kTwoPI produces, so fold
        // that case back to zero (same sine and cosine).
        if( rads >= kTwoPI )
            rads = 0;
    }

    SinCosInRangeAppr(rads, sinRads, cosRads);
}
|
|
|
// Table-based approximate sine and cosine by linear interpolation.
//
// rads     angle in radians; no range reduction is done here
// sinRads  receives the interpolated sine (fY of the table entries)
// cosRads  receives the interpolated cosine (fX of the table entries)
//
// The angle is scaled so that a full turn spans kNumSinCosEntries table
// steps; the integer part picks the lower table entry and the fractional
// part blends it with the next one.
inline void hsFastMath::SinCosInRangeAppr(hsScalar rads, hsScalar& sinRads, hsScalar& cosRads)
{
    const int kNumSinCosEntries = 8;
    const hsScalar kNumEntriesOverTwoPI = kNumSinCosEntries * 0.5f / hsScalarPI;

    const hsScalar scaled = rads * kNumEntriesOverTwoPI;
    const int iLo = (int)scaled;
    const hsScalar frac = scaled - iLo;

    const hsPoint2& lo = fCosSinTable[iLo];
    const hsPoint2& hi = fCosSinTable[iLo + 1];

    // lo + (hi - lo) * frac for each component.
    const hsScalar cosSpan = hi.fX - lo.fX;
    const hsScalar sinSpan = hi.fY - lo.fY;

    cosRads = cosSpan * frac + lo.fX;
    sinRads = sinSpan * frac + lo.fY;
}
|
|
|
// Approximate sine of an arbitrary angle (radians): wraps the angle with
// fmodf, lifts a negative remainder by one period, and evaluates SinInRange.
inline hsScalar hsFastMath::Sin(hsScalar rads)
{
    hsScalar wrapped = fmodf(rads, kTwoPI);
    if( wrapped < 0 )
    {
        wrapped += kTwoPI;
    }

    return SinInRange(wrapped);
}
|
|
|
// Approximate cosine of an arbitrary angle (radians): wraps the angle with
// fmodf, lifts a negative remainder by one period, and evaluates CosInRange.
inline hsScalar hsFastMath::Cos(hsScalar rads)
{
    hsScalar wrapped = fmodf(rads, kTwoPI);
    if( wrapped < 0 )
    {
        wrapped += kTwoPI;
    }

    return CosInRange(wrapped);
}
|
|
|
// Polynomial approximation of sine for an angle already reduced by the
// caller (the wrappers pass values in [0, kTwoPI]).
//
// The angle is first folded toward zero: the last quarter turn is shifted
// down by a full period, the middle half turn is shifted down by pi with the
// result negated. The folded angle is then fed to the series
// a - a^3/6 + a^5/120.
inline hsScalar hsFastMath::SinInRange(hsScalar ang)
{
    float sgn = 1.f;

    const bool inLastQuarter = ang >= (0.75f * kTwoPI);
    if( inLastQuarter )
    {
        ang -= kTwoPI;
    }
    else if( ang >= (0.25f * kTwoPI) )
    {
        // sin(a) == -sin(a - pi)
        ang -= 3.141592654f;
        sgn = -1.0f;
    }

    const hsScalar ang3 = ang*ang*ang;
    const hsScalar ang5 = ang3*ang*ang;

    return (ang - ang3 * (1.0f/6.0f) + ang5 / 120.0f) * sgn;
}
|
|
|
// Polynomial approximation of cosine for an angle already reduced by the
// caller (the wrappers pass values in [0, kTwoPI]).
//
// The angle is first folded toward zero: the last quarter turn is shifted
// down by a full period, the middle half turn is shifted down by pi with the
// result negated. The folded angle is then fed to the series
// 1 - a^2/2 + a^4/24.
inline hsScalar hsFastMath::CosInRange(hsScalar ang)
{
    float sgn = 1.f;

    const bool inLastQuarter = ang >= (0.75f * kTwoPI);
    if( inLastQuarter )
    {
        ang -= kTwoPI;
    }
    else if( ang >= (0.25f * kTwoPI) )
    {
        // cos(a) == -cos(a - pi)
        ang -= 3.141592654f;
        sgn = -1.0f;
    }

    const hsScalar ang2 = ang*ang;
    const hsScalar ang4 = ang2*ang*ang;

    return (1.0f - (ang2 / 2.0f) + ang4 / 24.0f) * sgn;
}
|
|
|
// Approximate sine and cosine of an arbitrary angle (radians): wraps the
// angle with fmodf, lifts a negative remainder by one period, and lets
// SinCosInRange fill in both outputs.
inline void hsFastMath::SinCos(hsScalar rads, hsScalar& sinRads, hsScalar& cosRads)
{
    hsScalar wrapped = fmodf(rads, kTwoPI);
    if( wrapped < 0 )
    {
        wrapped += kTwoPI;
    }

    SinCosInRange(wrapped, sinRads, cosRads);
}
|
|
|
// Polynomial approximation of sine and cosine together, for an angle
// already reduced by the caller (SinCos passes values in [0, kTwoPI]).
//
// ang      angle in radians
// sinRads  receives a - a^3/6 + a^5/120 of the folded angle, sign-corrected
// cosRads  receives 1 - a^2/2 + a^4/24 of the folded angle, sign-corrected
//
// Folding: the last quarter turn is shifted down by a full period; the
// middle half turn is shifted down by pi and both results are negated.
inline void hsFastMath::SinCosInRange(hsScalar ang, hsScalar& sinRads, hsScalar& cosRads)
{
    float sgn = 1.f;

    const bool inLastQuarter = ang >= (0.75f * kTwoPI);
    if( inLastQuarter )
    {
        ang -= kTwoPI;
    }
    else if( ang >= (0.25f * kTwoPI) )
    {
        ang -= 3.141592654f;
        sgn = -1.0f;
    }

    const hsScalar ang2 = ang*ang;
    const hsScalar ang3 = ang2*ang;
    const hsScalar ang4 = ang3*ang;
    const hsScalar ang5 = ang4*ang;

    sinRads = (ang - ang3 * (1.0f/6.0f) + ang5 / 120.0f) * sgn;
    cosRads = (1.0f - (ang2 / 2.0f) + ang4 / 24.0f) * sgn;
}
|
// |
|
// Here's an interesting one from GDalgorithms, which doesn't need a LUT |
|
// Not sure how the accuracy compares, but it's probably fine for this purpose. |
|
#if 0 // For future reference |
|
/* |
|
From: "Jason Dorie" <jason.dorie@blackboxgames.com> |
|
To: "GDAlgorithms" <gdalgorithms-list@lists.sourceforge.net> |
|
Date: Wed, 14 Mar 2001 11:43:48 -0800 |
|
Subject: [Algorithms] Fast simultaneous Sin() and Cos() |
|
Reply-To: gdalgorithms-list@lists.sourceforge.net |
|
|
|
|
|
I know someone (Jason Zisk?) was looking for fast rotation matrix |
|
generation code. I realize that a Sin/Cos lookup table is the way to go for |
|
absolute speed, but if storage is a concern and the accuracy isn't, this |
|
code is about 5x faster than using the built-in sin and cos instructions, |
|
and accurate to about 4 decimal places. |
|
|
|
If you really want speed, and don't care about accuracy, drop the 2nd |
|
polynomial from each term. It's less accurate and faster still. It could |
|
probably be made even faster by replacing the if/else with branchless code, |
|
but I haven't bothered to figure out how yet. |
|
|
|
|
|
My angles are 0-65535 so that they can be masked into range easily, stored |
|
as shorts, and converted to normalized floats where necessary using SIMD |
|
instructions. |
|
*/ |
|
|
|
void FastSinCos(long Angle, float *pSin, float *pCos) |
|
{ |
|
float ang, sgn; |
|
|
|
ang = (Angle & 65535) * ((1.0f/65536.0f) * TwoPI); |
|
|
|
sgn = 1.0f; |
|
if(ang >= (0.75f * TwoPI)) |
|
ang -= TwoPI; |
|
else if(ang >= (0.25f * TwoPI)) |
|
{ |
|
ang -= 3.141592654f; |
|
sgn = -1.0f; |
|
} |
|
|
|
*pSin = (ang - (ang*ang*ang) * (1.0f/6.0f) + (ang*ang*ang*ang*ang) / 120.0f) * sgn; |
|
*pCos = (1.0f - (ang*ang / 2.0f) + (ang*ang*ang*ang) / 24.0f) *sgn; |
|
} |
|
#endif // For future reference |
|
#endif // hsFastMath_inc
|
|
|