mirror of
https://foundry.openuru.org/gitblit/r/CWE-ou-minkata.git
synced 2025-07-18 11:19:10 +00:00
Aligned matrix loading
This commit is contained in:
@ -74,6 +74,7 @@ endif(UNIX)
|
||||
|
||||
set(CoreLib_HEADERS
|
||||
HeadSpin.h
|
||||
hsAlignedAllocator.hpp
|
||||
hsBiExpander.h
|
||||
hsBitVector.h
|
||||
hsBounds.h
|
||||
|
129
Sources/Plasma/CoreLib/hsAlignedAllocator.hpp
Normal file
129
Sources/Plasma/CoreLib/hsAlignedAllocator.hpp
Normal file
@ -0,0 +1,129 @@
|
||||
/*==LICENSE==*
|
||||
|
||||
CyanWorlds.com Engine - MMOG client, server and tools
|
||||
Copyright (C) 2011 Cyan Worlds, Inc.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Additional permissions under GNU GPL version 3 section 7
|
||||
|
||||
If you modify this Program, or any covered work, by linking or
|
||||
combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
|
||||
NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
|
||||
JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
|
||||
(or a modified version of those libraries),
|
||||
containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
|
||||
PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
|
||||
JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
|
||||
licensors of this Program grant you additional
|
||||
permission to convey the resulting work. Corresponding Source for a
|
||||
non-source form of such a combination shall include the source code for
|
||||
the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
|
||||
work.
|
||||
|
||||
You can contact Cyan Worlds, Inc. by email legal@cyan.com
|
||||
or by snail mail at:
|
||||
Cyan Worlds, Inc.
|
||||
14617 N Newport Hwy
|
||||
Mead, WA 99021
|
||||
|
||||
*==LICENSE==*/
|
||||
|
||||
#ifndef _HS_ALIGNED_ALLOCATOR_H
|
||||
#define _HS_ALIGNED_ALLOCATOR_H
|
||||
|
||||
#include "HeadSpin.h"

#include <cstdlib>
#include <new>
#include <stdexcept>
|
||||
|
||||
/**
 * An aligned allocator for storing SIMD ready values in STL containers.
 *
 * Memory handed out by allocate() is requested from the platform's aligned
 * allocation routine (_aligned_malloc on Win32 builds, posix_memalign
 * elsewhere) with ALIGNMENT as the requested alignment.
 *
 * The allocator is stateless: all instances compare equal, and storage
 * obtained from one instance may be released through any other.
 *
 * \tparam T          element type stored by the container
 * \tparam ALIGNMENT  requested alignment in bytes, forwarded unchanged to the
 *                    platform routine (posix_memalign requires a power of two
 *                    that is also a multiple of sizeof(void*))
 * \remarks Based on https://gist.github.com/donny-dont/1471329
 */
template<class T, size_t ALIGNMENT=16>
class hsAlignedAllocator
{
    // Assignment stays private so the allocator is not assignable from
    // outside; it still returns *this so that the function is well-defined.
    hsAlignedAllocator& operator=(const hsAlignedAllocator&) { return *this; }

public:
    /**
     * Rebinds this allocator to another element type while keeping the
     * alignment of the enclosing allocator. A member rebind is required
     * because std::allocator_traits cannot rebind templates that carry
     * non-type parameters on its own.
     */
    template <typename U>
    struct rebind
    {
        typedef hsAlignedAllocator<U, ALIGNMENT> other;
    };

    typedef T*          pointer;
    typedef const T*    const_pointer;
    typedef T&          reference;
    typedef const T&    const_reference;
    typedef T           value_type;
    typedef size_t      size_type;
    typedef ptrdiff_t   difference_type;

    hsAlignedAllocator() { }
    hsAlignedAllocator(const hsAlignedAllocator&) { }
    template <typename U> hsAlignedAllocator(const hsAlignedAllocator<U, ALIGNMENT>&) { }
    ~hsAlignedAllocator() { }

    /** Returns the address of \a r. */
    pointer address(reference r) const { return &r; }
    const_pointer address(const_reference r) const { return &r; }

    /**
     * Allocates uninitialised, aligned storage for \a size elements.
     *
     * \param size  number of elements (not bytes) to reserve
     * \param hint  ignored; accepted for allocator interface compatibility
     * \return pointer to the storage, or nullptr when \a size is zero
     * \throws std::length_error if \a size exceeds max_size()
     * \throws std::bad_alloc    if the platform allocation fails
     */
    pointer allocate(size_type size, const_pointer hint=nullptr)
    {
        (void)hint;

        if (size == 0)
            return nullptr;
        if (size > max_size())
            throw std::length_error("integer overflow");

#ifdef HS_BUILD_FOR_WIN32
        void* ptr = _aligned_malloc(size * sizeof(value_type), ALIGNMENT);
#else
        void* ptr = nullptr;
        // posix_memalign reports failure through its return value; do not
        // rely on the out-parameter being left untouched in that case.
        if (posix_memalign(&ptr, ALIGNMENT, size * sizeof(value_type)) != 0)
            ptr = nullptr;
#endif // HS_BUILD_FOR_WIN32

        if (!ptr)
            throw std::bad_alloc();
        return static_cast<pointer>(ptr);
    }

    /** Copy-constructs a value_type from \a t in the raw storage at \a p. */
    void construct(T* const p, const_reference t) const
    {
        void * const pv = static_cast<void *>(p);
        new (pv) value_type(t);
    }

    /**
     * Releases storage previously returned by allocate().
     *
     * \param ptr   pointer obtained from allocate()
     * \param size  ignored; accepted for allocator interface compatibility
     */
    void deallocate(pointer ptr, size_type size)
    {
        (void)size;

#ifdef HS_BUILD_FOR_WIN32
        _aligned_free(ptr);
#else
        free(ptr);
#endif // HS_BUILD_FOR_WIN32
    }

    /** Runs the destructor of the object at \a p without freeing its storage. */
    void destroy(T* const p) const
    {
        p->~T();
    }

    /** Largest element count whose size in bytes still fits in size_t. */
    size_type max_size() const
    {
        return static_cast<size_t>(-1) / sizeof(value_type);
    }

    /** Stateless allocator: every instance is interchangeable with any other. */
    bool operator==(const hsAlignedAllocator&) const { return true; }
    bool operator!=(const hsAlignedAllocator&) const { return false; }
};
|
||||
|
||||
#endif // _HS_ALIGNED_ALLOCATOR_H
|
@ -61,7 +61,11 @@ struct hsMatrix44 {
|
||||
kView
|
||||
};
|
||||
float fMap[4][4];
|
||||
uint32_t fFlags;
|
||||
union
|
||||
{
|
||||
uint8_t alignment[16];
|
||||
uint32_t fFlags;
|
||||
};
|
||||
|
||||
hsMatrix44() : fFlags(0) {}
|
||||
hsMatrix44(const hsScalarTriple &translate, const hsQuat &rotate);
|
||||
|
@ -63,7 +63,7 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com
|
||||
#ifndef _plDrawableSpans_h
|
||||
#define _plDrawableSpans_h
|
||||
|
||||
|
||||
#include "hsAlignedAllocator.hpp"
|
||||
#include "hsBitVector.h"
|
||||
#include "hsTemplates.h"
|
||||
#include "plDrawable.h"
|
||||
@ -132,7 +132,7 @@ class plDrawableSpans : public plDrawable
|
||||
hsMatrix44 fLocalToWorld;
|
||||
hsMatrix44 fWorldToLocal;
|
||||
|
||||
std::vector<hsMatrix44> fLocalToWorlds;
|
||||
std::vector<hsMatrix44, hsAlignedAllocator<hsMatrix44>> fLocalToWorlds; // used in SIMD skinning
|
||||
std::vector<hsMatrix44> fWorldToLocals;
|
||||
|
||||
std::vector<hsMatrix44> fLocalToBones;
|
||||
|
@ -10619,9 +10619,9 @@ inline void inlTESTPOINT(const hsPoint3& destP,
|
||||
#define MATRIXMULTBEGIN_SSE3(xfm, wgt) \
|
||||
__m128 mc0, mc1, mc2, mwt, msr, _x, _y, _z, hbuf1, hbuf2; \
|
||||
ALIGN(16) float hack[4]; \
|
||||
mc0 = _mm_loadu_ps(xfm.fMap[0]); \
|
||||
mc1 = _mm_loadu_ps(xfm.fMap[1]); \
|
||||
mc2 = _mm_loadu_ps(xfm.fMap[2]); \
|
||||
mc0 = _mm_load_ps(xfm.fMap[0]); \
|
||||
mc1 = _mm_load_ps(xfm.fMap[1]); \
|
||||
mc2 = _mm_load_ps(xfm.fMap[2]); \
|
||||
mwt = _mm_set_ps1(wgt);
|
||||
#define MATRIXMULTPOINTADD_SSE3(dst, src) \
|
||||
msr = _mm_set_ps(1.f, src.fZ, src.fY, src.fX); \
|
||||
|
Reference in New Issue
Block a user