/*==LICENSE==*

CyanWorlds.com Engine - MMOG client, server and tools
Copyright (C) 2011  Cyan Worlds, Inc.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.

Additional permissions under GNU GPL version 3 section 7

If you modify this Program, or any covered work, by linking or
combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
(or a modified version of those libraries),
containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
licensors of this Program grant you additional
permission to convey the resulting work. Corresponding Source for a
non-source form of such a combination shall include the source code for
the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
work.

You can contact Cyan Worlds, Inc. by email legal@cyan.com
 or by snail mail at:
      Cyan Worlds, Inc.
      14617 N Newport Hwy
      Mead, WA   99021

*==LICENSE==*/
#include "HeadSpin.h"
#include "hsAffineParts.h"
#include "plInterp/hsInterp.h"
#include "hsStream.h"

#include "plProfile.h"

#define PL_OPTIMIZE_COMPOSE

inline void QuatTo3Vectors(const hsQuat& q, hsVector3* const v)
{
    v[0][0] = 1.0f - 2.0f*q.fY*q.fY - 2.0f*q.fZ*q.fZ;
    v[0][1] = 2.0f*q.fX*q.fY - 2.0f*q.fW*q.fZ;
    v[0][2] = 2.0f*q.fX*q.fZ + 2.0f*q.fW*q.fY;

    v[1][0] = 2.0f*q.fX*q.fY + 2.0f*q.fW*q.fZ;
    v[1][1] = 1.0f - 2.0f*q.fX*q.fX - 2.0f*q.fZ*q.fZ;
    v[1][2] = 2.0f*q.fY*q.fZ - 2.0f*q.fW*q.fX;

    v[2][0] = 2.0f*q.fX*q.fZ - 2.0f*q.fW*q.fY;
    v[2][1] = 2.0f*q.fY*q.fZ + 2.0f*q.fW*q.fX;
    v[2][2] = 1.0f - 2.0f*q.fX*q.fX - 2.0f*q.fY*q.fY;
}

inline void QuatTo3VectorsTranspose(const hsQuat& q, hsVector3* const v)
{
    v[0][0] = 1.0f - 2.0f*q.fY*q.fY - 2.0f*q.fZ*q.fZ;
    v[1][0] = 2.0f*q.fX*q.fY - 2.0f*q.fW*q.fZ;
    v[2][0] = 2.0f*q.fX*q.fZ + 2.0f*q.fW*q.fY;

    v[0][1] = 2.0f*q.fX*q.fY + 2.0f*q.fW*q.fZ;
    v[1][1] = 1.0f - 2.0f*q.fX*q.fX - 2.0f*q.fZ*q.fZ;
    v[2][1] = 2.0f*q.fY*q.fZ - 2.0f*q.fW*q.fX;

    v[0][2] = 2.0f*q.fX*q.fZ - 2.0f*q.fW*q.fY;
    v[1][2] = 2.0f*q.fY*q.fZ + 2.0f*q.fW*q.fX;
    v[2][2] = 1.0f - 2.0f*q.fX*q.fX - 2.0f*q.fY*q.fY;
}

//
// Constructors
// Convert from Gems struct for now
//
hsAffineParts::hsAffineParts(gemAffineParts *ap)
{
    AP_SET((*this), (*ap));
}

//
//
//
hsAffineParts::hsAffineParts()
{

}

//
//
//
void hsAffineParts::Reset()
{
    fT.Set(0,0,0);
    fQ.Identity();
    fU.Identity();
    fK.Set(1,1,1);
    fF = 1.0;
}

plProfile_CreateTimer("Compose", "Affine", Compose);
plProfile_CreateTimer("ComposeInv", "Affine", ComposeInv);
//
// Create an affine matrix from the various parts
//
// AffineParts:
//    Vector t; /* Translation components */
//    Quat   q; /* Essential rotation     */
//    Quat   u; /* Stretch rotation   */
//    Vector k; /* Stretch factors    */
//    float  f; /* Sign of determinant    */
//
// A matrix M is decomposed by : M = T F R U K Utranspose.
//      T is the translate mat.
//      F is +-Identity (to flip the rotation or not).
//      R is the rot matrix.
//      U is the stretch matrix.
//      K is the scale factor matrix.
//
void hsAffineParts::ComposeMatrix(hsMatrix44 *out) const
{
    plProfile_BeginTiming(Compose);
#ifndef PL_OPTIMIZE_COMPOSE
    // Built U matrix
    hsMatrix44 U;
    fU.MakeMatrix(&U);

    // Build scale factor matrix
    hsMatrix44 K;
    K.MakeScaleMat(&fK);

    // Build Utranspose matrix
    hsMatrix44 Utp;
    U.GetTranspose(&Utp);

    // Build R matrix
    hsMatrix44 R;
    fQ.MakeMatrix(&R);

    // Build flip matrix
//  hsAssert(fF == 1.0 || fF == -1.0, "Invalid flip portion of affine parts");
    hsMatrix44 F;
    if (fF==-1.0)
    {
        hsVector3 s;
        s.Set(-1,-1,-1);
        F.MakeScaleMat(&s);
    }
    else
        F.Reset();

    // Build translate matrix
    hsMatrix44 T;
    T.MakeTranslateMat(&fT);

    //
    // Concat mats
    //
    *out =  K * Utp;
    *out =  U * (*out);
    *out =  R * (*out);     // Q
    *out =  F * (*out);
    *out =  T * (*out);     // Translate happens last
#else // PL_OPTIMIZE_COMPOSE
    // M = T F R U K Ut,
    // but these are mostly very sparse matrices. So rather
    // than construct the full 6 matrices and concatenate them,
    // we'll work out by hand what the non-zero results will be.
    // T =  |1  0   0   Tx|
    //      |0  1   0   Ty|
    //      |0  0   1   Tz|
    // F =  |f  0   0   0|
    //      |0  f   0   0|
    //      |0  0   f   0|, where f is either 1 or -1
    // R =  |R00    R01 R02 0|
    //      |R10    R11 R12 0|
    //      |R20    R21 R22 0|
    // U =  |U00    U01 U02 0|
    //      |U10    U11 U12 0|
    //      |U20    U21 U22 0|
    // K =  |Sx     0   0   0|
    //      |0      Sy  0   0|
    //      |0      0   Sz  0|
    // Ut = |U00    U10 U20 0|
    //      |U01    U11 U21 0|
    //      |U02    U12 U22 0|, where Uij is from matrix U
    //
    // So, K * Ut = 
    //      |Sx*U00 Sx*U10  Sx*U20  0|
    //      |Sy*U01 Sy*U11  Sy*U21  0|
    //      |Sz*U02 Sz*U12  Sz*U22  0|
    //
    // U * (K * Ut) =
    //      | U0 dot S*U0   U0 dot S*U1 U0 dot S*U2 0|
    //      | U1 dot S*U0   U1 dot S*U1 U1 dot S*U2 0|
    //      | U2 dot S*U0   U2 dot S*U1 U2 dot S*U2 0|
    //
    // Let's call that matrix UK
    //
    // Now R * U * K * Ut = R * UK =
    //      | R0 dot UKc0   R0 dot UKc1 R0 dot UKc2     0|
    //      | R1 dot UKc0   R1 dot UKc1 R1 dot UKc2     0|
    //      | R2 dot UKc0   R2 dot UKc1 R2 dot UKc2     0|, where UKci is column i from UK
    //
    // if f is -1, we negate the matrix we have so far, else we don't. We can 
    // accomplish this cleanly by just negating the scale vector S if f == -1.
    //
    // Since the translate is last, we can just stuff it into the 4th column.
    //
    // Since we only ever use UK as column vectors, we'll just construct it
    // into 3 vectors representing the columns.
    //
    // The quat MakeMatrix function is pretty efficient, but it does a little more work
    // than it has to filling out the whole matrix when we only need the 3x3 rotation,
    // and we'd rather have it in the form of vectors anyway, so we'll use our own
    // quat to 3 vectors function here.

    hsVector3 U[3];
    QuatTo3Vectors(fU, U);

    int i, j;

    hsVector3 UKt[3];
    for( i = 0; i < 3; i++ )
    {
        for( j = 0; j < 3; j++ )
        {
            // SU[j] = (fK.fX * U[j].fX, fK.fY * U[j].fY, fK.fZ * U[j].fZ)
            UKt[j][i] = U[i].fX * fK.fX * U[j].fX 
                + U[i].fY * fK.fY * U[j].fY 
                + U[i].fZ * fK.fZ * U[j].fZ;
        }
    }

    hsVector3 R[3];
    QuatTo3Vectors(fQ, R);

    float f = fF < 0 ? -1.f : 1.f;
    for( i = 0; i < 3; i++ )
    {
        for( j = 0; j < 3; j++ )
        {
            out->fMap[i][j] = R[i].InnerProduct(UKt[j]) * f;
        }

        out->fMap[i][3] = fT[i];
    }

    out->fMap[3][0] = out->fMap[3][1] = out->fMap[3][2] = 0.f;
    out->fMap[3][3] = 1.f;
    out->NotIdentity();

#endif // PL_OPTIMIZE_COMPOSE
    plProfile_EndTiming(Compose);
}

void hsAffineParts::ComposeInverseMatrix(hsMatrix44 *out) const
{
    plProfile_BeginTiming(Compose);
#ifndef PL_OPTIMIZE_COMPOSE
    // Built U matrix
    hsMatrix44 U;
    fU.Conjugate().MakeMatrix(&U);

    // Build scale factor matrix
    hsMatrix44 K;
    hsVector3 invK;
    invK.Set(hsInvert(fK.fX),hsInvert(fK.fY),hsInvert(fK.fZ));
    K.MakeScaleMat(&invK);

    // Build Utranspose matrix
    hsMatrix44 Utp;
    U.GetTranspose(&Utp);

    // Build R matrix
    hsMatrix44 R;
    fQ.Conjugate().MakeMatrix(&R);

    // Build flip matrix
//  hsAssert(fF == 1.0 || fF == -1.0, "Invalid flip portion of affine parts");
    hsMatrix44 F;
    if (fF==-1.0)
    {
        hsVector3 s;
        s.Set(-1,-1,-1);
        F.MakeScaleMat(&s);
    }
    else
        F.Reset();

    // Build translate matrix
    hsMatrix44 T;
    T.MakeTranslateMat(&-fT);

    //
    // Concat mats
    //
    *out = Utp * K;
    *out = (*out) * U;
    *out = (*out) * R;
    *out = (*out) * F;
    *out = (*out) * T;
#else // PL_OPTIMIZE_COMPOSE
    // Same kind of thing here, except now M = Ut K U R F T
    // and again
    // T =  |1  0   0   Tx|
    //      |0  1   0   Ty|
    //      |0  0   1   Tz|
    // F =  |f  0   0   0|
    //      |0  f   0   0|
    //      |0  0   f   0|, where f is either 1 or -1
    // R =  |R00    R01 R02 0|
    //      |R10    R11 R12 0|
    //      |R20    R21 R22 0|
    // U =  |U00    U01 U02 0|
    //      |U10    U11 U12 0|
    //      |U20    U21 U22 0|
    // K =  |Sx     0   0   0|
    //      |0      Sy  0   0|
    //      |0      0   Sz  0|
    // Ut = |U00    U10 U20 0|
    //      |U01    U11 U21 0|
    //      |U02    U12 U22 0|, where Uij is from matrix U
    //
    // So, Ut * K = 
    //      |U00*Sx     U10*Sy  U20*Sz  0|
    //      |U01*Sx     U11*Sy  U21*Sz  0|
    //      |U02*Sx     U12*Sy  U22*Sz  0|
    //
    // (Ut * K) * U = UK =
    //      |Ut0*S dot Ut0  Ut0*S dot Ut1   Ut0*S dot Ut2   0|
    //      |Ut1*S dot Ut0  Ut1*S dot Ut1   Ut1*S dot Ut2   0|
    //      |Ut2*S dot Ut0  Ut2*S dot Ut1   Ut2*S dot Ut2   0|
    //
    // (((Ut * K) * U) * R)[i][j] = UK[i] dot Rc[j]
    //
    // Again we'll stuff the flip into the scale.
    //
    // Now, because the T is on the other end of the concat (closest
    // to the vertex), we can't just stuff it in. If Mr is the 
    // rotation part of the final matrix (Ut * K * U * R * F), then
    // the translation components M[i][3] = Mr[i] dot T.
    //      
    //
    hsVector3 Ut[3];
    QuatTo3VectorsTranspose(fU.Conjugate(), Ut);

    int i, j;

    hsVector3 invK;
    invK.Set(hsInvert(fK.fX),hsInvert(fK.fY),hsInvert(fK.fZ));
    hsVector3 UK[3];
    for( i = 0; i < 3; i++ )
    {
        for( j = 0; j < 3; j++ )
        {
            // SUt[i] = (Ut[i].fX * invK.fX, Ut[i].fY * invK.fY, Ut[i].fZ * invK.fZ)
            // So SUt[i].InnerProduct(Ut[j]) ==
            //      Ut[i].fX * invK.fX * Ut[j].fX 
            //          + Ut[i].fY * invK.fY * Ut[j].fY 
            //          + Ut[i].fZ * invK.fZ * Ut[j].fZ 

            UK[i][j] = Ut[i].fX * invK.fX * Ut[j].fX
                + Ut[i].fY * invK.fY * Ut[j].fY
                + Ut[i].fZ * invK.fZ * Ut[j].fZ;
        }
    }

    hsVector3 Rt[3];
    QuatTo3VectorsTranspose(fQ.Conjugate(), Rt);

    float f = fF < 0 ? -1.f : 1.f;
    for( i = 0; i < 3; i++ )
    {
        for( j = 0; j < 3; j++ )
        {
            out->fMap[i][j] = UK[i].InnerProduct(Rt[j]) * f;
        }

        out->fMap[i][3] = -(fT.InnerProduct((hsPoint3*)(&out->fMap[i])));
    }

    out->fMap[3][0] = out->fMap[3][1] = out->fMap[3][2] = 0.f;
    out->fMap[3][3] = 1.f;
    out->NotIdentity();

#endif // PL_OPTIMIZE_COMPOSE
    plProfile_EndTiming(Compose);
}

//
// Given 2 affineparts structs and a p value (between 0-1),
// compute a new affine parts.
//
void hsAffineParts::SetFromInterp(const hsAffineParts &ap1, const hsAffineParts &ap2, float p)
{
    hsAssert(p>=0.0 && p<=1.0, "Interpolate param must be 0-1");

#if 0
    // Debug
    float rad1,rad2, rad3;
    hsVector3 axis1, axis2, axis3;
    k1->fQ.GetAngleAxis(&rad1, &axis1);
    k2->fQ.GetAngleAxis(&rad2, &axis2);
    fQ.GetAngleAxis(&rad3, &axis3);
#endif

    hsInterp::LinInterp(&ap1, &ap2, p, this);
}

//
// Read
//
void hsAffineParts::Read(hsStream *stream)
{
    fT.Read(stream);
    fQ.Read(stream);
    fU.Read(stream);
    fK.Read(stream);
    fF = stream->ReadLEFloat();
}

//
// Write
//
void hsAffineParts::Write(hsStream *stream)
{
    fT.Write(stream);
    fQ.Write(stream);
    fU.Write(stream);
    fK.Write(stream);
    stream->WriteLEFloat(fF);
}