/*==LICENSE==*

CyanWorlds.com Engine - MMOG client, server and tools
Copyright (C) 2011  Cyan Worlds, Inc.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.

Additional permissions under GNU GPL version 3 section 7

If you modify this Program, or any covered work, by linking or
combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
(or a modified version of those libraries),
containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
licensors of this Program grant you additional
permission to convey the resulting work. Corresponding Source for a
non-source form of such a combination shall include the source code for
the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
work.

You can contact Cyan Worlds, Inc. by email legal@cyan.com
 or by snail mail at:
      Cyan Worlds, Inc.
      14617 N Newport Hwy
      Mead, WA   99021

*==LICENSE==*/
#include <memory.h>
#include "hsTypes.h"
#include "hsDXTSoftwareCodec.h"
#include "plMipmap.h"
#include "hsCodecManager.h"

#define SWAPVARS( x, y, t ) { t = x; x = y; y = t; }

// This is the color depth that we decompress to by default if we're not told otherwise
#define kDefaultDepth   32


hsBool hsDXTSoftwareCodec::fRegistered = false;

hsDXTSoftwareCodec& hsDXTSoftwareCodec::Instance()
{
    static hsDXTSoftwareCodec the_instance;
    
    return the_instance;
}

hsDXTSoftwareCodec::hsDXTSoftwareCodec()
{
}

hsDXTSoftwareCodec::~hsDXTSoftwareCodec()
{
}

//// CreateCompressedMipmap ///////////////////////////////////////////////////
//  Updated 8.15.2000 mcn to generate uncompressed mipmaps down to 1x1 (the
//  decompressor better know how to deal with this!) Also cleaned up so I can
//  read it :)

plMipmap *hsDXTSoftwareCodec::CreateCompressedMipmap( plMipmap *uncompressed )
{
    UInt8           format;
    plMipmap        *compressed = nil;
    UInt8           i;


    /// Sanity checks
    hsAssert( fRegistered, "Calling member of unregistered codec." );
    hsAssert( !uncompressed->IsCompressed(), "Trying to compress already compressed bitmap.");

    /// Check width and height
    if( ( uncompressed->GetWidth() | uncompressed->GetHeight() ) & 0x03 )
        return nil;     /// Width and height must be multiple of 4

    format = ICalcCompressedFormat( uncompressed );
    if( format == plMipmap::DirectXInfo::kError )
        return nil;


    /// Set up structure
    compressed = TRACKED_NEW plMipmap( uncompressed->GetWidth(), uncompressed->GetHeight(), plMipmap::kARGB32Config,
                                uncompressed->GetNumLevels(), plMipmap::kDirectXCompression, format );

    {
        /// Now loop through and compress each one (that is a valid size!)
        for( i = 0; i < compressed->GetNumLevels(); i++ )
        {
            uncompressed->SetCurrLevel( i );
            compressed->SetCurrLevel( i );
            if( ( compressed->GetCurrWidth() | compressed->GetCurrHeight() ) & 0x03 )
                break;

            CompressMipmapLevel( uncompressed, compressed );
        }

        /// Now copy the rest straight over
        for( ; i < compressed->GetNumLevels(); i++ )
            memcpy( compressed->GetLevelPtr( i ), uncompressed->GetLevelPtr( i ), uncompressed->GetLevelSize( i ) );

        /// Reset
        uncompressed->SetCurrLevel( 0 );
        compressed->SetCurrLevel( 0 );
    }

    return compressed;
}

//// CreateUncompressedBitmap /////////////////////////////////////////////////
//
//  Takes a compressed bitmap and uncompresses it. Duh!
//
//  8.14.2000 mcn - Updated to decompress to 16-bit modes and set flags
//                  accordingly.
//  8.15.2000 mcn - Updated to handle compressed mipmaps with invalid levels
//                  (i.e. they're not compressed). See CreateCompressedMipmap().
//  8.18.2000 mcn - Updated to handle new flags instead of just a bit depth

plMipmap *hsDXTSoftwareCodec::CreateUncompressedMipmap( plMipmap *compressed, UInt8 flags )
{
    plMipmap    *uncompressed = nil;
    Int32       totalSize;
    Int32       width, height;
    UInt8       i;


    /// Sanity checks
    hsAssert( fRegistered, "Calling member of unregistered codec." );
    hsAssert( compressed->fCompressionType == plMipmap::kDirectXCompression, "Uncompressing wrong format." );
    hsAssert( ( compressed->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT1 ) ||
              ( compressed->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT5 ),
              "Unsupported directX compression format." );


    /// Set up the info
    uncompressed = ICreateUncompressedMipmap( compressed, flags );

    /// Handle mipmaps?
    {
        totalSize = 0;
        width = compressed->GetWidth();
        height = compressed->GetHeight();

        for( i = 0; i < compressed->GetNumLevels(); i++ )
        {
            /// Note: THIS is valid no matter whether the compressed level is
            /// really compressed or not
            totalSize += width * height * uncompressed->GetPixelSize() >> 3;
            width >>= 1;
            height >>= 1;
        }

        /// Loop through and decompress!
        width = compressed->GetWidth();
        height = compressed->GetHeight();
        for( i = 0; i < uncompressed->GetNumLevels(); i++ )
        {
            uncompressed->SetCurrLevel( i );
            compressed->SetCurrLevel( i );

            if( ( width | height ) & 0x03 )
                break;

            UncompressMipmap( uncompressed, compressed, flags );

            if( width > 1 )
                width >>= 1;
            if( height > 1 )
                height >>= 1;
        }

        /// Now loop through the *rest* and just copy (they won't be compressed)
        for( ; i < uncompressed->GetNumLevels(); i++ )
        {
            uncompressed->SetCurrLevel( i );
            compressed->SetCurrLevel( i );

            IXlateColorBlock( (plMipmap *)uncompressed, (UInt32 *)compressed->GetLevelPtr( i ), flags );
        }

        /// Reset
        uncompressed->SetCurrLevel( 0 );
        compressed->SetCurrLevel( 0 );
    }

    return uncompressed;
}

//// IXlateColorBlock /////////////////////////////////////////////////////////
//  Converts a block from ARGB 8888 to the given format of the bitmap.

void    hsDXTSoftwareCodec::IXlateColorBlock( plMipmap *destBMap, UInt32 *srcBlock, UInt8 flags )
{
    UInt8       *bytePtr;
    UInt16      *wordPtr, tempVal;
    UInt32      i;


    if( destBMap->fUncompressedInfo.fType == plMipmap::UncompressedInfo::kRGB8888 )
        memcpy( destBMap->GetCurrLevelPtr(), srcBlock, destBMap->GetCurrLevelSize() );
    else
    {
        if( destBMap->fUncompressedInfo.fType == plMipmap::UncompressedInfo::kAInten88 )
        {
            /// Upper 8 bits from alpha, lower 8 bits from blue
            wordPtr = (UInt16 *)destBMap->GetCurrLevelPtr();
            for( i = 0; i < destBMap->GetCurrWidth() * destBMap->GetCurrHeight(); i++ )
            {
                wordPtr[ i ] = (UInt16)( ( ( srcBlock[ i ] >> 16 ) & 0xff00 ) | ( srcBlock[ i ] & 0x00ff ) );
            }
        }
        else if( destBMap->fUncompressedInfo.fType == plMipmap::UncompressedInfo::kInten8 )
        {
            /// 8 bits from blue
            bytePtr = (UInt8 *)destBMap->GetCurrLevelPtr();
            for( i = 0; i < destBMap->GetCurrWidth() * destBMap->GetCurrHeight(); i++ )
            {
                bytePtr[ i ] = (UInt8)( srcBlock[ i ] & 0x00ff );
            }
        }
        else if( destBMap->fUncompressedInfo.fType == plMipmap::UncompressedInfo::kRGB1555 )
        {
            wordPtr = (UInt16 *)destBMap->GetCurrLevelPtr();

            if( ( flags & hsCodecManager::kCompOrderMask ) == hsCodecManager::kWeirdCompOrder )
            {
                /// Really do 5551
                for( i = 0; i < destBMap->GetCurrWidth() * destBMap->GetCurrHeight(); i++ )
                {
                    tempVal = (UInt16)( ( ( srcBlock[ i ] & 0x000000f8 ) >> 2 ) |
                              ( ( ( srcBlock[ i ] & 0x0000f800 ) >> 5 ) ) |
                              ( ( ( srcBlock[ i ] & 0x00f80000 ) >> 8 ) ) |
                              ( ( srcBlock[ i ] & 0x80000000 ) >> 31 ) );

                    wordPtr[ i ] = tempVal;
                }
            }
            else
            {
                /// Normal 1555
                for( i = 0; i < destBMap->GetCurrWidth() * destBMap->GetCurrHeight(); i++ )
                {
                    tempVal = (UInt16)( ( ( srcBlock[ i ] & 0x000000f8 ) >> 3 ) |
                              ( ( ( srcBlock[ i ] & 0x0000f800 ) >> 6 ) ) |
                              ( ( ( srcBlock[ i ] & 0x00f80000 ) >> 9 ) ) |
                              ( ( srcBlock[ i ] & 0x80000000 ) >> 16 ) );

                    wordPtr[ i ] = tempVal;
                }
            }
        }
        else if( destBMap->fUncompressedInfo.fType == plMipmap::UncompressedInfo::kRGB4444 )
        {
            wordPtr = (UInt16 *)destBMap->GetCurrLevelPtr();

            if( ( flags & hsCodecManager::kCompOrderMask ) == hsCodecManager::kWeirdCompOrder )
            {
                /// Really do 4444 reversed (switch red and blue)
                for( i = 0; i < destBMap->GetCurrWidth() * destBMap->GetCurrHeight(); i++ )
                {
                    tempVal = (UInt16)( ( ( srcBlock[ i ] & 0x000000f0 ) << 4 ) |
                              ( ( ( srcBlock[ i ] & 0x0000f000 ) >> 8 ) ) |
                              ( ( ( srcBlock[ i ] & 0x00f00000 ) >> 20 ) ) |
                              ( ( ( srcBlock[ i ] & 0xf0000000 ) >> 16 ) ) );

                    wordPtr[ i ] = tempVal;
                }
            }
            else
            {
                /// Normal 4444
                for( i = 0; i < destBMap->GetCurrWidth() * destBMap->GetCurrHeight(); i++ )
                {
                    tempVal = (UInt16)( ( ( srcBlock[ i ] & 0x000000f0 ) >> 4 ) |
                              ( ( ( srcBlock[ i ] & 0x0000f000 ) >> 8 ) ) |
                              ( ( ( srcBlock[ i ] & 0x00f00000 ) >> 12 ) ) |
                              ( ( ( srcBlock[ i ] & 0xf0000000 ) >> 16 ) ) );

                    wordPtr[ i ] = tempVal;
                }
            }
        }
        else
            hsAssert( false, "Unsupported pixel format during color block translation" );
    }
}

//// ICreateUncompressedMipmap ////////////////////////////////////////////////
//  Sets the fields of an uncompressed bitmap according to the source
//  (compressed) bitmap and the given bit depth.

plMipmap    *hsDXTSoftwareCodec::ICreateUncompressedMipmap( plMipmap *compressed, UInt8 flags )
{
    UInt8       bitDepth, type;
    plMipmap    *newMap;


    if( compressed->GetFlags() & plMipmap::kIntensityMap )
    {
        /// Intensity map--alpha too?
        if( compressed->GetFlags() & plMipmap::kAlphaChannelFlag )
        {
            type = plMipmap::UncompressedInfo::kAInten88;
            bitDepth = 16;
        }
        else
        {
            type = plMipmap::UncompressedInfo::kInten8;
            bitDepth = 8;
        }
    }
    else
    {
        /// Default to 32 bit depth
        if( ( flags & hsCodecManager::kBitDepthMask ) == hsCodecManager::kDontCareDepth )
            bitDepth = kDefaultDepth;
        else if( ( flags & hsCodecManager::kBitDepthMask ) == hsCodecManager::k32BitDepth )
            bitDepth = 32;
        else
            bitDepth = 16;

        if( bitDepth == 32 )
            type = plMipmap::UncompressedInfo::kRGB8888;
        else if( compressed->GetFlags() & plMipmap::kAlphaChannelFlag )
            type = plMipmap::UncompressedInfo::kRGB4444;
        else
            type = plMipmap::UncompressedInfo::kRGB1555;
    }

    newMap = TRACKED_NEW plMipmap( compressed->GetWidth(), compressed->GetHeight(), bitDepth,
                            compressed->GetNumLevels(), plMipmap::kUncompressed, type );
    newMap->SetFlags( compressed->GetFlags() );                         

    return newMap;
}


/*
-----------------------------------------------------------
  OLD DECOMPRESSION FUNCTION--KEEPING HERE FOR LEGACY
-----------------------------------------------------------

void hsDXTSoftwareCodec::UncompressMipmap(plMipmap *uncompressed, plMipmap *compressed)
{
    UInt32 *compressedImage = (UInt32 *)compressed->GetCurrLevelPtr();
    UInt32 *uncompressedImage = (UInt32 *)uncompressed->GetCurrLevelPtr();
    UInt32 blockSize = compressed->fDirectXInfo.fBlockSize;
    Int32 x, y;
    Int32 xMax = compressed->GetCurrWidth() >> 2;
    Int32 yMax = compressed->GetCurrHeight() >> 2;
    for (x = 0; x < xMax; ++x)
    {
        for (y = 0; y < yMax; ++y)
        {
            UInt8 alpha[8];
            UInt16 color[4];
            UInt32 *block = &compressedImage[(x + xMax * y) * (blockSize >> 2)];
            UInt8 *charBlock = (UInt8 *)block;
            UInt8 *alphaBlock = nil;
            UInt8 *colorBlock = nil;

            if (compressed->fDirectXInfo.fCompressionType == hsGBitmap::DirectXInfo::kDXT5)
            {
                alphaBlock = charBlock;
                colorBlock = (charBlock + 8);

                alpha[0] = charBlock[0];
                alpha[1] = charBlock[1];

                // 8-alpha or 6-alpha block?    
                if (alpha[0] > alpha[1]) {    
                    // 8-alpha block:  derive the other 6 alphas.    
                    // 000 = alpha[0], 001 = alpha[1], others are interpolated
                    alpha[2] = (6 * alpha[0] + alpha[1]) / 7;      // bit code 010
                    alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7;  // Bit code 011    
                    alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7;  // Bit code 100    
                    alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7;  // Bit code 101
                    alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7;  // Bit code 110    
                    alpha[7] = (alpha[0] + 6 * alpha[1]) / 7;      // Bit code 111
                }    
                else {  // 6-alpha block:  derive the other alphas.    
                    // 000 = alpha[0], 001 = alpha[1], others are interpolated
                    alpha[2] = (4 * alpha[0] + alpha[1]) / 5;      // Bit code 010
                    alpha[3] = (3 * alpha[0] + 2 * alpha[1]) / 5;  // Bit code 011    
                    alpha[4] = (2 * alpha[0] + 3 * alpha[1]) / 5;  // Bit code 100    
                    alpha[5] = (alpha[0] + 4 * alpha[1]) / 5;      // Bit code 101
                    alpha[6] = 0;                                // Bit code 110
                    alpha[7] = 255;                              // Bit code 111
                }
            }
            else if (compressed->fDirectXInfo.fCompressionType == hsGBitmap::DirectXInfo::kDXT1)
            {
                alphaBlock = nil;
                colorBlock = charBlock;
            }
            else
            {
                hsAssert(false, "Unrecognized compression scheme.");
            }

            UInt32 encoding;
            color[0] = (colorBlock[1] << 8) | colorBlock[0];
            color[1] = (colorBlock[3] << 8) | colorBlock[2];

            if (color[0] > color[1]) 
            {
                // Four-color block: derive the other two colors.    
                // 00 = color[0], 01 = color[1], 10 = color[2, 11 = color[3
                // These two bit codes correspond to the 2-bit fields 
                // stored in the 64-bit block.
                color[2] = BlendColors16(2, color[0], 1, color[1]);
                color[3] = BlendColors16(1, color[0], 2, color[1]);

                encoding = kFourColorEncoding;
            }    
            else
            { 
                // Three-color block: derive the other color.
                // 00 = color[0],  01 = color[1],  10 = color[2,  
                // 11 = transparent.
                // These two bit codes correspond to the 2-bit fields 
                // stored in the 64-bit block. 
                color[2] = BlendColors16(1, color[0], 1, color[1]);
                color[3] = 0;    

                encoding = kThreeColorEncoding;
            }

            UInt8 r, g, b, a;
            Int32 xx, yy;
            for (xx = 0; xx < 4; ++xx)
            {
                for (yy = 0; yy < 4; ++yy)
                {
                    if (alphaBlock)
                    {
                        UInt32 alphaMask = 0x7;
                        UInt32 firstThreeBytes = (alphaBlock[4] << 16) + (alphaBlock[3] << 8) + alphaBlock[2];
                        UInt32 secondThreeBytes = (alphaBlock[7] << 16) + (alphaBlock[6] << 8) + alphaBlock[5];
                        UInt32 alphaIndex;
                        UInt32 alphaShift;
                        if (yy < 2)
                        {
                            alphaShift = 3 * (4 * yy + xx);
                            alphaIndex = (firstThreeBytes >> alphaShift) & alphaMask;
                        }
                        else
                        {
                            alphaShift = 3 * (4 * (yy - 2) + xx);
                            alphaIndex = (secondThreeBytes >> alphaShift) & alphaMask;
                        }

                        a = alpha[alphaIndex];
                    }
                    else
                    {
                        a = 255;
                    }

                    UInt32 colorMask = 0x3;
                    UInt32 colorDWord = (colorBlock[7] << 24) | (colorBlock[6] << 16) | 
                        (colorBlock[5] << 8) | colorBlock[4];
                    UInt32 colorShift = 2 * (4 * yy + xx);
                    UInt32 colorIndex = (colorDWord >> colorShift) & colorMask;

                    if ((encoding == kThreeColorEncoding) && (colorIndex == 3))
                    {
                        r = g = b = a = 0;
                    }
                    else
                    {
                        r = (UInt8)((color[colorIndex] >> 8) & 0xf8);
                        g = (UInt8)((color[colorIndex] >> 3) & 0xfc);
                        b = (UInt8)((color[colorIndex] << 3) & 0xf8);
                    }

                    hsRGBAColor32* pixel = (hsRGBAColor32*)uncompressed->GetAddr32(4 * x + xx, 4 * y + yy);
                    pixel->a = a;
                    pixel->r = r;
                    pixel->g = g;
                    pixel->b = b;
                }
            }   
        }
    }
}
*/

//// UncompressBitmap /////////////////////////////////////////////////////////
//
//  Workhorse function distribution. Takes a compressed GBitmapClass as input 
//  and writes the uncompressed version to the destination bitmap class.
//  (Doesn't actually do anything but call the appropriate function per format.
//  Change this function to add support for more compression formats.)
//
//  Note: Supports DXT1 and DXT5 compression formats.
//
//  7.31.2000 mcn - Created, based on old code (uncredited)
//  8.18.2000 mcn - Updated to handle 'weird' formats and other flags.

void    hsDXTSoftwareCodec::UncompressMipmap( plMipmap *destBMap, plMipmap *srcBMap, UInt8 flags )
{
    if( destBMap->fUncompressedInfo.fType == plMipmap::UncompressedInfo::kRGB8888 )
    {
        /// 32-bit ARGB - Can be either DXT5 or DXT1
        if( srcBMap->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT5 )
            IUncompressMipmapDXT5To32( destBMap, srcBMap );
        else if( srcBMap->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT1 )
            IUncompressMipmapDXT1To32( destBMap, srcBMap );
    }
    else if( destBMap->fUncompressedInfo.fType == plMipmap::UncompressedInfo::kRGB1555 )
    {
        /// 16-bit ARGB 1555--can ONLY be DXT1
        hsAssert( srcBMap->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT1,
                    "Only DXT1 bitmaps can decompress to ARGB1555 format!" );
    
        if( ( flags & hsCodecManager::kCompOrderMask ) == hsCodecManager::kWeirdCompOrder )
            IUncompressMipmapDXT1To16Weird( destBMap, srcBMap );
        else
            IUncompressMipmapDXT1To16( destBMap, srcBMap );
    }
    else if( destBMap->fUncompressedInfo.fType == plMipmap::UncompressedInfo::kRGB4444 )
    {
        /// 16-bit ARGB 4444--can ONLY be DXT5
        hsAssert( srcBMap->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT5,
                    "Only DXT5 bitmaps can decompress to ARGB4444 format!" );

        if( ( flags & hsCodecManager::kCompOrderMask ) == hsCodecManager::kWeirdCompOrder )
            IUncompressMipmapDXT5To16Weird( destBMap, srcBMap );
        else
            IUncompressMipmapDXT5To16( destBMap, srcBMap );
    }
    else if( destBMap->fUncompressedInfo.fType == plMipmap::UncompressedInfo::kInten8 )
    {
        /// 8-bit intensity--can ONLY be DXT1
        hsAssert( srcBMap->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT1,
                    "Only DXT1 bitmaps can decompress to 8-bit Intensity format!" );
        IUncompressMipmapDXT1ToInten( destBMap, srcBMap );
    }
    else if( destBMap->fUncompressedInfo.fType == plMipmap::UncompressedInfo::kAInten88 )
    {
        /// 16-bit alpha-intensity--can ONLY be DXT5
        hsAssert( srcBMap->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT5,
                    "Only DXT5 bitmaps can decompress to 8-8 Alpha-Intensity format!" );
        IUncompressMipmapDXT5ToAInten( destBMap, srcBMap );
    }
    else
        hsAssert( false, "Unsupported target decompression format" );
}


//// IUncompressMipmapDXT5To16 ////////////////////////////////////////////////
//
//  UncompressBitmap internal call for DXT5 compression. DXT5 is 3-bit linear
//  interpolated alpha channel compression. Output is a 16-bit RGB 4444 bitmap.

void    hsDXTSoftwareCodec::IUncompressMipmapDXT5To16( plMipmap *destBMap, plMipmap *srcBMap )
{
    UInt16      *srcData;
    UInt16      *destData, destBlock[ 16 ];
    UInt32      blockSize;
    UInt32      x, y, bMapStride;
    UInt16      colors[ 4 ];
    Int32       numBlocks, i, j;
    UInt8       *bytePtr;
    UInt16      alphas[ 8 ], aTemp, a0, a1;

    UInt32      aBitSrc1, aBitSrc2;
    UInt16      cBitSrc1, cBitSrc2;


    /// Setup some nifty stuff
    hsAssert( ( srcBMap->GetCurrWidth() & 3 ) == 0, "Bitmap width must be multiple of 4" );
    hsAssert( ( srcBMap->GetCurrHeight() & 3 ) == 0, "Bitmap height must be multiple of 4" );
    numBlocks = ( srcBMap->GetCurrWidth() * srcBMap->GetCurrHeight() ) >> 4;

    blockSize = srcBMap->fDirectXInfo.fBlockSize >> 1; // In 16-bit words
    srcData = (UInt16 *)srcBMap->GetCurrLevelPtr();
    // Note our trick here to make sure nothing breaks if GetAddr16's 
    // formula changes
    bMapStride = (UInt32)( destBMap->GetAddr16( 0, 1 ) - destBMap->GetAddr16( 0, 0 ) );
    x = y = 0;


    /// Loop through the # of blocks (width*height / 16-pixel-blocks)
    for( i = 0; i < numBlocks; i++ )
    {
        /// Per block--determine alpha compression type first
        bytePtr = (UInt8 *)srcData;
        alphas[ 0 ] = bytePtr[ 0 ];
        alphas[ 1 ] = bytePtr[ 1 ];

        /// Note that we use the preshifted alphas really as fixed point.
        /// The result: more accuracy, and no need to shift the alphas afterwards
        if( alphas[ 0 ] > alphas[ 1 ] )
        {
            /// 8-alpha block: interpolate 6 others
            alphas[ 0 ] <<= 8;
            alphas[ 1 ] <<= 8;

            /// Note that, unlike below, we can't combine a0 and a1 into
            /// one value, because that would give us a negative value,
            /// and we're using unsigned values here. (i.e. we need all the bits)
            aTemp = alphas[ 0 ];
            a0 = ( aTemp / 7 );
            a1 = ( alphas[ 1 ] / 7 );           
            for( j = 2; j < 8; j++ )
            {
                aTemp += a1 - a0;
                alphas[ j ] = aTemp & 0xf000;   /// Mask done here to retain as
                                                /// much accuracy as possible
            }
        }
        else
        {
            /// 6-alpha block: interpolate 4 others, then assume last 2 are 0 and 255
            alphas[ 0 ] <<= 8;
            alphas[ 1 ] <<= 8;

            aTemp = alphas[ 0 ];
            a0 = ( alphas[ 1 ] - aTemp ) / 5;
            for( j = 2; j < 6; j++ )
            {
                aTemp += a0;
                alphas[ j ] = aTemp & 0xf000;   /// Mask done here to retain as
                                                /// much accuracy as possible
            }

            alphas[ 6 ] = 0;
            alphas[ 7 ] = 0xf000;
        }

        /// Mask off the original two now
        alphas[ 0 ] &= 0xf000;
        alphas[ 1 ] &= 0xf000;

        /// Now do the 16 pixels in 2 blocks, decompressing 3-bit lookups
        aBitSrc1 = ( (UInt32)bytePtr[ 4 ] << 16 ) + 
                    ( (UInt32)bytePtr[ 3 ] << 8 ) + 
                    ( (UInt32)bytePtr[ 2 ] );
        aBitSrc2 = ( (UInt32)bytePtr[ 7 ] << 16 ) + 
                    ( (UInt32)bytePtr[ 6 ] << 8 ) + 
                    ( (UInt32)bytePtr[ 5 ] );

        /// Now decompress color data
        srcData += 4;       // Alpha was 4 16-bit words worth
        //hsAssert( srcData[ 0 ] > srcData[ 1 ], "Invalid block compression for DX5 method" );  /// If not, then it's one-bit
                                                /// alpha, but this is DX5!
        colors[ 0 ] = IRGB565To4444( srcData[ 0 ] );
        colors[ 1 ] = IRGB565To4444( srcData[ 1 ] );
        colors[ 2 ] = IMixTwoThirdsRGB4444( colors[ 0 ], colors[ 1 ] );
        colors[ 3 ] = IMixTwoThirdsRGB4444( colors[ 1 ], colors[ 0 ] );
        
        cBitSrc1 = srcData[ 2 ];
        cBitSrc2 = srcData[ 3 ];

#ifdef HS_BUILD_FOR_MAC
        cBitSrc1 = ISwapWordOrder( cBitSrc1 );
        cBitSrc2 = ISwapWordOrder( cBitSrc2 );
#endif
        
        for( j = 0; j < 8; j++ )
        {
            destBlock[ j ] = alphas[ aBitSrc1 & 0x07 ] | colors[ cBitSrc1 & 0x03 ];
            aBitSrc1 >>= 3;
            cBitSrc1 >>= 2;
            destBlock[ j + 8 ] = alphas[ aBitSrc2 & 0x07 ] | colors[ cBitSrc2 & 0x03 ];
            aBitSrc2 >>= 3;
            cBitSrc2 >>= 2;
#ifdef HS_BUILD_FOR_MAC
            destBlock[ j ] = ISwapWordOrder( destBlock[ j ] );
            destBlock[ j + 8 ] = ISwapWordOrder( destBlock[ j + 8 ] );
#endif
        }
        
        /// Now copy the block to the destination bitmap
        /// (Trust me, this is actually *faster* than memcpy for some reason
        destData = destBMap->GetAddr16( x, y );
        destData[ 0 ] = destBlock[ 0 ];
        destData[ 1 ] = destBlock[ 1 ];
        destData[ 2 ] = destBlock[ 2 ];
        destData[ 3 ] = destBlock[ 3 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 4 ];
        destData[ 1 ] = destBlock[ 5 ];
        destData[ 2 ] = destBlock[ 6 ];
        destData[ 3 ] = destBlock[ 7 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 8 ];
        destData[ 1 ] = destBlock[ 9 ];
        destData[ 2 ] = destBlock[ 10 ];
        destData[ 3 ] = destBlock[ 11 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 12 ];
        destData[ 1 ] = destBlock[ 13 ];
        destData[ 2 ] = destBlock[ 14 ];
        destData[ 3 ] = destBlock[ 15 ];

        /// Increment and loop!
        srcData += blockSize - 4;       /// JUUUST in case our block size is diff
        x += 4;
        if( x == srcBMap->GetCurrWidth() )
        {
            x = 0;
            y += 4;
        }
    }
}

//// IUncompressMipmapDXT5To16Weird ///////////////////////////////////////////
//
//  UncompressBitmap internal call for DXT5 compression. DXT5 is 3-bit linear
//  interpolated alpha channel compression. Output is a 16-bit RGB 4444 
//  reversed bitmap (Red and blue are swapped ala OpenGL).
//  (Note: Annoyingly enough, this is exactly the same as the above function
//  EXCEPT for two stupid lines. We can't use function pointers as the two
//  function calls are inline. Wouldn't it be nice if we could somehow write
//  the inline opcodes beforehand?)

void    hsDXTSoftwareCodec::IUncompressMipmapDXT5To16Weird( plMipmap *destBMap, plMipmap *srcBMap )
{
    UInt16      *srcData;
    UInt16      *destData, destBlock[ 16 ];
    UInt32      blockSize;
    UInt32      x, y, bMapStride;
    UInt16      colors[ 4 ];
    Int32       numBlocks, i, j;
    UInt8       *bytePtr;
    UInt16      alphas[ 8 ], aTemp, a0, a1;

    UInt32      aBitSrc1, aBitSrc2;
    UInt16      cBitSrc1, cBitSrc2;


    /// Setup some nifty stuff
    hsAssert( ( srcBMap->GetCurrWidth() & 3 ) == 0, "Bitmap width must be multiple of 4" );
    hsAssert( ( srcBMap->GetCurrHeight() & 3 ) == 0, "Bitmap height must be multiple of 4" );
    numBlocks = ( srcBMap->GetCurrWidth() * srcBMap->GetCurrHeight() ) >> 4;

    blockSize = srcBMap->fDirectXInfo.fBlockSize >> 1; // In 16-bit words
    srcData = (UInt16 *)srcBMap->GetCurrLevelPtr();
    // Note our trick here to make sure nothing breaks if GetAddr16's 
    // formula changes
    bMapStride = (UInt32)( destBMap->GetAddr16( 0, 1 ) - destBMap->GetAddr16( 0, 0 ) );
    x = y = 0;


    /// Loop through the # of blocks (width*height / 16-pixel-blocks)
    for( i = 0; i < numBlocks; i++ )
    {
        /// Per block--determine alpha compression type first
        bytePtr = (UInt8 *)srcData;
        alphas[ 0 ] = bytePtr[ 0 ];
        alphas[ 1 ] = bytePtr[ 1 ];

        /// Note that we use the preshifted alphas really as fixed point.
        /// The result: more accuracy, and no need to shift the alphas afterwards
        if( alphas[ 0 ] > alphas[ 1 ] )
        {
            /// 8-alpha block: interpolate 6 others
            alphas[ 0 ] <<= 8;
            alphas[ 1 ] <<= 8;

            /// Note that, unlike below, we can't combine a0 and a1 into
            /// one value, because that would give us a negative value,
            /// and we're using unsigned values here. (i.e. we need all the bits)
            aTemp = alphas[ 0 ];
            a0 = ( aTemp / 7 );
            a1 = ( alphas[ 1 ] / 7 );           
            for( j = 2; j < 8; j++ )
            {
                aTemp += a1 - a0;
                alphas[ j ] = aTemp & 0xf000;   /// Mask done here to retain as
                                                /// much accuracy as possible
            }
        }
        else
        {
            /// 6-alpha block: interpolate 4 others, then assume last 2 are 0 and 255
            alphas[ 0 ] <<= 8;
            alphas[ 1 ] <<= 8;

            aTemp = alphas[ 0 ];
            a0 = ( alphas[ 1 ] - aTemp ) / 5;
            for( j = 2; j < 6; j++ )
            {
                aTemp += a0;
                alphas[ j ] = aTemp & 0xf000;   /// Mask done here to retain as
                                                /// much accuracy as possible
            }

            alphas[ 6 ] = 0;
            alphas[ 7 ] = 0xf000;
        }

        /// Mask off the original two now
        alphas[ 0 ] &= 0xf000;
        alphas[ 1 ] &= 0xf000;

        /// Now do the 16 pixels in 2 blocks, decompressing 3-bit lookups
        aBitSrc1 = ( (UInt32)bytePtr[ 4 ] << 16 ) + 
                    ( (UInt32)bytePtr[ 3 ] << 8 ) + 
                    ( (UInt32)bytePtr[ 2 ] );
        aBitSrc2 = ( (UInt32)bytePtr[ 7 ] << 16 ) + 
                    ( (UInt32)bytePtr[ 6 ] << 8 ) + 
                    ( (UInt32)bytePtr[ 5 ] );

        /// Now decompress color data
        srcData += 4;       // Alpha was 4 16-bit words worth
        colors[ 0 ] = IRGB565To4444Rev( srcData[ 0 ] );
        colors[ 1 ] = IRGB565To4444Rev( srcData[ 1 ] );
        colors[ 2 ] = IMixTwoThirdsRGB4444( colors[ 0 ], colors[ 1 ] );
        colors[ 3 ] = IMixTwoThirdsRGB4444( colors[ 1 ], colors[ 0 ] );
        
        cBitSrc1 = srcData[ 2 ];
        cBitSrc2 = srcData[ 3 ];

#ifdef HS_BUILD_FOR_MAC
        cBitSrc1 = ISwapWordOrder( cBitSrc1 );
        cBitSrc2 = ISwapWordOrder( cBitSrc2 );
#endif
        
        for( j = 0; j < 8; j++ )
        {
            destBlock[ j ] = alphas[ aBitSrc1 & 0x07 ] | colors[ cBitSrc1 & 0x03 ];
            aBitSrc1 >>= 3;
            cBitSrc1 >>= 2;
            destBlock[ j + 8 ] = alphas[ aBitSrc2 & 0x07 ] | colors[ cBitSrc2 & 0x03 ];
            aBitSrc2 >>= 3;
            cBitSrc2 >>= 2;
#ifdef HS_BUILD_FOR_MAC
            destBlock[ j ] = ISwapWordOrder( destBlock[ j ] );
            destBlock[ j + 8 ] = ISwapWordOrder( destBlock[ j + 8 ] );
#endif
        }
        
        /// Now copy the block to the destination bitmap
        /// (Trust me, this is actually *faster* than memcpy for some reason
        destData = destBMap->GetAddr16( x, y );
        destData[ 0 ] = destBlock[ 0 ];
        destData[ 1 ] = destBlock[ 1 ];
        destData[ 2 ] = destBlock[ 2 ];
        destData[ 3 ] = destBlock[ 3 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 4 ];
        destData[ 1 ] = destBlock[ 5 ];
        destData[ 2 ] = destBlock[ 6 ];
        destData[ 3 ] = destBlock[ 7 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 8 ];
        destData[ 1 ] = destBlock[ 9 ];
        destData[ 2 ] = destBlock[ 10 ];
        destData[ 3 ] = destBlock[ 11 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 12 ];
        destData[ 1 ] = destBlock[ 13 ];
        destData[ 2 ] = destBlock[ 14 ];
        destData[ 3 ] = destBlock[ 15 ];

        /// Increment and loop!
        srcData += blockSize - 4;       /// JUUUST in case our block size is diff
        x += 4;
        if( x == srcBMap->GetCurrWidth() )
        {
            x = 0;
            y += 4;
        }
    }
}

//// IUncompressMipmapDXT5To32 ////////////////////////////////////////////////
//
//  UncompressBitmap internal call for DXT5 compression. DXT5 is 3-bit linear
//  interpolated alpha channel compression. Output is a 32-bit ARGB 8888 bitmap.
//
//  7.31.2000 - M.Burrack - Created, based on old code (uncredited)
//
//  8.14.2000 - M.Burrack - Optimized on the alpha blending. Now we precalc
//                          the divided values and run a for loop. This gets
//                          us only about 10% :(

void    hsDXTSoftwareCodec::IUncompressMipmapDXT5To32( plMipmap *destBMap, plMipmap *srcBMap )
{
    UInt16      *srcData;
    UInt32      *destData, destBlock[ 16 ];
    UInt32      blockSize;
    UInt32      x, y, bMapStride;
    UInt32      colors[ 4 ];
    Int32       numBlocks, i, j;
    UInt8       *bytePtr;
    UInt32      alphas[ 8 ], aTemp, a0, a1;

    UInt32      aBitSrc1, aBitSrc2;
    UInt16      cBitSrc1, cBitSrc2;


    /// Setup some nifty stuff
    hsAssert( ( srcBMap->GetCurrWidth() & 3 ) == 0, "Bitmap width must be multiple of 4" );
    hsAssert( ( srcBMap->GetCurrHeight() & 3 ) == 0, "Bitmap height must be multiple of 4" );
    numBlocks = ( srcBMap->GetCurrWidth() * srcBMap->GetCurrHeight() ) >> 4;

    blockSize = srcBMap->fDirectXInfo.fBlockSize >> 1; // In 16-bit words
    srcData = (UInt16 *)srcBMap->GetCurrLevelPtr();
    // Note our trick here to make sure nothing breaks if GetAddr32's 
    // formula changes
    bMapStride = (UInt32)( destBMap->GetAddr32( 0, 1 ) - destBMap->GetAddr32( 0, 0 ) );
    x = y = 0;


    /// Loop through the # of blocks (width*height / 16-pixel-blocks)
    for( i = 0; i < numBlocks; i++ )
    {
        /// Per block--determine alpha compression type first
        bytePtr = (UInt8 *)srcData;
        alphas[ 0 ] = bytePtr[ 0 ];
        alphas[ 1 ] = bytePtr[ 1 ];

        /// Note that we use the preshifted alphas really as fixed point.
        /// The result: more accuracy, and no need to shift the alphas afterwards
        if( alphas[ 0 ] > alphas[ 1 ] )
        {
            /// 8-alpha block: interpolate 6 others
/*          //// Here's the old code, for reference ////
            alphas[ 2 ] = ( 6 * alphas[ 0 ] +     alphas[ 1 ] ) / 7;
            alphas[ 3 ] = ( 5 * alphas[ 0 ] + 2 * alphas[ 1 ] ) / 7;
            alphas[ 4 ] = ( 4 * alphas[ 0 ] + 3 * alphas[ 1 ] ) / 7;
            alphas[ 5 ] = ( 3 * alphas[ 0 ] + 4 * alphas[ 1 ] ) / 7;
            alphas[ 6 ] = ( 2 * alphas[ 0 ] + 5 * alphas[ 1 ] ) / 7;
            alphas[ 7 ] = (     alphas[ 0 ] + 6 * alphas[ 1 ] ) / 7;
*/
            alphas[ 0 ] <<= 24;
            alphas[ 1 ] <<= 24;

            /// Note that, unlike below, we can't combine a0 and a1 into
            /// one value, because that would give us a negative value,
            /// and we're using unsigned values here. (i.e. we need all the bits)
            aTemp = alphas[ 0 ];
            a0 = ( aTemp / 7 ) & 0xff000000;
            a1 = ( alphas[ 1 ] / 7 ) & 0xff000000;          
            for( j = 2; j < 8; j++ )
            {
                aTemp += a1 - a0;
                alphas[ j ] = aTemp;
            }
        }
        else
        {
            /// 6-alpha block: interpolate 4 others, then assume last 2 are 0 and 255
/*          //// Here's the old code, for reference ////
            alphas[ 2 ] = ( 4 * alphas[ 0 ] +     alphas[ 1 ] ) / 5;
            alphas[ 3 ] = ( 3 * alphas[ 0 ] + 2 * alphas[ 1 ] ) / 5;
            alphas[ 4 ] = ( 2 * alphas[ 0 ] + 3 * alphas[ 1 ] ) / 5;
            alphas[ 5 ] = (     alphas[ 0 ] + 4 * alphas[ 1 ] ) / 5;
*/
            alphas[ 0 ] <<= 24;
            alphas[ 1 ] <<= 24;

            aTemp = alphas[ 0 ];
            a0 = ( alphas[ 1 ] - aTemp ) / 5;
            for( j = 2; j < 6; j++ )
            {
                aTemp += a0;
                alphas[ j ] = aTemp & 0xff000000;
            }

            alphas[ 6 ] = 0;
            alphas[ 7 ] = 255 << 24;
        }

        /// Now do the 16 pixels in 2 blocks, decompressing 3-bit lookups
        aBitSrc1 = ( (UInt32)bytePtr[ 4 ] << 16 ) + 
                    ( (UInt32)bytePtr[ 3 ] << 8 ) + 
                    ( (UInt32)bytePtr[ 2 ] );
        aBitSrc2 = ( (UInt32)bytePtr[ 7 ] << 16 ) + 
                    ( (UInt32)bytePtr[ 6 ] << 8 ) + 
                    ( (UInt32)bytePtr[ 5 ] );

        /// Now decompress color data
        srcData += 4;       // Alpha was 4 16-bit words worth
        //hsAssert( srcData[ 0 ] > srcData[ 1 ], "Invalid block compression for DX5 method" );  /// If not, then it's one-bit
                                                /// alpha, but this is DX5!
        colors[ 0 ] = IRGB16To32Bit( srcData[ 0 ] );
        colors[ 1 ] = IRGB16To32Bit( srcData[ 1 ] );
        colors[ 2 ] = IMixTwoThirdsRGB32( colors[ 0 ], colors[ 1 ] );
        colors[ 3 ] = IMixTwoThirdsRGB32( colors[ 1 ], colors[ 0 ] );
        
        cBitSrc1 = srcData[ 2 ];
        cBitSrc2 = srcData[ 3 ];

#ifdef HS_BUILD_FOR_MAC
        cBitSrc1 = ( cBitSrc1 >> 8 ) | ( ( cBitSrc1 & 0xff ) << 8 );
        cBitSrc2 = ( cBitSrc2 >> 8 ) | ( ( cBitSrc2 & 0xff ) << 8 );
#endif
        
        for( j = 0; j < 8; j++ )
        {
            destBlock[ j ] = alphas[ aBitSrc1 & 0x07 ] | colors[ cBitSrc1 & 0x03 ];
            aBitSrc1 >>= 3;
            cBitSrc1 >>= 2;
            destBlock[ j + 8 ] = alphas[ aBitSrc2 & 0x07 ] | colors[ cBitSrc2 & 0x03 ];
            aBitSrc2 >>= 3;
            cBitSrc2 >>= 2;
#ifdef HS_BUILD_FOR_MAC
            destBlock[ j ] = ISwapDwordOrder( destBlock[ j ] );
            destBlock[ j + 8 ] = ISwapDwordOrder( destBlock[ j + 8 ] );
#endif
        }
        
        /// Now copy the block to the destination bitmap
        /// (Trust me, this is actually *faster* than memcpy for some reason
        destData = destBMap->GetAddr32( x, y );
        destData[ 0 ] = destBlock[ 0 ];
        destData[ 1 ] = destBlock[ 1 ];
        destData[ 2 ] = destBlock[ 2 ];
        destData[ 3 ] = destBlock[ 3 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 4 ];
        destData[ 1 ] = destBlock[ 5 ];
        destData[ 2 ] = destBlock[ 6 ];
        destData[ 3 ] = destBlock[ 7 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 8 ];
        destData[ 1 ] = destBlock[ 9 ];
        destData[ 2 ] = destBlock[ 10 ];
        destData[ 3 ] = destBlock[ 11 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 12 ];
        destData[ 1 ] = destBlock[ 13 ];
        destData[ 2 ] = destBlock[ 14 ];
        destData[ 3 ] = destBlock[ 15 ];

        /// Increment and loop!
        srcData += blockSize - 4;       /// JUUUST in case our block size is diff
        x += 4;
        if( x == srcBMap->GetCurrWidth() )
        {
            x = 0;
            y += 4;
        }
    }
}

//// IUncompressMipmapDXT5ToAInten ////////////////////////////////////////////
//
//  UncompressBitmap internal call for DXT5 compression. DXT5 is 3-bit linear
//  interpolated alpha channel compression. Output is a 16-bit Alpha-intensity
//  map.

void    hsDXTSoftwareCodec::IUncompressMipmapDXT5ToAInten( plMipmap *destBMap, plMipmap *srcBMap )
{
    UInt16      *srcData;
    UInt16      *destData, destBlock[ 16 ];
    UInt32      blockSize;
    UInt32      x, y, bMapStride;
    UInt8       colors[ 4 ];
    Int32       numBlocks, i, j;
    UInt8       *bytePtr;
    UInt16      alphas[ 8 ], aTemp, a0, a1;

    UInt32      aBitSrc1, aBitSrc2;
    UInt16      cBitSrc1, cBitSrc2;


    /// Setup some nifty stuff
    hsAssert( ( srcBMap->GetCurrWidth() & 3 ) == 0, "Bitmap width must be multiple of 4" );
    hsAssert( ( srcBMap->GetCurrHeight() & 3 ) == 0, "Bitmap height must be multiple of 4" );
    numBlocks = ( srcBMap->GetCurrWidth() * srcBMap->GetCurrHeight() ) >> 4;

    blockSize = srcBMap->fDirectXInfo.fBlockSize >> 1; // In 16-bit words
    srcData = (UInt16 *)srcBMap->GetCurrLevelPtr();
    // Note our trick here to make sure nothing breaks if GetAddr32's 
    // formula changes
    bMapStride = (UInt32)( destBMap->GetAddr16( 0, 1 ) - destBMap->GetAddr16( 0, 0 ) );
    x = y = 0;


    /// Loop through the # of blocks (width*height / 16-pixel-blocks)
    for( i = 0; i < numBlocks; i++ )
    {
        /// Per block--determine alpha compression type first
        bytePtr = (UInt8 *)srcData;
        alphas[ 0 ] = bytePtr[ 0 ];
        alphas[ 1 ] = bytePtr[ 1 ];

        /// Note that we use the preshifted alphas really as fixed point.
        /// The result: more accuracy, and no need to shift the alphas afterwards
        if( alphas[ 0 ] > alphas[ 1 ] )
        {
            /// 8-alpha block: interpolate 6 others
            alphas[ 0 ] <<= 8;
            alphas[ 1 ] <<= 8;

            /// Note that, unlike below, we can't combine a0 and a1 into
            /// one value, because that would give us a negative value,
            /// and we're using unsigned values here. (i.e. we need all the bits)
            aTemp = alphas[ 0 ];
            a0 = ( aTemp / 7 );
            a1 = ( alphas[ 1 ] / 7 );           
            for( j = 2; j < 8; j++ )
            {
                aTemp += a1 - a0;
                alphas[ j ] = aTemp & 0xff00;   /// Mask done here to retain as
                                                /// much accuracy as possible
            }
        }
        else
        {
            /// 6-alpha block: interpolate 4 others, then assume last 2 are 0 and 255
            alphas[ 0 ] <<= 8;
            alphas[ 1 ] <<= 8;

            aTemp = alphas[ 0 ];
            a0 = ( alphas[ 1 ] - aTemp ) / 5;
            for( j = 2; j < 6; j++ )
            {
                aTemp += a0;
                alphas[ j ] = aTemp & 0xff00;   /// Mask done here to retain as
                                                /// much accuracy as possible
            }

            alphas[ 6 ] = 0;
            alphas[ 7 ] = 0xff00;
        }

        /// Now do the 16 pixels in 2 blocks, decompressing 3-bit lookups
        aBitSrc1 = ( (UInt32)bytePtr[ 4 ] << 16 ) + 
                    ( (UInt32)bytePtr[ 3 ] << 8 ) + 
                    ( (UInt32)bytePtr[ 2 ] );
        aBitSrc2 = ( (UInt32)bytePtr[ 7 ] << 16 ) + 
                    ( (UInt32)bytePtr[ 6 ] << 8 ) + 
                    ( (UInt32)bytePtr[ 5 ] );

        /// Now decompress color data
        srcData += 4;       // Alpha was 4 16-bit words worth
        colors[ 0 ] = (UInt8)IRGB16To32Bit( srcData[ 0 ] );
        colors[ 1 ] = (UInt8)IRGB16To32Bit( srcData[ 1 ] );
        colors[ 2 ] = IMixTwoThirdsInten( colors[ 0 ], colors[ 1 ] );
        colors[ 3 ] = IMixTwoThirdsInten( colors[ 1 ], colors[ 0 ] );
        
        cBitSrc1 = srcData[ 2 ];
        cBitSrc2 = srcData[ 3 ];

#ifdef HS_BUILD_FOR_MAC
        cBitSrc1 = ( cBitSrc1 >> 8 ) | ( ( cBitSrc1 & 0xff ) << 8 );
        cBitSrc2 = ( cBitSrc2 >> 8 ) | ( ( cBitSrc2 & 0xff ) << 8 );
#endif
        
        for( j = 0; j < 8; j++ )
        {
            destBlock[ j ] = alphas[ aBitSrc1 & 0x07 ] | (UInt16)colors[ cBitSrc1 & 0x03 ];
            aBitSrc1 >>= 3;
            cBitSrc1 >>= 2;
            destBlock[ j + 8 ] = alphas[ aBitSrc2 & 0x07 ] | (UInt16)colors[ cBitSrc2 & 0x03 ];
            aBitSrc2 >>= 3;
            cBitSrc2 >>= 2;
#ifdef HS_BUILD_FOR_MAC
            destBlock[ j ] = ISwapWordOrder( destBlock[ j ] );
            destBlock[ j + 8 ] = ISwapWordOrder( destBlock[ j + 8 ] );
#endif
        }
        
        /// Now copy the block to the destination bitmap
        /// (Trust me, this is actually *faster* than memcpy for some reason
        destData = destBMap->GetAddr16( x, y );
        destData[ 0 ] = destBlock[ 0 ];
        destData[ 1 ] = destBlock[ 1 ];
        destData[ 2 ] = destBlock[ 2 ];
        destData[ 3 ] = destBlock[ 3 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 4 ];
        destData[ 1 ] = destBlock[ 5 ];
        destData[ 2 ] = destBlock[ 6 ];
        destData[ 3 ] = destBlock[ 7 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 8 ];
        destData[ 1 ] = destBlock[ 9 ];
        destData[ 2 ] = destBlock[ 10 ];
        destData[ 3 ] = destBlock[ 11 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 12 ];
        destData[ 1 ] = destBlock[ 13 ];
        destData[ 2 ] = destBlock[ 14 ];
        destData[ 3 ] = destBlock[ 15 ];

        /// Increment and loop!
        srcData += blockSize - 4;       /// JUUUST in case our block size is diff
        x += 4;
        if( x == srcBMap->GetCurrWidth() )
        {
            x = 0;
            y += 4;
        }
    }
}

//// IUncompressMipmapDXT1To16 ////////////////////////////////////////////////
//
//  UncompressBitmap internal call for DXT1 compression. DXT1 is a simple on/off
//  or all-on alpha 'compression'.
//
//  Note: this version decompresses to a 1-5-5-5 ARGB format.

void    hsDXTSoftwareCodec::IUncompressMipmapDXT1To16( plMipmap *destBMap, plMipmap *srcBMap )
{
    UInt16      *srcData, tempW1, tempW2;
    UInt16      *destData, destBlock[ 16 ];
    UInt32      blockSize;
    UInt32      bitSource, bitSource2, x, y, bMapStride;
    UInt16      colors[ 4 ];
    Int32       numBlocks, i, j;


    /// Setup some nifty stuff
    hsAssert( ( srcBMap->GetCurrWidth() & 3 ) == 0, "Bitmap width must be multiple of 4" );
    hsAssert( ( srcBMap->GetCurrHeight() & 3 ) == 0, "Bitmap height must be multiple of 4" );
    numBlocks = ( srcBMap->GetCurrWidth() * srcBMap->GetCurrHeight() ) >> 4;

    blockSize = srcBMap->fDirectXInfo.fBlockSize >> 1; // In 16-bit words
    srcData = (UInt16 *)srcBMap->GetCurrLevelPtr();
    // Note our trick here to make sure nothing breaks if GetAddr32's 
    // formula changes
    bMapStride = (UInt32)( destBMap->GetAddr16( 0, 1 ) - destBMap->GetAddr16( 0, 0 ) );
    x = y = 0;


    /// Loop through the # of blocks (width*height / 16-pixel-blocks)
    for( i = 0; i < numBlocks; i++ )
    {
        /// Decompress color data block
        colors[ 0 ] = IRGB565To1555( srcData[ 0 ] ) | 0x8000;       // Make sure alpha is set
        colors[ 1 ] = IRGB565To1555( srcData[ 1 ] ) | 0x8000;       // Make sure alpha is set

#ifdef HS_BUILD_FOR_MAC
        tempW1 = ISwapWordOrder( srcData[ 0 ] );
        tempW2 = ISwapWordOrder( srcData[ 1 ] );
#else 
        tempW1 = srcData[ 0 ]; 
        tempW2 = srcData[ 1 ];
#endif      
        if( tempW1 > tempW2 )
        {
            /// Four-color block--mix the other two
            colors[ 2 ] = IMixTwoThirdsRGB1555( colors[ 0 ], colors[ 1 ] ) | 0x8000;//IMixTwoThirdsRGB32( colors[ 0 ], colors[ 1 ] ) | 0xff000000;
            colors[ 3 ] = IMixTwoThirdsRGB1555( colors[ 1 ], colors[ 0 ] ) | 0x8000;//IMixTwoThirdsRGB32( colors[ 1 ], colors[ 0 ] ) | 0xff000000;
        }
        else
        {
            /// Three-color block and transparent
            colors[ 2 ] = IMixEqualRGB1555( colors[ 0 ], colors[ 1 ] ) | 0x8000;//IMixEqualRGB32( colors[ 0 ], colors[ 1 ] ) | 0xff000000;
            colors[ 3 ] = 0;
        }

        bitSource = srcData[ 2 ];
        bitSource2 = srcData[ 3 ];

#ifdef HS_BUILD_FOR_MAC
        bitSource = ( bitSource >> 8 ) | ( ( bitSource & 0xff ) << 8 );
        bitSource2 = ( bitSource2 >> 8 ) | ( ( bitSource2 & 0xff ) << 8 );      
#endif

        for( j = 0; j < 8; j++ )
        {
            destBlock[ j ] = colors[ bitSource & 0x03 ];
            bitSource >>= 2;
            destBlock[ j + 8 ] = colors[ bitSource2 & 0x03 ];
            bitSource2 >>= 2;
#ifdef HS_BUILD_FOR_MAC
            destBlock[ j ] = ISwapWordOrder( destBlock[ j ] );
            destBlock[ j + 8 ] = ISwapWordOrder( destBlock[ j + 8 ] );
#endif
        }
        
        /// Now copy the block to the destination bitmap
        /// (Trust me, this is actually *faster* than memcpy for some reason
        destData = destBMap->GetAddr16( x, y );
        destData[ 0 ] = destBlock[ 0 ];
        destData[ 1 ] = destBlock[ 1 ];
        destData[ 2 ] = destBlock[ 2 ];
        destData[ 3 ] = destBlock[ 3 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 4 ];
        destData[ 1 ] = destBlock[ 5 ];
        destData[ 2 ] = destBlock[ 6 ];
        destData[ 3 ] = destBlock[ 7 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 8 ];
        destData[ 1 ] = destBlock[ 9 ];
        destData[ 2 ] = destBlock[ 10 ];
        destData[ 3 ] = destBlock[ 11 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 12 ];
        destData[ 1 ] = destBlock[ 13 ];
        destData[ 2 ] = destBlock[ 14 ];
        destData[ 3 ] = destBlock[ 15 ];

        /// Increment and loop!
        srcData += blockSize;
        x += 4;
        if( x == srcBMap->GetCurrWidth() )
        {
            x = 0;
            y += 4;
        }
    }
}

//// IUncompressMipmapDXT1To16Weird ///////////////////////////////////////////
//
//  UncompressBitmap internal call for DXT1 compression. DXT1 is a simple on/off
//  or all-on alpha 'compression'.
//
//  Note: this version decompresses to a 5-5-5-1 RGBA format.

void    hsDXTSoftwareCodec::IUncompressMipmapDXT1To16Weird( plMipmap *destBMap, 
                                                   plMipmap *srcBMap )
{
    UInt16      *srcData, tempW1, tempW2;
    UInt16      *destData, destBlock[ 16 ];
    UInt32      blockSize;
    UInt32      bitSource, bitSource2, x, y, bMapStride;
    UInt16      colors[ 4 ];
    Int32       numBlocks, i, j;


    /// Setup some nifty stuff
    hsAssert( ( srcBMap->GetCurrWidth() & 3 ) == 0, "Bitmap width must be multiple of 4" );
    hsAssert( ( srcBMap->GetCurrHeight() & 3 ) == 0, "Bitmap height must be multiple of 4" );
    numBlocks = ( srcBMap->GetCurrWidth() * srcBMap->GetCurrHeight() ) >> 4;

    blockSize = srcBMap->fDirectXInfo.fBlockSize >> 1; // In 16-bit words
    srcData = (UInt16 *)srcBMap->GetCurrLevelPtr();
    // Note our trick here to make sure nothing breaks if GetAddr32's 
    // formula changes
    bMapStride = (UInt32)( destBMap->GetAddr16( 0, 1 ) - destBMap->GetAddr16( 0, 0 ) );
    x = y = 0;


    /// Loop through the # of blocks (width*height / 16-pixel-blocks)
    for( i = 0; i < numBlocks; i++ )
    {
        /// Decompress color data block
        colors[ 0 ] = IRGB565To5551( srcData[ 0 ] ) | 0x0001;       // Make sure alpha is set
        colors[ 1 ] = IRGB565To5551( srcData[ 1 ] ) | 0x0001;       // Make sure alpha is set

#ifdef HS_BUILD_FOR_MAC
        tempW1 = ISwapWordOrder( srcData[ 0 ] );
        tempW2 = ISwapWordOrder( srcData[ 1 ] );
#else 
        tempW1 = srcData[ 0 ]; 
        tempW2 = srcData[ 1 ];
#endif      
        if( tempW1 > tempW2 )
        {
            /// Four-color block--mix the other two
            colors[ 2 ] = IMixTwoThirdsRGB5551( colors[ 0 ], colors[ 1 ] ) | 0x0001;
            colors[ 3 ] = IMixTwoThirdsRGB5551( colors[ 1 ], colors[ 0 ] ) | 0x0001;
        }
        else
        {
            /// Three-color block and transparent
            colors[ 2 ] = IMixEqualRGB5551( colors[ 0 ], colors[ 1 ] ) | 0x0001;
            colors[ 3 ] = 0;
        }

        bitSource = srcData[ 2 ];
        bitSource2 = srcData[ 3 ];

#ifdef HS_BUILD_FOR_MAC
        bitSource = ( bitSource >> 8 ) | ( ( bitSource & 0xff ) << 8 );
        bitSource2 = ( bitSource2 >> 8 ) | ( ( bitSource2 & 0xff ) << 8 );      
#endif

        for( j = 0; j < 8; j++ )
        {
            destBlock[ j ] = colors[ bitSource & 0x03 ];
            bitSource >>= 2;
            destBlock[ j + 8 ] = colors[ bitSource2 & 0x03 ];
            bitSource2 >>= 2;
#ifdef HS_BUILD_FOR_MAC
            destBlock[ j ] = ISwapWordOrder( destBlock[ j ] );
            destBlock[ j + 8 ] = ISwapWordOrder( destBlock[ j + 8 ] );
#endif
        }
        
        /// Now copy the block to the destination bitmap
        /// (Trust me, this is actually *faster* than memcpy for some reason
        destData = destBMap->GetAddr16( x, y );
        destData[ 0 ] = destBlock[ 0 ];
        destData[ 1 ] = destBlock[ 1 ];
        destData[ 2 ] = destBlock[ 2 ];
        destData[ 3 ] = destBlock[ 3 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 4 ];
        destData[ 1 ] = destBlock[ 5 ];
        destData[ 2 ] = destBlock[ 6 ];
        destData[ 3 ] = destBlock[ 7 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 8 ];
        destData[ 1 ] = destBlock[ 9 ];
        destData[ 2 ] = destBlock[ 10 ];
        destData[ 3 ] = destBlock[ 11 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 12 ];
        destData[ 1 ] = destBlock[ 13 ];
        destData[ 2 ] = destBlock[ 14 ];
        destData[ 3 ] = destBlock[ 15 ];

        /// Increment and loop!
        srcData += blockSize;
        x += 4;
        if( x == srcBMap->GetCurrWidth() )
        {
            x = 0;
            y += 4;
        }
    }
}

//// IUncompressMipmapDXT1To32 ////////////////////////////////////////////////
//
//  UncompressBitmap internal call for DXT1 compression. DXT1 is a simple on/off
//  or all-on alpha 'compression'. Output is a 32-bit ARGB 8888 bitmap.
//
//  7.31.2000 - M.Burrack - Created, based on old code (uncredited)

void    hsDXTSoftwareCodec::IUncompressMipmapDXT1To32( plMipmap *destBMap, 
                                                   plMipmap *srcBMap )
{
    UInt16      *srcData, tempW1, tempW2;
    UInt32      *destData, destBlock[ 16 ];
    UInt32      blockSize;
    UInt32      bitSource, bitSource2, x, y, bMapStride;
    UInt32      colors[ 4 ];
    Int32       numBlocks, i, j;


    /// Setup some nifty stuff
    hsAssert( ( srcBMap->GetCurrWidth() & 3 ) == 0, "Bitmap width must be multiple of 4" );
    hsAssert( ( srcBMap->GetCurrHeight() & 3 ) == 0, "Bitmap height must be multiple of 4" );
    numBlocks = ( srcBMap->GetCurrWidth() * srcBMap->GetCurrHeight() ) >> 4;

    blockSize = srcBMap->fDirectXInfo.fBlockSize >> 1; // In 16-bit words
    srcData = (UInt16 *)srcBMap->GetCurrLevelPtr();
    // Note our trick here to make sure nothing breaks if GetAddr32's 
    // formula changes
    bMapStride = (UInt32)( destBMap->GetAddr32( 0, 1 ) - destBMap->GetAddr32( 0, 0 ) );
    x = y = 0;


    /// Loop through the # of blocks (width*height / 16-pixel-blocks)
    for( i = 0; i < numBlocks; i++ )
    {
        /// Decompress color data block
        colors[ 0 ] = IRGB16To32Bit( srcData[ 0 ] ) | 0xff000000;
        colors[ 1 ] = IRGB16To32Bit( srcData[ 1 ] ) | 0xff000000;

#ifdef HS_BUILD_FOR_MAC
        tempW1 = ISwapWordOrder( srcData[ 0 ] );
        tempW2 = ISwapWordOrder( srcData[ 1 ] );
#else 
        tempW1 = srcData[ 0 ]; 
        tempW2 = srcData[ 1 ];
#endif      
        if( tempW1 > tempW2 )
        {
            /// Four-color block--mix the other two
            colors[ 2 ] = IMixTwoThirdsRGB32( colors[ 0 ], colors[ 1 ] ) | 0xff000000;
            colors[ 3 ] = IMixTwoThirdsRGB32( colors[ 1 ], colors[ 0 ] ) | 0xff000000;
        }
        else
        {
            /// Three-color block and transparent
            colors[ 2 ] = IMixEqualRGB32( colors[ 0 ], colors[ 1 ] ) | 0xff000000;
            colors[ 3 ] = 0;
        }

        bitSource = srcData[ 2 ];
        bitSource2 = srcData[ 3 ];

#ifdef HS_BUILD_FOR_MAC
        bitSource = ( bitSource >> 8 ) | ( ( bitSource & 0xff ) << 8 );
        bitSource2 = ( bitSource2 >> 8 ) | ( ( bitSource2 & 0xff ) << 8 );      
#endif

        for( j = 0; j < 8; j++ )
        {
            destBlock[ j ] = colors[ bitSource & 0x03 ];
            bitSource >>= 2;
            destBlock[ j + 8 ] = colors[ bitSource2 & 0x03 ];
            bitSource2 >>= 2;
#ifdef HS_BUILD_FOR_MAC
            destBlock[ j ] = ISwapDwordOrder( destBlock[ j ] );
            destBlock[ j + 8 ] = ISwapDwordOrder( destBlock[ j + 8 ] );
#endif
        }
        
        /// Now copy the block to the destination bitmap
        /// (Trust me, this is actually *faster* than memcpy for some reason
        destData = destBMap->GetAddr32( x, y );
        destData[ 0 ] = destBlock[ 0 ];
        destData[ 1 ] = destBlock[ 1 ];
        destData[ 2 ] = destBlock[ 2 ];
        destData[ 3 ] = destBlock[ 3 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 4 ];
        destData[ 1 ] = destBlock[ 5 ];
        destData[ 2 ] = destBlock[ 6 ];
        destData[ 3 ] = destBlock[ 7 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 8 ];
        destData[ 1 ] = destBlock[ 9 ];
        destData[ 2 ] = destBlock[ 10 ];
        destData[ 3 ] = destBlock[ 11 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 12 ];
        destData[ 1 ] = destBlock[ 13 ];
        destData[ 2 ] = destBlock[ 14 ];
        destData[ 3 ] = destBlock[ 15 ];

        /// Increment and loop!
        srcData += blockSize;
        x += 4;
        if( x == srcBMap->GetCurrWidth() )
        {
            x = 0;
            y += 4;
        }
    }
}

//// IUncompressMipmapDXT1ToInten /////////////////////////////////////////////
//
//  UncompressBitmap internal call for DXT1 compression. DXT1 is a simple on/off
//  or all-on alpha 'compression'. Output is an 8-bit intensity bitmap, 
//  constructed from the blue-color channel of the DXT1 output.

void    hsDXTSoftwareCodec::IUncompressMipmapDXT1ToInten( plMipmap *destBMap, 
                                                   plMipmap *srcBMap )
{
    UInt16      *srcData, tempW1, tempW2;
    UInt8       *destData, destBlock[ 16 ];
    UInt32      blockSize;
    UInt32      bitSource, bitSource2, x, y, bMapStride;
    UInt8       colors[ 4 ];
    Int32       numBlocks, i, j;


    /// Setup some nifty stuff
    hsAssert( ( srcBMap->GetCurrWidth() & 3 ) == 0, "Bitmap width must be multiple of 4" );
    hsAssert( ( srcBMap->GetCurrHeight() & 3 ) == 0, "Bitmap height must be multiple of 4" );
    numBlocks = ( srcBMap->GetCurrWidth() * srcBMap->GetCurrHeight() ) >> 4;

    blockSize = srcBMap->fDirectXInfo.fBlockSize >> 1; // In 16-bit words
    srcData = (UInt16 *)srcBMap->GetCurrLevelPtr();
    // Note our trick here to make sure nothing breaks if GetAddr8's 
    // formula changes
    bMapStride = (UInt32)( destBMap->GetAddr8( 0, 1 ) - destBMap->GetAddr8( 0, 0 ) );
    x = y = 0;

    /// Loop through the # of blocks (width*height / 16-pixel-blocks)
    for( i = 0; i < numBlocks; i++ )
    {
        /// Decompress color data block (cast will automatically truncate to blue)
        colors[ 0 ] = (UInt8)IRGB16To32Bit( srcData[ 0 ] );
        colors[ 1 ] = (UInt8)IRGB16To32Bit( srcData[ 1 ] );

#ifdef HS_BUILD_FOR_MAC
        tempW1 = ISwapWordOrder( srcData[ 0 ] );
        tempW2 = ISwapWordOrder( srcData[ 1 ] );
#else 
        tempW1 = srcData[ 0 ]; 
        tempW2 = srcData[ 1 ];
#endif      
        if( tempW1 > tempW2 )
        {
            /// Four-color block--mix the other two
            colors[ 2 ] = IMixTwoThirdsInten( colors[ 0 ], colors[ 1 ] );
            colors[ 3 ] = IMixTwoThirdsInten( colors[ 1 ], colors[ 0 ] );
        }
        else
        {
            /// Three-color block and transparent
            colors[ 2 ] = IMixEqualInten( colors[ 0 ], colors[ 1 ] );
            colors[ 3 ] = 0;
        }

        bitSource = srcData[ 2 ];
        bitSource2 = srcData[ 3 ];

#ifdef HS_BUILD_FOR_MAC
        bitSource = ( bitSource >> 8 ) | ( ( bitSource & 0xff ) << 8 );
        bitSource2 = ( bitSource2 >> 8 ) | ( ( bitSource2 & 0xff ) << 8 );      
#endif

        for( j = 0; j < 8; j++ )
        {
            destBlock[ j ] = colors[ bitSource & 0x03 ];
            bitSource >>= 2;
            destBlock[ j + 8 ] = colors[ bitSource2 & 0x03 ];
            bitSource2 >>= 2;
        }
        
        /// Now copy the block to the destination bitmap
        /// (Trust me, this is actually *faster* than memcpy for some reason
        destData = destBMap->GetAddr8( x, y );
        destData[ 0 ] = destBlock[ 0 ];
        destData[ 1 ] = destBlock[ 1 ];
        destData[ 2 ] = destBlock[ 2 ];
        destData[ 3 ] = destBlock[ 3 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 4 ];
        destData[ 1 ] = destBlock[ 5 ];
        destData[ 2 ] = destBlock[ 6 ];
        destData[ 3 ] = destBlock[ 7 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 8 ];
        destData[ 1 ] = destBlock[ 9 ];
        destData[ 2 ] = destBlock[ 10 ];
        destData[ 3 ] = destBlock[ 11 ];
        destData += bMapStride;
        destData[ 0 ] = destBlock[ 12 ];
        destData[ 1 ] = destBlock[ 13 ];
        destData[ 2 ] = destBlock[ 14 ];
        destData[ 3 ] = destBlock[ 15 ];

        /// Increment and loop!
        srcData += blockSize;
        x += 4;
        if( x == srcBMap->GetCurrWidth() )
        {
            x = 0;
            y += 4;
        }
    }
}

//// ISwapDwordOrder //////////////////////////////////////////////////////////

UInt32  hsDXTSoftwareCodec::ISwapDwordOrder( UInt32 color )
{
    UInt8   a, r, g, b;


    a = (UInt8)( color >> 24 );
    r = (UInt8)( color >> 16 ) & 0xff;
    g = (UInt8)( color >> 8 ) & 0xff;
    b = (UInt8)( color & 0xff );
    return( ( b << 24 ) | ( g << 16 ) | ( r << 8 ) | a );
}

//// ISwapWordOrder ///////////////////////////////////////////////////////////

UInt16  hsDXTSoftwareCodec::ISwapWordOrder( UInt16 color )
{
    return( ( color >> 8 ) | ( color << 8 ) );
}

//// IRGB16To32Bit ////////////////////////////////////////////////////////////
//
//  Converts a RGB565 16-bit color into a RGB888 32-bit color. Alpha (upper 8
//  bits) is 0. Will be optimized LATER.
//

UInt32  hsDXTSoftwareCodec::IRGB16To32Bit( UInt16 color )
{
    UInt32      r, g, b;

#ifdef HS_BUILD_FOR_MAC
    color = ( ( color >> 8 ) | ( ( color & 0xff ) << 8 ) );
#endif
    
    b = ( color & 31 ) << 3;
    color >>= 5;

    g = ( color & 63 ) << ( 2 + 8 );
    color >>= 6;
    
    r = ( color & 31 ) << ( 3 + 16 );

    return( r + g + b );
}

//// IRGB565To4444 ////////////////////////////////////////////////////////////
//
//  Converts a RGB565 16-bit color into a RGB4444 16-bit color. Alpha (upper 8
//  bits) is 0.
//

UInt16  hsDXTSoftwareCodec::IRGB565To4444( UInt16 color )
{
    UInt16      r, g, b;

#ifdef HS_BUILD_FOR_MAC
    color = ISwapWordOrder( color );
#endif
    
    b = ( color & 31 ) >> 1;
    color >>= 5;

    g = ( color & 63 ) >> 2;
    color >>= 6;
    g <<= 4;
    
    r = ( color & 31 ) >> 1;
    r <<= 8;

    return( r + g + b );
}

//// IRGB565To4444Rev /////////////////////////////////////////////////////////
//
//  Converts a RGB565 16-bit color into a RGB4444 16-bit color. Alpha (upper 8
//  bits) is 0. This is the OpenGL-friendly version (B and R are swapped)
//

UInt16  hsDXTSoftwareCodec::IRGB565To4444Rev( UInt16 color )
{
    UInt16      r, g, b;

#ifdef HS_BUILD_FOR_MAC
    color = ISwapWordOrder( color );
#endif
    
    r = ( color & 31 ) >> 1;
    color >>= 5;
    r <<= 8;

    g = ( color & 63 ) >> 2;
    color >>= 6;
    g <<= 4;
    
    b = ( color & 31 ) >> 1;

    return( r + g + b );
}

//// IRGB565To1555 ////////////////////////////////////////////////////////////
//
//  Converts a RGB565 16-bit color into a RGB1555 16-bit color. Alpha (upper 1
//  bit) is 0.
//

UInt16  hsDXTSoftwareCodec::IRGB565To1555( UInt16 color )
{
    UInt16      r, g, b;

#ifdef HS_BUILD_FOR_MAC
    color = ISwapWordOrder( color );
#endif
    
    b = ( color & 31 );
    color >>= 5;

    g = ( color & 63 ) >> 1;
    g <<= 5;
    color >>= 6;
    
    r = ( color & 31 );
    r <<= 10;

    return( r + g + b );
}

//// IRGB565To5551 ////////////////////////////////////////////////////////////
//
//  Converts a RGB565 16-bit color into a RGB5551 16-bit color. Alpha (lowest 1
//  bit) is 0.
//

UInt16  hsDXTSoftwareCodec::IRGB565To5551( UInt16 color )
{
    UInt16      rg, b;

#ifdef HS_BUILD_FOR_MAC
    color = ISwapWordOrder( color );
#endif
    
    rg = color & 0xffc0;        /// Masks off red and green
    b = ( color & 31 );
    b <<= 1;

    return( rg | b );
}

//// IMixTwoThirdsRGB32 ///////////////////////////////////////////////////////
//
//  Returns a 32-bit RGB888 value resulting from the mixing of 2/3rds of the
//  first parameter and 1/3rd of the second. Will be optimized LATER.
//

UInt32  hsDXTSoftwareCodec::IMixTwoThirdsRGB32( UInt32 twoThirds, UInt32 oneThird )
{
    UInt32  r, g, b;


    r = ( ( twoThirds & 0x00ff0000 ) + ( twoThirds & 0x00ff0000 )
        + ( oneThird & 0x00ff0000 ) ) / 3;
    r &= 0x00ff0000;

    g = ( ( twoThirds & 0x0000ff00 ) + ( twoThirds & 0x0000ff00 )
        + ( oneThird & 0x0000ff00 ) ) / 3;
    g &= 0x0000ff00;

    b = ( ( twoThirds & 0x000000ff ) + ( twoThirds & 0x000000ff )
        + ( oneThird & 0x000000ff ) ) / 3;
    b &= 0x000000ff;

    return( r + g + b );
}

//// IMixTwoThirdsRGB1555 /////////////////////////////////////////////////////
//
//  Returns a 16-bit RGB1555 value resulting from the mixing of 2/3rds of the
//  first parameter and 1/3rd of the second. Alpha is ignored.
//

UInt16  hsDXTSoftwareCodec::IMixTwoThirdsRGB1555( UInt16 twoThirds, UInt16 oneThird )
{
    UInt16  r, g, b;


    r = ( ( twoThirds & 0x7c00 ) + ( twoThirds & 0x7c00 )
        + ( oneThird & 0x7c00 ) ) / 3;
    r &= 0x7c00;

    g = ( ( twoThirds & 0x03e0 ) + ( twoThirds & 0x03e0 )
        + ( oneThird & 0x03e0 ) ) / 3;
    g &= 0x03e0;

    b = ( ( twoThirds & 0x001f ) + ( twoThirds & 0x001f )
        + ( oneThird & 0x001f ) ) / 3;
    b &= 0x001f;

    return( r + g + b );
}

//// IMixTwoThirdsRGB5551 /////////////////////////////////////////////////////
//
//  Returns a 16-bit RGB5551 value resulting from the mixing of 2/3rds of the
//  first parameter and 1/3rd of the second. Alpha is ignored.
//

UInt16  hsDXTSoftwareCodec::IMixTwoThirdsRGB5551( UInt16 twoThirds, UInt16 oneThird )
{
    UInt16  r, g, b;


    r = ( ( twoThirds & 0xf800 ) + ( twoThirds & 0xf800 )
        + ( oneThird & 0xf800 ) ) / 3;
    r &= 0xf800;

    g = ( ( twoThirds & 0x07c0 ) + ( twoThirds & 0x07c0 )
        + ( oneThird & 0x07c0 ) ) / 3;
    g &= 0x07c0;

    b = ( ( twoThirds & 0x003e ) + ( twoThirds & 0x003e )
        + ( oneThird & 0x003e ) ) / 3;
    b &= 0x003e;

    return( r + g + b );
}

//// IMixTwoThirdsRGB4444 /////////////////////////////////////////////////////
//
//  Returns a 16-bit RGB4444 value resulting from the mixing of 2/3rds of the
//  first parameter and 1/3rd of the second. Alpha is ignored.
//

UInt16  hsDXTSoftwareCodec::IMixTwoThirdsRGB4444( UInt16 twoThirds, UInt16 oneThird )
{
    UInt16  r, g, b;


    r = ( ( twoThirds & 0x0f00 ) + ( twoThirds & 0x0f00 )
        + ( oneThird & 0x0f00 ) ) / 3;
    r &= 0x0f00;

    g = ( ( twoThirds & 0x00f0 ) + ( twoThirds & 0x00f0 )
        + ( oneThird & 0x00f0 ) ) / 3;
    g &= 0x00f0;

    b = ( ( twoThirds & 0x000f ) + ( twoThirds & 0x000f )
        + ( oneThird & 0x000f ) ) / 3;
    b &= 0x000f;

    return( r + g + b );
}

//// IMixTwoThirdsInten ///////////////////////////////////////////////////////
//
//  Returns an 8-bit intensity value resulting from the mixing of 2/3rds of the
//  first parameter and 1/3rd of the second.
//

UInt8   hsDXTSoftwareCodec::IMixTwoThirdsInten( UInt8 twoThirds, UInt8 oneThird )
{
    return( ( twoThirds + twoThirds + oneThird ) / 3 );
}

//// IMixEqualRGB32 ///////////////////////////////////////////////////////////
//
//  Returns a 32-bit RGB888 value resulting from the mixing of equal parts of
//  the two colors given.
//

UInt32  hsDXTSoftwareCodec::IMixEqualRGB32( UInt32 color1, UInt32 color2 )
{
    UInt32  r, g, b;


    r = ( ( color1 & 0x00ff0000 ) + ( color2 & 0x00ff0000 ) ) >> 1;
    r &= 0x00ff0000;

    g = ( ( color1 & 0x0000ff00 ) + ( color2 & 0x0000ff00 ) ) >> 1;
    g &= 0x0000ff00;

    b = ( ( color1 & 0x000000ff ) + ( color2 & 0x000000ff ) ) >> 1;
    b &= 0x000000ff;

    return( r + g + b );
}

//// IMixEqualRGB1555 /////////////////////////////////////////////////////////
//
//  Returns a 16-bit RGB1555 value resulting from the mixing of equal parts of
//  the two colors given.
//

UInt16  hsDXTSoftwareCodec::IMixEqualRGB1555( UInt16 color1, UInt16 color2 )
{
    UInt16  r, g, b;


    r = ( ( color1 & 0x7c00 ) + ( color2 & 0x7c00 ) ) >> 1;
    r &= 0x7c00;

    g = ( ( color1 & 0x03e0 ) + ( color2 & 0x03e0 ) ) >> 1;
    g &= 0x03e0;

    b = ( ( color1 & 0x001f ) + ( color2 & 0x001f ) ) >> 1;
    b &= 0x001f;

    return( r + g + b );
}

//// IMixEqualRGB5551 /////////////////////////////////////////////////////////
//
//  Returns a 16-bit RGB5551 value resulting from the mixing of equal parts of
//  the two colors given.
//

UInt16  hsDXTSoftwareCodec::IMixEqualRGB5551( UInt16 color1, UInt16 color2 )
{
    UInt16  r, g, b;


    r = ( ( color1 & 0xf800 ) + ( color2 & 0xf800 ) ) >> 1;
    r &= 0xf800;

    g = ( ( color1 & 0x07c0 ) + ( color2 & 0x07c0 ) ) >> 1;
    g &= 0x07c0;

    b = ( ( color1 & 0x003e ) + ( color2 & 0x003e ) ) >> 1;
    b &= 0x003e;

    return( r + g + b );
}

//// IMixEqualRGB4444 /////////////////////////////////////////////////////////
//
//  Returns a 16-bit RGB4444 value resulting from the mixing of equal parts of
//  the two colors given.
//

UInt16  hsDXTSoftwareCodec::IMixEqualRGB4444( UInt16 color1, UInt16 color2 )
{
    UInt16  r, g, b;


    r = ( ( color1 & 0x0f00 ) + ( color2 & 0x0f00 ) ) >> 1;
    r &= 0x0f00;

    g = ( ( color1 & 0x00f0 ) + ( color2 & 0x00f0 ) ) >> 1;
    g &= 0x00f0;

    b = ( ( color1 & 0x000f ) + ( color2 & 0x000f ) ) >> 1;
    b &= 0x000f;

    return( r + g + b );
}

//// IMixEqualInten ///////////////////////////////////////////////////////////
//
//  Returns an 8-bit intensity value resulting from the mixing of equal parts of
//  the two colors given.
//

UInt8   hsDXTSoftwareCodec::IMixEqualInten( UInt8 color1, UInt8 color2 )
{
    return( ( color1 + color2 ) >> 1 );
}


void hsDXTSoftwareCodec::CompressMipmapLevel( plMipmap *uncompressed, plMipmap *compressed )
{
    UInt32 *compressedImage = (UInt32 *)compressed->GetCurrLevelPtr();
    UInt32 *uncompressedImage = (UInt32 *)uncompressed->GetCurrLevelPtr();
    Int32 x, y;
    Int32 xMax = uncompressed->GetCurrWidth() >> 2;
    Int32 yMax = uncompressed->GetCurrHeight() >> 2;
    for (x = 0; x < xMax; ++x)
    {
        for (y = 0; y < yMax; ++y)
        {
            UInt8 maxAlpha = 0;
            UInt8 minAlpha = 255;
            UInt8 oldMaxAlpha = 0;
            UInt8 oldMinAlpha = 255;
            UInt8 alpha[8];
            Int32 maxDistance = 0;
            hsRGBAColor32 color[4];
            hsBool hasTransparency = false;

            Int32 xx, yy;
            for (xx = 0; xx < 4; ++xx)
            {
                for (yy = 0; yy < 4; ++yy)
                {
                    hsRGBAColor32* pixel = (hsRGBAColor32*)uncompressed->GetAddr32(4 * x + xx, 4 * y + yy);
                    UInt8 pixelAlpha = pixel->a;
                    if (pixelAlpha != 255)
                    {
                        hasTransparency = true;
                    }

                    if (compressed->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT5)
                    {
                        if (pixelAlpha > maxAlpha)
                        {
                            maxAlpha = pixelAlpha;
                        }
                        
                        if ((pixelAlpha > oldMaxAlpha) && (pixelAlpha < 255))
                        {
                            oldMaxAlpha = pixelAlpha;
                        }

                        if (pixelAlpha < minAlpha)
                        {
                            minAlpha = pixelAlpha;
                        }

                        if ((pixelAlpha < oldMinAlpha) && (pixelAlpha > 0))
                        {
                            oldMinAlpha = minAlpha;
                        }
                    }
                    
                    Int32 xx2, yy2;
                    for (xx2 = 0; xx2 < 4; ++xx2)
                    {
                        for (yy2 = 0; yy2 < 4; ++yy2)
                        {
                            hsRGBAColor32* pixel1 = (hsRGBAColor32*)uncompressed->GetAddr32(4 * x + xx, 4 * y + yy);
                            hsRGBAColor32* pixel2 = (hsRGBAColor32*)uncompressed->GetAddr32(4 * x + xx2, 4 * y + yy2);
                            
                            Int32 distance = ColorDistanceARGBSquared(*pixel1, *pixel2);
                            if (distance >= maxDistance)
                            {
                                maxDistance = distance;
                                color[0] = *pixel1;
                                color[1] = *pixel2;
                            }
                        } // for yy2
                    } // for xx2
                } // for yy
            } // for xx
            
            if (oldMinAlpha == 255)
            {
                hsAssert(oldMaxAlpha == 0, "Weirdness in oldMaxAlpha hsDXTSoftwareCodec::CompressBitmap.");
                oldMinAlpha = 0;
                oldMaxAlpha = 255;
            }

            if (compressed->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT5)
            {
                if ((maxAlpha == 255) && (minAlpha == 0))
                {
                    hsAssert(oldMinAlpha <= oldMaxAlpha, "Min > Max in hsDXTSoftwareCodec::CompressBitmap 1.");
                    alpha[0] = oldMinAlpha;
                    alpha[1] = oldMaxAlpha;
                    alpha[2] = (4 * alpha[0] + alpha[1]) / 5;      // Bit code 010
                    alpha[3] = (3 * alpha[0] + 2 * alpha[1]) / 5;  // Bit code 011    
                    alpha[4] = (2 * alpha[0] + 3 * alpha[1]) / 5;  // Bit code 100    
                    alpha[5] = (alpha[0] + 4 * alpha[1]) / 5;      // Bit code 101
                    alpha[6] = 0;                                // Bit code 110
                    alpha[7] = 255;                              // Bit code 111
                }
                else if (maxAlpha == minAlpha)
                {
                    alpha[0] = minAlpha;
                    alpha[1] = maxAlpha;
                    alpha[2] = (4 * alpha[0] + alpha[1]) / 5;      // Bit code 010
                    alpha[3] = (3 * alpha[0] + 2 * alpha[1]) / 5;  // Bit code 011    
                    alpha[4] = (2 * alpha[0] + 3 * alpha[1]) / 5;  // Bit code 100    
                    alpha[5] = (alpha[0] + 4 * alpha[1]) / 5;      // Bit code 101
                    alpha[6] = 0;                                // Bit code 110
                    alpha[7] = 255;                              // Bit code 111
                }
                else
                {
                    hsAssert(minAlpha < maxAlpha, "Min => Max in hsDXTSoftwareCodec::CompressBitmap 3.");
                    alpha[0] = maxAlpha;
                    alpha[1] = minAlpha;
                    alpha[2] = (6 * alpha[0] + alpha[1]) / 7;      // bit code 010
                    alpha[3] = (5 * alpha[0] + 2 * alpha[1]) / 7;  // Bit code 011    
                    alpha[4] = (4 * alpha[0] + 3 * alpha[1]) / 7;  // Bit code 100    
                    alpha[5] = (3 * alpha[0] + 4 * alpha[1]) / 7;  // Bit code 101
                    alpha[6] = (2 * alpha[0] + 5 * alpha[1]) / 7;  // Bit code 110    
                    alpha[7] = (alpha[0] + 6 * alpha[1]) / 7;      // Bit code 111
                }
            }
            
            UInt32 encoding;
            UInt16 shortColor[2];
            shortColor[0] = Color32To16(color[0]);
            shortColor[1] = Color32To16(color[1]);
            if ((shortColor[0] == shortColor[1]) ||
                ((compressed->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT1) &&
                hasTransparency))
            {
                encoding = kThreeColorEncoding;

                if (shortColor[0] > shortColor[1])
                {
                    UInt16 temp = shortColor[1];
                    shortColor[1] = shortColor[0];
                    shortColor[0] = temp;
                    
                    hsRGBAColor32 temp32 = color[1];
                    color[1] = color[0];
                    color[0] = temp32;
                }

                color[2] = BlendColors32(1, color[0], 1, color[1]);

                hsRGBAColor32 black;
                black.Set(0, 0, 0, 0);

                color[3] = black;
            }
            else
            {
                encoding = kFourColorEncoding;

                if (shortColor[0] < shortColor[1])
                {
                    UInt16 temp = shortColor[1];
                    shortColor[1] = shortColor[0];
                    shortColor[0] = temp;
                    
                    hsRGBAColor32 temp32 = color[1];
                    color[1] = color[0];
                    color[0] = temp32;
                }

                color[2] = BlendColors32(2, color[0], 1, color[1]);
                color[3] = BlendColors32(1, color[0], 2, color[1]);
            }
            
            // Process each pixel in block
            UInt32 blockSize = compressed->fDirectXInfo.fBlockSize;
            UInt32 *block = &compressedImage[(x + xMax * y) * (blockSize >> 2)];
            UInt8 *byteBlock = (UInt8 *)block;
            UInt8 *alphaBlock = nil;
            UInt16 *colorBlock = nil;
            if (compressed->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT5)
            {
                alphaBlock = byteBlock;
                colorBlock = (UInt16 *)(byteBlock + 8);
                alphaBlock[0] = 0;
                alphaBlock[1] = 0;
                alphaBlock[2] = 0;
                alphaBlock[3] = 0;
                alphaBlock[4] = 0;
                alphaBlock[5] = 0;
                alphaBlock[6] = 0;
                alphaBlock[7] = 0;
            }
            else if (compressed->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT1)
            {
                alphaBlock = nil;
                colorBlock = (UInt16 *)(byteBlock);
            }
            else
            {
                hsAssert(false, "Unrecognized compression scheme.");
            }
            
            colorBlock[0] = 0;
            colorBlock[1] = 0;
            colorBlock[2] = 0;
            colorBlock[3] = 0;
            for (xx = 0; xx < 4; ++xx)
            {
                for (yy = 0; yy < 4; ++yy)
                {
                    hsRGBAColor32* pixel = (hsRGBAColor32*)uncompressed->GetAddr32(4 * x + xx, 4 * y + yy);
                    UInt8 pixelAlpha = pixel->a;
                    if (alphaBlock)
                    {
                        UInt32 alphaIndex = 0;
                        UInt32 alphaDistance = abs(pixelAlpha - alpha[0]);
                        
                        Int32 i;
                        for (i = 1; i < 8; i++)
                        {
                            UInt32 distance = abs(pixelAlpha - alpha[i]);
                            if (distance < alphaDistance)
                            {
                                alphaIndex = i;
                                alphaDistance = distance;
                            }
                        }
                        
                        if (yy < 2)
                        {
                            UInt32 alphaShift = 3 * (4 * yy + xx);
                            UInt32 threeAlphaBytes = alphaIndex << alphaShift;
                            alphaBlock[2] |= (threeAlphaBytes & 0xff);
                            alphaBlock[3] |= ((threeAlphaBytes >> 8) & 0xff);
                            alphaBlock[4] |= ((threeAlphaBytes >> 16) & 0xff);
                        }
                        else
                        {
                            UInt32 alphaShift = 3 * (4 * (yy - 2) + xx);
                            UInt32 threeAlphaBytes = alphaIndex << alphaShift;
                            alphaBlock[5] |= (threeAlphaBytes & 0xff);
                            alphaBlock[6] |= ((threeAlphaBytes >> 8) & 0xff);
                            alphaBlock[7] |= ((threeAlphaBytes >> 16) & 0xff);
                        }
                    }
                    
                    UInt32 colorShift = 2 * (4 * yy + xx);
                    UInt32 colorIndex = 0;
                    UInt32 colorDistance = ColorDistanceARGBSquared(*pixel, color[0]);
                    
                    if ((encoding == kThreeColorEncoding) &&
                        (pixelAlpha == 0))
                    {
                        colorIndex = 3;
                    }
                    else
                    {
                        Int32 i;
                        Int32 colorMax = (encoding == kThreeColorEncoding) ? 3 : 4;
                        for (i = 1; i < colorMax; i++)
                        {
                            UInt32 distance = ColorDistanceARGBSquared(*pixel, color[i]);
                            if (distance < colorDistance)
                            {
                                colorIndex = i;
                                colorDistance = distance;
                            }
                        }
                    }

                    if (yy < 2)
                    {
                        UInt32 colorShift = 2 * (4 * yy + xx);
                        UInt16 colorWord = (UInt16)(colorIndex << colorShift);
                        colorBlock[2] |= colorWord;
                    }
                    else
                    {
                        UInt32 colorShift = 2 * (4 * (yy - 2) + xx);
                        UInt16 colorWord = (UInt16)(colorIndex << colorShift);
                        colorBlock[3] |= colorWord;
                    }
                } // for yy
            } // for xx
            
            if (alphaBlock)
            {
                alphaBlock[0] = alpha[0];
                alphaBlock[1] = alpha[1];
            }
            
            colorBlock[0] = shortColor[0];
            colorBlock[1] = shortColor[1];
        } // for y
    } // for x
}

UInt16 hsDXTSoftwareCodec::BlendColors16(UInt16 weight1, UInt16 color1, UInt16 weight2, UInt16 color2)
{
    UInt16 r1, r2, g1, g2, b1, b2;
    UInt16 r, g, b;

    r1 = (color1 >> 8) & 0xf8;
    g1 = (color1 >> 3) & 0xfc;
    b1 = (color1 << 3) & 0xf8;
    
    r2 = (color2 >> 8) & 0xf8;
    g2 = (color2 >> 3) & 0xfc;
    b2 = (color2 << 3) & 0xf8;
    
    r = ((UInt16)r1 * weight1 + (UInt16)r2 * weight2)/(weight1 + weight2);
    g = ((UInt16)g1 * weight1 + (UInt16)g2 * weight2)/(weight1 + weight2);
    b = ((UInt16)b1 * weight1 + (UInt16)b2 * weight2)/(weight1 + weight2);

    return ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | ((b & 0xf8) >> 3);
}


hsRGBAColor32 hsDXTSoftwareCodec::BlendColors32(UInt32 weight1, hsRGBAColor32 color1, 
                                         UInt32 weight2, hsRGBAColor32 color2)
{
    hsRGBAColor32 result;

    result.r = static_cast<UInt8>((color1.r * weight1 + color2.r * weight2)/(weight1 + weight2));
    result.g = static_cast<UInt8>((color1.g * weight1 + color2.g * weight2)/(weight1 + weight2));
    result.b = static_cast<UInt8>((color1.b * weight1 + color2.b * weight2)/(weight1 + weight2));

    return result;
}


Int32 hsDXTSoftwareCodec::ColorDistanceARGBSquared(hsRGBAColor32 color1, hsRGBAColor32 color2)
{
    Int32 r1, g1, b1;
    Int32 r2, g2, b2;

    r1 = color1.r;
    r2 = color2.r;
    g1 = color1.g;
    g2 = color2.g;
    b1 = color1.b;
    b2 = color2.b;

    return (r1 - r2) * (r1 - r2) + (g1 - g2) * (g1 - g2) + (b1 - b2) * (b1 - b2);
}

UInt16 hsDXTSoftwareCodec::Color32To16(hsRGBAColor32 color)
{
    UInt8 r = (UInt8)(color.r & 0xf8);
    UInt8 g = (UInt8)(color.g & 0xfc);
    UInt8 b = (UInt8)(color.b & 0xf8);

    return (r << 8) | (g << 3) | (b >> 3);
}

hsBool hsDXTSoftwareCodec::Register()
{
    return hsCodecManager::Instance().Register(&(Instance()), plMipmap::kDirectXCompression, 100);
}

//// ICalcCompressedFormat ////////////////////////////////////////////////////
//  Determine the DXT compression format based on a bitmap.

UInt8   hsDXTSoftwareCodec::ICalcCompressedFormat( plMipmap *bMap )
{
    if( bMap->GetFlags() & plMipmap::kAlphaChannelFlag )
        return plMipmap::DirectXInfo::kDXT5;

    return plMipmap::DirectXInfo::kDXT1;
}


//// ColorizeCompBitmap ///////////////////////////////////////////////////////
//  Colorizes a compressed bitmap according to the color mask given.

hsBool hsDXTSoftwareCodec::ColorizeCompMipmap( plMipmap *bMap, const UInt8 *colorMask )
{
    UInt32  numBlocks, blockSize;
    UInt16  *srcData, color1, color2, gray, grayDiv2, i;
    UInt8   compMasks[ 3 ][ 2 ] = { { 0, 0 }, { 0, 0xff }, { 0xff, 0 } };


    /// Sanity checks
    hsAssert( bMap != nil, "Nil bitmap passed to ColorizeCompMipmap()" );
    hsAssert( colorMask != nil, "Nil color mask passed to ColorizeCompMipmap()" );
    hsAssert( bMap->IsCompressed(), "Trying to colorize uncompressed bitmap" );


    /// Calc some stuff
    numBlocks = ( bMap->GetCurrWidth() * bMap->GetCurrHeight() ) >> 4;

    blockSize = bMap->fDirectXInfo.fBlockSize >> 1; // In 16-bit words
    srcData = (UInt16 *)bMap->GetCurrLevelPtr();
    
    // If we're DXT5, we'll artificially advance srcData so it points to the start
    // of the first *color* block, not the first compressed block
    if( bMap->fDirectXInfo.fCompressionType == plMipmap::DirectXInfo::kDXT5 )
        srcData += 4;       // Alpha was 4 16-bit words worth


    /// Loop!
    for( ; numBlocks > 0; numBlocks-- )
    {
        /// Get the two colors to colorize (our decompression scheme will do the rest...
        /// handy, eh? :)
#if HS_BUILD_FOR_MAC
        color1 = ISwapWordOrder( srcData[ 0 ] );
        color2 = ISwapWordOrder( srcData[ 1 ] );
#else
        color1 = srcData[ 0 ];
        color2 = srcData[ 1 ];
#endif

        /// Now colorize using our age-old formula (see hsGMipmap::ColorLevel for details)
        gray = ( ( color1 >> 11 ) & 0x1f ) + ( ( color1 >> 6 ) & 0x1f ) + ( color1 & 0x1f );
        gray /= 3;
        gray = 0x1f - ( ( 0x1f - gray ) >> 1 );     // Lighten it 50%
        grayDiv2 = gray >> 1;

        color1 = ( ( gray & compMasks[ colorMask[ 0 ] ][ 0 ] ) |
                ( grayDiv2 & compMasks[ colorMask[ 0 ] ][ 1 ] ) ) << 11;
        color1 |= ( ( gray & compMasks[ colorMask[ 1 ] ][ 0 ] ) |
                ( grayDiv2 & compMasks[ colorMask[ 1 ] ][ 1 ] ) ) << 6;
        color1 |= ( ( gray & compMasks[ colorMask[ 2 ] ][ 0 ] ) |
                ( grayDiv2 & compMasks[ colorMask[ 2 ] ][ 1 ] ) );

        gray = ( ( color2 >> 11 ) & 0x1f ) + ( ( color2 >> 6 ) & 0x1f ) + ( color2 & 0x1f );
        gray /= 3;
        gray = 0x1f - ( ( 0x1f - gray ) >> 1 );     // Lighten it 50%
        grayDiv2 = gray >> 1;

        color2 = ( ( gray & compMasks[ colorMask[ 0 ] ][ 0 ] ) |
                ( grayDiv2 & compMasks[ colorMask[ 0 ] ][ 1 ] ) ) << 11;
        color2 |= ( ( gray & compMasks[ colorMask[ 1 ] ][ 0 ] ) |
                ( grayDiv2 & compMasks[ colorMask[ 1 ] ][ 1 ] ) ) << 6;
        color2 |= ( ( gray & compMasks[ colorMask[ 2 ] ][ 0 ] ) |
                ( grayDiv2 & compMasks[ colorMask[ 2 ] ][ 1 ] ) );

        /// IMPORTANT: Check to make sure we're preserving the block type
        if( srcData[ 0 ] > srcData[ 1 ] && color1 <= color2 )
        {
            /// NOPE! We better flip the colors and flip all the color refs
            /// in this block, to preserve block type
            if( color1 == color2 )
                color1++;       // This is a hack--our rounding may cause this,
                                // in which case we need to make SURE c1 > c2
            else
                SWAPVARS( color1, color2, i );

            srcData[ 2 ] ^= 0x5555;
            srcData[ 3 ] ^= 0x5555;
        }
        else if( srcData[ 0 ] <= srcData[ 1 ] && color1 > color2 )
        {
            SWAPVARS( color1, color2, i );

            /// 1/2 block w/ alpha -- switch only first two
            /// (watch carefully :)
            srcData[ 2 ] ^= ( ( ~( srcData[ i ] >> 1 ) ) & 0x5555 );
            srcData[ 3 ] ^= ( ( ~( srcData[ i ] >> 1 ) ) & 0x5555 );

            /// Spoiler for above: we shift the word right one bit, then
            /// not the bits, then mask off the lower bits of the pairs
            /// (which of course used to be the upper bits). Now any upper
            /// bits that were 0 are now lower bits of 1, and everything 
            /// else 0's. This we then xor with the original value to 
            /// flip the lower bits of those pairs whose upper bits are 0.
            /// Nifty, eh?
        }

        /// Write back and go!
#if HS_BUILD_FOR_MAC
        srcData[ 0 ] = ISwapWordOrder( color1 );
        srcData[ 1 ] = ISwapWordOrder( color2 );
#else
        srcData[ 0 ] = color1;
        srcData[ 1 ] = color2;
#endif
        srcData += blockSize;
    }

    return true;        /// We handled this bitmap
}