
CyanWorlds.com Engine - MMOG client, server and tools
Copyright (C) 2011  Cyan Worlds, Inc.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.

You can contact Cyan Worlds, Inc. by email legal@cyan.com
 or by snail mail at:
      Cyan Worlds, Inc.
      14617 N Newport Hwy
      Mead, WA   99021

//                                                                           //
//  plDXPipeline Class Functions                                             //
//  plPipeline derivative for DirectX                                        //
//  Cyan, Inc.                                                               //
//                                                                           //
//// Version History //////////////////////////////////////////////////////////
//                                                                           //
//  2.23.2001 mcn - Created.                                                 //
//                                                                           //

#include "hsConfig.h"
#include "hsWindows.h"

#include <d3d9.h>
#include <ddraw.h>
#include <d3dx9mesh.h>

#ifdef DX_OLD_SDK
    #include <dxerr9.h>
    #define DXGetErrorString9 DXGetErrorString
    #include <dxerr.h>

#include "hsWinRef.h"

#include "hsTypes.h"
#include "plDXPipeline.h"
#include "plPipelineCreate.h"
#include "plDebugText.h"
#include "plDXEnumerate.h"
#include "hsG3DDeviceSelector.h"
#include "hsGDDrawDllLoad.h"
#include "hsResMgr.h"
#include "plStatusLogDrawer.h"
#include "plQuality.h"

#include "plPipeDebugFlags.h"

#include "hsTemplates.h"
//#include "hsGEnviron.h"
#include "plProfile.h"
#include "plMessage/plDeviceRecreateMsg.h"
#include "pnMessage/plSelfDestructMsg.h"
#include "pnMessage/plClientMsg.h"
#include "plSurface/hsGMaterial.h"
#include "plSurface/plLayerInterface.h"
#include "plSurface/plLayerShadowBase.h"
#include "plGImage/plMipmap.h"
#include "plGImage/plCubicEnvironmap.h"
#include "plDrawable/plDrawableSpans.h"
#include "plDrawable/plGeometrySpan.h"
#include "plDrawable/plSpaceTree.h"
#include "plDrawable/plDrawableGenerator.h"
#include "plDrawable/plSpanTypes.h"
#include "plDrawable/plAccessSpan.h"
#include "plDrawable/plAuxSpan.h"
#include "pnSceneObject/plSceneObject.h"
#include "pnSceneObject/plDrawInterface.h"
#include "hsFastMath.h"
#include "plGLight/plLightInfo.h"
#include "plParticleSystem/plParticleEmitter.h"
#include "plParticleSystem/plParticle.h"
#include "plAvatar/plAvatarClothing.h"
#include "plDebugText.h"
#include "plFogEnvironment.h"
#include "plDXTextFont.h"
#include "plGBufferGroup.h"
#include "hsTimer.h"
#include "plgDispatch.h"
#include "plScene/plRenderRequest.h"
#include "plScene/plVisMgr.h"
#include "plRenderTarget.h"
#include "plCubicRenderTarget.h"
#include "plDynamicEnvMap.h"
#include "pfCamera/plVirtualCamNeu.h"

#include "plDXBufferRefs.h"
#include "plDXTextureRef.h"
#include "plDXLightRef.h"
#include "plDXRenderTargetRef.h"
#include "plDXVertexShader.h"
#include "plDXPixelShader.h"

#include "plGLight/plShadowSlave.h"
#include "plGLight/plShadowCaster.h"

#include "hsGMatState.inl"

#include "plSurface/plShader.h"
#include "plDXVertexShader.h"
#include "plDXPixelShader.h"

#include "pnMessage/plPipeResMakeMsg.h"
#include "plPipeResReq.h"
#include "pnNetCommon/plNetApp.h"   // for dbg logging
#include "pfCamera/plVirtualCamNeu.h"
#include "pfCamera/plCameraModifier.h"
#include "plResMgr/plLocalization.h"

// mf horse - test hack, nuke this later
#include "plSurface/plLayerDepth.h"

#include "plGImage/hsCodecManager.h"
//#include "plGImage/hsDXTDirectXCodec.h"

// This is so VC++ will let us view the contents of plIcicle::fOwnerKey
#include "pnKeyedObject/plKey.h"

#include "plCullTree.h"

#include "plTweak.h"

#include <algorithm>

//#define MF_TOSSER

// File-scope test/debug globals.
// NOTE(review): mfCurrentTest, hackOffscreens and doHackPlate are not referenced in
// this part of the file - confirm they are still needed.
int mfCurrentTest = 100;
// Storage for plPipeline's static parameter blocks. The plDXPipeline constructor reads
// fInitialPipeParams (Width, Height, VSync, Windowed, AntiAliasingAmount,
// AnisotropicLevel) while setting up the device.
PipelineParams plPipeline::fDefaultPipeParams;
PipelineParams plPipeline::fInitialPipeParams;
static hsTArray<plRenderTarget*> hackOffscreens;
UInt32 doHackPlate = UInt32(-1);

UInt32  fDbgSetupInitFlags;     // HACK temp only

// Releases one reference on a COM object; a nil pointer is ignored.
// Two variants are selected at compile time so that only one definition exists.
#ifdef HS_DEBUGGING
// Debug variant: keeps the count returned by Release() in a local so the number of
// outstanding references can be inspected (presumably with a breakpoint on the
// "refs = 0" line, which has no other effect).
void plReleaseObject(IUnknown* x)
{
    if( x )
    {
        int refs = x->Release();
        if( refs )
            refs = 0;
    }
}
#else // HS_DEBUGGING
// Non-debug variant: just drops the reference.
void plReleaseObject(IUnknown* x)
{
    if( x )
        x->Release();
}
#endif // HS_DEBUGGING

//// Local Static Stuff ///////////////////////////////////////////////////////

/// Macros for getting/setting data in a D3D vertex buffer
// Writes the three floats of 'point' (read starting at point.fX) to the buffer at 'ptr'
// and returns the address immediately after the written data.
// NOTE(review): assumes the three components of hsScalarTriple are contiguous floats
// beginning at fX, and that 'ptr' is suitably aligned for float stores - confirm.
inline UInt8* inlStuffPoint( UInt8* ptr, const hsScalarTriple& point )
{
    float* dst = (float*)ptr;
    const float* src = (const float*)&point.fX;
    *dst++ = *src++;
    *dst++ = *src++;
    *dst++ = *src++;
    return (UInt8*)dst;
}
// Stores 'uint' at 'ptr' and returns the address just past the stored value.
inline UInt8* inlStuffUInt32( UInt8* ptr, const UInt32 uint )
{
    *(UInt32*)ptr = uint;
    return ptr + sizeof(uint);
}
// Reads three floats from 'ptr' into 'pt' (written starting at pt.fX) and returns the
// address immediately after the data that was read.
// Note that 'pt' is declared const but is written through a cast; the signature is kept
// as-is for existing callers, which must pass an object that is actually modifiable.
inline UInt8* inlExtractPoint( const UInt8* ptr, const hsScalarTriple& pt )
{
    const float* src = (const float*)ptr;
    float* dst = (float*)&pt.fX;   // deliberately casts away const, see note above
    *dst++ = *src++;
    *dst++ = *src++;
    *dst++ = *src++;
    return (UInt8*)src;
}
// Reads one float from 'ptr' into 'f' and returns the address just past it.
// 'ptr' is taken by reference but is not advanced here; the caller gets the new
// position only through the return value.
inline UInt8* inlExtractFloat( const UInt8*& ptr, float& f )
{
    const float* src = (const float*)ptr;
    f = *src++;
    return (UInt8*)src;
}
// Reads one UInt32 from 'ptr' into 'uint' and returns the address just past it.
// 'ptr' is taken by reference but is not advanced here; the caller gets the new
// position only through the return value.
inline UInt8* inlExtractUInt32( const UInt8*& ptr, UInt32& uint )
{
    const UInt32* src = (const UInt32*)ptr;
    uint = *src++;
    return (UInt8*)src;
}

// Returns the raw bit pattern of 'f' reinterpreted as a DWORD (no numeric conversion).
// NOTE(review): presumably used to hand float values to D3D state setters that take a
// DWORD - no call site is visible in this part of the file.
inline DWORD F2DW( FLOAT f )
{
    return *((DWORD*)&f);
}

//// Macros for D3D error handling
// Stores the result of 'cond' in fSettings.fDXError and, if it FAILED, makes the
// calling function return ICreateFail( errMsg ).
#define INIT_ERROR_CHECK( cond, errMsg ) if( FAILED( fSettings.fDXError = cond ) ) { return ICreateFail( errMsg ); }    

// STRONG_ERROR_CHECK stores the result of 'cond' in fSettings.fDXError and calls
// IGetD3DError() on failure. In the "#if 1" (debug) configuration it also calls
// IShowErrorMessage(), and WEAK_ERROR_CHECK behaves exactly like STRONG_ERROR_CHECK.
// In the other configuration WEAK_ERROR_CHECK just evaluates 'cond' unchecked.
#if 1       // DEBUG
#define STRONG_ERROR_CHECK( cond ) if( FAILED( fSettings.fDXError = cond ) ) { IGetD3DError(); IShowErrorMessage(); }   
#define WEAK_ERROR_CHECK( cond )    STRONG_ERROR_CHECK( cond )
#else
#define STRONG_ERROR_CHECK( cond ) if( FAILED( fSettings.fDXError = cond ) ) { IGetD3DError(); }    
#define WEAK_ERROR_CHECK( cond )    cond
#endif

// 4x4 identity matrix. IInitDeviceState() passes it to SetTransform to reset the world
// transform and every texture-stage transform.
static D3DXMATRIX d3dIdentityMatrix( 1.0f, 0.0f, 0.0f, 0.0f,
                                     0.0f, 1.0f, 0.0f, 0.0f,
                                     0.0f, 0.0f, 1.0f, 0.0f,
                                     0.0f, 0.0f, 0.0f, 1.0f );

static const enum _D3DTRANSFORMSTATETYPE    sTextureStages[ 8 ] =

// Tuning constants.
// NOTE(review): the kPerspLayer* values and kAvTexPoolShrinkThresh are not referenced
// in this part of the file; check their use sites before changing them.
static const float kPerspLayerScale  = 0.00001f;
static const float kPerspLayerScaleW = 0.001f;
static const float kPerspLayerTrans  = 0.00002f;
static const hsScalar kAvTexPoolShrinkThresh = 30.f; // seconds

// This caps the number of D3D lights we use. We'll use up to the max allowed
// or this number, whichever is smaller. (This is to prevent us going haywire
// on trying to allocate an array for ALL of the lights in the Ref device.)
//#define kD3DMaxTotalLights        32
///HAAAAACK Let's be mean and limit the artists to only 4 run-time lights.... hehehehhehe (not my idea!!!)
// NOTE(review): the line above says 4, but the value actually compiled in is 8.
// plDXGeneralSettings::Reset() uses this as the default for fMaxNumLights.
const int kD3DMaxTotalLights = 8;
// The framerate is the limit on the number of projected lights an object can have.
// (That is, this value is not meant to be a practical cap.) plDXGeneralSettings::Reset()
// uses it as the default for fMaxNumProjectors.
const int kMaxProjectors = 100;

/// This controls whether we can draw bounds boxes around all the ice spans.
#define MCN_BOUNDS_SPANS    1


/// Define this to write out z-buffer debug info to plasmalog.txt

// Profiling counters and timers declared for this file. Each declaration takes a display
// name, a group name, and the identifier used by the plProfile_* calls in the code.
// Uses visible in this part of the file:
//   - MemPipelineSurfaces: adjusted by D3DSURF_MEMNEW / D3DSURF_MEMDEL.
//   - DrawFeedTriangles, DrawTriangles: incremented by plRenderTriListFunc::RenderPrims().
//   - TotalTexSize: set in IInitDeviceState() from GetAvailableTextureMem().
//   - ManagedMem, DefaultMem: adjusted by plDXPipeline::ProfilePoolMem().
plProfile_CreateMemCounter("Pipeline Surfaces", "Memory", MemPipelineSurfaces);
plProfile_CreateCounter("Feed Triangles", "Draw", DrawFeedTriangles);
plProfile_CreateCounter("Polys", "General", DrawTriangles);
plProfile_CreateCounter("Draw Prim Static", "Draw", DrawPrimStatic);
plProfile_CreateMemCounter("Total Texture Size", "Draw", TotalTexSize);
plProfile_CreateTimer("Harvest", "Draw", Harvest);
plProfile_CreateCounter("Material Change", "Draw", MatChange);
plProfile_CreateCounter("Layer Change", "Draw", LayChange);


plProfile_CreateCounterNoReset("Reload", "PipeC", PipeReload);

// "PipeT" group: timers. The leading spaces in the display names indent nested entries.
plProfile_CreateTimer("RenderScene", "PipeT", RenderScene);
plProfile_CreateTimer("VisEval", "PipeT", VisEval);
plProfile_CreateTimer("VisSelect", "PipeT", VisSelect);
plProfile_CreateTimer("FindSceneLights", "PipeT", FindSceneLights);
plProfile_CreateTimer("PrepShadows", "PipeT", PrepShadows);
plProfile_CreateTimer("PrepDrawable", "PipeT", PrepDrawable);
plProfile_CreateTimer("  Skin", "PipeT", Skin);
plProfile_CreateTimer("  AvSort", "PipeT", AvatarSort);
plProfile_CreateTimer("  Find Lights", "PipeT", FindLights);
plProfile_CreateTimer("    Find Perms", "PipeT", FindPerm);
plProfile_CreateTimer("    FindSpan", "PipeT", FindSpan);
plProfile_CreateTimer("    FindActiveLights", "PipeT", FindActiveLights);
plProfile_CreateTimer("    ApplyActiveLights", "PipeT", ApplyActiveLights);
plProfile_CreateTimer("      ApplyMoving", "PipeT", ApplyMoving);
plProfile_CreateTimer("      ApplyToSpec", "PipeT", ApplyToSpec);
plProfile_CreateTimer("      ApplyToMoving", "PipeT", ApplyToMoving);
plProfile_CreateTimer("     ClearLights", "PipeT", ClearLights);
plProfile_CreateTimer("RenderSpan", "PipeT", RenderSpan);
plProfile_CreateTimer("  MergeCheck", "PipeT", MergeCheck);
plProfile_CreateTimer("  MergeSpan", "PipeT", MergeSpan);
plProfile_CreateTimer("  SpanTransforms", "PipeT", SpanTransforms);
plProfile_CreateTimer("  SpanFog", "PipeT", SpanFog);
plProfile_CreateTimer("  SelectLights", "PipeT", SelectLights);
plProfile_CreateTimer("  SelectProj", "PipeT", SelectProj);
plProfile_CreateTimer("  CheckDyn", "PipeT", CheckDyn);
plProfile_CreateTimer("  CheckStat", "PipeT", CheckStat);
plProfile_CreateTimer("  RenderBuff", "PipeT", RenderBuff);
plProfile_CreateTimer("  RenderPrim", "PipeT", RenderPrim);
plProfile_CreateTimer("PlateMgr", "PipeT", PlateMgr);
plProfile_CreateTimer("DebugText", "PipeT", DebugText);
plProfile_CreateTimer("Reset", "PipeT", Reset);

// "PipeC" group: counters and memory counters.
plProfile_CreateMemCounter("DefMem", "PipeC", DefaultMem);
plProfile_CreateMemCounter("ManMem", "PipeC", ManagedMem);
plProfile_CreateMemCounterReset("CurrTex", "PipeC", CurrTex);
plProfile_CreateMemCounterReset("CurrVB", "PipeC", CurrVB);
plProfile_CreateMemCounter("TexTot", "PipeC", TexTot);
plProfile_CreateMemCounterReset("fTexUsed", "PipeC", fTexUsed);
plProfile_CreateMemCounterReset("fTexManaged", "PipeC", fTexManaged);
plProfile_CreateMemCounterReset("fVtxUsed", "PipeC", fVtxUsed);
plProfile_CreateMemCounterReset("fVtxManaged", "PipeC", fVtxManaged);
plProfile_CreateMemCounter("ManSeen", "PipeC", ManSeen);
plProfile_CreateCounterNoReset("ManEvict", "PipeC", ManEvict);
plProfile_CreateCounter("LightOn", "PipeC", LightOn);
plProfile_CreateCounter("LightVis", "PipeC", LightVis);
plProfile_CreateCounter("LightChar", "PipeC", LightChar);
plProfile_CreateCounter("LightActive", "PipeC", LightActive);
plProfile_CreateCounter("Lights Found", "PipeC", FindLightsFound);
plProfile_CreateCounter("Perms Found", "PipeC", FindLightsPerm);
plProfile_CreateCounter("Merge", "PipeC", SpanMerge);
plProfile_CreateCounter("TexNum", "PipeC", NumTex);
plProfile_CreateCounter("LiState", "PipeC", MatLightState);
plProfile_CreateCounter("OccPoly", "PipeC", OccPolyUsed);
plProfile_CreateCounter("OccNode", "PipeC", OccNodeUsed);
plProfile_CreateCounter("NumSkin", "PipeC", NumSkin);
plProfile_CreateCounter("AvatarFaces", "PipeC", AvatarFaces);
plProfile_CreateCounter("VertexChange", "PipeC", VertexChange);
plProfile_CreateCounter("IndexChange", "PipeC", IndexChange);
plProfile_CreateCounter("DynVBuffs", "PipeC", DynVBuffs);
plProfile_CreateCounter("EmptyList", "PipeC", EmptyList);
plProfile_CreateCounter("AvRTPoolUsed", "PipeC", AvRTPoolUsed);
plProfile_CreateCounter("AvRTPoolCount", "PipeC", AvRTPoolCount);
plProfile_CreateCounter("AvRTPoolRes", "PipeC", AvRTPoolRes);
plProfile_CreateCounter("AvRTShrinkTime", "PipeC", AvRTShrinkTime);

/// Fun inlines for keeping track of surface creation/deletion memory
// Surface overload: for a non-nil surface, queries its description and adds
// Width * Height * bits-per-pixel / 8 + sizeof(IDirect3DSurface9) bytes to the
// MemPipelineSurfaces counter, and reports the same size through PROFILE_POOL_MEM
// against D3DPOOL_DEFAULT (add = true).
// NOTE(review): the braces of all functions in this group are missing from this copy.
void D3DSURF_MEMNEW(IDirect3DSurface9* surf)
    if( surf ) 
        D3DSURFACE_DESC info; 
        surf->GetDesc( &info );
        PROFILE_POOL_MEM(D3DPOOL_DEFAULT, info.Width * info.Height * plDXPipeline::GetDXBitDepth(info.Format) / 8 + sizeof(IDirect3DSurface9), true, "D3DSurface");
        plProfile_NewMem(MemPipelineSurfaces, info.Width * info.Height * plDXPipeline::GetDXBitDepth(info.Format) / 8 + sizeof(IDirect3DSurface9)); 

// Texture overload: fetches mip level 0 of the texture.
// NOTE(review): the statements guarded by "if( surf )" are missing from this copy.
// Presumably they forward to the surface overload and then Release() the surface
// obtained from GetSurfaceLevel - confirm against the original file.
void D3DSURF_MEMNEW(IDirect3DTexture9* tex)
    if( tex )
        IDirect3DSurface9* surf;
        tex->GetSurfaceLevel(0, &surf);
        if( surf )

// Cube texture overload: fetches level 0 of the +X face.
// NOTE(review): guarded statements missing here as well (see the texture overload).
void D3DSURF_MEMNEW(IDirect3DCubeTexture9* cTex)
    if( cTex )
        IDirect3DSurface9* surf;
        cTex->GetCubeMapSurface(D3DCUBEMAP_FACE_POSITIVE_X, 0, &surf);
        if( surf )

// Surface overload of the delete side: same size computation as D3DSURF_MEMNEW, but
// subtracts from MemPipelineSurfaces and passes add = false to PROFILE_POOL_MEM.
void D3DSURF_MEMDEL(IDirect3DSurface9* surf)
    if( surf ) 
        D3DSURFACE_DESC info; 
        surf->GetDesc( &info );
        PROFILE_POOL_MEM(D3DPOOL_DEFAULT, info.Width * info.Height * plDXPipeline::GetDXBitDepth(info.Format) / 8 + sizeof(IDirect3DSurface9), false, "D3DSurface");
        plProfile_DelMem(MemPipelineSurfaces, info.Width * info.Height * plDXPipeline::GetDXBitDepth(info.Format) / 8 + sizeof(IDirect3DSurface9)); 

// Texture overload: fetches mip level 0 of the texture.
// NOTE(review): guarded statements missing (presumably D3DSURF_MEMDEL(surf) followed
// by surf->Release()) - confirm.
void D3DSURF_MEMDEL(IDirect3DTexture9* tex)
    if( tex )
        IDirect3DSurface9* surf;
        tex->GetSurfaceLevel(0, &surf);
        if( surf )

// Cube texture overload: fetches level 0 of the +X face.
// NOTE(review): guarded statements missing here as well.
void D3DSURF_MEMDEL(IDirect3DCubeTexture9* cTex)
    if( cTex )
        IDirect3DSurface9* surf;
        cTex->GetCubeMapSurface(D3DCUBEMAP_FACE_POSITIVE_X, 0, &surf);
        if( surf )
// No-op versions of the six functions above.
// NOTE(review): these redefine the functions above, so a preprocessor conditional must
// originally have selected one set or the other; the condition and its #else / #endif
// are missing from this copy and the guard macro cannot be determined from here.
void D3DSURF_MEMNEW(IDirect3DSurface9* surf) {}
void D3DSURF_MEMNEW(IDirect3DTexture9* tex) {}
void D3DSURF_MEMNEW(IDirect3DCubeTexture9* cTex) {}
void D3DSURF_MEMDEL(IDirect3DSurface9* surf) {}
void D3DSURF_MEMDEL(IDirect3DTexture9* tex) {}
void D3DSURF_MEMDEL(IDirect3DCubeTexture9* cTex) {}

// Adds 'size' bytes to, or removes them from, one of the pool memory profile counters
// (ManagedMem or DefaultMem), chosen by 'poolType'. 'add' selects NewMem versus DelMem.
// 'id' is referenced only by the commented-out logging calls.
// NOTE(review): the case labels, "else" keywords, break statements and braces of this
// switch are missing from this copy. Presumably the first NewMem/DelMem pair belongs to
// D3DPOOL_MANAGED and the second to the default (or D3DPOOL_DEFAULT) case - confirm
// against the original file before relying on this.
void plDXPipeline::ProfilePoolMem(D3DPOOL poolType, UInt32 size, hsBool add, char *id)
    switch( poolType )
        if (add)
            plProfile_NewMem(ManagedMem, size);
            //plStatusLog::AddLineS("pipeline.log", 0xffff0000, "Adding   MANAGED mem. Size: %10d, Total: %10d ID: %s",
            //                    size, gProfileVarManagedMem.GetValue(), id);
            plProfile_DelMem(ManagedMem, size);
            //plStatusLog::AddLineS("pipeline.log", 0xffff0000, "Deleting MANAGED mem. Size: %10d, Total: %10d ID: %s",
            //                    size, gProfileVarManagedMem.GetValue(), id);
        if (add)
            plProfile_NewMem(DefaultMem, size);
            //plStatusLog::AddLineS("pipeline.log", 0xffff0000, "Adding   DEFAULT mem. Size: %10d, Total: %10d ID: %s",
            //                    size, gProfileVarDefaultMem.GetValue(), id);
            plProfile_DelMem(DefaultMem, size);
            //plStatusLog::AddLineS("pipeline.log", 0xffff0000, "Deleting DEFAULT mem. Size: %10d, Total: %10d ID: %s",
            //                    size, gProfileVarDefaultMem.GetValue(), id);

// Implementations of RenderPrims types.
// Currently support render tri list
// These allow the same setup code path to be followed, no matter what the primitive type
// (i.e. data-type/draw-call is going to happen once the render state is set.
// Originally useful to make one code path for trilists, tri-patches, and rect-patches, but
// we've since dropped support for patches. We still use the RenderNil function to allow the
// code to go through all the state setup without knowing whether a render call is going to
// come out the other end.
// Would allow easy extension for supporting tristrips or pointsprites, but we've never had
// a strong reason to use either.
// First, Declarations.

// Adding a nil RenderPrim for turning off drawing
class plRenderNilFunc : public plRenderPrimFunc
    plRenderNilFunc() {}

    virtual hsBool RenderPrims() const { return false; }
static plRenderNilFunc sRenderNil;

class plRenderTriListFunc : public plRenderPrimFunc
    int                 fBaseVertexIndex;
    int                 fVStart;
    int                 fVLength;
    int                 fIStart;
    int                 fNumTris;
    plRenderTriListFunc(LPDIRECT3DDEVICE9 d3dDevice, int baseVertexIndex,
                        int vStart, int vLength, int iStart, int iNumTris)
        : fD3DDevice(d3dDevice), fBaseVertexIndex(baseVertexIndex), fVStart(vStart), fVLength(vLength), fIStart(iStart), fNumTris(iNumTris) {}

    virtual hsBool RenderPrims() const;

// Implementations

// Adds fNumTris to the DrawFeedTriangles and DrawTriangles profile counters, then draws
// the stored indexed triangle list.
// Returns true if DrawIndexedPrimitive FAILED, i.e. true means error and false success.
hsBool plRenderTriListFunc::RenderPrims() const
{
    plProfile_IncCount(DrawFeedTriangles, fNumTris);
    plProfile_IncCount(DrawTriangles, fNumTris);

    return FAILED( fD3DDevice->DrawIndexedPrimitive( D3DPT_TRIANGLELIST, fBaseVertexIndex, fVStart, fVLength, fIStart, fNumTris ) );
}

//// Constructor & Destructor /////////////////////////////////////////////////

// Storage for plDXPipeline's static UInt32 counters; each starts at zero.
UInt32 plDXPipeline::fTexUsed = 0;
UInt32 plDXPipeline::fTexManaged = 0;
UInt32 plDXPipeline::fVtxUsed = 0;
UInt32 plDXPipeline::fVtxManaged = 0;

// Builds the DX pipeline for window 'hWnd' using the device and mode held by
// 'devModeRec'. Visible steps: record screen size, color depth, vsync and AA settings in
// fSettings; enumerate D3D devices and select the requested one; create the D3D master
// object if there is none; record the chosen driver/device/mode; restrict caps; clamp
// the anisotropy level to the device maximum; create the device objects.
// NOTE(review): this copy of the constructor is missing lines - the rest of the member
// initializer list (it ends on a trailing comma), all braces, several conditions and
// "else" keywords, and presumably early returns after the error messages. The notes
// below mark the places; confirm each against the original file.
plDXPipeline::plDXPipeline( hsWinRef hWnd, const hsG3DDeviceModeRecord *devModeRec )
:   fManagedAlloced(false),
    // These enum values are relied on to be numerically equal, so assert it.
    hsAssert(D3DTSS_TCI_PASSTHRU == plLayerInterface::kUVWPassThru, "D3D Enum has changed. Notify graphics department.");
    hsAssert(D3DTSS_TCI_CAMERASPACENORMAL == plLayerInterface::kUVWNormal, "D3D Enum has changed. Notify graphics department.");
    hsAssert(D3DTSS_TCI_CAMERASPACEPOSITION == plLayerInterface::kUVWPosition, "D3D Enum has changed. Notify graphics department.");
    hsAssert(D3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR == plLayerInterface::kUVWReflect, "D3D Enum has changed. Notify graphics department.");

    // Initialize everything to NULL.
    // NOTE(review): no statement follows this comment; presumably a call to
    // IClearMembers() belongs here.

    // Get the requested mode and setup
    const hsG3DDeviceRecord *devRec = devModeRec->GetDevice();
    const hsG3DDeviceMode *devMode = devModeRec->GetMode();

    /// Init our screen mode
    fSettings.fHWnd = hWnd;
    // NOTE(review): two alternative size assignments follow with no visible condition.
    // Presumably the first pair (device mode size) is the fullscreen branch and the
    // second pair (fInitialPipeParams size) is the windowed "else" branch.
        fSettings.fOrigWidth = devMode->GetWidth();
        fSettings.fOrigHeight = devMode->GetHeight();
        // windowed can run in any mode
        fSettings.fOrigHeight = fInitialPipeParams.Height;
        fSettings.fOrigWidth = fInitialPipeParams.Width;
    IGetViewTransform().SetScreenSize((UInt16)(fSettings.fOrigWidth), (UInt16)(fSettings.fOrigHeight));
    fSettings.fColorDepth = devMode->GetColorDepth();
    fVSync = fInitialPipeParams.VSync;
    // NOTE(review): presumably an if/else - 0 samples when the AA setting is 0, otherwise
    // the FSAA type for (setting - 1). The "else" is not visible.
    if( devRec->GetAASetting() == 0 )
        fSettings.fNumAASamples = 0;
        fSettings.fNumAASamples = devMode->GetFSAAType( devRec->GetAASetting() - 1 );

    hsGDirect3DTnLEnumerate d3dEnum;
    // A non-empty enumeration error string means enumeration failed.
    if( d3dEnum.GetEnumeErrorStr()[ 0 ] )
        IShowErrorMessage( (char *)d3dEnum.GetEnumeErrorStr() );

    // The error message is shown when SelectFromDevMode returns true.
    if( d3dEnum.SelectFromDevMode(devRec, devMode) )
        IShowErrorMessage( (char *)d3dEnum.GetEnumeErrorStr() );

    // Gotta create this very first, so that the device/driver init works
    if( !fD3DObject )
        // The error message is shown when ICreateMaster returns true.
        if( ICreateMaster() )
            IShowErrorMessage( "Cannot create D3D master object" );

    // Record the requested mode/setup.
    ISetCurrentDriver( d3dEnum.GetCurrentDriver() );
    ISetCurrentDevice( d3dEnum.GetCurrentDevice() );
    D3DEnum_ModeInfo *pModeInfo = d3dEnum.GetCurrentMode();
    pModeInfo->fWindowed = fInitialPipeParams.Windowed;     // set windowed mode from ini file
    ISetCurrentMode( d3dEnum.GetCurrentMode() );

    fSettings.fFullscreen = !fCurrentMode->fWindowed;

    // This overwrites the fNumAASamples value derived from the device record above.
    fSettings.fNumAASamples = fInitialPipeParams.AntiAliasingAmount;

    // ISetCaps just records the card capabilities that were passed in.
    // IRestrictCaps looks over those explicit caps and makes some decisions on 
    // what the card can really do.
    // NOTE(review): only the IRestrictCaps call is visible here; a preceding ISetCaps()
    // call appears to be missing.
    IRestrictCaps( *devRec );

    // Clamp the requested anisotropy level to what the device reports.
    fSettings.fMaxAnisotropicSamples = fInitialPipeParams.AnisotropicLevel;
    if(fSettings.fMaxAnisotropicSamples > fCurrentDevice->fDDCaps.MaxAnisotropy)
        fSettings.fMaxAnisotropicSamples = (UInt8)fCurrentDevice->fDDCaps.MaxAnisotropy;

    // kDefaultDynIdxSize is declared but not used in the visible code.
    plConst(UInt32) kDefaultDynVtxSize(32000 * 44);
    plConst(UInt32) kDefaultDynIdxSize(0 * plGBufferGroup::kMaxNumIndicesPerBuffer * 2);
    fDynVtxSize = kDefaultDynVtxSize;
    fVtxRefTime = 0;
    // Go create surfaces and DX-dependent objects
    // The error message is shown when ICreateDeviceObjects returns true.
    if( ICreateDeviceObjects() )
        IShowErrorMessage( "Cannot create Direct3D device" );
    /*plStatusLog::AddLineS("pipeline.log", "Supported Resolutions:");
    std::vector<plDisplayMode> temp;
    GetSupportedDisplayModes( &temp, 16 );
    for(int i = 0; i < temp.size(); i++)
        plStatusLog::AddLineS("pipeline.log", "%d, %d, %d", temp[i].Width, temp[i].Height, 16);
    GetSupportedDisplayModes( &temp, 32 );
    for(int i = 0; i < temp.size(); i++)
        plStatusLog::AddLineS("pipeline.log", "%d, %d, %d", temp[i].Width, temp[i].Height, 32);
    */


// Cleanup - Most happens in IReleaseDeviceObject().
// NOTE(review): the destructor's signature line and braces are missing from this copy,
// as is the statement guarded by "if( fCullProxy )". Other cleanup calls may be missing
// too - confirm against the original file.
    fCurrLay = nil;
    hsAssert( fCurrMaterial == nil, "Current material not unrefed properly" );

    // fCullProxy is a debugging representation of our CullTree. See plCullTree.cpp, 
    // plScene/plOccluder.cpp and plScene/plOccluderProxy.cpp for more info
    if( fCullProxy )
    // Free the driver/device/mode records recorded by the constructor.
    delete fCurrentDriver;
    delete fCurrentDevice;
    delete fCurrentMode;


//// IClearMembers ////////////////////////////////////////////////////////////
// Initialize everything to a nil state.
// This does not initialize to a working state, but to a state that can be
// built from. For example, the fD3DObject pointer is set to nil so that it's safe
// to delete or set to a valid pointer. It must be set to a valid pointer
// before the pipeline can be used for much.
// After the core initialization is done (in ICreateMaster and ICreateDeviceObjects)
// render state will be initialized in IInitDeviceState.

// Sets the pipeline's members to nil/zero/default values. No D3D calls are made here.
// NOTE(review): the function's braces are missing from this copy, and at least one loop
// body is missing (marked below).
void    plDXPipeline::IClearMembers()
    /// Clear some stuff
    // Heads of the device resource ref lists.
    fVtxBuffRefList = nil;
    fIdxBuffRefList = nil;
    fTextureRefList = nil;
    fTextFontRefList = nil;
    fRenderTargetRefList = nil;
    fVShaderRefList = nil;
    fPShaderRefList = nil;
    fCurrMaterial = nil;
    fCurrLay = nil;
    fCurrRenderLayer = 0;
    fBoundsMat = nil;
    fBoundsSpans = nil;
    fPlateMgr = nil;
    fLogDrawer = nil;
    fDebugTextMgr = nil;
    fCurrLightingMethod = plSpan::kLiteMaterial;

    // Same initial cull mode that IInitDeviceState() later sets on the device.
    fCurrCullMode = D3DCULL_CW;
    fTexturing = false;
    fCurrNumLayers = 0;
    fLastEndingStage = -1;

    fDeviceLost = false;
    fDevWasLost = false;

    fSettings.fCurrFVFFormat = 0;
    fDynVtxBuff = nil;
    fNextDynVtx = 0;

    int i;
    for( i = 0; i < 8; i++ )
        fLayerRef[i] = nil;

    fULutTextureRef = nil;
    for( i = 0; i < kMaxRenderTargetNext; i++ )
        fBlurVBuffers[i] = nil;
    fBlurVSHandle = nil;

    // Core D3D objects; these get created later (ICreateMaster / ICreateDeviceObjects).
    fD3DObject = nil;
    fD3DDevice = nil;
    fD3DBackBuff = nil;
    fD3DDepthSurface = nil;
    fD3DMainSurface = nil;

    fSharedDepthSurface[0] = nil;
    fSharedDepthFormat[0] = D3DFMT_UNKNOWN;
    fSharedDepthSurface[1] = nil;
    fSharedDepthFormat[1] = D3DFMT_UNKNOWN;

    fCurrentMode = nil;
    fCurrentDriver = nil;
    fCurrentDevice = nil;

    fOverBaseLayer = nil;
    fOverAllLayer = nil;
    fMatPiggyBacks = 0;
    fActivePiggyBacks = 0;

    // NOTE(review): the body of the following loop is missing from this copy. As
    // written, the loop would bind to the next statement (fCullProxy = nil). Presumably
    // it reset per-stage layer state - confirm against the original file.
    for( i = 0; i < 8; i++ )
//  SetMaterialOverride( hsGMatState::kShade, hsGMatState::kShadeSpecularHighlight, false );


    fCullProxy = nil;

    fTime = 0;
    fFrame = 0;

    fInSceneDepth = 0;
    fTextUseTime = 0;
    fEvictTime = 0;
    fManagedSeen = 0;
    fManagedCutoff = 0;
    fRenderCnt = 0;


    fForceMatHandle = true;
    fAvRTShrinkValidSince = 0;
    fAvRTWidth = 1024;
    fAvNextFreeRT = 0;

// plDXViewSettings are just a convenience member struct to segregate the current view settings.
// Reset - Initialize the ViewSettings to default (normal/neutral) values.
// Sets the view settings to their defaults: normal render with color and depth clears,
// normal drawable type masks, black clear color, clear depth 1.0, a dirty cull tree
// capped at 250 nodes, an 800x600 screen size and right-handed transforms.
// NOTE(review): the function's braces are missing from this copy, and two comments
// below are not followed by any statement (marked).
void plDXViewSettings::Reset()
    // Normal render, on clear, clear the color buffer and depth buffer.
    fRenderState = plPipeline::kRenderNormal | plPipeline::kRenderClearColor | plPipeline::kRenderClearDepth;

    fRenderRequest = nil;

    fDrawableTypeMask = plDrawable::kNormal;
    fSubDrawableTypeMask = plDrawable::kSubNormal;

    // Clear color to black, depth to yon.
    fClearColor = 0;
    fClearDepth = 1.f;

    // Want to limit the number of nodes in the cull tree. After adding so many nodes,
    // the benefits (#objects culled) falls off, but the cost (evaluating objects against
    // node planes) keeps rising.
    const UInt16 kCullMaxNodes = 250;
    fCullTreeDirty = true;
    fCullMaxNodes = kCullMaxNodes;

    // Object Local to world transform and its inverse.
    // NOTE(review): no statement follows; presumably the local-to-world and
    // world-to-local matrices were reset here - confirm.

    // see Core/plViewTransform.h
    // NOTE(review): presumably a reset of fTransform preceded the SetScreenSize call.

    fTransform.SetScreenSize(800, 600);

    // Keep track of handedness of local to world and camera transform for winding.
    fLocalToWorldLeftHanded = false;
    fWorldToCamLeftHanded = false;

//// plDXGeneralSettings::Reset //////////////////////////////////////////////
// Catch all struct of general settings plus pointers to current d3d objects.

void    plDXGeneralSettings::Reset()
    fCurrVertexBuffRef = nil;
    fCurrIndexBuffRef = nil;
    fFullscreen = false;
    fHWnd = nil;
    fColorDepth = 32;
    fD3DCaps = 0;
    fBoardKluge = 0;
    fStageEnd = 0;
    fMaxNumLights = kD3DMaxTotalLights;
    fMaxNumProjectors = kMaxProjectors;
    fMaxLayersAtOnce = 1;
    fMaxPiggyBacks = 0;
    fBoundsDrawLevel = -1;

    fProperties = 0;
    fClearColor = 0;

    fNoGammaCorrect = false;
    fMaxUVWSrc = 8;
    fCantProj = false;
    fLimitedProj = false;
    fBadManaged = false;
    fShareDepth = false;
    fCurrAnisotropy = false;
    fIsIntel = false;

    fDXError = D3D_OK;
    memset( fErrorStr, 0, sizeof( fErrorStr ) );

    fCurrRenderTarget = nil;
    fCurrBaseRenderTarget = nil;
    fCurrD3DMainSurface = nil;
    fCurrD3DDepthSurface = nil;
    fCurrRenderTargetRef = nil;

    fCurrFVFFormat = 0;
    fCurrVertexShader = nil;
    fCurrPixelShader = nil;

    fVeryAnnoyingTextureInvalidFlag = false;

//// IInitDeviceState /////////////////////////////////////////////////////////
// Initialize the device to a known state. This also syncs it up with our internal state
// as recorded in the fLayerStates. 
// Some of these states reflect the caps of the device, but for the most part, the 
// important thing here is NOT what state we're in coming out of this function, but
// that we are in a known state, and that the known state is recorded in fLayerStates.
// Puts the D3D device into a known render state and mirrors that state in the
// pipeline's own bookkeeping (fCurrCullMode, fCurrD3DLiteState, the per-stage fLayer*
// arrays, fSettings.fCurrAnisotropy).
// NOTE(review): braces are missing from this copy, and the anisotropy branch below has
// lost its "else". The per-stage loop presumably spans from the fLayerLODBias assignment
// to the D3DTSS_TEXTURETRANSFORMFLAGS call, since all of those statements index by i.
void    plDXPipeline::IInitDeviceState()
    fCurrCullMode = D3DCULL_CW;

    /// Set D3D states
    ISetFogParameters( nil, nil );

    // Depth: less-or-equal test, writes on; W-buffer when the caps allow it.
    fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_LESSEQUAL );
    fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );
    fD3DDevice->SetRenderState( D3DRS_ZENABLE,      ( fSettings.fD3DCaps & kCapsWBuffer ) ? D3DZB_USEW : D3DZB_TRUE );
    fD3DDevice->SetRenderState( D3DRS_CLIPPING,     TRUE ); 
    fD3DDevice->SetRenderState( D3DRS_CULLMODE,     fCurrCullMode );

    // Alpha test passes pixels whose alpha is >= 1 (out of 255).
    fD3DDevice->SetRenderState( D3DRS_ALPHATESTENABLE,  TRUE );
    fD3DDevice->SetRenderState( D3DRS_ALPHAFUNC,        D3DCMP_GREATEREQUAL );
    fD3DDevice->SetRenderState( D3DRS_ALPHAREF,         0x00000001 );

    fD3DDevice->SetRenderState( D3DRS_MULTISAMPLEANTIALIAS, ( fSettings.fD3DCaps & kCapsFSAntiAlias ) ? TRUE : FALSE );
    fD3DDevice->SetRenderState( D3DRS_ANTIALIASEDLINEENABLE,        FALSE );

    fD3DDevice->SetRenderState( D3DRS_DITHERENABLE,     ( fSettings.fD3DCaps & kCapsDither ) ? TRUE : FALSE );
    fD3DDevice->SetRenderState( D3DRS_SPECULARENABLE,   FALSE );
    fD3DDevice->SetRenderState( D3DRS_LIGHTING,         FALSE );    
    // Keep the cached lighting flag in sync with D3DRS_LIGHTING above.
    fCurrD3DLiteState = false;
    fD3DDevice->SetRenderState( D3DRS_TEXTUREFACTOR,    0x0 );
    fD3DDevice->SetRenderState( D3DRS_STENCILENABLE,    FALSE );
    fD3DDevice->SetTransform( D3DTS_TEXTURE0,           &d3dIdentityMatrix );
    fD3DDevice->SetTransform( D3DTS_WORLD,              &d3dIdentityMatrix );

    /// NEW: to compensate for scaling transformations that might screw up our nicely
    /// normalized normals. Note: nVidia says this is as fast or faster than with
    /// this disabled, but who knows what it'll do on other cards...
    fD3DDevice->SetRenderState( D3DRS_NORMALIZENORMALS, TRUE );
    fD3DDevice->SetRenderState( D3DRS_LOCALVIEWER, TRUE );

    // Record the available texture memory reported by the device in the profiler.
    UInt32 totalMem = fD3DDevice->GetAvailableTextureMem();
    plProfile_Set(TotalTexSize, totalMem);

    // Initialization for all 8 stages (even though we only use a few of them).
    int i;
    for( i = 0; i < 8; i++ )
        fLayerLODBias[ i ] = fTweaks.fDefaultLODBias;
        fLayerTransform[ i ] = false;
        fLayerRef[ i ] = nil;
        fLayerUVWSrcs[ i ] = i;
        fLayerState[ i ].Reset();

        fD3DDevice->SetTexture( i, nil );
        fD3DDevice->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i );
        fD3DDevice->SetSamplerState( i, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP  );
        fD3DDevice->SetSamplerState( i, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP  );
        // The LOD bias is passed as the raw bits of the float, reinterpreted as a DWORD.
        fD3DDevice->SetSamplerState( i, D3DSAMP_MIPMAPLODBIAS, *(DWORD *)( &fLayerLODBias[ i ] ) );

        // NOTE(review): presumably an if/else - the first group (anisotropic min filter,
        // fCurrAnisotropy = true) is the "if" branch and the second group (linear min
        // filter, fCurrAnisotropy = false) is the "else" branch; the "else" is missing.
        if( fSettings.fMaxAnisotropicSamples > 0 && !IsDebugFlagSet(plPipeDbg::kFlagNoAnisotropy))
            fD3DDevice->SetSamplerState( i, D3DSAMP_MINFILTER, D3DTEXF_ANISOTROPIC );
            fD3DDevice->SetSamplerState( i, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR );
            fD3DDevice->SetSamplerState( i, D3DSAMP_MAXANISOTROPY, (DWORD)fSettings.fMaxAnisotropicSamples );
            fSettings.fCurrAnisotropy = true;
            fD3DDevice->SetSamplerState( i, D3DSAMP_MINFILTER, D3DTEXF_LINEAR );
            fD3DDevice->SetSamplerState( i, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR );
            fSettings.fCurrAnisotropy = false;
        fD3DDevice->SetSamplerState( i, D3DSAMP_MIPFILTER, D3DTEXF_LINEAR );

        // Identity texture transform with 2-component coordinates for this stage.
        fD3DDevice->SetTransform( sTextureStages[ i ], &d3dIdentityMatrix );
        fLayerXformFlags[ i ] = D3DTTFF_COUNT2;
        fD3DDevice->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_COUNT2 );

    // Initialize our bump mapping matrices.
    // All 16 entries of each of the three matrices are zeroed.
    for( i = 0; i < 4; i++ )
        int j;
        for( j = 0; j < 4; j++ )
            fBumpDuMatrix.fMap[i][j] = 0;
            fBumpDvMatrix.fMap[i][j] = 0;
            fBumpDwMatrix.fMap[i][j] = 0;
    // NOTE(review): further statements may have followed the loops in the original
    // file - confirm.
    PushMaterialOverride( hsGMatState::kShade, hsGMatState::kShadeSpecularHighlight, false );



//// ISetCaps /////////////////////////////////////////////////////////////////
// We've recorded the capabilities of the current device in fCurrentDevice (traditionally in the setup program),
// now translate that into our own caps flags.
// Rebuilds fSettings.fD3DCaps from scratch (starting at kCapsNone), then fills
// in the light, layer, piggyback, anisotropy and gamma limits in fSettings.
// NOTE(review): no block delimiters or else keywords are visible in this
// listing, so the grouping of the statements below is inferred from their
// indentation only.
void    plDXPipeline::ISetCaps()
    fSettings.fD3DCaps = kCapsNone;

    // Set relevant caps (ones we can do something about).
    if( fCurrentDevice->fDDCaps.RasterCaps & D3DPRASTERCAPS_DEPTHBIAS )
        fSettings.fD3DCaps |= kCapsZBias;
    if( fCurrentDevice->fDDCaps.RasterCaps & D3DPRASTERCAPS_FOGRANGE )
        fSettings.fD3DCaps |= kCapsRangeFog;
    if( fCurrentDevice->fDDCaps.RasterCaps & D3DPRASTERCAPS_FOGTABLE )
        fSettings.fD3DCaps |= kCapsLinearFog | kCapsExpFog | kCapsExp2Fog | kCapsPixelFog;
        // NOTE(review): presumably the fallback branch for devices without
        // table fog (linear fog only) -- no else is visible here; confirm.
        fSettings.fD3DCaps |= kCapsLinearFog;
    if( fCurrentDevice->fDDCaps.TextureFilterCaps & D3DPTFILTERCAPS_MIPFLINEAR )
        fSettings.fD3DCaps |= kCapsMipmap;
    if( fCurrentDevice->fDDCaps.TextureCaps & D3DPTEXTURECAPS_MIPCUBEMAP )
        fSettings.fD3DCaps |= kCapsCubicMipmap;
    if( fCurrentDevice->fDDCaps.RasterCaps & D3DPRASTERCAPS_WBUFFER )
        fSettings.fD3DCaps |= kCapsWBuffer;
    if( fCurrentDevice->fDDCaps.RasterCaps & D3DPRASTERCAPS_DITHER )
        fSettings.fD3DCaps |= kCapsDither;
    // Antialiasing is keyed off the requested sample count in fSettings,
    // not off a device caps bit.
    if( fSettings.fNumAASamples > 0 )
        fSettings.fD3DCaps |= kCapsFSAntiAlias;
    if( fCurrentDevice->fDDCaps.RasterCaps & D3DPRASTERCAPS_WFOG )
        fSettings.fD3DCaps |= kCapsDoesWFog;
    if( fCurrentDevice->fDDCaps.TextureCaps & D3DPTEXTURECAPS_CUBEMAP )
        fSettings.fD3DCaps |= kCapsCubicTextures;

    /// New 1.5.2000 - cull out mixed vertex processing
    // NOTE(review): the closing parenthesis of this condition is not visible
    // in this listing.
    if( fCurrentDevice->fDDCaps.DevCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT
        && fCurrentMode->fDDBehavior == D3DCREATE_HARDWARE_VERTEXPROCESSING 
        fSettings.fD3DCaps |= kCapsHWTransform;

    // Currently always want d3d to transform
    // (This unconditional set makes the conditional set just above redundant.)
    fSettings.fD3DCaps |= kCapsHWTransform;

    /// Always assume we can do small textures (IRestrictCaps will turn this off
    /// if necessary)
    fSettings.fD3DCaps |= kCapsDoesSmallTextures;

    /// Look for supported texture formats
    if( IFindCompressedFormats() )
        fSettings.fD3DCaps |= kCapsCompressTextures;
    if( IFindLuminanceFormats() )
        fSettings.fD3DCaps |= kCapsLuminanceTextures;

    /// Max # of hardware lights
    // Clamped to kD3DMaxTotalLights.
    fSettings.fMaxNumLights = fCurrentDevice->fDDCaps.MaxActiveLights;
    if( fSettings.fMaxNumLights > kD3DMaxTotalLights )
        fSettings.fMaxNumLights = kD3DMaxTotalLights;

    // Intel Extreme chips report 0 lights, meaning T&L is done
    // in software, so you can have as many lights as you want.
    // We only need 8, so set that here. Also turn off shadows,
    // since the extreme can't really afford them, and record
    // the fact this is the extreme for other driver problem
    // workarounds.
    // NOTE(review): no statement that turns off shadows is visible below.
    if( !fSettings.fMaxNumLights )
        fSettings.fMaxNumLights = kD3DMaxTotalLights;
        fSettings.fIsIntel = true;

    /// Max # of textures at once
    // Devices with separate texture memories are limited to a single layer.
    fSettings.fMaxLayersAtOnce = fCurrentDevice->fDDCaps.MaxSimultaneousTextures;
    if( fCurrentDevice->fDDCaps.DevCaps & D3DDEVCAPS_SEPARATETEXTUREMEMORIES )
        fSettings.fMaxLayersAtOnce = 1;
    // Alloc half our simultaneous textures to piggybacks.
    // Won't hurt us unless we try too many things at once.
    fSettings.fMaxPiggyBacks = fSettings.fMaxLayersAtOnce >> 1; 

    // Less than 4 layers at once means we have to fallback on uv bumpmapping
    if (fSettings.fMaxLayersAtOnce < 4)
        SetDebugFlag(plPipeDbg::kFlagBumpUV, true);

    // The reported DWORD is narrowed to 8 bits here.
    fSettings.fMaxAnisotropicSamples = (UInt8)(fCurrentDevice->fDDCaps.MaxAnisotropy);

    // No gamma correction unless the device reports fullscreen gamma support.
    fSettings.fNoGammaCorrect = !(fCurrentDevice->fDDCaps.Caps2 & D3DCAPS2_FULLSCREENGAMMA);

    // NOTE(review): the statement guarded by this test (device lacks projected
    // texture support) is not visible in this listing.
    if (!(fCurrentDevice->fDDCaps.TextureCaps & D3DPTEXTURECAPS_PROJECTED))


// ISetGraphicsCapability ///////////////////////////////////////////////////////
// Tell our global quality settings what we can do. We'll use this to only load
// versions we can render. So if we can render it, we load it and skip its low quality substitute,
// if we can't render it, we skip it and load its low quality substitute. 
// Naturally, this must happen before we do any loading.
// v is a D3D shader version value; it is split into major/minor parts with the
// D3DSHADER_VERSION_MAJOR/MINOR macros (presumably the pixel shader version,
// going by the local names -- confirm against the caller).
// NOTE(review): none of the branches below has a visible body in this listing,
// so what each tier actually enables cannot be documented from here.
void plDXPipeline::ISetGraphicsCapability(UInt32 v)
    int pixelMajor = D3DSHADER_VERSION_MAJOR(v);
    int pixelMinor = D3DSHADER_VERSION_MINOR(v);
    // Tier: version 2.0 or later.
    if( pixelMajor > 1 )
    // Tier: version 1.x, subdivided by minor version.
    else if( pixelMajor > 0 )
        // 1.4 and up.
        if( pixelMinor >= 4 )
        // 1.1 through 1.3.
        else if( pixelMinor > 0 )

//// IRestrictCaps ////////////////////////////////////////////////////////////
// ISetCaps() sets our native caps based on the D3D caps bits D3D returns.
// IRestrictCaps looks at our hsG3DDeviceSelector flags and translates those
// into our runtime native caps.
// The DeviceSelector flags aren't set by what the board claims, but rather
// we try to identify the board and set them according to previous knowledge.
// For example, the ATI7500 will only use uvw coordinates 0 or 1. There's
// no d3d cap to reflect this, and it really should support [0..7], but 
// there's no way to force it to be d3d compliant. So when we see we have
// an ATI7500, we set the cap kCapsMaxUVWSrc2.
// See hsG3DDeviceSelector.cpp for details and implementation.
// devRec is the device record whose caps and tuning values are applied to
// fSettings and fTweaks.
// NOTE(review): no block delimiters or else keywords are visible in this
// listing, so statement grouping below is inferred from indentation only.
void    plDXPipeline::IRestrictCaps( const hsG3DDeviceRecord& devRec )
    // Clear any native cap that the device record does not vouch for.
    if( !devRec.GetCap( hsG3DDeviceSelector::kCapsMipmap ) )
        fSettings.fD3DCaps &= ~kCapsMipmap;
    if( !devRec.GetCap( hsG3DDeviceSelector::kCapsCubicMipmap ) )
        fSettings.fD3DCaps &= ~kCapsCubicMipmap;
    if( !devRec.GetCap( hsG3DDeviceSelector::kCapsWBuffer ) )
        fSettings.fD3DCaps &= ~kCapsWBuffer;
    if( !devRec.GetCap( hsG3DDeviceSelector::kCapsZBias ) )
        fSettings.fD3DCaps &= ~kCapsZBias;
//  if( !devRec.GetCap( hsG3DDeviceSelector::kCapsHWTransform ) )
//      fSettings.fD3DCaps &= ~kCapsHWTransform;
    if( !devRec.GetCap( hsG3DDeviceSelector::kCapsDither ) )
        fSettings.fD3DCaps &= ~kCapsDither;
//  if( devRec.GetAASetting() == 0 )
//      fSettings.fD3DCaps &= ~kCapsFSAntiAlias;
    if( !devRec.GetCap( hsG3DDeviceSelector::kCapsFogExp ) )
        fSettings.fD3DCaps &= ~kCapsExpFog;
    if( !devRec.GetCap( hsG3DDeviceSelector::kCapsCubicTextures ) )
        fSettings.fD3DCaps &= ~kCapsCubicTextures;

    // NOTE(review): the statement guarded by kCapsCantShadow is not visible
    // in this listing.
    if( devRec.GetCap(hsG3DDeviceSelector::kCapsCantShadow) )

    // Board-specific restrictions recorded as plain booleans in fSettings.
    if( devRec.GetCap(hsG3DDeviceSelector::kCapsCantProj) )
        fSettings.fCantProj = true;
    if( devRec.GetCap(hsG3DDeviceSelector::kCapsLimitedProj) )
        fSettings.fLimitedProj = true;
    if( devRec.GetCap(hsG3DDeviceSelector::kCapsBadManaged) )
        fSettings.fBadManaged = true;
    if( devRec.GetCap(hsG3DDeviceSelector::kCapsShareDepth) )
        fSettings.fShareDepth = true;

    /// Added 9.6.2000 mcn - shouldn't they be here anyway?
    if( !devRec.GetCap( hsG3DDeviceSelector::kCapsFogExp2 ) )
        fSettings.fD3DCaps &= ~kCapsExp2Fog;
    if( !devRec.GetCap( hsG3DDeviceSelector::kCapsDoesSmallTextures ) )
        fSettings.fD3DCaps &= ~kCapsDoesSmallTextures;

    /// 9.22.2000 mcn - dFlag for bad (savage4) yon fix
    if( devRec.GetCap( hsG3DDeviceSelector::kCapsBadYonStuff ) )
        fSettings.fD3DCaps |= kCapsHasBadYonStuff;

    /// 10.31.2000 mcn - Flag for can't-handle-under-8-pixel-dimensions-on-textures
    /// (see, isn't the name flag actually better in retrospect? :)
    if( devRec.GetCap( hsG3DDeviceSelector::kCapsNoKindaSmallTexs ) )
        fSettings.fD3DCaps |= kCapsNoKindaSmallTexs;

    /// Note: the following SHOULD be here, but we later detect for texture
    /// formats and reset this flag. It should only be set if it is set already,
    /// but that means ensuring it's set beforehand, which it might not be.
    if( !devRec.GetCap( hsG3DDeviceSelector::kCapsCompressTextures ) )
        fSettings.fD3DCaps &= ~kCapsCompressTextures;

    /// Set up tweaks
    // NOTE(review): SetZBiasScale() multiplies by fTweaks.fDefaultPerspLayerScale,
    // which is only assigned further down in this function; confirm the
    // intended ordering.
    SetZBiasScale( (float)devRec.GetZBiasRating() );
    fTweaks.fDefaultLODBias = (float)-( 0.25 + (float)devRec.GetLODBiasRating() );
    devRec.GetFogApproxStarts( fTweaks.fFogExpApproxStart, fTweaks.fFogExp2ApproxStart );
    fTweaks.fFogEndBias = (float)devRec.GetFogEndBias();

    // Fog knee stuff
    devRec.GetFogKneeParams( hsG3DDeviceRecord::kFogExp, fTweaks.fExpFogKnee, fTweaks.fExpFogKneeVal );
    devRec.GetFogKneeParams( hsG3DDeviceRecord::kFogExp2, fTweaks.fExp2FogKnee, fTweaks.fExp2FogKneeVal );

    // Max # of layers
    // The record can only lower the limit; a value of 0 leaves it untouched.
    UInt32 max = devRec.GetLayersAtOnce();
    if( max > 0 && max < fSettings.fMaxLayersAtOnce )
        fSettings.fMaxLayersAtOnce = max;

    /// Debug flag to force high-level cards down to GeForce 2 caps
    // Bit 0x4 of fDbgSetupInitFlags: no FSAA, at most 2 layers, no anisotropy.
    if( fDbgSetupInitFlags & 0x00000004 )
        fSettings.fD3DCaps &= ~kCapsFSAntiAlias;
        if( fSettings.fMaxLayersAtOnce > 2 )
            fSettings.fMaxLayersAtOnce = 2;
        fSettings.fMaxAnisotropicSamples = 0;


    // There's a bug in NVidia drivers on Windows 2000 for GeForce1-4 (all flavors, including MX).
    // When the amount allocated into managed memory approaches the on board memory size, the performance
    // severely degrades, no matter how little is actually in use in the current rendering. So say all
    // our d3d textures are created into managed memory at age load. Also say you are
    // consistently viewing only 5Mb of managed materials (texture + vertex buffer). So as
    // you walk through the age, the new textures you see get loaded on demand into video memory.
    // Once you've seen enough to fill the on board memory, your frame rate starts falling and 
    // continues to fall as more textures get loaded. So either the memory manager is not letting
    // go of LRU textures, or fragmentation is so horrible as to make the manager useless.
    // So on these boards and with this OS, we keep track of how much managed memory we've seen,
    // and when it reaches a threshold, we flush managed memory with an EvictManagedResources() call.
    // There's an unfortunate glitch, and then the frame rate is fine again.
    // So if we need this workaround, we set fManagedCutoff to 1 here, and then once we have our
    // D3D device, we query for the amount of memory and set the threshold for flushing memory
    // based on that.
    // NOTE(review): neither the declaration of osinfo nor any call that fills
    // it in is visible in this listing; as shown, the version fields tested
    // below would still be zero after the memset.
    memset(&osinfo, 0, sizeof(osinfo));
    osinfo.dwOSVersionInfoSize = sizeof(osinfo);
    if( (osinfo.dwMajorVersion == 5)
        &&(osinfo.dwMinorVersion == 0) )
        // It's the dreaded win2k
        // Both flush flavors just arm the workaround with the placeholder 1.
        if( devRec.GetCap(hsG3DDeviceSelector::kCapsDoubleFlush) )
            fManagedCutoff = 1;
        else if( devRec.GetCap(hsG3DDeviceSelector::kCapsSingleFlush) )
            fManagedCutoff = 1;

    //// Our temp debug flag to force z-buffering...
    // W-buffering is dropped unless bit 0x1 of fDbgSetupInitFlags is set.
    if( !( fDbgSetupInitFlags & 0x00000001 ) )
        fSettings.fD3DCaps &= ~kCapsWBuffer;

    /// Set up the z-bias scale values, based on z- or w-buffering
    // NOTE(review): the two assignments are indented as alternative branches
    // (W scale when w-buffering, plain scale otherwise) -- no else is visible
    // here; confirm.
    if( fSettings.fD3DCaps & kCapsWBuffer )
        fTweaks.fDefaultPerspLayerScale = kPerspLayerScaleW;
        fTweaks.fDefaultPerspLayerScale = kPerspLayerScale;

    // Less than 4 layers at once means we have to fallback on uv bumpmapping
    // (Repeated here because the layer limit may have been lowered above.)
    if( fSettings.fMaxLayersAtOnce < 4 )
        SetDebugFlag(plPipeDbg::kFlagBumpUV, true);

    // Software vertex processing overrides the hardware transform cap.
    if( ( fSettings.fD3DCaps & kCapsHWTransform ) && ( fCurrentMode->fDDBehavior == D3DCREATE_SOFTWARE_VERTEXPROCESSING ) )
        fSettings.fD3DCaps &= ~kCapsHWTransform;

    if( devRec.GetCap(hsG3DDeviceSelector::kCapsMaxUVWSrc2) )
        fSettings.fMaxUVWSrc = 2;

    /// Anisotropy stuff
    //if( devRec.GetMaxAnisotropicSamples() < fSettings.fMaxAnisotropicSamples )
    //  fSettings.fMaxAnisotropicSamples = devRec.GetMaxAnisotropicSamples();
    // A sample count of 0 or 1 is treated the same as "no anisotropy".
    if( devRec.GetCap(hsG3DDeviceSelector::kCapsNoAniso) || (fSettings.fMaxAnisotropicSamples <= 1) )
        fSettings.fMaxAnisotropicSamples = 0;

//// Get/SetZBiasScale ////////////////////////////////////////////////////////
// If the board really doesn't support Z-biasing, we adjust the perspective matrix in IGetCameraToNDC
// The layer scale and translation are tailored to the current hardware.
// GetZBiasScale returns the current perspective layer scale expressed relative
// to the default one, minus 1 (so 0 means "default scale").
hsScalar    plDXPipeline::GetZBiasScale() const
{
    return ( fTweaks.fPerspLayerScale / fTweaks.fDefaultPerspLayerScale ) - 1.0f;
}

// Sets the perspective layer scale and translation from a relative bias:
// scale == 0 selects the defaults, larger values grow both proportionally.
void    plDXPipeline::SetZBiasScale( hsScalar scale )
{
    // Callers pass an offset from the default, so shift it to a multiplier.
    scale += 1.0f;
    fTweaks.fPerspLayerScale = fTweaks.fDefaultPerspLayerScale * scale;
    fTweaks.fPerspLayerTrans = kPerspLayerTrans * scale;
}

// Create all our video memory consuming D3D objects.
// Returns true on failure and false on success (the caller,
// ICreateDeviceObjects, bails out when this returns true).
// NOTE(review): no block delimiters are visible in this listing, and two of
// the comments below are not followed by any statement.
hsBool plDXPipeline::ICreateDynDeviceObjects()
    // Front/Back/Depth buffers
    if( ICreateNormalSurfaces() )
        return true;

    // RenderTarget pools are shared for our shadow generation algorithm.
    // Different sizes for different resolutions.
    // NOTE(review): no pool-creation statement is visible here.

    // Create device-specific stuff
    fDebugTextMgr = TRACKED_NEW plDebugTextManager();
    if( fDebugTextMgr == nil )
        return true;

    // Vertex buffers, index buffers, textures, etc.
    // NOTE(review): no resource-loading statement is visible here.

    return false;
//// ICreateDeviceObjects /////////////////////////////////////////////////////
//  Create all of our steady state D3D objects. More D3D objects will be created
// and destroyed as ages are loaded and unloaded, but these are the things that
// only go away when we lose the device.
//  Returns true as soon as any step fails, false once everything is created.
//  NOTE(review): no block delimiters are visible in this listing.

hsBool  plDXPipeline::ICreateDeviceObjects()
    // The D3D device
    // (ICreateDevice takes a "windowed" flag, hence the negation.)
    if( ICreateDevice(!fSettings.fFullscreen) )
        return true;

    // Most everything else D3D
    if( ICreateDynDeviceObjects() )
        return true;

    // PlateMgr is largely for debugging and performance stats,
    // but also gets used for some things like the cursor and 
    // linking fade to/from black.
    fPlateMgr = TRACKED_NEW plDXPlateManager( this, fD3DDevice );
    if( fPlateMgr == nil || !fPlateMgr->IsValid() )
        return true;

    // We've got everything created now, initialize to a known state.
    if( FAILED( fD3DDevice->Clear( 0, nil, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, fSettings.fClearColor, 1.0f, 0L ) ) )
        return true;

    // You may be wondering what this is. It's a workaround for a GeForce2 driver bug, where
    // clears to the Zbuffer (but not color) are getting partially ignored. Don't even ask.
    // So this is just to try and get the board used to the kind of foolishness it can expect
    // from here out.
    // (Three identical depth-only clears, issued back to back on purpose.)
    if( FAILED( fD3DDevice->Clear( 0, nil, D3DCLEAR_ZBUFFER, fSettings.fClearColor, 1.0f, 0L ) ) )
        return true;
    if( FAILED( fD3DDevice->Clear( 0, nil, D3DCLEAR_ZBUFFER, fSettings.fClearColor, 1.0f, 0L ) ) )
        return true;
    if( FAILED( fD3DDevice->Clear( 0, nil, D3DCLEAR_ZBUFFER, fSettings.fClearColor, 1.0f, 0L ) ) )
        return true;

    /// Log renderer
    // Created here and registered with the status log manager as its drawer.
    fLogDrawer = TRACKED_NEW plStatusLogDrawer( this );
    plStatusLogMgr::GetInstance().SetDrawer( fLogDrawer );

    /// Ok, we're done now
    // Drawable and material used for bounds display: both get fixed global
    // keys; the material's base layer is wireframe, two-sided and shaded white.
    fBoundsSpans = TRACKED_NEW plDrawableSpans();
    hsgResMgr::ResMgr()->NewKey( "BoundsSpans", fBoundsSpans, plLocation::kGlobalFixedLoc );
    fBoundsSpans->SetNativeProperty( plDrawable::kPropVolatile, true );
    fBoundsMat = TRACKED_NEW hsGMaterial();
    hsgResMgr::ResMgr()->NewKey( "BoundsMaterial", fBoundsMat, plLocation::kGlobalFixedLoc );
    plLayer *lay = fBoundsMat->MakeBaseLayer();
    lay->SetMiscFlags( hsGMatState::kMiscWireFrame | hsGMatState::kMiscTwoSided );
    lay->SetShadeFlags( lay->GetShadeFlags() | hsGMatState::kShadeWhite );

    // Set up a ref to these. Since we don't have a key, we use the
    // generic RefObject() (and matching UnRefObject() when we're done).
    // If we had a key, we would use myKey->AddViaNotify(otherKey) and myKey->Release(otherKey).
    // NOTE(review): the ref-taking statements this comment describes are not
    // visible in this listing.

    return false;

//// ISetCurrentDriver ////////////////////////////////////////////////////////
// Copy over the driver info.
// Replaces fCurrentDriver with a freshly allocated copy of *driv, then sets
// fCurrentAdapter to the index of the D3D adapter whose DeviceIdentifier
// matches the driver's (it stays 0 if none matches).
// NOTE(review): no block delimiters are visible in this listing.
void    plDXPipeline::ISetCurrentDriver( D3DEnum_DriverInfo *driv )
    // NOTE(review): the old record is freed before driv is read, so driv must
    // not be the currently held record -- verify against callers.
    if( fCurrentDriver != nil )
        delete fCurrentDriver;

    fCurrentDriver = TRACKED_NEW D3DEnum_DriverInfo;

    fCurrentDriver->fGuid = driv->fGuid;
    // The literal 40 is presumably the size of the destination buffers -- TODO confirm.
    hsStrncpy( fCurrentDriver->fStrDesc, driv->fStrDesc, 40 );
    hsStrncpy( fCurrentDriver->fStrName, driv->fStrName, 40 );

    fCurrentDriver->fDesktopMode = driv->fDesktopMode;
    fCurrentDriver->fAdapterInfo = driv->fAdapterInfo;

    // The copy starts with no mode or device selected.
    fCurrentDriver->fCurrentMode = nil;
    fCurrentDriver->fCurrentDevice = nil;

    /// Go looking for an adapter to match this one
    UINT    iAdapter;
    for( fCurrentAdapter = 0, iAdapter = 0; iAdapter < fD3DObject->GetAdapterCount(); iAdapter++ )
        D3DADAPTER_IDENTIFIER9      adapterInfo;
        // NOTE(review): the HRESULT of GetAdapterIdentifier is ignored.
        fD3DObject->GetAdapterIdentifier( iAdapter, 0, &adapterInfo );

        // NOTE(review): no loop exit is visible after a match, so as shown the
        // last matching adapter would win -- confirm.
        if( adapterInfo.DeviceIdentifier == fCurrentDriver->fAdapterInfo.DeviceIdentifier )
            fCurrentAdapter = iAdapter;

//// ISetCurrentDevice ////////////////////////////////////////////////////////
// Copy over the device info.
// Replaces fCurrentDevice with a freshly allocated copy of the scalar fields
// of *dev, then walks dev->fModes to copy over the usable display modes.
// NOTE(review): no block delimiters are visible in this listing.
void    plDXPipeline::ISetCurrentDevice( D3DEnum_DeviceInfo *dev )
    // NOTE(review): the old record is freed before dev is read, so dev must
    // not be the currently held record -- verify against callers.
    if( fCurrentDevice != nil )
        delete fCurrentDevice;
    fCurrentDevice = TRACKED_NEW D3DEnum_DeviceInfo;

    // The literal 40 is presumably the size of fStrName -- TODO confirm.
    hsStrncpy( fCurrentDevice->fStrName, dev->fStrName, 40 );

    fCurrentDevice->fDDCaps = dev->fDDCaps;
    fCurrentDevice->fDDType = dev->fDDType;
    fCurrentDevice->fIsHardware = dev->fIsHardware;
    fCurrentDevice->fCanWindow = dev->fCanWindow;
//  fCurrentDevice->fCanAntialias = dev->fCanAntialias;
    fCurrentDevice->fCompatibleWithDesktop = dev->fCompatibleWithDesktop;

    // copy over supported device modes
    D3DEnum_ModeInfo currMode;

    for(int i = 0; i < dev->fModes.Count(); i++)
        // filter unusable modes
        // NOTE(review): the statement that skips modes smaller than
        // MIN_WIDTH x MIN_HEIGHT is not visible in this listing.
        if(dev->fModes[i].fDDmode.Width < MIN_WIDTH || dev->fModes[i].fDDmode.Height < MIN_HEIGHT)

        currMode.fBitDepth = dev->fModes[i].fBitDepth;
        currMode.fCanRenderToCubic = dev->fModes[i].fCanRenderToCubic;
        currMode.fDDBehavior = dev->fModes[i].fDDBehavior;
        currMode.fDepthFormats = dev->fModes[i].fDepthFormats;
        currMode.fFSAATypes = dev->fModes[i].fFSAATypes;
        memcpy(&currMode.fDDmode, &dev->fModes[i].fDDmode, sizeof(D3DDISPLAYMODE));
        // NOTE(review): unbounded strcpy, unlike the hsStrncpy used for
        // fStrName above; safe only if both fStrDesc buffers are the same size.
        strcpy(currMode.fStrDesc, dev->fModes[i].fStrDesc);
        currMode.fWindowed = dev->fModes[i].fWindowed;
        // NOTE(review): no statement that stores currMode into fCurrentDevice
        // is visible in this listing.


//// ISetCurrentMode //////////////////////////////////////////////////////////
// Copy over the mode info.
// The pipeline keeps its own heap copy of *mode in fCurrentMode; any
// previously held copy is freed.
void    plDXPipeline::ISetCurrentMode( D3DEnum_ModeInfo *mode )
{
    // Build the new copy before freeing the old one, so that being handed the
    // currently held record (mode == fCurrentMode) never reads freed memory.
    D3DEnum_ModeInfo *copy = TRACKED_NEW D3DEnum_ModeInfo;
    *copy = *mode;

    // Deleting a nil pointer is a no-op, so no explicit check is needed.
    delete fCurrentMode;
    fCurrentMode = copy;
}

//// IFindCompressedFormats ///////////////////////////////////////////////////
//  New DX Way: Check to see if each format is valid.
//  Returns true only if every compressed format in the list below can be
//  used as a texture on the current adapter and device type, given the
//  current mode's display format.

hsBool  plDXPipeline::IFindCompressedFormats()
{
    // D3DFMT_UNKNOWN terminates the list.
    D3DFORMAT   toCheckFor[] = { D3DFMT_DXT1,
                                 D3DFMT_UNKNOWN };
    int         i;

    for( i = 0; toCheckFor[ i ] != D3DFMT_UNKNOWN; i++ )
    {
        // CheckDeviceFormat expects the adapter's display format ahead of the
        // usage flags; the result is only meaningful relative to that format.
        if( FAILED( fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType,
                                                    fCurrentMode->fDDmode.Format,
                                                    0, D3DRTYPE_TEXTURE, toCheckFor[ i ] ) ) )
            return false;
    }

    /// Got here, must have found them all
    return true;
}

//// IFindLuminanceFormats ////////////////////////////////////////////////////
//  New DX Way: Check to see if each format we want is valid
//  Returns true only if both luminance formats (L8 and A8L8) can be used as
//  textures on the current adapter and device type, given the current mode's
//  display format.

hsBool  plDXPipeline::IFindLuminanceFormats()
{
    // D3DFMT_UNKNOWN terminates the list.
    D3DFORMAT   toCheckFor[] = { D3DFMT_L8, D3DFMT_A8L8, D3DFMT_UNKNOWN };
    int         i;

    for( i = 0; toCheckFor[ i ] != D3DFMT_UNKNOWN; i++ )
    {
        // CheckDeviceFormat expects the adapter's display format ahead of the
        // usage flags; the result is only meaningful relative to that format.
        if( FAILED( fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType,
                                                    fCurrentMode->fDDmode.Format,
                                                    0, D3DRTYPE_TEXTURE, toCheckFor[ i ] ) ) )
            return false;
    }

    /// Got here, must have found them all
    return true;
}

//// ITextureFormatAllowed ////////////////////////////////////////////////////
//  Returns true if the given format is supported on the current device and
//  mode, false if it isn't.
//  "Supported" means CheckDeviceFormat accepts it as a plain texture (no
//  usage flags) against the current mode's display format.

hsBool      plDXPipeline::ITextureFormatAllowed( D3DFORMAT format )
{
    // CheckDeviceFormat expects the adapter's display format ahead of the
    // usage flags; the result is only meaningful relative to that format.
    const HRESULT hr = fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType,
                                                      fCurrentMode->fDDmode.Format,
                                                      0, D3DRTYPE_TEXTURE, format );
    if( FAILED( hr ) )
        return false;

    return true;
}

//// SetDebugFlag /////////////////////////////////////////////////////////////
// Debug flags should never be employed to do a game effect, although they can
// be useful for developing effects. Mostly they help in diagnosing problems
// in rendering or performance.
// Records the flag in fDebugFlags, then applies the side effects some flags
// need in order to take effect immediately.
// NOTE(review): no block delimiters are visible in this listing.
void        plDXPipeline::SetDebugFlag( UInt32 flag, hsBool on )
    fDebugFlags.SetBit(flag, on);

    if (flag == plPipeDbg::kFlagColorizeMipmaps)
        // Force textures to reload
        // Walks the whole texture ref list and marks every entry dirty.
        plDXTextureRef      *ref = fTextureRefList;
        while( ref != nil )
            ref->SetDirty( true );
            ref = ref->GetNext();

        // Reset mipmap filtering state (usually is LINEAR, but we set it to POINT for coloring)
        // Applied to samplers 0 through 7.
        int i;
        for( i = 0; i < 8; i++ )
            fD3DDevice->SetSamplerState( i, D3DSAMP_MIPFILTER, on ? D3DTEXF_POINT : D3DTEXF_LINEAR );

    // NOTE(review): the statement guarded by this test is not visible in this
    // listing.
    if (flag == plPipeDbg::kFlagNoAnisotropy)

// Returns whether the given debug flag is currently set in fDebugFlags.
hsBool plDXPipeline::IsDebugFlagSet( UInt32 flag ) const
{
    return fDebugFlags.IsBitSet(flag);
}

//// Device Creation //////////////////////////////////////////////////////////

//// ICreateMaster ////////////////////////////////////////////////////////////
//  Creates the master Direct3D objects. I guess just in case you want
//  multiple Direct3D devices.... :~
//  Looks up Direct3DCreate9 in the loaded D3D DLL and uses it to create
//  fD3DObject. Returns false on success; on failure returns whatever
//  ICreateFail() returns for the given message.

hsBool plDXPipeline::ICreateMaster()
{
    hsAssert( !fD3DObject, "ICreateMaster() should only be called for Master Direct3DDevice" );

    /// The new DirectX Way: Create a Direct3D object, out of which everything else springs
    if( hsGDDrawDllLoad::GetD3DDll() == nil )
        return ICreateFail( "Cannot load Direct3D driver!" );

    // The entry point is resolved at run time from the DLL handle rather
    // than called directly.
    Direct3DCreateProc      procPtr;
    procPtr = (Direct3DCreateProc)GetProcAddress( hsGDDrawDllLoad::GetD3DDll(), "Direct3DCreate9" );
    if( procPtr == nil )
        return ICreateFail( "Cannot load D3D Create Proc!" );

    // Create a D3D object to use
    fD3DObject = procPtr( D3D_SDK_VERSION );

    if( fD3DObject == nil )
        return ICreateFail( "Cannot create Direct3D object" );

    return false;
}

//// ICreateDevice ////////////////////////////////////////////////////
//  Creates the device. Surfaces, buffers, etc. created separately (in case of lost device).
// See ICreateDeviceObjects.
//  windowed selects a windowed device (back buffer uses the desktop format)
//  versus a fullscreen one (back buffer uses fCurrentMode's format).
//  Returns false on success; the caller treats a true result as failure.
//  NOTE(review): no block delimiters or else keywords are visible in this
//  listing, so statement grouping below is inferred from indentation only.

hsBool plDXPipeline::ICreateDevice(hsBool windowed)
    /// First, create the D3D Device object
    D3DPRESENT_PARAMETERS       params;
    D3DDISPLAYMODE              dispMode;
    int                         i;
    char                        msg[ 256 ];

    // INIT_ERROR_CHECK is defined elsewhere; presumably it bails out of this
    // function with a failure result when the HRESULT indicates failure.
    INIT_ERROR_CHECK( fD3DObject->GetAdapterDisplayMode( fCurrentAdapter, &dispMode ),
        "Cannot get desktop display mode" );

    // save desktop properties
    fDesktopParams.Width = dispMode.Width;
    fDesktopParams.Height = dispMode.Height;
    fDesktopParams.ColorDepth = GetDXBitDepth( dispMode.Format );

    if( windowed )
        // Reset fColor, since we're getting the desktop bitdepth
        fSettings.fColorDepth = GetDXBitDepth( dispMode.Format );
        // A window larger than the desktop is shrunk to the desktop size.
        if(fSettings.fOrigWidth > fDesktopParams.Width || fSettings.fOrigHeight > fDesktopParams.Height)
            fSettings.fOrigWidth = fDesktopParams.Width;
            fSettings.fOrigHeight = fDesktopParams.Height;
            IGetViewTransform().SetScreenSize(fDesktopParams.Width, fDesktopParams.Height);

    // Start from zeroed presentation parameters and fill in what we need.
    memset( &params, 0, sizeof( params ) );
    params.Windowed = ( windowed ? TRUE : FALSE );
    params.BackBufferCount = 1;
    params.BackBufferWidth = GetViewTransform().GetScreenWidth();
    params.BackBufferHeight = GetViewTransform().GetScreenHeight();
    params.EnableAutoDepthStencil = TRUE;

    // NOTE: This was changed 5.29.2001 mcn to avoid the nasty flashing bug on nVidia's 12.60 beta drivers
// SWAPEFFECT must be _DISCARD when using antialiasing, so we'll just go with _DISCARD for the time being. mf
    params.SwapEffect = D3DSWAPEFFECT_DISCARD;
    params.FullScreen_RefreshRateInHz = ( windowed ? 0 : D3DPRESENT_RATE_DEFAULT );
        // NOTE(review): these two assignments are indented as alternatives,
        // but whatever selects between them is not visible in this listing;
        // as shown, the fVSync-based value is the one that sticks.
        params.PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT;
        params.PresentationInterval = ( fVSync ? D3DPRESENT_INTERVAL_DEFAULT : D3DPRESENT_INTERVAL_IMMEDIATE );

    // Log every depth format the current mode offers.
    // NOTE(review): sprintf into the fixed 256-byte msg buffer is unbounded.
    for( i = 0; i < fCurrentMode->fDepthFormats.GetCount(); i++ )
        sprintf( msg, "-- Valid depth buffer format: %s", IGetDXFormatName( fCurrentMode->fDepthFormats[ i ] ) );
        hsDebugMessage( msg, 0 );

    // Attempt to find the closest AA setting we can
    // Counts down from the requested sample count to 2.
    // NOTE(review): no loop exit is visible after a match, so as shown the
    // lowest supported sample count would win -- confirm.
    params.MultiSampleType = D3DMULTISAMPLE_NONE;
    for( i = fSettings.fNumAASamples; i >= 2; i-- )
        if( fCurrentMode->fFSAATypes.Find( (D3DMULTISAMPLE_TYPE)i ) != fCurrentMode->fFSAATypes.kMissingIndex )
            params.MultiSampleType = (D3DMULTISAMPLE_TYPE)i;

    if( !IFindDepthFormat(params) )
        // If we haven't found a depth format, turn off multisampling and try it again.
        params.MultiSampleType = D3DMULTISAMPLE_NONE;
        if( !IFindDepthFormat(params) )
            // Okay, we're screwed here, we might as well bail.
            return ICreateFail( "Can't find a Depth Buffer format" );

    /// TEMP HACK--if we're running 16-bit z-buffer or below, use our z-bias (go figure, it works better
    /// in 16-bit, worse in 24 and 32)
    if( params.AutoDepthStencilFormat == D3DFMT_D15S1 || 
        params.AutoDepthStencilFormat == D3DFMT_D16 ||
        params.AutoDepthStencilFormat == D3DFMT_D16_LOCKABLE )
        fSettings.fD3DCaps &= ~kCapsZBias;

    sprintf( msg, "-- Requesting depth buffer format: %s", IGetDXFormatName( params.AutoDepthStencilFormat ) );
    hsDebugMessage( msg, 0 );

    params.BackBufferFormat = ( windowed ? dispMode.Format : fCurrentMode->fDDmode.Format );
    sprintf( msg, "-- Requesting back buffer format: %s", IGetDXFormatName( params.BackBufferFormat ) );
    hsDebugMessage( msg, 0 );

    params.hDeviceWindow = fSettings.fHWnd;

    // Enable this to switch to a pure device. 
//  fCurrentMode->fDDBehavior |= D3DCREATE_PUREDEVICE;

    // Look for an NVPerfHUD adapter and, if present, switch over to it.
    // NOTE(review): the declaration of id is not visible in this listing.
    UINT adapter;
    for (adapter = 0; adapter < fD3DObject->GetAdapterCount(); adapter++)
        fD3DObject->GetAdapterIdentifier(adapter, 0, &id);

        // We should be matching against "NVIDIA NVPerfHUD", but the space
        // in the description seems to be bogus. This seems to be a fair
        // alternative
        if (strstr(id.Description, "NVPerfHUD"))
            // This won't actually use the REF device, but we ask for
            // it as part of the handshake to let NVPerfHUD know we give
            // it permission to analyze us.
            fCurrentAdapter = adapter;
            fCurrentDevice->fDDType= D3DDEVTYPE_REF;
            SetDebugFlag(plPipeDbg::kFlagNVPerfHUD, true);

    INIT_ERROR_CHECK( fD3DObject->CreateDevice( fCurrentAdapter, fCurrentDevice->fDDType, 
                                              fSettings.fHWnd, fCurrentMode->fDDBehavior,
                                              &params, &fD3DDevice ),
                        "Cannot create primary display surface via CreateDevice()" );

    // Remember the parameters the device was actually created with.
    fSettings.fPresentParams = params;

    // This bit matches up with the fManagedCutoff workaround for a problem
    // with the NVidia drivers on win2k. Search for "GetVersionEx" in IRestrictCaps
    // for more info.
    UInt32 mem = fD3DDevice->GetAvailableTextureMem();
    plProfile_IncCount(TexTot, mem);

    // A non-zero fManagedCutoff means the workaround is armed; replace the
    // placeholder with a real byte threshold based on available texture memory.
    const UInt32 kSingleFlush(40000000);
    const UInt32 kDoubleFlush(24000000);
    if( fManagedCutoff )
        // NOTE(review): the two assignments are indented as alternative
        // branches (under / not under 64000000 bytes) -- no else is visible
        // here; confirm.
        if( mem < 64000000 )
            fManagedCutoff = kDoubleFlush;
            fManagedCutoff = kSingleFlush;

    return false;

// IFindDepthFormat //////////////////////////////////////////////////////////////
// Look through available depth formats for the closest to what we want that
// will work.
// Scans fCurrentMode->fDepthFormats from last to first. A format is accepted
// only if CheckDeviceMultiSampleType succeeds for it with the multisample type
// already stored in params (the caller sets params.MultiSampleType first).
// On success, writes the chosen format to params.AutoDepthStencilFormat and
// the number of stencil bits to fStencil.fDepth, and returns true.
// Returns false if no listed format is usable.
hsBool plDXPipeline::IFindDepthFormat(D3DPRESENT_PARAMETERS& params)
{
    const BOOL  windowed = fCurrentMode->fWindowed ? TRUE : FALSE;
    int         i;

    // Okay, we're not using the stencil buffer right now, and it's bringing out
    // some painful driver bugs on the GeForce2. So rather than go out of our way
    // looking for trouble, we're going to look for a depth buffer with NO STENCIL.
    for( i = fCurrentMode->fDepthFormats.GetCount() - 1; i >= 0; i-- )
    {
        D3DFORMAT fmt = fCurrentMode->fDepthFormats[ i ];
        if( (fmt == D3DFMT_D32)
            ||(fmt == D3DFMT_D24X8)
            ||(fmt == D3DFMT_D16) )
        {
            // The check is per device type and per surface format, so both
            // must be passed along with the windowed flag and sample type.
            HRESULT hr = fD3DObject->CheckDeviceMultiSampleType(fCurrentAdapter,
                                                                fCurrentDevice->fDDType,
                                                                fmt,
                                                                windowed,
                                                                params.MultiSampleType, NULL);
            if( !FAILED(hr) )
            {
                params.AutoDepthStencilFormat = fmt;
                fStencil.fDepth = 0;
                // Leave i >= 0 so the code below knows a format was found.
                break;
            }
        }
    }

    // i < 0 means the loop above ran out without a match: fall back to the
    // formats that carry a stencil channel.
    if( i < 0 )
    {
        for( i = fCurrentMode->fDepthFormats.GetCount() - 1; i >= 0; i-- )
        {
            D3DFORMAT fmt = fCurrentMode->fDepthFormats[ i ];
            if( fmt == D3DFMT_D15S1 || fmt == D3DFMT_D24X4S4 || fmt == D3DFMT_D24S8 )
            {
                HRESULT hr = fD3DObject->CheckDeviceMultiSampleType(fCurrentAdapter,
                                                                    fCurrentDevice->fDDType,
                                                                    fmt,
                                                                    windowed,
                                                                    params.MultiSampleType, NULL);
                if( !FAILED(hr) )
                {
                    params.AutoDepthStencilFormat = fmt;
                    // Stencil bit count follows from the format name.
                    if( fmt == D3DFMT_D15S1 )
                        fStencil.fDepth = 1;
                    else if( fmt == D3DFMT_D24X4S4 )
                        fStencil.fDepth = 4;
                    else
                        fStencil.fDepth = 8;
                    break;
                }
            }
        }
    }

    return i >= 0;
}

// ICreateNormalSurfaces //////////////////////////////////////////////////////
// Create the primary color and depth buffers.
// Fetches the back buffer, main render target and depth-stencil surface from
// fD3DDevice, stores them in fD3DBackBuff / fD3DMainSurface / fD3DDepthSurface,
// and makes the latter two the current surfaces in fSettings.
// Returns false on success. INIT_ERROR_CHECK is defined elsewhere; presumably
// it bails out with a failure result when a call fails.
hsBool plDXPipeline::ICreateNormalSurfaces()
{
    /// Now get the backbuffer surface pointer
    INIT_ERROR_CHECK( fD3DDevice->GetBackBuffer( 0, 0, D3DBACKBUFFER_TYPE_MONO, &fD3DBackBuff ), 
                        "Cannot get primary surface's back buffer" );

    /// And finally, get the main D3D surfaces (for restoring after rendertargets )
    INIT_ERROR_CHECK( fD3DDevice->GetRenderTarget( 0, &fD3DMainSurface ), "Cannot capture primary surface" );
    INIT_ERROR_CHECK( fD3DDevice->GetDepthStencilSurface( &fD3DDepthSurface ), "Cannot capture primary depth surface" );

    fSettings.fCurrD3DMainSurface = fD3DMainSurface;
    fSettings.fCurrD3DDepthSurface = fD3DDepthSurface;

    // Surface bookkeeping macros (defined elsewhere in the project).
    D3DSURF_MEMNEW( fD3DMainSurface );
    D3DSURF_MEMNEW( fD3DDepthSurface );
    D3DSURF_MEMNEW( fD3DBackBuff );

    // NOTE(review): each description overwrites the previous one and none is
    // read afterwards in the visible code; kept as-is in case a consumer of
    // info belongs between these calls.
    D3DSURFACE_DESC info; 
    fD3DMainSurface->GetDesc( &info );
    fD3DDepthSurface->GetDesc( &info );
    fD3DBackBuff->GetDesc( &info );

    return false;
}

// IReleaseRenderTargetPools //////////////////////////////////////////////////
// Free up all resources assosiated with our pools of rendertargets of varying
// sizes. Primary user of these pools is the shadow generation.
void plDXPipeline::IReleaseRenderTargetPools()
    int i;

    for( i = 0; i < fRenderTargetPool512.GetCount(); i++ )
        delete fRenderTargetPool512[i];
        fRenderTargetPool512[i] = nil;

    for( i = 0; i < fRenderTargetPool256.GetCount(); i++ )
        delete fRenderTargetPool256[i];
        fRenderTargetPool256[i] = nil;

    for( i = 0; i < fRenderTargetPool128.GetCount(); i++ )
        delete fRenderTargetPool128[i];
        fRenderTargetPool128[i] = nil;

    for( i = 0; i < fRenderTargetPool64.GetCount(); i++ )
        delete fRenderTargetPool64[i];
        fRenderTargetPool64[i] = nil;

    for( i = 0; i < fRenderTargetPool32.GetCount(); i++ )
        delete fRenderTargetPool32[i];
        fRenderTargetPool32[i] = nil;

    for( i = 0; i < kMaxRenderTargetNext; i++ )
        fRenderTargetNext[i] = 0;
        fBlurScratchRTs[i] = nil;
        fBlurDestRTs[i] = nil;


// IReleaseDynDeviceObjects //////////////////////////////////////////////
// Make sure we aren't holding on to anything, and release all of
// the D3D resources that we normally hang on to forever. Meaning things
// that persist through unloading one age and loading the next.
// NOTE(review): no block delimiters are visible in this listing, and several
// loops and tests below have no visible body; see the notes at each spot.
void plDXPipeline::IReleaseDynDeviceObjects()
    // We should do this earlier, but the textFont objects don't remove
    // themselves from their parent objects yet
    delete fDebugTextMgr;
    fDebugTextMgr = nil;

    // Unbind stream 0 so the device holds no vertex buffer.
    if( fD3DDevice )
        fD3DDevice->SetStreamSource(0, nil, 0, 0);

    /// Delete actual d3d objects
    // Drop our refs on the currently bound vertex and index buffer refs.
    hsRefCnt_SafeUnRef( fSettings.fCurrVertexBuffRef );
    fSettings.fCurrVertexBuffRef = nil;
    hsRefCnt_SafeUnRef( fSettings.fCurrIndexBuffRef );
    fSettings.fCurrIndexBuffRef = nil;

    // Presumably each font ref unlinks itself from the list head when
    // destroyed, which is what lets this loop terminate -- TODO confirm.
    while( fTextFontRefList )
        delete fTextFontRefList;

    // NOTE(review): the statements that release rtRef and advance the list
    // are not visible in this listing.
    while( fRenderTargetRefList )
        plDXRenderTargetRef* rtRef = fRenderTargetRefList;

    // The shared dynamic vertex buffers used by things like objects skinned on CPU, or
    // particle systems.
    // NOTE(review): no statement releasing those buffers is visible here.

    // NOTE(review): only the format reset is visible for each shared depth
    // surface; no release of the surface itself is visible here.
    if( fSharedDepthSurface[0] )
        fSharedDepthFormat[0] = D3DFMT_UNKNOWN;
    if( fSharedDepthSurface[1] )
        fSharedDepthFormat[1] = D3DFMT_UNKNOWN;

    // Surface bookkeeping macros (defined elsewhere), invoked before the
    // surfaces themselves are released.
    D3DSURF_MEMDEL( fD3DMainSurface );
    D3DSURF_MEMDEL( fD3DDepthSurface );
    D3DSURF_MEMDEL( fD3DBackBuff );

    ReleaseObject( fD3DBackBuff );
    ReleaseObject( fD3DDepthSurface );
    ReleaseObject( fD3DMainSurface );


// IReleaseShaders ///////////////////////////////////////////////////////////////
// Delete our vertex and pixel shaders. Releasing the plasma ref will release the
// D3D handle.
// Walks both shader ref lists from the head until each list is empty.
void plDXPipeline::IReleaseShaders()
    // Vertex shaders: take the current head of the list each iteration.
    while( fVShaderRefList )
        plDXVertexShader* ref = fVShaderRefList;

    // Pixel shaders: same head-of-list pattern.
    while( fPShaderRefList )
        plDXPixelShader* ref = fPShaderRefList;

//// IReleaseDeviceObjects ///////////////////////////////////////////////////////
// Release everything we've created. This is the main cleanup function.
// Ends by releasing the D3D device and the D3D object themselves and clearing
// the managed/unmanaged allocation flags.
void    plDXPipeline::IReleaseDeviceObjects()
    plDXDeviceRef   *ref;

    /// Delete d3d-dependent objects
    if( fBoundsSpans )
    fBoundsSpans = nil;
    if( fBoundsMat )
    fBoundsMat = nil;

    // Detach the log drawer from the status log manager before deleting it.
    plStatusLogMgr::GetInstance().SetDrawer( nil );
    delete fLogDrawer;
    fLogDrawer = nil;

    // NOTE(review): a size of 0 presumably frees the pixel scratch buffer -- confirm.
    IGetPixelScratch( 0 );  

    // Clear the 8 cached layer ref slots.
    int i;
    for( i = 0; i < 8; i++ )
        if( fLayerRef[i] )
            fLayerRef[i] = nil;


    // The U-LUT texture ref's fData array is freed explicitly here.
    if( fULutTextureRef )
        delete [] fULutTextureRef->fData;
    fULutTextureRef = nil;

    // Drain the vertex buffer, index buffer and texture ref lists, taking
    // the head of each list every iteration.
    while( fVtxBuffRefList )
        ref = fVtxBuffRefList;
    while( fIdxBuffRefList )
        ref = fIdxBuffRefList;
    while( fTextureRefList )
        ref = fTextureRefList;




    delete fPlateMgr;
    fPlateMgr = nil;

    // Release() returns the remaining COM reference count. Keep releasing
    // until it reaches zero, logging each leftover reference as an error.
    if( fD3DDevice != nil )
        LONG ret;
        while( ret = fD3DDevice->Release() )
            hsStatusMessageF("%d - Error releasing device", ret);
        fD3DDevice = nil;

    // Same release-until-zero treatment for the Direct3D object.
    if( fD3DObject != nil )
        LONG ret;
        while( ret = fD3DObject->Release() )
            hsStatusMessageF("%d - Error releasing Direct3D Object", ret);
        fD3DObject = nil;

    // Nothing is allocated any more, so reset both allocation flags.
    fManagedAlloced = false;
    fAllocUnManaged = false;

// IReleaseDynamicBuffers /////////////////////////////////////////////////
// Release everything we've created in POOL_DEFAULT.
// This is called on shutdown or when we lose the device. Search for D3DERR_DEVICELOST.
// Walks the texture, vertex buffer and index buffer ref lists, then drops the
// global dynamic vertex buffer and resets its write cursor.
void plDXPipeline::IReleaseDynamicBuffers()
    // Workaround for ATI driver bug.
    // Only taken when fSettings.fBadManaged is set; walks every texture ref.
    if( fSettings.fBadManaged )
        plDXTextureRef* tRef = fTextureRefList;
        while( tRef )
            tRef = tRef->GetNext();
    // Vertex buffers: volatile ones that currently hold a D3D buffer have
    // their buffer pointer cleared.
    plDXVertexBufferRef* vbRef = fVtxBuffRefList;
    while( vbRef )
        if( vbRef->Volatile() && vbRef->fD3DBuffer )
            vbRef->fD3DBuffer = nil;

            // Actually, if it's volatile, it's sharing the global dynamic vertex buff, so we're already
            // accounting for the memory when we clear the global buffer.
            //PROFILE_POOL_MEM(D3DPOOL_DEFAULT, vbRef->fCount * vbRef->fVertexSize, false, "VtxBuff");
        // 9600 THRASH
        else if( fSettings.fBadManaged )
        vbRef = vbRef->GetNext();
    // Index buffers: every ref with a D3D buffer is cleared, volatile or not.
    plDXIndexBufferRef* iRef = fIdxBuffRefList;
    while( iRef )
        // If it's volatile, we have to release it.
        // If it's not, we want to release it so
        // we can make it volatile (D3DPOOL_DEFAULT)
        if (iRef->fD3DBuffer)
            iRef->fD3DBuffer = nil;
            // Account for the freed index memory (16-bit indices).
            PROFILE_POOL_MEM(iRef->fPoolType, iRef->fCount * sizeof(UInt16), false, "IndexBuff");
        iRef = iRef->GetNext();
    // Global dynamic vertex buffer: account for its memory, then drop it.
    if (fDynVtxBuff)
        PROFILE_POOL_MEM(D3DPOOL_DEFAULT, fDynVtxSize, false, "DynVtxBuff");
        fDynVtxBuff = nil;

    // Restart writing at the beginning of the dynamic vertex buffer.
    fNextDynVtx = 0;


    // PlateMgr has a POOL_DEFAULT vertex buffer for drawing quads.
    if( fPlateMgr )

    // Also has POOL_DEFAULT vertex buffer.


// ICreateDynamicBuffers /////////////////////////////////////////////////////
// Create the things we need in POOL_DEFAULT. We clump them into this function,
// because they must be created before anything in POOL_MANAGED.
// So we create these global POOL_DEFAULT objects here, then send out a message
// to the objects in the scene to create anything they need in POOL_DEFAULT,
// then go on to create things on POOL_MANAGED.
// See LoadResources().
void plDXPipeline::ICreateDynamicBuffers()


    if( fPlateMgr )

    // Start writing at the beginning of the dynamic vertex buffer.
    fNextDynVtx = 0;


    // POOL_DEFAULT allocations must come before any POOL_MANAGED ones, so
    // nothing managed may have been allocated by the time we get here.
    hsAssert(!fManagedAlloced, "Alloc default with managed alloc'd");
    // Only create the global dynamic vertex buffer if a size is configured.
    if( fDynVtxSize )
        PROFILE_POOL_MEM(poolType, fDynVtxSize, true, "DynVtxBuff");
        if( FAILED( fD3DDevice->CreateVertexBuffer( fDynVtxSize,
                                                    &fDynVtxBuff, NULL) ) )
            // No recovery path exists for a failed creation; it just asserts.
            hsAssert(false, "Don't know what to do here.");

// IPrintDeviceInitError ////////////////////////////////////////////////////////
// Show a localized error message box telling the user that video card
// initialization failed and that settings were reset to their defaults.
// NOTE(review): the accented characters in the French and Spanish strings
// appear mis-encoded in this file -- confirm the source file encoding before
// editing those literals.
void plDXPipeline::IPrintDeviceInitError()
    // str holds the message body, err the message box title.
    char str[256];
    char err[16];
        case plLocalization::kFrench:   strcpy(err, "Erreur"); strcpy(str, "Erreur d'initialisation de votre carte graphique. Les valeurs par d�faut de ses param�tres ont �t� r�tablis. ");    break;
        case plLocalization::kGerman:   strcpy(err, "Fehler");  strcpy(str, "Bei der Initialisierung Ihrer Grafikkarte ist ein Fehler aufgetreten. Standardeinstellungen werden wiederhergestellt."); break;
        case plLocalization::kSpanish:  strcpy(err, "Error"); strcpy(str, "Ocurri� un error al inicializar tu tarjeta de v�deo. Hemos restaurado los ajustes por defecto. "); break;
        case plLocalization::kItalian:  strcpy(err, "Errore");  strcpy(str, "Errore di inizializzazione della scheda video. Sono state ripristinate le impostazioni predefinite."); break;
        // Any other language falls back to English.
        default:                        strcpy(err, "Error"); strcpy(str, "There was an error initializing your video card. We have reset it to its Default settings."); break;
    hsMessageBox(str, err, hsMessageBoxNormal, hsMessageBoxIconError);

// IResetToDefaults /////////////////////////////////////////////////////////////
// Reset device creation parameters to default and write to ini file.
// params is the D3DPRESENT_PARAMETERS structure to fill in from
// fDefaultPipeParams; the matching fSettings fields, the camera and several
// global render options are updated alongside it.
void plDXPipeline::IResetToDefaults(D3DPRESENT_PARAMETERS *params)
    // this will reset device parameters to default and make sure all other necessary parameters are updated
    params->BackBufferWidth = fDefaultPipeParams.Width;
    params->BackBufferHeight = fDefaultPipeParams.Height;
    fSettings.fOrigWidth = fDefaultPipeParams.Width;
    fSettings.fOrigHeight = fDefaultPipeParams.Height;
    IGetViewTransform().SetScreenSize(fDefaultPipeParams.Width, fDefaultPipeParams.Height);
    // The back buffer format is always 32-bit X8R8G8B8, whatever the default color depth.
    params->BackBufferFormat = D3DFMT_X8R8G8B8;
    fSettings.fColorDepth = fDefaultPipeParams.ColorDepth;

    // Search the device's mode list for a 32-bit mode at the default resolution.
    int i;
    hsTArray<D3DEnum_ModeInfo> *modes = &fCurrentDevice->fModes;
    for( i = 0; i < modes->Count(); i++ )
        D3DEnum_ModeInfo *mode = &(*modes)[i];
        if(mode->fDDmode.Width == params->BackBufferWidth &&
            mode->fDDmode.Height == params->BackBufferHeight &&
            mode->fBitDepth == 32 )
    // Windowed/fullscreen state is mirrored in three places.
    params->Windowed = fDefaultPipeParams.Windowed;
    fSettings.fFullscreen = !fDefaultPipeParams.Windowed;
    fCurrentMode->fWindowed = fDefaultPipeParams.Windowed;

     // Attempt to find the closest AA setting we can
     // Start from "no AA" and walk down from the requested sample count,
     // checking each count against the current mode's supported FSAA types.
    params->MultiSampleType = D3DMULTISAMPLE_NONE;
    fSettings.fNumAASamples = 0;
    for( int i = fDefaultPipeParams.AntiAliasingAmount; i >= 2; i-- )
        if( fCurrentMode->fFSAATypes.Find( (D3DMULTISAMPLE_TYPE)i ) != fCurrentMode->fFSAATypes.kMissingIndex )
            fSettings.fNumAASamples = i;
            params->MultiSampleType = (D3DMULTISAMPLE_TYPE)i;
    fSettings.fMaxAnisotropicSamples = fDefaultPipeParams.AnisotropicLevel;

    // Defaults run with vsync off: present immediately.
    fVSync = false;

    params->PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;

    plShadowCaster::EnableShadowCast(fDefaultPipeParams.Shadows ? true : false);
    plDynamicCamMap::SetEnabled(fDefaultPipeParams.PlanarReflections ? true : false);
    // Higher texture quality means fewer chopped mip levels (2 - quality).
    plBitmap::SetGlobalLevelChopCount(2 - fDefaultPipeParams.TextureQuality);

    // adjust camera properties
    plVirtualCam1::SetAspectRatio((float)fSettings.fOrigWidth / (float)fSettings.fOrigHeight);
    plVirtualCam1::SetFOV(plVirtualCam1::GetFOVw(), plVirtualCam1::GetFOVh());
    // fire off a message to the client so we can write defaults to the ini file, and adjust the window size
    plKey clientKey = hsgResMgr::ResMgr()->FindKey( kClient_KEY );
    plClientMsg* clientMsg = TRACKED_NEW plClientMsg(plClientMsg::kSetGraphicsDefaults);


// IResetDevice
// Reset the device to its operational state.
// Returns true if not ready yet, false if the reset was successful.
// Note that true is also returned on the pass where the reset succeeds, so
// that callers take one more spin around the update loop (see below).
// All this is generally in response to a fullscreen alt-tab.
hsBool plDXPipeline::IResetDevice()
    // Debug hook: set fakeDevLost to true to exercise the lost-device path by hand.
    hsBool fakeDevLost(false);
    if( fakeDevLost )
        fDeviceLost = true;

    if( fDeviceLost )

        HRESULT coopLev = fD3DDevice->TestCooperativeLevel();
        if( coopLev == D3DERR_DEVICELOST )
            // Nothing to do yet.
            return true;
        if( fakeDevLost )
            coopLev = D3DERR_DEVICENOTRESET;
        // Reset either because D3D says the device can now be reset, or
        // because a settings change (fForceDeviceReset) demands one.
        if( coopLev == D3DERR_DEVICENOTRESET || fForceDeviceReset)
            plStatusLog::AddLineS("pipeline.log", 0xffff0000, "Resetting Device");
            if( !IFindDepthFormat(fSettings.fPresentParams) )
                // If we haven't found a depth format, turn off multisampling and try it again.
                fSettings.fPresentParams.MultiSampleType = D3DMULTISAMPLE_NONE;
            HRESULT hr = fD3DDevice->Reset(&fSettings.fPresentParams);
            // Retry Reset() while it fails; count tracks the attempts and
            // is checked against a limit of 25.
            int count = 0;
            while( FAILED(hr) )
                if(count++ == 25)
                // Still not ready? This is bad.
                // Until we called Reset(), we could make any D3D call we wanted,
                // and it would turn into a no-op. But once we call Reset(), until
                // the device really is reset, anything but TestCoop/Reset/Release
                // has just become illegal. We've already released everything, Reset
                // just failed, not much to do but wait and try again.
                hr = fD3DDevice->Reset(&fSettings.fPresentParams);
            // Device state is gone after a reset: forget the cached FVF and
            // vertex shader, and note that nothing managed is allocated yet.
            fSettings.fCurrFVFFormat = 0;
            fSettings.fCurrVertexShader = NULL;
            fManagedAlloced = false;

            /// Broadcast a message letting everyone know that we were recreated and that
            /// all device-specific stuff needs to be recreated
            plDeviceRecreateMsg* clean = TRACKED_NEW plDeviceRecreateMsg();

        fDevWasLost = true;
        fDeviceLost = false;

        // We return true here, even though we've successfully recreated, to take
        // another spin around the update loop and give everyone a chance to
        // get back in sync.
        return true;
    return false;

// ResetDisplayDevice ///////////////////////////////////////////////////////////
// Apply a new set of display settings (resolution, color depth, windowed
// state, antialiasing, anisotropy, vsync) to fSettings and the present
// parameters, then flag the device as lost so that a reset is forced.
// Returns early without touching anything if no setting has changed.
void plDXPipeline::ResetDisplayDevice(int Width, int Height, int ColorDepth, hsBool Windowed, int NumAASamples, int MaxAnisotropicSamples, hsBool VSync /* = false */)
    if( fSettings.fPresentParams.BackBufferWidth == Width &&
        fSettings.fPresentParams.BackBufferHeight == Height &&
        (fSettings.fPresentParams.Windowed ? 1 : fSettings.fColorDepth == ColorDepth) && // if we're windowed dont check color depth we just use the desktop colordepth
        ((fSettings.fPresentParams.Windowed && Windowed)  || (!fSettings.fPresentParams.Windowed && !Windowed)) &&
        fSettings.fNumAASamples == NumAASamples &&
        fSettings.fMaxAnisotropicSamples == MaxAnisotropicSamples &&
        fVSync == VSync 
        return;     // nothing has changed

    fVSync = VSync;
    int i = 0;
    hsTArray<D3DEnum_ModeInfo> *modes = &fCurrentDevice->fModes;
    // check for supported resolution if we're not going to windowed mode
        for( i = 0; i < modes->Count(); i++ )
            D3DEnum_ModeInfo *mode = &(*modes)[i];
            if(mode->fDDmode.Width == Width &&
                mode->fDDmode.Height == Height &&
                mode->fBitDepth == ColorDepth )
    // i == modes->Count() means the search ran off the end without a match;
    // the resolution is only applied when that did not happen.
    if(i != modes->Count())
        // Set Resolution
        fSettings.fOrigWidth = Width;
        fSettings.fOrigHeight = Height;
        IGetViewTransform().SetScreenSize(Width, Height);
        fSettings.fPresentParams.BackBufferWidth = Width;
        fSettings.fPresentParams.BackBufferHeight = Height;
        fSettings.fColorDepth = ColorDepth;
        fSettings.fPresentParams.BackBufferFormat = D3DFMT_X8R8G8B8;

    // set windowed/fullscreen mode
    fCurrentMode->fWindowed = Windowed;
    fSettings.fPresentParams.Windowed = Windowed;
    fSettings.fFullscreen = !Windowed;

    // set Antialiasing
    fSettings.fNumAASamples = 0;
    // Attempt to find the closest AA setting we can
    // Walk down from the requested sample count, checking each count
    // against the current mode's supported FSAA types.
    fSettings.fPresentParams.MultiSampleType = D3DMULTISAMPLE_NONE;
    for( i = NumAASamples; i >= 2; i-- )
        if( fCurrentMode->fFSAATypes.Find( (D3DMULTISAMPLE_TYPE)i ) != fCurrentMode->fFSAATypes.kMissingIndex )
            fSettings.fNumAASamples = i;
            fSettings.fPresentParams.MultiSampleType = (D3DMULTISAMPLE_TYPE)i;
    // The kCapsFSAntiAlias capability bit tracks whether AA ended up enabled.
    if( fSettings.fNumAASamples > 0 )
        fSettings.fD3DCaps |= kCapsFSAntiAlias;
        fSettings.fD3DCaps &= ~kCapsFSAntiAlias;

    // Set Anisotropic filtering
    fSettings.fMaxAnisotropicSamples = MaxAnisotropicSamples;
    ISetAnisotropy(MaxAnisotropicSamples > 0);
        fSettings.fPresentParams.PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT;
        fSettings.fPresentParams.PresentationInterval = ( fVSync ? D3DPRESENT_INTERVAL_DEFAULT : D3DPRESENT_INTERVAL_IMMEDIATE );

    // Force a device reset
    fDeviceLost = true;
    fForceDeviceReset = true;
    // Keep the camera in step with the requested resolution.
    plVirtualCam1::SetAspectRatio((float)Width / (float)Height);
    plVirtualCam1::SetFOV(plVirtualCam1::GetFOVw(), plVirtualCam1::GetFOVh());


// GetSupportedColorDepths //////////////////////////////////////////////////////
// Append to ColorDepths every distinct bit depth found among the current
// device's display modes. Depths already present in ColorDepths on entry are
// not added a second time. Does nothing if there is no current device.
void plDXPipeline::GetSupportedColorDepths(hsTArray<int> &ColorDepths)
{
    if( !fCurrentDevice )
        return;

    int i, j;
    // iterate through display modes
    for( i = 0; i < fCurrentDevice->fModes.Count(); i++ )
    {
        // Check to see if color depth has been added already.
        // The output list must be indexed with j (this loop's index); using
        // the mode index i here misses duplicates and can read past the end
        // of ColorDepths.
        for( j = 0; j < ColorDepths.Count(); j++ )
        {
            if( fCurrentDevice->fModes[i].fBitDepth == ColorDepths[j] )
                break;
        }
        // j ran off the end, so this depth is not in the list yet.
        if( j == ColorDepths.Count() )
        {
            //add it
            ColorDepths.Push( fCurrentDevice->fModes[i].fBitDepth );
        }
    }
}

// GetSupportedDisplayModes /////////////////////////////////////////////////////
// Build the list of distinct width/height display modes that the current
// device offers at the given ColorDepth, and copy that list into *res
// (overwriting whatever *res held before).
void plDXPipeline::GetSupportedDisplayModes(std::vector<plDisplayMode> *res, int ColorDepth  )
    int i, j;
    std::vector<plDisplayMode> supported;
    // loop through display modes
    for( i = 0; i < fCurrentDevice->fModes.Count(); i++ )
        // Only modes at the requested color depth are considered.
        if( fCurrentDevice->fModes[i].fBitDepth == ColorDepth )
            // check for duplicate mode
            for( j = 0; j < supported.size(); j++ )
                if(supported[j].Width == fCurrentDevice->fModes[i].fDDmode.Width && supported[j].Height == fCurrentDevice->fModes[i].fDDmode.Height)
            // j reaching supported.size() means no entry with this width/height was found.
            if(j == supported.size())
                // new mode, add it
                plDisplayMode mode;
                mode.Width = fCurrentDevice->fModes[i].fDDmode.Width;
                mode.Height = fCurrentDevice->fModes[i].fDDmode.Height;
                mode.ColorDepth = ColorDepth;

    *res = supported;

// Get max anitialias for the specified displaymode
int plDXPipeline::GetMaxAntiAlias(int Width, int Height, int ColorDepth)
    int max = 0;
    D3DEnum_ModeInfo *pCurrMode = nil;
    hsTArray<D3DEnum_ModeInfo> *modes = &fCurrentDevice->fModes;
    for(int i = 0; i < modes->Count(); i++ )
        D3DEnum_ModeInfo *mode = &(*modes)[i];
        if( mode->fDDmode.Width == Width &&
            mode->fDDmode.Height == Height &&
            mode->fBitDepth == ColorDepth )
            pCurrMode = mode;
        for(int i = 0; i < pCurrMode->fFSAATypes.Count(); i++)
            if(pCurrMode->fFSAATypes[i] > max)
                max = pCurrMode->fFSAATypes[i];
    return max;

int plDXPipeline::GetMaxAnisotropicSamples()
    return fCurrentDevice ? fCurrentDevice->fDDCaps.MaxAnisotropy : 0;

//// Resize ///////////////////////////////////////////////////////////////////
// Resize is fairly obsolete, having been replaced by IResetDevice, which is
// automatically called if needed on BeginRender.
// This Resize function used to serve as both to Resize the primary buffers and
// to restore after losing the device (alt-tab). It didn't actually do either
// very well, so I'm not sure why I haven't deleted it.
// A width and height of zero mean "just recreate" at the current size.
void    plDXPipeline::Resize( UInt32 width, UInt32 height )
    hsMatrix44  w2c, c2w, proj;

    HRESULT coopLev = fD3DDevice->TestCooperativeLevel();
    if( coopLev == D3DERR_DEVICELOST )
        /// Direct3D is reporting that we lost the device but are unable to reset
        /// it yet, so ignore.
        hsStatusMessage( "Received Resize() request at an invalid time. Ignoring...\n" );
    // Recreate-only request (both dimensions zero): try a plain device Reset.
    if( !width && !height )
        if( D3D_OK == coopLev )

        HRESULT hr = fD3DDevice->Reset(&fSettings.fPresentParams);
        fManagedAlloced = false;
        if( !FAILED(hr) )

    // Store some states that we *want* to restore back...
    plViewTransform resetTransform = GetViewTransform();

    /// HACK: Don't recreate if we're windowed, bad things happen
    /// Comment out this if if you want to test the crashing thing in windowed alt-tabbing
#if 0
    if( ( width == 0 || height == 0 ) && !fSettings.fFullscreen )

    // Destroy old

    // Reset width and height
    if( width != 0 && height != 0 )
        // Width and height of zero mean just recreate
        fSettings.fOrigWidth = width;
        fSettings.fOrigHeight = height;
        // Both the live view transform and the saved copy get the new size.
        IGetViewTransform().SetScreenSize((UInt16)(fSettings.fOrigWidth), (UInt16)(fSettings.fOrigHeight));
        resetTransform.SetScreenSize((UInt16)(fSettings.fOrigWidth), (UInt16)(fSettings.fOrigHeight));
        // Just for debug
        hsStatusMessage( "Recreating the pipeline...\n" );

    // Recreate
    // ICreateMaster() is treated as failed when it returns a true value.
    if( !fD3DObject )
        if( ICreateMaster() )
            IShowErrorMessage( "Cannot create D3D master object" );

    // Go recreate surfaces and DX-dependent objects
    // Same convention: a true return from ICreateDeviceObjects() is an error.
    if( ICreateDeviceObjects() )
        IShowErrorMessage( "Cannot create Direct3D device" );

    // Restore states

    /// Broadcast a message letting everyone know that we were recreated and that
    /// all device-specific stuff needs to be recreated
    plDeviceRecreateMsg* clean = TRACKED_NEW plDeviceRecreateMsg();

//// Debug Text ///////////////////////////////////////////////////////////////

//// MakeTextFont /////////////////////////////////////////////////////////////
// Allocate a DirectX text font bound to this pipeline and its D3D device,
// create it with the given face name and size, and link it into the
// pipeline's text font list. Returns the new font, or nil if the allocation
// yielded nil.

plTextFont  *plDXPipeline::MakeTextFont( char *face, UInt16 size )
{
    plTextFont* newFont = TRACKED_NEW plDXTextFont( this, fD3DDevice );

    // Only a successfully allocated font is created and linked.
    if( newFont != nil )
    {
        newFont->Create( face, size );
        newFont->Link( &fTextFontRefList );
    }

    return newFont;
}

//// Drawable Stuff ///////////////////////////////////////////////////////////

//// Draw /////////////////////////////////////////////////////////////////////

// PreRender //////////////////////////////////////////////////////////////////
// Most of this is debugging stuff, drawing the bounds, drawing the normals, etc.
// The functional part is in IGetVisibleSpans, which creates a list of the visible (non-culled)
// span indices within this drawable.
// This is called once per render, and generally well before rendering begins (as part of the
// cull phase).
// Returns true if at least one span ended up in visList. Returns false if
// the drawable is not a plDrawableSpans or its type is masked out by the view.
hsBool  plDXPipeline::PreRender( plDrawable* drawable, hsTArray<Int16>& visList, plVisMgr* visMgr )
    plDrawableSpans *ds = plDrawableSpans::ConvertNoRef(drawable);
    if( !ds )
        return false;
    // Skip drawables whose type bits are not enabled in the current view.
    if( ( ds->GetType() & fView.fDrawableTypeMask ) == 0 )
        return false;

    IGetVisibleSpans( ds, visList, visMgr );

    // Debug: add a bounds span for every visible span. The bounds drawable
    // itself is excluded so it does not generate bounds for its own spans.
    if( ( drawable != fBoundsSpans ) && IsDebugFlagSet(plPipeDbg::kFlagShowAllBounds) )
        const hsTArray<plSpan *>    &spans = ds->GetSpanArray();
        int i;
        for( i = 0; i < visList.GetCount(); i++ )
            /// Add a span to our boundsIce to show this
            IAddBoundsSpan( fBoundsSpans, &spans[ visList[i] ]->fWorldBounds );
    // Debug: add a normals span for every visible icicle span.
    else if( ( drawable != fBoundsSpans ) && IsDebugFlagSet(plPipeDbg::kFlagShowNormals) )
        const hsTArray<plSpan *>    &spans = ds->GetSpanArray();
        int i;
        for( i = 0; i < visList.GetCount(); i++ )
            /// Add a span to our boundsIce to show this
            plIcicle    *span = (plIcicle *)spans[ visList[ i ] ];
            if( span->fTypeMask & plSpan::kIcicleSpan )
                IAddNormalsSpan( fBoundsSpans, span, (plDXVertexBufferRef *)ds->GetVertexRef( span->fGroupIdx, span->fVBufferIdx ), 0xff0000ff );
    // Debug: draw the space tree nodes harvested at the configured level.
    // The color is opaque, with a 4-bit field shifted by (level % 6) * 4 bits.
    if( (fSettings.fBoundsDrawLevel >= 0) && ( drawable != fBoundsSpans ) )
        hsTArray<Int16> bndList;
        drawable->GetSpaceTree()->HarvestLevel(fSettings.fBoundsDrawLevel, bndList);
        int i;
        for( i = 0; i < bndList.GetCount(); i++ )
            IAddBoundsSpan( fBoundsSpans, &hsBounds3Ext(drawable->GetSpaceTree()->GetNode(bndList[i]).GetWorldBounds()), 0xff000000 | (0xf << ((fSettings.fBoundsDrawLevel % 6) << 2)) );

    return visList.GetCount() > 0;

struct plSortFace
    UInt16      fIdx[3];
    hsScalar    fDist;

struct plCompSortFace : public std::binary_function<plSortFace, plSortFace, bool>
    bool operator()( const plSortFace& lhs, const plSortFace& rhs) const
        return lhs.fDist > rhs.fDist;

// IAvatarSort /////////////////////////////////////////////////////////////////////////
// We handle avatar sort differently from the rest of the face sort. The reason is that
// within the single avatar index buffer, we want to only sort the faces of spans requesting
// a sort, and sort them in place.
// Contrast that with the normal scene translucency sort. There, we sort all the spans in a drawable,
// then we sort all the faces in that drawable, then for each span in the sorted span list, we extract
// the faces for that span appending onto the index buffer. This gives great efficiency because
// only the visible faces are sorted and they wind up packed into the front of the index buffer, which
// permits more batching. See plDrawableSpans::SortVisibleSpans.
// For the avatar, it's generally the case that all the avatar is visible or not, and there is only
// one material, so neither of those efficiencies is helpful. Moreover, for the avatar the faces we
// want sorted are a tiny subset of the avatar's faces. Moreover, and most importantly, for the avatar, we
// want to preserve the order that spans are drawn, so, for example, the opaque base head will always be
// drawn before the translucent hair fringe, which will always be drawn before the pink clear plastic baseball cap.
// Always returns true.
hsBool plDXPipeline::IAvatarSort(plDrawableSpans* d, const hsTArray<Int16>& visList)
    int i;
    for( i = 0; i < visList.GetCount(); i++ )
        hsAssert(d->GetSpan(visList[i])->fTypeMask & plSpan::kIcicleSpan, "Unknown type for sorting faces");

        plIcicle* span = (plIcicle*)d->GetSpan(visList[i]);

        // Only spans flagged kPartialSort have their faces sorted.
        if( span->fProps & plSpan::kPartialSort )
            hsAssert(d->GetBufferGroup(span->fGroupIdx)->AreIdxVolatile(), "Badly setup buffer group - set PartialSort too late?");

            const hsPoint3 viewPos = GetViewPositionWorld();

            plGBufferGroup* group = d->GetBufferGroup(span->fGroupIdx);

            plDXVertexBufferRef* vRef = (plDXVertexBufferRef*)group->GetVertexBufferRef(span->fVBufferIdx); 

            // Raw vertex bytes and per-vertex stride. The reads below treat
            // the first bytes of each vertex as an hsPoint3 position.
            const UInt8* vdata = vRef->fData;
            const UInt32 stride = vRef->fVertexSize;

            const int numTris = span->fILength/3;
            // Function-local static scratch array, shared by every call.
            static hsTArray<plSortFace> sortScratch;

            plProfile_IncCount(AvatarFaces, numTris);

            plSortFace* begin = sortScratch.AcquireArray();
            plSortFace* end = begin + numTris;

            // Have three very similar sorts here, differing only on where the "position" of
            // each triangle is defined, either as the center of the triangle, the nearest
            // point on the triangle, or the farthest point on the triangle.
            // Having tried all three on the avatar (the only thing this sort is used on),
            // the best results surprisingly came from using the center of the triangle.
            UInt16* indices = group->GetIndexBufferData(span->fIBufferIdx) + span->fIStartIdx;
            int j;
            for( j = 0; j < numTris; j++ )
#if 1 // TRICENTER
                // Active variant: the key is the squared distance from the
                // viewer to the triangle's (approximate) centroid.
                UInt16 idx = *indices++;
                sortScratch[j].fIdx[0] = idx;
                hsPoint3 pos = *(hsPoint3*)(vdata + idx * stride);

                idx = *indices++;
                sortScratch[j].fIdx[1] = idx;
                pos += *(hsPoint3*)(vdata + idx * stride);

                idx = *indices++;
                sortScratch[j].fIdx[2] = idx;
                pos += *(hsPoint3*)(vdata + idx * stride);

                // Average the three corners (0.3333f approximates 1/3).
                pos *= 0.3333f;

                sortScratch[j].fDist = hsVector3(&pos, &viewPos).MagnitudeSquared();
#elif 0 // NEAREST
                // Compiled out: the key would be the squared distance to the nearest corner.
                UInt16 idx = *indices++;
                sortScratch[j].fIdx[0] = idx;
                hsPoint3 pos = *(hsPoint3*)(vdata + idx * stride);
                hsScalar dist = hsVector3(&pos, &viewPos).MagnitudeSquared();
                hsScalar minDist = dist;

                idx = *indices++;
                sortScratch[j].fIdx[1] = idx;
                pos = *(hsPoint3*)(vdata + idx * stride);
                dist = hsVector3(&pos, &viewPos).MagnitudeSquared();
                if( dist < minDist )
                    minDist = dist;

                idx = *indices++;
                sortScratch[j].fIdx[2] = idx;
                pos = *(hsPoint3*)(vdata + idx * stride);
                dist = hsVector3(&pos, &viewPos).MagnitudeSquared();
                if( dist < minDist )
                    minDist = dist;

                sortScratch[j].fDist = minDist;
#elif 1 // FURTHEST
                // Compiled out (the first branch is taken): the key would be
                // the squared distance to the farthest corner.
                UInt16 idx = *indices++;
                sortScratch[j].fIdx[0] = idx;
                hsPoint3 pos = *(hsPoint3*)(vdata + idx * stride);
                hsScalar dist = hsVector3(&pos, &viewPos).MagnitudeSquared();
                hsScalar maxDist = dist;

                idx = *indices++;
                sortScratch[j].fIdx[1] = idx;
                pos = *(hsPoint3*)(vdata + idx * stride);
                dist = hsVector3(&pos, &viewPos).MagnitudeSquared();
                if( dist > maxDist )
                    maxDist = dist;

                idx = *indices++;
                sortScratch[j].fIdx[2] = idx;
                pos = *(hsPoint3*)(vdata + idx * stride);
                dist = hsVector3(&pos, &viewPos).MagnitudeSquared();
                if( dist > maxDist )
                    maxDist = dist;

                sortScratch[j].fDist = maxDist;
#endif // SORTTYPES

            // plCompSortFace orders larger distances first, so the sorted
            // range runs from the farthest triangle to the nearest.
            std::sort(begin, end, plCompSortFace());

            // Write the sorted triangles back over the span's index range.
            indices = group->GetIndexBufferData(span->fIBufferIdx) + span->fIStartIdx;
            plSortFace* iter = sortScratch.AcquireArray();;
            for( j = 0; j < numTris; j++ )
                *indices++ = iter->fIdx[0];
                *indices++ = iter->fIdx[1];
                *indices++ = iter->fIdx[2];

    return true;

// PrepForRender //////////////////////////////////////////////////////////////////
// Make sure the given drawable and each of the spans to be drawn (as noted in the
// indices in visList) is ready to be rendered.
// This means:
// a) select which lights will be used for each span
// b) do any necessary sorting (if required, spans are already in sorted order in visList,
//      so this only means face sorting).
// c) do any necessary software skinning.
// This is called once per render, and before any rendering actually starts. See plPageTreeMgr.cpp.
// So any preparation needs to last until rendering actually begins. So cached information, like
// which lights a span will use, needs to be stored on the span.
// Returns false if d is not a plDrawableSpans or if software vertex blending
// fails; true otherwise.
hsBool plDXPipeline::PrepForRender(plDrawable* d, hsTArray<Int16>& visList, plVisMgr* visMgr)

    plDrawableSpans *drawable = plDrawableSpans::ConvertNoRef(d);
    if( !drawable )
        return false;

    // Find our lights
    ICheckLighting(drawable, visList, visMgr);

    // Sort our faces
    if( drawable->GetNativeProperty(plDrawable::kPropSortFaces) )
        drawable->SortVisibleSpans(visList, this);

    // Prep for render. This gives the drawable a chance to
    // do any last minute updates for its buffers, including
    // generating particle tri lists.
    drawable->PrepForRender( this );

    // Any skinning necessary
    if( !ISoftwareVertexBlend(drawable, visList) )
        return false;
    // Avatar face sorting happens after the software skin.
    if( drawable->GetNativeProperty(plDrawable::kPropPartialSort) )
        IAvatarSort(drawable, visList);


    return true;

// Draw ///////////////////////////////////////////////////////////
// Convenience function for a drawable that needs to get drawn outside of
// the normal scene graph render (i.e. something not managed by the plPageTreeMgr).
// Not nearly as efficient, so only useful as a special case.
// Runs the PreRender / PrepForRender / Render sequence on the one drawable.
void    plDXPipeline::Draw( plDrawable *d )
    plDrawableSpans *ds = plDrawableSpans::ConvertNoRef( d );

    // Anything that is not a plDrawableSpans is ignored.
    if( ds )
        // Check the drawable's type bits against the current view's mask.
        if( ( ds->GetType() & fView.fDrawableTypeMask ) == 0 )

        // Function-local static visibility list, shared by every call.
        static hsTArray<Int16>visList;

        PreRender( ds, visList );
        PrepForRender(ds, visList);
        Render( ds, visList );

// Render ////////////////////////////////////////////////////////////////////////////////
// The normal way to render a subset of a drawable.
// This assumes that PreRender and PrepForRender have already been called.
// Note that PreRender and PrepForRender are called once per drawable per render
// with a visList containing all of the spans which will be rendered, but
// Render itself may be called with multiple visList subsets which union to
// the visList passed into PreRender/PrepForRender. This happens when drawing
// sorted spans, because some spans from drawable B may be in the middle of
// the spans of drawable A, so the sequence would be:
// PreRender(A, ATotalVisList);
// PreRender(B, BTotalVisList);
// PrepForRender(A, ATotalVisList);
// PrepForRender(B, BTotalVisList);
// Render(A, AFarHalfVisList);
// Render(B, BTotalVisList);
// Render(A, ANearHalfVisList);
// See plPageTreeMgr, which handles all this.
void    plDXPipeline::Render( plDrawable *d, const hsTArray<Int16>& visList )
    // Reset here, since we can push/pop renderTargets after BeginRender() but before
    // this function, which necessitates this being called
    if( fView.fXformResetFlags != 0 )

    plDrawableSpans *ds = plDrawableSpans::ConvertNoRef( d );

    // Only plDrawableSpans drawables are rendered; anything else is ignored.
    if( ds )
        IRenderSpans( ds, visList );

//// BeginDrawable ////////////////////////////////////////////////////////////
// Obsolete, should be removed.
// Does nothing with the drawable and always reports success.
hsBool plDXPipeline::BeginDrawable( plDrawable *d )
{
    return true;
}

//// EndDrawable //////////////////////////////////////////////////////////////
// Obsolete, should be removed.
// Does nothing with the drawable and always reports success.

hsBool plDXPipeline::EndDrawable( plDrawable *d )
{
    return true;
}

// IMakeLightLists ///////////////////////////////////////////////////////////
// Look through all the current lights, and fill out two lists.
// Only active lights (not disabled, not exactly black, and not
// ignored because of visibility regions by plVisMgr) will
// be considered.
// The first list is lights that will affect the avatar and similar
// indeterminately mobile (physical) objects - fLights.fCharLights.
// The second list is lights that aren't restricted by light include
// lists.
// These two abbreviated lists will be further refined for each object
// and avatar to find the strongest 8 lights which affect that object.
// A light with an include list (or LightGroup Component) has
// been explicitly told which objects it affects, so they don't
// need to be in the search lists.
// These lists are only constructed once per render, but searched
// multiple times.
// visMgr may be nil, in which case no visibility-set filtering is applied.
void plDXPipeline::IMakeLightLists(plVisMgr* visMgr)
    // With a vis manager: a light must be non-idle, outside the "vis not"
    // set and inside the "vis" set to count as on.
    if( visMgr )
        const hsBitVector& visSet = visMgr->GetVisSet();
        const hsBitVector& visNot = visMgr->GetVisNot();
        plLightInfo* light;
        for( light = fLights.fActiveList; light != nil; light = light->GetNext() )
            plProfile_IncCount(LightActive, 1);
            if( !light->IsIdle() && !light->InVisNot(visNot) && light->InVisSet(visSet) )
                plProfile_IncCount(LightOn, 1);
                if( light->GetProperty(plLightInfo::kLPHasIncludes) )
                    if( light->GetProperty(plLightInfo::kLPIncludesChars) )
        // Without a vis manager: every non-idle light counts as on.
        plLightInfo* light;
        for( light = fLights.fActiveList; light != nil; light = light->GetNext() )
            plProfile_IncCount(LightActive, 1);
            if( !light->IsIdle() )
                plProfile_IncCount(LightOn, 1);
                if( light->GetProperty(plLightInfo::kLPHasIncludes) )
                    if( light->GetProperty(plLightInfo::kLPIncludesChars) )
    // Record the final list sizes in the profiling counters.
    plProfile_IncCount(LightVis, fLights.fVisLights.GetCount());
    plProfile_IncCount(LightChar, fLights.fCharLights.GetCount());


// BeginVisMgr /////////////////////////////////////////////////////////
// Marks the beginning of a render with the given visibility manager.
// In particular, we cache which lights the visMgr believes to be
// currently active
//
//  visMgr - the visibility manager the upcoming render will use.
// NOTE(review): no body statements are visible for this function - confirm
// what it does with visMgr before relying on the description above.
void plDXPipeline::BeginVisMgr(plVisMgr* visMgr)

// EndVisMgr ///////////////////////////////////////////////////////////
// Marks the end of a render with the given visibility manager.
//
//  visMgr - the visibility manager the finished render used.
// NOTE(review): no body statements are visible for this function - confirm
// what cleanup, if any, it performs.
void plDXPipeline::EndVisMgr(plVisMgr* visMgr)

// ICheckLighting ///////////////////////////////////////////////////////
// For every span in the list of visible span indices, find the list of
// lights that currently affect the span with an estimate of the strength
// of how much the light affects it. The strongest 8 lights will be used
// to illuminate that span.
// For projective lights, there is no limit on how many are supported, other
// than performance (usually fill rate limited).
// The permaLights and permaProjs are lights explicitly selected for a span
// via the LightGroup component.
// For static objects and static lights, the lighting was done offline and stored
// in the vertex diffuse color.
// So here we're only looking for:
// A) moving objects, which can't be statically lit, so are affected by all runtime lights.
// B) moving lights, which can't statically light, so affect all objects
// C) specular objects + specular lights, since specular can't be precomputed.
//
//  drawable - owner of the spans being lit.
//  visList  - indices (into drawable) of the spans visible this render.
//  visMgr   - visibility manager for this render.
//             NOTE(review): not referenced by any statement visible in this body.
void plDXPipeline::ICheckLighting(plDrawableSpans* drawable, hsTArray<Int16>& visList, plVisMgr* visMgr)
    // Two early tests: lighting disabled by render state, and nothing visible.
    // NOTE(review): the statements these tests guard are not visible here.
    if( fView.fRenderState & kRenderNoLights )

    if( !visList.GetCount() )

    plLightInfo     *light;
    int             j;

    // First add in the explicit lights (from LightGroups).
    // Refresh the lights as they are added (actually a lazy eval).
    for( j = 0; j < visList.GetCount(); j++ )
        // Start each visible span from a clean light list.
        drawable->GetSpan( visList[ j ] )->ClearLights();

        if (IsDebugFlagSet(plPipeDbg::kFlagNoRuntimeLights))

        // Set the bits for the lights added from the permanent lists (during ClearLights()).
        int k;
        const hsTArray<plLightInfo*>& permaLights = drawable->GetSpan(visList[j])->fPermaLights;
        for( k = 0; k < permaLights.GetCount(); k++ )
            // Only non-idle lights flagged as shadow light groups get a shadow attached.
            if( permaLights[k]->GetProperty(plLightInfo::kLPShadowLightGroup) && !permaLights[k]->IsIdle() )
                // If it casts a shadow, attach the shadow now.
                ISetShadowFromGroup(drawable, drawable->GetSpan(visList[j]), permaLights[k]);
        // Same treatment for the span's permanent projected lights.
        const hsTArray<plLightInfo*>& permaProjs = drawable->GetSpan(visList[j])->fPermaProjs;
        for( k = 0; k < permaProjs.GetCount(); k++ )
            if( permaProjs[k]->GetProperty(plLightInfo::kLPShadowLightGroup) && !permaProjs[k]->IsIdle() )
                // If it casts a shadow, attach the shadow now.
                ISetShadowFromGroup(drawable, drawable->GetSpan(visList[j]), permaProjs[k]);

    // With runtime lights disabled for debugging, the FindLights timer is stopped here.
    if (IsDebugFlagSet(plPipeDbg::kFlagNoRuntimeLights))
        plProfile_EndTiming( FindLights );

    // Sort the incoming spans as either
    // A) moving - affected by all lights - moveList
    // B) specular - affected by specular lights - specList
    // C) visible - affected by moving lights - visList
    // The scratch lists are function-local statics, so their storage persists
    // between calls.
    static hsTArray<Int16> tmpList;
    static hsTArray<Int16> moveList;
    static hsTArray<Int16> specList;
    int k;
    for( k = 0; k < visList.GetCount(); k++ )
        const plSpan* span = drawable->GetSpan(visList[k]);
        // Runtime-lit takes precedence over specular when a span has both properties.
        if( span->fProps & plSpan::kPropRunTimeLight )
        else if( span->fProps & plSpan::kPropMatHasSpecular )

    // Make a list of lights that can potentially affect spans in this drawable
    // based on the drawables bounds and properties.
    // If the drawable has the PropCharacter property, it is affected by lights
    // in fLights.fCharLights, else only by the smaller list of fLights.fVisLights.
    static hsTArray<plLightInfo*> lightList;
    const hsBool isChar = 0 != drawable->GetNativeProperty(plDrawable::kPropCharacter);
    if( isChar )
        int i;
        for( i = 0; i < fLights.fCharLights.GetCount(); i++ )
            // Candidate lights are tested against the world bounds of the whole drawable.
            if( fLights.fCharLights[i]->AffectsBound(drawable->GetSpaceTree()->GetWorldBounds()) )
        int i;
        for( i = 0; i < fLights.fVisLights.GetCount(); i++ )
            if( fLights.fVisLights[i]->AffectsBound(drawable->GetSpaceTree()->GetWorldBounds()) )
    // Loop over the lights and for each light, extract a list of the spans that light
    // affects. Append the light to each spans list with a scalar strength of how strongly
    // the light affects it. Since the strength is based on the object's center position, 
    // it's not very accurate, but good enough for selecting which lights to use.
    for( k = 0; k < lightList.GetCount(); k++ )
        light = lightList[k];
        // Case 1: movable light.
        if( light->GetProperty(plLightInfo::kLPMovable) )

            // Spans of this drawable that the light reaches.
            const hsTArray<Int16>& litList = light->GetAffected(drawable->GetSpaceTree(), 
                drawable->GetNativeProperty(plDrawable::kPropCharacter) );
            // A light is treated as projected only if it has a projection and
            // projections are not disabled by render state.
            hsBool proj = nil != light->GetProjection();
            if( fView.fRenderState & kRenderNoProjection )
                proj = false;
            for( j = 0; j < litList.GetCount(); j++ )
                // Use the light IF light is enabled and 
                //      1) light is movable
                //      2) span is movable, or
                //      3) Both the light and the span have specular
                const plSpan* span = drawable->GetSpan(litList[j]);
                // Spans flagged kPropProjAsVtx take the light as a regular (non-projected) light.
                hsBool currProj = proj;
                if( span->fProps & plSpan::kPropProjAsVtx )
                    currProj = false;

                // Skip only the combination "projected light on a span that skips projections".
                if( !(currProj && (span->fProps & plSpan::kPropSkipProjection)) )
                    plDXLightRef    *ref = (plDXLightRef *)light->GetDeviceRef();
                    hsScalar        strength, scale;
                    light->GetStrengthAndScale(span->fWorldBounds, strength, scale);
                    // We can't pitch a light because it's "strength" is zero, because the strength is based
                    // on the center of the span and isn't conservative enough. We can pitch based on the
                    // scale though, since a light scaled down to zero will have no effect no where.
                    if( scale > 0 )
                        span->AddLight(light, strength, scale, currProj);
        // Case 2: non-movable light with specular; the test below is on specList.
        else if( light->GetProperty(plLightInfo::kLPHasSpecular) )
            if( !specList.GetCount() )

            const hsTArray<Int16>& litList = light->GetAffected(drawable->GetSpaceTree(), 
                drawable->GetNativeProperty(plDrawable::kPropCharacter) );
            hsBool proj = nil != light->GetProjection();
            if( fView.fRenderState & kRenderNoProjection )
                proj = false;
            for( j = 0; j < litList.GetCount(); j++ )
                // Use the light IF light is enabled and 
                //      1) light is movable
                //      2) span is movable, or
                //      3) Both the light and the span have specular
                const plSpan* span = drawable->GetSpan(litList[j]);
                hsBool currProj = proj;
                if( span->fProps & plSpan::kPropProjAsVtx )
                    currProj = false;

                if( !(currProj && (span->fProps & plSpan::kPropSkipProjection)) )
                    plDXLightRef    *ref = (plDXLightRef *)light->GetDeviceRef();
                    hsScalar        strength, scale;
                    light->GetStrengthAndScale(span->fWorldBounds, strength, scale);
                    // We can't pitch a light because it's "strength" is zero, because the strength is based
                    // on the center of the span and isn't conservative enough. We can pitch based on the
                    // scale though, since a light scaled down to zero will have no effect no where.
                    if( scale > 0 )
                        span->AddLight(light, strength, scale, currProj);
            // Case 3: the test below is on moveList.
            // NOTE(review): presumably this is the remaining (neither movable nor
            // specular) branch; the branch keyword is not visible here - confirm.
            if( !moveList.GetCount() )

            const hsTArray<Int16>& litList = light->GetAffected(drawable->GetSpaceTree(), 
                drawable->GetNativeProperty(plDrawable::kPropCharacter) );
            hsBool proj = nil != light->GetProjection();
            if( fView.fRenderState & kRenderNoProjection )
                proj = false;
            for( j = 0; j < litList.GetCount(); j++ )
                // Use the light IF light is enabled and 
                //      1) light is movable
                //      2) span is movable, or
                //      3) Both the light and the span have specular
                const plSpan* span = drawable->GetSpan(litList[j]);
                hsBool currProj = proj;
                if( span->fProps & plSpan::kPropProjAsVtx )
                    currProj = false;

                if( !(currProj && (span->fProps & plSpan::kPropSkipProjection)) )
                    plDXLightRef    *ref = (plDXLightRef *)light->GetDeviceRef();
                    hsScalar        strength, scale;
                    light->GetStrengthAndScale(span->fWorldBounds, strength, scale);
                    // We can't pitch a light because it's "strength" is zero, because the strength is based
                    // on the center of the span and isn't conservative enough. We can pitch based on the
                    // scale though, since a light scaled down to zero will have no effect no where.
                    if( scale > 0 )
                        span->AddLight(light, strength, scale, currProj);

    // Finally hook up shadows for the visible spans of this drawable.
    IAttachShadowsToReceivers(drawable, visList);


// HarvestVisible ////////////////////////////////////////////////////////////////////////
// Construct a list of the indices of leaf nodes in the given spacetree which are currently
// visible according to the current cull tree. The cull tree factors in camera frustum and
// occluder polys, but _not_ the current visibility regions, plVisMgr.
// This is the normal path for visibility culling at a gross level (e.g. which SceneNodes
// to bother with, which drawables within the SceneNode). For finer objects, like the spans
// themselves, the culling is done via IGetVisibleSpans, which also takes the plVisMgr into
// account.
//
//  space   - space tree to test; a nil tree yields false immediately.
//  visList - receives the harvested indices.
// Returns true if visList holds at least one index after the harvest.
hsBool plDXPipeline::HarvestVisible(plSpaceTree* space, hsTArray<Int16>& visList)
    if( !space )
        return false;



    // NOTE(review): the statement guarded by the dirty test is not visible here;
    // presumably the cull tree is refreshed before it is used below - confirm.
    if( fView.fCullTreeDirty )

    fView.fCullTree.Harvest(space, visList);

    return visList.GetCount() != 0;

//// IGetVisibleSpans /////////////////////////////////////////////////////
//  Given a drawable, returns a list of visible span indices. Disabled spans will not
//  show up in the list, behaving as if they were culled. 
//  See plCullTree (in plPipeline) and plSpaceTree (in plDrawable) and plVisMgr (in plScene).
//
//  drawable - drawable whose spans are tested.
//  visList  - output list of visible span indices.
//  visMgr   - visibility manager, or nil.
void plDXPipeline::IGetVisibleSpans( plDrawableSpans* drawable, hsTArray<Int16>& visList, plVisMgr* visMgr )
    // Scratch list for the raw cull-tree harvest; static, so its storage is
    // kept between calls.
    static hsTArray<Int16> tmpVis;



    // NOTE(review): the statement guarded by the dirty test is not visible here.
    if( fView.fCullTreeDirty )

    // Signed distance of the camera position along the view direction.
    const hsScalar viewDist = GetViewDirWorld().InnerProduct(GetViewPositionWorld());

    const hsTArray<plSpan *>    &spans = drawable->GetSpanArray();

    // Harvest the cull-tree survivors of the drawable's space tree into tmpVis.
    // NOTE(review): the identical call appears twice; presumably one belongs to
    // the visMgr branch and one to the no-visMgr branch - confirm.
    if( visMgr )
        fView.fCullTree.Harvest(drawable->GetSpaceTree(), tmpVis);
        fView.fCullTree.Harvest(drawable->GetSpaceTree(), tmpVis);

    // This is a big waste of time. As a desperate "optimization" pass, the artists
    // insist on going through and marking objects to fade or pop out of rendering
    // past a certain distance. This breaks the batching and requires more CPU to
    // check the objects by distance. Since there is no pattern to the distance at
    // which objects will be told not to draw, there's no way to make this hierarchical,
    // which is what it would take to make it a performance win. So they succeed in
    // reducing the poly count, but generally the frame rate goes _down_ as well.
    // Unfortunately, this technique actually does work in a few key areas, so
    // I haven't been able to purge it.
    if (IsDebugFlagSet(plPipeDbg::kFlagSkipVisDist))
        // Debug path: only the sub-drawable type mask is tested, no distance check.
        int i;
        for( i = 0; i < tmpVis.GetCount(); i++ )
            if( spans[tmpVis[i]]->fSubType & GetSubDrawableTypeMask() )
        // Path with the distance check: type mask first, then the per-span
        // min/max visibility distances where the drawable defines them.
        int i;
        for( i = 0; i < tmpVis.GetCount(); i++ )
            if( spans[tmpVis[i]]->fSubType & GetSubDrawableTypeMask() )
                // We'll check here for spans we can discard because they've completely distance faded out.
                // Note this is based on view direction distance (because the fade is), rather than the
                // preferable distance to camera we sort by.
                hsScalar minDist, maxDist;
                if( drawable->GetSubVisDists(tmpVis[i], minDist, maxDist) )
                    const hsBounds3Ext& bnd = drawable->GetSpaceTree()->GetNode(tmpVis[i]).fWorldBounds;
                    // Project the span's world bounds onto the view direction.
                    // NOTE(review): presumably depth.fX / depth.fY are the near / far
                    // extents of the bounds along that direction - confirm.
                    hsPoint2 depth;
                    bnd.TestPlane(GetViewDirWorld(), depth);
                    // True when depth.fY is still short of viewDist + minDist, or
                    // depth.fX is already past viewDist + maxDist.
                    if( (0 < minDist + viewDist - depth.fY)
                            ||(0 > maxDist + viewDist - depth.fX) )


// ISetupTransforms //////////////////////////////////////////////////////////////////////////////////
// Set the D3D world transform according to the input span.
// Engine currently supports HW vertex blending with 2 matrices,
// else a single Local To World.
// If software skinning is being used, the WORLD matrix will be identity,
// because the full local to world is folded into the skinned vertices.
//
//  drawable - owner of the span; supplies the palette matrix used for blending.
//  span     - span about to be drawn.
//  lastL2W  - in/out cache of the local-to-world matrix last handed to
//             ISetLocalToWorld, used to skip redundant sets.
void plDXPipeline::ISetupTransforms(plDrawableSpans* drawable, const plSpan& span, hsMatrix44& lastL2W)
    // Skinned spans (fNumMatrices != 0).
    // NOTE(review): two different ISetLocalToWorld calls follow; presumably the
    // first is for fNumMatrices <= 2 and the (lastL2W, lastL2W) form is the
    // alternative for larger counts - confirm.
    if( span.fNumMatrices )
        if( span.fNumMatrices <= 2 )
            ISetLocalToWorld( span.fLocalToWorld, span.fWorldToLocal );
            lastL2W = span.fLocalToWorld;
            ISetLocalToWorld( lastL2W, lastL2W );
            fView.fLocalToWorldLeftHanded = span.fLocalToWorld.GetParity();
    // Only push a new world transform when it differs from the cached one.
    if( lastL2W != span.fLocalToWorld )
        ISetLocalToWorld( span.fLocalToWorld, span.fWorldToLocal );
        lastL2W = span.fLocalToWorld;
        fView.fLocalToWorldLeftHanded = lastL2W.GetParity();

    // Exactly two matrices: load the second palette matrix into world matrix 1
    // and enable single-weight vertex blending.
    // NOTE(review): the final D3DVBF_DISABLE call is presumably the alternative
    // for every other matrix count - confirm.
    if( span.fNumMatrices == 2 )
        D3DXMATRIX  mat;    
        IMatrix44ToD3DMatrix(mat, drawable->GetPaletteMatrix(span.fBaseMatrix+1));
        fD3DDevice->SetTransform(D3DTS_WORLDMATRIX(1), &mat);
        fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_1WEIGHTS);
        fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_DISABLE);

// IRefreshDynVertices ////////////////////////////////////////////////////////////////////////
// All dynamic vertices share a single dynamic vertex buffer. They are cycled through
// that buffer using the NOOVERWRITE/DISCARD paradigm. Since the vertices sharing that
// buffer may be of different formats, care is taken to always start a group of vertices
// at the next available position in the buffer aligned with that vertex size.
// Only software skinned objects, dynamic decals, and particle systems currently use the 
// dynamic vertex buffer.
//
//  group - buffer group holding the source vertex data.
//  vRef  - vertex buffer ref to refresh; its fOffset, fRefTime and (if unset)
//          fD3DBuffer are updated.
// Returns true only if the dynamic buffer could not be locked; returns false on
// success and also when there is nothing to copy.
hsBool plDXPipeline::IRefreshDynVertices(plGBufferGroup* group, plDXVertexBufferRef* vRef)
    // First, pad out our next slot to be on a vertex boundary (for this vertex size).
    // (Rounds the byte position fNextDynVtx up to a multiple of fVertexSize.)
    fNextDynVtx = ((fNextDynVtx + vRef->fVertexSize-1) / vRef->fVertexSize) * vRef->fVertexSize;

    // Number of bytes to copy: vertex count in [start, end) times the vertex size.
    Int32 size = (group->GetVertBufferEnd(vRef->fIndex) - group->GetVertBufferStart(vRef->fIndex)) * vRef->fVertexSize;
    if( !size )
        return false; // No error, just nothing to do.

    hsAssert(size > 0, "Bad start and end counts in a group");

    // If we DON'T have room in our dynamic buffer
    if( fNextDynVtx + size > fDynVtxSize )
        plProfile_IncCount(DynVBuffs, 1);

        // Advance the timestamp, because we're about to reuse the buffer
        // NOTE(review): the statement that advances the timestamp is not visible here.

        // Reset next available spot index to zero
        fNextDynVtx = 0;

    // Point our ref at the next available spot
    // (newStart is the slot position expressed in vertices rather than bytes.)
    Int32 newStart = fNextDynVtx / vRef->fVertexSize;

    // Offset, in vertices, from the group's own start index to the slot in the
    // dynamic buffer.
    vRef->fOffset = newStart - group->GetVertBufferStart(vRef->fIndex);

    // Lock the buffer
    // If index is zero, lock with discard, else with overwrite.
    // NOTE(review): lockFlag's declaration is not visible in this function - confirm
    // it implements the rule stated on the line above.
    UInt8*  destPtr = nil;
    if( FAILED( fDynVtxBuff->Lock( fNextDynVtx, 
                                (void **)&destPtr, 
                                lockFlag) ) )
        hsAssert( false, "Cannot lock vertex buffer for writing" );
        return true;

    // Source data: the ref's own copy when it has one.
    // NOTE(review): the second assignment (the group's buffer, advanced to the
    // start vertex) is presumably the fallback when vRef->fData is nil - confirm.
    UInt8* vData;
    if( vRef->fData )
        vData = vRef->fData;
        vData = group->GetVertBufferData(vRef->fIndex) + group->GetVertBufferStart(vRef->fIndex) * vRef->fVertexSize;
    memcpy(destPtr, vData, size);

    // Unlock the buffer
    // NOTE(review): the unlock call itself is not visible here.

    // Advance next available spot index
    fNextDynVtx += size;

    // Set the timestamp
    vRef->fRefTime = fVtxRefTime;

    // A ref with no D3D buffer yet adopts the shared dynamic buffer; one that
    // already has a buffer is expected to be pointing at that same buffer.
    if( !vRef->fD3DBuffer )
        vRef->fD3DBuffer = fDynVtxBuff;
    hsAssert(vRef->fD3DBuffer == fDynVtxBuff, "Holding on to an old dynamic buffer?");

//  vRef->SetRebuiltSinceUsed(true);

    return false;

// ICheckAuxBuffers ///////////////////////////////////////////////////////////////////////
// The AuxBuffers are associated with drawables for things to be drawn right after that
// drawable's contents. In particular, see the plDynaDecal, which includes things like
// water ripples, bullet hits, and footprints.
// This function just makes sure they are ready to be rendered, called right before
// the rendering.
//
//  span - aux span whose group, vertex buffer index and index buffer index are checked.
// Returns true if either the vertex or the index buffer ref is missing, false otherwise.
hsBool plDXPipeline::ICheckAuxBuffers(const plAuxSpan* span)
    plGBufferGroup* group = span->fGroup;

    plDXVertexBufferRef* vRef = (plDXVertexBufferRef*)group->GetVertexBufferRef(span->fVBufferIdx); 
    if( !vRef )
        return true;

    plDXIndexBufferRef* iRef = (plDXIndexBufferRef*)group->GetIndexBufferRef(span->fIBufferIdx);
    if( !iRef )
        return true;

    // If our vertex buffer ref is volatile and the timestamp is off
    // then it needs to be refilled
    // (The result of IRefreshDynVertices is not checked here.)
    if( vRef->Expired(fVtxRefTime) )
        IRefreshDynVertices(group, vRef);
    // Keep the index ref's offset equal to the vertex ref's offset.
    if( vRef->fOffset != iRef->fOffset )
        iRef->fOffset = vRef->fOffset;


    return false; // No error

// ICheckDynBuffers ////////////////////////////////////////////////////////////////////////////////////////
// Make sure the buffers underlying this span are ready to be rendered. Meaning that the underlying
// D3D buffers are in sync with the plasma buffers.
//
//  drawable - owner of the span (not referenced in the body).
//  group    - buffer group the span's vertex and index buffers live in.
//  spanBase - span to check; nothing is done unless it is both a vertex span
//             and an icicle span.
// Returns true if either the vertex or the index buffer ref is missing, false otherwise
// (including the early outs for span types that are not handled).
hsBool plDXPipeline::ICheckDynBuffers(plDrawableSpans* drawable, plGBufferGroup* group, const plSpan* spanBase)
    if( !(spanBase->fTypeMask & plSpan::kVertexSpan) )
        return false;
    // If we aren't a trilist, we're toast.
    if( !(spanBase->fTypeMask & plSpan::kIcicleSpan) )
        return false;

    // The type mask test above is what justifies this downcast.
    plIcicle* span = (plIcicle*)spanBase;

    plDXVertexBufferRef* vRef = (plDXVertexBufferRef*)group->GetVertexBufferRef(span->fVBufferIdx); 
    if( !vRef )
        return true;

    plDXIndexBufferRef* iRef = (plDXIndexBufferRef*)group->GetIndexBufferRef(span->fIBufferIdx);
    if( !iRef )
        return true;

    // If our vertex buffer ref is volatile and the timestamp is off
    // then it needs to be refilled
    // (The result of IRefreshDynVertices is not checked here.)
    if( vRef->Expired(fVtxRefTime) )
        IRefreshDynVertices(group, vRef);
    // Keep the index ref's offset equal to the vertex ref's offset.
    if( vRef->fOffset != iRef->fOffset )
        iRef->fOffset = vRef->fOffset;

    // Refill the index buffer if it has been marked dirty.
    if( iRef->IsDirty()  )
        IFillIndexBufferRef(iRef, group, span->fIBufferIdx);

    return false; // No error

//// IRenderSpans /////////////////////////////////////////////////////////////
// Renders an array of spans obtained from a plDrawableSpans object
// The incoming visList gives the indices of the spans which are visible and should
// be drawn now, and gives them in sorted order.
//
//  drawable - owner of the spans, materials and buffer groups.
//  visList  - indices of the spans to draw, in draw order.
void    plDXPipeline::IRenderSpans( plDrawableSpans *drawable, const hsTArray<Int16>& visList )

    hsMatrix44      lastL2W;
    UInt32          i, j;
    bool            drewPatch = false;
    hsGMaterial     *material;

    const hsTArray<plSpan *>&       spans = drawable->GetSpanArray();

    // Profiling: counts calls that arrive with an empty visList.
    plProfile_IncCount(EmptyList, !visList.GetCount());

    /// Set this (*before* we do our TestVisibleWorld stuff...)
    ISetLocalToWorld( lastL2W, lastL2W );   // This is necessary; otherwise, we have to test for
                                            // the first transform set, since this'll be identity
                                            // but the actual device transform won't be (unless
                                            // we do this)

    /// Loop through our spans, combining them when possible
    // (i is advanced at the bottom of the body, to the first span that was not merged.)
    for( i = 0; i < visList.GetCount(); )
        // An override material, when set, replaces the span's own material.
        material = GetOverrideMaterial() ? GetOverrideMaterial() : drawable->GetMaterial( spans[ visList[ i ] ]->fMaterialIdx );

        /// It's an icicle--do our icicle merge loop
        // tempIce is a copy of the first span; later spans are merged into the copy.
        plIcicle tempIce(*( (plIcicle *)spans[ visList[ i ] ] ));

        // Start at i + 1, look for as many spans as we can add to tempIce
        for( j = i + 1; j < visList.GetCount(); j++ )
            // With an override material in effect, tempIce takes on the candidate's
            // material index before the merge test below.
            // NOTE(review): presumably so that differing material indices cannot
            // block a merge - confirm against CanMergeInto.
            if( GetOverrideMaterial() )
                tempIce.fMaterialIdx = spans[visList[j]]->fMaterialIdx;

            // NOTE(review): the statement guarded by this test is not visible here;
            // presumably the merge loop stops at the first span that cannot merge.
            if( !spans[ visList[ j ] ]->CanMergeInto( &tempIce ) )

            spans[ visList[ j ] ]->MergeInto( &tempIce );

        // Spans without a material are skipped entirely.
        if( material != nil )
            // What do we change?

            ISetupTransforms(drawable, tempIce, lastL2W);

            // Turn on this spans lights and turn off the rest.
            IEnableLights( &tempIce );

            // Check that the underlying buffers are ready to go.
            ICheckDynBuffers(drawable, drawable->GetBufferGroup(tempIce.fGroupIdx), &tempIce);

            CheckVertexBufferRef(drawable->GetBufferGroup(tempIce.fGroupIdx), tempIce.fVBufferIdx);
            CheckIndexBufferRef(drawable->GetBufferGroup(tempIce.fGroupIdx), tempIce.fIBufferIdx);

            // Draw this span now
            IRenderBufferSpan( tempIce,
                                drawable->GetVertexRef( tempIce.fGroupIdx, tempIce.fVBufferIdx ),
                                drawable->GetIndexRef( tempIce.fGroupIdx, tempIce.fIBufferIdx ),
                                tempIce.fVStartIdx, tempIce.fVLength,   // These are used as our accumulated range
                                tempIce.fIPackedIdx, tempIce.fILength );

        // Restart our search...
        i = j;

    /// All done!

//// IAddBoundsSpan ///////////////////////////////////////////////////////////
//  Creates a new span for the given drawable to represent the specified
//  world bounds.
// Debugging only.
//
//  ice      - drawable the new span is appended to.
//  bounds   - bounds to visualize; its base corner and three axes define the box.
//  bndColor - color given to every vertex of the box.
// The index returned by AppendDISpans is recorded in fBSpansToDelete.

void    plDXPipeline::IAddBoundsSpan( plDrawableSpans *ice, const hsBounds3Ext *bounds, UInt32 bndColor )
    // All scratch state is function-local static and so persists between calls.
    static hsTArray<plGeometrySpan *>   spanArray;
    static hsMatrix44       identMatrix;
    static hsPoint3     c[ 8 ], n[ 8 ];
    // Sign pattern per corner: bit 0 of the corner index selects +X, bit 1 +Y, bit 2 +Z.
    static int          nPts[ 8 ][ 3 ] = { { -1, -1, -1 }, { 1, -1, -1 }, { -1, 1, -1 }, { 1, 1, -1 },
                                        { -1, -1, 1 }, { 1, -1, 1 }, { -1, 1, 1 }, { 1, 1, 1 } };
    int             i;
    plGeometrySpan  *newSpan;

    // First call (empty array): create the single array slot and fill the
    // normals table from the sign pattern above.
    if( spanArray.GetCount() == 0 )
        spanArray.Append( TRACKED_NEW plGeometrySpan() );

        // Make normals
        for( i = 0; i < 8; i++ )
            n[ i ].fX = (float)nPts[ i ][ 0 ];
            n[ i ].fY = (float)nPts[ i ][ 1 ];
            n[ i ].fZ = (float)nPts[ i ][ 2 ];
        // NOTE(review): presumably the later-call alternative, putting a fresh
        // span in the existing slot - confirm.
        spanArray[ 0 ] = TRACKED_NEW plGeometrySpan();

    newSpan = spanArray[ 0 ];

    newSpan->BeginCreate( fBoundsMat, identMatrix, 0 );

    // Make corners
    // c[0] is the base corner; corner k adds axes[0] if bit 0 of k is set,
    // axes[1] if bit 1 is set and axes[2] if bit 2 is set.
    // NOTE(review): GetCorner is handed &c[0]; presumably it writes the base
    // corner there and returns that same pointer - confirm.
    c[1] = c[2] = c[4] = *bounds->GetCorner(&c[0]);
    hsVector3 axes[3];
    bounds->GetAxes(axes+0, axes+1, axes+2);
    c[1] += axes[0];
    c[2] += axes[1];
    c[4] += axes[2];

    c[3] = c[1];
    c[3] += axes[1];

    c[5] = c[1];
    c[5] += axes[2];

    c[6] = c[2];
    c[6] += axes[2];

    c[7] = c[6];
    c[7] += axes[0];

    for( i = 0; i < 8; i++ )
        newSpan->AddVertex( &c[ i ], &n[ i ], bndColor );

    // Six triangles over the eight corners (indices refer to c[] above).
    newSpan->AddTriIndices( 0, 1, 2 );
    newSpan->AddTriIndices( 2, 1, 3 );

    newSpan->AddTriIndices( 6, 3, 7 );
    newSpan->AddTriIndices( 7, 1, 5 );
    newSpan->AddTriIndices( 5, 0, 4 );
    newSpan->AddTriIndices( 4, 2, 6 );


    // Hand the span to the drawable and remember the returned index for later removal.
    fBSpansToDelete.Append( ice->AppendDISpans( spanArray ) );


//// IAddNormalsSpan //////////////////////////////////////////////////////////
//  Creates a new span for the given drawable that visualizes the vertex
//  normals of the given span: for every vertex, one triangle running from the
//  vertex position to position + normal and back to the position.
// Debugging only.
//
//  ice      - drawable the new span is appended to.
//  span     - span whose vertices are visualized; its local-to-world matrix
//             is used for the new span.
//  vRef     - vertex buffer ref whose owner supplies positions and normals.
//  bndColor - color given to every generated vertex.
// The index returned by AppendDISpans is recorded in fBSpansToDelete.

void    plDXPipeline::IAddNormalsSpan( plDrawableSpans *ice, plIcicle *span, plDXVertexBufferRef *vRef, UInt32 bndColor )
    // Function-local static scratch state; blank is never assigned here and is
    // passed as the normal of every generated vertex.
    static hsTArray<plGeometrySpan *>   spanArray;
    static hsMatrix44       identMatrix;
    static hsPoint3     point, off, blank;
    hsVector3   b2;
    UInt16      v1, v2, v3;
    int             i;
    plGeometrySpan  *newSpan;

    // First call (empty array): create the single array slot.
    // NOTE(review): the second allocation is presumably the later-call
    // alternative that replaces the existing slot - confirm.
    if( spanArray.GetCount() == 0 )
        spanArray.Append( TRACKED_NEW plGeometrySpan() );
        spanArray[ 0 ] = TRACKED_NEW plGeometrySpan();

    newSpan = spanArray[ 0 ];

    newSpan->BeginCreate( fBoundsMat, span->fLocalToWorld, 0 );

    for( i = 0; i < span->fVLength; i++ )
        // Position and normal of the i-th vertex of the span.
        point = vRef->fOwner->Position( span->fVBufferIdx, span->fCellIdx, span->fCellOffset + i );
        b2 = vRef->fOwner->Normal( span->fVBufferIdx, span->fCellIdx, span->fCellOffset + i );
        // Tip of the normal: position + normal.
        off.Set( point.fX + b2.fX, point.fY + b2.fY, point.fZ + b2.fZ );
        // The first and third vertices are the same point, so the triangle is degenerate.
        v1 = newSpan->AddVertex( &point, &blank, bndColor );
        v2 = newSpan->AddVertex( &off, &blank, bndColor );
        v3 = newSpan->AddVertex( &point, &blank, bndColor );
        newSpan->AddTriIndices( v1, v2, v3 );


    // Hand the span to the drawable and remember the returned index for later removal.
    fBSpansToDelete.Append( ice->AppendDISpans( spanArray ) );


//// BeginRender //////////////////////////////////////////////////////////////
// Specifies the beginning of the render frame.
// If this succeeds (returns false) it must be matched with a call to EndRender.
// Normally, the main client loop will wrap the entire scene render (including
// any offscreen rendering) in a BeginRender/EndRender pair. There is no need
// for further calls for sub-renders.
//
// Returns true when IResetDevice() reports a problem, false otherwise.
hsBool plDXPipeline::BeginRender()
    // Do we have some restoration work ahead of us?
    // Checks for Device Lost condition
    if( IResetDevice() )
        return true;

    // We were lost, but now we're found! Spread the good word brother!
    if( fDevWasLost )
        /// Broadcast a message letting everyone know that we were recreated and that
        /// all device-specific stuff needs to be recreated
//      plDeviceRecreateMsg* clean = TRACKED_NEW plDeviceRecreateMsg();
//      plgDispatch::MsgSend(clean);

        fDevWasLost = false;

    // Debug-triggered reload: restart the eviction bookkeeping and clear the
    // one-shot flag.
    if (IsDebugFlagSet(plPipeDbg::kFlagReload))
        fEvictTime = fTextUseTime;
        fManagedSeen = 0;
        SetDebugFlag(plPipeDbg::kFlagReload, false);

    // offset transform

    // If this is the primary BeginRender, make sure we're really ready.
    // (fInSceneDepth is post-incremented, so only the outermost call sees zero.)
    if( !fInSceneDepth++ )
        // Workaround for NVidia memory manager bug. Search for "OSVERSIONINFO" to
        // find notes on the bug. This is where we purge managed memory periodically.
        plProfile_Set(ManSeen, fManagedSeen);
        if( fManagedCutoff )
            plConst(UInt32) kMinEvictTime(1800); // ~2 minutes @ 15FPS
            // Purge only when more managed memory has been seen than the cutoff,
            // current texture + vertex use fits under the cutoff, and enough
            // frames have gone by since the last purge.
            if( (fManagedSeen > fManagedCutoff) && (fTexUsed + fVtxUsed < fManagedCutoff) && (fTextUseTime - fEvictTime > kMinEvictTime) )
                fManagedSeen = 0;
                fEvictTime = fTextUseTime;
                plProfile_IncCount(ManEvict, 1);

        // Superfluous setting of Z state.
        // (W-buffering is chosen only for a perspective view on a device with the W-buffer cap.)
        fD3DDevice->SetRenderState( D3DRS_ZENABLE, 
                                    ( fView.IsPerspective() && ( fSettings.fD3DCaps & kCapsWBuffer ) ) 
                                    ? D3DZB_USEW : D3DZB_TRUE );

        /// If we have a renderTarget active, use its viewport

        // Tell D3D we're ready to start rendering.
        // A failed BeginScene is recorded as a lost device.
        if( FAILED(fD3DDevice->BeginScene()) )
            fDeviceLost = true;

        // Reset all our buffer/image usage counters
        fNextDynVtx = 0;

        fTexUsed = 0;
        fVtxUsed = 0;

        // Render any shadow maps that have been submitted for this frame.

    // Would probably rather this be an input.
    fTime = hsTimer::GetSysSeconds();

    return false;

//// ISetViewport /////////////////////////////////////////////////////////////
// Translate our viewport into a D3D viewport
// The D3D viewport is initialized from the current view transform and handed
// to the device; a failure of SetViewport goes through WEAK_ERROR_CHECK.
void    plDXPipeline::ISetViewport()
    // NOTE(review): only the first initializer (viewport left) and the last two
    // (0.f, 1.f - presumably MinZ / MaxZ) are visible here; confirm the rest.
    D3DVIEWPORT9 vp = { GetViewTransform().GetViewPortLeft(),
                        0.f, 1.f };

    WEAK_ERROR_CHECK( fD3DDevice->SetViewport( &vp ) );

//// RenderScreenElements /////////////////////////////////////////////////////
//  Renders all the screen elements, such as debug text and plates. Also puts
//  up all the info about vertex buffers and such. Should be called right 
//  before EndRender(), but only on the main surface (not on renderTargets,
//  for example).

void    plDXPipeline::RenderScreenElements()
    // Set once plates or debug text have been drawn; triggers the state reset at the end.
    bool        reset = false;

    // Draw the debug bounds spans queued this frame, then remove each of them
    // from the bounds drawable again.
    if( fBoundsSpans && fBSpansToDelete.GetCount() > 0 )
        Draw( fBoundsSpans );
        int     i;
        for( i = 0; i < fBSpansToDelete.GetCount(); i++ )
            fBoundsSpans->RemoveDISpans( fBSpansToDelete[ i ] );

    if( fCullProxy )
        Draw( fCullProxy );

    // Debug plate tied to the offscreen selected by doHackPlate. It is created
    // on first need and is kept in a function-local static.
    static plPlate* hackPlate = nil;
    if( doHackPlate < hackOffscreens.GetCount() )
        if( !hackPlate )
            fPlateMgr->CreatePlate(&hackPlate, 0.5f, 0.5f, 1.0f, 1.0f);
            hackPlate->CreateBlankMaterial(32, 32, false);
    if( hackPlate )
        // NOTE(review): presumably the plate is shown while doHackPlate is a valid
        // index and hidden otherwise; the branch keyword and the statement under
        // the lay test are not visible here - confirm.
        if( doHackPlate < hackOffscreens.GetCount() )
            hsGMaterial* hackMat = hackPlate->GetMaterial();
            plLayer* lay = plLayer::ConvertNoRef(hackMat->GetLayer(0));
            if( lay )
            hackPlate->SetVisible( true );
            hackPlate->SetVisible( false );

    // Plates are drawn with the wireframe override switched off and the
    // white-shade override switched on; both are popped again right afterwards.
    hsGMatState tHack = PushMaterialOverride(hsGMatState::kMisc, hsGMatState::kMiscWireFrame, false);
    hsGMatState ambHack = PushMaterialOverride(hsGMatState::kShade, hsGMatState::kShadeWhite, true);
    /// Plates
    if( fPlateMgr )
        fPlateMgr->DrawToDevice( this );
        reset = true;

    PopMaterialOverride(ambHack, true);
    PopMaterialOverride(tHack, false);

    /// Debug text
    if( fDebugTextMgr && plDebugText::Instance().IsEnabled() )
        fDebugTextMgr->DrawToDevice( this );

        reset = true;

    if( reset )
        // Reset these since the drawing might have trashed them
        // (Release the cached buffer refs and forget them.)
        hsRefCnt_SafeUnRef( fSettings.fCurrVertexBuffRef );
        hsRefCnt_SafeUnRef( fSettings.fCurrIndexBuffRef );
        fSettings.fCurrVertexBuffRef = nil;
        fSettings.fCurrIndexBuffRef = nil;

        fView.fXformResetFlags = fView.kResetAll;       // Text destroys view transforms
        hsRefCnt_SafeUnRef( fLayerRef[ 0 ] );
        fLayerRef[ 0 ] = nil;       // Text destroys stage 0 texture

//// EndRender ////////////////////////////////////////////////////////////////
// Tell D3D we're through rendering for this frame, and flip the back buffer to front.
// Also includes a bit of making sure we're not holding onto anything that might
// get deleted before the next render.
// Returns whatever IFlipSurface() returned when this call closes the outermost
// scene (fInSceneDepth drops to zero), false otherwise.
// NOTE(review): this definition has no braces as it stands, so the nesting
// described below is read from indentation alone -- confirm against a pristine
// copy of the file.
hsBool plDXPipeline::EndRender()


    hsBool retVal = false;
    /// Actually end the scene
    // fInSceneDepth is pre-decremented; the scene is ended and the surface
    // flipped only when it reaches zero.
    if( !--fInSceneDepth )
        WEAK_ERROR_CHECK( fD3DDevice->EndScene() );
        retVal = IFlipSurface();


    // Do this last, after we've drawn everything
    // Just letting go of things we're done with for the frame.
    fForceMatHandle = true;
    hsRefCnt_SafeUnRef( fCurrMaterial );
    fCurrMaterial = nil;

    // Clear fLayerRef[0..7].
    // NOTE(review): the slots are set to nil with no visible UnRef, unlike
    // fCurrMaterial above -- confirm whether a release is expected first.
    int i;
    for( i = 0; i < 8; i++ )
        if( fLayerRef[i] )
            fLayerRef[i] = nil;

    return retVal;

// SetGamma ////////////////////////////////////////////////////////////
// Create and set a gamma table based on the input exponent values for
// R, G, and B. Can also set explicit table using the other SetGamma().
//
// eR, eG, eB - per-channel gamma values. Entry i of a channel's ramp is
//              pow(i / 255, 1 / e) scaled to the 16 bit range. A value at or
//              below kMinE (0.1) is treated as kMinE so the reciprocal stays
//              bounded.
// Returns false without touching the device when fSettings.fNoGammaCorrect
// is set, true once the ramp has been handed to the device.
hsBool plDXPipeline::SetGamma(hsScalar eR, hsScalar eG, hsScalar eB)
{
    if( fSettings.fNoGammaCorrect )
        return false;

    D3DGAMMARAMP ramp;

    // Entry 0 is always black; the loop below starts at 1.
    ramp.red[0] = ramp.green[0] = ramp.blue[0] = 0L;

    // Turn each gamma into the exponent actually applied (its reciprocal).
    // The clamp must be an either/or: the fallback only applies when the
    // input is too small, otherwise it would overwrite the reciprocal.
    plConst(hsScalar) kMinE(0.1f);
    if( eR > kMinE )
        eR = 1.f / eR;
    else
        eR = 1.f / kMinE;
    if( eG > kMinE )
        eG = 1.f / eG;
    else
        eG = 1.f / kMinE;
    if( eB > kMinE )
        eB = 1.f / eB;
    else
        eB = 1.f / kMinE;

    // Largest value a 16 bit ramp entry can hold.
    const hsScalar kFullScale = hsScalar(UInt16(-1));

    int i;
    for( i = 1; i < 256; i++ )
    {
        // Normalized input intensity for this table slot.
        const hsScalar orig = hsScalar(i) / 255.f;

        hsScalar gamm;
        gamm = pow(orig, eR);
        gamm *= kFullScale;
        ramp.red[i] = UInt16(gamm);

        gamm = pow(orig, eG);
        gamm *= kFullScale;
        ramp.green[i] = UInt16(gamm);

        gamm = pow(orig, eB);
        gamm *= kFullScale;
        ramp.blue[i] = UInt16(gamm);
    }

    fD3DDevice->SetGammaRamp(0, D3DSGR_NO_CALIBRATION, &ramp);

    return true;
}

// SetGamma
// Copy the input gamma tables and pass them to the hardware.
//
// tabR, tabG, tabB - per-channel ramps; 256 16-bit entries are read from each.
// Returns false without touching the device when fSettings.fNoGammaCorrect
// is set or when any table pointer is nil, true once the ramp has been handed
// to the device.
hsBool plDXPipeline::SetGamma(const UInt16* const tabR, const UInt16* const tabG, const UInt16* const tabB)
{
    if( fSettings.fNoGammaCorrect )
        return false;

    // Copying from a nil table would read through a null pointer.
    if( !tabR || !tabG || !tabB )
        return false;

    const size_t kTableBytes = 256 * sizeof(WORD);

    D3DGAMMARAMP ramp;
    memcpy(ramp.red, tabR, kTableBytes);
    memcpy(ramp.green, tabG, kTableBytes);
    memcpy(ramp.blue, tabB, kTableBytes);

    fD3DDevice->SetGammaRamp(0, D3DSGR_NO_CALIBRATION, &ramp);

    return true;
}

//// IFlipSurface /////////////////////////////////////////////////////////////
// Initiate moving the back buffer contents to the front buffer. Will detect
// and set the device lost condition when it occurs.
//
// Present is only issued when no render target is current (i.e. we are on the
// primary surface). Returns the value of fDeviceLost after the call, so a
// true result means the device is (or already was) flagged as lost.
hsBool  plDXPipeline::IFlipSurface()
{
    /// Works now for both fullscreen and windowed modes
    HRESULT hr = D3D_OK;
    if( fSettings.fCurrRenderTarget == nil )
    {
        hr = fD3DDevice->Present( nil, nil, fSettings.fHWnd, nil );
    }

    // Any Present failure is recorded as a lost device; the flag is never
    // cleared here.
    if( FAILED(hr) )
    {
        fDeviceLost = true;
    }
    return fDeviceLost;
}

// ExtractMipMap
// This code works and is fairly fast for creating a new mipmap
// as a copy of the data in an offscreen render target. It's not
// currently used, because of driver bugs found in rendering to
// offscreen render targets.
//
// targ - render target to copy from. Must not be a cubic render target and
//        must have a 32 bit pixel size.
// Returns a newly allocated single-level plMipmap of the target's width and
// height, with 0xff000000 OR'd into every pixel, or nil when the target is
// cubic, is not 32 bit, has no device ref or color surface, or the color
// surface cannot be locked.
plMipmap* plDXPipeline::ExtractMipMap(plRenderTarget* targ)
{
    if( plCubicRenderTarget::ConvertNoRef(targ) )
        return nil;

    if( targ->GetPixelSize() != 32 )
    {
        hsAssert(false, "Only RGBA8888 currently implemented");
        return nil;
    }

    plDXRenderTargetRef* ref = (plDXRenderTargetRef*)targ->GetDeviceRef();
    if( !ref )
        return nil;

    IDirect3DSurface9* surf = ref->GetColorSurface();
    if( !surf )
        return nil;

    D3DLOCKED_RECT rect;
    if( FAILED( surf->LockRect(&rect, nil, D3DLOCK_READONLY) ) )
        return nil;

    const int width = targ->GetWidth();
    const int height = targ->GetHeight();

    plMipmap* mipMap = TRACKED_NEW plMipmap(width, height, plMipmap::kARGB32Config, 1);

    // Walk the locked surface one row at a time; rows are rect.Pitch bytes
    // apart, which may be more than width * 4.
    const UInt8* rowBytes = (const UInt8*)(rect.pBits);
    const int pitch = rect.Pitch;

    const UInt32 blackOpaque = 0xff000000;
    int y;
    for( y = 0; y < height; y++ )
    {
        UInt32* destPtr = mipMap->GetAddr32(0, y);
        const UInt32* srcPtr = (const UInt32*)rowBytes;
        int x;
        for( x = 0; x < width; x++ )
            destPtr[x] = srcPtr[x] | blackOpaque;
        rowBytes += pitch;
    }

    // Every successful LockRect must be paired with an UnlockRect, otherwise
    // the surface stays locked and cannot be rendered to or locked again.
    surf->UnlockRect();

    return mipMap;
}

//// CaptureScreen ////////////////////////////////////////////////////////////
// Copy the current contents of the front buffer to the destination mipmap, with optional
// rescaling. Note that the mipmap function which does this rescaling is of low quality
// (pyramid filter even though it claims a box filter) and low performance (slow).
// If it mattered, it would take about an hour to have a higher performance, higher quality,
// more robust rescale function.
// This function is fairly straightforward, the complexity only comes from making sure
// all pixels in dest get written to, even though the client window may be partially
// offscreen. If the client window is partially offscreen, there will be no values
// for the "offscreen pixels" to copy to dest, so opaque black is used.
//
// dest          - mipmap to fill; it is (re)created as a 32 bit ARGB image of
//                 the viewport size if it doesn't already match.
// flipVertical  - when true, source row y is written to destination row
//                 height - 1 - y.
// desiredWidth,
// desiredHeight - if both are non-zero, dest is resized to these dimensions
//                 after the copy.
// Returns true on success; false if the scratch surface can't be created, the
// front buffer can't be read or locked, or (windowed) the client area is
// entirely off screen.
hsBool  plDXPipeline::CaptureScreen( plMipmap *dest, bool flipVertical, UInt16 desiredWidth, UInt16 desiredHeight )
{
    UInt32              *destPtr, *srcPtr, width, height;
    IDirect3DSurface9   *surface;
    D3DLOCKED_RECT      rect;
    RECT                rToLock;

    width = GetViewTransform().GetViewPortWidth();
    height = GetViewTransform().GetViewPortHeight();

    // [left, right) x [top, bottom) is the part of the destination image that
    // has real screen pixels behind it; everything outside is filled with
    // opaque black.
    int left = 0;
    int right = width;
    int top = 0;
    int bottom = height;

    if( fSettings.fFullscreen )
    {
        if (FAILED(fD3DDevice->CreateOffscreenPlainSurface(width, height, D3DFMT_A8R8G8B8, D3DPOOL_SCRATCH, &surface, NULL)))
            return false;

        rToLock.left = GetViewTransform().GetViewPortLeft();
        rToLock.top = GetViewTransform().GetViewPortTop();
        rToLock.right = GetViewTransform().GetViewPortRight();
        rToLock.bottom = GetViewTransform().GetViewPortBottom();
    }
    else
    {
        // Windowed: the front buffer is the whole desktop, so grab all of it
        // and lock just the part covered by our client area.
        // Kept signed so the comparisons against RECT fields below don't mix
        // signed and unsigned operands.
        const int screenWidth = GetSystemMetrics( SM_CXSCREEN );
        const int screenHeight = GetSystemMetrics( SM_CYSCREEN );

        if (FAILED(fD3DDevice->CreateOffscreenPlainSurface(screenWidth, screenHeight, D3DFMT_A8R8G8B8, D3DPOOL_SCRATCH, &surface, NULL)))
            return false;

        GetClientRect( fSettings.fHWnd, &rToLock );
        MapWindowPoints( fSettings.fHWnd, nil, (POINT *)&rToLock, 2 );

        // Clip the client rect to the screen, shrinking the valid region of
        // the destination by the same amount on each side.
        if( rToLock.right > screenWidth )
        {
            right -= (rToLock.right - screenWidth);
            rToLock.right = screenWidth;
        }
        if( rToLock.bottom > screenHeight )
        {
            bottom -= (rToLock.bottom - screenHeight);
            rToLock.bottom = screenHeight;
        }
        if( rToLock.top < 0 )
        {
            top -= rToLock.top;
            rToLock.top = 0;
        }
        if( rToLock.left < 0 )
        {
            left -= rToLock.left;
            rToLock.left = 0;
        }

        // Client area entirely off screen: there is nothing to lock or copy.
        if( rToLock.right <= rToLock.left || rToLock.bottom <= rToLock.top )
        {
            ReleaseObject( surface );
            return false;
        }
    }

    UINT swapChain = 0;
    if( FAILED( fD3DDevice->GetFrontBufferData(swapChain, surface) ) )
    {
        ReleaseObject( surface );
        return false;
    }

    if( FAILED( surface->LockRect( &rect, &rToLock, D3DLOCK_READONLY ) ) )
    {
        ReleaseObject( surface );
        return false;
    }

    if( dest->GetWidth() != width || dest->GetHeight() != height ||
        dest->GetPixelSize() != 32 )
    {
        dest->Create( width, height, plMipmap::kARGB32Config, 1 );
    }

    const UInt32 blackOpaque = 0xff000000;
    const int   imgWidth = int(width);
    const int   imgHeight = int(height);

    /// Copy over
    int row;
    for( row = 0; row < imgHeight; row++ )
    {
        destPtr = dest->GetAddr32( 0, flipVertical ? imgHeight - 1 - row : row );

        int x;
        if( row < top || row >= bottom )
        {
            // Row lies above or below the visible part of the window.
            for( x = 0; x < imgWidth; x++ )
                *destPtr++ = blackOpaque;
            continue;
        }

        // rect.pBits addresses the first row of the LOCKED rect, which is
        // destination row `top`, so the source row index is relative to top.
        srcPtr = (UInt32 *)( (UInt8 *)rect.pBits + rect.Pitch * (row - top) );

        for( x = 0; x < left; x++ )
            *destPtr++ = blackOpaque;

        memcpy( destPtr, srcPtr, (right - left) * sizeof( UInt32 ) );
        destPtr += (right - left);

        for( x = right; x < imgWidth; x++ )
            *destPtr++ = blackOpaque;
    }

    // Pair the LockRect above before letting go of the surface.
    surface->UnlockRect();
    ReleaseObject( surface );

    if( desiredWidth != 0 && desiredHeight != 0 )
    {
        // Rescale to the right size
        dest->ResizeNicely( desiredWidth, desiredHeight, plMipmap::kDefaultFilter );
    }
    return true;
}

//// Render Targets ///////////////////////////////////////////////////////////

//// MakeRenderTargetRef //////////////////////////////////////////////////////
// Create a Plasma render target ref, filling in the underlying D3D resources
// (e.g. color/depth buffers).
// Note that for ATI boards, we create a single depth surface for them to share.
// That can actually be 2 depth surfaces, if some color surfaces are 16 bit and
// others are 24/32 bit, since the ATI's want to match color depth with depth depth.
//
// owner - render target to build device resources for. If it has a parent
//         (i.e. it is a face of a cubic target), the parent is processed
//         instead and owner's resulting device ref is returned.
// Returns the device ref, or nil when no usable format is found or a D3D
// resource can't be created.
// NOTE(review): this definition has no braces or `else` lines as it stands, so
// the nesting described in the comments below is read from indentation alone,
// and several paired statements (create-vs-reuse, success-vs-failure) appear
// back to back. Confirm against a pristine copy of the file.
hsGDeviceRef    *plDXPipeline::MakeRenderTargetRef( plRenderTarget *owner )
    plDXRenderTargetRef *ref = nil;
    IDirect3DSurface9       *surface = nil, *depthSurface = nil;
    IDirect3DTexture9       *texture = nil;
    IDirect3DCubeTexture9   *cTexture = nil;
    D3DFORMAT               surfFormat = D3DFMT_UNKNOWN, depthFormat = D3DFMT_UNKNOWN;
    D3DRESOURCETYPE         resType;
    int                     i;
    plCubicRenderTarget     *cubicRT;
    UInt16                  width, height;

    hsAssert(!fManagedAlloced, "Allocating non-managed resource with managed resources alloc'd");
    /// Check--is this renderTarget really a child of a cubicRenderTarget?
    if( owner->GetParent() != nil )
        /// This'll create the deviceRefs for all of its children as well
        MakeRenderTargetRef( owner->GetParent() );
        return owner->GetDeviceRef();

    // If we already have a rendertargetref, we just need it filled out with D3D resources.
    if( owner->GetDeviceRef() != nil )
        ref = (plDXRenderTargetRef *)owner->GetDeviceRef();

    // Look for supported format. Note that the surfFormat and depthFormat are
    // passed in by ref, so they may be different after this function call (if
    // an exact match isn't supported, but something similar is).
    if( !IPrepRenderTargetInfo( owner, surfFormat, depthFormat, resType ) )
        hsAssert( false, "Error getting renderTarget info" );
        return nil;

    /// Create the render target now
    // Start with the depth surface.
    // Note that we only ever give a cubic rendertarget a single shared depth buffer, 
    // since we only render one face at a time. If we were rendering part of face X, then part
    // of face Y, then more of face X, then they would all need their own depth buffers.
    if( owner->GetZDepth() && (owner->GetFlags() & ( plRenderTarget::kIsTexture | plRenderTarget::kIsOffscreen )) )
        // 9600 THRASH
        // Two ways to get a depth surface: a private one sized to the owner,
        // or one of the pipeline-wide shared ones in fSharedDepthSurface.
        // NOTE(review): presumably these are the two arms of an if/else on
        // fSettings.fShareDepth -- confirm.
        if( !fSettings.fShareDepth )
            /// Create the depthbuffer
            if( FAILED( fD3DDevice->CreateDepthStencilSurface(
                                owner->GetWidth(), owner->GetHeight(), depthFormat, 
                                D3DMULTISAMPLE_NONE, 0, FALSE,
                                &depthSurface, NULL ) ) )
                return nil;

            // See plDXRenderTargetRef::Release()
            // Integer division: slot 0 for z depths below 24 bits, slot 1 for 24..47.
            const int iZ = owner->GetZDepth() / 24;
            // The shared surface for this slot is created on first use, at a
            // fixed 800x600.
            if( !fSharedDepthSurface[iZ] )
                plConst(DWORD) kSharedWidth(800);
                plConst(DWORD) kSharedHeight(600);
                if( FAILED( fD3DDevice->CreateDepthStencilSurface(
                                    kSharedWidth, kSharedHeight, depthFormat, 
                                    D3DMULTISAMPLE_NONE, 0, FALSE,
                                    &fSharedDepthSurface[iZ], NULL ) ) )
                    return nil;
                // See plDXRenderTargetRef::Release()
                fSharedDepthFormat[iZ] = depthFormat;
            hsAssert(depthFormat == fSharedDepthFormat[iZ], "Mismatch on render target types");
            depthSurface = fSharedDepthSurface[iZ];

    // See if it's a cubic render target. 
    // Primary consumer here is the vertex/pixel shader water.
    cubicRT = plCubicRenderTarget::ConvertNoRef( owner );
    if( cubicRT != nil )
        /// And create the ref (it'll know how to set all the flags)
        // Reuse the owner's existing ref if there is one, otherwise allocate.
        // NOTE(review): presumably an if/else -- confirm.
        if( ref != nil )
            ref->Set( surfFormat, 0, owner );
            ref = TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, owner );

        if( !FAILED( fD3DDevice->CreateCubeTexture( owner->GetWidth(), 1, D3DUSAGE_RENDERTARGET, surfFormat, 
                                                        D3DPOOL_DEFAULT, (IDirect3DCubeTexture9 **)&cTexture, NULL ) ) )
            /// Create a CUBIC texture
            // Give each of the six faces a device ref too, linked into
            // fRenderTargetRefList.
            for( i = 0; i < 6; i++ )
                plRenderTarget          *face = cubicRT->GetFace( i );
                plDXRenderTargetRef *fRef;

                if( face->GetDeviceRef() != nil )
                    fRef = (plDXRenderTargetRef *)face->GetDeviceRef();
                    fRef->Set( surfFormat, 0, face );
                    if( !fRef->IsLinked() )
                        fRef->Link( &fRenderTargetRefList );
                    face->SetDeviceRef( TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, face, false ) );
                    ( (plDXRenderTargetRef *)face->GetDeviceRef())->Link( &fRenderTargetRefList );
                    // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least)
                    hsRefCnt_SafeUnRef( face->GetDeviceRef() );

            ref->SetTexture( cTexture, depthSurface );
            // NOTE(review): presumably the creation-failure path; no release of
            // `ref` is visible before it is dropped -- confirm.
            ref = nil;
    // Not a cubic, is it a texture render target? These are currently used
    // primarily for shadow map generation.
    else if( owner->GetFlags() & plRenderTarget::kIsTexture )
        /// Create a normal texture
        if( ref != nil )
            ref->Set( surfFormat, 0, owner );
            ref = TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, owner );

        if( !FAILED( fD3DDevice->CreateTexture( owner->GetWidth(), owner->GetHeight(), 1, D3DUSAGE_RENDERTARGET, surfFormat, 
                                                        D3DPOOL_DEFAULT, (IDirect3DTexture9 **)&texture, NULL ) ) )

            ref->SetTexture( texture, depthSurface );
            // NOTE(review): presumably the creation-failure path -- confirm.
            ref = nil;
    // Not a texture either, must be a plain offscreen.
    // Note that the plain offscreen code path works and was used until recently,
    // until it turned up that some hardware had bugs on rendering to
    // an offscreen. 
    // Some GeForce1's had lighting anomalies, although my GeForce1 DDR didn't.
    // Some ATI's showed a momentary glitch of corrupted rendering on the frame
    // when rendering both to the primary and an offscreen (again, not mine).
    // So the Offscreen isn't currently used for anything.
    else if( owner->GetFlags() & plRenderTarget::kIsOffscreen )
        /// Create a blank surface
        if( ref != nil )
            ref->Set( surfFormat, 0, owner );
            ref = TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, owner );

        width = owner->GetWidth();
        height = owner->GetHeight();

        // Specify true for lockable, otherwise I'm not sure what we'd do with it. I guess we
        // could copyrect to another surface, presumably a texture. But right now the only
        // thing we use this for is to render a snapshot and copy it to sysmem, which implies
        // lockable.
        if( !FAILED( fD3DDevice->CreateRenderTarget( width, height, surfFormat, 
                            D3DMULTISAMPLE_NONE, 0,
                            TRUE, &surface, NULL ) ) )

            ref->SetTexture( surface, depthSurface );
            // NOTE(review): presumably the creation-failure path -- confirm.
            ref = nil;


    // Keep it in a linked list for ready destruction.
    // If the owner isn't already holding this ref, hand it over; either way
    // make sure a non-nil ref is linked into fRenderTargetRefList.
    if( owner->GetDeviceRef() != ref )
        owner->SetDeviceRef( ref );
        // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least)
        hsRefCnt_SafeUnRef( ref );
        if( ref != nil && !ref->IsLinked() )
            ref->Link( &fRenderTargetRefList );
        if( ref != nil && !ref->IsLinked() )
            ref->Link( &fRenderTargetRefList );

    // Clear the dirty flag now that the ref has been (re)built.
    if( ref != nil )
        ref->SetDirty( false );

    return ref;

//// SharedRenderTargetRef //////////////////////////////////////////////////////
// Same as MakeRenderTargetRef, except specialized for the shadow map generation.
// The shadow map pools of a given dimension (called RenderTargetPool) all share
// a single depth buffer of that size. This allows sharing on NVidia hardware
// that wants the depth buffer dimensions to match the color buffer size.
// It may be that NVidia hardware doesn't care any more. Contact Matthias 
// about that.
//
// share - render target whose depth surface is reused; when nil this simply
//         forwards to MakeRenderTargetRef(owner).
// owner - render target to build device resources for.
// Returns the device ref, or nil when no usable format is found or the color
// resource can't be created.
// NOTE(review): this definition has no braces or `else` lines as it stands, so
// the nesting described in the comments below is read from indentation alone.
// Confirm against a pristine copy of the file.
hsGDeviceRef* plDXPipeline::SharedRenderTargetRef(plRenderTarget* share, plRenderTarget *owner)
    plDXRenderTargetRef*    ref = nil;
    IDirect3DSurface9*      surface = nil;
    IDirect3DSurface9*      depthSurface = nil;
    IDirect3DTexture9*      texture = nil;
    IDirect3DCubeTexture9*  cTexture = nil;
    D3DFORMAT               surfFormat = D3DFMT_UNKNOWN, depthFormat = D3DFMT_UNKNOWN;
    D3DRESOURCETYPE         resType;
    int                     i;
    plCubicRenderTarget*    cubicRT;
    UInt16                  width, height;

    // If we don't already have one to share from, start from scratch.
    if( !share )
        return MakeRenderTargetRef(owner);

    hsAssert(!fManagedAlloced, "Allocating non-managed resource with managed resources alloc'd");

    // Check out the validity of the match. Debug only.
    // NOTE(review): the #endif below has no visible matching #if/#ifdef.
    hsAssert(!owner->GetParent() == !share->GetParent(), "Mismatch on shared render target");
    hsAssert(owner->GetWidth() == share->GetWidth(), "Mismatch on shared render target");
    hsAssert(owner->GetHeight() == share->GetHeight(), "Mismatch on shared render target");
    hsAssert(owner->GetZDepth() == share->GetZDepth(), "Mismatch on shared render target");
    hsAssert(owner->GetStencilDepth() == share->GetStencilDepth(), "Mismatch on shared render target");
#endif // HS_DEBUGGING

    /// Check--is this renderTarget really a child of a cubicRenderTarget?
    if( owner->GetParent() != nil )
        /// This'll create the deviceRefs for all of its children as well
        SharedRenderTargetRef(share->GetParent(), owner->GetParent());
        return owner->GetDeviceRef();

    // Reuse the owner's existing ref, if any, and just refill it below.
    if( owner->GetDeviceRef() != nil )
        ref = (plDXRenderTargetRef *)owner->GetDeviceRef();

    // Look for a good format of matching color and depth size. 
    if( !IFindRenderTargetInfo(owner, surfFormat, resType) )
        hsAssert( false, "Error getting renderTarget info" );
        return nil;

    /// Create the render target now
    // Start with the depth. We're just going to share the depth surface on the
    // input shareRef.
    plDXRenderTargetRef* shareRef = (plDXRenderTargetRef*)share->GetDeviceRef();
    hsAssert(shareRef, "Trying to share from a render target with no ref");
    // NOTE(review): the `if` below has no visible body; the assignment after it
    // is at function-level indentation, so no reference is visibly taken on the
    // shared depth surface here -- confirm.
    if( shareRef->fD3DDepthSurface )
    depthSurface = shareRef->fD3DDepthSurface;

    // Check for Cubic. This is unlikely, since this function is currently only
    // used for the shadow map pools.
    cubicRT = plCubicRenderTarget::ConvertNoRef( owner );
    if( cubicRT != nil )
        /// And create the ref (it'll know how to set all the flags)
        // Reuse the owner's existing ref if there is one, otherwise allocate.
        // NOTE(review): presumably an if/else -- confirm.
        if( ref != nil )
            ref->Set( surfFormat, 0, owner );
            ref = TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, owner );

        hsAssert(!fManagedAlloced, "Alloc default with managed alloc'd");
        if( !FAILED( fD3DDevice->CreateCubeTexture( owner->GetWidth(), 1, D3DUSAGE_RENDERTARGET, surfFormat, 
                                                        D3DPOOL_DEFAULT, (IDirect3DCubeTexture9 **)&cTexture, NULL ) ) )

            /// Create a CUBIC texture
            // Give each of the six faces a device ref too, linked into
            // fRenderTargetRefList.
            for( i = 0; i < 6; i++ )
                plRenderTarget          *face = cubicRT->GetFace( i );
                plDXRenderTargetRef *fRef;

                if( face->GetDeviceRef() != nil )
                    fRef = (plDXRenderTargetRef *)face->GetDeviceRef();
                    fRef->Set( surfFormat, 0, face );
                    if( !fRef->IsLinked() )
                        fRef->Link( &fRenderTargetRefList );
                    face->SetDeviceRef( TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, face, false ) );
                    ( (plDXRenderTargetRef *)face->GetDeviceRef())->Link( &fRenderTargetRefList );
                    // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least)
                    hsRefCnt_SafeUnRef( face->GetDeviceRef() );

            ref->SetTexture( cTexture, depthSurface );
            // NOTE(review): presumably the creation-failure path; no release of
            // `ref` is visible before it is dropped -- confirm.
            ref = nil;
    // Is it a texture render target? Probably, since shadow maps are all we use this for.
    else if( owner->GetFlags() & plRenderTarget::kIsTexture )
        /// Create a normal texture
        if( ref != nil )
            ref->Set( surfFormat, 0, owner );
            ref = TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, owner );

        hsAssert(!fManagedAlloced, "Alloc default with managed alloc'd");
        if( !FAILED( fD3DDevice->CreateTexture( owner->GetWidth(), owner->GetHeight(), 1, D3DUSAGE_RENDERTARGET, surfFormat, 
                                                        D3DPOOL_DEFAULT, (IDirect3DTexture9 **)&texture, NULL ) ) )

            ref->SetTexture( texture, depthSurface );
            // NOTE(review): presumably the creation-failure path -- confirm.
            ref = nil;
    // Pretty sure this code path has never been followed.
    else if( owner->GetFlags() & plRenderTarget::kIsOffscreen )
        /// Create a blank surface
        if( ref != nil )
            ref->Set( surfFormat, 0, owner );
            ref = TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, owner );

        width = owner->GetWidth();
        height = owner->GetHeight();

        // The FALSE argument is CreateRenderTarget's Lockable parameter, so
        // this surface is created non-lockable.
        if( !FAILED( fD3DDevice->CreateRenderTarget( width, height, surfFormat, 
                            D3DMULTISAMPLE_NONE, 0,
                            FALSE, &surface, NULL ) ) )

            ref->SetTexture( surface, depthSurface );
            // NOTE(review): presumably the creation-failure path -- confirm.
            ref = nil;


    // If the owner isn't already holding this ref, hand it over; either way
    // make sure a non-nil ref is linked into fRenderTargetRefList.
    if( owner->GetDeviceRef() != ref )
        owner->SetDeviceRef( ref );
        // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least)
        hsRefCnt_SafeUnRef( ref );
        if( ref != nil && !ref->IsLinked() )
            ref->Link( &fRenderTargetRefList );
        if( ref != nil && !ref->IsLinked() )
            ref->Link( &fRenderTargetRefList );

    // Clear the dirty flag now that the ref has been (re)built.
    if( ref != nil )
        ref->SetDirty( false );

    return ref;

//// IPrepRenderTargetInfo ////////////////////////////////////////////////////
//  Shared processing of render target creation parameters. Also does the 
//  dirty work of finding a good surface format to use.
//
//  owner       - render target whose flags, size and bit depths are examined.
//  surfFormat  - in/out: set to A4R4G4B4 for 16 bit or A8R8G8B8 for 32 bit
//                targets; if the device rejects that, the other of the two is
//                tried. Left as passed in for any other pixel size.
//  depthFormat - in/out: set from the requested z/stencil depths, or to
//                D3DFMT_UNKNOWN when no z buffer is requested. For 24 bit z
//                the search walks up through D24X8, D24X4S4, D24S8 until the
//                device accepts one.
//  resType     - out: D3DRTYPE_TEXTURE for texture targets, D3DRTYPE_SURFACE
//                otherwise.
//  Returns false when a texture target isn't a power of two in both
//  dimensions, when no supported color/depth format is found, or when the
//  chosen pair fails CheckDepthStencilMatch. Returns true (touching nothing)
//  when the owner has no flags set. The last HRESULT is left in
//  fSettings.fDXError.
hsBool  plDXPipeline::IPrepRenderTargetInfo( plRenderTarget *owner, D3DFORMAT &surfFormat,
                                              D3DFORMAT &depthFormat, D3DRESOURCETYPE &resType )
{
    // 24 bit depth formats, ordered by increasing stencil size. The fallback
    // search below only ever moves toward the end of this list.
    const D3DFORMAT depthFormats[] = { D3DFMT_D24X8, D3DFMT_D24X4S4, D3DFMT_D24S8 };
    const int       kLastDepthFormat = int( sizeof( depthFormats ) / sizeof( depthFormats[ 0 ] ) ) - 1;

    const UInt16    flags = owner->GetFlags();
    const UInt16    width = owner->GetWidth();
    const UInt16    height = owner->GetHeight();
    const Int8      bitDepth = owner->GetPixelSize();
    const Int8      zDepth = owner->GetZDepth();
    const Int8      stencilDepth = owner->GetStencilDepth();

    // Position of the current candidate in depthFormats. It starts at the last
    // slot so that non-24-bit requests (which have no entry in the table) fail
    // cleanly instead of falling back from an undefined position.
    int             stencilIndex = kLastDepthFormat;

    if( flags == 0 )
        return true;

    if( flags & plRenderTarget::kIsTexture )
    {
        /// Do an extra check for width and height here
        // Texture targets must be a (non-zero) power of two in each dimension.
        if( width == 0 || ( width & ( width - 1 ) ) != 0 )
            return false;
        if( height == 0 || ( height & ( height - 1 ) ) != 0 )
            return false;

        resType = D3DRTYPE_TEXTURE;
    }
    else
    {
        resType = D3DRTYPE_SURFACE;
    }

    if( bitDepth == 16 )
        surfFormat = D3DFMT_A4R4G4B4;
    else if( bitDepth == 32 )
        surfFormat = D3DFMT_A8R8G8B8;

    /// Get the backbuffer format (if one is requested)
    if( zDepth )
    {
        if( zDepth == 16 && stencilDepth == 0 )
        {
            depthFormat = D3DFMT_D16;
        }
        else if( zDepth == 24 )
        {
            // Smallest table entry with at least the requested stencil bits;
            // anything above 8 bits settles for the 8 bit stencil format.
            if( stencilDepth == 0 )
                stencilIndex = 0;
            else if( stencilDepth <= 4 )
                stencilIndex = 1;
            else
                stencilIndex = 2;

            depthFormat = depthFormats[ stencilIndex ];
        }
        else if( zDepth == 32 && stencilDepth == 0 )
        {
            depthFormat = D3DFMT_D32;
        }
        else if( zDepth == 15 && stencilDepth == 1 )
        {
            depthFormat = D3DFMT_D15S1;
        }

        if( surfFormat == D3DFMT_UNKNOWN || depthFormat == D3DFMT_UNKNOWN )
            return false;
    }
    else
    {
        depthFormat = D3DFMT_UNKNOWN;
    }

    /// Check the device format
    if( FAILED( fSettings.fDXError = fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType, fCurrentMode->fDDmode.Format,
                                                D3DUSAGE_RENDERTARGET, resType, surfFormat ) ) )
    {
        // Requested color depth isn't available; try the other one.
        if( bitDepth == 16 )
            surfFormat = D3DFMT_A8R8G8B8;
        else if( bitDepth == 32 )
            surfFormat = D3DFMT_A4R4G4B4;

        if( FAILED( fSettings.fDXError = fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType, fCurrentMode->fDDmode.Format,
                                                    D3DUSAGE_RENDERTARGET, resType, surfFormat ) ) )
            return false;
    }

    if( zDepth )
    {
        while( FAILED( fSettings.fDXError = fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType, fCurrentMode->fDDmode.Format,
                                                    D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, depthFormat ) ) )
        {
            // Out of candidates: give up rather than retrying the same format.
            if( stencilIndex >= kLastDepthFormat )
                return false;

            // Advance to the next candidate so the loop always makes progress.
            stencilIndex++;
            depthFormat = depthFormats[ stencilIndex ];
        }

        if( FAILED( fSettings.fDXError = fD3DObject->CheckDepthStencilMatch( fCurrentAdapter, fCurrentDevice->fDDType, fCurrentMode->fDDmode.Format,
                                                    surfFormat, depthFormat ) ) )
            return false;
    }

    return true;
}

//// IFindRenderTargetInfo ////////////////////////////////////////////////////
//  Shared processing of render target creation parameters. Also does the 
//  dirty work of finding a good surface format to use.
// Doesn't bother checking depth buffer, since this is only used for a render target
// that's going to share a depth buffer that's already been created.
//
//  owner      - render target whose flags and pixel size are examined.
//  surfFormat - in/out: set to A4R4G4B4 for 16 bit or A8R8G8B8 for 32 bit
//               targets; if the device rejects that, the other of the two is
//               tried. Must come out as something other than D3DFMT_UNKNOWN.
//  resType    - out: D3DRTYPE_TEXTURE for texture targets, D3DRTYPE_SURFACE
//               otherwise.
//  Returns false when no supported color format is found. Returns true
//  (touching nothing) when the owner has no flags set. The last HRESULT is
//  left in fSettings.fDXError.
hsBool  plDXPipeline::IFindRenderTargetInfo( plRenderTarget *owner, D3DFORMAT &surfFormat, D3DRESOURCETYPE &resType )
{
    const UInt16    flags = owner->GetFlags();
    const Int8      bitDepth = owner->GetPixelSize();

    if( flags == 0 )
        return true;

    // The resource type is an either/or on the texture flag.
    if( flags & plRenderTarget::kIsTexture )
        resType = D3DRTYPE_TEXTURE;
    else
        resType = D3DRTYPE_SURFACE;

    if( bitDepth == 16 )
        surfFormat = D3DFMT_A4R4G4B4;
    else if( bitDepth == 32 )
        surfFormat = D3DFMT_A8R8G8B8;

    if( surfFormat == D3DFMT_UNKNOWN )
        return false;

    /// Check the device format
    if( FAILED( fSettings.fDXError = fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType, fCurrentMode->fDDmode.Format,
                                                D3DUSAGE_RENDERTARGET, resType, surfFormat ) ) )
    {
        // Requested color depth isn't available; try the other one.
        if( bitDepth == 16 )
            surfFormat = D3DFMT_A8R8G8B8;
        else if( bitDepth == 32 )
            surfFormat = D3DFMT_A4R4G4B4;

        if( FAILED( fSettings.fDXError = fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType, fCurrentMode->fDDmode.Format,
                                                    D3DUSAGE_RENDERTARGET, resType, surfFormat ) ) )
            return false;
    }

    return true;
}

// PushRenderRequest ///////////////////////////////////////////////
// We're moving from our current render (probably to primary) onto 
// another specialized render request. This may be to the primary (if req->GetRenderTarget() is nil)
// or to a texture. This function saves enough state to resume rendering on PopRenderRequest.
// The render request may just be a new camera position.
// NOTE(review): this definition has no braces or `else` lines as it stands,
// there are runs of blank lines where statements would be expected, and one
// `if` has no visible body. Nothing visible here pushes onto
// fSettings.fViewStack even though PopRenderRequest pops from it. The comments
// below describe only what is visible -- confirm against a pristine copy.
void plDXPipeline::PushRenderRequest(plRenderRequest* req)
    // Save these, since we want to copy them to our current view
    hsMatrix44 l2w = fView.fLocalToWorld;
    hsMatrix44 w2l = fView.fWorldToLocal;

    // Remember the current default fog; it is written back below.
    plFogEnvironment defFog = fView.fDefaultFog;



    // Take render state, the request pointer and clear values from the request.
    fView.fRenderState = req->GetRenderState();

    fView.fRenderRequest = req;


    fView.fClearColor = inlGetD3DColor( req->GetClearColor() );
    fView.fClearDepth = req->GetClearDepth();

    // NOTE(review): fView.fDefaultFog is assigned twice in a row with no `else`
    // between -- presumably "keep the saved fog" versus "build a linear fog
    // from the request's yon, fog start and clear color"; confirm which branch
    // the negative fog start selects.
    if( req->GetFogStart() < 0 )
        fView.fDefaultFog = defFog;
        fView.fDefaultFog.Set( req->GetYon() * (1.f - req->GetFogStart()), req->GetYon(), 1.f, &req->GetClearColor());
        fCurrFog.fEnvPtr = nil;

    // NOTE(review): no body is visible for this test.
    if( req->GetOverrideMat() )

    // Set from our saved ones...
    fView.fWorldToLocal = w2l;
    fView.fLocalToWorld = l2w;


    // Requests that ignore occluders get a zero cull node budget.
    if (req->GetIgnoreOccluders())
        fView.fCullMaxNodes = 0;

    fView.fCullTreeDirty = true;

// PopRenderRequest //////////////////////////////////////////////////
// Restore state to resume rendering as before the preceding PushRenderRequest.
// NOTE(review): this definition has no braces as it stands and the first `if`
// has no visible body -- confirm against a pristine copy of the file.
void plDXPipeline::PopRenderRequest(plRenderRequest* req)
    // NOTE(review): nothing visible is done when the request has an override
    // material.
    if( req->GetOverrideMat() )

    // Restore the whole view state from the top of the view stack.
    fView = fSettings.fViewStack.Pop();

    // Force the next thing drawn to update the fog settings.
    fD3DDevice->SetRenderState(D3DRS_FOGENABLE, FALSE);
    fCurrFog.fEnvPtr = nil;

    // Flag the projection and camera transforms for reset.
    fView.fXformResetFlags = fView.kResetProjection | fView.kResetCamera;

//// PushRenderTarget /////////////////////////////////////////////////////////
// Begin rendering to the specified target. If target is nil, that's the primary surface.
// NOTE(review): this definition has no braces as it stands and the first `if`
// has no visible body -- confirm against a pristine copy of the file.
void    plDXPipeline::PushRenderTarget( plRenderTarget *target )
    // NOTE(review): tests whether target is absent from hackOffscreens, but
    // nothing visible is done with the result.
    if( target && (hackOffscreens.kMissingIndex == hackOffscreens.Find(target)) )

    fSettings.fCurrRenderTarget = target;
    hsRefCnt_SafeAssign( fSettings.fCurrRenderTargetRef, ( target != nil ) ? (plDXDeviceRef *)target->GetDeviceRef() : nil );

    // Walk up the parent chain; the last non-nil target becomes the base render
    // target. When target is nil the loop doesn't run, so
    // fSettings.fCurrBaseRenderTarget keeps its previous value.
    while( target != nil )
        fSettings.fCurrBaseRenderTarget = target;
        target = target->GetParent();

    // `target` has been consumed by the walk above, so the stored current
    // target is what gets pushed and set.
    fSettings.fRenderTargets.Push( fSettings.fCurrRenderTarget );
    ISetRenderTarget( fSettings.fCurrRenderTarget );

//// PopRenderTarget //////////////////////////////////////////////////////////
// Resume rendering to the render target before the last PushRenderTarget, 
// making sure we aren't holding on to anything from the render target getting
// popped.
//
// Returns the render target that was popped. Afterwards the current target is
// whatever is now on top of fSettings.fRenderTargets, or nil (the primary)
// when the stack is empty.
plRenderTarget      *plDXPipeline::PopRenderTarget()
{
    plRenderTarget  *old = fSettings.fRenderTargets.Pop();
    const int       remaining = fSettings.fRenderTargets.GetCount();

    if( remaining == 0 )
    {
        // Nothing left on the stack: back to the primary, holding no refs.
        fSettings.fCurrRenderTarget = nil;
        fSettings.fCurrBaseRenderTarget = nil;
        hsRefCnt_SafeUnRef( fSettings.fCurrRenderTargetRef );
        fSettings.fCurrRenderTargetRef = nil;
    }
    else
    {
        // Only index the stack when it is non-empty.
        fSettings.fCurrRenderTarget = fSettings.fRenderTargets[ remaining - 1 ];

        // Walk up the parent chain; the last non-nil target is the base.
        plRenderTarget *walk;
        for( walk = fSettings.fCurrRenderTarget; walk != nil; walk = walk->GetParent() )
            fSettings.fCurrBaseRenderTarget = walk;

        hsRefCnt_SafeAssign( fSettings.fCurrRenderTargetRef,
                             ( fSettings.fCurrRenderTarget != nil ) ?
                                    (plDXDeviceRef *)fSettings.fCurrRenderTarget->GetDeviceRef()
                                    : nil );
    }

    ISetRenderTarget( fSettings.fCurrRenderTarget );

    return old;
}

// ISetAnisotropy ///////////////////////////////////////////////////////////
// Set the current anisotropic filtering settings to D3D
//   on - request anisotropic minification filtering; forced to false when
//        fSettings.fMaxAnisotropicSamples <= 0 or the kFlagNoAnisotropy debug
//        flag is set.
// Touches sampler stages 0 through 7 and caches the result in
// fSettings.fCurrAnisotropy.
void plDXPipeline::ISetAnisotropy(hsBool on)
    if( (fSettings.fMaxAnisotropicSamples <= 0) || IsDebugFlagSet(plPipeDbg::kFlagNoAnisotropy) )
        on = false;

    // NOTE(review): no statement is visible under this check in this listing;
    // presumably an early return when the cached state already matches -- confirm.
    if( on == fSettings.fCurrAnisotropy )

    if( on )
        int i;
        for( i = 0; i < 8; i++ )
            // GeForce cards have decided that they no longer handle anisotropic as a mag filter.
            // We could detect caps... but I don't think we'd notice if we just made the mag
            // filter always be linear.
            fD3DDevice->SetSamplerState( i, D3DSAMP_MINFILTER, D3DTEXF_ANISOTROPIC );
            fD3DDevice->SetSamplerState( i, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR );
            fD3DDevice->SetSamplerState( i, D3DSAMP_MAXANISOTROPY, (DWORD)fSettings.fMaxAnisotropicSamples );
        fSettings.fCurrAnisotropy = true;
        // NOTE(review): the second 'int i' and the linear min filter below
        // presumably belong to an else branch not visible in this listing -- confirm.
        // Anisotropy off: plain linear min and mag filtering on every stage.
        int i;
        for( i = 0; i < 8; i++ )
            fD3DDevice->SetSamplerState( i, D3DSAMP_MINFILTER, D3DTEXF_LINEAR );
            fD3DDevice->SetSamplerState( i, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR );
        fSettings.fCurrAnisotropy = false;

//// ISetRenderTarget /////////////////////////////////////////////////////////
// Set rendering to the specified render target. Nil rendertarget is the primary.
// Invalidates the state as required by experience, not documentation.
//   target - render target to bind; nil (or a target with no usable color
//            surface) selects fD3DMainSurface / fD3DDepthSurface.
void    plDXPipeline::ISetRenderTarget( plRenderTarget *target )
    IDirect3DSurface9       *main, *depth;
    plDXRenderTargetRef *ref = nil;

    // Fetch the target's device ref, (re)building it when missing or dirty.
    if( target != nil )
        ref = (plDXRenderTargetRef *)target->GetDeviceRef();
        if( ref == nil || ref->IsDirty() )
            ref = (plDXRenderTargetRef *)MakeRenderTargetRef( target );

    if( ref == nil || ref->GetColorSurface() == nil )
        /// Set to main screen
        main = fD3DMainSurface;
        depth = fD3DDepthSurface;
        // NOTE(review): the two assignments below presumably form the else
        // branch (the 'else' is not visible in this listing) -- confirm.
        /// Set to this target
        main = ref->GetColorSurface();
        depth = ref->fD3DDepthSurface;

    // Only touch the device when either surface actually changes.
    if( main != fSettings.fCurrD3DMainSurface || depth != fSettings.fCurrD3DDepthSurface )
        fSettings.fCurrD3DMainSurface = main;
        fSettings.fCurrD3DDepthSurface = depth;
        // NOTE(review): only the color surface bind is visible here; no call that
        // binds 'depth' or invalidates cached state appears in this listing -- confirm.
        fD3DDevice->SetRenderTarget(0, main);



// SetClear /////////////////////////////////////////////////////////////////////
// Set the color and depth clear values.
//   col   - new clear color, converted with inlGetD3DColor(); nil leaves the
//           current clear color untouched.
//   depth - new clear depth; nil leaves the current clear depth untouched.
void plDXPipeline::SetClear(const hsColorRGBA* col, const hsScalar* depth)
    if( col )
        fView.fClearColor = inlGetD3DColor(*col);
    if( depth )
        fView.fClearDepth = *depth;

// GetClearColor ////////////////////////////////////////////////////////////////
// Return the current clear color.
// The stored value (fView.fClearColor) is converted back to an hsColorRGBA
// through FromARGB32().
hsColorRGBA plDXPipeline::GetClearColor() const
    return hsColorRGBA().FromARGB32(fView.fClearColor);

// GetClearDepth ////////////////////////////////////////////////////////////////
// Return the current clear depth (fView.fClearDepth), as last stored by SetClear().
hsScalar plDXPipeline::GetClearDepth() const
    return fView.fClearDepth;

//// ClearRenderTarget ////////////////////////////////////////////////////////
// Clear the current color and depth buffers. If a drawable is passed in, then
// the color buffer will be cleared by rendering that drawable.
// The depth buffer is always cleared  with a clear call.
// Clearing of depth and/or color may be turned off by setting the kRenderClearDepth 
// and kRenderClearColor bits in fView.fRenderState to false.
//   d - drawable used to fill the color buffer; must convert to plDrawableSpans
//       to be used.
void plDXPipeline::ClearRenderTarget( plDrawable* d )
    plDrawableSpans* src = plDrawableSpans::ConvertNoRef(d);

    // NOTE(review): no statement is visible under this check in this listing;
    // presumably it falls back to a flat clear and returns when 'd' is not a
    // plDrawableSpans -- confirm.
    if( !src )
    // First clear the depth buffer as normal.
    if( fView.fRenderState & kRenderClearDepth )
        D3DRECT r;
        hsBool useRect = IGetClearViewPort(r);

        // Clear only the viewport rect when it does not cover the whole target.
        // NOTE(review): the second Clear below presumably sits in an else branch
        // not visible in this listing -- confirm.
        if( useRect )
            WEAK_ERROR_CHECK( fD3DDevice->Clear( 1, &r, D3DCLEAR_ZBUFFER, 0, fView.fClearDepth, 0L ) );
            WEAK_ERROR_CHECK( fD3DDevice->Clear( 0, nil, D3DCLEAR_ZBUFFER, 0, fView.fClearDepth, 0L ) );
// debug, clears to red         WEAK_ERROR_CHECK( fD3DDevice->Clear( 0, nil, D3DCLEAR_ZBUFFER | D3DCLEAR_TARGET, 0xffff0000, fView.fClearDepth, 0L ) );

    // Save the view state we are about to override...
    UInt32 s = fView.fRenderState;
    UInt32 dtm = fView.fDrawableTypeMask;
    UInt32 sdtm = fView.fSubDrawableTypeMask;
    // ...and accept only normal drawables, with every sub-drawable type enabled.
    fView.fDrawableTypeMask = plDrawable::kNormal;
    fView.fSubDrawableTypeMask = UInt32(-1);

    // NOTE(review): no draw call is visible between the override above and the
    // restore below in this listing; the rendering of 'd' presumably goes here -- confirm.

    // Restore the saved view state.
    fView.fSubDrawableTypeMask = sdtm;
    fView.fDrawableTypeMask = dtm;
    fView.fRenderState = s;


// IGetClearViewPort //////////////////////////////////////////////
// Sets the input rect to the current viewport. Returns true when that rect
// differs from the full extent (0, 0, width, height) of the surface being
// compared against, i.e. when a clear should be restricted to the rect;
// returns false when the viewport covers it exactly.
//   r - receives the viewport left/top/right/bottom from GetViewTransform().
hsBool plDXPipeline::IGetClearViewPort(D3DRECT& r)
    r.x1 = GetViewTransform().GetViewPortLeft();
    r.y1 = GetViewTransform().GetViewPortTop();
    r.x2 = GetViewTransform().GetViewPortRight();
    r.y2 = GetViewTransform().GetViewPortBottom();

    hsBool useRect = false;
    // With a current render target, compare against that target's dimensions.
    if( fSettings.fCurrRenderTarget != nil )
        useRect = ( (r.x1 != 0) || (r.y1 != 0) || (r.x2 != fSettings.fCurrRenderTarget->GetWidth()) || (r.y2 != fSettings.fCurrRenderTarget->GetHeight()) );

        // NOTE(review): this comparison against fOrigWidth/fOrigHeight presumably
        // sits in an else branch (no current render target) that is not visible
        // in this listing -- confirm.
        useRect = ( (r.x1 != 0) || (r.y1 != 0) || (r.x2 != fSettings.fOrigWidth) || (r.y2 != fSettings.fOrigHeight) );

    return useRect;

// ClearRenderTarget //////////////////////////////////////////////////////////////////////////////
// Flat fill the current render target with the specified color and depth values.
//   col   - fill color; nil means use GetClearColor().
//   depth - fill depth; nil means use fView.fClearDepth.
// Does nothing unless kRenderClearColor and/or kRenderClearDepth is set in
// fView.fRenderState; only the buffers whose bit is set are cleared.
void    plDXPipeline::ClearRenderTarget( const hsColorRGBA *col, const hsScalar* depth )
    if( fView.fRenderState & (kRenderClearColor | kRenderClearDepth) )
        DWORD clearColor = inlGetD3DColor(col ? *col : GetClearColor());
        hsScalar clearDepth = depth ? *depth : fView.fClearDepth;

        // Stencil is never cleared here (the stencil flag is commented out).
        DWORD   dwFlags = 0;//fStencil.fDepth > 0 ? D3DCLEAR_STENCIL : 0;
        if( fView.fRenderState & kRenderClearColor )
            dwFlags |= D3DCLEAR_TARGET;
        if( fView.fRenderState & kRenderClearDepth )
            dwFlags |= D3DCLEAR_ZBUFFER;

        // Restrict the clear to the viewport rect when it does not cover the
        // whole surface.
        // NOTE(review): the second Clear below presumably sits in an else branch
        // not visible in this listing -- confirm.
        D3DRECT r;
        hsBool useRect = IGetClearViewPort(r);
        if( useRect )
            WEAK_ERROR_CHECK( fD3DDevice->Clear( 1, &r, dwFlags, clearColor, clearDepth, 0L ) );
            WEAK_ERROR_CHECK( fD3DDevice->Clear( 0, nil, dwFlags, clearColor, clearDepth, 0L ) );

//// Fog //////////////////////////////////////////////////////////////////////
// The current fog system sucks. It was never meant to get used this way, but
// the production artists started using it with debug commands that were around,
// and before they could be stopped it was too late.
// The good news is that there's a lot that could be done with fog here that
// would be greatly appreciated.

// IGetVSFogSet ///////////////////////////////////////////////////////////////
// Translate the current fog settings into a linear fog that the current
// vertex shaders can use.
//   set - output array; elements [0] through [3] are written, so it must hold
//         at least four floats.
// set[2] and set[3] are always 0 and 1. With a fog environment whose end is
// beyond its start, set[0] = -end and set[1] = 1 / (start - end); the other
// visible assignments store set[0] = 1, set[1] = 0.
void plDXPipeline::IGetVSFogSet(float* const set) const
    set[2] = 0.f;
    set[3] = 1.f;
    if( fCurrFog.fEnvPtr )
        // Only start and end are used; the color is fetched and discarded.
        hsColorRGBA colorTrash;
        hsScalar start;
        hsScalar end;
        fCurrFog.fEnvPtr->GetPipelineParams(&start, &end, &colorTrash);
        if( end > start )
            set[0] = -end;
            set[1] = 1.f / (start - end);
            // NOTE(review): the two pairs of (1, 0) assignments below presumably
            // are the else branches for "end <= start" and "no fog environment";
            // the 'else' lines are not visible in this listing -- confirm.
            set[0] = 1.f;
            set[1] = 0.f;
        set[0] = 1.f;
        set[1] = 0.f;

//// ISetFogParameters ////////////////////////////////////////////////////////
// So looking at this function, one might guess that fog parameters were settable
// individually for different objects, and that was the original intent, with transitions
// as something like the avatar moved from one fog region to another.
// Never happened.
// So the current state is that there is one set of fog parameters per age, and things
// are either fogged, or not fogged.
// This is complicated by the DX vertex/pixel shaders only supporting per-vertex fog,
// so the same plasma fog settings may turn into differing D3D fog state.
//
//   span    - span about to be drawn; its fFogEnvironment is used when set,
//             otherwise fView.fDefaultFog. A nil span means no fog.
//   baseLay - base layer of the material; may veto fog (kShadeReallyNoFog) and
//             selects the shader path when it has a vertex shader. May be nil.
// Device fog state is cached in fCurrFog so unchanged values are not re-sent.
//
// NOTE(review): several 'else' / 'return' lines appear to be missing from this
// listing; the notes below mark each place -- confirm against the full source.
void plDXPipeline::ISetFogParameters(const plSpan* span, const plLayerInterface* baseLay)
    // Debug override: fog disabled globally.
    // NOTE(review): presumably followed by an early return -- confirm.
    if (IsDebugFlagSet(plPipeDbg::kFlagNoFog))
        fCurrFog.fEnvPtr = nil;
        fD3DDevice->SetRenderState(D3DRS_FOGENABLE, FALSE);

    plFogEnvironment* fog = (span ? (span->fFogEnvironment ? span->fFogEnvironment : &fView.fDefaultFog) : nil);

    UInt8 isVertex = 0;
    UInt8 isShader = false;
    if (baseLay)
        // The layer can refuse fog unless a material override turns that flag off.
        if ((baseLay->GetShadeFlags() & hsGMatState::kShadeReallyNoFog) && !(fMatOverOff.fShadeFlags & hsGMatState::kShadeReallyNoFog))
            fog = nil;
        if (baseLay->GetVertexShader())
            isShader = true;
    // A material override can also force fog off.
    if (fMatOverOn.fShadeFlags & hsGMatState::kShadeReallyNoFog)
        fog = nil;

    bool forceLoad = false;
    D3DRENDERSTATETYPE  d3dFogType = D3DRS_FOGTABLEMODE;        // Use VERTEXMODE for vertex fog

    // Fall back to vertex fog when pixel fog is unsupported or a vertex shader is in use.
    if (!(fSettings.fD3DCaps & kCapsPixelFog) || isShader)
        d3dFogType = D3DRS_FOGVERTEXMODE;
        isVertex = true;

    // Quick check
    // NOTE(review): no statement is visible under this check; presumably an early
    // return when nothing relevant has changed -- confirm.
    if ((fCurrFog.fEnvPtr == fog) && (fCurrFog.fIsVertex == isVertex) && (fCurrFog.fIsShader == isShader))

    UInt8 type = ( fog == nil ) ? plFogEnvironment::kNoFog : fog->GetType();

    if (type == plFogEnvironment::kNoFog)
        /// No fog, just disable
        // NOTE(review): presumably followed by a return; otherwise the code below
        // would dereference a nil 'fog' -- confirm.
        fD3DDevice->SetRenderState( D3DRS_FOGENABLE, FALSE );
        fCurrFog.fEnvPtr = nil;
    else if( fCurrFog.fEnvPtr != fog )
        // New fog environment: enable fog and resend every parameter.
        fD3DDevice->SetRenderState( D3DRS_FOGENABLE, TRUE );
        forceLoad = true;
        fCurrFog.fEnvPtr = fog;

    // The shader path always uses linear fog, whatever the environment type.
    if( isShader )
        type = plFogEnvironment::kLinearFog;

    // A switch between shader/fixed-function or vertex/table fog also forces a reload.
    if( fCurrFog.fIsShader != isShader )
        forceLoad = true;

    if( fCurrFog.fIsVertex != isVertex )
        forceLoad = true;

    fCurrFog.fIsShader = isShader;
    fCurrFog.fIsVertex = isVertex;

    // startOrDensity holds the fog start for linear fog, the density otherwise.
    hsScalar    startOrDensity, end;
    hsColorRGBA color;

    /// Get params
    if( type == plFogEnvironment::kLinearFog )
        fog->GetPipelineParams( &startOrDensity, &end, &color );

        if (startOrDensity == end)
            // This should be legal, but some cards don't like it. Just disable. Same thing.
            fD3DDevice->SetRenderState(D3DRS_FOGENABLE, FALSE);
        // NOTE(review): this two-value overload presumably is the else branch for
        // non-linear (density based) fog; the 'else' is not visible here -- confirm.
        fog->GetPipelineParams( &startOrDensity, &color );

    if( isShader )
        // None of this is technically necessary, but it's to work around
        // a known goofiness in the NVidia drivers. Actually, I don't think
        // having to set the tablemode fog to linear in addition to setting
        // the vertexmode is even a "known" issue. But turns out to be 
        // necessary on GeForceFX latest drivers.
        startOrDensity = 1.f;
        end = 0.f;

        // Setting FOGTABLEMODE to none seems to work on both ATI and NVidia,
        // but I haven't tried it on the GeForceFX yet.
        //      if( fCurrFog.fMode != D3DFOG_LINEAR )
        //          fD3DDevice->SetRenderState(D3DRS_FOGTABLEMODE, D3DFOG_LINEAR);
        fD3DDevice->SetRenderState(D3DRS_FOGTABLEMODE, D3DFOG_NONE);

    /// Set color
    if( !( fCurrFog.fColor == color ) || forceLoad )
        fCurrFog.fColor = color;
        fCurrFog.fHexColor = inlGetD3DColor( color );
        fD3DDevice->SetRenderState( D3DRS_FOGCOLOR, fCurrFog.fHexColor );

    // Lookup table indexed by the plasma fog type.
    // NOTE(review): assumes the plFogEnvironment type values are 0..3 in the
    // order linear, exp, exp2, none -- confirm against plFogEnvironment.
    D3DFOGMODE          modes[ 4 ] = { D3DFOG_LINEAR, D3DFOG_EXP, D3DFOG_EXP2, D3DFOG_NONE };

    /// Set type
    if( fCurrFog.fMode != modes[type] || forceLoad )
        fCurrFog.fMode = modes[type];

        // The float parameters are handed to SetRenderState as their raw bit
        // pattern reinterpreted as a DWORD.
        if( fCurrFog.fMode == D3DFOG_LINEAR )
            fCurrFog.fStart = startOrDensity;
            fCurrFog.fEnd = end;

            fD3DDevice->SetRenderState( d3dFogType, fCurrFog.fMode );
            fD3DDevice->SetRenderState( D3DRS_FOGSTART, *(DWORD *)( &fCurrFog.fStart ) );
            fD3DDevice->SetRenderState( D3DRS_FOGEND, *(DWORD *)( &fCurrFog.fEnd ) );
            // NOTE(review): the density path below presumably is the else branch
            // (non-linear modes); the 'else' is not visible here -- confirm.
            fCurrFog.fDensity = startOrDensity;

            fD3DDevice->SetRenderState( d3dFogType, fCurrFog.fMode );
            fD3DDevice->SetRenderState( D3DRS_FOGDENSITY, *(DWORD *)( &fCurrFog.fDensity ) );
        // NOTE(review): everything from here on presumably is the else branch of
        // the "Set type" check above (mode unchanged, no forced reload) -- confirm.
        // Type is the same, but are the params?
        if( fCurrFog.fMode == D3DFOG_LINEAR )
            if( fCurrFog.fStart != startOrDensity )
                fCurrFog.fStart = startOrDensity;
                fD3DDevice->SetRenderState( D3DRS_FOGSTART, *(DWORD *)( &fCurrFog.fStart ) );

            if( fCurrFog.fEnd != end )
                fCurrFog.fEnd = end;
                fD3DDevice->SetRenderState( D3DRS_FOGEND, *(DWORD *)( &fCurrFog.fEnd ) );
            // NOTE(review): the density comparison below presumably is the else
            // branch for non-linear modes -- confirm.
            if( fCurrFog.fDensity != startOrDensity )
                fCurrFog.fDensity = startOrDensity;
                fD3DDevice->SetRenderState( D3DRS_FOGDENSITY, *(DWORD *)( &fCurrFog.fDensity ) );

//// Stenciling ///////////////////////////////////////////////////////////////
// I know that none of this stencil code has ever been used in production.
// To my knowledge, none of this stencil code was ever even tested.
// It may save you some time as a starting point, but don't trust it.

//// StencilEnable ////////////////////////////////////////////////////////////
// Turn stencil testing on or off on the device.
//   enable - desired stencil state.
// Returns true when the requested state is already cached or has been sent to
// the device; false when enabling is requested but fStencil.fDepth is 0.

hsBool  plDXPipeline::StencilEnable( hsBool enable )
    // Already in the requested state according to the cache.
    if( fStencil.fEnabled == enable )
        return true;

    if( enable && fStencil.fDepth == 0 )
        return false;           // Can't enable stenciling when we don't support it!

    // NOTE(review): no visible line updates fStencil.fEnabled after this call in
    // this listing, so the cache check above would never see the change -- confirm.
    fD3DDevice->SetRenderState( D3DRS_STENCILENABLE, enable ? TRUE : FALSE );

    return true;

//// StencilSetCompareFunc ////////////////////////////////////////////////////
// Set the stencil comparison function and reference value.
//   func     - one of the plStencilCaps::kCmp* values; any other value asserts
//              and leaves the device untouched.
//   refValue - stencil reference value.
// Each render state is sent only when it differs from the value cached in fStencil.

void    plDXPipeline::StencilSetCompareFunc( UInt8 func, UInt32 refValue )
    D3DCMPFUNC  newFunc;

    // Translate the plasma compare enum to its D3D equivalent.
    switch( func )
        case plStencilCaps::kCmpNever:              newFunc = D3DCMP_NEVER; break;
        case plStencilCaps::kCmpLessThan:           newFunc = D3DCMP_LESS; break;
        case plStencilCaps::kCmpEqual:              newFunc = D3DCMP_EQUAL; break;
        case plStencilCaps::kCmpLessThanOrEqual:    newFunc = D3DCMP_LESSEQUAL; break;
        case plStencilCaps::kCmpGreaterThan:        newFunc = D3DCMP_GREATER; break;
        case plStencilCaps::kCmpNotEqual:           newFunc = D3DCMP_NOTEQUAL; break;
        case plStencilCaps::kCmpGreaterThanOrEqual: newFunc = D3DCMP_GREATEREQUAL; break;
        case plStencilCaps::kCmpAlways:             newFunc = D3DCMP_ALWAYS; break;
        default: hsAssert( false, "Invalid compare function to StencilSetCompareFunc()" ); return;

    if( fStencil.fCmpFunc != newFunc )
        fD3DDevice->SetRenderState( D3DRS_STENCILFUNC, newFunc );
        fStencil.fCmpFunc = newFunc;

    if( fStencil.fRefValue != refValue )
        fD3DDevice->SetRenderState( D3DRS_STENCILREF, refValue );
        fStencil.fRefValue = refValue;

//// StencilSetMask ///////////////////////////////////////////////////////////
// Set the stencil compare mask and the stencil write mask.
//   mask      - value for D3DRS_STENCILMASK.
//   writeMask - value for D3DRS_STENCILWRITEMASK.
// Each render state is sent only when it differs from the value cached in fStencil.

void    plDXPipeline::StencilSetMask( UInt32 mask, UInt32 writeMask )
    if( fStencil.fMask != mask )
        fD3DDevice->SetRenderState( D3DRS_STENCILMASK, mask );
        fStencil.fMask = mask;

    if( fStencil.fWriteMask != writeMask )
        fD3DDevice->SetRenderState( D3DRS_STENCILWRITEMASK, writeMask );
        fStencil.fWriteMask = writeMask;

//// StencilSetOps ////////////////////////////////////////////////////////////
// Set the three stencil operations.
//   passOp         - op when the stencil test passes (D3DRS_STENCILPASS).
//   failOp         - op when the stencil test fails (D3DRS_STENCILFAIL).
//   passButZFailOp - op when stencil passes but the depth test fails
//                    (D3DRS_STENCILZFAIL).
// Each argument is a plStencilCaps::kOp* value. An unrecognised value asserts
// and returns at that point, so ops handled earlier in the function have
// already been applied. Render states are sent only when they differ from the
// values cached in fStencil.

void    plDXPipeline::StencilSetOps( UInt8 passOp, UInt8 failOp, UInt8 passButZFailOp )
    // Reused for each of the three translations below.
    D3DSTENCILOP        op;

    /// Pass op
    switch( passOp )
        case plStencilCaps::kOpKeep:        op = D3DSTENCILOP_KEEP; break;
        case plStencilCaps::kOpSetToZero:   op = D3DSTENCILOP_ZERO; break;
        case plStencilCaps::kOpReplace:     op = D3DSTENCILOP_REPLACE; break;
        case plStencilCaps::kOpIncClamp:    op = D3DSTENCILOP_INCRSAT; break;
        case plStencilCaps::kOpDecClamp:    op = D3DSTENCILOP_DECRSAT; break;
        case plStencilCaps::kOpInvert:      op = D3DSTENCILOP_INVERT; break;
        case plStencilCaps::kOpIncWrap:     op = D3DSTENCILOP_INCR; break;
        case plStencilCaps::kOpDecWrap:     op = D3DSTENCILOP_DECR; break;
        default: hsAssert( false, "Invalid op to StencilSetOps()" ); return;

    if( fStencil.fPassOp != op )
        fD3DDevice->SetRenderState( D3DRS_STENCILPASS, op );
        fStencil.fPassOp = op;

    /// Fail op
    switch( failOp )
        case plStencilCaps::kOpKeep:        op = D3DSTENCILOP_KEEP; break;
        case plStencilCaps::kOpSetToZero:   op = D3DSTENCILOP_ZERO; break;
        case plStencilCaps::kOpReplace:     op = D3DSTENCILOP_REPLACE; break;
        case plStencilCaps::kOpIncClamp:    op = D3DSTENCILOP_INCRSAT; break;
        case plStencilCaps::kOpDecClamp:    op = D3DSTENCILOP_DECRSAT; break;
        case plStencilCaps::kOpInvert:      op = D3DSTENCILOP_INVERT; break;
        case plStencilCaps::kOpIncWrap:     op = D3DSTENCILOP_INCR; break;
        case plStencilCaps::kOpDecWrap:     op = D3DSTENCILOP_DECR; break;
        default: hsAssert( false, "Invalid op to StencilSetOps()" ); return;

    if( fStencil.fFailOp != op )
        fD3DDevice->SetRenderState( D3DRS_STENCILFAIL, op );
        fStencil.fFailOp = op;

    /// Pass-but-z-fail op
    switch( passButZFailOp )
        case plStencilCaps::kOpKeep:        op = D3DSTENCILOP_KEEP; break;
        case plStencilCaps::kOpSetToZero:   op = D3DSTENCILOP_ZERO; break;
        case plStencilCaps::kOpReplace:     op = D3DSTENCILOP_REPLACE; break;
        case plStencilCaps::kOpIncClamp:    op = D3DSTENCILOP_INCRSAT; break;
        case plStencilCaps::kOpDecClamp:    op = D3DSTENCILOP_DECRSAT; break;
        case plStencilCaps::kOpInvert:      op = D3DSTENCILOP_INVERT; break;
        case plStencilCaps::kOpIncWrap:     op = D3DSTENCILOP_INCR; break;
        case plStencilCaps::kOpDecWrap:     op = D3DSTENCILOP_DECR; break;
        default: hsAssert( false, "Invalid op to StencilSetOps()" ); return;

    if( fStencil.fPassButZFailOp != op )
        fD3DDevice->SetRenderState( D3DRS_STENCILZFAIL, op );
        fStencil.fPassButZFailOp = op;

//// StencilGetCaps ///////////////////////////////////////////////////////////
// Fill in the stencil capabilities of the current mode and device.
//   caps - output structure; must not be nil (asserted).
// Returns false, with caps->fIsSupported set to false, when none of the current
// mode's depth formats carries stencil bits; otherwise fills fSupportedDepths
// and fSupportedOps and returns true.

hsBool  plDXPipeline::StencilGetCaps( plStencilCaps *caps )
    hsAssert( caps != nil, "Invalid pointer to StencilGetCaps()" );

    int     i;

    /// Find supported depths
    // Only depth formats that include stencil bits contribute a flag.
    caps->fSupportedDepths = 0;
    for( i = 0; i < fCurrentMode->fDepthFormats.GetCount(); i++ )
        switch( fCurrentMode->fDepthFormats[ i ] )
            case D3DFMT_D15S1:      caps->fSupportedDepths |= plStencilCaps::kDepth1Bit; break;
            case D3DFMT_D24X4S4:    caps->fSupportedDepths |= plStencilCaps::kDepth4Bits; break;
            case D3DFMT_D24S8:      caps->fSupportedDepths |= plStencilCaps::kDepth8Bits; break;

    if( caps->fSupportedDepths == 0 )
        caps->fIsSupported = false;
        return false;

    // NOTE(review): no visible line sets caps->fIsSupported to true on the
    // success path in this listing -- confirm against the full source.

    /// Get supported ops
    // Map each D3DSTENCILCAPS_* bit of the device caps to its plStencilCaps::kOp* flag.
    caps->fSupportedOps = 0;

    if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_DECR )
        caps->fSupportedOps |= plStencilCaps::kOpDecWrap;
    if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_DECRSAT )
        caps->fSupportedOps |= plStencilCaps::kOpDecClamp;
    if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_INCR )
        caps->fSupportedOps |= plStencilCaps::kOpIncWrap;
    if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_INCRSAT )
        caps->fSupportedOps |= plStencilCaps::kOpIncClamp;

    if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_INVERT )
        caps->fSupportedOps |= plStencilCaps::kOpInvert;
    if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_KEEP )
        caps->fSupportedOps |= plStencilCaps::kOpKeep;
    if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_REPLACE )
        caps->fSupportedOps |= plStencilCaps::kOpReplace;
    if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_ZERO )
        caps->fSupportedOps |= plStencilCaps::kOpSetToZero;

    return true;

//// Lighting /////////////////////////////////////////////////////////////////

//// IMakeLightRef ////////////////////////////////////////////////////////////
// Create a plasma device ref for a light. Includes reserving a D3D light
// index for the light. Ref is kept in a linked list for ready disposal
// as well as attached to the light.
//   owner - light the new ref is created for; receives it via SetDeviceRef().
// Returns the new ref. The creation reference is released before returning,
// so the caller does not receive a reference of its own.
hsGDeviceRef    *plDXPipeline::IMakeLightRef( plLightInfo *owner )
    plDXLightRef    *lRef = TRACKED_NEW plDXLightRef();

    /// Assign stuff and update
    lRef->fD3DIndex = fLights.ReserveD3DIndex();
    lRef->fOwner = owner;
    owner->SetDeviceRef( lRef );
    // Unref now, since for now ONLY the BG owns the ref, not us (not until we use it, at least)
    // lRef is still used below, so this relies on owner->SetDeviceRef() above
    // having taken its own reference.
    hsRefCnt_SafeUnRef( lRef );

    // Track the ref in the pipeline's light ref list.
    lRef->Link( &fLights.fRefList );

    // Push the light's current parameters to the device under the reserved index.
    lRef->UpdateD3DInfo( fD3DDevice, &fLights );

    // Neutralize it until we need it.
    fD3DDevice->LightEnable(lRef->fD3DIndex, false);

    return lRef;

//// RegisterLight ////////////////////////////////////////////////////////////
// Register a light with the pipeline. The light becomes immediately
// ready to illuminate the scene.
//   liInfo - light to link into fLights.fActiveList and give a device ref.
void plDXPipeline::RegisterLight(plLightInfo* liInfo)
    // NOTE(review): no statement is visible under this check in this listing;
    // presumably an early return for a light that is already linked -- confirm.
    if( liInfo->IsLinked() )

    liInfo->Link( &fLights.fActiveList );
    // IMakeLightRef() also attaches the ref to the light itself, so this
    // assigns the same ref a second time.
    liInfo->SetDeviceRef( IMakeLightRef( liInfo ) );

//// UnRegisterLight //////////////////////////////////////////////////////////
// Remove a light from the pipeline's active light list. Light will
// no longer illuminate the scene.
//   liInfo - light to detach; its device ref is cleared.
// NOTE(review): only the device ref reset is visible in this listing; the
// unlink from the active list described above does not appear here -- confirm.
void plDXPipeline::UnRegisterLight(plLightInfo* liInfo)
    liInfo->SetDeviceRef( nil );


//// IEnableLights ////////////////////////////////////////////////////////////
//  Does the lighting enable pass. Given a span with lights to use, builds
//  a bit vector representing the lights to use, then uses that to mask off
//  which lights actually need to be enabled/disabled.
// Constructs 2 lists on the span, one for normal lights, and one for projective lights.
//   span - span whose lights are selected.
// Normal lights are always selected (up to fSettings.fMaxNumLights); projected
// lights (up to fSettings.fMaxNumProjectors) are skipped when
// kRenderNoProjection is set in fView.fRenderState.

void    plDXPipeline::IEnableLights( plSpan *span )
    ISelectLights( span, fSettings.fMaxNumLights, false );
    if( !(fView.fRenderState & kRenderNoProjection) )
        ISelectLights( span, fSettings.fMaxNumProjectors, true );

// ISelectLights ///////////////////////////////////////////////////////////////
// Find the strongest numLights lights to illuminate the span with.
// Weaker lights are faded out in effect so they won't pop when the
// strongest N changes membership.
//   span      - span being lit; supplies the light list, strengths and scales.
//   numLights - maximum number of lights to take from the span's list.
//   proj      - true to process the span's projected lights, false for normal ones.
// The first numLights entries of the list are taken as-is, which relies on the
// list being ordered strongest first -- NOTE(review): confirm with GetLightList().
// NOTE(review): several lines appear to be missing from this listing (clearing
// of the static scratch containers, filling of onLights, an 'else' for the
// projected case); the notes below mark each place -- confirm.
void    plDXPipeline::ISelectLights( plSpan *span, int numLights, hsBool proj )
    int                 i, startScale;
    // Function-local statics: the same scratch containers are reused by every call.
    static hsBitVector  newFlags;   
    static hsTArray<plLightInfo*>   onLights;
    plDXLightRef        *ref;
    float               threshhold, overHold = 0.3, scale;

    /// Build new flags

    /// Step 1: Find the n strongest lights

    // Skip selection entirely when the debug flags rule out this kind of light.
    if  (!IsDebugFlagSet(plPipeDbg::kFlagNoRuntimeLights) &&
        !(IsDebugFlagSet(plPipeDbg::kFlagNoApplyProjLights) && proj) &&
        !(IsDebugFlagSet(plPipeDbg::kFlagOnlyApplyProjLights) && !proj))
        hsTArray<plLightInfo*>& spanLights = span->GetLightList(proj);

        for( i = 0; i < spanLights.GetCount() && i < numLights; i++ )
            ref = (plDXLightRef *)spanLights[i]->GetDeviceRef();

            // Refresh the D3D copy of a changed light, reserving an index first
            // when the ref's index is still 0.
            if( ref->IsDirty() )
                if( ref->fD3DIndex == 0 )
                    ref->fD3DIndex = fLights.ReserveD3DIndex();
                ref->UpdateD3DInfo( fD3DDevice, &fLights );
                ref->SetDirty( false );

            newFlags.SetBit( ref->fD3DIndex );
        // i is now the number of lights selected.
        startScale = i;

        /// Attempt #2: Take some of the n strongest lights (below a given threshhold) and
        /// fade them out to nothing as they get closer to the bottom. This way, they fade
        /// out of existence instead of pop out.

        // Only fade when at least two lights in the list were left unselected.
        if( i < spanLights.GetCount() - 1 && i > 0 )
            // threshhold is the strength of the first light that was not selected;
            // overHold is 1.5x that, capped at the strength of light 0.
            threshhold = span->GetLightStrength( i, proj );
            overHold = threshhold * 1.5f;
            if( overHold > span->GetLightStrength( 0, proj ) )
                overHold = span->GetLightStrength( 0, proj );

            // Walk back from index i while strengths are below overHold, scaling
            // each light by how close it is to the threshhold.
            // NOTE(review): the divisor is zero when overHold == threshhold, but
            // the loop condition is then false at index i, provided the list is
            // ordered strongest first -- confirm.
            for( ; i > 0 && span->GetLightStrength( i, proj ) < overHold; i-- )
                scale = ( overHold - span->GetLightStrength( i, proj ) ) / ( overHold - threshhold );

                ref = (plDXLightRef *)spanLights[i]->GetDeviceRef();

                IScaleD3DLight( ref, (1 - scale) * span->GetLightScale(i, proj) );
            // Lights below this index were not faded.
            startScale = i + 1;

        /// Make sure those lights that aren't scaled....aren't
        for( i = 0; i < startScale; i++ )
            ref = (plDXLightRef *)spanLights[i]->GetDeviceRef();
            IScaleD3DLight(ref, span->GetLightScale(i, proj) );


    // If these are non-projected lights, go ahead and enable them.
    // For the projected lights, don't enable, just remember who they are.
    if( !proj )
        // A little change here. Some boards get sticky about exactly
        // how many lights you have enabled, whether you are currently
        // rendering or not. So if we go through enabling the lights
        // we want and disabling the ones we don't, then even though
        // at the end of the loop, less than MaxNumLights are enabled,
        // we can still wind up screwed.
        // Think about if we have 8 lights enabled, and they all happen
        // to be at the end of fLights. Now we want to enable a different
        // 8 lights, which happen to be at the beginning of the list.
        // So we loop through and enable the lights we want, and then later
        // in the loop disable the lights we don't want. Problem is that
        // when we were enabling the ones we want we went over our 8 light
        // limit, and some boards (ATI) react by ignoring the enable request.
        // So then we disable the other lights at the end of the loop, but
        // it's too late because our enable requests at the beginning of the
        // loop were ignored.
        // Solution is to go through the list twice, first disabling, then
        // enabling. mf
        // Pass 1: disable lights that are enabled now but not wanted any more.
        hsBitVector newOff = fLights.fEnabledFlags - newFlags;
        hsBitIterator iterOff(newOff);
        for( iterOff.Begin(); !iterOff.End(); iterOff.Advance() )
            fD3DDevice->LightEnable(iterOff.Current(), false);

        // Pass 2: enable lights that are wanted but not enabled yet.
        hsBitVector newOn = newFlags - fLights.fEnabledFlags;
        hsBitIterator iterOn(newOn);
        for( iterOn.Begin(); !iterOn.End(); iterOn.Advance() )
            fD3DDevice->LightEnable(iterOn.Current(), true);
        fLights.fEnabledFlags = newFlags;
        // NOTE(review): the loop below presumably is the else branch (projected
        // lights), sorting onLights by OverAll(); neither the 'else' nor the
        // statements under the 'if' are visible in this listing -- confirm.
        for( i = 0; i < onLights.GetCount(); i++ )
            if( onLights[i]->OverAll() )

// IDisableSpanLights /////////////////////////////////////////////////////
// Disable all the enabled lights, remembering which they are for
// quick reenabling.
// Scans D3D light indices 0 through fLights.fLastIndex and disables each one
// whose bit is set in fLights.fEnabledFlags.
// NOTE(review): no visible line records the disabled lights (for example in
// fLights.fHoldFlags) or clears fEnabledFlags in this listing -- confirm.
void plDXPipeline::IDisableSpanLights()
    int i;
    for( i = 0; i < fLights.fLastIndex + 1; i++ )
        if( fLights.fEnabledFlags.IsBitSet(i) )
            fD3DDevice->LightEnable(i, false);

// IRestoreSpanLights //////////////////////////////////////////////////////
// Re-enable all the lights disabled by the matching IDisableSpanLights.
// Scans D3D light indices 0 through fLights.fLastIndex and enables each one
// whose bit is set in fLights.fHoldFlags.
// NOTE(review): no visible line updates fEnabledFlags or clears fHoldFlags
// afterwards in this listing -- confirm.
void plDXPipeline::IRestoreSpanLights()
    int i;
    for( i = 0; i < fLights.fLastIndex + 1; i++ )
        if( fLights.fHoldFlags.IsBitSet(i) )
            fD3DDevice->LightEnable(i, true);

//// IScaleD3DLight ///////////////////////////////////////////////////////////
// Scale the D3D light by the given scale factor, used for fading lights
// in and out by importance.
//   ref   - light ref whose D3D light is rewritten; ref->fD3DInfo itself is
//           left unscaled and serves as the base for every rescale.
//   scale - intensity factor; quantized to steps of 0.1 before use.
void    plDXPipeline::IScaleD3DLight( plDXLightRef *ref, hsScalar scale )
    // Truncate to one decimal place so small changes in scale do not trigger a
    // SetLight call.
    scale = int(scale * 1.e1f) * 1.e-1f;
    if( ref->fScale != scale )
        // Work on a copy so the unscaled values stay in ref->fD3DInfo.
        D3DLIGHT9       light = ref->fD3DInfo;

        // Only the r, g and b channels are scaled; alpha is left as is.
        light.Diffuse.r *= scale;
        light.Diffuse.g *= scale;
        light.Diffuse.b *= scale;

        light.Ambient.r *= scale;
        light.Ambient.g *= scale;
        light.Ambient.b *= scale;

        light.Specular.r *= scale;
        light.Specular.g *= scale;
        light.Specular.b *= scale;

        fD3DDevice->SetLight( ref->fD3DIndex, &light );
        // Remember the applied scale for the check above.
        ref->fScale = scale;

// inlPlToDWORDColor /////////////////////////////////////////////////
// Convert a plasma floating point color to a D3D DWORD color
// Layout is alpha in bits 24-31, red in 16-23, green in 8-15, blue in 0-7.
// Each channel is multiplied by 255.99 and truncated. No clamping is done
// here, so this presumably expects channels in [0, 1]; a value outside that
// range would spill into neighbouring bits -- NOTE(review): confirm callers.
static inline DWORD inlPlToDWORDColor(const hsColorRGBA& c)
    return (DWORD(c.a * 255.99f) << 24)
        | (DWORD(c.r * 255.99f) << 16)
        | (DWORD(c.g * 255.99f) << 8)
        | (DWORD(c.b * 255.99f) << 0);

// inlPlToD3DColor ////////////////////////////////////////////////////
// Convert a plasma floating point color to a D3D floating point color.
//   c - source color; only its r, g and b are used.
//   a - alpha to store in the result (c.a is ignored).
// NOTE(review): the declaration of 'ret' (presumably a D3DCOLORVALUE local) is
// not visible in this listing -- confirm.
inline D3DCOLORVALUE plDXPipeline::inlPlToD3DColor(const hsColorRGBA& c, float a) const
    ret.r = c.r; 
    ret.g = c.g; 
    ret.b = c.b; 
    ret.a = a; 
    return ret; 

// inlEnsureLightingOn ////////////////////////////////////////////////
// Turn D3D lighting on if it isn't already.
// fCurrD3DLiteState caches the last D3DRS_LIGHTING value sent, so the render
// state is only touched when it actually changes.
inline void plDXPipeline::inlEnsureLightingOn()
    if( !fCurrD3DLiteState )
        fD3DDevice->SetRenderState( D3DRS_LIGHTING, TRUE );
        fCurrD3DLiteState = true;

// inlEnsureLightingOff ///////////////////////////////////////////////
// Turn D3D lighting off if it isn't already.
// fCurrD3DLiteState caches the last D3DRS_LIGHTING value sent, so the render
// state is only touched when it actually changes.
inline void plDXPipeline::inlEnsureLightingOff()
    if( fCurrD3DLiteState )
        fD3DDevice->SetRenderState( D3DRS_LIGHTING, FALSE );
        fCurrD3DLiteState = false;

// ColorMul ///////////////////////////////////////////////////////////
// Multiply a D3D floating point color by a plasma floating point color,
// returning the result as a D3D floating point color.
// The product is taken per channel and includes alpha.
// NOTE(review): the declaration of 'ret' (presumably a D3DCOLORVALUE local) is
// not visible in this listing -- confirm.
static inline D3DCOLORVALUE ColorMul(const D3DCOLORVALUE& c0, const hsColorRGBA& c1)
    ret.r = c0.r * c1.r;
    ret.g = c0.g * c1.g;
    ret.b = c0.b * c1.b;
    ret.a = c0.a * c1.a;

    return ret;

//// ICalcLighting ////////////////////////////////////////////////////////////
// Kind of misnamed. Sets the D3D material lighting model based on what we're
// currently doing.
// currLayer supplies the preshade, runtime, ambient and specular colors, the
// opacity and the specular power read below. currSpan (may be nil) supplies
// the lighting-method bits (fProps & plSpan::kLiteMask); with a nil span the
// method is plSpan::kLiteMaterial.
// Writes the device material, D3DRS_AMBIENT, the *MATERIALSOURCE render states
// and fCurrLightingMethod; for bump-map layers it also ORs shade flags into
// fLayerState[0].
// NOTE(review): the braces and several control-flow keywords (else / break /
// return / default) this body needs are not present in the text below; the
// indentation hints at where they belong. Restore them from the full source
// before relying on the exact branch structure.
void    plDXPipeline::ICalcLighting( const plLayerInterface *currLayer, const plSpan *currSpan )
    D3DMATERIAL9    mat;
    // Function-local statics, both 1.0 and never reassigned in this function;
    // they scale the preshade/runtime colors and the ambient color respectively.
    static hsScalar diffScale = 1.f;
    static hsScalar ambScale = 1.f;
    UInt32          props;


            /// New (temporary) lighting method:
            /// The vertices now include the following:
            ///     diffuse = maxVertexColor * matDiffuse + matAmbient
            ///     specular = ( maxLighting + maxIllum ) * matDiffuse + matAmbient
            /// And we want the lighting set up like:
            ///     L = I*v1 + v2 + (sigma)(light stuff * v3 + 0)
            /// Where I = 0 for now (will be the environmental light constant eventually),
            /// v1 is the diffuse vertex color and v2 is the specular vertex color.
            /// So it basically translates into:
            ///     D3D ambient color = diffuse vertex color
            ///     D3D ambient constant = environmental light constant (0 for now)
            ///     D3D emissive color = specular vertex color
            ///     D3D diffuse color = diffuse vertex color

    /// We now provide three lighting equations at the pipeline's disposal:
    ///     Material: (the one we all know and love)
    ///             MATd * VTXd + MATa + <sigma of lighting w/ MATd>
    ///     Vtx preshaded: (particle systems)
    ///             MATa * VTXd + 0 + <sigma of lighting w/ VTXd>
    ///     Vtx non-preshaded:
    ///             white * VTXd + MATa + <sigma of lighting w/ VTXd>
    /// We also have a few more for shadows and such, which are handled individually
    // Start from an all-zero (black, zero power) material.
    memset( &mat, 0, sizeof( mat ) );

    /// Normal rendering--select the right lighting equation
    // Debug override: white diffuse/ambient/emissive material and white global ambient.
    // NOTE(review): presumably this branch returns early -- confirm.
    if (IsDebugFlagSet(plPipeDbg::kFlagAllBright))
        mat.Diffuse.r = mat.Diffuse.g = mat.Diffuse.b = mat.Diffuse.a = 1.f;
        mat.Ambient.r = mat.Ambient.g = mat.Ambient.b = mat.Ambient.a = 1.f;
        mat.Emissive.r = mat.Emissive.g = mat.Emissive.b = mat.Emissive.a = 1.f;
        fD3DDevice->SetMaterial( &mat );
        fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0xffffffff );
    // Lighting method comes from the span's lite bits; no span means material shading.
    props = ( currSpan != nil ) ? ( currSpan->fProps & plSpan::kLiteMask ) : plSpan::kLiteMaterial;
    // Bump-map layers are forced to material shading and flagged no-shade + white.
    if( fLayerState[0].fMiscFlags & hsGMatState::kMiscBumpChans )
        props = plSpan::kLiteMaterial;
        fLayerState[0].fShadeFlags |= hsGMatState::kShadeNoShade | hsGMatState::kShadeWhite;
    /// Select one of our three lighting methods
    switch( props )
    case plSpan::kLiteMaterial:     // Material shading
        ///     Material: (the one we all know and love)
        ///             MATd * VTXd + MATa + <sigma of lighting w/ MATd>
        // D3D ambient - give it our material static diffuse, since it will be multiplied by the vertex color
        if( fLayerState[0].fShadeFlags & hsGMatState::kShadeWhite )
            mat.Ambient.r = mat.Ambient.g = mat.Ambient.b = diffScale;
            mat.Ambient.a = 1.f;
        else if (IsDebugFlagSet(plPipeDbg::kFlagNoPreShade))
            mat.Ambient.r = mat.Ambient.g = mat.Ambient.b = 0;
            mat.Ambient.a = 1.f;
            // NOTE(review): presumably the final else arm (normal case: preshade color) -- confirm.
            mat.Ambient = inlPlToD3DColor(currLayer->GetPreshadeColor() * diffScale, 1.f);
        // D3D diffuse - give it our runtime material diffuse
        mat.Diffuse = inlPlToD3DColor(currLayer->GetRuntimeColor() * diffScale, currLayer->GetOpacity());
        // D3D emissive - give it our material ambient
        mat.Emissive = inlPlToD3DColor(currLayer->GetAmbientColor() * ambScale, 1.f);
        // Set specular properties
        if( fLayerState[0].fShadeFlags & hsGMatState::kShadeSpecular )
            mat.Specular = inlPlToD3DColor( currLayer->GetSpecularColor(), 1.f);
            mat.Power = currLayer->GetSpecularPower();
        fD3DDevice->SetMaterial( &mat );
        // Global ambient: white for kShadeWhite layers, otherwise the material ambient just computed.
        // NOTE(review): the two SetRenderState calls below look like the arms of an if/else -- confirm.
        if( fLayerState[0].fShadeFlags & hsGMatState::kShadeWhite )
            fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0xffffffff );
            // The cast reinterprets the D3DCOLORVALUE as an hsColorRGBA; presumably both are
            // four consecutive floats in r,g,b,a order -- confirm.
            fD3DDevice->SetRenderState( D3DRS_AMBIENT, inlGetD3DColor( *(hsColorRGBA*)&mat.Ambient ) );
        if( fLayerState[0].fShadeFlags & hsGMatState::kShadeNoShade )
            fD3DDevice->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_COLOR1 );
        fCurrLightingMethod = plSpan::kLiteMaterial;
    case plSpan::kLiteVtxPreshaded:  // Vtx preshaded
        //              MATa * VTXd  + 0     + <sigma of lighting w/ VTXd>
        // Mapping to:  GLa  * AMSrc + EMSrc + <.....................DMSrc> 
        if( fLayerState[0].fShadeFlags & hsGMatState::kShadeEmissive )
            // Set a black material (we ONLY care about vertex color when doing particles, 
            //                       er I mean, vtxPreshaded)
            fD3DDevice->SetMaterial( &mat );
            fD3DDevice->SetRenderState( D3DRS_DIFFUSEMATERIALSOURCE, D3DMCS_COLOR1 );
            fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0 );
        // MATa * white + 0 + <sigma of lighting with VTXd>

        // mat is still the zeroed material here: nothing in this case modifies it.
        fD3DDevice->SetMaterial( &mat );
        fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0 );

        if( fLayerState[0].fShadeFlags & hsGMatState::kShadeEmissive )
            fD3DDevice->SetRenderState( D3DRS_EMISSIVEMATERIALSOURCE, D3DMCS_COLOR1 );

        fCurrLightingMethod = plSpan::kLiteVtxPreshaded;
    case plSpan::kLiteVtxNonPreshaded:      // Vtx non-preshaded
        //              white * VTXd + MATa  + <sigma of lighting w/ VTXd>
        // Mapping to:  GLa  * AMSrc + EMSrc + <.....................DMSrc>
        // D3D emissive - give it our material ambient
        mat.Emissive = inlPlToD3DColor(currLayer->GetAmbientColor() * ambScale, 1.f);
        // Set specular properties
        if( fLayerState[0].fShadeFlags & hsGMatState::kShadeSpecular )
            mat.Specular = inlPlToD3DColor( currLayer->GetSpecularColor(), 1.f);
            mat.Power = currLayer->GetSpecularPower();
        fD3DDevice->SetMaterial( &mat );
        // Lightmaps want WHITE here, otherwise we want BLACK
        // The global ambient is the layer's preshade color packed into a DWORD.
        DWORD preShadeStrength;
        preShadeStrength = inlPlToDWORDColor(currLayer->GetPreshadeColor());
        fD3DDevice->SetRenderState(D3DRS_AMBIENT, preShadeStrength);
        fCurrLightingMethod = plSpan::kLiteVtxNonPreshaded;
        // NOTE(review): presumably the fall-back (default) arm of the switch -- confirm.
        hsAssert( false, "Bad lighting type" );


//// plDXLightSettings Functions /////////////////////////////////////////////

// NOTE(review): only a single member initializer (fActiveList set to nil) is
// present at this point; the constructor's signature, any further initializers
// and its body are not. Restore them from the full source before building.
:   fActiveList(nil),

//// Reset ////////////////////////////////////////////////////////////////////
//  Sets member variables to initial states. 
//  pipe is stored in fPipeline; Release() later calls UnRegisterLight through it.
//  NOTE(review): the body's braces are not present in the text below, and the
//  gap right after the signature suggests a leading statement may be missing
//  too -- confirm against the full source.

void    plDXLightSettings::Reset( plDXPipeline *pipe )

    fNextShadowLight = 0;

    // Index bookkeeping starts at 1 for both the next free and the highest used index.
    fNextIndex = 1;     /// Light 0 is reserved
    fLastIndex = 1;
    fTime = 0;
    fRefList = nil;
    fPipeline = pipe;

//// Release //////////////////////////////////////////////////////////////////
//  Releases/deletes anything associated with these settings.
// This includes unregistering all lights.
// NOTE(review): the body's braces are not present in the text below, and some
// statements appear to be missing (see the notes on the loops).
void    plDXLightSettings::Release()
    plDXLightRef    *ref;


    // NOTE(review): the statements that dispose of each ref and advance fRefList
    // are not present here; as written this loop would never terminate for a
    // non-nil fRefList. Confirm against the full source.
    while( fRefList )
        ref = fRefList;

    // Tell the light infos to unlink themselves
    // Relies on UnRegisterLight() removing the light from fActiveList -- presumably; confirm.
    while( fActiveList )
        fPipeline->UnRegisterLight( fActiveList );

    // Drop every shadow-light pointer; the array keeps its length.
    int i;
    for( i = 0; i < fShadowLights.GetCount(); i++ )
        fShadowLights[i] = nil;


//// ReserveD3DIndex //////////////////////////////////////////////////////////
//  Reserve a D3D light index.
//  Scans forward from fNextIndex for the first index whose bit is clear in
//  fUsedFlags, marks it used, clears its enabled and hold bits, raises
//  fLastIndex if the new index is beyond it, and returns the index.

UInt32  plDXLightSettings::ReserveD3DIndex()
{
    // Stop at the first free slot; fNextIndex is left pointing at it.
    for( ; fNextIndex < (UInt32)-1; fNextIndex++ )
    {
        if( !fUsedFlags.IsBitSet( fNextIndex ) )
            break;
    }

    fUsedFlags.SetBit( fNextIndex );
    fEnabledFlags.ClearBit( fNextIndex );   // Ensure it's cleared
    fHoldFlags.ClearBit( fNextIndex );
    if( fNextIndex > fLastIndex )
        fLastIndex = fNextIndex;

    return fNextIndex;
}

//// ReleaseD3DIndex //////////////////////////////////////////////////////////
//  Release a reserved D3D light index to be reused.
//  idx - the index to free (presumably one returned by ReserveD3DIndex()).
//  Clears the index's bit in fUsedFlags, pulls fNextIndex back so the next
//  reservation search starts no later than idx, and walks fLastIndex down past
//  any trailing unused indices.

void    plDXLightSettings::ReleaseD3DIndex( UInt32 idx )
{
    fUsedFlags.SetBit( idx, false );
    if( fNextIndex > idx )
        fNextIndex = idx;       // Forces search to start here next time

    // Dec down fLastIndex
    while( fLastIndex > 0 && !fUsedFlags.IsBitSet( fLastIndex ) )
        fLastIndex--;

    // Never let the search start beyond the highest index still in use.
    if( fNextIndex > fLastIndex )
        fNextIndex = fLastIndex;
}

//// Materials ////////////////////////////////////////////////////////////////

//// ISetLayer ////////////////////////////////////////////////////////////////
// Sets whether we're rendering a base layer or upper layer. Upper layer has
// a Z bias to avoid Z fighting.
// lay - 0 for the base layer, non-zero for an upper layer; the value is cached
// in fCurrRenderLayer so device state is only touched when it changes.
// NOTE(review): the braces are not present in the text below and no handling
// of lay == 0 is visible. Also, the inner test reads "device does NOT have
// kCapsZBias" yet the only statement under it sets D3DRS_DEPTHBIAS; an
// alternate path and an else may be missing -- confirm against the full source.
void    plDXPipeline::ISetLayer( UInt32 lay )
    if( lay )
        if( fCurrRenderLayer != lay )
            fCurrRenderLayer = lay;

            // Depth bias applied is kBiasMult times the layer number.
            plCONST(int) kBiasMult = 8;
            if( !( fSettings.fD3DCaps & kCapsZBias ) )
                fD3DDevice->SetRenderState( D3DRS_DEPTHBIAS, kBiasMult * fCurrRenderLayer );

//// IBottomLayer /////////////////////////////////////////////////////////////
// Turn off any Z bias.
// Does nothing when fCurrRenderLayer is already 0.
// NOTE(review): the braces are not present in the text below. The inner test
// reads "device does NOT have kCapsZBias" yet the only statement under it
// sets D3DRS_DEPTHBIAS; an alternate path and an else may be missing --
// confirm against the full source.
void    plDXPipeline::IBottomLayer()
    if( fCurrRenderLayer != 0 )
        fCurrRenderLayer = 0;
        if( !( fSettings.fD3DCaps & kCapsZBias ) )
            fD3DDevice->SetRenderState( D3DRS_DEPTHBIAS, 0 );

// Special effects /////////////////////////////////////////////////////////////

// IPushOverBaseLayer /////////////////////////////////////////////////////////
// Sets fOverBaseLayer (if any) as a wrapper on top of input layer.
// This allows the OverBaseLayer to intercept and modify queries of
// the real current layer's properties (e.g. color or state).
// fOverBaseLayer is set to only get applied to the base layer during
// multitexturing.
// Must be matched with call to IPopOverBaseLayer.
// Returns nil for a nil input, otherwise the layer the caller should use in
// place of li (the updated fOverBaseLayer).
// NOTE(review): the braces are not present in the text below, and nothing
// visible here pushes onto fOverLayerStack even though IPopOverBaseLayer pops
// from it -- presumably a push belongs in the gap after the nil check;
// confirm against the full source.
plLayerInterface* plDXPipeline::IPushOverBaseLayer(plLayerInterface* li)
    if( !li )
        return nil;


    // With no override installed, li itself becomes fOverBaseLayer and is returned as-is.
    if( !fOverBaseLayer )
        return fOverBaseLayer = li;

    // Otherwise attach the override to li, flag material state for recomputation,
    // and evaluate the result for the current time and frame.
    fForceMatHandle = true;
    fOverBaseLayer = fOverBaseLayer->Attach(li);
    fOverBaseLayer->Eval(fTime, fFrame, 0);
    return fOverBaseLayer;

// IPopOverBaseLayer /////////////////////////////////////////////////////////
// Removes fOverBaseLayer as wrapper on top of input layer.
// Should match calls to IPushOverBaseLayer.
// li is only tested for nil (a nil input returns nil and changes nothing).
// The layer actually detached is the one popped from fOverLayerStack, and
// that popped layer is what gets returned.
plLayerInterface* plDXPipeline::IPopOverBaseLayer(plLayerInterface* li)
{
    if( !li )
        return nil;

    // Layer wrapping is changing, so material state must be recomputed.
    fForceMatHandle = true;

    plLayerInterface* pop = fOverLayerStack.Pop();
    fOverBaseLayer = fOverBaseLayer->Detach(pop);

    return pop;
}

// IPushOverAllLayer ///////////////////////////////////////////////////
// Push fOverAllLayer (if any) as wrapper around the input layer.
// fOverAllLayer is set to be applied to each layer during multitexturing.
// Must be matched by call to IPopOverAllLayer
// Returns nil for a nil input, otherwise the layer the caller should use in
// place of li (the updated fOverAllLayer).
// NOTE(review): the braces are not present in the text below, and nothing
// visible here pushes onto fOverLayerStack even though IPopOverAllLayer pops
// from it -- presumably a push belongs in the gap after the nil check;
// confirm against the full source.
plLayerInterface* plDXPipeline::IPushOverAllLayer(plLayerInterface* li)
    if( !li )
        return nil;


    // With no override installed, li itself becomes fOverAllLayer; it is
    // evaluated for the current time/frame and returned.
    if( !fOverAllLayer )
        fOverAllLayer = li;
        fOverAllLayer->Eval(fTime, fFrame, 0);
        return fOverAllLayer;

    // Otherwise attach the override to li, flag material state for recomputation,
    // and evaluate the result.
    fForceMatHandle = true;
    fOverAllLayer = fOverAllLayer->Attach(li);
    fOverAllLayer->Eval(fTime, fFrame, 0);

    return fOverAllLayer;

// IPopOverAllLayer //////////////////////////////////////////////////
// Remove fOverAllLayer as wrapper on top of input layer.
// Should match calls to IPushOverAllLayer.
// li is only tested for nil (a nil input returns nil and changes nothing).
// The layer actually detached is the one popped from fOverLayerStack, and
// that popped layer is what gets returned.
plLayerInterface* plDXPipeline::IPopOverAllLayer(plLayerInterface* li)
{
    if( !li )
        return nil;

    // Layer wrapping is changing, so material state must be recomputed.
    fForceMatHandle = true;

    plLayerInterface* pop = fOverLayerStack.Pop();
    fOverAllLayer = fOverAllLayer->Detach(pop);

    return pop;
}

// PiggyBacks - used in techniques like projective lighting.
// PiggyBacks are layers appended to each drawprimitive pass.
// For example, if a material has 3 layers which will be drawn
// in 2 passes,
//      pass0: layer0+layer1
//      pass1: layer2
// Then if a piggyback layer layerPB is active, the actual rendering would be
//      pass0: layer0+layer1+layerPB
//      pass1: layer2 + layerPB

// ISetNumActivePiggyBacks /////////////////////////////////////////////
// Calculate the number of active piggy backs.
int plDXPipeline::ISetNumActivePiggyBacks()
    return fActivePiggyBacks = hsMinimum(fSettings.fMaxPiggyBacks, fPiggyBackStack.GetCount());

// IPushProjPiggyBack //////////////////////////////////////////////////
// Push a projected texture on as a piggy back.
// NOTE(review): the braces are not present in the text below, the statement
// controlled by the kRenderNoPiggyBacks test is missing (presumably an early
// return), and li is never referenced by the visible statements (presumably
// it is pushed onto fPiggyBackStack) -- confirm against the full source.
void plDXPipeline::IPushProjPiggyBack(plLayerInterface* li)
    if( fView.fRenderState & plPipeline::kRenderNoPiggyBacks )

    // Active piggybacks are the stack entries that did not come from the material.
    fActivePiggyBacks = fPiggyBackStack.GetCount() - fMatPiggyBacks;
    fForceMatHandle = true;

// IPopProjPiggyBacks /////////////////////////////////////////////////
// Remove a projected texture from use as a piggy back.
// NOTE(review): the braces are not present in the text below, the statement
// controlled by the kRenderNoPiggyBacks test is missing (presumably an early
// return), and no statement that actually removes anything from the piggyback
// stack is visible -- confirm against the full source.
void plDXPipeline::IPopProjPiggyBacks()
    if( fView.fRenderState & plPipeline::kRenderNoPiggyBacks )

    fForceMatHandle = true;

// IPushPiggyBacks ////////////////////////////////////////////////////
// Push any piggy backs associated with a material, presumed to
// be a light map because that's all they are used for.
// Matched with IPopPiggyBacks
// Asserts that no material piggybacks are outstanding (fMatPiggyBacks == 0)
// before pushing.
// NOTE(review): the braces are not present in the text below, and the
// statements controlled by the kRenderNoPiggyBacks test and by both tests
// inside the loop are missing, as is whatever pushes a piggyback and counts it
// in fMatPiggyBacks -- confirm against the full source.
void plDXPipeline::IPushPiggyBacks(hsGMaterial* mat)
    hsAssert(!fMatPiggyBacks, "Push/Pop Piggy mismatch");

    if( fView.fRenderState & plPipeline::kRenderNoPiggyBacks )

    int i;
    for( i = 0; i < mat->GetNumPiggyBacks(); i++ )
        // Test for an empty piggyback slot.
        if( !mat->GetPiggyBack(i) )

        // Test for a light-map piggyback while the no-lightmaps debug flag is set.
        if ((mat->GetPiggyBack(i)->GetMiscFlags() & hsGMatState::kMiscLightMap)
            && IsDebugFlagSet(plPipeDbg::kFlagNoLightmaps))

    fForceMatHandle = true;

// IPopPiggyBacks ///////////////////////////////////////////////////////
// Pop any current piggy backs set from IPushPiggyBacks.
// Matches IPushPiggyBacks.
// NOTE(review): the braces are not present in the text below, and the
// statement controlled by the kRenderNoPiggyBacks test is missing (presumably
// an early return) -- confirm against the full source.
void plDXPipeline::IPopPiggyBacks()
    if( fView.fRenderState & plPipeline::kRenderNoPiggyBacks )

    // Shrink the stack by the number of material piggybacks, then zero that count.
    fPiggyBackStack.SetCount(fPiggyBackStack.GetCount() - fMatPiggyBacks);
    fMatPiggyBacks = 0;

    fForceMatHandle = true;

//// IHandleMaterial //////////////////////////////////////////////////////////
//  Takes the starting "layer" and uses as many layers as possible in the given
//  material and sets up the device to draw with it. Returns the first layer
//  index not yet used. (I.e. if we ate layers 0 and 1, it'll return 2).
// A return value of -1 means don't bother rendering.
//  newMat   - material to draw with; nil, or a nil starting layer, yields -1.
//  layer    - index of the first layer to consume in this pass.
//  currSpan - span being drawn (may be nil); passed on to the lighting and
//             bump-matrix setup.
// NOTE(review): the braces and a number of short statements (else arms, switch
// breaks, a few one-line bodies) are not present in the text below; the places
// where a body is visibly absent are marked. They are deliberately NOT guessed
// at here -- restore them from the full source before building.

Int32   plDXPipeline::IHandleMaterial( hsGMaterial *newMat, UInt32 layer, const plSpan *currSpan )
    // No material means no draw.
    // The nil test has to short-circuit BEFORE newMat is dereferenced, and a nil
    // starting layer is just as undrawable (it is dereferenced further down).
    if( !newMat || ((layer < newMat->GetNumLayers()) && !newMat->GetLayer(layer)) )
        return -1;

    // If this is a bump mapping pass but the object isn't currently runtime lit, just skip.
    // Note that <layer> may change here, if we're skipping past the bump layers but there
    // are more layers (passes) to do after that.
    if( ISkipBumpMap(newMat, layer, currSpan) )
        return -1;

    // Workaround for the ATI Radeon 7500's inability to use uvw coordinates above 1.
    // If we have a layer trying to use uvw 2 or higher, skip it and any layers bound to
    // it.
    // NOTE(review): the statements that advance <layer> inside this loop are missing.
    while( (layer < newMat->GetNumLayers()) 
        && newMat->GetLayer(layer) 
        && ((newMat->GetLayer(layer)->GetUVWSrc() & 0xf) > fSettings.fMaxUVWSrc) )
        if( newMat->GetLayer(layer)->GetMiscFlags() & hsGMatState::kMiscBindNext )
    if( layer >= newMat->GetNumLayers() )
        return -1;

    // If nothing has changed, we don't need to recompute and set state.
    if( !fForceMatHandle && (newMat == fCurrMaterial && layer == fCurrLayerIdx) )
        // Before returning, check if we have to redo our lighting
        UInt32      lightType = ( currSpan != nil ) ? ( currSpan->fProps & plSpan::kLiteMask ) : plSpan::kLiteMaterial;
        if( lightType != fCurrLightingMethod )
            ICalcLighting( fCurrLay, currSpan );    
        if( fLayerState[0].fMiscFlags & (hsGMatState::kMiscBumpDu|hsGMatState::kMiscBumpDw) )
            ISetBumpMatrices(fCurrLay, currSpan);

        return layer + fCurrNumLayers;

    fForceMatHandle = false;

    fCurrLayerIdx = layer;
//  fCurrNumLayers = newMat->GetNumLayers();

    // NOTE(review): the statement controlled by this test is missing.
    if (newMat != fCurrMaterial)

    /// Test for fail states
    if (IsDebugFlagSet(plPipeDbg::kFlagNoDecals) && (newMat->GetCompositeFlags() & hsGMaterial::kCompDecal))
        return -1;

    /// Workaround for a D3D limitation--you're not allowed to render with a texture that you're
    /// rendering INTO. Hence we can't have self-reflecting cubicRenderTargets (damn)
    if( fSettings.fCurrBaseRenderTarget != nil && 
        newMat->GetLayer( layer )->GetTexture() == plBitmap::ConvertNoRef( fSettings.fCurrBaseRenderTarget ) )
        return -1;

    /// Figure out our current states
    // Start with the base layer.
    plLayerInterface    *currLay = IPushOverBaseLayer(newMat->GetLayer(layer));
    if (IsDebugFlagSet(plPipeDbg::kFlagBumpW) && (currLay->GetMiscFlags() & hsGMatState::kMiscBumpDu) )
        currLay = newMat->GetLayer(fCurrLayerIdx = ++layer);

    currLay = IPushOverAllLayer(currLay);

    /// Save stuff for next time around
    ICompositeLayerState(0, currLay);
    hsRefCnt_SafeAssign( fCurrMaterial, newMat );
    fCurrLayerIdx = layer;
    fCurrLay = currLay;

    if (IsDebugFlagSet(plPipeDbg::kFlagDisableSpecular))
        fLayerState[0].fShadeFlags &= ~hsGMatState::kShadeSpecular;

    // ZIncLayer requests Z bias for upper layers.
    // NOTE(review): presumably an else arm (base layer, no bias) belonged here -- confirm.
    if( fLayerState[0].fZFlags & hsGMatState::kZIncLayer )
        ISetLayer( 1 );
    /// A few debugging things
    if (IsDebugFlagSet(plPipeDbg::kFlagNoAlphaBlending))
        fLayerState[0].fBlendFlags &= ~hsGMatState::kBlendMask;

    if ((IsDebugFlagSet(plPipeDbg::kFlagBumpUV) || IsDebugFlagSet(plPipeDbg::kFlagBumpW)) && (fLayerState[0].fMiscFlags & hsGMatState::kMiscBumpChans) )
        switch( fLayerState[0].fMiscFlags & hsGMatState::kMiscBumpChans )
        case hsGMatState::kMiscBumpDu:
        case hsGMatState::kMiscBumpDv:
            if( !(fCurrMaterial->GetLayer(layer-2)->GetBlendFlags() & hsGMatState::kBlendAdd) )
                fLayerState[0].fBlendFlags &= ~hsGMatState::kBlendMask;
                fLayerState[0].fBlendFlags |= hsGMatState::kBlendMADD;
        case hsGMatState::kMiscBumpDw:
            if( !(fCurrMaterial->GetLayer(layer-1)->GetBlendFlags() & hsGMatState::kBlendAdd) )
                fLayerState[0].fBlendFlags &= ~hsGMatState::kBlendMask;
                fLayerState[0].fBlendFlags |= hsGMatState::kBlendMADD;

    /// Get the # of layers we can draw in this pass into fCurrNumLayers
    int oldNumLayers = fCurrNumLayers;
    ILayersAtOnce( newMat, layer );
    if( oldNumLayers != fCurrNumLayers )
        // This hack is necessary to cover a hack necessary to cover a "limitation" in the GeForce2 drivers.
        // Basically, we have to handle NoTexAlpha/Color differently if it's stage 1 than other stages,
        // so even though the BlendFlags haven't changed, the calls to D3D are different. Another
        // way to handle this would be to have a different handler based on whether we are 2 TMU limited
        // or not, but whatever.
        // Setting the flags to UInt32(-1) marks stage 1 as "changed"; ICompositeLayerState
        // checks for exactly that value on the old state.
        if( fLayerState[1].fBlendFlags & (hsGMatState::kBlendNoTexAlpha | hsGMatState::kBlendNoTexColor) )
            fLayerState[1].fBlendFlags = UInt32(-1);

    // Placed here, since it's material-dependent (or more accurately, current-layer-dependent)
    ICalcLighting( currLay, currSpan ); 

    // If we're bump mapping, compute the texture transforms.
    if( fLayerState[0].fMiscFlags & (hsGMatState::kMiscBumpDu|hsGMatState::kMiscBumpDw) )
        ISetBumpMatrices(currLay, currSpan);

    /// Transfer states to D3D now
    IHandleFirstTextureStage( currLay );

    currLay = IPopOverAllLayer(currLay);
    currLay = IPopOverBaseLayer(currLay);
    fCurrLay = currLay;

    int nextLayer = fCurrLayerIdx + fCurrNumLayers;
    if (IsDebugFlagSet(plPipeDbg::kFlagBumpW) && (fLayerState[0].fMiscFlags & hsGMatState::kMiscBumpDw) )
        // Bump mapping approximation using only the W (normal direction) component of lighting.
        plLayerInterface* layPtr = IPushOverAllLayer(newMat->GetLayer(fCurrLayerIdx + 2));
        if( !layPtr )
            return -1;
        ICompositeLayerState(1, layPtr);
        IHandleTextureStage( 1, layPtr );
        layPtr = IPopOverAllLayer(layPtr);
        nextLayer = fCurrLayerIdx + 3;
    else if (IsDebugFlagSet(plPipeDbg::kFlagBumpUV) && (fLayerState[0].fMiscFlags & hsGMatState::kMiscBumpDu) )
        // Bump mapping approximation using only the UV (surface tangent directions) component of lighting.
        plLayerInterface* layPtr = IPushOverAllLayer(newMat->GetLayer(fCurrLayerIdx + 3));
        if( !layPtr )
            return -1;
        ICompositeLayerState(1, layPtr);
        IHandleTextureStage( 1, layPtr );
        layPtr = IPopOverAllLayer(layPtr);
        nextLayer = fCurrLayerIdx + 2;
        // NOTE(review): presumably the final else arm starts here -- confirm.
        // Normal multi texturing.
        /// Loop through all multitexturing layers
        int i;
        if( fView.fRenderState & plPipeline::kRenderBaseLayerOnly )
            nextLayer = newMat->GetNumLayers();

        for( i = 1; i < fCurrNumLayers; i++ )
            plLayerInterface* layPtr = newMat->GetLayer( fCurrLayerIdx + i );
            if( !layPtr )
                return -1;

            // Can't render into a render target using same rendertarget as a texture.
            if( fSettings.fCurrBaseRenderTarget
                && layPtr->GetTexture() == (plBitmap*)(fSettings.fCurrBaseRenderTarget) )
                // Oops, just bail
                return -1;

            layPtr = IPushOverAllLayer(layPtr);
            ICompositeLayerState(i, layPtr);
            IHandleTextureStage( i, layPtr );
            layPtr = IPopOverAllLayer(layPtr);


    // More cleanup for the DX9.0c 2 texture limitation. See ILayersAtOnce()
    if (fSettings.fMaxLayersAtOnce == 2)
        if ((fLayerState[0].fBlendFlags & hsGMatState::kBlendAdd)
            && (newMat->GetNumLayers() > fCurrLayerIdx + 1)
            && (newMat->GetLayer(fCurrLayerIdx + 1)->GetUVWSrc() & plLayerInterface::kUVWPosition))
            // If we're doing additive blending and the next layer is based on position,
            // it's probably a distance fade. We'd rather have our diffuse color.
            // ILayersAtOnce will already have told us we can't use it this pass.
            // Skip it so it won't draw on its own next pass.
            // NOTE(review): the statement that performs the skip is missing.

    int numActivePiggyBacks = 0;
    if( !(fLayerState[0].fMiscFlags & hsGMatState::kMiscBumpChans) && !(fLayerState[0].fShadeFlags & hsGMatState::kShadeEmissive) )
        /// Tack lightmap onto last stage if we have one
        // Clamp to the texture stages left over after this pass's own layers.
        numActivePiggyBacks = fActivePiggyBacks;
        if( numActivePiggyBacks > fSettings.fMaxLayersAtOnce - fCurrNumLayers )
            numActivePiggyBacks = fSettings.fMaxLayersAtOnce - fCurrNumLayers;
        if( numActivePiggyBacks )
            int i;
            for( i = 0; i < numActivePiggyBacks; i++ )
                // Note that we take piggybacks off the end of fPiggyBackStack.
                plLayerInterface* layPtr = IPushOverAllLayer( fPiggyBackStack[fPiggyBackStack.GetCount()-1-i] );
                if( !layPtr )
                    return -1;
                ICompositeLayerState(fCurrNumLayers+i, layPtr);
                IHandleTextureStage( fCurrNumLayers+i, layPtr );
                layPtr = IPopOverAllLayer(layPtr);

            // If we've got a piggyback, plus two layers that must be drawn together, but
            // only two TMU's to work with, we're screwed. Someone has got to get skipped and
            // hope no one notices. Typically, the first (base) layer has the color info,
            // and the second the opacity. So we'll try using the projection to brighten
            // the color, ignoring the opacity. 
//          if( ((fCurrNumLayers + numActivePiggyBacks) == fSettings.fMaxLayersAtOnce)
//                  && (fLayerState[0].fMiscFlags & hsGMatState::kMiscBindNext) )
            // NOTE(review): the statement controlled by this test is missing.
            if( (fLayerState[0].fMiscFlags & hsGMatState::kMiscBindNext)
                && (fCurrNumLayers < 2) )

    // Declare we won't be using any more texture stages.
    IStageStop( fCurrNumLayers + numActivePiggyBacks );

    return nextLayer;

// ICompositeLayerState /////////////////////////////////////////////////////////////////
// Set the current Plasma state based on the input layer state and the material overrides.
// fMatOverOn overrides to set a state bit whether it is set in the layer or not.
// fMatOverOff overrides to clear a state bit whether it is set in the layer or not.
//  which - index into fLayerState / fOldLayerState (the texture stage).
//  layer - layer whose GetState() is composited; it is dereferenced unchecked.
// The previous state is saved in fOldLayerState[which] first. Returns a
// reference to the new fLayerState[which].
const hsGMatState& plDXPipeline::ICompositeLayerState(int which, plLayerInterface* layer)
{
    fOldLayerState[which] = fLayerState[which];
    fLayerState[which].Composite(layer->GetState(), fMatOverOn, fMatOverOff);

    // UInt32(-1) in the saved blend flags is a "force refresh" marker. Replace it
    // with the bitwise complement of the new flags so old and new are guaranteed to differ.
    if( fOldLayerState[which].fBlendFlags == UInt32(-1) )
        fOldLayerState[which].fBlendFlags = ~fLayerState[which].fBlendFlags;

    return fLayerState[which];
}

//// IHandleFirstTextureStage /////////////////////////////////////////////////
// Convert internal material state to D3D state for the base layer.
// After the Z-flag check, the layer is handed to IHandleTextureStage as stage 0.
// NOTE(review): the braces are not present in the text below and the statement
// controlled by the Z-flag test is missing (presumably a call that applies the
// Z mode; other per-mode handlers may have been called here too) -- confirm
// against the full source.
void    plDXPipeline::IHandleFirstTextureStage( plLayerInterface *layer )
    // True when the kZMask bits of the Z flags differ between new and old stage-0 state.
    if( fLayerState[0].Differs( fLayerState[0].fZFlags, fOldLayerState[0].fZFlags, hsGMatState::kZMask ) )

    IHandleTextureStage( 0, layer );

//// IHandleShadeMode /////////////////////////////////////////////////////////
// Convert shade state into D3D settings.
void    plDXPipeline::IHandleShadeMode()
    if( fLayerState[0].Differs( fLayerState[0].fShadeFlags, fOldLayerState[0].fShadeFlags, hsGMatState::kShadeSpecular ) )
        if( fLayerState[0].fShadeFlags & hsGMatState::kShadeSpecular )
            fD3DDevice->SetRenderState( D3DRS_SPECULARENABLE, TRUE );
            fD3DDevice->SetRenderState( D3DRS_SPECULARENABLE, FALSE );

//// IHandleZMode /////////////////////////////////////////////////////////////
// Convert Z state into D3D settings.
void    plDXPipeline::IHandleZMode()
    switch( fLayerState[0].fZFlags & hsGMatState::kZMask )
        case hsGMatState::kZClearZ:
            fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_ALWAYS );
            fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );
        case hsGMatState::kZNoZRead:
            fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_ALWAYS );
            fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );
        case hsGMatState::kZNoZWrite:
            fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_LESSEQUAL );
            fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, FALSE );
        case hsGMatState::kZNoZRead | hsGMatState::kZClearZ:
            fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_ALWAYS );
            fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );
        case hsGMatState::kZNoZRead | hsGMatState::kZNoZWrite:
            fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, FALSE );
            fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_ALWAYS );
        case 0:
            fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_LESSEQUAL );
            fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );

        // illegal combinations
        case hsGMatState::kZClearZ | hsGMatState::kZNoZWrite:
        case hsGMatState::kZClearZ | hsGMatState::kZNoZWrite | hsGMatState::kZNoZRead:
            hsAssert(false, "Illegal combination of Z Buffer modes (Clear but don't write)");

//// IHandleMiscMode //////////////////////////////////////////////////////////
// Convert Misc state into D3D settings.
void    plDXPipeline::IHandleMiscMode()
    if( fLayerState[0].Differs(fLayerState[0].fMiscFlags, fOldLayerState[0].fMiscFlags, hsGMatState::kMiscWireFrame) )
        if( fLayerState[0].fMiscFlags & hsGMatState::kMiscWireFrame )
            fD3DDevice->SetRenderState( D3DRS_FILLMODE, D3DFILL_WIREFRAME );
            fD3DDevice->SetRenderState( D3DRS_FILLMODE, D3DFILL_SOLID );

//// IHandleTextureStage //////////////////////////////////////////////////////
// Issue D3D calls to enable rendering the given layer at the given texture stage.
//  stage - texture stage index; also indexes fLayerState / fOldLayerState / fLayerRef.
//  layer - layer whose texture and transform are applied; dereferenced unchecked.
// NOTE(review): the braces are not present in the text below, the statements
// controlled by the blend-flag and clamp-flag tests are missing (presumably
// calls to the per-stage blend and clamp handlers), and the "no texture"
// statements at the end look like the else arm of the ref test -- confirm
// against the full source.
void    plDXPipeline::IHandleTextureStage( UInt32 stage, plLayerInterface *layer )
    hsGDeviceRef        *ref = nil;
    plBitmap            *texture;

    // Blend mode
    // layState is not referenced by any statement visible in this function.
    const hsGMatState& layState = fLayerState[stage];
    // Non-zero XOR means at least one blend flag changed since the old state.
    if( fLayerState[ stage ].fBlendFlags ^ fOldLayerState[stage].fBlendFlags )

    // Texture wrap/clamp mode
    if( fLayerState[ stage ].fClampFlags ^ fOldLayerState[stage].fClampFlags )

    // UVW transform
    IHandleStageTransform( stage, layer );

    // Create the D3D texture (if necessary) and set it to the device.
    if( ( texture = layer->GetTexture() ) != nil )
        ref = texture->GetDeviceRef();
        // (Re)build the device ref when there is none yet or it is marked dirty.
        if( ref == nil || ref->IsDirty() )
            // Normal textures
            plMipmap            *mip;
            plCubicEnvironmap   *cubic;

            if( ( mip = plMipmap::ConvertNoRef( texture ) ) != nil )
                ref = MakeTextureRef( layer, mip );

            // Cubic environment maps
            else if( ( cubic = plCubicEnvironmap::ConvertNoRef( texture ) ) != nil )
                ref = IMakeCubicTextureRef( layer, cubic );

    if( ref != nil )
        IUseTextureRef(stage, ref, layer);
        // No usable ref: clear the stage's texture and drop the cached layer ref.
        fD3DDevice->SetTexture( stage, NULL );
        hsRefCnt_SafeUnRef( fLayerRef[ stage ] );
        fLayerRef[ stage ] = nil;

// CheckTextureRef //////////////////////////////////////////////////////
// Make sure the given layer's texture has background D3D resources allocated.
//  layer - layer to check; it is dereferenced unchecked.
// Nothing happens when the layer has no texture or the texture already has a
// device ref. Otherwise a ref is created for a plMipmap or a plCubicEnvironmap;
// any other bitmap type is left alone.
void plDXPipeline::CheckTextureRef(plLayerInterface* layer)
{
    plBitmap* bitmap = layer->GetTexture();
    if( bitmap )
    {
        hsGDeviceRef* ref = bitmap->GetDeviceRef();

        if( !ref )
        {
            plMipmap* mip = plMipmap::ConvertNoRef(bitmap);
            if( mip )
                MakeTextureRef(layer, mip);

            plCubicEnvironmap* cubic = plCubicEnvironmap::ConvertNoRef(bitmap);
            if( cubic )
                IMakeCubicTextureRef(layer, cubic);
        }
    }
}

// IHandleBumpEnv //////////////////////////////////////////////////////////////
// This has never been used in production assets, because I never got 
// a good effect out of it, and BUMPENVMAPLUMINANCE isn't universally
// supported in hardware.
void plDXPipeline::IHandleBumpEnv(int stage, UInt32 blendFlags)
    DWORD current = stage ? D3DTA_CURRENT : D3DTA_DIFFUSE;
    UInt32 colorSrc = blendFlags & hsGMatState::kBlendInvertColor ? D3DTA_TEXTURE | D3DTA_COMPLEMENT : D3DTA_TEXTURE;

    fD3DDevice->SetTextureStageState(stage, D3DTSS_COLOROP, D3DTOP_BUMPENVMAPLUMINANCE);
    fD3DDevice->SetTextureStageState(stage, D3DTSS_COLORARG1, colorSrc); 
    fD3DDevice->SetTextureStageState(stage, D3DTSS_COLORARG2, current); 

    fD3DDevice->SetTextureStageState(stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2);
    fD3DDevice->SetTextureStageState(stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT); 

    const hsMatrix44& envXfm = fCurrLay->GetBumpEnvMatrix();
    fD3DDevice->SetTextureStageState(stage, D3DTSS_BUMPENVMAT00, F2DW(envXfm.fMap[0][0]));
    fD3DDevice->SetTextureStageState(stage, D3DTSS_BUMPENVMAT01, F2DW(envXfm.fMap[1][0]));
    fD3DDevice->SetTextureStageState(stage, D3DTSS_BUMPENVMAT10, F2DW(envXfm.fMap[0][1]));
    fD3DDevice->SetTextureStageState(stage, D3DTSS_BUMPENVMAT11, F2DW(envXfm.fMap[1][1]));

    fD3DDevice->SetTextureStageState(stage, D3DTSS_BUMPENVLSCALE, F2DW(envXfm.fMap[2][2]));
    fD3DDevice->SetTextureStageState(stage, D3DTSS_BUMPENVLOFFSET, F2DW(envXfm.fMap[2][3]));

//// IHandleStageBlend ////////////////////////////////////////////////////////
// Translate current blend state for this stage into D3D settings.
//
//  stage - index of the layer / texture stage. Its blend flags are read from
//          fLayerState[stage], and the D3D texture stage of the same index
//          is written.
//
// For upper stages, the kBlendMask bits pick the color op, and (for alpha
// blending) kBlendAlphaAdd / kBlendAlphaMult pick how this texture's alpha is
// combined with the alpha coming from the stages below. Also reads
// fLayerState[0].fBlendFlags, fCurrNumLayers and fSettings.fMaxLayersAtOnce.
void    plDXPipeline::IHandleStageBlend(int stage)
    const UInt32 blendFlags = fLayerState[stage].fBlendFlags;
    // If it's the base layer, handle that differently, because it's not really
    // texture stage settings, but frame buffer blend settings.
    if( stage == 0 )

    // Color source for this stage: its texture, complemented if requested.
    UInt32 colorSrc = D3DTA_TEXTURE;
    if( blendFlags & hsGMatState::kBlendInvertColor )
        colorSrc |= D3DTA_COMPLEMENT ;
    // kBlendEnvBumpNext not really used.
    if( blendFlags & hsGMatState::kBlendEnvBumpNext )
        IHandleBumpEnv(stage, blendFlags);
    else switch( blendFlags & hsGMatState::kBlendMask )
        // Alpha blending. Complicated by the ability to ignore either
        // color or alpha for any given texture. The lower end GeForces
        // don't orthogonally support settings, especially when the final
        // (3rd) stage is the diffuse color/alpha modulate and the board
        // really only wants to support 2 stages.
        // So we couldn't just translate our internal plasma stage states
        // into D3D states, we had to do some rearranging.
        // Note that by the time we get here, we _know_ that this isn't the
        // base layer (stage 0), because that's handled elsewhere.
        case hsGMatState::kBlendAlpha:
            // If the current number of layers is 2, then we've already handled the
            // base layer, so this must be layer 1 and the final layer.
            // If the base layer has NoTexColor or this layer has NoTexColor, we need
            // to do some rearranging.
            if( (fCurrNumLayers == 2)
                &&((blendFlags | fLayerState[0].fBlendFlags) & hsGMatState::kBlendNoTexColor) )
                // If this layer AND base layer are NoTexColor, then we just want the diffuse color.
                if( (blendFlags & hsGMatState::kBlendNoTexColor)
                    &&(fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexColor) )
                    // select diffuse color
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_DIFFUSE ); 
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP, D3DTOP_SELECTARG2 );
                // If the base layer has NoTexColor but this layer doesn't, then we
                // want the output to be this texture color times diffuse (ignoring base texture color).
                else if( fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexColor )
                    // diffuse is arg2, modulate
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_DIFFUSE );
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP, D3DTOP_MODULATE );
                // If base layer doesn't have NoTexColor, but this layer does, then
                // we want the output to be diffuse times base texture, which is in current.
                else if( blendFlags & hsGMatState::kBlendNoTexColor )
                    // diffuse is arg1, modulate
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, D3DTA_DIFFUSE );
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT );
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP, D3DTOP_MODULATE );

            // If we get here and this layer has NoTexColor, then we MUST be on a layer
            // above 1, which means we're on an advanced enough board to handle this orthogonally,
            // i.e. one with more than 2 texture stages.
            else if( blendFlags & hsGMatState::kBlendNoTexColor )
                // Pass the color from the stages below straight through.
                fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT ); 
                fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP, D3DTOP_SELECTARG2 );
            // Finally, no NoTexColor in sight, just set it.
                // Blend this texture over the current color using this texture's
                // alpha; kBlendInvertAlpha switches to the inverse-alpha op.
                fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
                fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT ); 
                fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,   
                                                blendFlags & hsGMatState::kBlendInvertAlpha 
                                                    ? D3DTOP_MODULATEINVALPHA_ADDCOLOR
                                                    : D3DTOP_BLENDTEXTUREALPHA );
            // The same ordeal for alpha, and the ability to ignore the alpha on any texture.
            // Note the additional logic for how to combine the alphas of multiple textures
            // into a final FB alpha.
            // This is orthogonal to using the alpha to combine colors of two different textures.
            // The default behavior is to use the upper texture alpha to blend the upper layer color
            // with the lower texture color, but retain the lower texture alpha (modulated by diffuse)
            // for the frame buffer alpha.
            switch( blendFlags & ( hsGMatState::kBlendAlphaAdd | hsGMatState::kBlendAlphaMult ) )
                case 0:
                    // Using alpha to blend textures, but this layer's alpha doesn't affect final FB
                    // alpha.
                    // Two layer setup with one or the other (or both) ignoring alpha.
                    if( (fCurrNumLayers == 2)
                        &&((blendFlags | fLayerState[0].fBlendFlags) & hsGMatState::kBlendNoTexAlpha) )
                        // Both ignoring alpha, use diffuse.
                        if( (blendFlags & hsGMatState::kBlendNoTexAlpha)
                            &&(fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexAlpha) )
                            // select diffuse alpha
                            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_DIFFUSE ); 
                            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP, D3DTOP_SELECTARG2 );
                        // Base ignoring alpha, use diffuse times this texture alpha.
                        else if( fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexAlpha )
                            // diffuse is arg2, modulate
                            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_MODULATE );
                            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG1, 
                                                            blendFlags & hsGMatState::kBlendInvertAlpha 
                                                                ? D3DTA_TEXTURE | D3DTA_COMPLEMENT 
                                                                : D3DTA_TEXTURE);
                            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_DIFFUSE ); 
                        // This ignoring alpha, use diffuse times base alpha (in current).
                        else if( blendFlags & hsGMatState::kBlendNoTexAlpha )
                            // diffuse is arg1, modulate
                            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_MODULATE );
                            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG1, D3DTA_DIFFUSE );
                            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT ); 
                    // Ignoring alpha or not, with more than 2 texture stages, 
                    // Either way, we'll ignore this texture's alpha, because it's an upper layer
                    // and has already been used (if it's going to get used) to blend this texture's
                    // color with the lower layers.
                        fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
                        fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT ); 
                    // Alpha coming out of this stage is lower stage alpha plus this texture alpha.
                case hsGMatState::kBlendAlphaAdd:
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_ADD );
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG1, 
                                                    blendFlags & hsGMatState::kBlendInvertAlpha 
                                                        ? D3DTA_TEXTURE | D3DTA_COMPLEMENT 
                                                        : D3DTA_TEXTURE);
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT ); 
                    // Alpha coming out of this stage is lower stage alpha times this texture alpha.
                case hsGMatState::kBlendAlphaMult:
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_MODULATE );
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG1, 
                                                    blendFlags & hsGMatState::kBlendInvertAlpha 
                                                        ? D3DTA_TEXTURE | D3DTA_COMPLEMENT 
                                                        : D3DTA_TEXTURE);
                    fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT ); 

            // Add texture colors, pass through current alpha.
        case hsGMatState::kBlendAdd:
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT ); 
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,   D3DTOP_ADD );

            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT ); 

            // Multiply texture colors, pass through current alpha
        case hsGMatState::kBlendMult:
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT ); 
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,   D3DTOP_MODULATE );

            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT ); 
            if (fSettings.fMaxLayersAtOnce == 2 && stage == 1)
                // On these boards, the only way we can do 2 textures plus diffuse is to
                // multiply it in during stage 0, but that only gives the same result
                // when doing a mult blend, which we won't know when setting up stage 0.
                // Now that we know, adjust stage 0 settings.
                // (This is the one place in this function that writes to stage 0
                // rather than to `stage`.)
                fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG2, D3DTA_DIFFUSE);
                fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP, D3DTOP_MODULATE);

            // Dot3 texture colors, pass through current alpha.
        case hsGMatState::kBlendDot3:
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT ); 
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,   D3DTOP_DOTPRODUCT3 );

            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT ); 

            // Add signed texture colors, pass through current alpha.
        case hsGMatState::kBlendAddSigned:
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT ); 
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,   D3DTOP_ADDSIGNED );

            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT ); 

            // Add signed * 2 texture colors, pass through current alpha.
        case hsGMatState::kBlendAddSigned2X:
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT ); 
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,   D3DTOP_ADDSIGNED2X );

            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT ); 

            // kBlendAddColorTimesAlpha is only supported for the base layer.
        case hsGMatState::kBlendAddColorTimesAlpha:
            hsAssert(false, "Blend mode unsupported on upper layers");

            // No blend, select this texture color and pass through current alpha
        case 0:
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT ); 
            fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,   D3DTOP_SELECTARG1 );

            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
            fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT ); 

//// IHandleFirstStageBlend ///////////////////////////////////////////////////
// Set frame buffer blend mode for blending the base layer
// For the case of rendering to a texture with alpha, the alpha written to
// the render target will be computed exactly as the color (limitation of D3D).
//
// Works entirely from fLayerState[0].fBlendFlags. First picks the source and
// destination blend factors from the blend mode, then updates D3DRS_BLENDOP,
// D3DRS_ALPHAREF, D3DRS_ALPHAFUNC and D3DRS_FOGCOLOR, each guarded by a
// Differs() check of the relevant flag bits against fOldLayerState[0].
void    plDXPipeline::IHandleFirstStageBlend()
    // No color, just writing out Z values.
    if( fLayerState[0].fBlendFlags & hsGMatState::kBlendNoColor )
        // Source weighted by zero, destination by one: FB color is left as is.
        fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
        fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_ZERO );
        fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_ONE );
        // Also sets the top bit of the cached flags.
        // NOTE(review): presumably so this cached state won't compare equal to
        // the next real flag set and the blend state gets re-applied - confirm.
        fLayerState[0].fBlendFlags |= 0x80000000;
        switch( fLayerState[0].fBlendFlags & hsGMatState::kBlendMask )
            // Detail is just a special case of alpha, handled in construction of the texture
            // mip chain by making higher levels of the chain more transparent.
            case hsGMatState::kBlendDetail:
            case hsGMatState::kBlendAlpha:
                fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
                if( fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertFinalAlpha )
                    // Inverted: source weighted by (1 - alpha), destination by alpha.
                    fD3DDevice->SetRenderState( D3DRS_SRCBLEND, D3DBLEND_INVSRCALPHA  );
                    fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_SRCALPHA );
                    // Normal: source weighted by alpha, destination by (1 - alpha).
                    fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_SRCALPHA );
                    fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA );
            // Multiply the final color onto the frame buffer.
            case hsGMatState::kBlendMult:
                fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
                if( fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertFinalColor )
                    // Inverted: FB is scaled by (1 - source color).
                    fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_ZERO );
                    fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_INVSRCCOLOR );
                    // Normal: FB is scaled by the source color.
                    fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_ZERO );
                    fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_SRCCOLOR );

            // Add final color to FB.
            case hsGMatState::kBlendAdd:
                fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
                fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_ONE );
                fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_ONE );


            // Multiply final color by FB color and add it into the FB.
            case hsGMatState::kBlendMADD:
                fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
                fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_DESTCOLOR );
                fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_ONE );


            // Final color times final alpha, added into the FB.
            case hsGMatState::kBlendAddColorTimesAlpha:
                fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
                if( fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertFinalAlpha )
                    // Inverted: source weighted by (1 - alpha).
                    fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_INVSRCALPHA );
                    // Normal: source weighted by alpha.
                    fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_SRCALPHA );
                // Either way the destination is kept at full weight.
                fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_ONE );


            // Overwrite final color onto FB
            case 0:
                // Blending stays enabled; ONE/ZERO factors make it a plain overwrite.
                fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
                fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_ONE );
                fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_ZERO );


                    // Bad material data (per the assert message, more than one blend
                    // mode set). After asserting, patch the layer at the bottom of the
                    // current layer's stack so it carries a single blend mode.
                    hsAssert(false, "Too many blend modes specified in material");
                    plLayer* lay = plLayer::ConvertNoRef(fCurrMaterial->GetLayer(fCurrLayerIdx)->BottomOfStack());
                    if( lay )
                        if( lay->GetBlendFlags() & hsGMatState::kBlendAlpha )
                            // Clear all kBlendMask bits, keep only kBlendAlpha.
                            lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAlpha);
                            // Clear all kBlendMask bits, keep only kBlendAdd.
                            lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAdd);
    // Blend ops, not currently used in production.
    if( fLayerState[0].Differs( fLayerState[0].fBlendFlags, fOldLayerState[0].fBlendFlags, (hsGMatState::kBlendSubtract | hsGMatState::kBlendRevSubtract) ) )
        if( fLayerState[0].fBlendFlags & hsGMatState::kBlendSubtract )
            fD3DDevice->SetRenderState( D3DRS_BLENDOP, D3DBLENDOP_SUBTRACT );
        else if( fLayerState[0].fBlendFlags & hsGMatState::kBlendRevSubtract )
            fD3DDevice->SetRenderState( D3DRS_BLENDOP, D3DBLENDOP_REVSUBTRACT );
            // Neither subtract flag set: the ordinary additive blend op.
            fD3DDevice->SetRenderState( D3DRS_BLENDOP, D3DBLENDOP_ADD );


    // AlphaTestHigh is used for reducing sort artifacts on textures that are mostly opaque or transparent, but
    // have regions of translucency in transition. Like a texture for a bush billboard. It lets there be some
    // transparency falloff, but quit drawing before it gets so transparent that draw order problems (halos)
    // become apparent.
    if( fLayerState[0].Differs( fLayerState[0].fBlendFlags, fOldLayerState[0].fBlendFlags, hsGMatState::kBlendAlphaTestHigh) )
        // Alpha reference value used when kBlendAlphaTestHigh is set.
        plConst(UInt32) kHighAlphaTest(0x40);
        if( fLayerState[0].fBlendFlags & hsGMatState::kBlendAlphaTestHigh )
            fD3DDevice->SetRenderState(D3DRS_ALPHAREF, kHighAlphaTest);
            // Otherwise the reference is 1.
            fD3DDevice->SetRenderState(D3DRS_ALPHAREF, 0x00000001);
    // Set the alpha test function, turn on for alpha blending, else off.
    if( fLayerState[0].Differs( fLayerState[0].fBlendFlags, fOldLayerState[0].fBlendFlags, hsGMatState::kBlendAlpha | hsGMatState::kBlendTest | hsGMatState::kBlendAlphaAlways | hsGMatState::kBlendAddColorTimesAlpha) )
        // kBlendAlphaAlways overrides the other flags and leaves the test passing always.
        if( (fLayerState[0].fBlendFlags & (hsGMatState::kBlendAlpha | hsGMatState::kBlendTest | hsGMatState::kBlendAddColorTimesAlpha))
                && !(fLayerState[0].fBlendFlags & hsGMatState::kBlendAlphaAlways) )
            fD3DDevice->SetRenderState( D3DRS_ALPHAFUNC, D3DCMP_GREATER );
            fD3DDevice->SetRenderState( D3DRS_ALPHAFUNC, D3DCMP_ALWAYS );
    // Adjust the fog color based on the blend mode. Setting fog color to black for additive modes is
    // an exact solution, setting it to white for multiplication is as close of an approximation to correct
    // as you're going to get with DX.
    if( fLayerState[0].Differs( fLayerState[0].fBlendFlags, fOldLayerState[0].fBlendFlags, hsGMatState::kBlendAdd | hsGMatState::kBlendMult | hsGMatState::kBlendMADD | hsGMatState::kBlendAddColorTimesAlpha ) )
        if( fLayerState[0].fBlendFlags & (hsGMatState::kBlendAdd | hsGMatState::kBlendMADD | hsGMatState::kBlendAddColorTimesAlpha) )
            fD3DDevice->SetRenderState( D3DRS_FOGCOLOR, 0 );
        else if( fLayerState[0].fBlendFlags & hsGMatState::kBlendMult )
            fD3DDevice->SetRenderState( D3DRS_FOGCOLOR, 0xffffffff );
            // Any other mode uses the current fog's own color.
            fD3DDevice->SetRenderState( D3DRS_FOGCOLOR, fCurrFog.fHexColor );

//// IHandleTextureMode ///////////////////////////////////////////////////////
// Handle the texture stage state for the base layer.
//
//  layer - the base layer; only layer->GetTexture() is consulted here, to
//          tell a textured pass from an untextured one.
//
// Three situations are handled: the base layer has a texture; it has none but
// more layers follow; or it is a color-only pass. All D3D writes go to
// texture stage 0. Updates fTexturing and always clears
// fSettings.fVeryAnnoyingTextureInvalidFlag on the way out.
void    plDXPipeline::IHandleTextureMode(plLayerInterface* layer)
    plBitmap *bitmap = layer->GetTexture();
    if( bitmap )
        // EnvBumpNext not used in production.
        if( fLayerState[0].fBlendFlags & hsGMatState::kBlendEnvBumpNext )
            IHandleBumpEnv(0, fLayerState[0].fBlendFlags);
        // If the texture stage settings have changed. Note that this
        // is a bad test, we should just be doing something like keeping
        // an array of D3D TextureStageStates as we set them and checking against
        // that directly rather than trying to infer from higher level state
        // whether we need to make the D3D call.
        else if( fSettings.fVeryAnnoyingTextureInvalidFlag 
            || !fTexturing  
            || ( fLayerState[ 0 ].fBlendFlags ^ fOldLayerState[0].fBlendFlags ) 
            || ( fCurrNumLayers + fActivePiggyBacks != fLastEndingStage )
            // If we're only doing one layer, just modulate texture color by diffuse and we're done.
            if( ( fCurrNumLayers + fActivePiggyBacks ) <= 1 )
                // See IHandleStageBlend for notes on NoTexColor.
                if( fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexColor )
                    // Ignore the texture color: take arg2 (diffuse) alone.
                    fD3DDevice->SetTextureStageState( 0, D3DTSS_COLOROP, D3DTOP_SELECTARG2 );
                    // Texture color (arg1) times diffuse (arg2).
                    fD3DDevice->SetTextureStageState( 0, D3DTSS_COLOROP, D3DTOP_MODULATE );   
                fD3DDevice->SetTextureStageState( 0, D3DTSS_COLORARG1, 
                    fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertColor 
                        ? D3DTA_TEXTURE | D3DTA_COMPLEMENT 
                        : D3DTA_TEXTURE);
                fD3DDevice->SetTextureStageState( 0, D3DTSS_COLORARG2, D3DTA_DIFFUSE );

                // See the check in IHandleStageBlend for fSettings.fMaxLayersAtOnce == 2.
                // It depends on these settings and adjusts what it needs.

                // Multitexturing, select texture color to make its way upstream on stages.
                fD3DDevice->SetTextureStageState( 0, D3DTSS_COLOROP,   D3DTOP_SELECTARG1 );
                fD3DDevice->SetTextureStageState( 0, D3DTSS_COLORARG1, 
                    fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertColor 
                        ? D3DTA_TEXTURE | D3DTA_COMPLEMENT 
                        : D3DTA_TEXTURE);

                // If our NoTexColor setting has changed, force a refresh of blend state on the next stage
                // since it's affected by our NoTexColor state.
                // (Done by overwriting stage 1's cached flags with all bits set.)
                if( fLayerState[0].Differs( fLayerState[0].fBlendFlags, fOldLayerState[0].fBlendFlags, hsGMatState::kBlendNoTexColor) )
                    fLayerState[1].fBlendFlags = UInt32(-1);

            // Alpha Arg1 is texture alpha (possibly complemented), and Arg2 is diffuse (possibly complemented).
            // If we want to ignore vertex alpha, select arg1
            // If we want to ignore texture alpha, select arg2
            // Otherwise (and normally) multiply the two.
            // kBlendNoVtxAlpha is tested first, so it wins if both flags are set.
            fD3DDevice->SetTextureStageState( 0, D3DTSS_ALPHAOP,
                fLayerState[0].fBlendFlags & hsGMatState::kBlendNoVtxAlpha
                    ? D3DTOP_SELECTARG1
                    :   fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexAlpha
                        ? D3DTOP_SELECTARG2
                        : D3DTOP_MODULATE );
            fD3DDevice->SetTextureStageState( 0, D3DTSS_ALPHAARG1, 
                fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertAlpha 
                    ? D3DTA_TEXTURE | D3DTA_COMPLEMENT 
                    : D3DTA_TEXTURE);
            fD3DDevice->SetTextureStageState( 0, D3DTSS_ALPHAARG2, D3DTA_DIFFUSE | 
                        ( fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertVtxAlpha 
                            ? D3DTA_COMPLEMENT 
                            : 0 ) ); 

            fTexturing = true;
    // Here we've no texture for the base layer, but we have more than one layer.
    // Select diffuse color and alpha, and pretend we have a texture but we're ignoring its
    // color and alpha.
    else if( fCurrNumLayers + fActivePiggyBacks > 1 )
        fLayerState[0].fBlendFlags |= hsGMatState::kBlendNoTexColor | hsGMatState::kBlendNoTexAlpha;
        // NOTE(review): both ops select ARG2, yet only COLORARG1/ALPHAARG1 are
        // assigned here, so the selected ARG2 keeps whatever was last set on
        // stage 0. Confirm whether COLORARG2/ALPHAARG2 were intended.
        fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_SELECTARG2);
        fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2);
        fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_DIFFUSE);
        fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG1, D3DTA_DIFFUSE);
        // The flags forced above may differ from last time; if so, invalidate
        // stage 1's cached blend flags the same way as in the textured path.
        if( fLayerState[0].Differs( fLayerState[0].fBlendFlags, fOldLayerState[0].fBlendFlags, (hsGMatState::kBlendNoTexColor|hsGMatState::kBlendNoTexAlpha)) )
            fLayerState[1].fBlendFlags = UInt32(-1);
        fTexturing = false;
    // Finally, a color only (non-textured) pass. Just select diffuse.
        // Only touch D3D if we were texturing before or the state was invalidated.
        if( fTexturing || fSettings.fVeryAnnoyingTextureInvalidFlag )
            fD3DDevice->SetTextureStageState( 0, D3DTSS_COLOROP,   D3DTOP_SELECTARG1 );
            fD3DDevice->SetTextureStageState( 0, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG1 );
            fD3DDevice->SetTextureStageState( 0, D3DTSS_COLORARG1, D3DTA_DIFFUSE );
            // Diffuse alpha, complemented when kBlendInvertVtxAlpha is set.
            fD3DDevice->SetTextureStageState( 0, D3DTSS_ALPHAARG1, D3DTA_DIFFUSE |
                        ( fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertVtxAlpha ? D3DTA_COMPLEMENT : 0 ) ); 

            fTexturing = false;

    // The stage 0 state has now been brought up to date (or needed no change).
    fSettings.fVeryAnnoyingTextureInvalidFlag = false;

//// IHandleStageClamp ////////////////////////////////////////////////////////
// Translate our current wrap/clamp mode to D3D calls.
//
//  stage - index of the layer whose fClampFlags are read from fLayerState,
//          and of the D3D sampler whose U and V address modes are set.
void    plDXPipeline::IHandleStageClamp(int stage)
    const UInt32 flags = fLayerState[stage].fClampFlags;
    switch( flags )
        // No clamp flags: wrap in both directions.
        case 0:
            fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP  );
            fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP  );
        // Clamp U only, V still wraps.
        case hsGMatState::kClampTextureU:
            fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP );
            fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP  );
        // Clamp V only, U still wraps.
        case hsGMatState::kClampTextureV:
            fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP  );
            fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP );
        // Clamp in both directions.
        case hsGMatState::kClampTexture:
            fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP );
            fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP );

//// ISetBumpMatrices /////////////////////////////////////////////////////////
// Compute the three bump-map UVW transforms (fBumpDuMatrix, fBumpDvMatrix and
// fBumpDwMatrix) for a span. The first row of each matrix is built from a
// strength-weighted sum of the span's light directions, and the V translate
// (fMap[1][3]) selects the band of the lookup texture for that channel.
//  layer - not referenced anywhere in the body visible here.
//  span  - supplies the light list, world bounds and local-to-world transform.
// NOTE(review): no block delimiters or else lines are visible in this chunk,
// so the nesting of the paired statements below (e.g. the two sets of
// kOffsetTo* assignments) must be confirmed against the complete file.
void plDXPipeline::ISetBumpMatrices(const plLayerInterface* layer, const plSpan* span)
    // This section is just debugging, to compute the matrices that will be set.
    // NOTE(review): c2w is declared here and again further down, so this
    // section is presumably conditionally compiled - confirm.
    static hsMatrix44 preMDu;
    static hsMatrix44 preMDv;
    static hsMatrix44 preMDw;
    static int preMInit = false;
    if( !preMInit )
        hsMatrix44 rotAndCollapseToX;
        int i, j;
        for( i = 0; i < 4; i++ )
            for( j = 0; j < 4; j++ )
                rotAndCollapseToX.fMap[i][j] = 0;
        rotAndCollapseToX.fMap[0][2] = 1.f;
        rotAndCollapseToX.fMap[3][3] = 1.f;

        hsMatrix44 offset;
        offset.fMap[0][0] = 0.5f;
        offset.fMap[0][3] = 0.5f;

        preMDu = offset * rotAndCollapseToX;

        offset.fMap[1][3] = 0.5f;

        preMDv = offset * rotAndCollapseToX;

        offset.fMap[1][3] = 1.f;

        preMDw = offset * rotAndCollapseToX;

        preMInit = true;

    hsMatrix44 localToLight = span->GetLight(0, false)->GetWorldToLight() * span->fLocalToWorld;
    localToLight.fMap[0][3] = localToLight.fMap[1][3] = localToLight.fMap[2][3] = 0;
    fBumpDuMatrix = preMDu * localToLight;
    fBumpDvMatrix = preMDv * localToLight;

    hsMatrix44 c2w = fView.fCameraToWorld;
    hsMatrix44 cameraToLight = span->GetLight(0, false)->GetWorldToLight() * c2w;
    cameraToLight.fMap[0][3] = cameraToLight.fMap[1][3] = cameraToLight.fMap[2][3] = 0;
    fBumpDwMatrix = preMDw * cameraToLight;

    // Keep copies of the debug results, then overwrite the member matrices
    // with zeroMatrix before the real computation below fills them in.
    hsMatrix44 bDu = fBumpDuMatrix;
    hsMatrix44 bDv = fBumpDvMatrix;
    hsMatrix44 bDw = fBumpDwMatrix;
    static hsMatrix44 zeroMatrix;
    fBumpDuMatrix = zeroMatrix;
    fBumpDvMatrix = zeroMatrix;
    fBumpDwMatrix = zeroMatrix;


    // Here's the math
    // The incoming uv coordinate is either:
    //  kMiscBumpDu - dPos/dU (in other words, the direction in space from this vertex where U increases and V remains constant) in local space.
    //  kMiscBumpDv - dPos/dV (in other words, the direction in space from this vertex where V increases and U remains constant) in local space.
    //  kMiscBumpDw - the normal in camera space.
    // In each case, we need to transform the vector (uvw coord) into light space, and dot it with the light direction.
    // Well, in light space, the light direction is always (0,0,1).
    // So really, we just transform the vector into light space, and the z component is what we want.
    // Then, for each of these, we take that z value (the dot product) and put it into a color channel.
    // R = dPos/dU dot liDir
    // G = dPos/dV dot liDir
    // B = dPos/dW dot liDir
    // That's what we want, here's how we get it.
    // Here, Li(vec) means the vector in light space, Loc(vec) is local space, Tan(vec) is tangent space
    // Li(uvw) = local2Light * Loc(uvw) (uvw comes in in local space, ie input uvw == Loc(uvw))
    // Then we want to:
    //      a) Rotate the Z component to be along X (U) axis
    //      b) Zero out the new Y and Z
    //      c) Scale and offset our new X (the old Z) so -1 => 0, 1 => 1 (scale by 0.5, add 0.5).
    // The following matrix does all this (it's just a concatenation of the above 3 simple matrices).
    //  M = |0 0 0.5 0.5|
    //      |0 0 0   0  |
    //      |0 0 0   0  |
    //      |0 0 0   1  |
    // Our lookup texture that these transformed coords will read into has three horizontal bands,
    // the bottom 3rd is a ramp along U of 0->red
    // middle 3rd is a ramp along U of 0->green
    // last third (highest V) is a ramp along U of 0->blue.
    // So we can do the conversion from our dot to a color with an appropriate V offset in the above M.
    // dPos/dU and dPos/dV are both input in local space, so the transform to get them into light space is
    // the same for each, and that's obviously WorldToLight * LocalToWorld.
    // That's a little inconvenient and inefficient. It's inconvenient, because for an omni light, we
    // can easily fake a light direction (span position - light position), but the full matrix is kind
    // of arbitrary. We could fake it, but instead we move on. It's inefficient because, looking at the
    // form of matrix M, we know we'll be throwing away a lot of it anyway. So we work through the matrix
    // math and find that we're going to wind up with:
    //      M1 =    |   M[0][2] * loc2li[2][0]  M[0][2] * loc2li[2][1]  M[0][2] * loc2li[2][2]  0.5 |
    //              |                   0                   0                       0           0   |
    //              |                   0                   0                       0           0   |
    //              |                   0                   0                       0           1   |
    // So all we really need is loc2li[2] (row 2). A little more matrix math gives us:
    //      loc2li[2] = (w2li[2] dot loc2wT[0], w2li[2] dot loc2wT[1], w2li[2] dot loc2wT[2]) (where loc2wT is Transpose(loc2w))
    // And hey, that's just dependent on the light's direction w2li[2]. The same thing works out for dPos/dW, except
    // substitute cam2w for loc2w (since input is in camera space instead of world space).
    // And that's about it. We don't actually have to multiply all those matrices at run-time, because
    // we know what the answer will be anyway. We just construct the matrices, making sure we set the
    // appropriate translate for V to get each into the right color channel. The hardware does the three
    // uv transforms and lookups, sums the results, and the output is:
    // (dPos/dU dot liDir, dPos/dV dot liDir, dPos/dW dot liDir), which also happens to be the light direction
    // transformed into tangent space. We dot that with our bump map (which has the normals in tangent space),
    // and we've got per-pixel shading for this light direction.

    // Accumulate a single light direction for the span: each light's negative
    // world direction at the span center, weighted by its strength. Also track
    // the strongest single light for the final scale.
    hsPoint3 spanPos = span->fWorldBounds.GetCenter();
    hsVector3 liDir(0,0,0);
    int i;
    const hsTArray<plLightInfo*>& spanLights = span->GetLightList(false);
    hsScalar maxStrength = 0;
    for( i = 0; i < spanLights.GetCount(); i++ )
        hsScalar liWgt = span->GetLightStrength(i, false);
        // A light strength of 2.f means it's from a light group, and we haven't actually calculated
        // the strength. So calculate it now.
        if( liWgt == 2.f )
            hsScalar scale;
            spanLights[i]->GetStrengthAndScale(span->fWorldBounds, liWgt, scale);
        if( liWgt > maxStrength )
            maxStrength = liWgt;
        liDir += spanLights[i]->GetNegativeWorldDirection(spanPos) * liWgt;

    // For additive layers the view-to-span vector is added to the light
    // direction and the result is scaled down by one of the kSpecularMax values.
    // NOTE(review): both scale statements are visible with no else between
    // them; presumably only one applies depending on the debug flag - confirm.
    static hsScalar kUVWScale = 1.f;
    hsScalar uvwScale = kUVWScale;
    if( fLayerState[0].fBlendFlags & hsGMatState::kBlendAdd )
        hsVector3 cam2span(&GetViewPositionWorld(), &spanPos);
        liDir += cam2span;
        static hsScalar kSpecularMax = 0.1f;
        static hsScalar kSpecularMaxUV = 0.5f;
        if (IsDebugFlagSet(plPipeDbg::kFlagBumpUV))
            uvwScale *= kSpecularMaxUV;
            uvwScale *= kSpecularMax;

    // Scale by the red runtime color of a layer at a fixed offset from the
    // current one; the offset depends on which bump channel the current layer is.
    // NOTE(review): no break statements are visible between these cases -
    // confirm whether fall-through is intended.
    switch( fCurrMaterial->GetLayer(fCurrLayerIdx)->GetMiscFlags() & hsGMatState::kMiscBumpChans )
    case hsGMatState::kMiscBumpDu:
        uvwScale *= fCurrMaterial->GetLayer(fCurrLayerIdx+3)->GetRuntimeColor().r;
    case hsGMatState::kMiscBumpDv: // This currently should never happen
        uvwScale *= fCurrMaterial->GetLayer(fCurrLayerIdx+1)->GetRuntimeColor().r;
    case hsGMatState::kMiscBumpDw:
        uvwScale *= fCurrMaterial->GetLayer(fCurrLayerIdx+2)->GetRuntimeColor().r;
    // Amplify the strongest light's weight by 20 and cap it at 1 before
    // applying it to the direction.
    maxStrength *= 20.f;
    if( maxStrength > 1.f )
        maxStrength = 1.f;
    liDir *= uvwScale * maxStrength;

    const hsScalar kUVWOffset = 0.5f;

    // V translates that pick the red, green and blue bands of the lookup texture.
    hsScalar kOffsetToRed;
    hsScalar kOffsetToGreen;
    hsScalar kOffsetToBlue;

    // NOTE(review): two sets of assignments are visible with no else between
    // them; presumably the second set is the non-debug case - confirm.
    if (IsDebugFlagSet(plPipeDbg::kFlagBumpUV) || IsDebugFlagSet(plPipeDbg::kFlagBumpW))
        kOffsetToRed = 0.2f;
        kOffsetToGreen = 0.6f;
        kOffsetToBlue = 1.f;
        kOffsetToRed = 0.f;
        kOffsetToGreen = 0.4f;
        kOffsetToBlue = 0.8f;

    const hsMatrix44& l2w = span->fLocalToWorld;

    // Row 0 of Du and Dv is liDir multiplied through the columns of
    // local-to-world (i.e. liDir times the transpose of its upper 3x3).
    fBumpDvMatrix.fMap[0][0] = fBumpDuMatrix.fMap[0][0] = (liDir.fX * l2w.fMap[0][0] + liDir.fY * l2w.fMap[1][0] + liDir.fZ * l2w.fMap[2][0]);
    fBumpDvMatrix.fMap[0][1] = fBumpDuMatrix.fMap[0][1] = (liDir.fX * l2w.fMap[0][1] + liDir.fY * l2w.fMap[1][1] + liDir.fZ * l2w.fMap[2][1]);
    fBumpDvMatrix.fMap[0][2] = fBumpDuMatrix.fMap[0][2] = (liDir.fX * l2w.fMap[0][2] + liDir.fY * l2w.fMap[1][2] + liDir.fZ * l2w.fMap[2][2]);

    fBumpDvMatrix.fMap[0][3] = fBumpDuMatrix.fMap[0][3] = kUVWOffset;

    fBumpDuMatrix.fMap[1][3] = kOffsetToRed;
    fBumpDvMatrix.fMap[1][3] = kOffsetToGreen;

    hsMatrix44 c2w = fView.GetCameraToWorld();

    // The bump textures created so far have very strong blue components, which make anything
    // bump mapped glow. The ideal fix would be to have the artists adjust the blue component
    // to a better (lower) value, so there would be a little extra illumination where the bump
    // is straight out into the normal direction, to complement the lateral illumination.
    // Attempts so far have been unsuccessful in getting them to get a better understanding
    // of bump maps, so I've just zeroed out the contribution in the normal direction.
    // NOTE(review): the computed and the zeroed assignments are both visible
    // with no else between them; presumably the zeroing is the kBumpUVOnly
    // case - confirm.
    plConst(int) kBumpUVOnly(true);
    if( !kBumpUVOnly )
        fBumpDwMatrix.fMap[0][0] = (liDir.fX * c2w.fMap[0][0] + liDir.fY * c2w.fMap[1][0] + liDir.fZ * c2w.fMap[2][0]);
        fBumpDwMatrix.fMap[0][1] = (liDir.fX * c2w.fMap[0][1] + liDir.fY * c2w.fMap[1][1] + liDir.fZ * c2w.fMap[2][1]);
        fBumpDwMatrix.fMap[0][2] = (liDir.fX * c2w.fMap[0][2] + liDir.fY * c2w.fMap[1][2] + liDir.fZ * c2w.fMap[2][2]);
        fBumpDwMatrix.fMap[0][0] = 0;
        fBumpDwMatrix.fMap[0][1] = 0;
        fBumpDwMatrix.fMap[0][2] = 0;

    fBumpDwMatrix.fMap[0][3] = kUVWOffset;
    fBumpDwMatrix.fMap[1][3] = kOffsetToBlue;

// IGetBumpMatrix ///////////////////////////////////////////////////////
// Return the correct uvw transform for the bump map channel implied
// in the miscFlags. The matrices have been previously set in ISetBumpMatrices.
//  miscFlags - layer misc flags; only the kMiscBumpChans bits are examined.
// Returns a reference to one of the member bump matrices. Any value that is
// not exactly Du or Dv (including Dw) yields the Dw matrix, so every path
// returns a valid reference.
const hsMatrix44& plDXPipeline::IGetBumpMatrix(UInt32 miscFlags) const
{
    switch( miscFlags & hsGMatState::kMiscBumpChans )
    {
    case hsGMatState::kMiscBumpDu:
        return fBumpDuMatrix;
    case hsGMatState::kMiscBumpDv:
        return fBumpDvMatrix;
    case hsGMatState::kMiscBumpDw:
    default:
        return fBumpDwMatrix;
    }
}

// ISkipBumpMap /////////////////////////////////////////////////////////////////////////
// Determine whether to skip bumpmapping on this object/material/layer combination.
// We skip if the span isn't illuminated by any lights, or bump mapping is disabled.
// If skipping, we advance <layer> past the bump layers.
// If there are no more layers after that, we return true (to abort further rendering of currSpan),
// else false to continue rendering.
//  newMat   - material being rendered; may be nil, in which case nothing is skipped.
//  layer    - in/out index of the current layer; advanced by 4 when the bump layers are skipped.
//  currSpan - span being rendered; may be nil, in which case nothing is skipped.
hsBool plDXPipeline::ISkipBumpMap(hsGMaterial* newMat, UInt32& layer, const plSpan* currSpan) const
{
    if( newMat && currSpan )
    {
        plLayerInterface* bumpLay = newMat->GetLayer(layer);
        if( bumpLay
            && (bumpLay->GetMiscFlags() & hsGMatState::kMiscBumpChans)
            && (!currSpan->GetNumLights(false) || IsDebugFlagSet(plPipeDbg::kFlagNoBump)) )
        {
            // Step over the bump layers as a group; the bounds check belongs
            // with the advance, so it only runs when we actually skipped.
            layer += 4;
            if( layer >= newMat->GetNumLayers() )
                return true;
        }
    }
    return false;
}

//// IHandleStageTransform ////////////////////////////////////////////////////
// Compute and set the UVW transform to D3D.
// This only gets interesting if the transform is dependent on the current camera transform,
// as is the case with Reflection, Projection, or bump mapping.
//  stage - texture stage whose transform (and LOD bias) is being set.
//  layer - layer supplying the UVW transform, UVW source and LOD bias.
// The misc flags cached in fLayerState[stage] select which transform is built:
// reflection/refraction, camera-to-screen, projection, bump channel, or the
// plain layer transform.
// NOTE(review): no block delimiters or else lines are visible in this chunk,
// so the branch nesting must be confirmed against the complete file.
void    plDXPipeline::IHandleStageTransform( int stage, plLayerInterface *layer )
    // The leading "1 ||" makes this condition always true, so the transform is
    // set on every call and the else-if branch at the bottom is never taken
    // as written.
    if( 1 
        || !(layer->GetTransform().fFlags & hsMatrix44::kIsIdent) 
        || (fLayerState[stage].fMiscFlags & (hsGMatState::kMiscUseReflectionXform|hsGMatState::kMiscUseRefractionXform|hsGMatState::kMiscProjection|hsGMatState::kMiscBumpChans)) )
        D3DXMATRIX tXfm;

        if( fLayerState[stage].fMiscFlags & (hsGMatState::kMiscUseReflectionXform | hsGMatState::kMiscUseRefractionXform) )
            // Reflection - this is just the camera to world, with translation removed,
            // and rotated to match cube map conventions.
            hsMatrix44 c2env = fView.GetCameraToWorld();
            c2env = fView.GetCameraToWorld();

            // NOTE(review): the left-hand side of this chained assignment is
            // not visible here; going by the comment above it presumably
            // zeroes the whole translation column - confirm.
                = c2env.fMap[1][3]
                = c2env.fMap[2][3]
                = 0.f;

            if( fLayerState[stage].fMiscFlags & hsGMatState::kMiscUseReflectionXform )

                // This is just a rotation about X of Pi/2 (y = z, z = -y), 
                // followed by flipping Z to reflect back towards us (z = -z).
                // Implemented as a swap of rows 1 and 2 of the upper 3x3.
                hsScalar t = c2env.fMap[1][0];
                c2env.fMap[1][0] = c2env.fMap[2][0];
                c2env.fMap[2][0] = t;

                t = c2env.fMap[1][1];
                c2env.fMap[1][1] = c2env.fMap[2][1];
                c2env.fMap[2][1] = t;

                t = c2env.fMap[1][2];
                c2env.fMap[1][2] = c2env.fMap[2][2];
                c2env.fMap[2][2] = t;
            else // must be kMiscUseRefractionXform

                // Okay, I know this refraction isn't any where near
                // right, so don't sit down and try to figure out the
                // math and hook it to the refractive index.
                // It's just a hack that will fool anyone that isn't
                // really paying attention.

                // This is just a rotation about X of Pi/2 (y = z, z = -y), 
                // followed by NOT flipping Z to reflect back towards us (z = -z).
                // In other words, same as reflection, but then c2env = c2env * scaleMatNegateZ.
                hsScalar t = c2env.fMap[1][0];
                c2env.fMap[1][0] = c2env.fMap[2][0];
                c2env.fMap[2][0] = t;

                t = c2env.fMap[1][1];
                c2env.fMap[1][1] = c2env.fMap[2][1];
                c2env.fMap[2][1] = t;

                t = c2env.fMap[1][2];
                c2env.fMap[1][2] = c2env.fMap[2][2];
                c2env.fMap[2][2] = t;

                // Negate column 2 of the upper 3x3 (the scaleMatNegateZ step).
                c2env.fMap[0][2] = -c2env.fMap[0][2];
                c2env.fMap[1][2] = -c2env.fMap[1][2];
                c2env.fMap[2][2] = -c2env.fMap[2][2];

// NOTE(review): the matching #endif for this disabled section is not visible
// in this chunk; presumably it follows the kFishEyeScale lines - confirm.
#if 0
                const hsScalar kFishEyeScale = 0.5f;
                // You can adjust the fish-eye-ness of this by scaling
                // X and Y as well. Eventually, you wind up with the same
                // as c2env * scaleMatXYAndNegateZ, but this is shorter.
                // kFishEyeScale gets pretty fish-eye at about 0.5, and
                // like you're looking through the wrong end of a telescope
                // at about 1.5. 
                // Ideally kFishEyeScale would be a parameter of the layer.
                c2env.fMap[0][0] *= kFishEyeScale;
                c2env.fMap[1][0] *= kFishEyeScale;
                c2env.fMap[2][0] *= kFishEyeScale;
                c2env.fMap[0][1] *= kFishEyeScale;
                c2env.fMap[1][1] *= kFishEyeScale;
                c2env.fMap[2][1] *= kFishEyeScale;

            IMatrix44ToD3DMatrix( tXfm, c2env );
        // cam2Screen will also have the kMiscPerspProjection flag set, so this needs
        // to go before the regular kMiscProjection check.
        else if (fLayerState[stage].fMiscFlags & hsGMatState::kMiscCam2Screen )
            // Still needs a bit of cleaning...
            // NOTE(review): camScale is declared but never applied in the lines
            // visible here, and p2s is modified with += without a visible
            // initialization - confirm against the complete file.
            static hsVector3 camScale(0.5f, -0.5f, 1.f);
            static hsVector3 camTrans(0.5f, 0.5f, 0.f);
            hsMatrix44 p2s;
            p2s.fMap[0][3] += camTrans.fX;
            p2s.fMap[1][3] += camTrans.fY;

            // The scale and trans move us from NDC to Screen space. We need to swap
            // the Z and W coordinates so that the texture projection will divide by W
            // and give us projected 2D coordinates.
            hsScalar temp = p2s.fMap[2][2];
            p2s.fMap[2][2] = p2s.fMap[3][2];
            p2s.fMap[3][2] = temp;

            temp = p2s.fMap[2][3];
            p2s.fMap[2][3] = p2s.fMap[3][3];
            p2s.fMap[3][3] = temp;

            IMatrix44ToD3DMatrix(tXfm, p2s * IGetCameraToNDC());
        else if( fLayerState[stage].fMiscFlags & hsGMatState::kMiscProjection )
            // For projection, the worldToLight transform is in the layer transform,
            // so we append the cameraToWorld, getting cameraToLight
            hsMatrix44 c2w = fView.GetCameraToWorld();
            // Unless the UVW source is position, drop the camera translation.
            if( !(layer->GetUVWSrc() & plLayerInterface::kUVWPosition) )
                c2w.fMap[0][3] = 0;
                c2w.fMap[1][3] = 0;
                c2w.fMap[2][3] = 0;

            // We've already stuffed the worldToLight transform into the layer.
            hsMatrix44 c2l = layer->GetTransform() * c2w;

            IMatrix44ToD3DMatrix(tXfm, c2l);
        else if( fLayerState[stage].fMiscFlags & hsGMatState::kMiscBumpChans )
            // Bump matrices are already set, just get the right one and stuff it in.
            hsMatrix44 m = IGetBumpMatrix(fLayerState[stage].fMiscFlags);

            IMatrix44ToD3DMatrix(tXfm, m);
            // Just take the layer transform and stuff it in.
            IMatrix44ToD3DMatrix( tXfm, layer->GetTransform() );

        fD3DDevice->SetTransform( sTextureStages[ stage ], &tXfm );
        fLayerTransform[ stage ] = true;
    else if( fLayerTransform[ stage ] )
        // We'd like to just turn it off, but the Voodoo board freaks if the
        // texture coordinates are 3-tuple for no apparent reason.
        fD3DDevice->SetTransform( sTextureStages[ stage ], &d3dIdentityMatrix );
        fLayerTransform[ stage ] = false;

    // If there's an lod bias associated with the layer, set it here.
    // There usually isn't.
    float newBias = fLayerState[stage].fZFlags & hsGMatState::kZLODBias ? layer->GetLODBias() : fTweaks.fDefaultLODBias;
    if( newBias != fLayerLODBias[ stage ] )
        fLayerLODBias[ stage ] = newBias;
        // The cast hands D3D the float's raw bit pattern as a DWORD.
        fD3DDevice->SetSamplerState( stage, D3DSAMP_MIPMAPLODBIAS, *(DWORD*)(&fLayerLODBias[ stage ]) );

//// IUseTextureRef ///////////////////////////////////////////////////////////
// Set the texturing flags and texture.
//  stage - texture stage to bind to.
//  dRef  - device ref for the texture; cast to plDXTextureRef without checking.
//  layer - layer using the texture; supplies UVW source, shaders and misc flags.
// Updates the per-stage caches (fLayerUVWSrcs, fLayerXformFlags, fLayerRef) and
// only issues the corresponding D3D state call when the cached value changes.
// NOTE(review): no block delimiters or else lines are visible in this chunk,
// so the statement nesting must be confirmed against the complete file.
void    plDXPipeline::IUseTextureRef( int stage, hsGDeviceRef *dRef, plLayerInterface* layer )
    plDXTextureRef *ref = (plDXTextureRef *)dRef;
    UInt32          xformFlags;

    UInt32 uvwSrc = layer->GetUVWSrc();

    // Keep track of how much managed memory has been "seen" since the last
    // evict, for that NVidia bug. Look for OSVERSIONINFO for more notes.
    if( ref->fUseTime <= fEvictTime )
        fManagedSeen += ref->fDataSize;

    // Also used for the same thing.
    // The XOR is non-zero exactly when the two time stamps differ.
    if( ref->fUseTime ^ fTextUseTime )
        plProfile_NewMem(CurrTex, ref->fDataSize);
        ref->fUseTime = fTextUseTime;

        fTexUsed += ref->fDataSize;

    // DX pixel shaders require the TEXCOORDINDEX to be equal to the stage,
    // even though its ignored.
    if( layer->GetPixelShader() && (stage != uvwSrc) )
        uvwSrc = stage;

    // Update our UVW source
    if( fLayerUVWSrcs[ stage ] != uvwSrc )
        fD3DDevice->SetTextureStageState( stage, D3DTSS_TEXCOORDINDEX, uvwSrc );
        fLayerUVWSrcs[ stage ] = uvwSrc;

    // Texture transform flags are only set here when no shader is on the layer.
    if (!layer->GetVertexShader() && !layer->GetPixelShader())
        /// Set the transform flags
        /// Note: the perspective projection flag must be taken from the layer, since it's layer-specific.
        /// Storing it on the texture ref is bad, because the texture ref can be shared among layers whose
        /// projection flags might not match. This should probably be cleaned up, but for now this fixes the
        /// problem.
        // NOTE(review): the COUNT2 assignment below has no visible else;
        // presumably it is the fall-back for plain 2D textures - confirm.
        if( ref->GetFlags() & plDXTextureRef::kCubicMap )
            xformFlags = D3DTTFF_COUNT3;
        else if( layer->GetMiscFlags() & hsGMatState::kMiscPerspProjection )
            xformFlags = D3DTTFF_COUNT3 | D3DTTFF_PROJECTED;
            xformFlags = D3DTTFF_COUNT2;

        if( xformFlags != fLayerXformFlags[ stage ] )
            fLayerXformFlags[ stage ] = xformFlags;
            fD3DDevice->SetTextureStageState( stage, D3DTSS_TEXTURETRANSFORMFLAGS, xformFlags );

    // Update our current ref
    // If the D3D texture hasn't been created yet, build it from the ref's data.
    // NOTE(review): the else-if below has no visible body; presumably it exits
    // early when this ref is already bound to the stage - confirm.
    if( !ref->fD3DTexture )
        if( ref->fData )
            IReloadTexture( ref );
    else if( dRef == fLayerRef[ stage ] )
    hsRefCnt_SafeAssign( fLayerRef[ stage ], dRef );

    /// Actually make it active!
    fD3DDevice->SetTexture( stage, ref->fD3DTexture );

//// IStageStop ///////////////////////////////////////////////////////////////
// Tell the hardware we won't be using any more stages.
// This is more complicated than it sounds. Cases:
// a) single texture stage, we're done (because we've already set
//      texture times diffuse), so just disable stage 1.
// b) we have 2 stages active.
//      b.0) we're skipping texture color on one of those 2 stages. In that
//              case, we've already modulated in our diffuse, so just
//              disable stage 2.
//      b.1) we're using texture color from both stages 0 and 1, and still need
//              to modulate in diffuse. So set stage 2 to modulate in diffuse,
//              and disable stage 3.
// c) we have 3 or more stages active. Append a modulation by diffuse
// Note that this only applies to color, because diffuse alpha is always modulated
// in from the start.
//  stage - index of the first stage that is not in use.
// The cached blend flags of every stage touched here are set to UInt32(-1),
// and the incoming stage is recorded in fLastEndingStage.
// NOTE(review): no block delimiters or else lines are visible in this chunk;
// the three cases above are presumably an if / else-if / else chain, with
// b.0 and b.1 as an if/else inside the second - confirm.
void    plDXPipeline::IStageStop( UInt32 stage )
    int disableStage = stage;

    // Note: even if we don't have a texture, we handle it similar to if we had one,
    // so the only special case we need here is if we only had one stage to set up -mcn
    if( ( stage <= 1 ) )
        fD3DDevice->SetTextureStageState(stage, D3DTSS_COLOROP, D3DTOP_DISABLE);
        fD3DDevice->SetTextureStageState(stage, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
        fLayerState[ stage ].fBlendFlags = UInt32(-1);
        disableStage = stage;
    else if( stage == 2 )
        // The fMaxLayersAtOnce == 2 check is for the DX9.0c 2 texture limitation.
        // See ILayersAtOnce()
        if ((fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexColor)
            || (fLayerState[1].fBlendFlags & hsGMatState::kBlendNoTexColor)
            || fSettings.fMaxLayersAtOnce == 2)
            fD3DDevice->SetTextureStageState(2, D3DTSS_COLOROP, D3DTOP_DISABLE);
            disableStage = 2;
            fD3DDevice->SetTextureStageState(2, D3DTSS_COLOROP,   D3DTOP_MODULATE);
            fD3DDevice->SetTextureStageState(2, D3DTSS_COLORARG1, D3DTA_DIFFUSE);
            fD3DDevice->SetTextureStageState(2, D3DTSS_COLORARG2, D3DTA_CURRENT);
            fD3DDevice->SetTextureStageState(3, D3DTSS_COLOROP, D3DTOP_DISABLE);
            disableStage = 3;

        fD3DDevice->SetTextureStageState(2, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
        fLayerState[2].fBlendFlags = UInt32(-1);
        fLayerState[3].fBlendFlags = UInt32(-1);
        // This is directly contrary to the DX documentation, but in line with
        // the code generated by MFCTex (which works). The docs say:
        //  "Alpha operations cannot be disabled when color operations are enabled. 
        //      Setting the alpha operation to D3DTOP_DISABLE when color blending 
        //      is enabled causes undefined behavior."
        // But not disabling the earliest possible alpha stage causes the driver
        // to choke.

        // Case c: modulate diffuse into <stage> and disable the one after it.
        fD3DDevice->SetTextureStageState(stage, D3DTSS_COLOROP,   D3DTOP_MODULATE);
        fD3DDevice->SetTextureStageState(stage, D3DTSS_COLORARG1, D3DTA_DIFFUSE);
        fD3DDevice->SetTextureStageState(stage, D3DTSS_COLORARG2, D3DTA_CURRENT);

        fD3DDevice->SetTextureStageState(stage, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
        fLayerState[stage].fBlendFlags = UInt32(-1);

        fD3DDevice->SetTextureStageState(stage+1, D3DTSS_COLOROP, D3DTOP_DISABLE);
        fLayerState[stage+1].fBlendFlags = UInt32(-1);

        disableStage = stage+1;

    fLastEndingStage = stage;

    // On Intel hardware, find the highest UVW source index (low 4 bits) used by
    // the layers of the current pass, set every stage from disableStage up to
    // that index to pass the current color through, then disable the stage
    // after the last one (k holds that index once the loop finishes).
    if( fSettings.fIsIntel )
        int maxUVW = 0;
        int k;
        for( k = 0; k < fCurrNumLayers; k++ )
            if( (fCurrMaterial->GetLayer(k + fCurrLayerIdx)->GetUVWSrc() & 0xf) > maxUVW )
                maxUVW = fCurrMaterial->GetLayer(k + fCurrLayerIdx)->GetUVWSrc() & 0xf;
        for( k = disableStage; k <= maxUVW; k++ )
            fD3DDevice->SetTextureStageState(k, D3DTSS_COLOROP,   D3DTOP_SELECTARG2);
            fD3DDevice->SetTextureStageState(k, D3DTSS_COLORARG2, D3DTA_CURRENT);
        fD3DDevice->SetTextureStageState(k, D3DTSS_COLOROP, D3DTOP_DISABLE);

// IInvalidateState /////////////////////////////////////////////////////////////
// Documentation is unclear on what state persists or becomes invalid on switching
// a render target or finishing a frame. I put into this function things that show
// up as suspect, whether they "ought" to be here or not.
void plDXPipeline::IInvalidateState()
    fLastEndingStage = 0;
    fTexturing = false;
    int i;
    for( i = 0; i < 8; i++ )
        hsRefCnt_SafeUnRef( fLayerRef[ i ] );
        fLayerRef[ i ] = nil;
        fD3DDevice->SetTexture( i, nil );   

    fLayerState[ 0 ].fZFlags = 0;
    fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_LESSEQUAL );
    fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );

    // This is a workaround for the latest ATI drivers (
    // They seem to be caching something on lights (possibly only specular
    // lights, but I haven't been able to prove it) the first time they
    // are used in a render, and then not letting go when the camera
    // is moved for another render in the same frame (same BeginScene/EndScene pair).
    // The effect is very incorrect lighting. Moreover, if the multiple renders
    // per frame are infrequent (e.g. refreshing an environment map every few
    // seconds), you'll get flashes after the double render frames.
    // Workaround is to Disable all lights at render target switch, although
    // a more correct workaround might be to disable all lights at camera move.
    // All of this is strictly conjecture, so I'm going with what works.
    // Note also that I'm only disabling lights that are currently enabled
    // at the time of the render target switch. Since this is dealing with
    // a driver bug, it might be safer to disable them all, but timings
    // show that looping through all the lights in a scene like Teledahn exterior,
    // with hundreds of active lights, incurs a measurable expense (some milliseconds),
    // whereas disabling only the active lights fixes the known problem but costs
    // zero.
    hsBitIterator iterOff(fLights.fEnabledFlags);
    for( iterOff.Begin(); !iterOff.End(); iterOff.Advance() )
        fD3DDevice->LightEnable(iterOff.Current(), false);


    // This is very annoying. Set fTexturing to false doesn't work if the next layer
    // we draw doesn't have a texture. So we have to set this flag instead to force
    // a state update. I have an idea about how to do all of this a lot better, but
    // it's not time to do it...not yet at least.... --mcn
    fSettings.fVeryAnnoyingTextureInvalidFlag = true;

//// ILayersAtOnce ////////////////////////////////////////////////////////////
// Compute how many of the upcoming layers we can render in a single pass on the
// current hardware.
//  mat   - material whose layers are being rendered.
//  which - index of the layer that will be the base of this pass.
// Returns the layer count for the pass, which is also stored in fCurrNumLayers.
// NOTE(review): no block delimiters, break or else lines are visible in this
// chunk, so the nesting and the loop's exit paths must be confirmed against
// the complete file.
UInt32  plDXPipeline::ILayersAtOnce( hsGMaterial *mat, UInt32 which )
    fCurrNumLayers = 1;

    // The early-outs below all leave the pass at a single layer.
    if( fView.fRenderState & plPipeline::kRenderBaseLayerOnly )
        return fCurrNumLayers;

    plLayerInterface *lay = mat->GetLayer( which );

    if (IsDebugFlagSet(plPipeDbg::kFlagNoMultitexture))
        return fCurrNumLayers;

    // With either bump debug flag set, a bump layer is drawn two layers at a time.
    if ((IsDebugFlagSet(plPipeDbg::kFlagBumpUV) || IsDebugFlagSet(plPipeDbg::kFlagBumpW)) && (lay->GetMiscFlags() & hsGMatState::kMiscBumpChans) )
        return fCurrNumLayers = 2;

    // NOTE(review): the closing parenthesis of this condition is not visible
    // in this chunk - confirm against the complete file.
    if( (lay->GetBlendFlags() & hsGMatState::kBlendNoColor)
        ||(lay->GetMiscFlags() & hsGMatState::kMiscTroubledLoner)
        return fCurrNumLayers;

    // New DX9.0c limitation for cards that can only do 2 textures per pass.
    // We used to be able to set stage 0 and 1 to textures, and set stage 2 to the
    // diffuse color. With DX9.0c we just get two texture stages. Period. 
    // Either we give up a texture or the diffuse color.
    if (fSettings.fMaxLayersAtOnce == 2)
        if ((mat->GetNumLayers() > which + 1)
            && !(mat->GetLayer(which + 1)->GetBlendFlags() & hsGMatState::kBlendNoTexColor))
            // If we're just using the texture for alpha, we can multiply
            // the diffuse color in stage 1. Otherwise, save it for the next pass.
            return fCurrNumLayers;

    int i;
    int maxLayersAtOnce = fSettings.fMaxLayersAtOnce;

    // Now Reserve space for piggy backs, and see if there are 
    // are any more layers we can pick up.
    maxLayersAtOnce = fSettings.fMaxLayersAtOnce - fActivePiggyBacks;
    // Never look past the last layer of the material.
    if( which + maxLayersAtOnce > mat->GetNumLayers() )
        maxLayersAtOnce = mat->GetNumLayers() - which;

    // Examine each candidate upper layer in turn. The tests cover: a UVW source
    // beyond what the hardware supports, a bind-next layer that would be the
    // last one in the pass, a layer flagged to restart the pass, and a layer
    // that is neither bound to its predecessor nor eligible per ICanEatLayer.
    // NOTE(review): none of these conditions has a visible consequent, and no
    // statement that increments fCurrNumLayers is visible; presumably each
    // test ends the scan and accepted layers bump the count - confirm.
    for( i = fCurrNumLayers; i < maxLayersAtOnce; i++ )
        plLayerInterface *lay = mat->GetLayer(which + i);
        if( (lay->GetUVWSrc() & 0xf) > fSettings.fMaxUVWSrc )
        if( (lay->GetMiscFlags() & hsGMatState::kMiscBindNext)
                &&(i+1 >= maxLayersAtOnce) )
        if( lay->GetMiscFlags() & hsGMatState::kMiscRestartPassHere )
        if( !(mat->GetLayer(which+i-1)->GetMiscFlags() & hsGMatState::kMiscBindNext)
                && !ICanEatLayer(lay) )
    return fCurrNumLayers;

//// ICanEatLayer /////////////////////////////////////////////////////////////
// Determine if this layer can be an upper layer, or if it needs
// to be the base on another pass.
//  lay - candidate layer; must not be nil.
// Returns true only when the layer has a texture, uses none of the blend/misc
// modes that force it to be a base layer, is not alpha blended with an ambient
// alpha below 1, and does not write Z.
hsBool  plDXPipeline::ICanEatLayer( plLayerInterface* lay )
{
    if( !lay->GetTexture() )
        return false;

    if( (lay->GetBlendFlags() & hsGMatState::kBlendNoColor)
        ||(lay->GetBlendFlags() & hsGMatState::kBlendAddColorTimesAlpha) // has to be base layer
        ||(lay->GetMiscFlags() & hsGMatState::kMiscTroubledLoner) )
        return false;

    if( (lay->GetBlendFlags() & hsGMatState::kBlendAlpha )
        &&(lay->GetAmbientColor().a < hsScalar1) )
        return false;

    // A layer that writes Z cannot be folded into another layer's pass.
    if( !(lay->GetZFlags() & hsGMatState::kZNoZWrite) )
        return false;

    return true;
}

//// Textures /////////////////////////////////////////////////////////////////

//// IReloadTexture ///////////////////////////////////////////////////////////
// Fills in D3D texture resource, creating it if necessary.
//  ref - texture ref to (re)load; its kCubicMap flag selects the cube or the
//        2D create/fill path.
// If creation fails, fD3DTexture stays nil and the fill step is skipped.
void    plDXPipeline::IReloadTexture( plDXTextureRef *ref )
{
    if( ref->GetFlags() & plDXTextureRef::kCubicMap )
    {
        if( ref->fD3DTexture == nil )
            ref->fD3DTexture = IMakeD3DCubeTexture( ref, ref->fFormatType );

        if( ref->fD3DTexture != nil )
            IFillD3DCubeTexture( (plDXCubeTextureRef *)ref );
    }
    else
    {
        // Plain 2D texture; must not run for cube maps, whose fD3DTexture is
        // a cube texture object.
        if( ref->fD3DTexture == nil )
            ref->fD3DTexture = IMakeD3DTexture( ref, ref->fFormatType );

        if( ref->fD3DTexture != nil )
            IFillD3DTexture( ref );
    }
}

//// IMakeD3DTexture //////////////////////////////////////////////////////////
//  Makes a DX Texture object based on the ref given.
//  ref        - supplies the texture dimensions, data size and owner (the
//               owner is used only for log/profile names).
//  formatType - requested D3D format; not referenced in the lines visible here.
//  Returns the new texture, or nil after logging to pipeline.log on failure.
//  On success ref->fDataSize is added to fTexManaged.
//  NOTE(review): in this chunk the CreateTexture argument list is incomplete
//  and poolType is used without a visible declaration - confirm against the
//  complete file.

IDirect3DTexture9   *plDXPipeline::IMakeD3DTexture( plDXTextureRef *ref, D3DFORMAT formatType )
    IDirect3DTexture9   *texPtr;
    fManagedAlloced = true;
    // The HRESULT is kept in fSettings.fDXError.
    if( FAILED( fSettings.fDXError = fD3DDevice->CreateTexture( ref->fMaxWidth, ref->fMaxHeight, 
                                          &texPtr, NULL ) ) )
        plStatusLog::AddLineS( "pipeline.log", 0xffff0000, "Unable to create texture (%s) Owner: %s "
                                            "Size: %d x %d NumLvls: %d Flags: %x",
                                            fSettings.fErrorStr, ref->fOwner ? ref->fOwner->GetKey() ? ref->fOwner->GetKey()->GetUoid().GetObjectName() : "" : "",
                                            ref->fMaxWidth, ref->fMaxHeight, ref->fMMLvs, ref->GetFlags() );
        return nil;
    PROFILE_POOL_MEM(poolType, ref->fDataSize, true, (ref->fOwner ? ref->fOwner->GetKey() ? ref->fOwner->GetKey()->GetUoid().GetObjectName() : "(UnknownTexture)" : "(UnknownTexture)"));
    fTexManaged += ref->fDataSize;

    return texPtr;

//// IFillD3DTexture //////////////////////////////////////////////////////////
// Copies the data from the ref into the D3D texture, filling in all
// mip levels.
//
// The source is ref->fData, with the mip levels stored back to back:
// level i occupies ref->fLevelSizes[ i ] bytes. Each of the ref->fMMLvs levels
// is locked, filled with a single memcpy and unlocked again.
//
// NOTE(review): no early exit is visible after either error log below
// (nil data / failed lock) - confirm that the function bails out in both cases.
void    plDXPipeline::IFillD3DTexture( plDXTextureRef *ref )
    int         i;
    UInt8       *pTexDat = (UInt8 *)ref->fData;

    // Nothing to copy from: report which texture was affected.
    if( pTexDat == nil )
        plStatusLog::AddLineS( "pipeline.log", 0xffff0000, "Unable to fill texture ref (data is nil) Owner: %s",
                                            ref->fOwner ? ref->fOwner->GetKey() ? ref->fOwner->GetKey()->GetUoid().GetObjectName() : "" : "" );

    IDirect3DTexture9 *lpDst = (IDirect3DTexture9 *)ref->fD3DTexture;

    for( i = 0; i < ref->fMMLvs; i++ )
        D3DLOCKED_RECT      lockInfo;

        // Lock the whole level (nil rect); the HRESULT is kept in fSettings.fDXError.
        if( FAILED( fSettings.fDXError = lpDst->LockRect( i, &lockInfo, nil, 0 ) ) )
            plStatusLog::AddLineS( "pipeline.log", 0xffff0000, "Unable to lock texture level %d for filling (%s) Owner: %s "
                                                "Size: %d x %d NumLvls: %d Flags: %x",
                                                i, fSettings.fErrorStr, ref->fOwner ? ref->fOwner->GetKey() ? ref->fOwner->GetKey()->GetUoid().GetObjectName() : "" : "",
                                                ref->fMaxWidth, ref->fMaxHeight, ref->fMMLvs, ref->GetFlags() );

        // Copy this level in one go, then step the source pointer to the next level.
        memcpy( (char *)lockInfo.pBits, pTexDat, ref->fLevelSizes[ i ] );
        pTexDat += ref->fLevelSizes[ i ];
        lpDst->UnlockRect( i );

//// IMakeD3DCubeTexture //////////////////////////////////////////////////////
//  Makes a DX Cubic Texture object based on the ref given.
//
//  ref        - supplies the edge length (fMaxWidth), mip level count (fMMLvs)
//               and the byte size used for memory accounting (fDataSize).
//  formatType - D3D pixel format to create the cube map with.
//
//  Returns the new cube texture, or nil if the device could not create it.
//  Memory accounting (profile pool + fTexManaged) is only updated when the
//  texture actually exists, matching what IMakeD3DTexture does on failure.

IDirect3DCubeTexture9   *plDXPipeline::IMakeD3DCubeTexture( plDXTextureRef *ref, D3DFORMAT formatType )
{
    D3DPOOL                 poolType = D3DPOOL_MANAGED;
    IDirect3DCubeTexture9   *texPtr = nil;
    fManagedAlloced = true;
    WEAK_ERROR_CHECK(fD3DDevice->CreateCubeTexture( ref->fMaxWidth, ref->fMMLvs, 0, formatType, poolType, &texPtr, NULL));

    // Creation failed: nothing was allocated, so don't charge it to the totals.
    if( texPtr == nil )
        return nil;

    PROFILE_POOL_MEM(poolType, ref->fDataSize, true, (ref->fOwner ? ref->fOwner->GetKey() ? ref->fOwner->GetKey()->GetUoid().GetObjectName() : "(UnknownTexture)" : "(UnknownTexture)"));
    fTexManaged += ref->fDataSize;
    return texPtr;
}

//// IFillD3DCubeTexture //////////////////////////////////////////////////////
// Fill in all faces of the D3D cube map from the input reference.
//
// Face 0 is read from ref->fData and faces 1-5 from ref->fFaceData[ 0..4 ].
// Within a face the mip levels are stored back to back, ref->fLevelSizes[ i ]
// bytes for level i. The faces[] table maps that storage order onto the D3D
// cube face enum.
//
// If a face has no source data, or a level cannot be locked, the problem is
// logged to "pipeline.log" and the fill is abandoned instead of copying
// through an invalid pointer.
void    plDXPipeline::IFillD3DCubeTexture( plDXCubeTextureRef *ref )
{
    int                 i, f;
    D3DCUBEMAP_FACES    faces[ 6 ] = {  D3DCUBEMAP_FACE_NEGATIVE_X,     // Left
                                        D3DCUBEMAP_FACE_POSITIVE_X,     // Right
                                        D3DCUBEMAP_FACE_POSITIVE_Z,     // Front
                                        D3DCUBEMAP_FACE_NEGATIVE_Z,     // Back
                                        D3DCUBEMAP_FACE_POSITIVE_Y,     // Top
                                        D3DCUBEMAP_FACE_NEGATIVE_Y };   // Bottom

    IDirect3DCubeTexture9   *lpDst = (IDirect3DCubeTexture9 *)ref->fD3DTexture;
    const char              *ownerName = ref->fOwner ? ref->fOwner->GetKey() ? ref->fOwner->GetKey()->GetUoid().GetObjectName() : "" : "";

    for( f = 0; f < 6; f++ )
    {
        UInt8   *pTexDat = ( f == 0 ) ? (UInt8 *)ref->fData : (UInt8 *)ref->fFaceData[ f - 1 ];

        if( pTexDat == nil )
        {
            plStatusLog::AddLineS( "pipeline.log", 0xffff0000, "Unable to fill cube texture face %d (data is nil) Owner: %s",
                                   f, ownerName );
            return;
        }

        for( i = 0; i < ref->fMMLvs; i++ )
        {
            D3DLOCKED_RECT      lockInfo;

            // A failed lock leaves lockInfo.pBits undefined, so never memcpy after one.
            if( FAILED( fSettings.fDXError = lpDst->LockRect( faces[ f ], i, &lockInfo, nil, 0 ) ) )
            {
                plStatusLog::AddLineS( "pipeline.log", 0xffff0000, "Unable to lock cube texture face %d level %d for filling (HRESULT 0x%08x) Owner: %s",
                                       f, i, (unsigned int)fSettings.fDXError, ownerName );
                return;
            }

            memcpy( (char *)lockInfo.pBits, pTexDat, ref->fLevelSizes[ i ] );
            pTexDat += ref->fLevelSizes[ i ];
            lpDst->UnlockRect( faces[ f ], i );
        }
    }
}

//// MakeTextureRef ///////////////////////////////////////////////////////////
//  Creates a hsGDeviceRef for a texture.
// May have to decompress the texture if the hardware doesn't support compressed textures (unlikely).
//
// layer - optional; when non-nil its misc flags add projection / bump flags to the ref.
// b     - the mipmap to build the device ref for.
// Returns the (new or re-purposed) plDXTextureRef after its D3D texture has
// been (re)loaded through IReloadTexture().
//
// NOTE(review): several structural lines (braces / else) are not visible in this
// text, so branch boundaries below are inferred from indentation only.
hsGDeviceRef    *plDXPipeline::MakeTextureRef( plLayerInterface* layer, plMipmap *b )
    // "original" always names the caller's mipmap; "b" may be swapped for a temporary copy.
    plMipmap    *original = b, *colorized = nil;

    // If the hardware doesn't support Luminance maps, we'll just treat as ARGB.
    if( !( fSettings.fD3DCaps & kCapsLuminanceTextures ) )
        b->SetFlags( b->GetFlags() & ~plMipmap::kIntensityMap );

    /// Colorize if we're supposed to (8.21.2000 mcn)
    // Debugging only.
    // NOTE(review): as shown, a successful clone is immediately replaced by
    // "original" again - a colorize call and an else appear to be missing here.
    if (IsDebugFlagSet(plPipeDbg::kFlagColorizeMipmaps))
        b = original->Clone();
        if( b != nil )
            b = original;

    // No hardware support for compressed textures: work from an uncompressed 16 bit copy.
    if( !( fSettings.fD3DCaps & kCapsCompressTextures ) && b->IsCompressed() )
        b = hsCodecManager::Instance().CreateUncompressedMipmap( b, hsCodecManager::k16BitDepth );

    /// Set up some stuff
    UInt32      mmlvs      = 1;
    D3DFORMAT   formatType = D3DFMT_UNKNOWN;    // D3D Format
    UInt32      formatSize = 0;
    UInt32      totalSize = 0;
    UInt32*     levelSizes = nil;
    UInt32      numPix = 0;
    UInt32      externData = false;
    void        *tData;
    hsBool      noMip = !(fSettings.fD3DCaps & kCapsMipmap);

    /// Convert the bitmap over
    // Select a target format
    IGetD3DTextureFormat( b, formatType, formatSize );

    // Process the texture data into a format that can be directly copied to the D3D texture.
    // externData returned as true means that tData just points directly into the mipmap's fImage,
    // so don't delete it when deleting the texture device ref. externData false means this is
    // a reformatted copy, so the ref owns it.
    externData = IProcessMipmapLevels( b, mmlvs, levelSizes, totalSize, numPix, tData, noMip );

    // If the texture has a device ref, just re-purpose it, else make one and initialize it.
    plDXTextureRef *ref = (plDXTextureRef *)b->GetDeviceRef();
    if( !ref )
        ref = TRACKED_NEW plDXTextureRef( formatType, 
                                          mmlvs, b->GetWidth(), b->GetHeight(), 
                                          numPix, totalSize, totalSize, levelSizes,
                                          tData, externData );
        ref->fOwner = original;
        ref->Link( &fTextureRefList );
        original->SetDeviceRef( ref );
        // Note: this is because SetDeviceRef() will ref it, and at this point,
        // only the bitmap should own the ref, not us. We ref/unref it on Use()
        hsRefCnt_SafeUnRef( ref );  
        ref->Set( formatType, mmlvs, b->GetWidth(), b->GetHeight(), 
                  numPix, totalSize, totalSize, levelSizes, tData, externData );

    // Keep the refs in a linked list for easy disposal.
    if( !ref->IsLinked() )
        // Re-linking
        ref->Link( &fTextureRefList );

    /// Copy the data into the ref
    IReloadTexture( ref );

    // The reload above has run; drop the ref's pointer to the source pixels
    // and mark the ref as up to date.
    ref->fData = nil;
    ref->SetDirty( false );

    // Set any implied flags.
    if (layer)
        if( layer->GetMiscFlags() & hsGMatState::kMiscPerspProjection )
            ref->SetFlags(ref->GetFlags() | plDXTextureRef::kPerspProjection);
        else if( layer->GetMiscFlags() & hsGMatState::kMiscOrthoProjection )
            ref->SetFlags(ref->GetFlags() | plDXTextureRef::kOrthoProjection);

        if( layer->GetMiscFlags() & hsGMatState::kMiscBumpDw )
            ref->SetFlags(ref->GetFlags() | plDXTextureRef::kUVWNormal);

    if( b != original )
        delete b;       // Delete if we created a new (temporary) one

    // Turn this on to delete the plasma system memory copy once we have a D3D managed version.
    // Currently disabled, because there are still mipmaps that are read from after their managed
    // versions are created, but aren't flagged kDontThrowAwayImage or kUserOwnsBitmap.
    // NOTE(review): the statement guarded by this condition is not visible in this text.
    if( !( original->GetFlags() & ( plMipmap::kUserOwnsBitmap | plMipmap::kDontThrowAwayImage ) )
        && !GetProperty( kPropDontDeleteTextures ) )
#ifdef MF_TOSSER
#endif // MF_TOSSER

    return ref;

//// IMakeCubicTextureRef /////////////////////////////////////////////////////
// Same as MakeTextureRef, except done for the six faces of a cube map.
//
// layer - not referenced in the visible body.
// cubic - environment map whose six faces are uploaded.
// Returns nil when cubic is nil or the device lacks kCapsCubicTextures,
// otherwise the (new or re-purposed) plDXCubeTextureRef, flagged kCubicMap.
//
// NOTE(review): brace / else lines are not visible in this text, so the pairing
// of the alternative branches below is inferred from indentation only.
hsGDeviceRef    *plDXPipeline::IMakeCubicTextureRef( plLayerInterface* layer, plCubicEnvironmap *cubic )
    plDXCubeTextureRef  *ref;
    plMipmap            *faces[ 6 ];
    int                 i;
    D3DFORMAT           formatType = D3DFMT_UNKNOWN;
    UInt32              formatSize = 0;
    UInt32              numLevels = 1;
    UInt32              totalSize = 0;
    UInt32              *levelSizes = nil;
    UInt32              numPixels = 0;
    UInt32              externData;
    void                *textureData[ 6 ];

    if( cubic == nil || !( fSettings.fD3DCaps & kCapsCubicTextures ) )
        return nil;

    // Cube maps are only mipped when the device supports both plain and cubic mipmaps.
    hsBool noMip = !(fSettings.fD3DCaps & kCapsMipmap) || !(fSettings.fD3DCaps & kCapsCubicMipmap);

    /// Get the mips
    // Without hardware texture compression, compressed faces are swapped for
    // uncompressed 16 bit copies; those temporaries are deleted in the cleanup loop.
    if( !( fSettings.fD3DCaps & kCapsCompressTextures ) )
        for( i = 0; i < 6; i++ )
            faces[ i ] = cubic->GetFace( i );
            if( faces[ i ]->IsCompressed() )
                faces[ i ] = hsCodecManager::Instance().CreateUncompressedMipmap( faces[ i ], hsCodecManager::k16BitDepth );
        for( i = 0; i < 6; i++ )
            faces[ i ] = cubic->GetFace( i );

    /// Create the ref
    // Get format
    // Format and sizes are derived from face 0.
    IGetD3DTextureFormat( faces[0], formatType, formatSize );

    // Process the data.
    if( faces[0]->IsCompressed() || ( faces[0]->GetPixelSize() < 32 ) )
        /// For this, we just take the image data pointers directly, so only call IProcess once
        externData = IProcessMipmapLevels( faces[ 0 ], numLevels, levelSizes, totalSize, numPixels, textureData[ 0 ], noMip );
        for( i = 1; i < 6; i++ )
            textureData[ i ] = faces[ i ]->GetImage();
        for( i = 0; i < 6; i++ )
            /// Some of this will be redundant, but oh well
            externData = IProcessMipmapLevels( faces[ i ], numLevels, levelSizes, totalSize, numPixels, textureData[ i ], noMip );

    ref = (plDXCubeTextureRef *)cubic->GetDeviceRef();
    if( !ref )
        // Face 0 goes in as the ref's main data; total data size is six faces' worth.
        ref = TRACKED_NEW plDXCubeTextureRef( formatType, 
                                          numLevels, faces[ 0 ]->GetWidth(), faces[ 0 ]->GetHeight(), 
                                          numPixels, totalSize, totalSize * 6, levelSizes,
                                          textureData[ 0 ], externData );
        ref->fOwner = cubic;
        ref->Link( &fTextureRefList );  // So we don't ref later on down
        // Faces 1-5 are stored in fFaceData[ 0..4 ].
        for( i = 0; i < 5; i++ )
            ref->fFaceData[ i ] = textureData[ i + 1 ];

        cubic->SetDeviceRef( ref );
        // Note: this is because SetDeviceRef() will ref it, and at this point,
        // only the bitmap should own the ref, not us. We ref/unref it on Use()
        hsRefCnt_SafeUnRef( ref );
        ref->Set( formatType, numLevels, faces[ 0 ]->GetWidth(), faces[ 0 ]->GetHeight(), 
                  numPixels, totalSize, totalSize * 6, levelSizes, textureData[ 0 ], externData );

        for( i = 0; i < 5; i++ )
            ref->fFaceData[ i ] = textureData[ i + 1 ];
    // IReloadTexture() keys off this flag to pick the cube-map path.
    ref->SetFlags( ref->GetFlags() | plDXTextureRef::kCubicMap );

    // Put in linked list for easy disposal.
    if( !ref->IsLinked() )
        // Re-linking
        ref->Link( &fTextureRefList );

    /// Copy the data into the ref
    IReloadTexture( ref );
    ref->SetDirty( false );

    /// Cleanup
    for( i = 0; i < 6; i++ )
        // Only temporaries (uncompressed copies) differ from the cubic map's own faces.
        if( faces[ i ] != cubic->GetFace( i ) )
            delete faces[ i ];
        if( !( cubic->GetFace(i)->GetFlags() & (plMipmap::kUserOwnsBitmap | plMipmap::kDontThrowAwayImage) ) && !GetProperty( kPropDontDeleteTextures ) )
            // Turn this on to delete the plasma system memory copy once we have a D3D managed version.
            // Currently disabled, because there are still mipmaps that are read from after their managed
            // versions are created, but aren't flagged kDontThrowAwayImage or kUserOwnsBitmap.
//          cubic->GetFace(i)->Reset();

    return ref;

//// IProcessMipmapLevels /////////////////////////////////////////////////////
// Compute proper values for the arguments passed in.
// Return true if the data returned points directly into the mipmap data,
// return false if textureData is a reformatted copy of the mipmap's data.
//
// mipmap      - source image.
// numLevels   - out: number of mip levels that will be uploaded.
// levelSizes  - out: newly allocated array of per-level byte sizes, or nil
//               when only a single level is used.
// totalSize   - out: total byte size of the data in textureData.
// numPixels   - out: pixel count (0 on the direct-use path).
// textureData - out: the mipmap's own image on the direct-use path, otherwise
//               the shared scratch buffer from IGetPixelScratch().
// noMip       - true to use only the top level.
//
// NOTE(review): brace / else lines and at least one loop statement are not
// visible in this text; branch pairing below is inferred from indentation.
hsBool  plDXPipeline::IProcessMipmapLevels( plMipmap *mipmap, UInt32 &numLevels,
                                            UInt32 *&levelSizes, UInt32 &totalSize, 
                                            UInt32 &numPixels, void *&textureData, hsBool noMip )
    hsBool      externData = false;
    D3DFORMAT   formatType = D3DFMT_UNKNOWN;    // D3D Format
    UInt32      formatSize;

    IGetD3DTextureFormat( mipmap, formatType, formatSize );

    // Compressed or 16 bit, we can use directly.
    if( mipmap->IsCompressed() || ( mipmap->GetPixelSize() < 32 ) )
        numPixels = 0;
        if( noMip )
            numLevels = 1;
            levelSizes = nil;
            totalSize = mipmap->GetLevelSize(0);
            // A level is rejected while either dimension has any of these low
            // bits set, i.e. is not a multiple of 4 (or 8 with the mask below).
            UInt32          sizeMask = 0x03;

            /// 10.31.2000 - If we have this flag set, we really have to cut out
            /// sizes under 8x8. So far only true on the KYRO...
            if( fSettings.fD3DCaps & kCapsNoKindaSmallTexs )
                sizeMask = 0x07;

            int maxLevel = mipmap->GetNumLevels() - 1;

            /// 9.7.2000 - Also do this test if the card doesn't support
            /// itty bitty textures
            // NOTE(review): nothing visible in this loop changes maxLevel - confirm
            // that the step to the next larger level is present in the full file.
            if( mipmap->IsCompressed() || !( fSettings.fD3DCaps & kCapsDoesSmallTextures ) )
                mipmap->SetCurrLevel( maxLevel );
                while( ( mipmap->GetCurrWidth() | mipmap->GetCurrHeight() ) & sizeMask )
                    hsAssert( maxLevel >= 0, "How was this ever compressed?" );
                    mipmap->SetCurrLevel( maxLevel );

            mipmap->SetCurrLevel( 0 );
            totalSize = 0;
            numLevels = maxLevel + 1;
            levelSizes = TRACKED_NEW UInt32[ numLevels ];
            int i;
            for( i = 0; i < numLevels; i++ )
                levelSizes[ i ] = mipmap->GetLevelSize( i );
                totalSize += mipmap->GetLevelSize( i );

        // Direct use: hand back the mipmap's own image pointer.
        textureData = mipmap->GetImage();
        externData = true;
        // 32 bit uncompressed data. In general, we reformat to 16 bit if we're running
        // 16 bit, or if 32 bit leave it at 32. All subject to what the hardware can do
        // and what the texture is for. See IGetD3DTextureFormat.
        // Convert the format size from bits to bytes per pixel.
        formatSize >>= 3;

        if( !noMip )
            numPixels = mipmap->GetTotalSize() * 8 / mipmap->GetPixelSize();
            numLevels = mipmap->GetNumLevels();

            levelSizes = TRACKED_NEW UInt32[ numLevels ];

            int     i;
            UInt32 w, h;
            for( i = 0; i < numLevels; i++ )
                mipmap->GetLevelPtr( i, &w, &h );
                levelSizes[ i ] = w * h * formatSize;
            numPixels = mipmap->GetWidth() * mipmap->GetHeight();
            numLevels = 1;
            levelSizes = nil;
        totalSize = numPixels * formatSize;

        // Shared scratch space to reformat a texture before it's copied into
        // the D3D surface.
        textureData = IGetPixelScratch( totalSize );

        // Convert it to the requested format.
        IFormatTextureData( formatType, numPixels, (hsRGBAColor32 *)mipmap->GetImage(), textureData );

    return externData;

//// IGetPixelScratch /////////////////////////////////////////////////////////
// Return scratch space of at least size bytes, to reformat a mipmap into.
//
// The buffer is a single function-static allocation that only ever grows:
//   size == 0      - frees the buffer and returns nil.
//   size <= cached - returns the existing buffer unchanged.
//   size >  cached - frees the old buffer (contents are NOT preserved) and
//                    allocates a new one of exactly size bytes.
// The returned pointer stays owned by this function; callers must not free it,
// and it is invalidated by the next call that grows or releases the buffer.
void    *plDXPipeline::IGetPixelScratch( UInt32 size )
{
    static char     *sPtr = nil;
    static UInt32   sSize = 0;

    if( size == 0 )
    {
        // Explicit release request. delete[] on a null pointer is a no-op.
        delete [] sPtr;
        sPtr = nil;
        sSize = 0;
    }
    else if( size > sSize )
    {
        // Reset the cached state before allocating, so that a failed
        // allocation can't leave a dangling pointer paired with a stale size.
        delete [] sPtr;
        sPtr = nil;
        sSize = 0;

        sPtr = TRACKED_NEW char[ size ];
        sSize = size;
    }

    return sPtr;
}

//// IGetD3DTextureFormat /////////////////////////////////////////////////////
//  Given a bitmap, finds the matching D3D format.
//
//  b          - bitmap to inspect (must not be nil).
//  formatType - out: the chosen D3DFORMAT.
//  texSize    - out: bits per texel of the chosen format (8, 16 or 32);
//               set to 0 for DXT-compressed bitmaps.
//
//  Selection order, by the checks below: DXT compressed, bump environment map,
//  16 bit source, intensity map, then generic 32 bit source. For the last two,
//  formats are tried in preference order and filtered by ITextureFormatAllowed().
//
//  NOTE(review): else / break / default lines are not visible in this text;
//  the pairing of alternatives below is inferred from indentation only.

void    plDXPipeline::IGetD3DTextureFormat( plBitmap *b, D3DFORMAT &formatType, UInt32& texSize )
    hsAssert( b, "Nil input to GetTextureFormat()" );

    // kForce32Bit on the bitmap vetoes the 16 bit choices that are otherwise
    // preferred when running at a 16 bit color depth.
    hsBool prefer32bit = 0 != (b->GetFlags() & plBitmap::kForce32Bit);

    if( b->IsCompressed() )
        hsAssert( plMipmap::kDirectXCompression == b->fCompressionType, "Unsupported compression format" );
        texSize = 0;
        switch( b->fDirectXInfo.fCompressionType )
            case plMipmap::DirectXInfo::kDXT1:
                formatType = D3DFMT_DXT1;
//          case plMipmap::DirectXInfo::kDXT2:
//              formatType = D3DFMT_DXT2;
//              break;
//          case plMipmap::DirectXInfo::kDXT3:
//              formatType = D3DFMT_DXT3;
//              break;
//          case plMipmap::DirectXInfo::kDXT4:
//              formatType = D3DFMT_DXT4;
//              break;
            case plMipmap::DirectXInfo::kDXT5:
                formatType = D3DFMT_DXT5;
                hsAssert(false, "Unknown DirectX compression format");
    // Bump environment maps: with / without an alpha (luminance) channel.
    else if( b->GetFlags() & plMipmap::kBumpEnvMap )
        texSize = 16;
        if( b->GetFlags() & plMipmap::kAlphaChannelFlag )
            formatType = D3DFMT_L6V5U5;
            formatType = D3DFMT_V8U8;
    // Source data is already 16 bit.
    else if( b->GetPixelSize() == 16 )
        texSize = 16;
        if( b->GetFlags() & plMipmap::kIntensityMap )
            if( b->GetFlags() & plMipmap::kAlphaChannelFlag )
                formatType = D3DFMT_A8L8;
                formatType = D3DFMT_L8;
            if( b->GetFlags() & plMipmap::kAlphaChannelFlag )
                formatType = D3DFMT_A4R4G4B4;
                formatType = D3DFMT_A1R5G5B5;
    // Intensity (luminance) maps: luminance formats first, then ARGB fallbacks.
    else if( b->GetFlags() & plMipmap::kIntensityMap )
        if( b->GetFlags() & plMipmap::kAlphaChannelFlag )
            if( ITextureFormatAllowed( D3DFMT_A8L8 ) )
                formatType = D3DFMT_A8L8;
                texSize = 16;
            else if( !prefer32bit && ( fSettings.fColorDepth == 16 ) && ITextureFormatAllowed( D3DFMT_A4R4G4B4 ) )
                formatType = D3DFMT_A4R4G4B4;
                texSize = 16;
            else if( ITextureFormatAllowed( D3DFMT_A8R8G8B8 ) )
                formatType = D3DFMT_A8R8G8B8;
                texSize = 32;
            else if( ITextureFormatAllowed( D3DFMT_A4R4G4B4 ) )
                formatType = D3DFMT_A4R4G4B4;
                texSize = 16;
            if( ITextureFormatAllowed( D3DFMT_L8 ) )
                formatType = D3DFMT_L8;
                texSize = 8;
            else if( !prefer32bit && ( fSettings.fColorDepth == 16 ) && ITextureFormatAllowed( D3DFMT_A1R5G5B5 ) )
                formatType = D3DFMT_A1R5G5B5;
                texSize = 16;
            else if( ITextureFormatAllowed( D3DFMT_A8R8G8B8 ) )
                formatType = D3DFMT_A8R8G8B8;
                texSize = 32;
            else if( ITextureFormatAllowed( D3DFMT_A1R5G5B5 ) )
                formatType = D3DFMT_A1R5G5B5;
                texSize = 16;
        // Remaining sources: 4 bit alpha formats when the bitmap has an alpha
        // channel, 1 bit alpha formats when it does not.
        if( b->GetFlags() & plMipmap::kAlphaChannelFlag )
            if( !prefer32bit && ( fSettings.fColorDepth == 16 ) && ITextureFormatAllowed( D3DFMT_A4R4G4B4 ) )
                formatType = D3DFMT_A4R4G4B4;
                texSize = 16;
            else if( ITextureFormatAllowed( D3DFMT_A8R8G8B8 ) )
                formatType = D3DFMT_A8R8G8B8;
                texSize = 32;
            else if( ITextureFormatAllowed( D3DFMT_A4R4G4B4 ) )
                formatType = D3DFMT_A4R4G4B4;
                texSize = 16;
            if( !prefer32bit && ( fSettings.fColorDepth == 16 ) && ITextureFormatAllowed( D3DFMT_A1R5G5B5 ) )
                formatType = D3DFMT_A1R5G5B5;
                texSize = 16;
            else if( ITextureFormatAllowed( D3DFMT_A8R8G8B8 ) )
                formatType = D3DFMT_A8R8G8B8;
                texSize = 32;
            else if( ITextureFormatAllowed( D3DFMT_A1R5G5B5 ) )
                formatType = D3DFMT_A1R5G5B5;
                texSize = 16;

    // A zero formatType here means none of the candidates was accepted.
    hsAssert( formatType, "failing to find format type" );

//// IFormatTextureData ///////////////////////////////////////////////////////
// Convert the input 32 bit uncompressed RGBA data into the requested format.
//
// formatType - target D3DFORMAT value; one of the cases handled below.
// numPix     - number of source pixels to convert.
// src        - source pixels (hsRGBAColor32, read through its r/g/b/a members).
// dst        - destination buffer, written as 8, 16 or 32 bit texels
//              depending on the target format.
//
// NOTE(review): break statements, per-pixel pointer increments in the two
// while loops, the default label and the opening of the HS_DEBUGGING
// conditional are not visible in this text - confirm against the full file.
void    plDXPipeline::IFormatTextureData( UInt32 formatType, UInt32 numPix, hsRGBAColor32* const src, void *dst )
    switch( formatType )
        // 16 bit bump + luminance: top 6 bits of alpha -> bits 15..10,
        // top 5 bits of green -> bits 9..5, top 5 bits of red -> bits 4..0.
        case D3DFMT_L6V5U5:
                UInt16 *pixels = (UInt16 *)dst;
                hsRGBAColor32* p = src;
                hsRGBAColor32* end = src + numPix;

                while( p < end )
                    *pixels = ((p->a << 8) & 0xfc00) 
                        | ((p->g << 2) & 0x03e0)
                        | ((p->r >> 3) & 0x001f);
                    if( *pixels & 0xfc00 )
                    else if( *pixels & 0x03e0 )
                    else if( *pixels & 0x001f )
#endif // HS_DEBUGGING

        // 16 bit bump: green in the high byte, red in the low byte.
        case D3DFMT_V8U8:
                UInt16 *pixels = (UInt16 *)dst;
                hsRGBAColor32* p = src;
                hsRGBAColor32* end = src + numPix;

                while( p < end )
                    *pixels = (p->g << 8)
                        | (p->r << 0);

        // 16 bit alpha + luminance: alpha in the high byte, red (used as the
        // luminance value) in the low byte.
        case D3DFMT_A8L8:
                UInt16 *pixels = (UInt16 *)dst;
                int i;
                hsRGBAColor32* const p = src;
                for(i =0; i < numPix; i++)
                    pixels[i]= ((p[i].a & 0xff) << 8) | (p[i].r & 0xff);

        // 16 bit ARGB 4:4:4:4: the top 4 bits of each channel are kept.
        case D3DFMT_A4R4G4B4:
                UInt16 *pixels = (UInt16 *)dst;
                int i;
                hsRGBAColor32* const p = src;

                for(i =0; i < numPix; i++)
                    pixels[i]= (((p[i].r>>4) & 0xf) << 8) 
                                | (((p[i].g >> 4) & 0xf) << 4) 
                                | (((p[i].b >> 4) & 0xf) )
                                | (((p[i].a >> 4) & 0xf) << 12);

        // 16 bit ARGB 1:5:5:5: top 5 bits of each color channel; the alpha bit
        // is set for any non-zero source alpha.
        case D3DFMT_A1R5G5B5:
                UInt16 *pixels = (UInt16 *)dst;
                int i;
                hsRGBAColor32* const p = src;

                for(i =0; i < numPix; i++)
                    pixels[i]= (((p[i].r>>3) & 0x1f) << 10) | 
                                (((p[i].g >> 3) & 0x1f) << 5) |
                                ((p[i].b >> 3) & 0x1f) | ((p[i].a == 0) ? 0 : 0x8000);

        // 8 bit luminance: taken from the red channel.
        case D3DFMT_L8:
                UInt8 *pixels = (UInt8 *)dst;
                int i;
                hsRGBAColor32* const p = src;

                for(i =0; i < numPix; i++)
                    pixels[i]= p[i].r;

        // 32 bit ARGB: channels repacked as a << 24 | r << 16 | g << 8 | b.
        case D3DFMT_A8R8G8B8:
                UInt32 *pixels = (UInt32 *)dst;
                int i;
                hsRGBAColor32* const p = src;

                for(i =0; i < numPix; i++)
                    pixels[i]= ( ( p[i].a << 24 ) | ( p[i].r << 16 ) | ( p[i].g << 8 ) | p[i].b );

            hsAssert(false, "Unknown texture format selected");

//// View Stuff ///////////////////////////////////////////////////////////////

//// TestVisibleWorld /////////////////////////////////////////////////////////
// Check if the world space bounds are visible within the current view frustum.
// Bounds of type kBoundsNormal are tested against fView.fCullTree.
// NOTE(review): the statement guarded by fCullTreeDirty and the else in front
// of the final return are not visible in this text - presumably the cull tree
// is refreshed first; confirm against the full file.
hsBool plDXPipeline::TestVisibleWorld( const hsBounds3Ext& wBnd )
    if( fView.fCullTreeDirty )
    if (wBnd.GetType() == kBoundsNormal)
        return fView.fCullTree.BoundsVisible(wBnd);
        return false;

// Scene object overload: walks every span of every plDrawableSpans the
// object's draw interface refers to, and returns true as soon as one span
// gets past all of the checks below (including the bounds overload of
// TestVisibleWorld on span->fWorldBounds). Returns false when the object has
// no draw interface or no span qualifies.
// NOTE(review): the statements guarded by the skip conditions below
// (presumably "continue") are not visible in this text.
hsBool plDXPipeline::TestVisibleWorld( const plSceneObject* sObj )
    const plDrawInterface* di = sObj->GetDrawInterface();
    if( !di )
        return false;

    const int numDraw = di->GetNumDrawables();
    int i;
    for( i = 0; i < numDraw; i++ )
        // Only drawables that convert to plDrawableSpans are considered.
        plDrawableSpans* dr = plDrawableSpans::ConvertNoRef(di->GetDrawable(i));
        if( !dr )

        plDISpanIndex& diIndex = dr->GetDISpans(di->GetDrawableMeshIndex(i));
        if( diIndex.IsMatrixOnly() )

        const int numSpan = diIndex.GetCount();
        int j;
        for( j = 0; j < numSpan; j++ )
            const plSpan* span = dr->GetSpan(diIndex[j]);

            // Span is flagged as not drawn.
            if( span->fProps & plSpan::kPropNoDraw )

            // Span's vis set must overlap the global vis set and must not
            // overlap the global "vis not" set.
            if( !span->GetVisSet().Overlap(plGlobalVisMgr::Instance()->GetVisSet())
                || span->GetVisSet().Overlap(plGlobalVisMgr::Instance()->GetVisNot()) )


            // Span's world bounds fail the cull tree test.
            if( !TestVisibleWorld(span->fWorldBounds) )

            return true;
    return false;

//// GetViewAxesWorld /////////////////////////////////////////////////////////
// Get the current view direction, up and direction X up.
// axes - out: filled as [0] = GetViewAcrossWorld(), [1] = GetViewUpWorld(),
//        [2] = GetViewDirWorld().
void    plDXPipeline::GetViewAxesWorld(hsVector3 axes[3] /* ac,up,at */ ) const
    axes[ 0 ] = GetViewAcrossWorld();
    axes[ 1 ] = GetViewUpWorld();
    axes[ 2 ] = GetViewDirWorld();

//// GetFOV ///////////////////////////////////////////////////////////////////
// Get the current FOV in degrees.
// fovX, fovY - out: horizontal and vertical field of view, read from the
//              current view transform.
void    plDXPipeline::GetFOV(hsScalar& fovX, hsScalar& fovY) const
    fovX = GetViewTransform().GetFovXDeg();
    fovY = GetViewTransform().GetFovYDeg();

//// SetFOV ///////////////////////////////////////////////////////////////////
// Set the current FOV in degrees. Forces perspective rendering to be true.
// NOTE(review): only the SetFovDeg() call is visible in this text; whether
// perspective is actually forced here should be confirmed against the full file.
void    plDXPipeline::SetFOV( hsScalar fovX, hsScalar fovY )
    IGetViewTransform().SetFovDeg(fovX, fovY);

// Get the orthogonal projection view size in world units (e.g. feet).
// width, height - out: read from the view transform's GetScreenWidth() /
//                 GetScreenHeight().
void    plDXPipeline::GetSize( hsScalar& width, hsScalar& height ) const
    width = GetViewTransform().GetScreenWidth();
    height = GetViewTransform().GetScreenHeight();

// Set the orthogonal projection view size in world units (e.g. feet).
// Forces projection to orthogonal if it wasn't.
// NOTE(review): no body is visible for this function in this text - confirm
// against the full file before relying on the description above.
void    plDXPipeline::SetSize( hsScalar width, hsScalar height )

//// GetDepth /////////////////////////////////////////////////////////////////
// Get the current hither and yon.
// hither, yon - out: filled in by the current view transform's GetDepth().
void plDXPipeline::GetDepth(hsScalar& hither, hsScalar& yon) const
    GetViewTransform().GetDepth(hither, yon);

//// SetDepth /////////////////////////////////////////////////////////////////
// Set the current hither and yon.
// Forwards both values to the mutable view transform's SetDepth().
void plDXPipeline::SetDepth(hsScalar hither, hsScalar yon)
    IGetViewTransform().SetDepth(hither, yon);

//// ISavageYonHack ///////////////////////////////////////////////////////////
//  Corrects the yon for the *#(&$*#&$(*& Savage4 chipset (ex. Diamond Stealth
//  III S540). Let's just say this card SUCKS.
// Obsolete since we don't support the Savage4 chipset any more.
//
// Reads the current yon and, when it falls in a band around 128, 256, 512 or
// 1024, moves the local copy to just past that value.
// NOTE(review): in the visible code the adjusted value only lives in the local
// variable and is not written back anywhere.
void    plDXPipeline::ISavageYonHack()
    hsScalar yon = GetViewTransform().GetYon();

    // Bands: (123, 129.01), (246, 257.02), (477, 513.02), (904, 1025).
    if( ( yon > 128.f - 5.0f ) && ( yon < 128.f + 1.01f ) )
        yon = 128.f + 1.01f;
    else if( ( yon > 256.f - 10.0f ) && ( yon < 256.f + 1.02f ) )
        yon = 256.f + 1.02f;
    else if( ( yon > 512.f - 35.0f ) && ( yon < 512.f + 1.02f ) )
        yon = 512.f + 1.02f;
    else if( ( yon > 1024.f - 120.0f ) && ( yon < 1024.f + 1.f ) )
        yon = 1024.f + 1.f;

//// GetWorldToCamera /////////////////////////////////////////////////////////
// Return current world to camera transform.
// The reference comes straight from fView.
const hsMatrix44& plDXPipeline::GetWorldToCamera() const
    return fView.GetWorldToCamera();

//// GetCameraToWorld /////////////////////////////////////////////////////////
// Return current camera to world transform.
// The reference comes straight from fView.
const hsMatrix44& plDXPipeline::GetCameraToWorld() const
    return fView.GetCameraToWorld();

// IUpdateViewFlags /////////////////////////////////////////////////////////
// Dirty anything cached dependent on the current camera matrix.
// Marks the cull tree as needing a rebuild and re-caches the handedness
// (parity) of the world to camera matrix.
void plDXPipeline::IUpdateViewFlags()
    fView.fCullTreeDirty = true;

    fView.fWorldToCamLeftHanded = fView.GetWorldToCamera().GetParity();
//// SetWorldToCamera /////////////////////////////////////////////////////////
// Immediate set of camera transform.
// w2c - world to camera matrix; c2w - its counterpart, camera to world.
// Both are handed to the view transform together.
// NOTE(review): only the SetCameraTransform() call is visible in this text.
void plDXPipeline::SetWorldToCamera(const hsMatrix44& w2c, const hsMatrix44& c2w)
    IGetViewTransform().SetCameraTransform(w2c, c2w);



// IWorldToCameraToD3D ///////////////////////////////////////////////////////
// Pass the current camera transform through to D3D.
// Converts fView's world to camera matrix to a D3DXMATRIX, sets it as the
// device's D3DTS_VIEW transform and clears the kResetCamera bit in
// fView.fXformResetFlags.
void plDXPipeline::IWorldToCameraToD3D()
    D3DXMATRIX  mat;

    IMatrix44ToD3DMatrix( mat, fView.GetWorldToCamera() );
    fD3DDevice->SetTransform( D3DTS_VIEW, &mat );

    fView.fXformResetFlags &= ~fView.kResetCamera;


// SetViewTransform ///////////////////////////////////////////////////////////
// ViewTransform encapsulates everything about the current camera, viewport and
// window necessary to render or convert from world space to pixel space. Doesn't
// include the object dependent local to world transform.
// See plViewTransform.h
//
// Copies v into fView.fTransform. If v has a zero screen width or height, the
// stored copy's screen size is set to the original size kept in fSettings.
void plDXPipeline::SetViewTransform(const plViewTransform& v)
    fView.fTransform = v;

    if( !v.GetScreenWidth() || !v.GetScreenHeight() )
        fView.fTransform.SetScreenSize((UInt16)(fSettings.fOrigWidth), (UInt16)(fSettings.fOrigHeight));



//// GetWorldToLocal //////////////////////////////////////////////////////////
// Return current World to Local transform. Note that this is only meaningful while an
// object is being rendered, so this function is pretty worthless.
// The value is whatever ISetLocalToWorld() last stored in fView.fWorldToLocal.
const hsMatrix44& plDXPipeline::GetWorldToLocal() const
    return fView.fWorldToLocal;

//// GetLocalToWorld //////////////////////////////////////////////////////////
// Return current Local to World transform. Note that this is only meaningful while an
// object is being rendered, so this function is pretty worthless.
// The value is whatever ISetLocalToWorld() last stored in fView.fLocalToWorld.

const hsMatrix44& plDXPipeline::GetLocalToWorld() const
    return fView.fLocalToWorld;

//// ISetLocalToWorld /////////////////////////////////////////////////////////
// Record and pass on to D3D the current local to world transform for the object
// about to be rendered.
// l2w - local to world matrix; w2l - its counterpart, world to local.
// NOTE(review): the visible body only records the matrices and flags; the
// hand-off to D3D is not visible in this text.
void    plDXPipeline::ISetLocalToWorld( const hsMatrix44& l2w, const hsMatrix44& w2l )
    fView.fLocalToWorld = l2w;
    fView.fWorldToLocal = w2l;

    // Anything derived from the previous matrices is now stale.
    fView.fViewVectorsDirty = true;

    // We keep track of parity for winding order culling.
    fView.fLocalToWorldLeftHanded = fView.fLocalToWorld.GetParity();


// ILocalToWorldToD3D ///////////////////////////////////////////////////////////
// Pass the current local to world transform on to D3D.
// An identity-flagged matrix is sent as the shared d3dIdentityMatrix; any other
// matrix is converted to a D3DXMATRIX first. Afterwards the kResetL2W bit in
// fView.fXformResetFlags is cleared.
// NOTE(review): the else separating the two SetTransform paths is not visible
// in this text.
void plDXPipeline::ILocalToWorldToD3D()
    D3DXMATRIX  mat;

    if( fView.fLocalToWorld.fFlags & hsMatrix44::kIsIdent )
        fD3DDevice->SetTransform( D3DTS_WORLD, &d3dIdentityMatrix );
        IMatrix44ToD3DMatrix( mat, fView.fLocalToWorld );
        fD3DDevice->SetTransform( D3DTS_WORLD, &mat );

    fView.fXformResetFlags &= ~fView.kResetL2W;

//// IIsViewLeftHanded ////////////////////////////////////////////////////////
//  Returns true if the combination of the local2world and world2camera
//  matrices is left-handed.
//  The result is the XOR of the two cached parity flags, XORed once more with
//  the view transform's orthogonal flag (so an orthogonal view flips it).

hsBool  plDXPipeline::IIsViewLeftHanded()
    return fView.fTransform.GetOrthogonal() ^ ( fView.fLocalToWorldLeftHanded ^ fView.fWorldToCamLeftHanded ) ? true : false;

//// ScreenToWorldPoint ///////////////////////////////////////////////////////
// Given a screen space pixel position, and a world space distance from the camera, return a
// full world space position. I.e. cast a ray through a screen pixel dist feet, and where
// is it.
//
// n          - number of points to convert.
// scrX, scrY - arrays of n screen coordinates.
// dist       - distance from the camera, applied to every point.
// worldOut   - out: array of n world space positions.
// NOTE(review): stride and strideOut are not used by the visible body; the
// input and output pointers simply advance by one element per point.
void    plDXPipeline::ScreenToWorldPoint( int n, UInt32 stride, Int32 *scrX, Int32 *scrY, hsScalar dist, UInt32 strideOut, hsPoint3 *worldOut )
    while( n-- )
        hsPoint3 scrP;
        scrP.Set(float(*scrX++), float(*scrY++), float(dist));
        *worldOut++ = GetViewTransform().ScreenToWorld(scrP);

// IRefreshCullTree ////////////////////////////////////////////////////////////////////
// The cull tree captures the view frustum and any occluders in the scene into a single
// BSP tree. See plCullTree.h. It must be recomputed any time the camera moves.
//
// Does nothing unless fView.fCullTreeDirty is set; the flag is cleared again
// as part of the refresh.
// NOTE(review): many statements of this function (tree setup, the bodies of
// both occluder loops, the snapshot begin/end calls) are not visible in this
// text; the comments below only describe what can be seen.
void plDXPipeline::IRefreshCullTree()
    if( fView.fCullTreeDirty )



        // Occlusion-snap debugging was switched off: drop the proxy and stop
        // drawing the kOccSnapProxy drawable type.
        if (fCullProxy && !IsDebugFlagSet(plPipeDbg::kFlagOcclusionSnap))
            fCullProxy = nil;
            SetDrawableTypeMask(GetDrawableTypeMask() & ~plDrawable::kOccSnapProxy);
        // Take a snapshot only when the debug flag is on, no proxy exists yet
        // and the view stack is empty.
        hsBool doCullSnap = IsDebugFlagSet(plPipeDbg::kFlagOcclusionSnap)&& !fCullProxy && !fSettings.fViewStack.GetCount();
        if( doCullSnap )
        fView.fCullTreeDirty = false;

        // Occluders are only processed when a node budget is set; the poly loop
        // checks the tree's node count against that budget.
        if( fView.fCullMaxNodes )
            int i;
            for( i = 0; i < fCullPolys.GetCount(); i++ )
                if( fView.fCullTree.GetNumNodes() >= fView.fCullMaxNodes )
            plProfile_Set(OccPolyUsed, i);

            for( i = 0; i < fCullHoles.GetCount(); i++ )
            plProfile_Set(OccNodeUsed, fView.fCullTree.GetNumNodes());
        if( doCullSnap )


// IMakeOcclusionSnap /////////////////////////////////////////////////////////////////////
// Debugging visualization tool only. Takes a snapshot of the current occlusion
// BSP tree and renders it until told to stop.
// The snapshot geometry comes from the cull tree's capture arrays; the resulting
// drawable is stored in fCullProxy.
void plDXPipeline::IMakeOcclusionSnap()
    // Capture arrays owned by the cull tree: positions, normals, colors, triangle indices.
    hsTArray<hsPoint3>& pos = fView.fCullTree.GetCaptureVerts();
    hsTArray<hsVector3>& norm = fView.fCullTree.GetCaptureNorms();
    hsTArray<hsColorRGBA>& color = fView.fCullTree.GetCaptureColors();
    hsTArray<UInt16>& tris = fView.fCullTree.GetCaptureTris();

    // Nothing is built unless at least one triangle index was captured.
    if( tris.GetCount() )
        hsMatrix44 ident;

        // Material for the proxy, registered under the key "OcclusionSnapMat" at the
        // global fixed location. Its base layer gets the color (1, 0.5, 0.5, 1) for both
        // preshade and runtime, plus the alpha blend flag.
        hsGMaterial* mat = TRACKED_NEW hsGMaterial;
        hsgResMgr::ResMgr()->NewKey( "OcclusionSnapMat", mat, plLocation::kGlobalFixedLoc );
        plLayer *lay = mat->MakeBaseLayer();
        lay->SetPreshadeColor(hsColorRGBA().Set(1.f, 0.5f, 0.5f, 1.f));
        lay->SetRuntimeColor(hsColorRGBA().Set(1.f, 0.5f, 0.5f, 1.f));
        lay->SetBlendFlags(lay->GetBlendFlags() | hsGMatState::kBlendAlpha);

        // NOTE(review): the remaining GenerateDrawable arguments are not visible in this excerpt.
        fCullProxy = plDrawableGenerator::GenerateDrawable(pos.GetCount(), 

        if( fCullProxy )

            // Add the kOccSnapProxy bit to the drawable type mask.
            SetDrawableTypeMask(GetDrawableTypeMask() | plDrawable::kOccSnapProxy);

// SubmitOccluders /////////////////////////////////////////////////////////////
// Add the input polys into the list of polys from which to generate the cull tree.
// Marks the cull tree dirty (fView.fCullTreeDirty) and always returns true.
hsBool plDXPipeline::SubmitOccluders(const hsTArray<const plCullPoly*>& polyList)
    int i;
    for( i = 0; i < polyList.GetCount(); i++ )
        // Each poly is tested with IsHole().
        // NOTE(review): the statements that store the poly are not visible in this excerpt.
        if( polyList[i]->IsHole() )
    // IRefreshCullTree keys off this flag.
    fView.fCullTreeDirty = true;

    return true;

//// RefreshScreenMatrices ////////////////////////////////////////////////////
// Force a refresh of cached state when the projection matrix changes.
// In the visible code this means setting fView.fCullTreeDirty, the flag
// IRefreshCullTree tests before doing any work.
void    plDXPipeline::RefreshScreenMatrices()
    fView.fCullTreeDirty = true;

//// RefreshMatrices //////////////////////////////////////////////////////////
//  Just a wrapper.
//  NOTE(review): the wrapped call is not visible in this excerpt - confirm
//  which refresh routine this forwards to.

void    plDXPipeline::RefreshMatrices()

//// Overrides ////////////////////////////////////////////////////////////////

//// PushOverrideMaterial /////////////////////////////////////////////////////
// Push a material to be used instead of the material associated with objects
// for rendering.
// Must be matched with a PopOverrideMaterial.
// Returns the override that was current before this push (nil if there was none).
hsGMaterial *plDXPipeline::PushOverrideMaterial( hsGMaterial *mat )
    // Read the current override before the stack changes.
    hsGMaterial *ret = GetOverrideMaterial();
    // Take a reference for the stack entry; PopOverrideMaterial releases it.
    hsRefCnt_SafeRef( mat );
    fOverrideMat.Push( mat );
    // presumably makes the next material setup re-apply its state - confirm
    fForceMatHandle = true;

    return ret;

//// PopOverrideMaterial //////////////////////////////////////////////////////
// Stop overriding with the current override material.
// Must match a preceding PushOverrideMaterial.
// NOTE(review): the restore parameter is not referenced in the visible body.
void plDXPipeline::PopOverrideMaterial( hsGMaterial *restore )
    // Drop the reference that PushOverrideMaterial took for this stack entry.
    hsGMaterial *pop = fOverrideMat.Pop();
    hsRefCnt_SafeUnRef( pop );

    // If the popped override is the material currently set, flag it for re-handling.
    if( fCurrMaterial == pop )
        fForceMatHandle = true;

//// GetOverrideMaterial //////////////////////////////////////////////////////
// Return the current override material, or nil if there isn't any.
// The current override is the top of the fOverrideMat stack; it is peeked, not popped.
hsGMaterial *plDXPipeline::GetOverrideMaterial() const
    return fOverrideMat.GetCount() ? fOverrideMat.Peek() : nil;

//// GetMaterialOverrideOn ////////////////////////////////////////////////////
// Return the current bits set to be always on for the given category (e.g. ZFlags).
// Reads the category's value out of fMatOverOn; nothing is modified.
UInt32  plDXPipeline::GetMaterialOverrideOn( hsGMatState::StateIdx category ) const
    return fMatOverOn.Value(category);

//// GetMaterialOverrideOff ///////////////////////////////////////////////////
// Return the current bits set to be always off for the given category (e.g. ZFlags).
// Reads the category's value out of fMatOverOff; nothing is modified.
UInt32  plDXPipeline::GetMaterialOverrideOff( hsGMatState::StateIdx category ) const
    return fMatOverOff.Value(category);

//// PushMaterialOverride /////////////////////////////////////////////////////
// Force material state bits on or off. If you use this, save the return value
// as input to PopMaterialOverride, to restore previous values.
// The return value is a copy of the override set selected by <on> (fMatOverOn or
// fMatOverOff), taken before this call changes anything.
hsGMatState plDXPipeline::PushMaterialOverride( const hsGMatState& state, hsBool on )
    hsGMatState ret = GetMaterialOverride( on );
    // Two mirrored updates follow: the first pair adds the bits to the forced-on set and
    // takes them out of the forced-off set, the second pair does the reverse.
    // NOTE(review): the `else` separating the two pairs is not visible in this excerpt - confirm.
    if( on )
        fMatOverOn |= state;
        fMatOverOff -= state;
        fMatOverOff |= state;
        fMatOverOn -= state;
    fForceMatHandle = true;
    return ret;

// PushMaterialOverride ///////////////////////////////////////////////////////
// Force material state bits on or off. If you use this, save the return value
// as input to PopMaterialOverride, to restore previous values.
// This version just sets for one category (e.g. Z flags).
// <which> is a bit mask within category <cat>. The return value is a copy of the whole
// override set selected by <on>, taken before the change.
hsGMatState plDXPipeline::PushMaterialOverride(hsGMatState::StateIdx cat, UInt32 which, hsBool on)
    hsGMatState ret = GetMaterialOverride( on );
    // Two mirrored updates follow: set the bits in one set and mask them out of the other.
    // NOTE(review): the `else` separating the two pairs is not visible in this excerpt - confirm.
    if( on )
        fMatOverOn[ cat ] |= which;
        fMatOverOff[ cat ] &= ~which;
        fMatOverOn[ cat ] &= ~which;
        fMatOverOff[ cat ] |= which;
    fForceMatHandle = true;
    return ret;

//// PopMaterialOverride //////////////////////////////////////////////////////
// Restore the previous settings returned from the matching PushMaterialOverride.
// <on> must name the same set (forced-on or forced-off) that the push was made against.
void plDXPipeline::PopMaterialOverride(const hsGMatState& restore, hsBool on)
    // Two mirrored updates follow: one set is overwritten with <restore>, and Clear( restore )
    // is called on the opposite set.
    // NOTE(review): the `else` separating the two pairs is not visible in this excerpt - confirm.
    if( on )
        fMatOverOn = restore;
        fMatOverOff.Clear( restore );
        fMatOverOff = restore;
        fMatOverOn.Clear( restore );
    fForceMatHandle = true;

//// GetMaterialOverride //////////////////////////////////////////////////////
// Return the current material state bits forced to on or off, depending on input <on>.
// The result is a reference to the member itself (fMatOverOn or fMatOverOff), not a copy.
const hsGMatState& plDXPipeline::GetMaterialOverride(hsBool on) const
    return on ? fMatOverOn : fMatOverOff;

//// PushColorOverride //////////////////////////////////////////////////
// Obsolete and unused.
// Fetches the current override via GetColorOverride(), applies <over> by passing it to
// PopColorOverride(), and returns the value fetched first.
hsColorOverride plDXPipeline::PushColorOverride(const hsColorOverride& over)
    hsColorOverride ret = GetColorOverride();
    PopColorOverride( over );
    return ret;

// PopColorOverride ////////////////////////////////////////////////////////
// Obsolete and unused.
// Works on a local copy of <restore>; the caller's value is not modified.
// NOTE(review): some statements of this function may not be visible in this excerpt.
void plDXPipeline::PopColorOverride(const hsColorOverride& restore)
    hsColorOverride cpy = restore;
    // Alpha is not being overridden: set the copy's alpha to 1.
    if( !(cpy.fFlags & hsColorOverride::kModAlpha) )
        cpy.fColor.a = 1.f;
    // Test for neither kModAlpha nor kModColor being set.
    if( !(cpy.fFlags & (hsColorOverride::kModAlpha | hsColorOverride::kModColor)) )
        fDev->SetColorOverride(cpy.fColor, !(cpy.fFlags & hsColorOverride::kModColor));

//// GetColorOverride /////////////////////////////////////////////////////////
// Obsolete and unused.
// Returns a reference to a function-local static hsColorOverride that the live code
// never writes to; the code that filled it in from fDev is commented out below.
const hsColorOverride& plDXPipeline::GetColorOverride() const
    static hsColorOverride ret;
    return ret;

/*  ret.fFlags = hsColorOverride::kNone;
    if( fDev->GetDebugFlags() & hsG3DDevice::kDeviceColor )
        ret.fFlags |= hsColorOverride::kModColor;
    if( fDev->GetDebugFlags() & hsG3DDevice::kDeviceAlpha )
        ret.fFlags |= hsColorOverride::kModAlpha;

    ret.fColor = fDev->GetColorOverride();
    return ret;

//// Transforms ///////////////////////////////////////////////////////////////

//// IMatrix44ToD3DMatrix /////////////////////////////////////////////////////
// Make a D3DXMATRIX matching the input plasma matrix. Mostly a transpose.
// Element mapping is dst(col, row) = src.fMap[row][col]. A source flagged kIsIdent
// is answered with d3dIdentityMatrix. Returns <dst> so the call can be chained.
// NOTE(review): the `else` between the identity shortcut and the element copy is not
// visible in this excerpt.
D3DXMATRIX&     plDXPipeline::IMatrix44ToD3DMatrix( D3DXMATRIX& dst, const hsMatrix44& src )
    if( src.fFlags & hsMatrix44::kIsIdent )
        dst = d3dIdentityMatrix;
        // Source row 0 becomes destination column 0.
        dst(0,0) = src.fMap[0][0];
        dst(1,0) = src.fMap[0][1];
        dst(2,0) = src.fMap[0][2];
        dst(3,0) = src.fMap[0][3];

        // Source row 1 becomes destination column 1.
        dst(0,1) = src.fMap[1][0];
        dst(1,1) = src.fMap[1][1];
        dst(2,1) = src.fMap[1][2];
        dst(3,1) = src.fMap[1][3];

        // Source row 2 becomes destination column 2.
        dst(0,2) = src.fMap[2][0];
        dst(1,2) = src.fMap[2][1];
        dst(2,2) = src.fMap[2][2];
        dst(3,2) = src.fMap[2][3];

        // Source row 3 becomes destination column 3.
        dst(0,3) = src.fMap[3][0];
        dst(1,3) = src.fMap[3][1];
        dst(2,3) = src.fMap[3][2];
        dst(3,3) = src.fMap[3][3];

    return dst;

// IGetCameraToNDC /////////////////////////////////////////////
// Get the camera to NDC transform. This may be adjusted to create
// a Z bias towards the camera for cases where the D3D Z bias fails us.
// The adjustment is driven by fCurrRenderLayer and the fTweaks persp-layer values.
// With fCurrRenderLayer == 0 every adjustment below reduces to a scale of 1 and
// a translation of 0. The result is a local copy returned by value.
// NOTE(review): the `else` lines separating the W-buffer / Z-buffer branches and the
// perspective / non-perspective branches are not visible in this excerpt.
hsMatrix44 plDXPipeline::IGetCameraToNDC()
    hsMatrix44 cam2ndc = GetViewTransform().GetCameraToNDC();
    if( fView.IsPerspective() )
        // Want to scale down W and offset in Z without
        // changing values of x/w, y/w. This is just
        // minimal math for
        // Mproj' * p = Mscaletrans * Mproj * p
        // where Mscaletrans = 
        // [ s 0 0 0 ]
        // [ 0 s 0 0 ]
        // [ 0 0 s 0 ]
        // [ 0 0 t s ]
        // Resulting matrix Mproj' is not exactly "Fog Friendly",
        // but is close enough.
        // Resulting point is [sx, sy, sz + tw, sw] and after divide
        // is [x/w, y/w, z/w + t/s, 1/sw]

        if( fSettings.fD3DCaps & kCapsWBuffer )
            // W-buffering is only true w-buffering on 3dfx cards. On everything else, 
            // they REALLY base it off the Z value. So we want to scale (but NOT translate)
            // the Z...
            // Note: the base value for perspLayerScale should be 0.001 for w-buffering,
            // not the normal 0.00001
            float scale = 1.f - float(fCurrRenderLayer) * fTweaks.fPerspLayerScale;

            // Scale only: the diagonal entries [0][0], [1][1], [2][2] and entry [3][2].
            cam2ndc.fMap[0][0] *= scale;
            cam2ndc.fMap[1][1] *= scale;
            cam2ndc.fMap[2][2] *= scale;
            cam2ndc.fMap[3][2] *= scale;
            // Z-buffering, so do it the traditional way
            float scale = 1.f - float(fCurrRenderLayer) * fTweaks.fPerspLayerScale;
//              scale = -1.f;
            float zTrans = -scale * float(fCurrRenderLayer) * fTweaks.fPerspLayerTrans;

            cam2ndc.fMap[0][0] *= scale;
            cam2ndc.fMap[1][1] *= scale;

            // The translation uses fMap[3][2] before that entry is itself scaled.
            cam2ndc.fMap[2][2] *= scale;
            cam2ndc.fMap[2][2] += zTrans * cam2ndc.fMap[3][2];
            cam2ndc.fMap[3][2] *= scale;
        // Fixed per-layer offset added to entry [2][3]; presumably the non-perspective case - confirm.
        plConst(float) kZTrans = -1.e-4f;
        cam2ndc.fMap[2][3] += kZTrans * fCurrRenderLayer;

    return cam2ndc;

// IProjectionMatrixToD3D //////////////////////////////////////////////////////////
// Send the current camera to NDC transform to D3D.
// The matrix comes from IGetCameraToNDC(), is converted with IMatrix44ToD3DMatrix(),
// and is set as D3DTS_PROJECTION. The kResetProjection bit is then cleared.
void plDXPipeline::IProjectionMatrixToD3D()
    D3DXMATRIX matProjection;

    IMatrix44ToD3DMatrix( matProjection, IGetCameraToNDC() );

    fD3DDevice->SetTransform( D3DTS_PROJECTION, &matProjection );
    // Projection has been sent; clear its pending-reset bit.
    fView.fXformResetFlags &= ~fView.kResetProjection;

//// ISetCullMode /////////////////////////////////////////////////////////////
//  Tests and sets the current winding order cull mode (CW, CCW, or none).
// Will reverse the cull mode as necessary for left handed camera or local to world
// transforms.
// Two-sided materials (kMiscTwoSided on layer state 0) get D3DCULL_NONE.
// The D3D render state is only touched when the mode actually changes.
void    plDXPipeline::ISetCullMode(hsBool flip)
    D3DCULL newCull = D3DCULL_NONE;

    if( !(fLayerState[0].fMiscFlags & hsGMatState::kMiscTwoSided) )
        // (!leftHanded ^ !flip) is the ?: condition: CW when exactly one of
        // "view is left handed" and <flip> is true, CCW when they agree.
        newCull = !IIsViewLeftHanded() ^ !flip ? D3DCULL_CW : D3DCULL_CCW;

    // fCurrCullMode caches the last mode sent to the device.
    if( newCull != fCurrCullMode )
        fCurrCullMode = newCull;
        fD3DDevice->SetRenderState( D3DRS_CULLMODE, fCurrCullMode );

//// ITransformsToD3D //////////////////////////////////////////////////////////
//  Refreshes all transforms. Useful after popping renderTargets :)
//  Each pending bit in fView.fXformResetFlags (kResetCamera, kResetL2W,
//  kResetProjection) is tested in turn.
//  NOTE(review): the calls made for each pending bit, and the use of resetCullMode,
//  are not visible in this excerpt.

void plDXPipeline::ITransformsToD3D()
    // Non-zero when the camera or the local-to-world transform is pending a reset.
    hsBool resetCullMode = fView.fXformResetFlags & (fView.kResetCamera | fView.kResetL2W);

    if( fView.fXformResetFlags & fView.kResetCamera )

    if( fView.fXformResetFlags & fView.kResetL2W )

    if( fView.fXformResetFlags & fView.kResetProjection )

// ISetupVertexBufferRef /////////////////////////////////////////////////////////
// Initialize input vertex buffer ref according to source.
//   owner - buffer group the vertex data belongs to
//   idx   - index of the vertex buffer within owner
//   vRef  - ref to fill in; it is also registered on owner via SetVertexBufferRef
// No D3D buffer is created here; fD3DBuffer and fData are left nil.
void plDXPipeline::ISetupVertexBufferRef(plGBufferGroup* owner, UInt32 idx, plDXVertexBufferRef* vRef)
    // Initialize to nil, in case something goes wrong.
    vRef->fD3DBuffer = nil;

    UInt8 format = owner->GetVertexFormat();

    // All indexed skinning is currently done on CPU, so the source data
    // will have indices, but we strip them out for the D3D buffer.
    if( format & plGBufferGroup::kSkinIndices )
        format &= ~(plGBufferGroup::kSkinWeightMask | plGBufferGroup::kSkinIndices);
        format |= plGBufferGroup::kSkinNoWeights;       // Should do nothing, but just in case...

    // Stride is derived from the (possibly stripped) format, not from the owner's format.
    UInt32 vertSize = IGetBufferFormatSize(format); // vertex stride
    UInt32 numVerts = owner->GetVertBufferCount(idx);

    vRef->fDevice = fD3DDevice;

    vRef->fOwner = owner;
    vRef->fCount = numVerts;
    vRef->fVertexSize = vertSize;
    vRef->fFormat = format;
    vRef->fRefTime = 0;

    vRef->fData = nil;

    // Volatility is sticky: once the ref is volatile it stays volatile.
    vRef->SetVolatile(vRef->Volatile() || owner->AreVertsVolatile());

    vRef->fIndex = idx;

    owner->SetVertexBufferRef(idx, vRef);

// ICheckStaticVertexBuffer ///////////////////////////////////////////////////////////////////////
// Ensure a static vertex buffer has any D3D resources necessary for rendering created and filled
// with proper vertex data.
// Asserts that vRef is not volatile. Creation and fill are keyed off vRef->fD3DBuffer being nil.
void plDXPipeline::ICheckStaticVertexBuffer(plDXVertexBufferRef* vRef, plGBufferGroup* owner, UInt32 idx)
    hsAssert(!vRef->Volatile(), "Creating a managed vertex buffer for a volatile buffer ref");

    if( !vRef->fD3DBuffer )
        // Okay, haven't done this one.

        DWORD fvfFormat = IGetBufferD3DFormat(vRef->fFormat);

        D3DPOOL poolType = D3DPOOL_MANAGED;
        DWORD usage = 0;
        const int numVerts = vRef->fCount;
        const int vertSize = vRef->fVertexSize;
        fManagedAlloced = true;
        // NOTE(review): the middle arguments of CreateVertexBuffer are not visible in this excerpt.
        if( FAILED( fD3DDevice->CreateVertexBuffer( numVerts * vertSize,
                                                    &vRef->fD3DBuffer, NULL) ) )
            hsAssert( false, "CreateVertexBuffer() call failed!" );
            vRef->fD3DBuffer = nil;
        PROFILE_POOL_MEM(poolType, numVerts * vertSize, true, "VtxBuff");

        // Record that we've allocated this into managed memory, in case we're
        // fighting that NVidia driver bug. Search for OSVERSION for more info.
        AllocManagedVertex(numVerts * vertSize);

        // Fill in the vertex data.
        IFillStaticVertexBufferRef(vRef, owner, idx);

        // This is currently a no op, but this would let the buffer know it can
        // unload the system memory copy, since we have a managed version now.

// IFillStaticVertexBufferRef //////////////////////////////////////////////////
// BufferRef is set up, just copy the data in.
// This is uglied up hugely by the insane non-interleaved data case with cells
// and whatever else.
// Two sources are handled: ref->fData when it is set, otherwise the group's cell list.
// A cell whose fColorStart is (UInt32)-1 is copied straight; for other cells colors come
// from a separate color buffer and are interleaved per vertex.
// NOTE(review): the early returns, the `else` lines and the final Unlock are not visible
// in this excerpt.
void plDXPipeline::IFillStaticVertexBufferRef(plDXVertexBufferRef *ref, plGBufferGroup *group, UInt32 idx)
    IDirect3DVertexBuffer9* vertexBuff = ref->fD3DBuffer;

    if( !vertexBuff )
        // We most likely already warned about this earlier, best to just quietly return now

    // Byte offset and byte length of the range to fill, from the group's start/end vertex.
    const UInt32 vertSize = ref->fVertexSize;
    const UInt32 vertStart = group->GetVertBufferStart(idx) * vertSize;
    const UInt32 size = group->GetVertBufferEnd(idx) * vertSize - vertStart;
    if( !size )

    /// Lock the buffer
    UInt8* ptr;
    if( FAILED( vertexBuff->Lock( vertStart, size, (void **)&ptr, group->AreVertsVolatile() ? D3DLOCK_DISCARD : 0 ) ) )
        hsAssert( false, "Failed to lock vertex buffer for writing" );

    if( ref->fData )
        // A system-memory copy exists: copy the same byte range from it.
        memcpy(ptr, ref->fData + vertStart, size);
        hsAssert(0 == vertStart, "Offsets on non-interleaved data not supported");
        hsAssert(group->GetVertBufferCount(idx) * vertSize == size, "Trailing dead space on non-interleaved data not supported");

        // Per-vertex bytes left in the source after the first two hsPoint3-sized fields.
        const UInt32 vertSmallSize = group->GetVertexLiteStride() - sizeof( hsPoint3 ) * 2;
        UInt8* srcVPtr = group->GetVertBufferData(idx);
        plGBufferColor* const srcCPtr = group->GetColorBufferData( idx );

        const int numCells = group->GetNumCells(idx);
        int i;
        for( i = 0; i < numCells; i++ )
            plGBufferCell   *cell = group->GetCell( idx, i );

            if( cell->fColorStart == (UInt32)-1 )
                /// Interleaved, do straight copy
                memcpy( ptr, srcVPtr + cell->fVtxStart, cell->fLength * vertSize );
                ptr += cell->fLength * vertSize;
                /// Separated, gotta interleave
                UInt8* tempVPtr = srcVPtr + cell->fVtxStart;
                plGBufferColor* tempCPtr = srcCPtr + cell->fColorStart;
                int j;
                for( j = 0; j < cell->fLength; j++ )
                    // First two hsPoint3-sized fields come from the vertex stream.
                    memcpy( ptr, tempVPtr, sizeof( hsPoint3 ) * 2 );
                    ptr += sizeof( hsPoint3 ) * 2;
                    tempVPtr += sizeof( hsPoint3 ) * 2;

                    // Diffuse then specular (one UInt32 each) come from the color stream.
                    memcpy( ptr, &tempCPtr->fDiffuse, sizeof( UInt32 ) );
                    ptr += sizeof( UInt32 );
                    memcpy( ptr, &tempCPtr->fSpecular, sizeof( UInt32 ) );
                    ptr += sizeof( UInt32 );

                    // Remainder of the source vertex.
                    memcpy( ptr, tempVPtr, vertSmallSize );
                    ptr += vertSmallSize;
                    tempVPtr += vertSmallSize;

    /// Unlock and clean up

// OpenAccess ////////////////////////////////////////////////////////////////////////////////////////
// Lock the managed buffer and setup the accessSpan to point into the buffers data.
// Returns false when the span has no vertex buffer ref, the ref has no D3D buffer, the
// span is empty, or the lock fails. Returns true once the streams in dst have been set up.
// The stream pointers are laid out in this order within one vertex: position, weights
// (plus weight indices when the format has kSkinIndices), normal, diffuse, specular, UVWs.
// NOTE(review): some `else` branches and statements are not visible in this excerpt.
hsBool plDXPipeline::OpenAccess(plAccessSpan& dst, plDrawableSpans* drawable, const plVertexSpan* span, hsBool readOnly)
    plGBufferGroup* grp = drawable->GetBufferGroup(span->fGroupIdx);
    hsAssert(!grp->AreVertsVolatile(), "Don't ask for D3DBuffer data on a volatile buffer");

    plDXVertexBufferRef* vRef = (plDXVertexBufferRef*)grp->GetVertexBufferRef(span->fVBufferIdx);
    if( !vRef )
        return false;

    IDirect3DVertexBuffer9* vertexBuff = vRef->fD3DBuffer;
    if( !vertexBuff )
        return false;

    // Byte offset and byte length of this span within the vertex buffer.
    const UInt32 stride = vRef->fVertexSize;
    const UInt32 vertStart = span->fVStartIdx * stride;
    const UInt32 size = span->fVLength * stride;

    if( !size )
        return false;

    DWORD lockFlags = readOnly ? D3DLOCK_READONLY : 0;

    UInt8* ptr;
    if( FAILED( vertexBuff->Lock(vertStart, size, (void **)&ptr, lockFlags) ) )
        hsAssert( false, "Failed to lock vertex buffer for writing" );
        return false;

    plAccessVtxSpan& acc = dst.AccessVtx();


    // Negative byte offset of the span start. presumably this lets the streams be indexed
    // with buffer-relative vertex indices although only the span range is locked - confirm
    Int32 offset = (-(Int32)(span->fVStartIdx)) * ((Int32)stride);

    acc.PositionStream(ptr, (UInt16)stride, offset);
    ptr += sizeof(hsPoint3);

    int numWgts = grp->GetNumWeights();
    if( numWgts )
        acc.WeightStream(ptr, (UInt16)stride, offset);
        ptr += numWgts * sizeof(hsScalar);
        if( grp->GetVertexFormat() & plGBufferGroup::kSkinIndices )
            acc.WgtIndexStream(ptr, (UInt16)stride, offset);
            ptr += sizeof(UInt32);
            acc.WgtIndexStream(nil, 0, offset);

    acc.NormalStream(ptr, (UInt16)stride, offset);
    ptr += sizeof(hsVector3);

    acc.DiffuseStream(ptr, (UInt16)stride, offset);
    ptr += sizeof(UInt32);

    acc.SpecularStream(ptr, (UInt16)stride, offset);
    ptr += sizeof(UInt32);

    acc.UVWStream(ptr, (UInt16)stride, offset);



    return true;

// CloseAccess /////////////////////////////////////////////////////////////////////
// Unlock the buffer, invalidating the accessSpan.
// Returns false when dst has no vertex access, no device ref, or the ref has no
// D3D buffer; returns true otherwise.
// NOTE(review): the Unlock call itself is not visible in this excerpt.
hsBool plDXPipeline::CloseAccess(plAccessSpan& dst)
    if( !dst.HasAccessVtx() )
        return false;

    plAccessVtxSpan& acc = dst.AccessVtx();

    // The device ref stored on the access span identifies which buffer to release.
    plDXVertexBufferRef* vRef = (plDXVertexBufferRef*)acc.GetVtxDeviceRef();
    if( !vRef )
        return false;

    IDirect3DVertexBuffer9* vertexBuff = vRef->fD3DBuffer;
    if( !vertexBuff )
        return false;


    return true;

// CheckVertexBufferRef /////////////////////////////////////////////////////
// Make sure the buffer group has a valid buffer ref and that it is up to date.
// Creates and initializes the ref if owner has none at idx, links it into
// fVtxBuffRefList if it is not linked yet, then handles non-volatile refs via
// ICheckStaticVertexBuffer.
// NOTE(review): the `else` separating the static and volatile handling, and the body of
// the fAllocUnManaged test, are not visible in this excerpt.
void plDXPipeline::CheckVertexBufferRef(plGBufferGroup* owner, UInt32 idx)
    // First, do we have a device ref at this index?
    plDXVertexBufferRef* vRef = (plDXVertexBufferRef*)owner->GetVertexBufferRef(idx);
    // If not
    if( !vRef )
        // Make the blank ref
        vRef = TRACKED_NEW plDXVertexBufferRef;

        ISetupVertexBufferRef(owner, idx, vRef);

    if( !vRef->IsLinked() )
        vRef->Link( &fVtxBuffRefList );

    // One way or another, we now have a vbufferref[idx] in owner.
    // Now, does it need to be (re)filled?
    // If the owner is volatile, then we hold off. It might not
    // be visible, and we might need to refill it again if we
    // have an overrun of our dynamic D3D buffer.
    if( !vRef->Volatile() )
        if( fAllocUnManaged )

        // If it's a static buffer, allocate a D3D vertex buffer for it. Otherwise, it'll
        // be sharing the global D3D dynamic buffer, and marked as volatile.
        ICheckStaticVertexBuffer(vRef, owner, idx);

        // Might want to remove this assert, and replace it with a dirty check if
        // we have static buffers that change very seldom rather than never.
        hsAssert(!vRef->IsDirty(), "Non-volatile vertex buffers should never get dirty");
        // Make sure we're going to be ready to fill it.

        // The ref's format differs from the owner's when ISetupVertexBufferRef stripped the
        // skin indices; in that case a system-memory buffer of fCount * fVertexSize bytes is
        // allocated once for the converted vertices.
        if( !vRef->fData && (vRef->fFormat != owner->GetVertexFormat()) )
            vRef->fData = TRACKED_NEW UInt8[vRef->fCount * vRef->fVertexSize];

// CheckIndexBufferRef /////////////////////////////////////////////////////
// Make sure the buffer group has an index buffer ref and that its data is current.
// Creates and initializes the ref if owner has none at idx, and refills the D3D buffer
// through IFillIndexBufferRef when the ref reports itself dirty.
// NOTE(review): the link step and the call that creates the D3D resources are not visible
// in this excerpt.
void plDXPipeline::CheckIndexBufferRef(plGBufferGroup* owner, UInt32 idx)
    plDXIndexBufferRef* iRef = (plDXIndexBufferRef*)owner->GetIndexBufferRef(idx);
    if( !iRef )
        // Create one from scratch.

        iRef = TRACKED_NEW plDXIndexBufferRef;

        ISetupIndexBufferRef(owner, idx, iRef);

    if( !iRef->IsLinked() )

    // Make sure it has all D3D resources created.

    // If it's dirty, refill it.
    if( iRef->IsDirty()  )
        IFillIndexBufferRef(iRef, owner, idx);

// IFillIndexBufferRef ////////////////////////////////////////////////////////////
// Refresh the D3D index buffer from the plasma index buffer.
// Copies the indices between owner's index-buffer start and end for idx, then clears
// the ref's dirty flag. Volatile refs are locked with D3DLOCK_DISCARD.
// NOTE(review): the early returns and the Unlock call are not visible in this excerpt.
void plDXPipeline::IFillIndexBufferRef(plDXIndexBufferRef* iRef, plGBufferGroup* owner, UInt32 idx)
    // startIdx counts indices; size is in bytes (indices are UInt16).
    UInt32 startIdx = owner->GetIndexBufferStart(idx);
    UInt32 size = (owner->GetIndexBufferEnd(idx) - startIdx) * sizeof(UInt16);
    if( !size )

    DWORD lockFlags = iRef->Volatile() ? D3DLOCK_DISCARD : 0;
    UInt16* destPtr = nil;
    // Lock exactly the byte range being rewritten.
    if( FAILED( iRef->fD3DBuffer->Lock(startIdx * sizeof(UInt16), size, (void **)&destPtr, lockFlags) ) )
        hsAssert( false, "Cannot lock index buffer for writing" );

    memcpy( destPtr, owner->GetIndexBufferData(idx) + startIdx, size );

    iRef->SetDirty( false );


// ICheckIndexBuffer ////////////////////////////////////////////////////////
// Make sure index buffer ref has any D3D resources it needs.
// A buffer is only created when the ref has none yet and its index count is non-zero.
// The pool is D3DPOOL_DEFAULT while fAllocUnManaged is set, D3DPOOL_MANAGED otherwise,
// and is recorded in iRef->fPoolType.
void plDXPipeline::ICheckIndexBuffer(plDXIndexBufferRef* iRef)
    if( !iRef->fD3DBuffer && iRef->fCount )
        D3DPOOL poolType = fAllocUnManaged ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED;
        // NOTE(review): the middle arguments of CreateIndexBuffer are not visible in this excerpt.
        if( FAILED( fD3DDevice->CreateIndexBuffer( sizeof( UInt16 ) * iRef->fCount,
                                                    &iRef->fD3DBuffer, NULL) ) )
            hsAssert( false, "CreateIndexBuffer() call failed!" );
            iRef->fD3DBuffer = nil;
        PROFILE_POOL_MEM(poolType, sizeof(UInt16) * iRef->fCount, true, "IndexBuff");

        iRef->fPoolType = poolType;

// ISetupIndexBufferRef ////////////////////////////////////////////////////////////////
// Initialize the index buffer ref, but don't create anything for it.
//   owner - buffer group the indices belong to
//   idx   - index of the index buffer within owner
//   iRef  - ref to fill in; it is also registered on owner via SetIndexBufferRef
// NOTE(review): further field initialization may not be visible in this excerpt.
void plDXPipeline::ISetupIndexBufferRef(plGBufferGroup* owner, UInt32 idx, plDXIndexBufferRef* iRef)
    UInt32 numIndices = owner->GetIndexBufferCount(idx);
    iRef->fCount = numIndices;
    iRef->fOwner = owner;
    iRef->fIndex = idx;
    iRef->fRefTime = 0;


    owner->SetIndexBufferRef(idx, iRef);


//// ISoftwareVertexBlend ///////////////////////////////////////////////////////
// Emulate matrix palette operations in software. The big difference between the hardware
// and software versions is we only want to lock the vertex buffer once and blend all the
// verts we're going to in software, so the vertex blend happens once for an entire drawable.
// In hardware, we want the opposite, to break it into manageable chunks, manageable meaning
// few enough matrices to fit into hardware registers. So for hardware version, we set up
// our palette, draw a span or few, setup our matrix palette with new matrices, draw, repeat.
// Every visible return path returns true. The function returns early when skinning is
// disabled by debug flag, when the drawable was already skinned this render
// (GetSkinTime() == fRenderCnt), or when the drawable has no blending spans.
// NOTE(review): some statements (e.g. the tail of the IBlendVertsIntoBuffer call and the
// body of the final Empty() test) are not visible in this excerpt.
hsBool      plDXPipeline::ISoftwareVertexBlend( plDrawableSpans* drawable, const hsTArray<Int16>& visList )
    if (IsDebugFlagSet(plPipeDbg::kFlagNoSkinning))
        return true;

    if( drawable->GetSkinTime() == fRenderCnt )
        return true;

    const hsBitVector   &blendBits = drawable->GetBlendingSpanVector();

    if( drawable->GetBlendingSpanVector().Empty() )
        // This sucker doesn't have any skinning spans anyway. Just return
        drawable->SetSkinTime( fRenderCnt );
        return true;


    // lock the data buffer

    // First, figure out which buffers we need to blend.
    // blendBuffers[group][buffer] is a fixed-size table, zeroed on every call. The limits
    // are only checked by the asserts below, not enforced.
    const int kMaxBufferGroups = 20;
    const int kMaxVertexBuffers = 20;
    static char blendBuffers[kMaxBufferGroups][kMaxVertexBuffers];
    memset(blendBuffers, 0, kMaxBufferGroups * kMaxVertexBuffers * sizeof(**blendBuffers));

    hsAssert(kMaxBufferGroups >= drawable->GetNumBufferGroups(), "Bigger than we counted on num groups skin.");

    const hsTArray<plSpan *>& spans = drawable->GetSpanArray();
    int i;
    for( i = 0; i < visList.GetCount(); i++ )
        if( blendBits.IsBitSet( visList[ i ] ) )
            const plVertexSpan &vSpan = *(plVertexSpan *)spans[visList[i]];
            hsAssert(kMaxVertexBuffers > vSpan.fVBufferIdx, "Bigger than we counted on num buffers skin.");

            // Mark the span's buffer for blending and clear the span's pending-blend bit.
            blendBuffers[vSpan.fGroupIdx][vSpan.fVBufferIdx] = 1;
            drawable->SetBlendingSpanVectorBit( visList[ i ], false );

    // Now go through each of the group/buffer (= a real vertex buffer) pairs we found,
    // and blend into it. We'll lock the buffer once, and then for each span that
    // uses it, set the matrix palette and then do the blend for that span.
    // When we've done all the spans for a group/buffer, we unlock it and move on.
    int j;
    for( i = 0; i < kMaxBufferGroups; i++ )
        for( j = 0; j < kMaxVertexBuffers; j++ )
            if( blendBuffers[i][j] )
                // Found one. Do the lock.
                plDXVertexBufferRef* vRef = (plDXVertexBufferRef*)drawable->GetVertexRef(i, j);

                hsAssert(vRef->fData, "Going into skinning with no place to put results!");

                // Results are written into the ref's system-memory buffer.
                UInt8*  destPtr = vRef->fData;

                int k;
                for( k = 0; k < visList.GetCount(); k++ )
                    const plIcicle& span = *(plIcicle*)spans[visList[k]];
                    if( (span.fGroupIdx == i)&&(span.fVBufferIdx == j) )

                        // Palette slot 0 is overwritten with the span's local-to-world matrix.
                        hsMatrix44* matrixPalette = drawable->GetMatrixPalette(span.fBaseMatrix);
                        matrixPalette[0] = span.fLocalToWorld;

                        // Source pointer uses the owner's vertex size; the destination below
                        // uses the ref's vertex size.
                        UInt8* ptr = vRef->fOwner->GetVertBufferData(vRef->fIndex);
                        ptr += span.fVStartIdx * vRef->fOwner->GetVertexSize();
                        IBlendVertsIntoBuffer( (plSpan*)&span,
                                                matrixPalette, span.fNumMatrices,
                                                destPtr + span.fVStartIdx * vRef->fVertexSize, 
                                                span.fLocalUVWChans );
                // Unlock and move on.


    if( drawable->GetBlendingSpanVector().Empty() )
        // Only do this if we've blended ALL of the spans. Thus, this becomes a trivial
        // rejection for all the skinning flags being cleared

    return true;

// IBeginAllocUnManaged ///////////////////////////////////////////////////////////////////
// Before allocating anything into POOL_DEFAULT, we must evict managed memory.
// See LoadResources.
// Sets fAllocUnManaged; ICheckIndexBuffer uses that flag to choose D3DPOOL_DEFAULT.
// NOTE(review): the eviction call itself is not visible in this excerpt.
void plDXPipeline::IBeginAllocUnManaged()
    // Flush out all managed resources to make room for unmanaged resources.
    fEvictTime = fTextUseTime;
    fManagedSeen = 0;

    fManagedAlloced = false;
    fAllocUnManaged = true; // we're currently only allocating POOL_DEFAULT

// IEndAllocUnManaged /////////////////////////////////////////////////////////////////////
// Ends the POOL_DEFAULT allocation phase started by IBeginAllocUnManaged: clears
// fAllocUnManaged and resets the eviction bookkeeping (fEvictTime, fManagedSeen).
// See LoadResources.
// NOTE(review): the flush call mentioned below is not visible in this excerpt.
void plDXPipeline::IEndAllocUnManaged()
    fAllocUnManaged = false;

    // Flush the (should be empty) resource manager to reset its internal allocation pool.
    fEvictTime = fTextUseTime;
    fManagedSeen = 0;

// ICheckTextureUsage ////////////////////////////////////////////////////////////////////
// Obsolete, unused.
// Deletes textures LRU to try to get around NVidia memory manager bug. Found a
// more robust/efficient way. Besides, it didn't help. See OSVERSION.
// Works in two passes over fTextureRefList: first find the oldest fUseTime among refs
// that are not render targets, then visit every such ref older than a cutoff.
// NOTE(review): the early returns, the release of each victim and the `else` in the
// second pass are not visible in this excerpt.
void plDXPipeline::ICheckTextureUsage()
    plProfile_IncCount(fTexUsed, fTexUsed);
    plProfile_IncCount(fTexManaged, fTexManaged);

    // Threshold on managed texture memory, tested just below.
    plConst(UInt32) kMinTexManaged(5000000);
    if( fTexManaged < kMinTexManaged )

    // Only act when managed exceeds kScale times what was actually used.
    plConst(UInt32) kScale(2);
    if( fTexUsed * kScale < fTexManaged )
        // Find the stalest
        UInt32 stalest = fTextUseTime;
        plDXTextureRef* ref = fTextureRefList;
        while( ref )
            // I don't know if render targets even get put in this list.
            if( !(ref->GetFlags() & plDXTextureRef::kRenderTarget) && (ref->fUseTime < stalest) )
                stalest = ref->fUseTime;
            ref = ref->GetNext();
        // Convert the oldest use time into an age relative to fTextUseTime.
        stalest = fTextUseTime - stalest;

        // If the stalest is fresh, live with thrashing
        plConst(UInt32) kMinAge(60);
        if( stalest < kMinAge )

        // Kill the stalest, and everything more than half as stale
        stalest /= 2;
        if( stalest < kMinAge )
            stalest = kMinAge;

        // Convert the age cutoff back into a use-time cutoff.
        stalest = fTextUseTime - stalest;

        // Go through again slaughtering left and right
        ref = fTextureRefList;
        while( ref )
            if( !(ref->GetFlags() & plDXTextureRef::kRenderTarget) && (ref->fUseTime < stalest) )
                // Step past the victim before anything happens to it.
                plDXTextureRef* nuke = ref;
                ref = ref->GetNext();
                ref = ref->GetNext();

// ICheckVtxUsage ////////////////////////////////////////////////////////////////////
// Obsolete, unused.
// Vertex buffer counterpart of ICheckTextureUsage: walks fVtxBuffRefList looking for
// stale non-volatile vertex buffer refs, to try to get around NVidia memory manager bug.
// Found a more robust/efficient way. Besides, it didn't help. See OSVERSION.
// NOTE(review): the early returns, the release of each victim and the `else` in the
// second pass are not visible in this excerpt.
void plDXPipeline::ICheckVtxUsage()
    plProfile_IncCount(fVtxUsed, fVtxUsed);
    plProfile_IncCount(fVtxManaged, fVtxManaged);

    // Threshold on managed vertex memory, tested just below.
    plConst(UInt32) kMinVtxManaged(5000000);
    if( fVtxManaged < kMinVtxManaged )

    // Only act when managed exceeds kScale times what was actually used.
    plConst(UInt32) kScale(2);
    if( fVtxUsed * kScale < fVtxManaged )
        // Find the stalest
        UInt32 stalest = fTextUseTime;
        plDXVertexBufferRef* ref = fVtxBuffRefList;
        while( ref )
            if( !ref->Volatile() && (ref->fUseTime < stalest) )
                stalest = ref->fUseTime;
            ref = ref->GetNext();
        // Convert the oldest use time into an age relative to fTextUseTime.
        stalest = fTextUseTime - stalest;

        // If the stalest is fresh, live with thrashing
        plConst(UInt32) kMinAge(60);
        if( stalest < kMinAge )

        // Kill the stalest, and everything more than half as stale
        stalest /= 2;
        if( stalest < kMinAge )
            stalest = kMinAge;

        // Convert the age cutoff back into a use-time cutoff.
        stalest = fTextUseTime - stalest;

        // Go through again slaughtering left and right
        ref = fVtxBuffRefList;
        while( ref )
            if( !ref->Volatile() && (ref->fUseTime < stalest) )
                // Step past the victim before anything happens to it.
                plDXVertexBufferRef* nuke = ref;
                ref = ref->GetNext();
                ref = ref->GetNext();

// CheckResources //////////////////////////////////////////////////////////////////////
// Compares the avatar render target pool (fAvRTPool) against the number of clothing
// outfits (fClothingOutfits) and reports whether the two are out of balance.
// Returns true when the pool has looked oversized for longer than kAvTexPoolShrinkThresh,
// or when the pool holds fewer entries than there are outfits.
// presumably a true result asks the caller to run LoadResources - confirm
hsBool plDXPipeline::CheckResources()
    // Oversized pool: at most one outfit but more than one pooled target, or at least 16
    // pooled targets with half the pool (integer division) still covering every outfit.
    if ((fClothingOutfits.GetCount() <= 1 && fAvRTPool.GetCount() > 1) ||
        (fAvRTPool.GetCount() >= 16 && (fAvRTPool.GetCount() / 2 >= fClothingOutfits.GetCount())))
        return (hsTimer::GetSysSeconds() - fAvRTShrinkValidSince > kAvTexPoolShrinkThresh);

    // Not oversized: restart the shrink timer.
    fAvRTShrinkValidSince = hsTimer::GetSysSeconds();
    return (fAvRTPool.GetCount() < fClothingOutfits.GetCount());

// LoadResources ///////////////////////////////////////////////////////////////////////
// Basically, we have to load anything that goes into POOL_DEFAULT before
// anything into POOL_MANAGED, or the memory manager gets confused.
// More precisely, we have to evict everything from POOL_MANAGED before we
// can allocate anything into POOL_DEFAULT.
// So, this function frees up everything in POOL_DEFAULT, evicts managed memory,
// calls out for anything needing to be created POOL_DEFAULT to do so,
// Then we're free to load into POOL_MANAGED on demand.
// This is typically called at the beginning of the first render after loading
// a new age.
//
// The steps are kept in the order given by the comments in the body; the start
// and end of the reload are reported through hsStatusMessageF and
// plNetClientApp::StaticDebugMsg, and the PipeReload profile counter is bumped
// once per call.
void plDXPipeline::LoadResources()
    hsStatusMessageF("Begin Device Reload t=%f",hsTimer::GetSeconds());
    plNetClientApp::StaticDebugMsg("Begin Device Reload");

    // Just to be safe.
    IInitDeviceState(); // 9700 THRASH

    // Evict managed memory.

    // Release everything we have in POOL_DEFAULT.

    // Create all RenderTargets
    // NOTE(review): what is done with rtMake after construction is not visible here.
    plPipeRTMakeMsg* rtMake = TRACKED_NEW plPipeRTMakeMsg(this);

    // Create all our shadow render targets and pipeline specific POOL_DEFAULT vertex buffers.
    // This includes our single dynamic vertex buffer that we cycle through for software
    // skinned, particle systems, etc.

    // Create all POOL_DEFAULT (sorted) index buffers in the scene.
    // The second constructor argument is true here and false for the managed pass below.
    plPipeGeoMakeMsg* defMake = TRACKED_NEW plPipeGeoMakeMsg(this, true);

    // This can be a bit of a mem hog and will use more mem if available, so keep it last in the
    // POOL_DEFAULT allocs.

    // We should have everything POOL_DEFAULT we need now.

    // Force a create of all our static D3D vertex buffers.
    plPipeGeoMakeMsg* manMake = TRACKED_NEW plPipeGeoMakeMsg(this, false);

    // Forcing a preload of textures turned out to not be so great,
    // since there are typically so many in an age, it swamped out
    // VM.
#ifdef MF_TOSSER
#endif // MF_TOSSER
    plPipeTexMakeMsg* texMake = TRACKED_NEW plPipeTexMakeMsg(this);


    // Okay, we've done it, clear the request.

    plProfile_IncCount(PipeReload, 1);

    hsStatusMessageF("End Device Reload t=%f",hsTimer::GetSeconds());
    plNetClientApp::StaticDebugMsg("End Device Reload");

// Sorry about this, but it really did speed up the skinning.
// Just some macros for the inner loop of IBlendVertsIntoBuffer.

// MATRIXMULTBEGIN(xfm, wgt)
// Copies rows 0..2 of xfm.fMap (12 floats) and the weight wgt into locals
// m00..m23 / m_wgt, and declares the scratch floats srcX, srcY, srcZ.
// It declares variables, so it can be used at most once per scope, and the two
// macros below only work in a scope where it has already been expanded.
#define MATRIXMULTBEGIN(xfm, wgt) \
    register float m00 = xfm.fMap[0][0]; \
    register float m01 = xfm.fMap[0][1]; \
    register float m02 = xfm.fMap[0][2]; \
    register float m03 = xfm.fMap[0][3]; \
    register float m10 = xfm.fMap[1][0]; \
    register float m11 = xfm.fMap[1][1]; \
    register float m12 = xfm.fMap[1][2]; \
    register float m13 = xfm.fMap[1][3]; \
    register float m20 = xfm.fMap[2][0]; \
    register float m21 = xfm.fMap[2][1]; \
    register float m22 = xfm.fMap[2][2]; \
    register float m23 = xfm.fMap[2][3]; \
    register float m_wgt = wgt; \
    register float srcX, srcY, srcZ;

// MATRIXMULTPOINTADD(dst, src)
// dst += m_wgt * (M * src + T), where M is the cached 3x3 part and T is the
// fourth column (m03, m13, m23). src is read into srcX/Y/Z first, so dst and
// src may be the same object.
#define MATRIXMULTPOINTADD(dst, src) \
    srcX = src.fX; \
    srcY = src.fY; \
    srcZ = src.fZ; \
    dst.fX += (srcX * m00 + srcY * m01 + srcZ * m02 + m03) * m_wgt; \
    dst.fY += (srcX * m10 + srcY * m11 + srcZ * m12 + m13) * m_wgt; \
    dst.fZ += (srcX * m20 + srcY * m21 + srcZ * m22 + m23) * m_wgt;

// MATRIXMULTVECTORADD(dst, src)
// Same as MATRIXMULTPOINTADD but without the fourth-column term:
// dst += m_wgt * (M * src).
#define MATRIXMULTVECTORADD(dst, src) \
    srcX = src.fX; \
    srcY = src.fY; \
    srcZ = src.fZ; \
    dst.fX += (srcX * m00 + srcY * m01 + srcZ * m02) * m_wgt; \
    dst.fY += (srcX * m10 + srcY * m11 + srcZ * m12) * m_wgt; \
    dst.fZ += (srcX * m20 + srcY * m21 + srcZ * m22) * m_wgt;

// inlTESTPOINT /////////////////////////////////////////
// Update mins and maxs if destP is outside.
inline void inlTESTPOINT(const hsPoint3& destP, 
                         hsScalar& minX, hsScalar& minY, hsScalar& minZ, 
                         hsScalar& maxX, hsScalar& maxY, hsScalar& maxZ)
    if( destP.fX < minX )
        minX = destP.fX;
    else if( destP.fX > maxX )
        maxX = destP.fX;

    if( destP.fY < minY )
        minY = destP.fY;
    else if( destP.fY > maxY )
        maxY = destP.fY;

    if( destP.fZ < minZ )
        minZ = destP.fZ;
    else if( destP.fZ > maxZ )
        maxZ = destP.fZ;

//// IBlendVertsIntoBuffer ////////////////////////////////////////////////////
//  Given a pointer into a buffer of verts that have blending data in the D3D
//  format, blends them into the destination buffer given without the blending
//  info.
//
//  span           - its fWorldBounds is overwritten with the bounds of the
//                   blended positions.
//  matrixPalette  - matrices selected per vertex by successive bytes of the
//                   packed indices value.
//  numMatrices    - not referenced by the code visible here.
//  src            - source vertices, read per vertex as: position, numWeights
//                   floats, (optional) packed UInt32 indices, normal, UInt32
//                   color, UInt32 specular color, then numUVs UVW triples.
//  format         - plGBufferGroup format bits; supplies the weight count, the
//                   kSkinIndices flag and the UV count.
//  srcStride, destStride - not referenced by the code visible here; both
//                   pointers are advanced by the amount read/written.
//  dest           - destination vertices, written per vertex as: position,
//                   normal, color, specular color, UVW triples.
//  count          - number of vertices to process.
//  localUVWChans  - zero, or two UVW channel numbers packed as (hi << 8) | lo
//                   that are transformed like the normal instead of copied.

void    plDXPipeline::IBlendVertsIntoBuffer( plSpan* span, 
                                              hsMatrix44* matrixPalette, int numMatrices,
                                              const UInt8 *src, UInt8 format, UInt32 srcStride, 
                                              UInt8 *dest, UInt32 destStride, UInt32 count,
                                              UInt16 localUVWChans )
    UInt8       numUVs, numWeights;
    UInt32      i, j, indices, color, specColor, uvChanSize;
    // One slot more than the largest weight count (3): the extra slot holds the
    // implied last weight, 1 - sum of the stored ones.
    float       weights[ 4 ], weightSum;
    hsPoint3    pt, tempPt, destPt;
    hsVector3   vec, tempNorm, destNorm;

    /// Get some counts
    // NOTE(review): numWeights is not assigned on the default path as far as is
    // visible here - confirm the assert is the only thing expected to happen.
    switch( format & plGBufferGroup::kSkinWeightMask )
        case plGBufferGroup::kSkin1Weight:  numWeights = 1; break;
        case plGBufferGroup::kSkin2Weights: numWeights = 2; break;
        case plGBufferGroup::kSkin3Weights: numWeights = 3; break;
        default: hsAssert( false, "Invalid weight count in IBlendVertsIntoBuffer()" );

    numUVs = plGBufferGroup::CalcNumUVs( format );
    // Each UV channel is three floats.
    uvChanSize = numUVs * sizeof( float ) * 3;

    // Running bounds of the blended positions, seeded with sentinels.
    hsScalar minX = 1.e33f;
    hsScalar minY = 1.e33f;
    hsScalar minZ = 1.e33f;

    hsScalar maxX = -1.e33f;
    hsScalar maxY = -1.e33f;
    hsScalar maxZ = -1.e33f;

    // localUVWChans is bump mapping tangent space vectors, which need to
    // be skinned like the normal, as opposed to passed through like 
    // garden variety UVW coordinates.
    // There are no localUVWChans that I know of in production assets (i.e.
    // the avatar is not skinned).
    if( !localUVWChans )
        /// Copy whilst blending
        for( i = 0; i < count; i++ )
            // Extract data
            src = inlExtractPoint( src, pt );
            for( j = 0, weightSum = 0; j < numWeights; j++ )
                src = inlExtractFloat( src, weights[ j ] );
                weightSum += weights[ j ];
            // j == numWeights here: store the implied final weight.
            weights[ j ] = 1 - weightSum;

            // NOTE(review): presumably the constant below is the fallback used when
            // the vertex carries no indices - the branch structure is not visible here.
            if( format & plGBufferGroup::kSkinIndices )
                src = inlExtractUInt32( src, indices );
                indices = 1 << 8;
            src = inlExtractPoint( src, vec );
            src = inlExtractUInt32( src, color );
            src = inlExtractUInt32( src, specColor );

            // Blend
            destPt.Set( 0, 0, 0 );
            destNorm.Set( 0, 0, 0 );
            for( j = 0; j < numWeights + 1; j++ )
                // Zero weights contribute nothing, so skip the matrix work.
                if( weights[ j ] )
                    MATRIXMULTBEGIN(matrixPalette[indices & 0xff], weights[j]);

                    MATRIXMULTPOINTADD(destPt, pt);
                    MATRIXMULTVECTORADD(destNorm, vec);

                // Next weight uses the next byte of the packed indices.
                indices >>= 8;
            // Probably don't really need to renormalize this. The errors are
            // going to be subtle and "smooth".
//          hsFastMath::NormalizeAppr(destNorm);

            inlTESTPOINT(destPt, minX, minY, minZ, maxX, maxY, maxZ);

            // Slam data into position now
            dest = inlStuffPoint( dest, destPt );
            dest = inlStuffPoint( dest, destNorm );
            dest = inlStuffUInt32( dest, color );
            dest = inlStuffUInt32( dest, specColor );
            // UVWs pass straight through in this path.
            memcpy( dest, src, uvChanSize );
            src += uvChanSize;
            dest += uvChanSize;
        // Path with two UVW channels to skin: unpack their channel numbers.
        UInt8 hiChan = localUVWChans >> 8;
        UInt8 loChan = localUVWChans & 0xff;
        /// Copy whilst blending
        for( i = 0; i < count; i++ )
            hsVector3 srcUVWs[plGeometrySpan::kMaxNumUVChannels];
            hsVector3 dstUVWs[plGeometrySpan::kMaxNumUVChannels];

            // Extract data
            src = inlExtractPoint( src, pt );
            for( j = 0, weightSum = 0; j < numWeights; j++ )
                src = inlExtractFloat( src, weights[ j ] );
                weightSum += weights[ j ];
            // j == numWeights here: store the implied final weight.
            weights[ j ] = 1 - weightSum;

            if( format & plGBufferGroup::kSkinIndices )
                src = inlExtractUInt32( src, indices );
                indices = 1 << 8;

            src = inlExtractPoint( src, vec );
            src = inlExtractUInt32( src, color );
            src = inlExtractUInt32( src, specColor );

            UInt8 k;
            for( k = 0; k < numUVs; k++ )
                src = inlExtractPoint( src, srcUVWs[k] );
            // Start the output UVWs as a copy of the input ones.
            // NOTE(review): the macros below accumulate (+=) into dstUVWs[loChan] and
            // dstUVWs[hiChan]; no reset of those two entries is visible here - confirm.
            memcpy( dstUVWs, srcUVWs, uvChanSize);

            // Blend
            destPt.Set( 0, 0, 0 );
            destNorm.Set( 0, 0, 0 );
            for( j = 0; j < numWeights + 1; j++ )
                if( weights[ j ] )
                    MATRIXMULTBEGIN(matrixPalette[indices & 0xff], weights[j]);

                    MATRIXMULTPOINTADD(destPt, pt);
                    MATRIXMULTVECTORADD(destNorm, vec);
                    MATRIXMULTVECTORADD(dstUVWs[loChan], srcUVWs[loChan]);
                    MATRIXMULTVECTORADD(dstUVWs[hiChan], srcUVWs[hiChan]);

                indices >>= 8;
            // Probably don't really need to renormalize this. The errors are
            // going to be subtle and "smooth".
//          hsFastMath::NormalizeAppr(destNorm);
//          hsFastMath::NormalizeAppr(dstUVWs[loChan]);
//          hsFastMath::NormalizeAppr(dstUVWs[hiChan]);

            inlTESTPOINT(destPt, minX, minY, minZ, maxX, maxY, maxZ);

            // Slam data into position now
            dest = inlStuffPoint( dest, destPt );
            dest = inlStuffPoint( dest, destNorm );
            dest = inlStuffUInt32( dest, color );
            dest = inlStuffUInt32( dest, specColor );
            memcpy( dest, dstUVWs, uvChanSize );
            dest += uvChanSize;
    // Publish the bounds of everything blended above on the span.
    // NOTE(review): both calls take the address of a temporary, which standard
    // C++ rejects (accepted as a compiler extension by some toolchains).
    hsBounds3Ext wBnd;
    wBnd.Reset(&hsPoint3(minX, minY, minZ));
    wBnd.Union(&hsPoint3(maxX, maxY, maxZ));
    span->fWorldBounds = wBnd;

// ISetPipeConsts //////////////////////////////////////////////////////////////////
// A shader can request that the pipeline fill in certain constants that are indeterminate
// until the pipeline is about to render the object the shader is applied to. For example,
// the object's local to world. A single shader may be used on multiple objects with 
// multiple local to world transforms. This ensures the pipeline will shove the proper
// local to world into the shader immediately before the render.
// See plShader.h for the list of available pipe constants.
// Note that the lighting pipe constants are NOT implemented.
//
// shader - the shader whose pipe constants are walked; each constant's fType picks
//          what is computed and its fReg says which register receives it.
void plDXPipeline::ISetPipeConsts(plShader* shader)
    int n = shader->GetNumPipeConsts(); 
    int i;
    for( i = 0; i < n; i++ )
        const plPipeConst& pc = shader->GetPipeConst(i);
        switch( pc.fType )
        // Fog parameters.
        // NOTE(review): whatever fills `set` before it is uploaded is not visible here.
        case plPipeConst::kFogSet:
                float set[4];
                shader->SetFloat4(pc.fReg, set);
        // Colors taken from the current layer (fCurrLay).
        case plPipeConst::kLayAmbient:
                hsColorRGBA col = fCurrLay->GetAmbientColor();
                shader->SetColor(pc.fReg, col);
        case plPipeConst::kLayRuntime:
                hsColorRGBA col = fCurrLay->GetRuntimeColor();
                // Alpha carries the layer opacity rather than the runtime color's alpha.
                col.a = fCurrLay->GetOpacity();
                shader->SetColor(pc.fReg, col);
        case plPipeConst::kLaySpecular:
                hsColorRGBA col = fCurrLay->GetSpecularColor();
                shader->SetColor(pc.fReg, col);
        // Texture transform of layer (fCurrLayerIdx + stage), uploaded as 3x4.
        case plPipeConst::kTex3x4_0:
        case plPipeConst::kTex3x4_1:
        case plPipeConst::kTex3x4_2:
        case plPipeConst::kTex3x4_3:
        case plPipeConst::kTex3x4_4:
        case plPipeConst::kTex3x4_5:
        case plPipeConst::kTex3x4_6:
        case plPipeConst::kTex3x4_7:
                // Stage number is the offset from the first enum of the group.
                int stage = pc.fType - plPipeConst::kTex3x4_0;

                // NOTE(review): if fCurrNumLayers is a count, stage == fCurrNumLayers
                // would also be past the end - confirm whether >= was intended.
                if( stage > fCurrNumLayers )
                    // Ooops. This is bad, means the shader is expecting more layers than
                    // we actually have (or is just bogus). Assert and quietly continue.
                    hsAssert(false, "Shader asking for higher stage transform than we have");
                const hsMatrix44& xfm = fCurrMaterial->GetLayer(fCurrLayerIdx + stage)->GetTransform();

                shader->SetMatrix34(pc.fReg, xfm);
        // Same lookup, uploaded as 2x4.
        case plPipeConst::kTex2x4_0:
        case plPipeConst::kTex2x4_1:
        case plPipeConst::kTex2x4_2:
        case plPipeConst::kTex2x4_3:
        case plPipeConst::kTex2x4_4:
        case plPipeConst::kTex2x4_5:
        case plPipeConst::kTex2x4_6:
        case plPipeConst::kTex2x4_7:
                int stage = pc.fType - plPipeConst::kTex2x4_0;

                if( stage > fCurrNumLayers )
                    // Ooops. This is bad, means the shader is expecting more layers than
                    // we actually have (or is just bogus). Assert and quietly continue.
                    hsAssert(false, "Shader asking for higher stage transform than we have");
                const hsMatrix44& xfm = fCurrMaterial->GetLayer(fCurrLayerIdx + stage)->GetTransform();

                shader->SetMatrix24(pc.fReg, xfm);
        // Same lookup, only the first row (xfm.fMap[0]) is uploaded.
        case plPipeConst::kTex1x4_0:
        case plPipeConst::kTex1x4_1:
        case plPipeConst::kTex1x4_2:
        case plPipeConst::kTex1x4_3:
        case plPipeConst::kTex1x4_4:
        case plPipeConst::kTex1x4_5:
        case plPipeConst::kTex1x4_6:
        case plPipeConst::kTex1x4_7:
                int stage = pc.fType - plPipeConst::kTex1x4_0;

                if( stage > fCurrNumLayers )
                    // Ooops. This is bad, means the shader is expecting more layers than
                    // we actually have (or is just bogus). Assert and quietly continue.
                    hsAssert(false, "Shader asking for higher stage transform than we have");
                const hsMatrix44& xfm = fCurrMaterial->GetLayer(fCurrLayerIdx + stage)->GetTransform();

                shader->SetFloat4(pc.fReg, xfm.fMap[0]);
        // Full 4x4 transforms ending in NDC space; products are composed right to left.
        case plPipeConst::kLocalToNDC:
                hsMatrix44 cam2ndc = IGetCameraToNDC();
                hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera();

                hsMatrix44 local2ndc = cam2ndc * world2cam * GetLocalToWorld();

                shader->SetMatrix44(pc.fReg, local2ndc);

        case plPipeConst::kCameraToNDC:
                hsMatrix44 cam2ndc = IGetCameraToNDC();

                shader->SetMatrix44(pc.fReg, cam2ndc);

        case plPipeConst::kWorldToNDC:
                hsMatrix44 cam2ndc = IGetCameraToNDC();
                hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera();

                hsMatrix44 world2ndc = cam2ndc * world2cam;

                shader->SetMatrix44(pc.fReg, world2ndc);

        // Transforms between local, world and camera space, uploaded as 3x4.
        case plPipeConst::kLocalToWorld:
            shader->SetMatrix34(pc.fReg, GetLocalToWorld());

        case plPipeConst::kWorldToLocal:
            shader->SetMatrix34(pc.fReg, GetWorldToLocal());

        case plPipeConst::kWorldToCamera:
                hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera();

                shader->SetMatrix34(pc.fReg, world2cam);

        case plPipeConst::kCameraToWorld:
                hsMatrix44 cam2world = GetViewTransform().GetCameraToWorld();

                shader->SetMatrix34(pc.fReg, cam2world);

        case plPipeConst::kLocalToCamera:
                hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera();

                hsMatrix44 local2cam = world2cam * GetLocalToWorld();

                shader->SetMatrix34(pc.fReg, local2cam);

        case plPipeConst::kCameraToLocal:
                hsMatrix44 cam2world = GetViewTransform().GetCameraToWorld();

                hsMatrix44 cam2local = GetWorldToLocal() * cam2world;

                shader->SetMatrix34(pc.fReg, cam2local);

        // Positions, uploaded with w = 1. The camera position is the translation
        // of the camera-to-world matrix.
        case plPipeConst::kCamPosWorld:
                shader->SetVectorW(pc.fReg, GetViewTransform().GetCameraToWorld().GetTranslate(), 1.f);

        case plPipeConst::kCamPosLocal:
                hsPoint3 localCam = GetWorldToLocal() * GetViewTransform().GetCameraToWorld().GetTranslate();

                shader->SetVectorW(pc.fReg, localCam, 1.f);

        case plPipeConst::kObjPosWorld:
                shader->SetVectorW(pc.fReg, GetLocalToWorld().GetTranslate(), 1.f);

        // Lighting constants: no code is attached to these cases (see the header note).
        case plPipeConst::kDirLight1:
        case plPipeConst::kDirLight2:
        case plPipeConst::kDirLight3:
        case plPipeConst::kDirLight4:
        case plPipeConst::kPointLight1:
        case plPipeConst::kPointLight2:
        case plPipeConst::kPointLight3:
        case plPipeConst::kPointLight4:

// ISetShaders /////////////////////////////////////////////////////////////////////////////////////
// Setup to render using the input vertex and pixel shader. Either or both may
// be nil, in which case the fixed function pipeline is indicated.
// Any Pipe Constants the non-FFP shader wants will be set here.
// Lastly, all constants will be set (as a block) for any non-FFP vertex or pixel shader.
//
// vShader - vertex shader to use, or nil.
// pShader - pixel shader to use, or nil.
// Returns S_OK on the path visible here; failures from SetVertexShader /
// SetPixelShader are only asserted on, not returned.
HRESULT plDXPipeline::ISetShaders(plShader* vShader, plShader* pShader)
    // Stays NULL when no vertex shader was supplied.
    IDirect3DVertexShader9 *vsHandle = NULL;
    if( vShader )
        hsAssert(vShader->IsVertexShader(), "Wrong type shader as vertex shader");

        // Create the device-side ref for this shader on first use.
        plDXVertexShader* vRef = (plDXVertexShader*)vShader->GetDeviceRef();
        if( !vRef )
            vRef = TRACKED_NEW plDXVertexShader(vShader);
        // NOTE(review): the statement guarded by the IsLinked() test is not visible here.
        if( !vRef->IsLinked() )
        vsHandle = vRef->GetShader(this);

        // This is truly obnoxious, but D3D insists that, while using the programmable pipeline,
        // all stages be set up like this, not just the ones we're going to use. We have to
        // do this if we have either a vertex or a pixel shader. See below. Whatever. mf
        int i;
        for( i = 0; i < 8; i++ )
            // Stage i reads texcoord set i with no texture transform; the cached
            // per-stage values are updated in the same expression.
            fD3DDevice->SetTextureStageState(i, D3DTSS_TEXCOORDINDEX, fLayerUVWSrcs[i] = i);
            fD3DDevice->SetTextureStageState(i, D3DTSS_TEXTURETRANSFORMFLAGS, fLayerXformFlags[i] = 0);

    // Stays NULL when no pixel shader was supplied.
    IDirect3DPixelShader9 *psHandle = NULL;
    if( pShader )
        hsAssert(pShader->IsPixelShader(), "Wrong type shader as pixel shader");


        plDXPixelShader* pRef = (plDXPixelShader*)pShader->GetDeviceRef();
        if( !pRef )
            pRef = TRACKED_NEW plDXPixelShader(pShader);
        if( !pRef->IsLinked() )
        psHandle = pRef->GetShader(this);

        // With a vertex shader the stage setup was already done above.
        if( !vShader )
            int i;
            for( i = 0; i < 8; i++ )
                fD3DDevice->SetTextureStageState(i, D3DTSS_TEXCOORDINDEX, fLayerUVWSrcs[i] = i);
                fD3DDevice->SetTextureStageState(i, D3DTSS_TEXTURETRANSFORMFLAGS, fLayerXformFlags[i] = 0);

    // Only touch the device when the handle differs from the cached one; the
    // cache is updated in the same expression as the device call.
    if( vsHandle != fSettings.fCurrVertexShader )
        HRESULT hr = fD3DDevice->SetVertexShader(fSettings.fCurrVertexShader = vsHandle);
        hsAssert(!FAILED(hr), "Error setting vertex shader");

    if( psHandle != fSettings.fCurrPixelShader )
        HRESULT hr = fD3DDevice->SetPixelShader(fSettings.fCurrPixelShader = psHandle);
        hsAssert(!FAILED(hr), "Error setting pixel shader");

    // Handle cull mode here, because current cullmode is dependent on
    // the handedness of the LocalToCamera AND whether we are twosided.

    return S_OK;

// IRenderAuxSpan //////////////////////////////////////////////////////////
// Aux spans (auxiliary) are geometry rendered immediately after, and therefore dependent, on
// other normal geometry. They don't have SceneObjects, Drawables, DrawInterfaces or
// any of that, and therefore don't correspond to any object in the scene.
// They are dynamic procedural decals. See plDynaDecal.cpp and plDynaDecalMgr.cpp.
// This is wrapped by IRenderAuxSpans, which makes sure state is restored to resume
// normal rendering after the AuxSpan is rendered.
//
// span - the normal span the aux span belongs to; passed on to IHandleMaterial.
// aux  - the aux span to draw; supplies the buffer group, buffer indices,
//        vertex/index ranges, material and flags.
void plDXPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux)
    // Make sure the underlying resources are created and filled in with current data.
    CheckVertexBufferRef(aux->fGroup, aux->fVBufferIdx);
    CheckIndexBufferRef(aux->fGroup, aux->fIBufferIdx);

    // Set to render from the aux spans buffers.
    plDXVertexBufferRef* vRef = (plDXVertexBufferRef*)aux->fGroup->GetVertexBufferRef(aux->fVBufferIdx); 

    // NOTE(review): the statements guarded by the two nil tests below are not
    // visible here; presumably early outs - confirm.
    if( !vRef )

    plDXIndexBufferRef* iRef = (plDXIndexBufferRef*)aux->fGroup->GetIndexBufferRef(aux->fIBufferIdx);

    if( !iRef )

    HRESULT     r;

    r = fD3DDevice->SetStreamSource( 0, vRef->fD3DBuffer, 0, vRef->fVertexSize );
    hsAssert( r == D3D_OK, "Error trying to set the stream source!" );

    // Set the FVF for this buffer's format and remember it as the current one.
    fD3DDevice->SetFVF(fSettings.fCurrFVFFormat = IGetBufferD3DFormat(vRef->fFormat));
    r = fD3DDevice->SetIndices( iRef->fD3DBuffer );
    hsAssert( r == D3D_OK, "Error trying to set the indices!" );

    // fILength counts indices, so a third of it is the triangle count.
    plRenderTriListFunc render(fD3DDevice, iRef->fOffset, aux->fVStartIdx, aux->fVLength, aux->fIStartIdx, aux->fILength/3);

    // Now just loop through the aux material, rendering in as many passes as it takes.
    hsGMaterial* material = aux->fMaterial;
    int j;
    // No increment in the for header: IHandleMaterial returns the next layer index.
    for( j = 0; j < material->GetNumLayers(); )
        int iCurrMat = j;
        j = IHandleMaterial( material, iCurrMat, &span ); 
        // NOTE(review): the statement guarded by the -1 test is not visible here.
        if (j == -1)

        ISetShaders(material->GetLayer(iCurrMat)->GetVertexShader(), material->GetLayer(iCurrMat)->GetPixelShader());

        if( aux->fFlags & plAuxSpan::kOverrideLiteModel )
            // Function-local static, so it starts out zero-initialized.
            static D3DMATERIAL9 mat;
            fD3DDevice->SetRenderState(D3DRS_AMBIENT, 0xffffffff);

            fD3DDevice->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_COLOR1 );

            fD3DDevice->SetMaterial( &mat );



// IRenderAuxSpans ////////////////////////////////////////////////////////////////////////////
// Save and restore render state around calls to IRenderAuxSpan. This lets
// a list of aux spans get rendered with only one save/restore state.
//
// span - the span whose aux spans are drawn; its fLocalToWorld / fWorldToLocal
//        are reinstated afterwards.
void plDXPipeline::IRenderAuxSpans(const plSpan& span)
    // NOTE(review): the statement guarded by the debug-flag test is not visible
    // here; presumably an early out - confirm.
    if (IsDebugFlagSet(plPipeDbg::kFlagNoAuxSpans))

    // Remember the buffers that were current so they can be rebound below.
    plDXVertexBufferRef* oldVRef = fSettings.fCurrVertexBuffRef;
    plDXIndexBufferRef* oldIRef = fSettings.fCurrIndexBuffRef;

    // Aux spans are drawn with identity local-to-world and world-to-local.
    ISetLocalToWorld(hsMatrix44::IdentityMatrix(), hsMatrix44::IdentityMatrix());

    int i;
    for( i = 0; i < span.GetNumAuxSpans(); i++ )
        IRenderAuxSpan(span, span.GetAuxSpan(i));

    // Back to the owning span's transforms.
    ISetLocalToWorld(span.fLocalToWorld, span.fWorldToLocal);

    HRESULT     r;

    // Rebind the previous vertex stream, FVF and index buffer.
    r = fD3DDevice->SetStreamSource( 0, oldVRef->fD3DBuffer, 0, oldVRef->fVertexSize );
    hsAssert( r == D3D_OK, "Error trying to set the stream source!" );

    r = fD3DDevice->SetFVF(fSettings.fCurrFVFFormat = IGetBufferD3DFormat(oldVRef->fFormat));

    r = fD3DDevice->SetIndices( oldIRef->fD3DBuffer );
    hsAssert( r == D3D_OK, "Error trying to set the indices!" );


// ICheckVBUsage //////////////////////////////////////////////////////////////
// Keep track of how much managed vertex buffer memory is being used and
// has been used since the last evict.
//
// vRef - the vertex buffer ref about to be used. Buffers whose owner reports
//        volatile verts are ignored. Sizes are fVertexSize * fCount.
inline void plDXPipeline::ICheckVBUsage(plDXVertexBufferRef* vRef)
    if( !vRef->fOwner->AreVertsVolatile() )
        // Last use was at or before fEvictTime: add its size to fManagedSeen.
        if( vRef->fUseTime <= fEvictTime )
            fManagedSeen += vRef->fVertexSize * vRef->fCount;

        // Stamping fUseTime below makes this fire once per value of fTextUseTime,
        // so a buffer is not counted twice for the same stamp.
        if( vRef->fUseTime != fTextUseTime )
            plProfile_NewMem(CurrVB, vRef->fVertexSize * vRef->fCount);
            fVtxUsed += vRef->fVertexSize * vRef->fCount;
            vRef->fUseTime = fTextUseTime;

//// IRenderBufferSpan ////////////////////////////////////////////////////////
// Sets up the vertex and index buffers for a span, and then
// renders it in as many passes as it takes in ILoopOverLayers.
//
// span     - the span being drawn; passed through to ILoopOverLayers.
// vb, ib   - device refs, cast here to plDXVertexBufferRef / plDXIndexBufferRef.
// material - material to draw with.
// vStart, vLength - vertex range handed to the tri-list render functor.
// iStart, iLength - index range; iLength / 3 is the triangle count.
void    plDXPipeline::IRenderBufferSpan( const plIcicle& span,
                                         hsGDeviceRef *vb, hsGDeviceRef *ib, 
                                         hsGMaterial *material, UInt32 vStart, UInt32 vLength, 
                                         UInt32 iStart, UInt32 iLength )

    plDXVertexBufferRef *vRef = (plDXVertexBufferRef *)vb;
    plDXIndexBufferRef      *iRef = (plDXIndexBufferRef *)ib;

    HRESULT     r;

    // NOTE(review): only the assert is visible for the nil-buffer case - confirm
    // what happens after it.
    if( vRef->fD3DBuffer == nil || iRef->fD3DBuffer == nil )
        hsAssert( false, "Trying to render a nil buffer pair!" );

    /// Switch to the vertex buffer we want
    if( fSettings.fCurrVertexBuffRef != vRef )
        hsRefCnt_SafeAssign( fSettings.fCurrVertexBuffRef, vRef );
        hsAssert( vRef->fD3DBuffer != nil, "Trying to render a buffer pair without a vertex buffer!" );

    if( vRef->RebuiltSinceUsed() )
        r = fD3DDevice->SetStreamSource( 0, vRef->fD3DBuffer, 0, vRef->fVertexSize );
        hsAssert( r == D3D_OK, "Error trying to set the stream source!" );

        // Only call SetFVF when the format differs from the cached one.
        DWORD fvf = IGetBufferD3DFormat(vRef->fFormat);
        if (fSettings.fCurrFVFFormat != fvf)
            fD3DDevice->SetFVF(fSettings.fCurrFVFFormat = fvf);



    // Note: both these stats are the same, since we don't do any culling or clipping on the tris
    if( fSettings.fCurrIndexBuffRef != iRef )
        hsRefCnt_SafeAssign( fSettings.fCurrIndexBuffRef, iRef );
        hsAssert( iRef->fD3DBuffer != nil, "Trying to render with a nil index buffer" );

    if( iRef->RebuiltSinceUsed() )
        r = fD3DDevice->SetIndices( iRef->fD3DBuffer );
        hsAssert( r == D3D_OK, "Error trying to set the indices!" );

    // Functor that issues the actual draw; three indices per triangle.
    plRenderTriListFunc render(fD3DDevice, iRef->fOffset, vStart, vLength, iStart, iLength/3);

    ILoopOverLayers(render, material, span);

// ILoopOverLayers /////////////////////////////////////////////////////////////////////////////////
// Render the input span with the input material in as many passes as it takes.
// Also handles rendering projected lights, either onto each pass or 
// once onto the FB after all the passes, as appropriate.
//
// inRender - functor that issues the draw call; replaced by sRenderNil when the
//            kFlagNoRender debug flag is set.
// material - material to draw with, unless an override material is active.
// span     - the span being drawn.
// Returns false on the path visible here.
hsBool plDXPipeline::ILoopOverLayers(const plRenderPrimFunc& inRender, hsGMaterial* material, const plSpan& span)

    const plRenderPrimFunc& render = IsDebugFlagSet(plPipeDbg::kFlagNoRender) ? (const plRenderPrimFunc&)sRenderNil : inRender;

    // A pipeline-wide override material wins over the one passed in.
    if( GetOverrideMaterial() )
        material = GetOverrideMaterial();


    // Set once the per-pass projection code has run; tested after the loop.
    hsBool normalLightsDisabled = false;

    // Loop across all the layers we need to draw
    int j;
    // No increment in the for header: IHandleMaterial returns the next layer index.
    for( j = 0; j < material->GetNumLayers(); )
        int iCurrMat = j;
        j = IHandleMaterial( material, iCurrMat, &span );
        // NOTE(review): the statement guarded by the -1 test is not visible here.
        if (j == -1)

        // Alpha blended pass with opacity <= 0 on something that is not
        // vertex-preshaded.
        // NOTE(review): the statement guarded by this test is not visible here.
        if( (fLayerState[0].fBlendFlags & hsGMatState::kBlendAlpha)
                &&(material->GetLayer(iCurrMat)->GetOpacity() <= 0) 
                &&(fCurrLightingMethod != plSpan::kLiteVtxPreshaded) ) // This opt isn't good for particles, since their
                                                                        // material opacity is undefined/unused... -mcn

        ISetFogParameters(&span, material->GetLayer(iCurrMat));

        ISetShaders(material->GetLayer(iCurrMat)->GetVertexShader(), material->GetLayer(iCurrMat)->GetPixelShader());

        if( normalLightsDisabled )

        // Ask the device whether the current state is renderable and record the result.
        DWORD nPass;
        fSettings.fDXError = fD3DDevice->ValidateDevice(&nPass);
        if( fSettings.fDXError != D3D_OK )
#endif // HS_DEBUGGING

        // Do the regular draw.

        // Take care of projections that get applied to each pass.
        if( fLights.fProjEach.GetCount() && !(fView.fRenderState & kRenderNoProjection) ) 
            // Disable all the normal lights.
            normalLightsDisabled = true;

            IRenderProjectionEach(render, material, iCurrMat, span);

        // Debug flag: force the loop to end after this pass.
        if (IsDebugFlagSet(plPipeDbg::kFlagNoUpperLayers))
            j = material->GetNumLayers();

    // If we disabled lighting, re-enable it.
    if( normalLightsDisabled )

    // Render any aux spans associated.
    if( span.GetNumAuxSpans() )

    // Only render projections and shadows if we successfully rendered the span.
    // j == -1 means we aborted render.
    if( j >= 0 )
        // Projections that get applied to the frame buffer (after all passes).
        if( fLights.fProjAll.GetCount() && !(fView.fRenderState & kRenderNoProjection) ) 

        // Handle render of shadows onto geometry.
        if( fShadows.GetCount() )
            IRenderShadowsOntoSpan(render, &span, material);

    // Debug only
    if (IsDebugFlagSet(plPipeDbg::kFlagOverlayWire))
        IRenderOverWire(render, material, span);

    return false;

// IRenderOverWire ///////////////////////////////////////////////////////////////////////////////
// Debug only, renders wireframe on top of normal render.
//
// render   - functor that issues the draw call.
// material - material whose first layer (index 0) is set up for the overlay.
// span     - the span being drawn.
// fView.fRenderState is saved on entry and put back on exit.
void plDXPipeline::IRenderOverWire(const plRenderPrimFunc& render, hsGMaterial* material, const plSpan& span)
    UInt32 state = fView.fRenderState;
    fView.fRenderState |= plPipeline::kRenderBaseLayerOnly;
    // Shared layer override: wireframe + two-sided, with kZNoZRead cleared and
    // kZIncLayer set. It is static, so the flag changes persist between calls.
    static plLayerDepth depth;
    depth.SetMiscFlags(depth.GetMiscFlags() | hsGMatState::kMiscWireFrame | hsGMatState::kMiscTwoSided);
    depth.SetZFlags((depth.GetZFlags() & ~hsGMatState::kZNoZRead) | hsGMatState::kZIncLayer);

    AppendLayerInterface(&depth, false);

    // A negative result from IHandleMaterial means nothing should be drawn.
    if( IHandleMaterial( material, 0, &span ) >= 0 )
        // No shaders for the overlay.
        ISetShaders(nil, nil);

    RemoveLayerInterface(&depth, false) ;
    fView.fRenderState = state;

// IRenderProjectionEach ///////////////////////////////////////////////////////////////////////////////////////
// Render any lights that are to be projected onto each pass of the object.
//
// render   - functor that issues the draw call.
// material - material being drawn.
// iPass    - index of the first layer of the pass just rendered.
// span     - the span being drawn.
void plDXPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span)
    // If this is a bump map pass, forget it, we've already "done" per-pixel lighting.
    // NOTE(review): the statement guarded by this test is not visible here.
    if( fLayerState[iPass].fMiscFlags & (hsGMatState::kMiscBumpLayer | hsGMatState::kMiscBumpChans) )

    // Push the LayerShadowBase override. This sets the blend
    // to framebuffer as Add/ZNoWrite and AmbientColor = 0.
    static plLayerLightBase layLightBase;

    // First layer index past the pass that was just rendered.
    int iNextPass = iPass + fCurrNumLayers;

    // Limited-projection hardware combined with a layer whose UVW source has bits
    // set outside kUVWIdxMask.
    // NOTE(review): the statement guarded by this test is not visible here.
    if( fSettings.fLimitedProj && (material->GetLayer(iPass)->GetUVWSrc() & ~plLayerInterface::kUVWIdxMask) )

    // For each projector:
    int k;
    for( k = 0; k < fLights.fProjEach.GetCount(); k++ )
        // Push its projected texture as a piggyback.
        plLightInfo* li = fLights.fProjEach[k];

        // Lower end boards are iffy on when they'll project correctly.
        if( fSettings.fCantProj && !li->GetProperty(plLightInfo::kLPForceProj) )

        plLayerInterface* proj = li->GetProjection();
        hsAssert(proj, "A projector with no texture to project?");

        // Enable the projecting light only.
        plDXLightRef* ref = (plDXLightRef *)li->GetDeviceRef();
        fD3DDevice->LightEnable( ref->fD3DIndex, true );

        AppendLayerInterface(&layLightBase, false);

        // Render until it's done.
        // IHandleMaterial returns the next layer index, which drives the loop.
        int iRePass = iPass;
        while( iRePass < iNextPass )
            iRePass = IHandleMaterial( material, iRePass, &span );
            ISetShaders(nil, nil);

            // Do the render with projection.

        RemoveLayerInterface(&layLightBase, false);

        // Disable the projecting light
        fD3DDevice->LightEnable(ref->fD3DIndex, false);

        // Pop its projected texture off piggyback



// IRenderProjections ///////////////////////////////////////////////////////////
// Render any projected lights that want to be rendered a single time after
// all passes on the object are complete.
//
// render - functor that issues the draw call; handed to IRenderProjection for
//          each light in fLights.fProjAll.
void plDXPipeline::IRenderProjections(const plRenderPrimFunc& render)
    int i;
    for( i = 0; i < fLights.fProjAll.GetCount(); i++ )
        plLightInfo* li = fLights.fProjAll[i];

        // Hardware flagged as unable to project, and the light does not force it.
        // NOTE(review): the statement guarded by this test is not visible here;
        // presumably it skips the light - confirm.
        if( fSettings.fCantProj && !li->GetProperty(plLightInfo::kLPForceProj) )
        IRenderProjection(render, li);

// IRenderProjection //////////////////////////////////////////////////////////////
// Render this light's projection onto the frame buffer.
void plDXPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightInfo* li)
    // Sets up stage 0 from the light's projection layer, with only this
    // light's D3D slot enabled, and disables the slot again at the end.
    plDXLightRef* ref = (plDXLightRef *)li->GetDeviceRef();
    fD3DDevice->LightEnable(ref->fD3DIndex, true);

    plLayerInterface* proj = li->GetProjection();

    // Static storage, so members not assigned here start out zeroed; the
    // diffuse color is forced to opaque white on every call.
    static D3DMATERIAL9 mat;
    mat.Diffuse.r = mat.Diffuse.g = mat.Diffuse.b = mat.Diffuse.a = 1.f;

    fD3DDevice->SetMaterial( &mat );


    // Global ambient set to all-ones (white, full alpha).
    fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0xffffffff ); //@@@

    // Set the FB blend mode, texture, all that.
    ICompositeLayerState(0, proj);
    // We should have put ZNoZWrite on during export, but we didn't.
    fLayerState[0].fZFlags = hsGMatState::kZNoZWrite;
    fCurrNumLayers = 1;

    // Projections flagged kBlendInvertFinalColor read the complemented
    // diffuse as stage 0's second color argument.
    if( proj->GetBlendFlags() & hsGMatState::kBlendInvertFinalColor )
        fD3DDevice->SetTextureStageState( 0, D3DTSS_COLORARG2, D3DTA_DIFFUSE | D3DTA_COMPLEMENT);

    // Seal it up
    // Stage 1 is disabled for both color and alpha, so only stage 0 is active.
    fLastEndingStage = 1;
    fD3DDevice->SetTextureStageState(1, D3DTSS_COLOROP, D3DTOP_DISABLE);
    fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
    // All bits set in the cached blend flags for stage 1
    // (presumably marks the cached state as unknown - TODO confirm).
    fLayerState[1].fBlendFlags = UInt32(-1);

    // Ask the device whether the state configured above is renderable; the
    // result code is kept in fSettings.fDXError.
    DWORD nPass;
    fSettings.fDXError = fD3DDevice->ValidateDevice(&nPass);
    if( fSettings.fDXError != D3D_OK )
#endif // HS_DEBUGGING

    // Okay, render it already.


    // Turn this light's slot back off.
    fD3DDevice->LightEnable(ref->fD3DIndex, false);

//// IGetBufferD3DFormat //////////////////////////////////////////////////////
// Convert the dumbest vertex format on the planet (ours) into an FVF code.
// Note the assumption of position, normal, diffuse, and specular.
// We no longer use FVF codes, just shader handles.
long    plDXPipeline::IGetBufferD3DFormat( UInt8 format ) const
    // Builds an FVF bit mask in fmt from the plGBufferGroup format byte:
    // skin-weight bits, then the UV-set count, then the size of each UV set.
    long    fmt, i;
    // Branch on the skin-weight field of the format byte.
    switch( format & plGBufferGroup::kSkinWeightMask )
        case plGBufferGroup::kSkinNoWeights: 
        case plGBufferGroup::kSkin1Weight: 
        case plGBufferGroup::kSkin2Weights: 
        case plGBufferGroup::kSkin3Weights: 
            hsAssert( false, "Bad skin weight value in IGetBufferD3DFormat()" );
    // Indexed skinning asserts as unsupported, yet the UBYTE4 index flag is
    // still OR'd into the result.
    if( format & plGBufferGroup::kSkinIndices )
        hsAssert(false, "Indexed skinning not supported");
        fmt |= D3DFVF_LASTBETA_UBYTE4;

    // One D3DFVF_TEXn flag selected by the number of UV sets (0 through 8).
    switch( plGBufferGroup::CalcNumUVs( format ) )
        case 0: fmt |= D3DFVF_TEX0; break;
        case 1: fmt |= D3DFVF_TEX1; break;
        case 2: fmt |= D3DFVF_TEX2; break;
        case 3: fmt |= D3DFVF_TEX3; break;
        case 4: fmt |= D3DFVF_TEX4; break;
        case 5: fmt |= D3DFVF_TEX5; break;
        case 6: fmt |= D3DFVF_TEX6; break;
        case 7: fmt |= D3DFVF_TEX7; break;
        case 8: fmt |= D3DFVF_TEX8; break;

    // Every UV set is declared as three components (u, v, w).
    for( i = 0; i < plGBufferGroup::CalcNumUVs( format ); i++ )
        fmt |= D3DFVF_TEXCOORDSIZE3( i );

    return fmt;

//// IGetBufferFormatSize /////////////////////////////////////////////////////
// Calculate the vertex stride from the given format.
UInt32  plDXPipeline::IGetBufferFormatSize( UInt8 format ) const
    // Returns the per-vertex byte count for the given format byte.
    // Fixed part of every vertex: six floats plus two packed 32-bit colors.
    UInt32  size = sizeof( float ) * 6 + sizeof( UInt32 ) * 2; // Position and normal, and two packed colors

    // Skin weights add to the size depending on the weight field.
    switch( format & plGBufferGroup::kSkinWeightMask )
        case plGBufferGroup::kSkinNoWeights: 
        case plGBufferGroup::kSkin1Weight:
            size += sizeof(float);
            hsAssert( false, "Invalid skin weight value in IGetBufferFormatSize()" );

    // Three floats per UV set, matching the D3DFVF_TEXCOORDSIZE3 declaration
    // used in IGetBufferD3DFormat.
    size += sizeof( float ) * 3 * plGBufferGroup::CalcNumUVs( format );

    return size;

//// Plate and PlateManager Functions /////////////////////////////////////////

// None of this plate code is mine, so your guess is as good as mine. 
// I'll throw in comments where I happen to know what it's doing, but a lot
// of this is just ugly.
// The plates are mostly used for debugging/performance tools, but they do
// unfortunately get used for some production things like the cursor.
// By the way, a Plate is just a screen aligned textured quad that is rendered
// on top of the normal scene. mf

// ICreateGeometry /////////////////////////////////////////////////////////
// Make a quad suitable for rendering as a tristrip.
void plDXPlateManager::ICreateGeometry(plDXPipeline* pipe)
    // Creates the four-vertex buffer shared by all plates and fills it with a
    // unit quad. fCreatedSucessfully records whether creation worked.
    UInt32 fvfFormat = PLD3D_PLATEFVF;
    // Asserts if the pipeline reports managed allocations already made.
    hsAssert(!pipe->ManagedAlloced(), "Alloc default with managed alloc'd");
    if( FAILED( fD3DDevice->CreateVertexBuffer( 4 * sizeof( plPlateVertex ),
                                                poolType, &fVertBuffer, NULL ) ) )
        hsAssert( false, "CreateVertexBuffer() call failed!" );
        fCreatedSucessfully = false;
    // Memory-profiling bookkeeping for the allocation just made.
    PROFILE_POOL_MEM(poolType, 4 * sizeof(plPlateVertex), true, "PlateMgrVtxBuff");

    /// Lock the buffer
    // Offset 0 with size 0 requests a lock on the whole buffer.
    plPlateVertex *ptr;
    if( FAILED( fVertBuffer->Lock( 0, 0, (void **)&ptr, D3DLOCK_NOSYSLOCK ) ) )
        hsAssert( false, "Failed to lock vertex buffer for writing" );
        fCreatedSucessfully = false;
    /// Set 'em up
    // Corners at +/-0.5 in X and Y (Z = 0), white vertex color, UVs spanning
    // 0..1. Order: (-,-), (-,+), (+,-), (+,+).
    ptr[ 0 ].fPoint.Set( -0.5f, -0.5f, 0.0f );
    ptr[ 0 ].fColor = 0xffffffff;
    ptr[ 0 ].fUV.Set( 0.0f, 0.0f, 0.0f );

    ptr[ 1 ].fPoint.Set( -0.5f, 0.5f, 0.0f );
    ptr[ 1 ].fColor = 0xffffffff;
    ptr[ 1 ].fUV.Set( 0.0f, 1.0f, 0.0f );

    ptr[ 2 ].fPoint.Set( 0.5f, -0.5f, 0.0f );
    ptr[ 2 ].fColor = 0xffffffff;
    ptr[ 2 ].fUV.Set( 1.0f, 0.0f, 0.0f );

    ptr[ 3 ].fPoint.Set( 0.5f, 0.5f, 0.0f );
    ptr[ 3 ].fColor = 0xffffffff;
    ptr[ 3 ].fUV.Set( 1.0f, 1.0f, 0.0f );

    /// Unlock and we're done!
    fCreatedSucessfully = true;


// IReleaseGeometry ////////////////////////////////////////////////////////////
// Let go of any D3D resources created for this.
void plDXPlateManager::IReleaseGeometry()
    // Only acts when a vertex buffer exists: records the de-allocation with
    // the memory profiler (same size and tag used at creation) and clears the
    // member pointer.
    if (fVertBuffer)
        PROFILE_POOL_MEM(D3DPOOL_DEFAULT, 4 * sizeof(plPlateVertex), false, "PlateMgrVtxBuff");
        fVertBuffer = nil;

//// Constructor & Destructor /////////////////////////////////////////////////

// Passes pipe through to the plPlateManager base-class constructor.
plDXPlateManager::plDXPlateManager( plDXPipeline *pipe, IDirect3DDevice9 *device ) : plPlateManager( pipe ),


//// IDrawToDevice ////////////////////////////////////////////////////////////
// Render all currently enabled plates to the screen.
void    plDXPlateManager::IDrawToDevice( plPipeline *pipe )
    // Draws each visible plate in the fPlates list through the DX pipeline,
    // then overlays its title and, for graph plates, a label key via
    // plDebugText.
    plDXPipeline    *dxPipe = (plDXPipeline *)pipe;
    plPlate         *plate;
    // Half the screen size in pixels: used for the half-pixel view offset and
    // for mapping transformed plate points to screen coordinates.
    UInt32          scrnWidthDiv2 = fOwner->Width() >> 1;
    UInt32          scrnHeightDiv2 = fOwner->Height() >> 1;
    D3DXMATRIX      mat;
    D3DCULL         oldCullMode;
    if( !fVertBuffer )
    // Make sure skinning is disabled.
    fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_DISABLE);
    // Fixed-function path: no vertex shader, plate FVF, plate quad as stream 0.
    // The pipeline's cached shader/FVF settings are updated in the same step.
    fD3DDevice->SetVertexShader( dxPipe->fSettings.fCurrVertexShader = NULL);
    fD3DDevice->SetFVF(dxPipe->fSettings.fCurrFVFFormat = PLD3D_PLATEFVF);
    fD3DDevice->SetStreamSource( 0, fVertBuffer, 0, sizeof( plPlateVertex ) );  
    // To get plates properly pixel-aligned, we need to compensate for D3D9's weird half-pixel
    // offset (see http://drilian.com/2008/11/25/understanding-half-pixel-and-half-texel-offsets/
    // or http://msdn.microsoft.com/en-us/library/bb219690(VS.85).aspx).
    D3DXMatrixTranslation(&mat, -0.5f/scrnWidthDiv2, -0.5f/scrnHeightDiv2, 0.0f);
    fD3DDevice->SetTransform( D3DTS_VIEW, &mat );
    // Remember the cull mode so it can be restored after the plates are drawn.
    oldCullMode = dxPipe->fCurrCullMode;

    for( plate = fPlates; plate != nil; plate = plate->GetNext() )
        if( plate->IsVisible() )
            dxPipe->IDrawPlate( plate );

            // Title: only drawn when debug text is enabled and the title is
            // non-empty. The anchor is plate-local (0, -0.5) run through the
            // plate transform, then mapped to pixels and shifted left by half
            // the string width to center it.
            const char *title = plate->GetTitle();
            if( plDebugText::Instance().IsEnabled() && title[ 0 ] != 0 )
                hsPoint3 pt;
                pt.Set( 0, -0.5, 0 );
                pt = plate->GetTransform() * pt;
                pt.fX = pt.fX * scrnWidthDiv2 + scrnWidthDiv2;
                pt.fY = pt.fY * scrnHeightDiv2 + scrnHeightDiv2;
                pt.fX -= plDebugText::Instance().CalcStringWidth( title ) >> 1;
                plDebugText::Instance().DrawString( (UInt16)pt.fX, (UInt16)pt.fY + 1, title, 255, 255, 255, 255, plDebugText::kStyleBold ); 

            // Graph plates additionally get a key of data labels.
            if( plate->GetFlags() & plPlate::kFlagIsAGraph )
                plGraphPlate    *graph = (plGraphPlate *)plate;
                hsPoint3        pt, pt2;
                int             i;

                // The key is only drawn when the first label is non-empty.
                if( graph->GetLabelText( 0 )[ 0 ] != 0 )
                    /// Draw key
                    const char *str;

                    // Anchor at plate-local (-0.5, -0.5), mapped to pixels and
                    // moved down one font height.
                    pt.Set( -0.5, -0.5, 0 );
                    pt = plate->GetTransform() * pt;
                    pt.fX = pt.fX * scrnWidthDiv2 + scrnWidthDiv2;
                    pt.fY = pt.fY * scrnHeightDiv2 + scrnHeightDiv2;
                    pt.fY += plDebugText::Instance().GetFontHeight();

                    // Never index more labels than there are data colors.
                    UInt32 numLabels = graph->GetNumLabels();
                    if (numLabels > graph->GetNumColors())
                        numLabels = graph->GetNumColors();

                    // NOTE(review): i is a signed int compared against the
                    // unsigned numLabels.
                    for( i = 0; i < numLabels; i++ )
                        str = graph->GetLabelText( i );
                        if( str[ 0 ] == 0 )

                        // Right-align each label against the anchor X, in the
                        // color of its data series, one font height per row.
                        pt2 = pt;
                        pt2.fX -= plDebugText::Instance().CalcStringWidth( str );
                        plDebugText::Instance().DrawString( (UInt16)pt2.fX, (UInt16)pt2.fY, str, 
                                                            graph->GetDataColor( i ), plDebugText::kStyleBold ); 
                        pt.fY += plDebugText::Instance().GetFontHeight();

    // Put the cull mode back: none if layer 0 is flagged two-sided, otherwise
    // the mode saved before drawing.
    dxPipe->fCurrCullMode = ( dxPipe->fLayerState[0].fMiscFlags & hsGMatState::kMiscTwoSided ) ? D3DCULL_NONE : oldCullMode;
    fD3DDevice->SetRenderState( D3DRS_CULLMODE, dxPipe->fCurrCullMode );

// IDrawPlate ///////////////////////////////////////////////////////////////////////
// Render this plate, in as many passes as it takes.
void    plDXPipeline::IDrawPlate( plPlate *plate )
    // Draws one plate: its transform becomes the world matrix, and the quad is
    // drawn once per material pass with a fixed projection matrix.
    int         i;
    hsGMaterial *material = plate->GetMaterial();
    D3DXMATRIX  mat;

    /// Set up the D3D transform directly
    IMatrix44ToD3DMatrix( mat, plate->GetTransform() );
    fD3DDevice->SetTransform( D3DTS_WORLD, &mat );
    // mat is reused from here on as the projection matrix: identity with the
    // Y scale negated and the (2,2), (2,3), (3,2), (3,3) entries overridden.
    mat = d3dIdentityMatrix;
    mat(1,1) = -1.0f;
    mat(2,2) = 2.0f;
    mat(2,3) = 1.0f;
    mat(3,2) = -2.0f;
    mat(3,3) = 0.0f;


    /// Draw the vertex buffer once for each material pass
    // No increment in the loop header: IHandleMaterial returns the next value
    // of i.
    for( i = 0; i < material->GetNumLayers(); )
        // Stat gather adjust: since IHandleMaterial will count this in the stat gather,
        // artificially decrement here so that the plates don't skew the stat gathering
        // Taking this out. If the plates are causing more material changes, they should
        // show up in the stats. mf

        i = IHandleMaterial( material, i, nil );
        ISetShaders(nil, nil);

        // To override the transform done by the z-bias
        fD3DDevice->SetTransform( D3DTS_PROJECTION, &mat );
        // And this to override cullmode set based on material 2-sidedness.
        fD3DDevice->SetRenderState( D3DRS_CULLMODE, fCurrCullMode = D3DCULL_CW );

        // Two triangles from the 4-vertex strip bound as stream 0.
        WEAK_ERROR_CHECK( fD3DDevice->DrawPrimitive( D3DPT_TRIANGLESTRIP, 0, 2 ) );


//// Error Message Stuff //////////////////////////////////////////////////////

// IAddErrorMessage ////////////////////////////////////////////////////
// Append the error string to the current error string.
void    plDXPipeline::IAddErrorMessage( char *errStr )
    static char str[ 512 ];
    if( errStr && strlen( errStr ) + strlen( fSettings.fErrorStr ) < sizeof( fSettings.fErrorStr ) - 4 )
        strcpy( str, fSettings.fErrorStr );
        sprintf( fSettings.fErrorStr, "%s\n(%s)", errStr, str );
        plStatusLog::AddLineS("pipeline.log", fSettings.fErrorStr);

// ISetErrorMessage //////////////////////////////////////////////////////////
// Clear the current error string to the input string.
void    plDXPipeline::ISetErrorMessage( char *errStr )
    if( errStr )
        strcpy( fSettings.fErrorStr, errStr );
        plStatusLog::AddLineS("pipeline.log", fSettings.fErrorStr);
        fSettings.fErrorStr[ 0 ] = nil;

// IGetD3DError /////////////////////////////////////////////////////////////////
// Convert the last D3D error code to a string (probably "Conflicting Render State").
void    plDXPipeline::IGetD3DError()
    // Overwrites fSettings.fErrorStr with "D3DError : " followed by the text
    // DXGetErrorString gives for the code stored in fSettings.fDXError.
    // NOTE(review): sprintf is unbounded here; this relies on the returned
    // text fitting in fErrorStr.
    sprintf( fSettings.fErrorStr, "D3DError : %s", (char *)DXGetErrorString( fSettings.fDXError ) );

// IShowErrorMessage /////////////////////////////////////////////////////////////
// Append the string to the running error string.
void    plDXPipeline::IShowErrorMessage( char *errStr )
    if( errStr != nil )
        IAddErrorMessage( errStr );

//  hsAssert( false, fSettings.fErrorStr );

// ICreateFail ////////////////////////////////////////////////////////////////////
// Called on unrecoverable error during device creation. Frees up anything 
// allocated so far, sets the error string, and returns true.
hsBool  plDXPipeline::ICreateFail( char *errStr )
    // Records a failure reason in the running error string and always
    // returns true.
    // Don't overwrite any error string we already had
    if( fSettings.fErrorStr[ 0 ] == 0 )

    // Prefer the caller's message; fall back to "unknown" only when no message
    // was given and the running error string is still empty.
    // NOTE(review): the literal "unknown" is passed to a non-const char*
    // parameter.
    if( errStr && *errStr )
        IAddErrorMessage( errStr );
    else if( !*fSettings.fErrorStr )
        IAddErrorMessage( "unknown" );

    return true;

// GetErrorString ///////////////////////////////////////////////////////////////////////////
// Return the current error string.
const char  *plDXPipeline::GetErrorString()
    if( fSettings.fErrorStr[ 0 ] == 0 )
        return nil;

    return fSettings.fErrorStr;

//// Miscellaneous Utility Functions //////////////////////////////////////////

//// GetDXBitDepth //////////////////////////////////////////////////////////
//  From a D3DFORMAT enumeration, return the bit depth associated with it.

short   plDXPipeline::GetDXBitDepth( D3DFORMAT format )
{
    // Color, luminance and bump formats grouped by bits per pixel.
    // D3DFMT_W11V11U10 is intentionally left out, as are the depth/stencil
    // and index-buffer formats (D16_LOCKABLE, D32, D15S1, D24S8, D16, D24X8,
    // D24X4S4, INDEX16, INDEX32), which really don't have a bit depth
    // associated with them. All of those fall through to the default.
    switch( format )
    {
        case D3DFMT_R3G3B2:
        case D3DFMT_A8:
        case D3DFMT_P8:
        case D3DFMT_L8:
        case D3DFMT_A4L4:
            return 8;

        case D3DFMT_R5G6B5:
        case D3DFMT_X1R5G5B5:
        case D3DFMT_A1R5G5B5:
        case D3DFMT_A4R4G4B4:
        case D3DFMT_A8R3G3B2:
        case D3DFMT_X4R4G4B4:
        case D3DFMT_A8P8:
        case D3DFMT_A8L8:
        case D3DFMT_V8U8:
        case D3DFMT_L6V5U5:
            return 16;

        case D3DFMT_R8G8B8:
            return 24;

        case D3DFMT_A8R8G8B8:
        case D3DFMT_X8R8G8B8:
        case D3DFMT_X8L8V8U8:
        case D3DFMT_Q8W8V8U8:
        case D3DFMT_V16U16:
            return 32;

        case D3DFMT_UNKNOWN:
            return 0;

        default:
            // Unsupported translation format--return 0
            return 0;
    }
}

//// IGetDXFormatName ////////////////////////////////////////////////////////
//  From a D3DFORMAT enumeration, return the string for it.

const char  *plDXPipeline::IGetDXFormatName( D3DFORMAT format )
{
    // Format/name pairs. D3DFMT_W11V11U10 and D3DFMT_DXT2..DXT4 are
    // deliberately not listed, so they report "Bad format" like any other
    // unlisted value.
    struct FormatName
    {
        D3DFORMAT   fFormat;
        const char  *fName;
    };
    static const FormatName kFormatNames[] =
    {
        { D3DFMT_UNKNOWN,       "D3DFMT_UNKNOWN" },
        { D3DFMT_R8G8B8,        "D3DFMT_R8G8B8" },
        { D3DFMT_A8R8G8B8,      "D3DFMT_A8R8G8B8" },
        { D3DFMT_X8R8G8B8,      "D3DFMT_X8R8G8B8" },
        { D3DFMT_R5G6B5,        "D3DFMT_R5G6B5" },
        { D3DFMT_X1R5G5B5,      "D3DFMT_X1R5G5B5" },
        { D3DFMT_A1R5G5B5,      "D3DFMT_A1R5G5B5" },
        { D3DFMT_A4R4G4B4,      "D3DFMT_A4R4G4B4" },
        { D3DFMT_R3G3B2,        "D3DFMT_R3G3B2" },
        { D3DFMT_A8,            "D3DFMT_A8" },
        { D3DFMT_A8R3G3B2,      "D3DFMT_A8R3G3B2" },
        { D3DFMT_X4R4G4B4,      "D3DFMT_X4R4G4B4" },
        { D3DFMT_A8P8,          "D3DFMT_A8P8" },
        { D3DFMT_P8,            "D3DFMT_P8" },
        { D3DFMT_L8,            "D3DFMT_L8" },
        { D3DFMT_A8L8,          "D3DFMT_A8L8" },
        { D3DFMT_A4L4,          "D3DFMT_A4L4" },
        { D3DFMT_V8U8,          "D3DFMT_V8U8" },
        { D3DFMT_L6V5U5,        "D3DFMT_L6V5U5" },
        { D3DFMT_X8L8V8U8,      "D3DFMT_X8L8V8U8" },
        { D3DFMT_Q8W8V8U8,      "D3DFMT_Q8W8V8U8" },
        { D3DFMT_V16U16,        "D3DFMT_V16U16" },
        { D3DFMT_UYVY,          "D3DFMT_UYVY" },
        { D3DFMT_YUY2,          "D3DFMT_YUY2" },
        { D3DFMT_DXT1,          "D3DFMT_DXT1" },
        { D3DFMT_DXT5,          "D3DFMT_DXT5" },
        { D3DFMT_D16_LOCKABLE,  "D3DFMT_D16_LOCKABLE" },
        { D3DFMT_D32,           "D3DFMT_D32" },
        { D3DFMT_D15S1,         "D3DFMT_D15S1" },
        { D3DFMT_D24S8,         "D3DFMT_D24S8" },
        { D3DFMT_D16,           "D3DFMT_D16" },
        { D3DFMT_D24X8,         "D3DFMT_D24X8" },
        { D3DFMT_D24X4S4,       "D3DFMT_D24X4S4" },
        { D3DFMT_INDEX16,       "D3DFMT_INDEX16" },
        { D3DFMT_INDEX32,       "D3DFMT_INDEX32" }
    };
    const int kNumFormatNames = sizeof( kFormatNames ) / sizeof( kFormatNames[ 0 ] );

    for( int idx = 0; idx < kNumFormatNames; idx++ )
    {
        if( kFormatNames[ idx ].fFormat == format )
            return kFormatNames[ idx ].fName;
    }

    return "Bad format";
}

//// IFPUCheck ////////////////////////////////////////////////////////////////
//  Checks the FPU to make sure it's in the right mode
// This should return wSave to allow it to be restored after rendering.
// This is obsolete as of DX8
void    plDXPipeline::IFPUCheck()
    // Reads the x87 FPU control word and, if it is not in the expected mode,
    // loads a corrected copy. wSave holds the word as read, wTemp the
    // corrected value.
    WORD    wSave, wTemp;
    __asm fstcw wSave
    // Three fields are checked: 0x300, 0x3f and 0xC00 (see the per-line notes).
    if (wSave & 0x300 ||            // Not single mode
        0x3f != (wSave & 0x3f) ||   // Exceptions enabled
        wSave & 0xC00)              // Not round to nearest mode
            mov ax, wSave
            and ax, not 0x300    ;; single mode
            or  ax, 0x3f         ;; disable all exceptions
            and ax, not 0xC00   ;; round to nearest mode
            mov wTemp, ax
            fldcw   wTemp

// PushPiggyBackLayer /////////////////////////////////////////////////////
// Push a piggy back onto the stack.
plLayerInterface* plDXPipeline::PushPiggyBackLayer(plLayerInterface* li)
    // Sets fForceMatHandle and returns the layer that was passed in.


    fForceMatHandle = true;

    return li;

// PopPiggyBackLayer ///////////////////////////////////////////////////////////////////
// Pull the piggy back out of the stack (if it's there).
plLayerInterface* plDXPipeline::PopPiggyBackLayer(plLayerInterface* li)
    // Looks li up in fPiggyBackStack. Returns nil when it is not there;
    // otherwise sets fForceMatHandle and returns li.
    int idx = fPiggyBackStack.Find(li);
    if( fPiggyBackStack.kMissingIndex == idx )
        return nil;


    fForceMatHandle = true;

    return li;

// AppendLayerInterface ///////////////////////////////////////////////////////////////////
// Install a layer wrapper, either around every layer rendered (onAllLayers)
// or around base layers only. Note that a single material has multiple base
// layers if it takes multiple passes to render.
// The wrapper stays in effect until removed by RemoveLayerInterface.
// Returns the new head of the wrapper chain that li was attached to.
plLayerInterface* plDXPipeline::AppendLayerInterface(plLayerInterface* li, hsBool onAllLayers)
{
    // Raised on every wrapper change (presumably so the next material setup is
    // redone - TODO confirm).
    fForceMatHandle = true;

    if( !onAllLayers )
    {
        fOverBaseLayer = li->Attach(fOverBaseLayer);
        return fOverBaseLayer;
    }

    fOverAllLayer = li->Attach(fOverAllLayer);
    return fOverAllLayer;
}

// RemoveLayerInterface //////////////////////////////////////////////////////////////////
// Take a layer wrapper installed by AppendLayerInterface back out of the
// chain selected by onAllLayers. Returns the new head of that chain, or nil
// when the chain was already empty.
plLayerInterface* plDXPipeline::RemoveLayerInterface(plLayerInterface* li, hsBool onAllLayers)
{
    fForceMatHandle = true;

    if( onAllLayers )
    {
        if( fOverAllLayer )
        {
            fOverAllLayer = fOverAllLayer->Remove(li);
            return fOverAllLayer;
        }
        return nil;
    }

    if( fOverBaseLayer )
    {
        fOverBaseLayer = fOverBaseLayer->Remove(li);
        return fOverBaseLayer;
    }
    return nil;
}

//// ShadowSection
//// Shadow specific internal functions
// See plGLight/plShadowMaster.cpp for more notes.

// IAttachShadowsToReceivers ///////////////////////////////////////////////////////////
// For each active shadow map (in fShadows), attach it to all of the visible spans in drawable
// that it affects. Shadows explicitly attached via light groups are handled separately in ISetShadowFromGroup.
void plDXPipeline::IAttachShadowsToReceivers(plDrawableSpans* drawable, const hsTArray<Int16>& visList)
{
    // One pass per entry of fShadows; the per-slave work is done in
    // IAttachSlaveToReceivers.
    for( int which = 0; which < fShadows.GetCount(); ++which )
    {
        IAttachSlaveToReceivers(which, drawable, visList);
    }
}

// IAttachSlaveToReceivers /////////////////////////////////////////////////////
// Find all the visible spans in this drawable affected by this shadow map, 
// and attach it to them.
void plDXPipeline::IAttachSlaveToReceivers(int which, plDrawableSpans* drawable, const hsTArray<Int16>& visList)
    // which indexes fShadows; visList holds the indices of the visible spans
    // of drawable.
    plShadowSlave* slave = fShadows[which];

    // Whether the drawable is a character affects which lights/shadows affect it.
    hsBool isChar = drawable->GetNativeProperty(plDrawable::kPropCharacter);

    // If the shadow is part of a light group, it gets handled in ISetShadowFromGroup.
    // Unless the drawable is a character (something that moves around indeterminately, 
    // like the avatar or a physical object), and the shadow affects all characters.
    if( slave->ObeysLightGroups() && !(slave->IncludesChars() && isChar) )

    // Do a space tree harvest looking for spans that are visible and whose bounds
    // intersect the shadow volume.
    plSpaceTree* space = drawable->GetSpaceTree();

    // Function-local statics: the scratch bit vector and hit list are shared
    // across calls.
    static hsBitVector cache;
    space->EnableLeaves(visList, cache);

    static hsTArray<Int16> hitList;
    space->HarvestEnabledLeaves(slave->fIsect, cache, hitList);

    // For the visible spans that intersect the shadow volume, attach the shadow
    // to all appropriate for receiving this shadow map.
    int i;
    for( i = 0; i < hitList.GetCount(); i++ )
        const plSpan* span = drawable->GetSpan(hitList[i]);
        hsGMaterial* mat = drawable->GetMaterial(span->fMaterialIdx);

        // Check that the span isn't flagged as unshadowable, or has
        // a material that we can't shadow onto.
        if( !IReceivesShadows(span, mat) )

        // Check for self shadowing. If the shadow doesn't want self shadowing,
        // and the span is part of the shadow caster, then skip.
        if( !IAcceptsShadow(span, slave) )

        // Add it to this span's shadow list for this frame.


// ISetShadowFromGroup ////////////////////////////////////////////////////////////////////////
// The light casting this shadow has been explicitly attached to this span, so no need
// for checking bounds, but we do anyway because the artists aren't very conservative
// along those lines. The light has a bitvector indicating which of the current shadows
// are from it (there will be a shadow map for each shadow-light/shadow-caster pair),
// so we look through those shadow maps and if they are acceptable, attach them to
// the span.
// Note that a shadow slave corresponds to a shadow map.
void plDXPipeline::ISetShadowFromGroup(plDrawableSpans* drawable, const plSpan* span, plLightInfo* liInfo)
    // The span's material comes from the drawable via span->fMaterialIdx.
    hsGMaterial* mat = drawable->GetMaterial(span->fMaterialIdx);

    // Check that this span/material combo can receive shadows at all.
    if( !IReceivesShadows(span, mat) )

    // slaveBits marks, by slave fIndex, which shadows belong to this light.
    const hsBitVector& slaveBits = liInfo->GetSlaveBits();
    int i;
    for( i = 0; i < fShadows.GetCount(); i++ )
        if( slaveBits.IsBitSet(fShadows[i]->fIndex) )
            // Check self shadowing.
            if( IAcceptsShadow(span, fShadows[i]) )
                // Check for overlapping bounds.
                // Anything other than kVolumeCulled counts as overlapping.
                if( fShadows[i]->fIsect->Test(span->fWorldBounds) != kVolumeCulled )

// SubmitShadowSlave ////////////////////////////////////////////////////////
// Puts the slave in a list valid for this frame only. The list will
// be preprocessed at BeginRender. See IPreprocessShadows.

void plDXPipeline::SubmitShadowSlave(plShadowSlave* slave)
    // Check that it's a valid slave.
    // Valid means: non-nil, with a caster, and the caster has a key.
    if( !(slave && slave->fCaster && slave->fCaster->GetKey()) )

    // A board with limited projection capability (i.e. GeForce1) can't 
    // do perspective shadows (from point source lights) because it
    // requires a count3 uvw on 2 texture units (0,1) simultaneously. Just skip.
    if( (fSettings.fLimitedProj || fSettings.fCantProj) && slave->fView.GetPerspective() )

    // Ref the shadow caster so we're sure it will still be around when we go to
    // render it.

    // Keep the shadow slaves in a priority sorted list. For performance reasons,
    // we may want only the strongest N or those of a minimum priority.
    // The scan compares the new slave's priority against each entry; i, as
    // left by the scan, is the position passed to Insert below.
    int i;
    for( i = 0; i < fShadows.GetCount(); i++ )
        if( slave->fPriority <= fShadows[i]->fPriority )

    // Note that fIndex is no longer the index in the fShadows list, but
    // is still used as a unique identifier for this slave.
    // fIndex is taken from the list size before the insert.
    slave->fIndex = fShadows.GetCount();
    fShadows.Insert(i, slave);

// File-scope blur scale with external linkage, initialized to -1.
// NOTE(review): presumably negative means "not overridden" - confirm against
// the code that reads it.
hsScalar blurScale = -1.f;
// Log2 of a sample count. NOTE(review): 1 << 3 is 8, so the trailing
// "Log2(4)" note looks stale; the only nearby use of this constant is
// commented out.
static  const int kL2NumSamples = 3; // Log2(4)

// IBlurShadowMap //////////////////////////////////////////////////////////////////
// For a shadow map, we've got a specific (non-general) blurring in mind.
// This could be used as a loose model for more general blurring, but you
// wouldn't want to run a generic texture or render target through this.
// Specifically, we assume:
//  Input: 
//      An RGBA rendertarget with an alpha we want to preserve, and color
//          going from black (unused) to white (written).
//      A blur factor
//  Output:
//      The rendertarget with alpha preserved, and the color channel blurred
//          appropriately.
//  We'll want to minimize our render target changes, so
//      we clear our scratch render target to black/white (color/alpha), then
//      render additively the color of our input with a zero alpha. The scratch
//      accumulates the color sum, but the alpha starts and stays saturated to 100%.
//      Then we modulate that back into the input, so the alpha is unchanged, the
//      color (within the white region) falls off at the edges. The color outside the
//      white region is black and stays black, but we don't care because we'll be ignoring
//      that anyway.
//  Notice that this depends on the input, each pixel having been all black or all "white".
// Also depends on "white" having 1/N premodulated in, where N is the number of samples.
//      That's why we can just sum up the colors, without needing to do a divide. Otherwise
//      we'd saturate at 255 during the sum, and the divide would be pointless.
// One other thing we're counting on here, is that we've just been rendering to an
//      offscreen, we're done, and we're about to pop our rendertarget, which is going
//      to reset a lot of render state that we would otherwise be responsible for here.
// We're hoping that this blur function (if efficient enough) can get called enough times
//      per frame to warrant the sins described above.
void plDXPipeline::IBlurShadowMap(plShadowSlave* slave)
    // The slave's fPipeData holds its shadow-map render target, and
    // fBlurScale is the blur amount handed to the blur pass.
    plRenderTarget* smap = (plRenderTarget*)slave->fPipeData;
    hsScalar scale = slave->fBlurScale;

    // Find a scratch rendertarget which matches the input.
    // The same slot index selects both the scratch and the destination target.
    int which = IGetScratchRenderTarget(smap);
    plRenderTarget* scratchRT = fBlurScratchRTs[which];
    if( !scratchRT )
    plRenderTarget* destRT = fBlurDestRTs[which];
    if( !destRT )

    // Set up to render into it.

    // Clear it appropriately
    // Color target only, cleared to 0xff000000: black color, full alpha.
    fD3DDevice->Clear(0, nil, D3DCLEAR_TARGET, 0xff000000L, 1.0f, 0L);

    // Setup our quad for rendering

    // Render the input image into the scratch image, creating the blur.
    IRenderBlurFromShadowMap(scratchRT, smap, scale);

    // Set the rendertarget back to src
    // Setup renderstate to render it back modulating.
    // Render the scratch back into the input.
    IRenderBlurBackToShadowMap(smap, scratchRT, destRT);

    // dst is now the slave's rendertarget and smap is the new scratch dst
    // for this size.
    slave->fPipeData = (void*)destRT;
    fBlurDestRTs[which] = smap;

// IGetScratchRenderTarget ////////////////////////////////////////////
// Look for a render target to use as scratch space for blurring the input render target.
// Note that the whole blur process requires 3 render targets, the source,
// an intermediate, and the destination (which gets swapped with the source).
// But that's only an extra 2 render targets for all shadow maps of a given
// size.
// Note also that the intermediate is one size smaller than the source,
// to get better blurring through bilerp magnification.
int plDXPipeline::IGetScratchRenderTarget(plRenderTarget* smap)
    // Returns the slot index into fBlurScratchRTs / fBlurDestRTs for this
    // shadow map, filling either slot on first use.
    int which = -1;
    // The slot number is log2 of the size label (512 -> 9 ... 32 -> 5).
    case 512:
        which = 9;
    case 256:
        which = 8;
    case 128:
        which = 7;
    case 64:
        which = 6;
    case 32:
        which = 5;
        // NOTE(review): the function returns int, so `false` here is slot 0,
        // not the -1 that `which` starts out as - confirm that is intended.
        return false;
    if( !fBlurScratchRTs[which] )
        // We may or may not get back the size we requested here, but if we didn't,
        // we aren't going to later, so we might as well stuff the smaller render target
        // into the bigger slot. Bad thing is that we might want a smaller render target
        // later, and we won't know to look in the bigger slot for it, so we could wind
        // up using say two 128x128's (one in the 256 slot, one in the 128 slot). 
        // This intermediate is one power of 2 smaller than the source.
        UInt32 width = smap->GetWidth();
        UInt32 height = smap->GetHeight();
        // The halving is gated on the width alone.
        if( width > 32 )
            width >>= 1;
            height >>= 1;
        // Only the kIsOrtho bit of the source's flags is passed on.
        fBlurScratchRTs[which] = IFindRenderTarget(width, height, smap->GetFlags() & plRenderTarget::kIsOrtho);
    if( !fBlurDestRTs[which] )
        // Destination is same size as source.
        UInt32 width = smap->GetWidth();
        UInt32 height = smap->GetHeight();
        fBlurDestRTs[which] = IFindRenderTarget(width, height, smap->GetFlags() & plRenderTarget::kIsOrtho);
    // Check whether each of the two targets is already in hackOffscreens.
    if( hackOffscreens.kMissingIndex == hackOffscreens.Find(fBlurScratchRTs[which]) )
    if( hackOffscreens.kMissingIndex == hackOffscreens.Find(fBlurDestRTs[which]) )
    return which;

// IBlurSetRenderTarget /////////////////////////////////////////////////////////////////////
// Set the input render target up to be rendered into. This abbreviated version
// of PushRenderTarget is possible because of the special case of the state coming
// in, and that we know we're going to immediately pop the previous render target
// when we're done.
void plDXPipeline::IBlurSetRenderTarget(plRenderTarget* rt)
    // Uses rt's device ref directly; no nil check is made on it here.
    plDXRenderTargetRef* ref = (plDXRenderTargetRef *)rt->GetDeviceRef();
    // Set the rendertarget
    IDirect3DSurface9* main = ref->GetColorSurface();
    IDirect3DSurface9* depth = ref->fD3DDepthSurface;

    // Record both surfaces as current in the settings, then bind the color
    // surface as render target 0.
    fSettings.fCurrD3DMainSurface = main;
    fSettings.fCurrD3DDepthSurface = depth;
    fD3DDevice->SetRenderTarget(0, main);

    // Now set the correct viewport
    // The last two initializers are the depth range, 0 to 1.
    D3DVIEWPORT9 vp = { 0,
                        0.f, 1.f };

    WEAK_ERROR_CHECK( fD3DDevice->SetViewport( &vp ) );

// IRenderBlurFromShadowMap ////////////////////////////////////////////////////////////////////////////////
// Render a shadow map into a scratch render target multiple times offset slightly to create a blur
// in the color, preserving alpha exactly. It's just rendering a single quad with slight offsets
// in the UVW transform.
// Params:
//  scratchRT - its width and height determine the size of one offset step in UV space.
//  smap      - the shadow map; its device ref is bound as the texture on every stage used.
//  scale     - multiplier applied to the per-sample UV offsets.
// Side effects: overwrites device transforms, blend/Z/cull render states, texture stage
// states and the cached fLayerState / fLayerRef / fLayerXformFlags / fLayerUVWSrcs entries
// for the stages it touches.
void plDXPipeline::IRenderBlurFromShadowMap(plRenderTarget* scratchRT, plRenderTarget* smap, hsScalar scale)
    // Quad is set up in camera space.
    fD3DDevice->SetTransform(D3DTS_VIEW, &d3dIdentityMatrix);
    fD3DDevice->SetTransform(D3DTS_WORLD, &d3dIdentityMatrix);
    fD3DDevice->SetTransform(D3DTS_PROJECTION, &d3dIdentityMatrix);

    // Figure out how many passes we'll need.
    // We take 8 samples when mfCurrentTest > 101, otherwise 4, and split them over
    // enough passes that no pass needs more than fSettings.fMaxLayersAtOnce stages.
//  const int kNumSamples = 1 << kL2NumSamples; // HACKSAMPLE
    const int kNumSamples = mfCurrentTest > 101 ? 8 : 4;
    int nPasses = (int)hsCeil(float(kNumSamples) / fSettings.fMaxLayersAtOnce);
    int nSamplesPerPass = kNumSamples / nPasses;

    // Attenuate by number of passes, to average as we sum.
    DWORD atten = 255 / nPasses;
    plConst(float) kAtten(1.f);
    atten = DWORD(atten * kAtten);
    // Replicate the 8 bit attenuation into all four channels of the color.
    atten = (atten << 24)
        | (atten << 16)
        | (atten << 8)
        | (atten << 0);

    // Disable skinning
    fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_DISABLE);
    //  AlphaEnable = true
    //  AlphaTest OFF
    // Each pass is added to the frame buffer, weighted by source alpha
    // (SRCBLEND = SRCALPHA, DESTBLEND = ONE).
    fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE);
    fD3DDevice->SetRenderState(D3DRS_SRCBLEND,  D3DBLEND_SRCALPHA);
    fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ONE);
    fD3DDevice->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_ALWAYS);

    //  ZBUFFER disabled
    // The cached layer Z flags are updated to match what we just told the device.
    fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_ALWAYS);
    fD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, FALSE);
    fLayerState[0].fZFlags &= ~hsGMatState::kZMask;
    fLayerState[0].fZFlags |= hsGMatState::kZNoZWrite | hsGMatState::kZNoZRead;
    //  Cullmode is NONE
    fCurrCullMode = D3DCULL_NONE; 
    fD3DDevice->SetRenderState( D3DRS_CULLMODE, fCurrCullMode );

    plDXTextureRef* ref = (plDXTextureRef*)smap->GetDeviceRef();
    hsAssert(ref, "Shadow map ref should have been made when it was rendered");
    if( !ref )

    // TFactor contains the attenuation
    fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, atten);

    // Set the N texture stages all to use the same
    // src rendertarget texture.
    // Blend modes are:
    //  Stage0:
    //      Color
    //      Arg1 = texture
    //      Op = selectArg1
    //      Alpha
    //      Arg1 = TFACTOR = the attenuation set above (white when there is a single pass)
    //      Op = selectArg1
    //  Stage[1..n-1]
    //      Color
    //      Arg1 = texture
    //      Arg2 = current
    //      Op = AddSigned
    //      Alpha
    //      Arg1 = texture
    //      Arg2 = current
    //      Op = SelectArg2
    //  StageN
    //      Color/Alpha
    //      Op = disable
    // Frame buffer blend is
    //      SRCBLEND = SRCALPHA (as set in the render states above)
    //      DSTBLEND = ONE
    //  All texture stages are clamped
    // Set stage0, then loop over the rest
    fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
    fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
    fLayerState[0].fClampFlags = hsGMatState::kClampTexture;

    fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TEXTURE);
    fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_SELECTARG1);

    fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG1, D3DTA_TFACTOR); 
    fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG1);
    // NOTE(review): UInt32(-1) presumably marks the cached blend state as unknown so the
    // next normal material setup re-sends it - confirm against the layer state code.
    fLayerState[0].fBlendFlags = UInt32(-1);

    hsRefCnt_SafeAssign( fLayerRef[0], ref );
    fD3DDevice->SetTexture( 0, ref->fD3DTexture );

    // Two component (U,V) texture transform, sourcing UV channel 0.
    if( D3DTTFF_COUNT2 != fLayerXformFlags[0] )
        fLayerXformFlags[0] = D3DTTFF_COUNT2;
        fD3DDevice->SetTextureStageState(0, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_COUNT2);
    fD3DDevice->SetTextureStageState(0, D3DTSS_TEXCOORDINDEX, 0);
    fLayerUVWSrcs[0] = 0;

    // Remaining stages: same texture and clamping, but accumulate onto the
    // previous stage's color with ADDSIGNED and pass the alpha through.
    int i;
    for( i = 1; i < nSamplesPerPass; i++ )
        fD3DDevice->SetSamplerState(i, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
        fD3DDevice->SetSamplerState(i, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
        fLayerState[i].fClampFlags = hsGMatState::kClampTexture;

        fD3DDevice->SetTextureStageState(i, D3DTSS_COLORARG1, D3DTA_TEXTURE);
        fD3DDevice->SetTextureStageState(i, D3DTSS_COLORARG2, D3DTA_CURRENT);
        fD3DDevice->SetTextureStageState(i, D3DTSS_COLOROP,   D3DTOP_ADDSIGNED);

        fD3DDevice->SetTextureStageState(i, D3DTSS_ALPHAARG1, D3DTA_TEXTURE); 
        fD3DDevice->SetTextureStageState(i, D3DTSS_ALPHAARG2, D3DTA_CURRENT);
        fD3DDevice->SetTextureStageState(i, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2);
        fLayerState[i].fBlendFlags = UInt32(-1);

        hsRefCnt_SafeAssign( fLayerRef[i], ref );
        fD3DDevice->SetTexture( i, ref->fD3DTexture );

        if( D3DTTFF_COUNT2 != fLayerXformFlags[i] )
            fLayerXformFlags[i] = D3DTTFF_COUNT2;
            fD3DDevice->SetTextureStageState(i, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_COUNT2);
        fD3DDevice->SetTextureStageState(i, D3DTSS_TEXCOORDINDEX, 0);
        fLayerUVWSrcs[i] = 0;
    // Terminate the stage cascade right after the last stage we use.
    fD3DDevice->SetTextureStageState(nSamplesPerPass, D3DTSS_COLOROP, D3DTOP_DISABLE);
    fD3DDevice->SetTextureStageState(nSamplesPerPass, D3DTSS_ALPHAOP, D3DTOP_DISABLE);

    // N offsets are { (-1,-1), (1, -1), (1, 1), (-1, 1) } * offsetScale / size, with
    // useful offsetScales probably going from 0.5 to 1.5, but we'll just have
    // to experiment and see. Larger values likely to require more than the current
    // 4 samples
    // The table below holds 8 entries: the four diagonal offsets first, then four
    // half-step axis-aligned offsets.
    struct offsetStruct
        float   fU;
        float   fV;
    // One unit of offset is scale texels of the scratch render target.
    offsetStruct offsetScale = { scale / scratchRT->GetWidth(), scale / scratchRT->GetHeight() };
    static offsetStruct offsets[8] = {
        {-1.f,  -1.f},
        {1.f,   -1.f},
        {1.f,   1.f},
        {-1.f,  1.f},
        {0.f,   -0.5f},
        {0.f,   0.5f},
        {-0.5f, 0.f},
        {0.5f,  0.f}

    int iSample = 0;
    // For each pass, 
    for( i = 0; i < nPasses; i++ )
        // Set the N texture stage uv transforms to the
        // next N offsets.
        int j;
        for( j = 0; j < nSamplesPerPass; j++ )
            // Identity with the scaled offset written into elements (2,0) and (2,1).
            D3DXMATRIX offXfm = d3dIdentityMatrix;
            offXfm(2,0) = offsets[iSample].fU * offsetScale.fU;
            offXfm(2,1) = offsets[iSample].fV * offsetScale.fV;
            fD3DDevice->SetTransform(sTextureStages[j], &offXfm);
            fLayerTransform[j] = true;

        // Render our quad
        // Non-indexed strip of 2 triangles, starting at vertex 0 of the current stream.
        fD3DDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2);

        // Disabled experiment, kept for reference.
//      fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, 0L);
//      fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TEXTURE);
//      fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG2, D3DTA_TFACTOR);
//      fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_ADDSIGNED);
//      fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_SELECTARG2);


// IRenderBlurBackToShadowMap /////////////////////////////////////////////////////////////////////
// Render our intermediate blurred map back into a usable shadow map.
// Params:
//  smap    - the original shadow map; bound as texture 1, it supplies the output alpha.
//  scratch - the blurred intermediate; bound as texture 0, it supplies the output color.
//  dst     - the destination render target (not referenced by the statements visible below).
// Side effects: overwrites texture transforms and stage states for stages 0..2, the
// texture factor, the frame buffer blend, fLayerRef[0..1], fLayerTransform[0..1],
// fLayerState[0..1].fBlendFlags and fLastEndingStage.
void plDXPipeline::IRenderBlurBackToShadowMap(plRenderTarget* smap, plRenderTarget* scratch, plRenderTarget* dst)
    // Set the rendertarget

    // Clear it appropriately. This might not be necessary, since we're just going to overwrite.
    // Color target only, cleared to opaque black.
    fD3DDevice->Clear(0, nil, D3DCLEAR_TARGET, 0xff000000L, 1.0f, 0L);

    // Scratch has an all white alpha, and the blurred color from smap. But the color
    // is a signed biased color. We need to remap [128..255] from scratch into [0..255]
    // on dst. Plus, we need to copy the alpha as is from smap into dst.
    // So, scratch is texture0, smap is texture1. TFACTOR is 0.
    // Color is ADDSIGNED2X(TFACTOR, texture0).
    // Alpha is SELECTARG1(texture1, current).
    // Then FB blend is just opaque copy.

    // Set Stage0 texture transform
    // Clamp still on (from RBFSM)
    // Both stages get an identity UV transform, and the cached flags are cleared to match.
    D3DXMATRIX offXfm = d3dIdentityMatrix;
    fD3DDevice->SetTransform(sTextureStages[0], &offXfm);
    fD3DDevice->SetTransform(sTextureStages[1], &offXfm);
    fLayerTransform[0] = false;
    fLayerTransform[1] = false;

    // Texture 0 = blurred scratch map.
    plDXTextureRef* ref = (plDXTextureRef*)scratch->GetDeviceRef();
    hsAssert(ref, "Blur scratch map ref should have been made when it was rendered");
    if( !ref )
    hsRefCnt_SafeAssign( fLayerRef[0], ref );
    fD3DDevice->SetTexture( 0, ref->fD3DTexture );

    // Texture 1 = original shadow map.
    ref = (plDXTextureRef*)smap->GetDeviceRef();
    hsAssert(ref, "Blur src map ref should have been made when it was rendered");
    if( !ref )
    hsRefCnt_SafeAssign( fLayerRef[1], ref );
    fD3DDevice->SetTexture( 1, ref->fD3DTexture );

    // Stage0:
    //      Color
    //      Arg1 = TFACTOR = black
    //      Arg2 = texture
    //      Op = ADDSIGNED2X
    //      Alpha
    //      Arg1 = texture
    //      Op = selectArg1
    //  Texture = scratch
    // Stage1:
    //      Color
    //      Arg1 = texture
    //      Arg2 = current
    //      Op = selectArg2
    //      Alpha
    //      Arg1 = texture
    //      Op = selectArg1
    //  Texture = smap
    // FB blend 
    //      SRCBLEND = ONE
    //      DSTBLEND = ZERO

    fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, 0L);
    fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TFACTOR);
    fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG2, D3DTA_TEXTURE);
    fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_ADDSIGNED2X);

    // This alpha will be ignored, because in the next stage we select texture alpha.
    fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG1, D3DTA_TEXTURE); 
    fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG1);

    fLayerState[0].fBlendFlags = UInt32(-1);

    // Stage 1 passes stage 0's color through and takes alpha from its own texture (smap).
    fD3DDevice->SetTextureStageState(1, D3DTSS_COLORARG2, D3DTA_CURRENT);
    fD3DDevice->SetTextureStageState(1, D3DTSS_COLOROP,   D3DTOP_SELECTARG2);

    fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAARG1, D3DTA_TEXTURE); 
    fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG1);

    fLayerState[1].fBlendFlags = UInt32(-1);

    // Stage 2 ends the cascade; record that for whoever sets up stages next.
    fD3DDevice->SetTextureStageState(2, D3DTSS_COLOROP, D3DTOP_DISABLE);
    fD3DDevice->SetTextureStageState(2, D3DTSS_ALPHAOP, D3DTOP_DISABLE);

    fLastEndingStage = 2;

    // Opaque copy into the frame buffer.
    fD3DDevice->SetRenderState(D3DRS_SRCBLEND,  D3DBLEND_ONE);
    fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ZERO);

    // Our quad should still be setup to go.
    fD3DDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2);


// Vertex layout of the blur quad: a 3 float position followed by one 2 float UV set.
// The blur vertex buffer code uses sizeof(plShadowVertStruct) as the stream stride.
struct plShadowVertStruct
    float       fPos[3];    // x, y, z
    float       fUv[2];     // u, v

// IReleaseBlurVBuffers //////////////////////////////////////////////////////////
// Free up our blur quad vertex buffers. Note these are in POOL_DEFAULT
// Walks all kMaxRenderTargetNext slots of fBlurVBuffers; every non-nil slot is
// reported to the pool memory profiler as freed (4 verts of kVSize bytes) and reset to nil.
void plDXPipeline::IReleaseBlurVBuffers()
    const UInt32 kVSize = sizeof(plShadowVertStruct);
    int i;
    for( i = 0; i < kMaxRenderTargetNext; i++ )
        if (fBlurVBuffers[i])
            PROFILE_POOL_MEM(D3DPOOL_DEFAULT, 4 * kVSize, false, "BlurVtxBuff");
            fBlurVBuffers[i] = nil;

// ICreateBlurVBuffers //////////////////////////////////////////////////////////////////
// We need a quad for each size of shadow map, because there's a slight dependency
// of UVW coordinates on size of render target. Sucks but it's true.
// Returns false if creating or locking a vertex buffer fails, true otherwise.
// Created buffers are stored in fBlurVBuffers, indexed by log2 of the map size.
hsBool plDXPipeline::ICreateBlurVBuffers()
    // Each buffer holds 4 verts, with 3 floats each for position, and 2 floats each for uv
    // (see plShadowVertStruct).
    const UInt32 kVSize = sizeof(plShadowVertStruct);
    const UInt32 kVFormat = D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE2(0) ;

    int i;
    for( i = 0; i < kMaxRenderTargetNext; i++ )
        // Slot i corresponds to a square map of (1 << i) texels per side.
        int width = 0;
        int height = 0;
        int which = -1;
        switch( i )
        case 0:
        case 1:
        case 2:
        case 3:
        case 4:
        case 5:
            width = height = 1 << i;
            which = i;
        case 6:
            width = height = 1 << i;
            which = i;
        case 7:
            width = height = 1 << i;
            which = i;
        case 8:
            width = height = 1 << i;
            which = i;
        case 9:
            width = height = 1 << i;
            which = i;
        if( which < 0 )

        // positions are { (-0.5,-0.5,0,1), (w-0.5,-0.5,0,1), (w-0.5,h-0.5,0,1), (-0.5,h-0.5,0,1) }
        // UVs are { (0,0), (1,0), (1,1), (0,1) }
        // So we won't have to bother with indices, we'll put them in as
        // p1, p2, p0, p3 and render tristrip
        // NOTE(review): the two lists above look stale. The values written below span
        // -1..1 in x and y at z = 0.5, with UVs shifted by half a texel and V flipped.

        // Create the buffer.
        IDirect3DVertexBuffer9* vBuffer = nil;

        UInt32 fvfFormat = kVFormat;
        hsAssert(!ManagedAlloced(), "Alloc default with managed alloc'd");
        if( FAILED( fD3DDevice->CreateVertexBuffer( 4 * kVSize,
                                                    &vBuffer, NULL) ) )
            hsAssert( false, "CreateVertexBuffer() call failed!" );
            return false;
        plShadowVertStruct* ptr = nil;

        /// Lock the buffer and fill it in.
        if( FAILED( vBuffer->Lock( 0, 0, (void **)&ptr, 0 ) ) )
            hsAssert( false, "Failed to lock vertex buffer for writing" );
            return false;
        PROFILE_POOL_MEM(D3DPOOL_DEFAULT, 4 * kVSize, true, "BlurVtxBuff");

        // Template vertex: the (-1,-1) corner, with UV (0.5/width, 1 + 0.5/height).
        // The other three corners are derived from it by adding 2 to a position
        // component and adding or subtracting 1 from the matching UV component.
        plShadowVertStruct vert;
        vert.fPos[0] = -1.f;
        vert.fPos[1] = -1.f;
        vert.fPos[2] = 0.5f;

        vert.fUv[0] = 0.5f / width;
        vert.fUv[1] = 1.f + 0.5f / height;

        // P0
        ptr[2] = vert;

        // P1
        ptr[0] = vert;
        ptr[0].fPos[0] += 2.f;
        ptr[0].fUv[0] += 1.f;

        // P2
        ptr[1] = vert;
        ptr[1].fPos[0] += 2.f;
        ptr[1].fUv[0] += 1.f;
        ptr[1].fPos[1] += 2.f;
        ptr[1].fUv[1] -= 1.f;

        // P3
        ptr[3] = vert;
        ptr[3].fPos[1] += 2.f;
        ptr[3].fUv[1] -= 1.f;


        // Keep the buffer for ISetBlurQuadToRender to pick up by size index.
        fBlurVBuffers[which] = vBuffer;
    return true;

// ISetBlurQuadToRender ////////////////////////////////////////////////////
// Select the appropriate blur quad (based on size of shadow map) and set it up to render.
// Param smap is the shadow map render target the quad is chosen for.
// Sizes 512, 256, 128, 64 and 32 map to fBlurVBuffers slots 9, 8, 7, 6 and 5.
// Returns true once the quad is bound as stream 0. The `return false` after the
// case list is presumably the path for any other size - confirm.
hsBool plDXPipeline::ISetBlurQuadToRender(plRenderTarget* smap)
    const UInt32 kVSize = sizeof(plShadowVertStruct);
    const UInt32 kVFormat = D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE2(0) ;

    // Each vb will be rendertarget size specific, so select one based on input rendertarget
    int which = -1;
    case 512:
        which = 9;
    case 256:
        which = 8;
    case 128:
        which = 7;
    case 64:
        which = 6;
    case 32:
        which = 5;
        return false;

    // If we haven't created (or have lost) our d3d resources, make them
    IDirect3DVertexBuffer9* vBuffer = fBlurVBuffers[which];
    if( !vBuffer )
        vBuffer = fBlurVBuffers[which];
        hsAssert(vBuffer, "AllocBlurVBuffers failed");

    // Fixed function path: no vertex shader, FVF matching plShadowVertStruct.
    // The cached current shader and FVF are updated in the same statements.
    HRESULT r = fD3DDevice->SetVertexShader(fSettings.fCurrVertexShader = NULL);
    fD3DDevice->SetFVF(fSettings.fCurrFVFFormat = kVFormat);
    hsAssert( r == D3D_OK, "Error trying to set the vertex shader!" );

    // The stream is about to point at a buffer with no ref object, so clear the cached ref.
    fSettings.fCurrVertexBuffRef = nil;

    r = fD3DDevice->SetStreamSource(0, vBuffer, 0, kVSize);

    // No SetIndices, we'll do a direct DrawPrimitive (not DrawIndexedPrimitive)

    // No transforms, we're supplying screen ready verts.

    return true;

// IRenderShadowCasterSpan //////////////////////////////////////////////////////////////////////
// Render the span into a rendertarget of the correct size, generating
// a depth map from this light to that span.
// Params:
//  slave    - the shadow slave being rendered for; queried here for ReverseCull().
//  drawable - the drawable that owns the span and its vertex/index buffer refs.
//  span     - the icicle to draw; supplies the buffer group/indices and vertex/index ranges.
void plDXPipeline::IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSpans* drawable, const plIcicle& span)
    // Check that it's ready to render.
    ICheckDynBuffers(drawable, drawable->GetBufferGroup(span.fGroupIdx), &span);

    plDXVertexBufferRef*    vRef = (plDXVertexBufferRef *)drawable->GetVertexRef(span.fGroupIdx, span.fVBufferIdx);
    plDXIndexBufferRef* iRef = (plDXIndexBufferRef *)drawable->GetIndexRef(span.fGroupIdx, span.fIBufferIdx);

    HRESULT     r;

    if( vRef->fD3DBuffer == nil || iRef->fD3DBuffer == nil )
        hsAssert( false, "Trying to render a nil buffer pair!" );

    /// Switch to the vertex buffer we want
    if( fSettings.fCurrVertexBuffRef != vRef )
        hsRefCnt_SafeAssign( fSettings.fCurrVertexBuffRef, vRef );
        hsAssert( vRef->fD3DBuffer != nil, "Trying to render a buffer pair without a vertex buffer!" );

    // Bind the stream and pick the fixed function format for this buffer.
    if( vRef->RebuiltSinceUsed() )
        r = fD3DDevice->SetStreamSource( 0, vRef->fD3DBuffer, 0, vRef->fVertexSize );
        hsAssert( r == D3D_OK, "Error trying to set the stream source!" );

        fSettings.fCurrFVFFormat = IGetBufferD3DFormat(vRef->fFormat);
        r = fD3DDevice->SetVertexShader(fSettings.fCurrVertexShader = NULL);
        hsAssert( r == D3D_OK, "Error trying to set the vertex shader!" );



    /// Switch to the index buffer we want
    if( fSettings.fCurrIndexBuffRef != iRef )
        hsRefCnt_SafeAssign( fSettings.fCurrIndexBuffRef, iRef );
        hsAssert( iRef->fD3DBuffer != nil, "Trying to render with a nil index buffer" );

    if( iRef->RebuiltSinceUsed() )
        r = fD3DDevice->SetIndices( iRef->fD3DBuffer );
        hsAssert( r == D3D_OK, "Error trying to set the indices!" );

    UInt32                  vStart = span.fVStartIdx;
    UInt32                  vLength = span.fVLength;
    UInt32                  iStart = span.fIPackedIdx;
    UInt32                  iLength= span.fILength;

    // Triangle list draw functor; iLength / 3 is the triangle count.
    plRenderTriListFunc render(fD3DDevice, iRef->fOffset, vStart, vLength, iStart, iLength/3);

    // NOTE(review): emptyMatrix is a default constructed static; what ISetupTransforms
    // does with this last argument is not visible here - confirm.
    static hsMatrix44 emptyMatrix;
    hsMatrix44 m = emptyMatrix;

    ISetupTransforms(drawable, span, m);

    hsBool flip = slave->ReverseCull();


// IGetULutTextureRef ///////////////////////////////////////////////////////////
// The ULut just translates a U coordinate in range [0..1] into 
// color and alpha of U * 255.9f. We just have the one we keep
// lying around.
// Built on first use and cached in fULutTextureRef; later calls return the cached ref.
// The texture is 256x1, A8R8G8B8, with a single mip level.
plDXTextureRef* plDXPipeline::IGetULutTextureRef()
    const int width = 256;
    const int height = 1;
    if( !fULutTextureRef )
        UInt32* tData = TRACKED_NEW UInt32[width * height];

        // Texel i is written with the value i in all four 8 bit channels.
        UInt32* pData = tData;
        int j;
        for( j = 0; j < height; j++ )
            int i;
            for( i = 0; i < width; i++ )
                *pData = (i << 24)
                    | (i << 16)
                    | (i << 8)
                    | (i << 0);

        plDXTextureRef* ref = TRACKED_NEW plDXTextureRef( D3DFMT_A8R8G8B8, 
                                              1, // Num mip levels
                                              width, height, // width by height
                                              width * height, // numpix
                                              width*height*sizeof(UInt32), // totalsize
                                              nil, // levels data
                                              false // externData

        fULutTextureRef = ref;
    return fULutTextureRef;

// IFindRenderTarget //////////////////////////////////////////////////////////////////
// Find a matching render target from the pools. We prefer the requested size, but
// will look for a smaller size if there isn't one available.
// Param ortho indicates whether it will be used for orthogonal projection as opposed
// to perspective (directional light vs. point light), but is no longer used.
// Params width and height are passed by reference: when we fall back to a smaller
// size they are halved in place, so on return they describe the size we searched last.
// Returns nil if no render target is available.
plRenderTarget* plDXPipeline::IFindRenderTarget(UInt32& width, UInt32& height, hsBool ortho)
    // Pick the pool for this size, and the matching "next free" counter
    // (fRenderTargetNext is indexed by log2 of the size).
    hsTArray<plRenderTarget*>* pool = nil;
    UInt32* iNext = nil;
    case 512:
        pool = &fRenderTargetPool512;
        iNext = &fRenderTargetNext[9];
    case 256:
        pool = &fRenderTargetPool256;
        iNext = &fRenderTargetNext[8];
    case 128:
        pool = &fRenderTargetPool128;
        iNext = &fRenderTargetNext[7];
    case 64:
        pool = &fRenderTargetPool64;
        iNext = &fRenderTargetNext[6];
    case 32:
        pool = &fRenderTargetPool32;
        iNext = &fRenderTargetNext[5];
        return nil;
    plRenderTarget* rt = (*pool)[*iNext];
    if( !rt )
        // We didn't find one, try again the next size down.
        // 32 is the smallest pooled size, so stop recursing there.
        if( height > 32 )
            return IFindRenderTarget(width >>= 1, height >>= 1, ortho);

        // We must be totally out. Oh well.
        return nil;

    return rt;

// IPushShadowCastState ////////////////////////////////////////////////////////////////////////////////
// Push all the state necessary to start rendering this shadow map, but independent of the
// actual shadow caster to be rendered into the map.
// Param slave is the shadow slave to set up for. Its fWidth/fHeight may be reduced by
// IFindRenderTarget (they are passed by reference), and its fPipeData is set to the
// render target chosen.
// Returns false if no render target is available or the slave's SetupViewTransform
// fails, true otherwise.
hsBool plDXPipeline::IPushShadowCastState(plShadowSlave* slave)
    plRenderTarget* renderTarg = IFindRenderTarget(slave->fWidth, slave->fHeight, slave->fView.GetOrthogonal());
    if( !renderTarg )
        return false;

    // Let the slave setup the transforms, viewport, etc. necessary to render its shadow
    // map. This just goes into a plViewTransform, we translate that into D3D state ourselves below.
    if (!slave->SetupViewTransform(this))
        return false;

    // Turn off fogging and specular.
    // The cached fog and shade state are cleared to match.
    fD3DDevice->SetRenderState(D3DRS_FOGENABLE, FALSE);
    fCurrFog.fEnvPtr = nil;
    fD3DDevice->SetRenderState(D3DRS_SPECULARENABLE, FALSE);
    fLayerState[0].fShadeFlags &= ~hsGMatState::kShadeSpecular;

    // Push the shadow slave's view transform as our current render state.
    fView.fCullMaxNodes = 0;

    // Push the shadow map as the current render target

    // We'll be rendering the light space distance to the span fragment into
    // alpha (color is white), so our camera space position, transformed into light space
    // and then converted to [0..255] via our ULut.

    // For stage 0:
    // Set uvw src
    UInt32 xformFlags = D3DTTFF_COUNT3;

    if( xformFlags != fLayerXformFlags[0] )
        fLayerXformFlags[0] = xformFlags;
        fD3DDevice->SetTextureStageState(0, D3DTSS_TEXTURETRANSFORMFLAGS, xformFlags);

    // Set texture transform to slave's lut transform. See plShadowMaster::IComputeLUT().
    // For slaves flagged kCastInCameraSpace the LUT is concatenated with camera-to-world.
    hsMatrix44 castLUT = slave->fCastLUT;
    if( slave->fFlags & plShadowSlave::kCastInCameraSpace )
        hsMatrix44 c2w = GetCameraToWorld();

        castLUT = castLUT * c2w;

    D3DXMATRIX tXfm;
    IMatrix44ToD3DMatrix(tXfm, castLUT);

    fD3DDevice->SetTransform( sTextureStages[0], &tXfm );
    fLayerTransform[0] = true;

    // Set texture to clamp
    fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
    fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
    fLayerState[0].fClampFlags = hsGMatState::kClampTexture;

    // Default clear is opaque black.
    DWORD clearColor = 0xff000000L;
//  const int l2NumSamples = kL2NumSamples; // HACKSAMPLE
    const int l2NumSamples = mfCurrentTest > 101 ? 3 : 2;
    DWORD intens;
    // When the slave asks for blur (fBlurScale > 0), the caster color and the clear
    // color are placed symmetrically around 128: (128 + k) and (128 - k) in each of
    // R, G, B with full alpha, where k = 128 / samples per pass. The pass computation
    // mirrors the one in IRenderBlurFromShadowMap.
    if( slave->fBlurScale > 0 )
        const int kNumSamples = mfCurrentTest > 101 ? 8 : 4;
        int nPasses = (int)hsCeil(float(kNumSamples) / fSettings.fMaxLayersAtOnce);
        int nSamplesPerPass = kNumSamples / nPasses;
        DWORD k = int(128.f / float(nSamplesPerPass));
        intens = (0xff << 24)
            | ((128 + k) << 16)
            | ((128 + k) << 8)
            | ((128 + k) << 0);
        clearColor = (0xff << 24)
            | ((128 - k) << 16)
            | ((128 - k) << 8)
            | ((128 - k) << 0);
        // Plain opaque white.
        intens = 0xffffffff;

    // Note that we discard the shadow caster's alpha here, although we don't
    // need to. Even on a 2 texture stage system, we could include the diffuse
    // alpha and the texture alpha from the base texture. But we don't.

    // Set color to white. We could accomplish this easier by making the color
    // in our ULut white.
    fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, intens);

    // Stage 0: color comes from TFACTOR, alpha from the texture (the ULut bound below).
    fSettings.fVeryAnnoyingTextureInvalidFlag = true;
    fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TFACTOR);
    fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_SELECTARG1);

    fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG1, D3DTA_TEXTURE); 
    fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG1);
    fLayerState[0].fBlendFlags = UInt32(-1);

    // For stage 1 - disable
    fLastEndingStage = 1;
    fD3DDevice->SetTextureStageState(1, D3DTSS_COLOROP, D3DTOP_DISABLE);
    fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
    fLayerState[1].fBlendFlags = UInt32(-1);

    // Set texture to U_LUT
    plDXTextureRef* ref = IGetULutTextureRef();

    // Reload the D3D texture from the ref's data if the ref has data but no texture.
    if( !ref->fD3DTexture )
        if( ref->fData )
            IReloadTexture( ref );

    hsRefCnt_SafeAssign( fLayerRef[0], ref );
    fD3DDevice->SetTexture( 0, ref->fD3DTexture );

    // Opaque write into the shadow map (SRCBLEND = ONE, DESTBLEND = ZERO), alpha test always passes.
    fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE);
    fD3DDevice->SetRenderState(D3DRS_SRCBLEND,  D3DBLEND_ONE);
    fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ZERO);

    fD3DDevice->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_ALWAYS);

    // Remember which render target this slave's shadow map lives in.
    slave->fPipeData = renderTarg;

    // Enable ZBuffering w/ write
    fD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, TRUE);
    fLayerState[0].fZFlags &= ~hsGMatState::kZMask;

    // Clear the render target:
    // alpha to white ensures no shadow where there's no caster
    // color to black in case we ever get blurring going
    // Z to 1
    // Stencil ignored
    // Two variants are visible: GREATEREQUAL with Z cleared to 0, and LESSEQUAL
    // with Z cleared to 1. slave->ReverseZ() is the condition tested.
    if( slave->ReverseZ() )
        fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_GREATEREQUAL);
        fD3DDevice->Clear(0, nil, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, clearColor, 0.0f, 0L);
        fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_LESSEQUAL);
        fD3DDevice->Clear(0, nil, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, clearColor, 1.0f, 0L);

    // Bring the viewport in (AFTER THE CLEAR) to protect the alpha boundary.
    // That leaves a one texel border of cleared values around the map.
    fView.fTransform.SetViewPort(1, 1, (float)(slave->fWidth-2), (float)(slave->fHeight-2), false);


    // See ISetupShadowLight below for how the shadow light is used.
    // The shadow light isn't used in generating the shadow map, it's used
    // in projecting the shadow map onto the scene.

    return true;

// ISetupShadowLight //////////////////////////////////////////////////////////////////
// We use the shadow light to modulate the shadow effect in two ways while 
// projecting the shadow map onto the scene.
// First, the intensity of the shadow follows the N dot L of the light on
// the surface being projected onto. So on a sphere, the darkening effect
// of the shadow will fall off as the normals go from pointing to the light to
// pointing 90 degrees off.
// Second, we attenuate the whole shadow effect through the light's diffuse color.
// We attenuate for different reasons, like the intensity of the light, or
// to fade out a shadow as it gets too far in the distance to matter.
// Param slave supplies the power, position or direction for the light, and gets
// its fLightIndex set to the D3D index of the light used. fSelfShadowOn is reset to false.
void plDXPipeline::ISetupShadowLight(plShadowSlave* slave)
    // Grab this frame's scratch light for the slave.
    plDXLightRef* lRef = INextShadowLight(slave);

    // The diffuse channels are all set to the slave's power.
        = lRef->fD3DInfo.Diffuse.g 
        = lRef->fD3DInfo.Diffuse.b
        = slave->fPower;

    slave->fSelfShadowOn = false;

    // Positional slaves get a point light at the slave's light position with the
    // maximum range and constant attenuation; the directional setup follows it.
    if( slave->Positional() )
        hsPoint3 position = slave->fLightPos;
        lRef->fD3DInfo.Position.x = position.fX;
        lRef->fD3DInfo.Position.y = position.fY;
        lRef->fD3DInfo.Position.z = position.fZ;

        const float maxRange = 32767.f;
        lRef->fD3DInfo.Range = maxRange;
        lRef->fD3DInfo.Attenuation0 = 1.f;
        lRef->fD3DInfo.Attenuation1 = 0;
        lRef->fD3DInfo.Attenuation2 = 0;

        lRef->fD3DInfo.Type = D3DLIGHT_POINT;
        hsVector3 dir = slave->fLightDir;
        lRef->fD3DInfo.Direction.x = dir.fX;
        lRef->fD3DInfo.Direction.y = dir.fY;
        lRef->fD3DInfo.Direction.z = dir.fZ;

        lRef->fD3DInfo.Type = D3DLIGHT_DIRECTIONAL;

    // Send the updated description to the device.
    fD3DDevice->SetLight( lRef->fD3DIndex, &lRef->fD3DInfo );

    slave->fLightIndex = lRef->fD3DIndex;

// INextShadowLight /////////////////////////////////////////////////////
// Get a scratch light for this shadow slave and assign it. The slave
// only keeps it for this render frame.
// The light ref in slot fLights.fNextShadowLight is created the first time that
// slot is needed. The slot index is stored in slave->fLightRefIdx and the counter
// is advanced as the ref is returned.
plDXLightRef* plDXPipeline::INextShadowLight(plShadowSlave* slave)

    if( !fLights.fShadowLights[fLights.fNextShadowLight] )
        plDXLightRef    *lRef = TRACKED_NEW plDXLightRef();
        /// Assign stuff and update
        // A scratch light has no owning light object.
        lRef->fD3DIndex = fLights.ReserveD3DIndex();
        lRef->fOwner = nil;
        lRef->fD3DDevice = fD3DDevice;

        lRef->Link( &fLights.fRefList );

        fLights.fShadowLights[fLights.fNextShadowLight] = lRef;

        // Neutralize it until we need it.
        fD3DDevice->LightEnable(lRef->fD3DIndex, false);

        // Some things never change.
        // Everything is zeroed; ambient and specular are then explicitly set to 0 as well.
        memset(&lRef->fD3DInfo, 0, sizeof(lRef->fD3DInfo));
        lRef->fD3DInfo.Ambient.r = lRef->fD3DInfo.Ambient.g = lRef->fD3DInfo.Ambient.b = 0;
        lRef->fD3DInfo.Specular.r = lRef->fD3DInfo.Specular.g = lRef->fD3DInfo.Specular.b = 0;

    slave->fLightRefIdx = fLights.fNextShadowLight;

    return fLights.fShadowLights[fLights.fNextShadowLight++];

// IPopShadowCastState ///////////////////////////////////////////////////
// Pop the state set to render this shadow caster, so we're ready to render
// a different shadow caster, or go on to our main render.
// Restores fView from the top of fSettings.fViewStack and flags the projection
// and camera transforms for reset. Param slave is not used by the statements
// visible here. Always returns true.
hsBool plDXPipeline::IPopShadowCastState(plShadowSlave* slave)
    fView = fSettings.fViewStack.Pop();

    fView.fXformResetFlags = fView.kResetProjection | fView.kResetCamera;

    return true;

// IMakeRenderTargetPools /////////////////////////////////////////////////////////////
// These are actually only used as shadow map pools, but they could be used for other
// render targets.
// All these are created here in a single call because they go in POOL_DEFAULT, so they
// must be created before we start creating things in POOL_MANAGED.
// Any existing pools are released first. Each pool entry is a square, 32 bit color,
// 24 bit depth, no stencil render target flagged kIsTexture | kIsProjected.
void plDXPipeline::IMakeRenderTargetPools()
    hsAssert(!fManagedAlloced, "Allocating rendertargets with managed resources alloced");
    IReleaseRenderTargetPools(); // Just to be sure.

    // Numbers of render targets to be created for each size.
    // These numbers were set with multi-player in mind, so should be reconsidered.
    // But do keep in mind that there are many things in production assets that cast
    // shadows besides the avatar.
    // Indexed by log2 of the size, like fRenderTargetNext.
    plConst(hsScalar)   kCount[kMaxRenderTargetNext] = {
        0, // 1x1
        0, // 2x2
        0, // 4x4
        0, // 8x8
        0, // 16x16
        32, // 32x32
        16, // 64x64
        8, // 128x128
        4, // 256x256
        0 // 512x512
    int i;
    for( i = 0; i < kMaxRenderTargetNext; i++ )
        // Sizes below 32x32 have no pool.
        hsTArray<plRenderTarget*>* pool = nil;
        switch( i )
        case 0:
        case 1:
        case 2:
        case 3:
        case 4:

        case 5:
            pool = &fRenderTargetPool32;
        case 6:
            pool = &fRenderTargetPool64;
        case 7:
            pool = &fRenderTargetPool128;
        case 8:
            pool = &fRenderTargetPool256;
        case 9:
            pool = &fRenderTargetPool512;
        if( pool )
            // Slot kCount[i] is set to nil, one past the last render target created below.
            (*pool)[0] = nil;
            (*pool)[(int)(kCount[i])] = nil;
            int j;
            for( j = 0; j < kCount[i]; j++ )
                UInt16 flags = plRenderTarget::kIsTexture | plRenderTarget::kIsProjected;
                UInt8 bitDepth = 32;
                UInt8 zDepth = 24;
                UInt8 stencilDepth = 0;
                // If we ever allow non-square shadows, change this.
                int width = 1 << i;
                int height = width; 

                plRenderTarget* rt = TRACKED_NEW plRenderTarget(flags, width, height, bitDepth, zDepth, stencilDepth);

                // If we've failed to create our render target ref, we're probably out of
                // video memory. We'll return nil, and this guy just doesn't get a shadow
                // until more video memory turns up (not likely).
                // The pool's first entry is passed along with the new render target.
                if( !SharedRenderTargetRef((*pool)[0], rt) )
                    delete rt;
                    (*pool)[j] = nil;
                (*pool)[j] = rt;

// IResetRenderTargetPools /////////////////////////////////////////////////////////////////
// No release of resources, this just resets for the start of a frame. So if a shadow
// slave gets a render target from a pool, once this is called (conceptually at the
// end of the frame), the slave no longer owns that render target.
// For every size slot this zeroes the next-index counter and forgets the blur scratch
// and blur destination render targets. The shadow light counter is rewound as well.
void plDXPipeline::IResetRenderTargetPools()
    int i;
    for( i = 0; i < kMaxRenderTargetNext; i++ )
        fRenderTargetNext[i] = 0;
        fBlurScratchRTs[i] = nil;
        fBlurDestRTs[i] = nil;

    fLights.fNextShadowLight = 0;

// IPrepShadowCaster ////////////////////////////////////////////////////////////////////////
// Make sure all the geometry in this shadow caster is ready to be rendered.
// Keep in mind the single shadow caster may be multiple spans possibly in
// multiple drawables.
// The tricky part here is that we need to prep each drawable involved,
// but only prep it once. Say the caster is composed of:
// drawableA, span0
// drawableA, span1
// drawableB, span0
// Then we need to call plDrawable::PrepForRender() ONCE on drawableA,
// and once on drawableB. Further, we need to do any necessary CPU
// skinning with ISoftwareVertexBlend(drawableA, visList={0,1}) and
// ISoftwareVertexBlend(drawableB, visList={1}).
// Returns false as soon as ISoftwareVertexBlend fails for a drawable, true otherwise.
// Note that the "done" bit vector and visList are function-level statics.
hsBool plDXPipeline::IPrepShadowCaster(const plShadowCaster* caster)
    // Bit i is tested to see whether castSpans[i] has already been handled.
    static hsBitVector done;
    const hsTArray<plShadowCastSpan>& castSpans = caster->Spans();

    int i;
    for( i = 0; i < castSpans.GetCount(); i++ )
        if( !done.IsBitSet(i) )
            // We haven't already done this castSpan

            plDrawableSpans* drawable = castSpans[i].fDraw;

            // Start a visList with this index.
            static hsTArray<Int16> visList;
            // We're about to have done this castSpan.

            // Look forward through castSpans for any other spans
            // with the same drawable, and add them to visList.
            // We'll handle all the spans from this drawable at once.
            int j;
            for( j = i+1; j < castSpans.GetCount(); j++ )
                if( !done.IsBitSet(j) && (castSpans[j].fDraw == drawable) )
                    // Add to list

                    // We're about to have done this castSpan.
            // That's all, prep the drawable.
            drawable->PrepForRender( this );

            // Do any software skinning.
            if( !ISoftwareVertexBlend(drawable, visList) )
                return false;

    return true;

// IRenderShadowCaster ////////////////////////////////////////////////
// Render the shadow caster into the slave's render target, creating a shadow map.
// Returns false if IPushShadowCastState() or IPrepShadowCaster() fails, true otherwise.
hsBool plDXPipeline::IRenderShadowCaster(plShadowSlave* slave)
    const plShadowCaster* caster = slave->fCaster;

    // Setup to render into the slave's render target.
    if( !IPushShadowCastState(slave) )
        return false;

    // Get the shadow caster ready to render.
    if( !IPrepShadowCaster(slave->fCaster) )
        return false;

    // for each shadowCaster.fSpans
    int iSpan;
    for( iSpan = 0; iSpan < caster->Spans().GetCount(); iSpan++ )
        plDrawableSpans* dr = caster->Spans()[iSpan].fDraw;
        const plSpan* sp = caster->Spans()[iSpan].fSpan;
        UInt32 spIdx = caster->Spans()[iSpan].fIndex;

        // The cast to plIcicle below depends on the span being an icicle span.
        hsAssert(sp->fTypeMask & plSpan::kIcicleSpan, "Shadow casting from non-trimeshes not currently supported");

        // render shadowcaster.fSpans[i] to rendertarget
        // Spans flagged kPropNoShadowCast are not drawn into the shadow map.
        if( !(sp->fProps & plSpan::kPropNoShadowCast) )
            IRenderShadowCasterSpan(slave, dr, *(const plIcicle*)sp);

        // Keep track of which shadow slaves this span was rendered into.
        // If self-shadowing is off, we use that to determine not to
        // project the shadow map onto its source geometry.
        sp->SetShadowBit(slave->fIndex); //index set in SubmitShadowSlave

    // Debug only.
    // A non-negative blurScale replaces the slave's own blur scale.
    // NOTE(review): blurScale is not declared in this function as shown - confirm its origin.
    if( blurScale >= 0.f )
        slave->fBlurScale = blurScale;

    // If this shadow requests being blurred, do it.
    // NOTE(review): no statement follows this condition here - confirm the blur call.
    if( slave->fBlurScale > 0.f )

    // Finished up, restore previous state.

    if (IsDebugFlagSet(plPipeDbg::kFlagShowShadowBounds))
        /// Add a span to our boundsIce to show this
        IAddBoundsSpan(fBoundsSpans, &slave->fWorldBounds);

    return true;

// IPreprocessShadows ///////////////////////////////////////////////////////////////////////
// We have a (possibly empty) list of shadows submitted for this frame.
// At BeginRender, we need to accomplish:
//  Find render targets for each shadow request of the requested size.
//  Render the associated spans into the render targets. Something like the following:
void plDXPipeline::IPreprocessShadows()

    // Mark our shared resources as free to be used.

    // Some board (possibly the Parhelia) freaked if anisotropic filtering
    // was enabled when rendering to a render target. We never need it for
    // shadow maps, and it is slower, so we just kill it here.

    // Generate a shadow map for each submitted shadow slave.
    // Shadow slave corresponds to one shadow caster paired
    // with one shadow light that affects it. So a single caster
    // may be in multiple slaves (from different lights), or a
    // single light may be in different slaves (affecting different
    // casters). The overall number is low in spite of the possible
    // permutation explosion, because a slave is only generated
    // for a caster being affected (in range etc.) by a light.
    int iSlave;
    for( iSlave = 0; iSlave < fShadows.GetCount(); iSlave++ )
        plShadowSlave* slave = fShadows[iSlave];
        // Any trouble, remove it from the list for this frame.
        // NOTE(review): the removal statement itself is not present after this test - confirm.
        if( !IRenderShadowCaster(slave) )


    // Restore


// IClearShadowSlaves ///////////////////////////////////////////////////////////////////////////
// At EndRender(), we need to clear our list of shadow slaves. They are only valid for one frame.
void plDXPipeline::IClearShadowSlaves()
    int i;
    // Visit every slave submitted this frame and look up its caster.
    // NOTE(review): nothing is done with the caster, and fShadows is not emptied,
    // in the code visible here - confirm the intended per-caster cleanup.
    for( i = 0; i < fShadows.GetCount(); i++ )
        const plShadowCaster* caster = fShadows[i]->fCaster;

// IRenderShadowsOntoSpan /////////////////////////////////////////////////////////////////////
// After doing the usual render for a span (all passes), we call the following.
// If the span accepts shadows, this will loop over all the shadows active this
// frame, and apply the ones that intersect this span's bounds. See below for details.
//  render - draw callback for the span's geometry (its use is not visible in this body).
//  span   - the span being shadowed; supplies the slave bits and the shadow bits.
//  mat    - the span's material (its use is not visible in this body).
void plDXPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, const plSpan* span, hsGMaterial* mat)
    // We've already computed which shadows affect this span. That's recorded in slaveBits.
    const hsBitVector& slaveBits = span->GetShadowSlaves();

    // True until the first affecting slave has been seen.
    hsBool first = true;

    int i;
    for( i = 0; i < fShadows.GetCount(); i++ )
        if( slaveBits.IsBitSet(fShadows[i]->fIndex) )
            // This slave affects this span.
            if( first )
                // On the first, we do all the setup that is independent of
                // the shadow slave, so state that needs to get set once before
                // projecting any number of shadow maps.

                first = false;


            // Now setup any state specific to this shadow slave.

            // Non-zero when this span was itself rendered into this slave's shadow map.
            int selfShadowNow = span->IsShadowBitSet(fShadows[i]->fIndex);

            // We vary the shadow intensity when self shadowing (see below),
            // so we cache whether the shadow light is set for regular or
            // self shadowing intensity. If what we're doing now is different
            // than what we're currently set for, set it again.
            if( selfShadowNow != fShadows[i]->fSelfShadowOn )
                plDXLightRef* lRef = fLights.fShadowLights[fShadows[i]->fLightRefIdx];

                // We lower the power on self shadowing, because the artists like to
                // crank up the shadow strength to huge values to get a darker shadow
                // on the environment, which causes the shadow on the avatar to get
                // way too dark. Another way to look at it is when self shadowing,
                // the surface being projected onto is going to be very close to
                // the surface casting the shadow (because they are the same object).
                if( selfShadowNow )
                    plConst(hsScalar) kMaxSelfPower = 0.3f;
                    // Clamp the slave's power to kMaxSelfPower.
                    hsScalar power = fShadows[i]->fPower > kMaxSelfPower ? kMaxSelfPower : fShadows[i]->fPower;
                    // NOTE(review): the first line of each chained assignment below is
                    // missing here; presumably it starts with lRef->fD3DInfo.Diffuse.r - confirm.
                        = lRef->fD3DInfo.Diffuse.g 
                        = lRef->fD3DInfo.Diffuse.b
                        = power;
                        = lRef->fD3DInfo.Diffuse.g 
                        = lRef->fD3DInfo.Diffuse.b
                        = fShadows[i]->fPower;
                // Push the updated light description to the device.
                fD3DDevice->SetLight(lRef->fD3DIndex, &lRef->fD3DInfo);

                // record which our intensity is now set for.
                fShadows[i]->fSelfShadowOn = selfShadowNow;

            // Enable the light.
            fD3DDevice->LightEnable(fShadows[i]->fLightIndex, true);
            // Ask the device whether the current state combination is valid; the result
            // code is stored in fSettings.fDXError.
            DWORD nPass;
            fSettings.fDXError = fD3DDevice->ValidateDevice(&nPass);
            if( fSettings.fDXError != D3D_OK )
#endif // HS_DEBUGGING

            // kFlagNoShadowApply is a debug flag that suppresses whatever this test guards.
            // NOTE(review): the guarded statement is not present here - confirm.
            if (!IsDebugFlagSet(plPipeDbg::kFlagNoShadowApply))
            // Disable it again.
            fD3DDevice->LightEnable(fShadows[i]->fLightIndex, false);



// ISetupShadowRcvTextureStages ////////////////////////////////////////////
// Set the generic stage states. We'll fill in the specific textures
// for each slave later.
// Summary of what is configured here: fixed function (nil shaders), read-only
// z-buffering, stage 0 (shadow map) and stage 1 (ULut) texture stage states,
// an optional extra stage driven by mat's layer alpha, a disabled terminating
// stage, the frame buffer blend, alpha reference, specular off and black fog.
//  mat - material of the span receiving the shadow; only its layers 0 and 1 are consulted.
void plDXPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat)
    // Setup for nil shaders to get us back to fixed function pipeline.
    ISetShaders(nil, nil);

    // We're whacking about with renderstate independent of current material,
    // so make sure the next span processes its material, even if it's the
    // same one.
    fForceMatHandle = true;

    // Set the D3D lighting/material model

    // Zbuffering on read-only
    fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_LESSEQUAL);
    fD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, FALSE);
    // Keep the cached layer state in agreement with what was just sent to the device.
    fLayerState[0].fZFlags &= ~hsGMatState::kZMask;
    fLayerState[0].fZFlags |= hsGMatState::kZNoZWrite;

    // Stage 0:
    // Texture is slave specific
    // Texture transform is slave specific
    // ColorArg1 = texture
    // ColorArg2 = diffuse
    // ColorOp = modulate
    // AlphaArg1 = texture
    // AlphaOp = SelectArg1
    fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TEXTURE);
    fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG2, D3DTA_DIFFUSE);
    fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_MODULATE);

    fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG1, D3DTA_TEXTURE); 
    fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG1);


    // Set texture to clamp
    fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
    fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
    fLayerState[0].fClampFlags = hsGMatState::kClampTexture;

    // Stage 1:
    // Set texture to ULut
    // Texture transform is slave specific
    // *** With the optional texture blurring, the state here becomes
    // *** partially slave dependent. Specifically, if we've done a blur,
    // *** then we want to modulate the lut color value by current (which is
    // *** the blurred color), else just select the lut. So we'll just move
    // *** the ColorOp down to the slave specific section.
    // %%% Okay, get this. The GeForce2 won't take a SelectArg1 on Stage1 if
    // %%% we're also trying to use Stage2 to modulate in the diffuse. But
    // %%% it WILL let us do a modulate on Stage1. So we're going to make sure
    // %%% that our shadowmap texture is white, then we can just modulate them
    // %%% with no effect. If we're blurring, we already wanted to modulate, so
    // %%% no change there. This means we can set the ColorOp now, rather than
    // %%% having to wait for the Slave specific section later.
    // ColorArg1 = 1 - ULut
    // ColorArg2 = Current
    // ColorOp = Modulate
    // AlphaArg1 = ULut
    // AlphaArg2 = Current
    // AlphaOp = Subtract
    plDXTextureRef* ref = IGetULutTextureRef();
    // NOTE(review): the statement guarded by these two tests is not present here;
    // presumably it builds the D3D texture from ref->fData - confirm.
    if( !ref->fD3DTexture )
        if( ref->fData )
    hsRefCnt_SafeAssign(fLayerRef[1], ref);
    fD3DDevice->SetTexture(1, ref->fD3DTexture);

    // The following commented out block is kind of cool, because it
    // bases the darkness of the shadow on the distance between the
    // shadow caster and the point receiving the shadow. So, for example,
    // the hand's shadow would get darker as it reaches for the lever.
    // Unfortunately, it doesn't guarantee that the shadow will completely
    // attenuate out at the fAttenDist (in fact, it pretty much guarantees
    // that it won't), so shadows will pop in and out. So instead, we'll
    // base the color on the distance from the start of the slave. The
    // difference is subtle, and usually unnoticeable, and we get no popping.
    fD3DDevice->SetTextureStageState(1, D3DTSS_COLORARG1, D3DTA_TEXTURE | D3DTA_COMPLEMENT);
    fD3DDevice->SetTextureStageState(1, D3DTSS_COLORARG2, D3DTA_CURRENT);
    fD3DDevice->SetTextureStageState(1, D3DTSS_COLOROP,   D3DTOP_MODULATE);

    fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAARG1, D3DTA_TEXTURE); 
    fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAARG2, D3DTA_CURRENT); 
    fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP,   D3DTOP_SUBTRACT);
    // Cached blend flags set to all ones; presumably marks the cache as not matching
    // any real layer state so later material setup re-sends it - confirm.
    fLayerState[1].fBlendFlags = UInt32(-1);

    // Only touch the device if the cached transform flags differ.
    if( D3DTTFF_COUNT3 != fLayerXformFlags[1] )
        fLayerXformFlags[1] = D3DTTFF_COUNT3;
        fD3DDevice->SetTextureStageState(1, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_COUNT3);

    // Set texture to clamp
    fD3DDevice->SetSamplerState(1, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
    fD3DDevice->SetSamplerState(1, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
    fLayerState[1].fClampFlags = hsGMatState::kClampTexture;

    // Index of the next texture stage to configure after stages 0 and 1.
    int iNextStage = 2;

    // If mat's base layer is alpha'd, and we have > 3 TMU's factor
    // in the base layer's alpha.
    if( (fSettings.fMaxLayersAtOnce > 3) && mat->GetLayer(0)->GetTexture() && (mat->GetLayer(0)->GetBlendFlags() & hsGMatState::kBlendAlpha) )
        plLayerInterface* layer = mat->GetLayer(0);

        // If the following conditions are met, it means that layer 1 is a better choice to
        // get the transparency from. The specific case we're looking for is vertex alpha
        // simulated by an invisible second layer alpha LUT (known as the alpha hack).
        if( (layer->GetMiscFlags() & hsGMatState::kMiscBindNext) 
            && mat->GetLayer(1) 
            && !(mat->GetLayer(1)->GetMiscFlags() & hsGMatState::kMiscNoShadowAlpha)
            && !(mat->GetLayer(1)->GetBlendFlags() & hsGMatState::kBlendNoTexAlpha) 
            && mat->GetLayer(1)->GetTexture() )
                layer = mat->GetLayer(1);

        // Take the texture alpha and modulate the color so far with it. In
        // the final shadow map, black will have no effect, white will be maximal
        // darkening.
        fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLORARG1, D3DTA_TEXTURE | D3DTA_ALPHAREPLICATE);
        fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLORARG2, D3DTA_CURRENT);
        fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLOROP,   D3DTOP_MODULATE);

        // Alpha passes through from the previous stage unchanged.
        fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_ALPHAARG2, D3DTA_CURRENT); 
        fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2);

        // Blend flags to layer blend (alpha +- complement)
        fLayerState[iNextStage].fBlendFlags = UInt32(-1);

        // Clamp to whatever the texture wants.
        // NOTE(review): only the cached flags are updated here; any matching sampler
        // state call is not present - confirm.
        if( fLayerState[iNextStage].fClampFlags ^ layer->GetClampFlags() )
            fLayerState[iNextStage].fClampFlags = layer->GetClampFlags();

        // Shade to 0
        fLayerState[iNextStage].fShadeFlags = 0;

        // ZFlags to ZNoZWrite
        fLayerState[iNextStage].fZFlags = hsGMatState::kZNoZWrite;

        // MiscFlags to layer's misc flags
        fLayerState[iNextStage].fMiscFlags = layer->GetMiscFlags();

        // Set up whatever UVW transform the layer normally uses.
        IHandleStageTransform(iNextStage, layer);
        // Normal UVW source.
        UInt32 uvwSrc = layer->GetUVWSrc();

        // Only touch the device if the cached UVW source differs.
        if( fLayerUVWSrcs[ iNextStage ] != uvwSrc )
            fD3DDevice->SetTextureStageState( iNextStage, D3DTSS_TEXCOORDINDEX, uvwSrc );
            fLayerUVWSrcs[ iNextStage ] = uvwSrc;

        // Pick the texture transform flags: projected 3-component for perspective
        // projection layers, 3-component for normal/position/reflect sources.
        // NOTE(review): the two consecutive assignments below look like the remains of
        // an if/else whose else line is missing (COUNT2 as the fallback) - confirm.
        UInt32 xformFlags;
        if( layer->GetMiscFlags() & hsGMatState::kMiscPerspProjection )
            xformFlags = D3DTTFF_COUNT3 | D3DTTFF_PROJECTED;
        else if( uvwSrc & (plLayerInterface::kUVWNormal | plLayerInterface::kUVWPosition | plLayerInterface::kUVWReflect) )
            xformFlags = D3DTTFF_COUNT3;
            xformFlags = D3DTTFF_COUNT2;

        if( xformFlags != fLayerXformFlags[iNextStage] )
            fLayerXformFlags[iNextStage] = xformFlags;
            fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_TEXTURETRANSFORMFLAGS, xformFlags);

        // This ref should be pretty safe to use, because we just rendered it.
        ref = (plDXTextureRef*)layer->GetTexture()->GetDeviceRef();

        hsRefCnt_SafeAssign( fLayerRef[iNextStage], ref );
        fD3DDevice->SetTexture( iNextStage, ref->fD3DTexture );


        // Modulate the color so far by the replicated diffuse alpha; the alpha op is
        // disabled for this stage.
        // NOTE(review): iNextStage is never advanced in the code visible here, so these
        // calls address the same stage index as the block above - confirm.
        fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLORARG1, D3DTA_DIFFUSE | D3DTA_ALPHAREPLICATE);
        fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLORARG2, D3DTA_CURRENT);
        fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLOROP,   D3DTOP_MODULATE);

        fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_ALPHAOP,   D3DTOP_DISABLE);

        fLayerState[iNextStage].fBlendFlags = UInt32(-1);


    fLayerState[iNextStage].fBlendFlags = UInt32(-1);

    // And seal it up
    // Disabling the color and alpha ops terminates the texture stage cascade here.
    fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLOROP, D3DTOP_DISABLE);
    fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
    fLayerState[iNextStage].fBlendFlags = UInt32(-1);

    fLastEndingStage = 0;

    // Now set the frame buffer blend
    // Remember that white darkens and black is no effect.
    // Form is Src * SrcBlend + Dst * DstBlend
    // We want inverse Src * Dst, so
    // Src * ZERO + Dst * InvSrc
    // NOTE(review): only the source blend factor is set here; the destination
    // factor described above is not present - confirm.
    fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE);
    fD3DDevice->SetRenderState(D3DRS_SRCBLEND,  D3DBLEND_ZERO);

    fLayerState[0].fBlendFlags = UInt32(-1);

    // Turn on alpha test. Alpha of zero means the shadow map depth
    // is greater or equal to the surface depth, i.e. the surface
    // is between the shadow caster and the light and doesn't receive
    // shadow.
    fD3DDevice->SetRenderState(D3DRS_ALPHAREF, 0x00000001);
    fLayerState[0].fBlendFlags |= hsGMatState::kBlendTest;

    fD3DDevice->SetRenderState(D3DRS_SPECULARENABLE, FALSE);
    fLayerState[0].fShadeFlags &= ~hsGMatState::kShadeSpecular; 

    // Set fog color to black
    // We should automatically reset it, because our blend mode is -1'd.
    fD3DDevice->SetRenderState(D3DRS_FOGCOLOR, 0);

    // Ask the device whether the state set up above is valid; the result code is
    // stored in fSettings.fDXError.
    // NOTE(review): the statement guarded by the test below is not present here - confirm.
    DWORD nPass;
    fSettings.fDXError = fD3DDevice->ValidateDevice(&nPass);
    if( fSettings.fDXError != D3D_OK )
#endif // HS_DEBUGGING

// ISetupShadowSlaveTextures //////////////////////////////////////////////
// Set any state specific to this shadow slave for projecting the slave's
// shadow map onto the surface.
// Binds the slave's render target as the stage 0 texture and loads the stage 0
// and stage 1 texture transforms from the slave's matrices.
void plDXPipeline::ISetupShadowSlaveTextures(plShadowSlave* slave)
    // Scratch D3D matrix reused for both stage transforms.
    D3DXMATRIX tXfm;

    hsMatrix44 c2w = GetCameraToWorld();

    // Stage 0:
    // Set Stage 0's texture to the slave's rendertarget.
    // Set texture transform to slave's camera to texture transform
    plRenderTarget* renderTarg = (plRenderTarget*)slave->fPipeData;
    hsAssert(renderTarg, "Processing a slave that hasn't been rendered");
    // NOTE(review): no statement follows this nil test here; presumably an early
    // return - confirm.
    if( !renderTarg )
    plDXTextureRef* ref = (plDXTextureRef*)renderTarg->GetDeviceRef();
    hsAssert(ref, "Shadow map ref should have been made when it was rendered");
    // NOTE(review): same as above, the guarded statement is not present - confirm.
    if( !ref )

    hsRefCnt_SafeAssign( fLayerRef[0], ref );
    fD3DDevice->SetTexture( 0, ref->fD3DTexture );
    // Compose camera-to-world with the slave's world-to-texture matrix.
    hsMatrix44 cameraToTexture = slave->fWorldToTexture * c2w;
    IMatrix44ToD3DMatrix(tXfm, cameraToTexture);

    fD3DDevice->SetTransform( sTextureStages[0], &tXfm );
    fLayerTransform[0] = true;

    // Directional lights (ortho projection) just use COUNT2, point lights use COUNT3|PROJECTED.
    UInt32 xformFlags = slave->fView.GetOrthogonal() ? D3DTTFF_COUNT2 : D3DTTFF_COUNT3 | D3DTTFF_PROJECTED;

    // Only touch the device if the cached transform flags differ.
    if( xformFlags != fLayerXformFlags[0] )
        fLayerXformFlags[0] = xformFlags;
        fD3DDevice->SetTextureStageState(0, D3DTSS_TEXTURETRANSFORMFLAGS, xformFlags);

    // Stage 1: the lut
    // Set the texture transform to slave's fRcvLUT
    hsMatrix44 cameraToLut = slave->fRcvLUT * c2w;
    IMatrix44ToD3DMatrix(tXfm, cameraToLut);

    fD3DDevice->SetTransform( sTextureStages[1], &tXfm );
    fLayerTransform[1] = true;


// ISetShadowLightState //////////////////////////////////////////////////////////////////
// Set the D3D lighting/material model for projecting the shadow map onto this material.
//  mat - material being shadowed; may be nil, and only layer 0's opacity is read.
void plDXPipeline::ISetShadowLightState(hsGMaterial* mat)

    fCurrLightingMethod = plSpan::kLiteShadow;

    // Static, so the same D3DMATERIAL9 object is reused on every call; only its
    // diffuse members are written in the code visible here.
    static D3DMATERIAL9 d3dMat;
    // Diffuse RGB is taken from layer 0's opacity.
    // NOTE(review): the two consecutive assignments below look like the remains of an
    // if/else whose else line is missing (1.f as the fallback) - confirm.
    if( mat && mat->GetNumLayers() && mat->GetLayer(0) )
        d3dMat.Diffuse.r = d3dMat.Diffuse.g = d3dMat.Diffuse.b = mat->GetLayer(0)->GetOpacity();
        d3dMat.Diffuse.r = d3dMat.Diffuse.g = d3dMat.Diffuse.b = 1.f;
    d3dMat.Diffuse.a = 1.f;

    // No ambient contribution while projecting shadows.
    // NOTE(review): d3dMat is never handed to the device in the code visible here - confirm.
    fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0 );

// IDisableLightsForShadow ///////////////////////////////////////////////////////////
// Disable any lights that are enabled. We'll only want the shadow light illuminating
// the surface.
// The enabled-flags bit vector itself is left untouched; only the device is told to
// turn the lights off.
void plDXPipeline::IDisableLightsForShadow()
    int i;
    // Walk every light index up to and including fLights.fLastIndex.
    for( i = 0; i < fLights.fLastIndex + 1; i++ )
        if( fLights.fEnabledFlags.IsBitSet(i) )
            fD3DDevice->LightEnable(i, false);

// IEnableShadowLight ///////////////////////////////////////////////
// Enable this shadow slave's light.
// The device light index used is slave->fLightIndex.
void plDXPipeline::IEnableShadowLight(plShadowSlave* slave)
    fD3DDevice->LightEnable(slave->fLightIndex, true);

// IAcceptsShadow ////////////////////////////////////////////////////////////////
// Only allow self shadowing if requested.
// Returns true when the slave allows self shadowing, or when the span was not
// rendered into this slave's shadow map (i.e. it is not part of the caster).
hsBool plDXPipeline::IAcceptsShadow(const plSpan* span, plShadowSlave* slave)
    // The span's shadow bits records which shadow maps that span was rendered
    // into.
    return slave->SelfShadow() || !span->IsShadowBitSet(slave->fIndex);

// IReceivesShadows ////////////////////////////////////////////////////////////////////
// Want artists to be able to just disable shadows for spans where they'll either
// look goofy, or won't contribute.
// Also, if we have less than 3 simultaneous textures, we want to skip anything with
// an alpha'd base layer, unless it's been overridden.
// The tests run in priority order: kPropNoShadow rejects first, kPropForceShadow then
// accepts, and only after that are the projection and alpha restrictions applied.
hsBool plDXPipeline::IReceivesShadows(const plSpan* span, hsGMaterial* mat)
    if( span->fProps & plSpan::kPropNoShadow )
        return false;

    if( span->fProps & plSpan::kPropForceShadow )
        return true;

    if( span->fProps & (plSpan::kPropSkipProjection | plSpan::kPropProjAsVtx) )
        return false;

    // Too few texture units to factor in an alpha'd, textured base layer.
    if( (fSettings.fMaxLayersAtOnce < 3) 
        && mat->GetLayer(0)->GetTexture() 
        && (mat->GetLayer(0)->GetBlendFlags() & hsGMatState::kBlendAlpha) )
        return false;

    // Shouldn't hit this, since we're disabling shadows on the Intel chips,
    // but just in case.
    // To enable this, you'll need to start passing in the drawable as well.
    // NOTE(review): drawable is not a parameter or local of this function as shown;
    // per the comment above this block is presumably compiled out - confirm.
    if( fSettings.fIsIntel )
        const plVertexSpan* vertSpan = static_cast<const plVertexSpan*>(span);
        plGBufferGroup* group = drawable->GetBufferGroup(vertSpan->fGroupIdx);
        if( !group->GetNumUVs() )
            return false;

    return true;

// SubmitClothingOutfit /////////////////////////////////////////////////////////////////
// Register a clothing outfit with the pipeline for this frame.
// Only acts when co is not already in fClothingOutfits; in that case it tries to
// remove co from fPrevClothingOutfits.
// NOTE(review): the statements that add co to fClothingOutfits, and whatever runs when
// the removal fails, are not present here - confirm.
void plDXPipeline::SubmitClothingOutfit(plClothingOutfit* co)
    if (fClothingOutfits.Find(co) == fClothingOutfits.kMissingIndex)
        if (!fPrevClothingOutfits.RemoveItem(co))

// IClearClothingOutfits ////////////////////////////////////////////////////////////////
// Walk the given outfit list from its last entry down to its first.
// NOTE(review): only the fetch of each outfit is visible here; the per-outfit cleanup
// and any removal from the list are not present - confirm.
void plDXPipeline::IClearClothingOutfits(hsTArray<plClothingOutfit*>* outfits)
    int i;
    // Iterating backwards keeps remaining indices valid if entries are removed.
    for (i = outfits->GetCount() - 1; i >= 0; i--)
        plClothingOutfit *co = outfits->Get(i);

// IFillAvRTPool ////////////////////////////////////////////////////////////////////////
// Choose how many avatar render targets to pool and at what resolution, then try to
// create them, halving the resolution after each failed attempt.
void plDXPipeline::IFillAvRTPool()
    // Nothing in the pool is handed out yet.
    fAvNextFreeRT = 0;
    // Timestamp recorded for the pool; the AvRTShrinkTime profile stat is measured from it.
    fAvRTShrinkValidSince = hsTimer::GetSysSeconds();
    int numRTs = 1;
    if (fClothingOutfits.GetCount() > 1)
        // Just jump to 8 for starters so we don't have to refresh for the 2nd, 4th, AND 8th player
        numRTs = 8;
        // Keep doubling until there are at least as many targets as outfits.
        while (numRTs < fClothingOutfits.GetCount())
            numRTs *= 2;

    // I could see a 32MB video card going down to 64x64 RTs in extreme cases
    // (over 100 players onscreen at once), but really, if such hardware is ever trying to push
    // that, the low texture resolution is not going to be your major concern.
    // Starts at 1024 shifted down by the global mip level chop count and stops below 32.
    for (fAvRTWidth = 1024 >> plMipmap::GetGlobalLevelChopCount(); fAvRTWidth >= 32; fAvRTWidth /= 2)
        // NOTE(review): the statement run on success is not present here; presumably
        // an early return - confirm.
        if (IFillAvRTPool(numRTs, fAvRTWidth))

        // Nope? Ok, lower the resolution and try again.

// IFillAvRTPool (sized) ////////////////////////////////////////////////////////////////
// Create numRTs square render targets of the given width in fAvRTPool.
// Returns true if every target got a device ref; on the first failure, deletes the
// targets created so far and returns false.
// NOTE(review): fAvRTPool is indexed up to numRTs - 1 but is not resized in the code
// visible here - confirm it is sized beforehand.
hsBool plDXPipeline::IFillAvRTPool(UInt16 numRTs, UInt16 width)
    int i;
    for (i = 0; i < numRTs; i++)
        // 32 bit color texture target with no depth or stencil buffer.
        UInt16 flags = plRenderTarget::kIsTexture | plRenderTarget::kIsProjected;
        UInt8 bitDepth = 32;
        UInt8 zDepth = 0;
        UInt8 stencilDepth = 0;
        fAvRTPool[i] = TRACKED_NEW plRenderTarget(flags, width, width, bitDepth, zDepth, stencilDepth);

        // If anyone fails, release everyone we've created.
        if (!MakeRenderTargetRef(fAvRTPool[i]))
            int j;
            // Inclusive of i: the target whose ref just failed is deleted too.
            for (j = 0; j <= i; j++)
                delete fAvRTPool[j];
            return false;
    return true;

// IReleaseAvRTPool /////////////////////////////////////////////////////////////////////
// Walks the current outfits, the previous frame's outfits, and the avatar render
// target pool, in that order.
// NOTE(review): none of the three loop bodies is present here; presumably they detach
// the outfits from their targets and delete the pooled targets - confirm.
void plDXPipeline::IReleaseAvRTPool()
    int i;
    for (i = 0; i < fClothingOutfits.GetCount(); i++)
    for (i = 0; i < fPrevClothingOutfits.GetCount(); i++)
    for (i = 0; i < fAvRTPool.GetCount(); i++)

// IGetNextAvRT /////////////////////////////////////////////////////////////////////////
// Hand out the pool entry at fAvNextFreeRT and advance that index by one.
// No bounds check is made here against the size of fAvRTPool.
plRenderTarget *plDXPipeline::IGetNextAvRT()
    return fAvRTPool[fAvNextFreeRT++];

// IFreeAvRT ////////////////////////////////////////////////////////////////////////////
// Return an avatar render target to the pool. Does nothing if tex is not a pool member.
void plDXPipeline::IFreeAvRT(plRenderTarget* tex)
    UInt32 index = fAvRTPool.Find(tex);
    if (index != fAvRTPool.kMissingIndex)
        // Entries below fAvNextFreeRT are the ones currently handed out.
        hsAssert(index < fAvNextFreeRT, "Freeing an avatar RT that's already free?");
        // Swap the freed target with the last handed-out entry.
        // NOTE(review): fAvNextFreeRT is not decremented in the code visible here - confirm.
        fAvRTPool[index] = fAvRTPool[fAvNextFreeRT - 1];
        fAvRTPool[fAvNextFreeRT - 1] = tex;

// Vertex used for the avatar clothing quads: three position floats followed by one
// pair of texture coordinates.
struct plAVTexVert
    float fPos[3];
    float fUv[2];

// IPreprocessAvatarTextures ////////////////////////////////////////////////////////////
// For each submitted clothing outfit, compose its texture by drawing the base texture
// and then every clothing item layer as quads (see IDrawClothingQuad).
// Before the loop, device state is set for plain untransformed 2D quad drawing:
// identity transforms, no culling, no z test or write, no fog or specular, and a
// single texture stage modulated by the texture factor.
void plDXPipeline::IPreprocessAvatarTextures()
    plProfile_Set(AvRTPoolUsed, fClothingOutfits.GetCount());
    plProfile_Set(AvRTPoolCount, fAvRTPool.GetCount());
    plProfile_Set(AvRTPoolRes, fAvRTWidth);
    plProfile_Set(AvRTShrinkTime, UInt32(hsTimer::GetSysSeconds() - fAvRTShrinkValidSince));

    IClearClothingOutfits(&fPrevClothingOutfits); // Frees anyone used last frame that we don't need this frame

    // Position plus one set of 2D texture coordinates, matching plAVTexVert.
    const UInt32 kVFormat = D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE2(0);

    // NOTE(review): no statement follows this test here; presumably an early return
    // when there is nothing to draw - confirm.
    if (fClothingOutfits.GetCount() == 0)

    plMipmap *itemBufferTex = nil;

    fForceMatHandle = true;
    ISetShaders(nil, nil); // Has a side effect of futzing with our cull settings...

    // Even though we're going to use DrawPrimitiveUP, we explicitly set the current VB ref to nil,
    // otherwise we might try and use the same VB ref later, think it hasn't changed, and
    // not update our FVF.
    fSettings.fCurrVertexBuffRef = nil;
    fD3DDevice->SetStreamSource(0, NULL, 0, 0);
    fD3DDevice->SetFVF(fSettings.fCurrFVFFormat = kVFormat);
    // Identity view/world/projection: vertex positions are used as given.
    fD3DDevice->SetTransform(D3DTS_VIEW, &d3dIdentityMatrix);
    fD3DDevice->SetTransform(D3DTS_WORLD, &d3dIdentityMatrix);
    fD3DDevice->SetTransform(D3DTS_PROJECTION, &d3dIdentityMatrix);
    fD3DDevice->SetRenderState(D3DRS_CULLMODE, fCurrCullMode = D3DCULL_NONE);
    fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_DISABLE);
    // Depth test always passes and depth is never written.
    fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_ALWAYS);
    fD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, FALSE);
    fLayerState[0].fZFlags &= ~hsGMatState::kZMask;
    fLayerState[0].fZFlags |= hsGMatState::kZNoZWrite | hsGMatState::kZNoZRead;
    // Only touch the device if the cached UVW source differs.
    if (fLayerUVWSrcs[0] != 0)
        fD3DDevice->SetTextureStageState(0, D3DTSS_TEXCOORDINDEX, 0);
        fLayerUVWSrcs[0] = 0;
    fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
    fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
    fLayerState[0].fClampFlags = hsGMatState::kClampTexture;
    // NOTE(review): only the cached flag is updated here; the matching device call
    // is not present - confirm.
    if (D3DTTFF_DISABLE != fLayerXformFlags[0])
        fLayerXformFlags[0] = D3DTTFF_DISABLE;
    fD3DDevice->SetRenderState(D3DRS_SPECULARENABLE, FALSE);
    fLayerState[0].fShadeFlags &= ~hsGMatState::kShadeSpecular; 
    fD3DDevice->SetRenderState(D3DRS_FOGENABLE, FALSE);
    fCurrFog.fEnvPtr = nil;
    // Stage 0: color and alpha are both texture modulated by the texture factor (the tint).
    fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP, D3DTOP_MODULATE);
    fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TFACTOR);
    fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG2, D3DTA_TEXTURE);
    fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP, D3DTOP_MODULATE);
    fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG1, D3DTA_TFACTOR);
    fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG2, D3DTA_TEXTURE);
    fD3DDevice->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_ALWAYS); 
    fLayerState[0].fBlendFlags = UInt32(-1);
    // Stage 1 disabled: a single texture stage is used.
    fD3DDevice->SetTextureStageState(1, D3DTSS_COLOROP, D3DTOP_DISABLE);
    fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
    fLayerState[1].fBlendFlags = UInt32(-1);

    int oIdx;
    for (oIdx = 0; oIdx < fClothingOutfits.GetCount(); oIdx++)
        plClothingOutfit *co = fClothingOutfits[oIdx];
        // NOTE(review): no statement follows this test here; presumably the outfit is
        // skipped when it has no base texture - confirm.
        if (co->fBase == nil || co->fBase->fBaseTexture == nil)

        // The outfit's target layer texture, if it is currently a render target.
        plRenderTarget *rt = plRenderTarget::ConvertNoRef(co->fTargetLayer->GetTexture());
        if (rt != nil && co->fDirtyItems.Empty())
            // we've still got our valid RT from last frame and we have nothing to do.

        // No target yet: take the next free one from the avatar pool.
        if (rt == nil)
            rt = IGetNextAvRT();

        // Viewport covering the whole render target.
        // NOTE(review): vp is not applied to the device in the code visible here - confirm.
        D3DVIEWPORT9 vp = {0, 0, rt->GetWidth(), rt->GetHeight(), 0.f, 1.f};

        // Half a texel, expressed in UV units of the render target.
        hsScalar uOff = 0.5f / rt->GetWidth();
        hsScalar vOff = 0.5f / rt->GetHeight();

        // Copy over the base
        // Opaque, untinted (white texture factor) full-target quad.
        fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, FALSE);
        fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, 0xffffffff);
        fLayerState[0].fBlendFlags = UInt32(-1);
        IDrawClothingQuad(-1.f, -1.f, 2.f, 2.f, uOff, vOff, co->fBase->fBaseTexture);
        plClothingLayout *layout = plClothingMgr::GetClothingMgr()->GetLayout(co->fBase->fLayoutName);

        int i, j, k;
        for (i = 0; i < co->fItems.GetCount(); i++)
            plClothingItem *item = co->fItems[i];
            //if (!co->fDirtyItems.IsBitSet(item->fTileset))
            //  continue; // Not dirty, don't update

            for (j = 0; j < item->fElements.GetCount(); j++)
                for (k = 0; k < plClothingElement::kLayerMax; k++)
                    // NOTE(review): no statement follows this test here; presumably
                    // missing textures are skipped - confirm.
                    if (item->fTextures[j][k] == nil)

                    itemBufferTex = item->fTextures[j][k];
                    hsColorRGBA tint = co->GetItemTint(item, k);
                    // Skin blend layers take their alpha from the outfit's skin blend values.
                    if (k >= plClothingElement::kLayerSkinBlend1 && k <= plClothingElement::kLayerSkinLast)
                        tint.a = co->fSkinBlends[k - plClothingElement::kLayerSkinBlend1];

                    // NOTE(review): the lines below look like the remains of an if/else
                    // (blending off for the base layer, source-alpha blending otherwise)
                    // whose else line is missing - confirm.
                    if (k == plClothingElement::kLayerBase)
                        fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, FALSE);
                        fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE);
                        fD3DDevice->SetRenderState(D3DRS_SRCBLEND,  D3DBLEND_SRCALPHA);
                        fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA);
                    // The tint reaches the texture stage through the texture factor.
                    fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, tint.ToARGB32());
                    fLayerState[0].fBlendFlags = UInt32(-1);
                    // Map the element's layout rectangle into the [-1, 1] range. All four
                    // values, including the vertical ones, are normalized by
                    // layout->fOrigWidth, and Y is flipped so fYPos measures from the top.
                    hsScalar screenW = (hsScalar)item->fElements[j]->fWidth / layout->fOrigWidth * 2.f;
                    hsScalar screenH = (hsScalar)item->fElements[j]->fHeight / layout->fOrigWidth * 2.f;                
                    hsScalar screenX = (hsScalar)item->fElements[j]->fXPos / layout->fOrigWidth * 2.f - 1.f;
                    hsScalar screenY = (1.f - (hsScalar)item->fElements[j]->fYPos / layout->fOrigWidth) * 2.f - 1.f - screenH;
                    IDrawClothingQuad(screenX, screenY, screenW, screenH, uOff, vOff, itemBufferTex);
    // Nothing else sets this render state, so let's just set it back to the default to be safe
    // Force all view transforms to be re-sent, since they were overwritten with identity above.
    fView.fXformResetFlags = fView.kResetAll;


// IDrawClothingQuad ////////////////////////////////////////////////////////////////////
// Draw one textured rectangle as a two-triangle strip with DrawPrimitiveUP.
//  x, y       - corner of the rectangle with the smallest coordinates.
//  w, h       - extent of the rectangle along x and y.
//  uOff, vOff - offsets added to every texture coordinate.
//  tex        - texture to draw; its device ref is created or refreshed if needed.
// The whole texture is mapped onto the rectangle: U spans uOff to 1 + uOff, and V runs
// from 1 + vOff at y down to vOff at y + h.
void plDXPipeline::IDrawClothingQuad(hsScalar x, hsScalar y, hsScalar w, hsScalar h, 
                                     hsScalar uOff, hsScalar vOff, plMipmap *tex)
    const UInt32 kVSize = sizeof(plAVTexVert);
    plDXTextureRef* ref = (plDXTextureRef*)tex->GetDeviceRef();
    // No ref yet, or the ref is stale: (re)make it and fetch it again.
    if (!ref || ref->IsDirty())
        MakeTextureRef(nil, tex);
        ref = (plDXTextureRef*)tex->GetDeviceRef();
    // NOTE(review): the statement guarded by these two tests is not present here;
    // presumably it builds the D3D texture from ref->fData - confirm.
    if (!ref->fD3DTexture)
        if (ref->fData)
    hsRefCnt_SafeAssign( fLayerRef[0], ref );
    fD3DDevice->SetTexture(0, ref->fD3DTexture);

    // Four strip vertices, each derived from the template vertex below.
    plAVTexVert ptr[4];
    plAVTexVert vert;
    vert.fPos[0] = x;
    vert.fPos[1] = y;
    vert.fPos[2] = 0.5f;
    vert.fUv[0] = uOff;
    vert.fUv[1] = 1.f + vOff;

    // P0
    // (x, y), stored third in the strip.
    ptr[2] = vert;

    // P1
    // (x + w, y), stored first in the strip.
    ptr[0] = vert;
    ptr[0].fPos[0] += w;
    ptr[0].fUv[0] += 1.f;

    // P2
    // (x + w, y + h), stored second in the strip.
    ptr[1] = vert;
    ptr[1].fPos[0] += w;
    ptr[1].fUv[0] += 1.f;
    ptr[1].fPos[1] += h;
    ptr[1].fUv[1] -= 1.f;

    // P3
    // (x, y + h), stored last in the strip.
    ptr[3] = vert;
    ptr[3].fPos[1] += h;
    ptr[3].fUv[1] -= 1.f;

    // Ask the device whether the current state is valid; the result code is stored in
    // fSettings.fDXError.
    // NOTE(review): the statement guarded by the test below is not present here - confirm.
    DWORD nPass;
    fSettings.fDXError = fD3DDevice->ValidateDevice(&nPass);
    if( fSettings.fDXError != D3D_OK )
#endif // HS_DEBUGGING
    // Two triangles from the four vertices, read straight from the local array.
    fD3DDevice->DrawPrimitiveUP(D3DPT_TRIANGLESTRIP, 2, ptr, kVSize);

// Test hackery as R&D for water

// End Test hackery as R&D for water

//// Functions from Other Classes That Need to Be Here to Compile Right ///////

// ICreateDXPipeline ////////////////////////////////////////////////////////////////////
// Allocate a plDXPipeline for the given window and device mode and return it.
// The pipeline is returned even if it holds an error, so the caller can read the
// error string from it (see the note below).
plPipeline  *plPipelineCreate::ICreateDXPipeline( hsWinRef hWnd, const hsG3DDeviceModeRecord *devMode )
    plDXPipeline    *pipe = TRACKED_NEW plDXPipeline( hWnd, devMode );

    // Taken out 8.1.2001 mcn - If we have an error, still return so the client can grab the string
//  if( pipe->GetErrorString() != nil )
//  {
//      delete pipe;
//      pipe = nil;
//  }

    return pipe;