CWE-ou-minkata/MOULOpenSourceClientPlugin/Plasma20/Sources/Plasma/PubUtilLib/plPipeline/plDXPipeline.cpp

/*==LICENSE==*

CyanWorlds.com Engine - MMOG client, server and tools
Copyright (C) 2011 Cyan Worlds, Inc.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.

Additional permissions under GNU GPL version 3 section 7

If you modify this Program, or any covered work, by linking or
combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK,
NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent
JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK
(or a modified version of those libraries),
containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA,
PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG
JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the
licensors of this Program grant you additional
permission to convey the resulting work. Corresponding Source for a
non-source form of such a combination shall include the source code for
the parts of OpenSSL and IJG JPEG Library used as well as that of the covered
work.

You can contact Cyan Worlds, Inc. by email legal@cyan.com
 or by snail mail at:
      Cyan Worlds, Inc.
      14617 N Newport Hwy
      Mead, WA   99021

*==LICENSE==*/
///////////////////////////////////////////////////////////////////////////////
//																			 //
//	plDXPipeline Class Functions   											 //
//	plPipeline derivative for DirectX 										 //
//	Cyan, Inc.																 //
//																			 //
//// Version History //////////////////////////////////////////////////////////
//																			 //
//	2.23.2001 mcn - Created.												 //
//																			 //
///////////////////////////////////////////////////////////////////////////////

#include "hsConfig.h"
#include "hsWindows.h"

#include <d3d9.h>
#include <ddraw.h>
#include <d3dx9mesh.h>
#include <dxerr.h>

#include "hsWinRef.h"

#include "hsTypes.h"
#include "plDXPipeline.h"
#include "plPipelineCreate.h"
#include "plDebugText.h"
#include "plDXEnumerate.h"
#include "hsG3DDeviceSelector.h"
#include "hsGDDrawDllLoad.h"
#include "hsResMgr.h"
#include "plStatusLogDrawer.h"
#include "plQuality.h"

#include "plPipeDebugFlags.h"

#include "hsTemplates.h"
//#include "hsGEnviron.h"
#include "plProfile.h"
#include "../plMessage/plDeviceRecreateMsg.h"
#include "../pnMessage/plSelfDestructMsg.h"
#include "../pnMessage/plClientMsg.h"
#include "../plSurface/hsGMaterial.h"
#include "../plSurface/plLayerInterface.h"
#include "../plSurface/plLayerShadowBase.h"
#include "../plGImage/plMipmap.h"
#include "../plGImage/plCubicEnvironmap.h"
#include "../plDrawable/plDrawableSpans.h"
#include "../plDrawable/plGeometrySpan.h"
#include "../plDrawable/plSpaceTree.h"
#include "../plDrawable/plDrawableGenerator.h"
#include "../plDrawable/plSpanTypes.h"
#include "../plDrawable/plAccessSpan.h"
#include "../plDrawable/plAuxSpan.h"
#include "../pnSceneObject/plSceneObject.h"
#include "../pnSceneObject/plDrawInterface.h"
#include "hsFastMath.h"
#include "../plGLight/plLightInfo.h"
#include "../plParticleSystem/plParticleEmitter.h"
#include "../plParticleSystem/plParticle.h"
#include "../plAvatar/plAvatarClothing.h"
#include "plDebugText.h"
#include "plFogEnvironment.h"
#include "plDXTextFont.h"
#include "plGBufferGroup.h"
#include "hsTimer.h"
#include "plgDispatch.h"
#include "../plScene/plRenderRequest.h"
#include "../plScene/plVisMgr.h"
#include "plRenderTarget.h"
#include "plCubicRenderTarget.h"
#include "plDynamicEnvMap.h"
#include "../../FeatureLib/pfCamera/plVirtualCamNeu.h"

#include "plDXBufferRefs.h"
#include "plDXTextureRef.h"
#include "plDXLightRef.h"
#include "plDXRenderTargetRef.h"
#include "plDXVertexShader.h"
#include "plDXPixelShader.h"

#include "../plGLight/plShadowSlave.h"
#include "../plGLight/plShadowCaster.h"

#include "hsGMatState.inl"

#include "../plSurface/plShader.h"
#include "plDXVertexShader.h"
#include "plDXPixelShader.h"

#include "../pnMessage/plPipeResMakeMsg.h"
#include "plPipeResReq.h"
#include "../pnNetCommon/plNetApp.h"	// for dbg logging
#include "../../FeatureLib/pfCamera/plVirtualCamNeu.h"
#include "../../FeatureLib/pfCamera/plCameraModifier.h"
#include "../plResMgr/plLocalization.h"


// mf horse - test hack, nuke this later
#include "../plSurface/plLayerDepth.h"

#include "../plGImage/hsCodecManager.h"
//#include "../plGImage/hsDXTDirectXCodec.h"

#ifdef HS_DEBUGGING
// This is so VC++ will let us view the contents of plIcicle::fOwnerKey
#include "../pnKeyedObject/plKey.h"
#endif

#include "plCullTree.h"

#include "plTweak.h"

#include <algorithm>
#include <string.h>

//#define MF_TOSSER

// Development/test selector ("mf" prefix). Not referenced in the visible part of this
// file -- presumably read elsewhere; verify before removing.
int mfCurrentTest = 100;
// Storage for plPipeline's static parameter sets. fInitialPipeParams is read by the
// plDXPipeline constructor (windowed flag, width/height, vsync, anti-aliasing amount
// and anisotropic level).
PipelineParams plPipeline::fDefaultPipeParams;
PipelineParams plPipeline::fInitialPipeParams;
// Optional debugging hack: offscreen render targets plus a plate index. Compiled out
// unless MF_ENABLE_HACKOFF is defined.
//#define MF_ENABLE_HACKOFF
#ifdef MF_ENABLE_HACKOFF
//WHITE
static hsTArray<plRenderTarget*> hackOffscreens;
UInt32 doHackPlate = UInt32(-1);
#endif // MF_ENABLE_HACKOFF

// Global despite the member-style "f" prefix; has external linkage.
UInt32	fDbgSetupInitFlags;		// HACK temp only

// Drops one reference on a COM object; a nil pointer is ignored.
// In HS_DEBUGGING builds the count returned by Release() is captured in a local and
// then zeroed when non-zero. That has no effect on the program -- presumably it is
// there as a place to set a breakpoint when references are still outstanding.
void plReleaseObject(IUnknown* x)
{
	if( !x )
		return;

#ifdef HS_DEBUGGING
	int remaining = x->Release();
	if( remaining )
		remaining = 0;
#else // HS_DEBUGGING
	x->Release();
#endif // HS_DEBUGGING
}

//// Local Static Stuff ///////////////////////////////////////////////////////

/// Inline helpers for getting/setting data in a D3D vertex buffer
// Copies three consecutive floats, starting at point.fX, into the buffer at ptr.
// Returns the address immediately after the three floats written.
inline UInt8* inlStuffPoint( UInt8* ptr, const hsScalarTriple& point )
{
	float* out = (float*)ptr;
	const float* in = (float*)&point.fX;
	out[0] = in[0];
	out[1] = in[1];
	out[2] = in[2];
	return (UInt8*)( out + 3 );
}
// Stores uint at ptr and returns the address just past the stored value.
inline UInt8* inlStuffUInt32( UInt8* ptr, const UInt32 uint )
{
	UInt32* out = (UInt32*)ptr;
	out[0] = uint;
	return (UInt8*)( out + 1 );
}
// Reads three consecutive floats from ptr into the triple starting at pt.fX and
// returns the address just past the floats read.
// Note: pt is declared const but is written through a cast, so callers must pass an
// object that is actually modifiable.
inline UInt8* inlExtractPoint( const UInt8* ptr, const hsScalarTriple& pt )
{
	const float* in = (float*)ptr;
	float* out = (float*)&pt.fX;
	out[0] = in[0];
	out[1] = in[1];
	out[2] = in[2];
	return (UInt8*)( in + 3 );
}
// Reads one float from ptr into f and returns the address just past it.
// ptr is taken by reference but is NOT advanced; use the return value to continue.
inline UInt8* inlExtractFloat( const UInt8*& ptr, float& f )
{
	const float* in = (float*)ptr;
	f = in[0];
	return (UInt8*)( in + 1 );
}
// Reads one UInt32 from ptr into uint and returns the address just past it.
// ptr is taken by reference but is NOT advanced; use the return value to continue.
inline UInt8* inlExtractUInt32( const UInt8*& ptr, UInt32& uint )
{
	const UInt32* in = (UInt32*)ptr;
	uint = in[0];
	return (UInt8*)( in + 1 );
}

// Returns the raw bit pattern of f as a DWORD (for D3D states that take float values
// packed into a DWORD argument).
// The bits are copied with memcpy rather than read through a DWORD* cast of a FLOAT,
// which avoids type-punning through an incompatible pointer type.
inline DWORD F2DW( FLOAT f )
{
	DWORD bits = 0;
	memcpy( &bits, &f, sizeof( bits ) );
	return bits;
}

//// Macros for D3D error handling
// Init-time check: stores the HRESULT of cond in fSettings.fDXError and, on failure,
// makes the enclosing function return ICreateFail( errMsg ).
#define INIT_ERROR_CHECK( cond, errMsg ) if( FAILED( fSettings.fDXError = cond ) ) { return ICreateFail( errMsg ); }

// The "#if 1" branch is the one compiled: STRONG_ and WEAK_ERROR_CHECK both store the
// HRESULT in fSettings.fDXError and, on failure, call IGetD3DError() and then
// IShowErrorMessage(). The disabled branch holds the quieter variants (STRONG without
// the message, WEAK with no check at all).
// Each macro expands to a bare if-statement, so wrap uses in braces when they sit
// directly under another if/else.
#if 1		// DEBUG
#define STRONG_ERROR_CHECK( cond ) if( FAILED( fSettings.fDXError = cond ) ) { IGetD3DError(); IShowErrorMessage(); }
#define WEAK_ERROR_CHECK( cond )	STRONG_ERROR_CHECK( cond )
#else
#define STRONG_ERROR_CHECK( cond ) if( FAILED( fSettings.fDXError = cond ) ) { IGetD3DError(); }
#define WEAK_ERROR_CHECK( cond )	cond
#endif

// 4x4 identity matrix, handed to SetTransform() when resetting the world and texture
// transforms in IInitDeviceState().
static D3DXMATRIX d3dIdentityMatrix( 1.0f, 0.0f, 0.0f, 0.0f,
									 0.0f, 1.0f, 0.0f, 0.0f,
									 0.0f, 0.0f, 1.0f, 0.0f,
									 0.0f, 0.0f, 0.0f, 1.0f );

// Maps a texture stage index (0-7) to the D3D transform-state enum for that stage's
// texture matrix.
static const enum _D3DTRANSFORMSTATETYPE	sTextureStages[ 8 ] =
{
	D3DTS_TEXTURE0,	D3DTS_TEXTURE1,	D3DTS_TEXTURE2,	D3DTS_TEXTURE3,
	D3DTS_TEXTURE4,	D3DTS_TEXTURE5,	D3DTS_TEXTURE6,	D3DTS_TEXTURE7
};

// Small perspective-layer scale/offset constants. Their use is not visible in this
// part of the file -- presumably depth-bias style adjustments; confirm at the use site.
static const float kPerspLayerScale  = 0.00001f;
static const float kPerspLayerScaleW = 0.001f;
static const float kPerspLayerTrans  = 0.00002f;
static const hsScalar kAvTexPoolShrinkThresh = 30.f; // seconds

// This caps the number of D3D lights we use. We'll use up to the max allowed
// or this number, whichever is smaller. (This is to prevent us going haywire
// on trying to allocate an array for ALL of the lights in the Ref device.)
//#define kD3DMaxTotalLights		32
// Deliberately low cap on run-time lights. NOTE: the remark that used to sit here
// said the limit was 4 lights; the value actually compiled in is 8.
const int kD3DMaxTotalLights = 8;
// Cap on the number of projected lights an object can have. At 100 this is
// effectively unlimited; the practical limit is the framerate.
const int kMaxProjectors = 100;

/// This controls whether we can draw bounds boxes around all the ice spans.
/// (The HS_DEBUGGING guard is commented out, so this is defined in every build.)
//#ifdef HS_DEBUGGING
#define MCN_BOUNDS_SPANS	1
//#endif

#define MF_BOUNDS_LEVEL_ICE 1
//#define HS_D3D_USE_SPECULAR

/// Define this to write out z-buffer debug info to plasmalog.txt
/// (currently left commented out even in HS_DEBUGGING builds)
#ifdef HS_DEBUGGING
//#define DBG_WRITE_FORMATS
#endif

// ---- Profiling declarations -------------------------------------------------------
// Each plProfile_Create* line defines a profiling variable: (display name, group,
// identifier). The identifier is what plProfile_Inc / plProfile_NewMem / etc. take
// elsewhere in this file. plProfile_Extern refers to a variable created in another file.

// Memory and draw counters.
plProfile_CreateMemCounter("Pipeline Surfaces", "Memory", MemPipelineSurfaces);
plProfile_Extern(MemVertex);
plProfile_Extern(MemIndex);
plProfile_CreateCounter("Feed Triangles", "Draw", DrawFeedTriangles);
plProfile_CreateCounter("Polys", "General", DrawTriangles);
plProfile_CreateCounter("Draw Prim Static", "Draw", DrawPrimStatic);
plProfile_CreateMemCounter("Total Texture Size", "Draw", TotalTexSize);
plProfile_CreateTimer("Harvest", "Draw", Harvest);
plProfile_CreateCounter("Material Change", "Draw", MatChange);
plProfile_CreateCounter("Layer Change", "Draw", LayChange);

plProfile_Extern(DrawOccBuild);

plProfile_CreateCounterNoReset("Reload", "PipeC", PipeReload);

// "PipeT" timers. The leading spaces inside the display names look like they indent
// sub-timers under their parent in the on-screen display -- an inference from the names.
plProfile_CreateTimer("RenderScene", "PipeT", RenderScene);
plProfile_CreateTimer("VisEval", "PipeT", VisEval);
plProfile_CreateTimer("VisSelect", "PipeT", VisSelect);
plProfile_CreateTimer("FindSceneLights", "PipeT", FindSceneLights);
plProfile_CreateTimer("PrepShadows", "PipeT", PrepShadows);
plProfile_CreateTimer("PrepDrawable", "PipeT", PrepDrawable);
plProfile_CreateTimer("  Skin", "PipeT", Skin);
plProfile_CreateTimer("  AvSort", "PipeT", AvatarSort);
plProfile_CreateTimer("  Find Lights", "PipeT", FindLights);
plProfile_CreateTimer("    Find Perms", "PipeT", FindPerm);
plProfile_CreateTimer("    FindSpan", "PipeT", FindSpan);
plProfile_CreateTimer("    FindActiveLights", "PipeT", FindActiveLights);
plProfile_CreateTimer("    ApplyActiveLights", "PipeT", ApplyActiveLights);
plProfile_CreateTimer("      ApplyMoving", "PipeT", ApplyMoving);
plProfile_CreateTimer("      ApplyToSpec", "PipeT", ApplyToSpec);
plProfile_CreateTimer("      ApplyToMoving", "PipeT", ApplyToMoving);
plProfile_CreateTimer("     ClearLights", "PipeT", ClearLights);
plProfile_CreateTimer("RenderSpan", "PipeT", RenderSpan);
plProfile_CreateTimer("  MergeCheck", "PipeT", MergeCheck);
plProfile_CreateTimer("  MergeSpan", "PipeT", MergeSpan);
plProfile_CreateTimer("  SpanTransforms", "PipeT", SpanTransforms);
plProfile_CreateTimer("  SpanFog", "PipeT", SpanFog);
plProfile_CreateTimer("  SelectLights", "PipeT", SelectLights);
plProfile_CreateTimer("  SelectProj", "PipeT", SelectProj);
plProfile_CreateTimer("  CheckDyn", "PipeT", CheckDyn);
plProfile_CreateTimer("  CheckStat", "PipeT", CheckStat);
plProfile_CreateTimer("  RenderBuff", "PipeT", RenderBuff);
plProfile_CreateTimer("  RenderPrim", "PipeT", RenderPrim);
plProfile_CreateTimer("PlateMgr", "PipeT", PlateMgr);
plProfile_CreateTimer("DebugText", "PipeT", DebugText);
plProfile_CreateTimer("Reset", "PipeT", Reset);

// "PipeC" counters. DefaultMem and ManagedMem are the two totals updated by
// plDXPipeline::ProfilePoolMem().
plProfile_CreateMemCounter("DefMem", "PipeC", DefaultMem);
plProfile_CreateMemCounter("ManMem", "PipeC", ManagedMem);
plProfile_CreateMemCounterReset("CurrTex", "PipeC", CurrTex);
plProfile_CreateMemCounterReset("CurrVB", "PipeC", CurrVB);
plProfile_CreateMemCounter("TexTot", "PipeC", TexTot);
plProfile_CreateMemCounterReset("fTexUsed", "PipeC", fTexUsed);
plProfile_CreateMemCounterReset("fTexManaged", "PipeC", fTexManaged);
plProfile_CreateMemCounterReset("fVtxUsed", "PipeC", fVtxUsed);
plProfile_CreateMemCounterReset("fVtxManaged", "PipeC", fVtxManaged);
plProfile_CreateMemCounter("ManSeen", "PipeC", ManSeen);
plProfile_CreateCounterNoReset("ManEvict", "PipeC", ManEvict);
plProfile_CreateCounter("LightOn", "PipeC", LightOn);
plProfile_CreateCounter("LightVis", "PipeC", LightVis);
plProfile_CreateCounter("LightChar", "PipeC", LightChar);
plProfile_CreateCounter("LightActive", "PipeC", LightActive);
plProfile_CreateCounter("Lights Found", "PipeC", FindLightsFound);
plProfile_CreateCounter("Perms Found", "PipeC", FindLightsPerm);
plProfile_CreateCounter("Merge", "PipeC", SpanMerge);
plProfile_CreateCounter("TexNum", "PipeC", NumTex);
plProfile_CreateCounter("LiState", "PipeC", MatLightState);
plProfile_CreateCounter("OccPoly", "PipeC", OccPolyUsed);
plProfile_CreateCounter("OccNode", "PipeC", OccNodeUsed);
plProfile_CreateCounter("NumSkin", "PipeC", NumSkin);
plProfile_CreateCounter("AvatarFaces", "PipeC", AvatarFaces);
plProfile_CreateCounter("VertexChange", "PipeC", VertexChange);
plProfile_CreateCounter("IndexChange", "PipeC", IndexChange);
plProfile_CreateCounter("DynVBuffs", "PipeC", DynVBuffs);
plProfile_CreateCounter("EmptyList", "PipeC", EmptyList);
plProfile_CreateCounter("AvRTPoolUsed", "PipeC", AvRTPoolUsed);
plProfile_CreateCounter("AvRTPoolCount", "PipeC", AvRTPoolCount);
plProfile_CreateCounter("AvRTPoolRes", "PipeC", AvRTPoolRes);
plProfile_CreateCounter("AvRTShrinkTime", "PipeC", AvRTShrinkTime);

#ifndef PLASMA_EXTERNAL_RELEASE
/// Fun inlines for keeping track of surface creation/deletion memory
// Profiling: charge a newly created surface to the default-pool total and to the
// "Pipeline Surfaces" counter. The size is estimated as
// width * height * bits-per-pixel / 8 plus sizeof(IDirect3DSurface9).
// A nil surface is ignored.
void D3DSURF_MEMNEW(IDirect3DSurface9* surf)
{
	if( !surf )
		return;

	D3DSURFACE_DESC	desc;
	surf->GetDesc( &desc );
	PROFILE_POOL_MEM(D3DPOOL_DEFAULT, desc.Width * desc.Height * plDXPipeline::GetDXBitDepth(desc.Format) / 8 + sizeof(IDirect3DSurface9), true, "D3DSurface");
	plProfile_NewMem(MemPipelineSurfaces, desc.Width * desc.Height * plDXPipeline::GetDXBitDepth(desc.Format) / 8 + sizeof(IDirect3DSurface9));
}

// Profiling: charge a newly created texture, measured by its top mip level (level 0)
// only. A nil texture is ignored.
void D3DSURF_MEMNEW(IDirect3DTexture9* tex)
{
	if( !tex )
		return;

	// Start from null: if GetSurfaceLevel() fails without writing the out-parameter we
	// must not test, account for, or Release() an indeterminate pointer.
	IDirect3DSurface9* surf = 0;
	tex->GetSurfaceLevel(0, &surf);
	if( surf )
	{
		D3DSURF_MEMNEW(surf);
		surf->Release();	// balance the reference GetSurfaceLevel() handed us
	}
}

// Profiling: charge a newly created cube texture. Only the level-0 +X face is
// queried; it is counted six times, i.e. all six faces are assumed to be the same
// size and format. A nil texture is ignored.
void D3DSURF_MEMNEW(IDirect3DCubeTexture9* cTex)
{
	if( !cTex )
		return;

	// Start from null: if GetCubeMapSurface() fails without writing the out-parameter
	// we must not test, account for, or Release() an indeterminate pointer.
	IDirect3DSurface9* surf = 0;
	cTex->GetCubeMapSurface(D3DCUBEMAP_FACE_POSITIVE_X, 0, &surf);
	if( surf )
	{
		// One accounting call per cube face.
		for( int face = 0; face < 6; face++ )
			D3DSURF_MEMNEW(surf);
		surf->Release();	// balance the reference GetCubeMapSurface() handed us
	}
}

// Profiling: remove a surface from the default-pool total and the "Pipeline Surfaces"
// counter, using the same size estimate as D3DSURF_MEMNEW
// (width * height * bits-per-pixel / 8 plus sizeof(IDirect3DSurface9)).
// A nil surface is ignored.
void D3DSURF_MEMDEL(IDirect3DSurface9* surf)
{
	if( !surf )
		return;

	D3DSURFACE_DESC	desc;
	surf->GetDesc( &desc );
	PROFILE_POOL_MEM(D3DPOOL_DEFAULT, desc.Width * desc.Height * plDXPipeline::GetDXBitDepth(desc.Format) / 8 + sizeof(IDirect3DSurface9), false, "D3DSurface");
	plProfile_DelMem(MemPipelineSurfaces, desc.Width * desc.Height * plDXPipeline::GetDXBitDepth(desc.Format) / 8 + sizeof(IDirect3DSurface9));
}

// Profiling: remove a texture from the totals, measured by its top mip level
// (level 0) only. A nil texture is ignored.
void D3DSURF_MEMDEL(IDirect3DTexture9* tex)
{
	if( !tex )
		return;

	// Start from null: if GetSurfaceLevel() fails without writing the out-parameter we
	// must not test, account for, or Release() an indeterminate pointer.
	IDirect3DSurface9* surf = 0;
	tex->GetSurfaceLevel(0, &surf);
	if( surf )
	{
		D3DSURF_MEMDEL(surf);
		surf->Release();	// balance the reference GetSurfaceLevel() handed us
	}
}

// Profiling: remove a cube texture from the totals. Only the level-0 +X face is
// queried; it is counted six times, i.e. all six faces are assumed to be the same
// size and format. A nil texture is ignored.
void D3DSURF_MEMDEL(IDirect3DCubeTexture9* cTex)
{
	if( !cTex )
		return;

	// Start from null: if GetCubeMapSurface() fails without writing the out-parameter
	// we must not test, account for, or Release() an indeterminate pointer.
	IDirect3DSurface9* surf = 0;
	cTex->GetCubeMapSurface(D3DCUBEMAP_FACE_POSITIVE_X, 0, &surf);
	if( surf )
	{
		// One accounting call per cube face.
		for( int face = 0; face < 6; face++ )
			D3DSURF_MEMDEL(surf);
		surf->Release();	// balance the reference GetCubeMapSurface() handed us
	}
}
#else
// PLASMA_EXTERNAL_RELEASE builds: surface memory accounting is compiled out, so all
// six helpers are empty and ignore their argument.
void D3DSURF_MEMNEW(IDirect3DSurface9* surf) {}
void D3DSURF_MEMNEW(IDirect3DTexture9* tex) {}
void D3DSURF_MEMNEW(IDirect3DCubeTexture9* cTex) {}
void D3DSURF_MEMDEL(IDirect3DSurface9* surf) {}
void D3DSURF_MEMDEL(IDirect3DTexture9* tex) {}
void D3DSURF_MEMDEL(IDirect3DCubeTexture9* cTex) {}
#endif // PLASMA_EXTERNAL_RELEASE

#ifndef PLASMA_EXTERNAL_RELEASE
// Adds to or subtracts from one of two profiling memory totals.
//   poolType - D3DPOOL_MANAGED selects the ManagedMem counter; every other pool
//              value is charged to DefaultMem.
//   size     - number of bytes to add or remove.
//   add      - true to add, false to remove.
//   id       - label for the allocation. Only the (disabled) per-call "pipeline.log"
//              status lines ever used it, so it is currently unused.
void plDXPipeline::ProfilePoolMem(D3DPOOL poolType, UInt32 size, hsBool add, char *id)
{
	if( poolType == D3DPOOL_MANAGED )
	{
		if (add)
		{
			plProfile_NewMem(ManagedMem, size);
		}
		else
		{
			plProfile_DelMem(ManagedMem, size);
		}
		return;
	}

	// Anything that is not the managed pool.
	if (add)
	{
		plProfile_NewMem(DefaultMem, size);
	}
	else
	{
		plProfile_DelMem(DefaultMem, size);
	}
}
#endif // PLASMA_EXTERNAL_RELEASE

/////////////////////////////////////////////////////////////////////////////////////////
// Implementations of RenderPrims types.
// Currently support render tri list
// These allow the same setup code path to be followed, no matter what primitive type
// (i.e. which data-type/draw-call) is going to be issued once the render state is set.
// Originally useful to make one code path for trilists, tri-patches, and rect-patches, but
// we've since dropped support for patches. We still use the RenderNil function to allow the
// code to go through all the state setup without knowing whether a render call is going to
// come out the other end.
// Would allow easy extension for supporting tristrips or pointsprites, but we've never had
// a strong reason to use either.
// First, Declarations.

// Adding a nil RenderPrim for turning off drawing.
// RenderPrims() issues no draw call and always returns false.
class plRenderNilFunc : public plRenderPrimFunc
{
public:
	plRenderNilFunc() {}

	virtual hsBool RenderPrims() const { return false; }
};
// File-local shared instance of the nil render function.
static plRenderNilFunc sRenderNil;

// Render function for an indexed triangle list. It captures the device and the
// arguments for a single DrawIndexedPrimitive( D3DPT_TRIANGLELIST, ... ) call, which
// RenderPrims() then issues.
class plRenderTriListFunc : public plRenderPrimFunc
{
protected:
	LPDIRECT3DDEVICE9	fD3DDevice;			// device the draw call is issued on
	int					fBaseVertexIndex;	// 2nd argument of DrawIndexedPrimitive
	int					fVStart;			// 3rd argument
	int					fVLength;			// 4th argument
	int					fIStart;			// 5th argument
	int					fNumTris;			// 6th argument; also added to the triangle counters

public:
	plRenderTriListFunc(LPDIRECT3DDEVICE9 d3dDevice, int baseVertexIndex,
						int vStart, int vLength, int iStart, int iNumTris)
		: fD3DDevice(d3dDevice),
		  fBaseVertexIndex(baseVertexIndex),
		  fVStart(vStart),
		  fVLength(vLength),
		  fIStart(iStart),
		  fNumTris(iNumTris)
	{
	}

	// Issues the draw call; returns true if it FAILED.
	virtual hsBool RenderPrims() const;
};

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Implementations

// Bumps the triangle/draw-call profile counters, then draws the stored range as an
// indexed triangle list.
// Returns true when DrawIndexedPrimitive reports failure and false when it succeeds.
hsBool plRenderTriListFunc::RenderPrims() const
{
	plProfile_IncCount(DrawFeedTriangles, fNumTris);
	plProfile_IncCount(DrawTriangles, fNumTris);
	plProfile_Inc(DrawPrimStatic);

	if( FAILED( fD3DDevice->DrawIndexedPrimitive( D3DPT_TRIANGLELIST, fBaseVertexIndex, fVStart, fVLength, fIStart, fNumTris ) ) )
		return true;

	return false;
}

//// Constructor & Destructor /////////////////////////////////////////////////

// Static usage counters shared by all plDXPipeline instances, zero-initialized.
// They share names with the fTexUsed/fTexManaged/fVtxUsed/fVtxManaged profile
// counters declared above; how they are updated is not visible in this part of the file.
UInt32 plDXPipeline::fTexUsed(0);
UInt32 plDXPipeline::fTexManaged(0);
UInt32 plDXPipeline::fVtxUsed(0);
UInt32 plDXPipeline::fVtxManaged(0);

// Builds a pipeline for window hWnd using the device and display mode in devModeRec.
//
// Sequence: clear all members, record the screen mode, enumerate D3D devices and select
// the requested one, create the D3D master object, record driver/device/mode, derive
// capability flags, then create the device objects.
//
// Failure handling: every failure path reports through IShowErrorMessage() and returns
// immediately. The object is still constructed, but the remaining setup steps have not
// run. Nothing is thrown.
//
// devModeRec is dereferenced without a nil check, so callers must pass a valid record.
plDXPipeline::plDXPipeline( hsWinRef hWnd, const hsG3DDeviceModeRecord *devModeRec )
:	fManagedAlloced(false),
	fAllocUnManaged(false)
{
	// The layer UVW source constants are passed straight through as D3D TCI values, so
	// the two sets of enums must stay numerically identical.
	hsAssert(D3DTSS_TCI_PASSTHRU == plLayerInterface::kUVWPassThru, "D3D Enum has changed. Notify graphics department.");
	hsAssert(D3DTSS_TCI_CAMERASPACENORMAL == plLayerInterface::kUVWNormal, "D3D Enum has changed. Notify graphics department.");
	hsAssert(D3DTSS_TCI_CAMERASPACEPOSITION == plLayerInterface::kUVWPosition, "D3D Enum has changed. Notify graphics department.");
	hsAssert(D3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR == plLayerInterface::kUVWReflect, "D3D Enum has changed. Notify graphics department.");

	// Initialize everything to NULL.
	IClearMembers();

	// Get the requested mode and setup
	const hsG3DDeviceRecord *devRec = devModeRec->GetDevice();
	const hsG3DDeviceMode *devMode = devModeRec->GetMode();

	/// Init our screen mode
	fSettings.fHWnd = hWnd;
	if(!fInitialPipeParams.Windowed)
	{
		// Fullscreen: size comes from the selected display mode.
		fSettings.fOrigWidth = devMode->GetWidth();
		fSettings.fOrigHeight = devMode->GetHeight();
	}
	else
	{
		// windowed can run in any mode
		fSettings.fOrigHeight = fInitialPipeParams.Height;
		fSettings.fOrigWidth = fInitialPipeParams.Width;
	}
	IGetViewTransform().SetScreenSize((UInt16)(fSettings.fOrigWidth), (UInt16)(fSettings.fOrigHeight));
	fSettings.fColorDepth = devMode->GetColorDepth();
	fVSync = fInitialPipeParams.VSync;

	// NOTE(review): fNumAASamples is assigned again from fInitialPipeParams further
	// down, before ISetCaps() runs. Unless one of the calls in between reads it, the
	// value computed here is overwritten unused -- confirm.
	if( devRec->GetAASetting() == 0 )
		fSettings.fNumAASamples = 0;
	else
		fSettings.fNumAASamples = devMode->GetFSAAType( devRec->GetAASetting() - 1 );

	// Enumerate devices; a non-empty error string means enumeration failed.
	hsGDirect3DTnLEnumerate d3dEnum;
	if( d3dEnum.GetEnumeErrorStr()[ 0 ] )
	{
		IShowErrorMessage( (char *)d3dEnum.GetEnumeErrorStr() );
		return;
	}

	// A non-zero return from SelectFromDevMode is treated as failure.
	if( d3dEnum.SelectFromDevMode(devRec, devMode) )
	{
		IShowErrorMessage( (char *)d3dEnum.GetEnumeErrorStr() );
		return;
	}

	// Gotta create this very first, so that the device/driver init works
	if( !fD3DObject )
	{
		// A non-zero return from ICreateMaster is treated as failure.
		if( ICreateMaster() )
		{
			IShowErrorMessage( "Cannot create D3D master object" );
			return;
		}
	}

	// Record the requested mode/setup.
	ISetCurrentDriver( d3dEnum.GetCurrentDriver() );
	ISetCurrentDevice( d3dEnum.GetCurrentDevice() );
	// NOTE(review): pModeInfo is dereferenced without a nil check; this relies on
	// SelectFromDevMode having succeeded above.
	D3DEnum_ModeInfo *pModeInfo = d3dEnum.GetCurrentMode();
	pModeInfo->fWindowed = fInitialPipeParams.Windowed;		// set windowed mode from ini file
	ISetCurrentMode( d3dEnum.GetCurrentMode() );

	fSettings.fFullscreen = !fCurrentMode->fWindowed;

	// This is the value ISetCaps() tests when deciding on kCapsFSAntiAlias.
	fSettings.fNumAASamples = fInitialPipeParams.AntiAliasingAmount;

	// ISetCaps just records the card capabilities that were passed in.
	ISetCaps();
	// IRestrictCaps looks over those explicit caps and makes some decisions on
	// what the card can really do.
	IRestrictCaps( *devRec );

	// Clamp the requested anisotropy level to what the device reports.
	fSettings.fMaxAnisotropicSamples = fInitialPipeParams.AnisotropicLevel;
	if(fSettings.fMaxAnisotropicSamples > fCurrentDevice->fDDCaps.MaxAnisotropy)
		fSettings.fMaxAnisotropicSamples = (UInt8)fCurrentDevice->fDDCaps.MaxAnisotropy;


	// Size of the dynamic vertex buffer. kDefaultDynIdxSize is declared but not used in
	// this function; plConst comes from plTweak.h and may do more than declare a
	// constant, so it is left in place.
	plConst(UInt32) kDefaultDynVtxSize(32000 * 44);
	plConst(UInt32)	kDefaultDynIdxSize(0 * plGBufferGroup::kMaxNumIndicesPerBuffer * 2);
	fDynVtxSize = kDefaultDynVtxSize;
	fVtxRefTime = 0;

	// Go create surfaces and DX-dependent objects
	// (a non-zero return from ICreateDeviceObjects is treated as failure)
	if( ICreateDeviceObjects() )
	{
		IShowErrorMessage( "Cannot create Direct3D device" );
		return;
	}
	/*plStatusLog::AddLineS("pipeline.log", "Supported Resolutions:");
	std::vector<plDisplayMode> temp;
	GetSupportedDisplayModes( &temp, 16 );
	for(int i = 0; i < temp.size(); i++)
	{
		plStatusLog::AddLineS("pipeline.log", "%d, %d, %d", temp[i].Width, temp[i].Height, 16);
	}
	temp.clear();
	GetSupportedDisplayModes( &temp, 32 );
	for(int i = 0; i < temp.size(); i++)
	{
		plStatusLog::AddLineS("pipeline.log", "%d, %d, %d", temp[i].Width, temp[i].Height, 32);
	}*/

}

// Cleanup - Most happens in IReleaseDeviceObjects().
// Also drops the cull-tree debug proxy, deletes the recorded driver/device/mode
// objects and clears both clothing-outfit lists.
plDXPipeline::~plDXPipeline()
{
	fCurrLay = nil;
	// The current material must already have been released by the time we get here.
	hsAssert( fCurrMaterial == nil, "Current material not unrefed properly" );

	// fCullProxy is a debugging representation of our CullTree. See plCullTree.cpp,
	// plScene/plOccluder.cpp and plScene/plOccluderProxy.cpp for more info
	if( fCullProxy )
		fCullProxy->GetKey()->UnRefObject();
	// NOTE(review): these three are deleted before IReleaseDeviceObjects() runs, and the
	// pointers are not reset to nil. Confirm IReleaseDeviceObjects() does not
	// dereference fCurrentDriver/fCurrentDevice/fCurrentMode.
	delete fCurrentDriver;
	delete fCurrentDevice;
	delete fCurrentMode;

	IReleaseDeviceObjects();
	IClearClothingOutfits(&fClothingOutfits);
	IClearClothingOutfits(&fPrevClothingOutfits);
}

//// IClearMembers ////////////////////////////////////////////////////////////
// Initialize everything to a nil state.
// This does not initialize to a working state, but to a state that can be
// built from. For example, the fD3DObject pointer is set to nil so that it's safe
// to delete or set to a valid pointer. It must be set to a valid pointer
// before the pipeline can be used for much.
// After the core initialization is done (in ICreateMaster and ICreateDeviceObjects)
// render state will be initialized in IInitDeviceState.

// Puts every member into a nil/zero "nothing created yet" state. Pointers are only
// overwritten, never released, so this must not be called while they own anything
// (the constructor calls it first thing).
void	plDXPipeline::IClearMembers()
{
	/// Clear some stuff
	// Heads of the device-resource ref lists.
	fVtxBuffRefList = nil;
	fIdxBuffRefList = nil;
	fTextureRefList = nil;
	fTextFontRefList = nil;
	fRenderTargetRefList = nil;
	fVShaderRefList = nil;
	fPShaderRefList = nil;
	fCurrMaterial = nil;
	fCurrLay = nil;
	fCurrRenderLayer = 0;
#if MCN_BOUNDS_SPANS
	fBoundsMat = nil;
	fBoundsSpans = nil;
#endif
	fPlateMgr = nil;
	fLogDrawer = nil;
	fDebugTextMgr = nil;
	fCurrLightingMethod = plSpan::kLiteMaterial;

	fCurrCullMode = D3DCULL_CW;
	fTexturing = false;
	fCurrNumLayers = 0;
	fLastEndingStage = -1;

	// Sub-structs reset themselves.
	fSettings.Reset();
	fStencil.Reset();
	fTweaks.Reset();
	fLights.Reset(this);
	fCurrFog.Reset();
	fDeviceLost = false;
	fDevWasLost = false;

	// (fSettings.Reset() above already zeroes fCurrFVFFormat; this repeats it.)
	fSettings.fCurrFVFFormat = 0;
	fDynVtxBuff = nil;
	fNextDynVtx = 0;

	int i;
	for( i = 0; i < 8; i++ )
		fLayerRef[i] = nil;

	IResetRenderTargetPools();
	fULutTextureRef = nil;
	for( i = 0; i < kMaxRenderTargetNext; i++ )
		fBlurVBuffers[i] = nil;
	fBlurVSHandle = nil;

	// Core D3D objects and surfaces.
	fD3DObject = nil;
	fD3DDevice = nil;
	fD3DBackBuff = nil;
	fD3DDepthSurface = nil;
	fD3DMainSurface = nil;

	fSharedDepthSurface[0] = nil;
	fSharedDepthFormat[0] = D3DFMT_UNKNOWN;
	fSharedDepthSurface[1] = nil;
	fSharedDepthFormat[1] = D3DFMT_UNKNOWN;

	// Driver/device/mode records; the constructor fills these in through
	// ISetCurrentDriver/Device/Mode and the destructor deletes them.
	fCurrentMode = nil;
	fCurrentDriver = nil;
	fCurrentDevice = nil;

	// Layer override and piggyback state.
	fOverLayerStack.Reset();
	fOverBaseLayer = nil;
	fOverAllLayer = nil;
	fPiggyBackStack.Reset();
	fMatPiggyBacks = 0;
	fActivePiggyBacks = 0;

	for( i = 0; i < 8; i++ )
	{
		fLayerState[i].Reset();
		fOldLayerState[i].Reset();
	}
	fMatOverOn.Reset();
	fMatOverOff.Reset();
//	SetMaterialOverride( hsGMatState::kShade, hsGMatState::kShadeSpecularHighlight, false );

	fView.Reset();

	fCullProxy = nil;

	fTime = 0;
	fFrame = 0;

	fInSceneDepth = 0;
	fTextUseTime = 0;
	fEvictTime = 0;
	fManagedSeen = 0;
	fManagedCutoff = 0;
	fRenderCnt = 0;

	fDebugFlags.Clear();

	fForceMatHandle = true;
	// Avatar render-target pool bookkeeping.
	fAvRTShrinkValidSince = 0;
	fAvRTWidth = 1024;
	fAvNextFreeRT = 0;
}

// plDXViewSettings are just a convenience member struct to segregate the current view settings.
//
// Reset - Initialize the ViewSettings to default (normal/neutral) values.
void plDXViewSettings::Reset()
{
	// Normal render, on clear, clear the color buffer and depth buffer.
	fRenderState = plPipeline::kRenderNormal | plPipeline::kRenderClearColor | plPipeline::kRenderClearDepth;

	fRenderRequest = nil;

	fDrawableTypeMask = plDrawable::kNormal;
	fSubDrawableTypeMask = plDrawable::kSubNormal;

	// Clear color to black, depth to yon.
	fClearColor = 0;
	fClearDepth = 1.f;
	fDefaultFog.Clear();

	// Want to limit the number of nodes in the cull tree. After adding so many nodes,
	// the benefits (#objects culled) falls off, but the cost (evaluating objects against
	// node planes) keeps rising.
	const UInt16 kCullMaxNodes = 250;
	fCullTree.Reset();
	fCullTreeDirty = true;
	fCullMaxNodes = kCullMaxNodes;

	// Object Local to world transform and its inverse.
	fLocalToWorld.Reset();
	fWorldToLocal.Reset();

	// see Core/plViewTransform.h
	fTransform.Reset();

	fTransform.SetScreenSize(800, 600);

	// Keep track of handedness of local to world and camera transform for winding.
	fLocalToWorldLeftHanded = false;
	fWorldToCamLeftHanded = false;
}

//// plDXGeneralSettings::Reset //////////////////////////////////////////////
// Catch all struct of general settings plus pointers to current d3d objects.

// Restores the general settings to their defaults and nils the cached pointers to
// current D3D objects. The pointers are only overwritten, not released.
// It does not assign fOrigWidth, fOrigHeight, fNumAASamples or fMaxAnisotropicSamples;
// the plDXPipeline constructor sets those.
void	plDXGeneralSettings::Reset()
{
	// --- Window and display ---
	fHWnd = nil;
	fFullscreen = false;
	fColorDepth = 32;
	fClearColor = 0;
	fProperties = 0;

	// --- Capabilities, limits and board workarounds ---
	fD3DCaps = 0;
	fBoardKluge = 0;
	fStageEnd = 0;
	fMaxNumLights = kD3DMaxTotalLights;
	fMaxNumProjectors = kMaxProjectors;
	fMaxLayersAtOnce = 1;
	fMaxPiggyBacks = 0;
	fMaxUVWSrc = 8;
	fBoundsDrawLevel = -1;

	// --- Feature and quirk flags ---
	fNoGammaCorrect = false;
	fCantProj = false;
	fLimitedProj = false;
	fBadManaged = false;
	fShareDepth = false;
	fCurrAnisotropy = false;
	fIsIntel = false;
	fVeryAnnoyingTextureInvalidFlag = false;

	// --- Last D3D error ---
	fDXError = D3D_OK;
	memset( fErrorStr, 0, sizeof( fErrorStr ) );

	// --- Currently bound buffers, render targets and shaders ---
	fCurrVertexBuffRef = nil;
	fCurrIndexBuffRef = nil;
	fCurrRenderTarget = nil;
	fCurrBaseRenderTarget = nil;
	fCurrD3DMainSurface = nil;
	fCurrD3DDepthSurface = nil;
	fCurrRenderTargetRef = nil;
	fCurrFVFFormat = 0;
	fCurrVertexShader = nil;
	fCurrPixelShader = nil;
}

//// IInitDeviceState /////////////////////////////////////////////////////////
// Initialize the device to a known state. This also syncs it up with our internal state
// as recorded in the fLayerStates.
// Some of these states reflect the caps of the device, but for the most part, the
// important thing here is NOT what state we're in coming out of this function, but
// that we are in a known state, and that the known state is recorded in fLayerStates.
void	plDXPipeline::IInitDeviceState()
{
	fLayerState[0].Reset();
	fCurrCullMode = D3DCULL_CW;

	/// Set D3D states
	fCurrFog.Reset();
	ISetFogParameters( nil, nil );

	fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_LESSEQUAL );
	fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );
	fD3DDevice->SetRenderState( D3DRS_ZENABLE,		( fSettings.fD3DCaps & kCapsWBuffer ) ? D3DZB_USEW : D3DZB_TRUE );
	fD3DDevice->SetRenderState( D3DRS_CLIPPING,		TRUE );
	fD3DDevice->SetRenderState( D3DRS_CULLMODE,		fCurrCullMode );
	ISetCullMode();

	fD3DDevice->SetRenderState( D3DRS_ALPHATESTENABLE,	TRUE );
	fD3DDevice->SetRenderState( D3DRS_ALPHAFUNC,		D3DCMP_GREATEREQUAL );
	fD3DDevice->SetRenderState( D3DRS_ALPHAREF,			0x00000001 );

	fD3DDevice->SetRenderState( D3DRS_MULTISAMPLEANTIALIAS, ( fSettings.fD3DCaps & kCapsFSAntiAlias ) ? TRUE : FALSE );
	fD3DDevice->SetRenderState( D3DRS_ANTIALIASEDLINEENABLE,		FALSE );

	fD3DDevice->SetRenderState( D3DRS_DITHERENABLE,		( fSettings.fD3DCaps & kCapsDither ) ? TRUE : FALSE );
	fD3DDevice->SetRenderState( D3DRS_SPECULARENABLE,	FALSE );
	fD3DDevice->SetRenderState( D3DRS_LIGHTING,			FALSE );
	fCurrD3DLiteState = false;
	fD3DDevice->SetRenderState( D3DRS_TEXTUREFACTOR,	0x0 );
	fD3DDevice->SetRenderState( D3DRS_STENCILENABLE,	FALSE );
    fD3DDevice->SetTransform( D3DTS_TEXTURE0,			&d3dIdentityMatrix );
	fD3DDevice->SetTransform( D3DTS_WORLD,				&d3dIdentityMatrix );

	/// NEW: to compensate for scaling transformations that might screw up our nicely
	/// normalized normals. Note: nVidia says this is as fast or faster than with
	/// this disabled, but who knows what it'll do on other cards...
	fD3DDevice->SetRenderState( D3DRS_NORMALIZENORMALS, TRUE );
	fD3DDevice->SetRenderState( D3DRS_LOCALVIEWER, TRUE );

	UInt32 totalMem = fD3DDevice->GetAvailableTextureMem();
	plProfile_Set(TotalTexSize, totalMem);

	// Initialization for all 8 stages (even though we only use a few of them).
	int i;
	for( i = 0; i < 8; i++ )
	{
		fLayerLODBias[ i ] = fTweaks.fDefaultLODBias;
		fLayerTransform[ i ] = false;
		fLayerRef[ i ] = nil;
		fLayerUVWSrcs[ i ] = i;
		fLayerState[ i ].Reset();

		fD3DDevice->SetTexture( i, nil );
		fD3DDevice->SetTextureStageState( i, D3DTSS_TEXCOORDINDEX, i );
		fD3DDevice->SetSamplerState( i, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP  );
		fD3DDevice->SetSamplerState( i, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP  );
		fD3DDevice->SetSamplerState( i, D3DSAMP_MIPMAPLODBIAS, *(DWORD *)( &fLayerLODBias[ i ] ) );

		if( fSettings.fMaxAnisotropicSamples > 0 && !IsDebugFlagSet(plPipeDbg::kFlagNoAnisotropy))
		{
			fD3DDevice->SetSamplerState( i, D3DSAMP_MINFILTER, D3DTEXF_ANISOTROPIC );
			fD3DDevice->SetSamplerState( i, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR );
			fD3DDevice->SetSamplerState( i, D3DSAMP_MAXANISOTROPY, (DWORD)fSettings.fMaxAnisotropicSamples );
			fSettings.fCurrAnisotropy = true;
		}
		else
		{
			fD3DDevice->SetSamplerState( i, D3DSAMP_MINFILTER, D3DTEXF_LINEAR );
			fD3DDevice->SetSamplerState( i, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR );
			fSettings.fCurrAnisotropy = false;
		}
		fD3DDevice->SetSamplerState( i, D3DSAMP_MIPFILTER, D3DTEXF_LINEAR );

		fD3DDevice->SetTransform( sTextureStages[ i ], &d3dIdentityMatrix );
		fLayerXformFlags[ i ] = D3DTTFF_COUNT2;
		fD3DDevice->SetTextureStageState( i, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_COUNT2 );
	}

	// Initialize our bump mapping matrices.
	for( i = 0; i < 4; i++ )
	{
		int j;
		for( j = 0; j < 4; j++ )
		{
			fBumpDuMatrix.fMap[i][j] = 0;
			fBumpDvMatrix.fMap[i][j] = 0;
			fBumpDwMatrix.fMap[i][j] = 0;

		}
	}
	fBumpDuMatrix.NotIdentity();
	fBumpDvMatrix.NotIdentity();
	fBumpDwMatrix.NotIdentity();

	PushMaterialOverride( hsGMatState::kShade, hsGMatState::kShadeSpecularHighlight, false );

	fLights.Reset(this);

	// Tell the light infos to unlink themselves
	while (fActiveLights)
		UnRegisterLight(fActiveLights);

	return;
}

//// ISetCaps /////////////////////////////////////////////////////////////////
// We've recorded the capabilities of the current device in fCurrentDevice (traditionally in the setup program),
// now translate that into our own caps flags.
void	plDXPipeline::ISetCaps()
{
	fSettings.fD3DCaps = kCapsNone;

	// Set relevant caps (ones we can do something about).
	if( fCurrentDevice->fDDCaps.RasterCaps & D3DPRASTERCAPS_DEPTHBIAS )
		fSettings.fD3DCaps |= kCapsZBias;
	if( fCurrentDevice->fDDCaps.RasterCaps & D3DPRASTERCAPS_FOGRANGE )
		fSettings.fD3DCaps |= kCapsRangeFog;
	if( fCurrentDevice->fDDCaps.RasterCaps & D3DPRASTERCAPS_FOGTABLE )
		fSettings.fD3DCaps |= kCapsLinearFog | kCapsExpFog | kCapsExp2Fog | kCapsPixelFog;
	else
		fSettings.fD3DCaps |= kCapsLinearFog;
	if( fCurrentDevice->fDDCaps.TextureFilterCaps & D3DPTFILTERCAPS_MIPFLINEAR )
		fSettings.fD3DCaps |= kCapsMipmap;
	if( fCurrentDevice->fDDCaps.TextureCaps & D3DPTEXTURECAPS_MIPCUBEMAP )
		fSettings.fD3DCaps |= kCapsCubicMipmap;
	if( fCurrentDevice->fDDCaps.RasterCaps & D3DPRASTERCAPS_WBUFFER )
		fSettings.fD3DCaps |= kCapsWBuffer;
	if( fCurrentDevice->fDDCaps.RasterCaps & D3DPRASTERCAPS_DITHER )
		fSettings.fD3DCaps |= kCapsDither;
	if( fSettings.fNumAASamples > 0 )
		fSettings.fD3DCaps |= kCapsFSAntiAlias;
	if( fCurrentDevice->fDDCaps.RasterCaps & D3DPRASTERCAPS_WFOG )
		fSettings.fD3DCaps |= kCapsDoesWFog;
	if( fCurrentDevice->fDDCaps.TextureCaps & D3DPTEXTURECAPS_CUBEMAP )
		fSettings.fD3DCaps |= kCapsCubicTextures;

	/// New 1.5.2000 - cull out mixed vertex processing
	if( fCurrentDevice->fDDCaps.DevCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT
		&& fCurrentMode->fDDBehavior == D3DCREATE_HARDWARE_VERTEXPROCESSING
		)
		fSettings.fD3DCaps |= kCapsHWTransform;


	// Currently always want d3d to transform
	fSettings.fD3DCaps |= kCapsHWTransform;

	/// Always assume we can do small textures (IRestrictCaps will turn this off
	/// if necessary)
	fSettings.fD3DCaps |= kCapsDoesSmallTextures;

	/// Look for supported texture formats
	if( IFindCompressedFormats() )
		fSettings.fD3DCaps |= kCapsCompressTextures;
	if( IFindLuminanceFormats() )
		fSettings.fD3DCaps |= kCapsLuminanceTextures;

	/// Max # of hardware lights
	fSettings.fMaxNumLights = fCurrentDevice->fDDCaps.MaxActiveLights;
	if( fSettings.fMaxNumLights > kD3DMaxTotalLights )
		fSettings.fMaxNumLights = kD3DMaxTotalLights;

	// Intel Extreme chips report 0 lights, meaning T&L is done
	// in software, so you can have as many lights as you want.
	// We only need 8, so set that here. Also turn off shadows,
	// since the extreme can't really afford them, and record
	// the fact this is the extreme for other driver problem
	// workarounds.
	if( !fSettings.fMaxNumLights )
	{
		fSettings.fMaxNumLights = kD3DMaxTotalLights;
		fSettings.fIsIntel = true;
		plShadowCaster::SetCanShadowCast(false);
	}

	/// Max # of textures at once
	fSettings.fMaxLayersAtOnce = fCurrentDevice->fDDCaps.MaxSimultaneousTextures;
	if( fCurrentDevice->fDDCaps.DevCaps & D3DDEVCAPS_SEPARATETEXTUREMEMORIES )
		fSettings.fMaxLayersAtOnce = 1;
	// Alloc half our simultaneous textures to piggybacks.
	// Won't hurt us unless we try to many things at once.
	fSettings.fMaxPiggyBacks = fSettings.fMaxLayersAtOnce >> 1;

	// Less than 4 layers at once means we have to fallback on uv bumpmapping
	if (fSettings.fMaxLayersAtOnce < 4)
		SetDebugFlag(plPipeDbg::kFlagBumpUV, true);

	fSettings.fMaxAnisotropicSamples = (UInt8)(fCurrentDevice->fDDCaps.MaxAnisotropy);

	fSettings.fNoGammaCorrect = !(fCurrentDevice->fDDCaps.Caps2 & D3DCAPS2_FULLSCREENGAMMA);

	if (!(fCurrentDevice->fDDCaps.TextureCaps & D3DPTEXTURECAPS_PROJECTED))
		plDynamicCamMap::SetCapable(false);

	ISetGraphicsCapability(fCurrentDevice->fDDCaps.PixelShaderVersion);
}

// ISetGraphicsCapability ///////////////////////////////////////////////////////
// Tell our global quality settings what we can do. We'll use this to only load
// versions we can render. So if we can render it, we load it and skip its low quality substitute,
// if we can't render it, we skip it and load its low quality substitute.
// Naturally, this must happen before we do any loading.
//
// v is a D3D shader version DWORD (as found in D3DCAPS9::PixelShaderVersion).
// Mapping: major >= 2 -> kPS_2_Plus; 1.4 and up -> kPS_1_4; 1.1 to 1.3 -> kPS_1_1;
// anything else (including 1.0) leaves the capability untouched.
void plDXPipeline::ISetGraphicsCapability(UInt32 v)
{
	const int psMajor = D3DSHADER_VERSION_MAJOR(v);
	const int psMinor = D3DSHADER_VERSION_MINOR(v);

	if( psMajor > 1 )
	{
		plQuality::SetCapability(plQuality::kPS_2_Plus);
		return;
	}

	// No pixel shader support at all - nothing to report.
	if( psMajor < 1 )
		return;

	// Pixel shader 1.x from here on.
	if( psMinor >= 4 )
	{
		plQuality::SetCapability(plQuality::kPS_1_4);
		return;
	}
	if( psMinor >= 1 )
		plQuality::SetCapability(plQuality::kPS_1_1);
}

//// IRestrictCaps ////////////////////////////////////////////////////////////
// ISetCaps() sets our native caps based on the D3D caps bits D3D returns.
// IRestrictCaps looks at our hsG3DDeviceSelector flags and translates those
// into our runtime native caps.
// The DeviceSelector flags aren't set by what the board claims, but rather
// we try to identify the board and set them according to previous knowledge.
// For example, the ATI7500 will only use uvw coordinates 0 or 1. There's
// no d3d cap to reflect this, and it really should support [0..7], but
// there's no way to force it to be d3d compliant. So when we see we have
// an ATI7500, we set the cap kCapsMaxUVWSrc2.
// See hsG3DDeviceSelector.cpp for details and implementation.
//
// In order, this function:
//	- clears fSettings.fD3DCaps bits the device record doesn't vouch for,
//	- sets workaround flags/caps the device record asks for,
//	- copies the per-board tweak values (z-bias, LOD bias, fog) into fTweaks,
//	- clamps fSettings.fMaxLayersAtOnce,
//	- applies the debug overrides in fDbgSetupInitFlags,
//	- arms the win2k managed memory flush workaround (fManagedCutoff),
//	- finalizes z/w-buffer, bump mapping, HW transform, UVW source and
//	  anisotropy settings.
void	plDXPipeline::IRestrictCaps( const hsG3DDeviceRecord& devRec )
{
	/// Caps that get turned off when the device record doesn't list them
	if( !devRec.GetCap( hsG3DDeviceSelector::kCapsMipmap ) )
		fSettings.fD3DCaps &= ~kCapsMipmap;
	if( !devRec.GetCap( hsG3DDeviceSelector::kCapsCubicMipmap ) )
		fSettings.fD3DCaps &= ~kCapsCubicMipmap;
	if( !devRec.GetCap( hsG3DDeviceSelector::kCapsWBuffer ) )
		fSettings.fD3DCaps &= ~kCapsWBuffer;
	if( !devRec.GetCap( hsG3DDeviceSelector::kCapsZBias ) )
		fSettings.fD3DCaps &= ~kCapsZBias;
//	if( !devRec.GetCap( hsG3DDeviceSelector::kCapsHWTransform ) )
//		fSettings.fD3DCaps &= ~kCapsHWTransform;
	if( !devRec.GetCap( hsG3DDeviceSelector::kCapsDither ) )
		fSettings.fD3DCaps &= ~kCapsDither;
//	if( devRec.GetAASetting() == 0 )
//		fSettings.fD3DCaps &= ~kCapsFSAntiAlias;
	if( !devRec.GetCap( hsG3DDeviceSelector::kCapsFogExp ) )
		fSettings.fD3DCaps &= ~kCapsExpFog;
	if( !devRec.GetCap( hsG3DDeviceSelector::kCapsCubicTextures ) )
		fSettings.fD3DCaps &= ~kCapsCubicTextures;

	/// "Can't do" / workaround flags the device record turns on
	if( devRec.GetCap(hsG3DDeviceSelector::kCapsCantShadow) )
		plShadowCaster::SetCanShadowCast(false);

	if( devRec.GetCap(hsG3DDeviceSelector::kCapsCantProj) )
		fSettings.fCantProj = true;
	if( devRec.GetCap(hsG3DDeviceSelector::kCapsLimitedProj) )
		fSettings.fLimitedProj = true;
	if( devRec.GetCap(hsG3DDeviceSelector::kCapsBadManaged) )
		fSettings.fBadManaged = true;
	if( devRec.GetCap(hsG3DDeviceSelector::kCapsShareDepth) )
		fSettings.fShareDepth = true;

	/// Added 9.6.2000 mcn - shouldn't they be here anyway?
	if( !devRec.GetCap( hsG3DDeviceSelector::kCapsFogExp2 ) )
		fSettings.fD3DCaps &= ~kCapsExp2Fog;
	if( !devRec.GetCap( hsG3DDeviceSelector::kCapsDoesSmallTextures ) )
		fSettings.fD3DCaps &= ~kCapsDoesSmallTextures;

	/// 9.22.2000 mcn - dFlag for bad (savage4) yon fix
	if( devRec.GetCap( hsG3DDeviceSelector::kCapsBadYonStuff ) )
		fSettings.fD3DCaps |= kCapsHasBadYonStuff;

	/// 10.31.2000 mcn - Flag for can't-handle-under-8-pixel-dimensions-on-textures
	/// (see, isn't the name flag actually better in retrospect? :)
	if( devRec.GetCap( hsG3DDeviceSelector::kCapsNoKindaSmallTexs ) )
		fSettings.fD3DCaps |= kCapsNoKindaSmallTexs;

	/// Note: the following SHOULD be here, but we later detect for texture
	/// formats and reset this flag. It should only be set if it is set already,
	/// but that means ensuring it's set beforehand, which it might not be.
	if( !devRec.GetCap( hsG3DDeviceSelector::kCapsCompressTextures ) )
		fSettings.fD3DCaps &= ~kCapsCompressTextures;

	/// Set up tweaks
	// NOTE(review): SetZBiasScale() derives fPerspLayerScale from
	// fTweaks.fDefaultPerspLayerScale, which is only assigned further down in
	// this function (z- vs w-buffer selection). Confirm the value in effect at
	// this point is the intended one.
	SetZBiasScale( (float)devRec.GetZBiasRating() );
	// The default LOD bias always includes a fixed -0.25 on top of the board's rating.
	fTweaks.fDefaultLODBias = (float)-( 0.25 + (float)devRec.GetLODBiasRating() );
	devRec.GetFogApproxStarts( fTweaks.fFogExpApproxStart, fTweaks.fFogExp2ApproxStart );
	fTweaks.fFogEndBias = (float)devRec.GetFogEndBias();

	// Fog knee stuff
	devRec.GetFogKneeParams( hsG3DDeviceRecord::kFogExp, fTweaks.fExpFogKnee, fTweaks.fExpFogKneeVal );
	devRec.GetFogKneeParams( hsG3DDeviceRecord::kFogExp2, fTweaks.fExp2FogKnee, fTweaks.fExp2FogKneeVal );

	// Max # of layers. A record value of 0 is ignored (no restriction applied);
	// otherwise the record can only lower the limit, never raise it.
	UInt32 max = devRec.GetLayersAtOnce();
	if( max > 0 && max < fSettings.fMaxLayersAtOnce )
		fSettings.fMaxLayersAtOnce = max;

	/// Debug flag to force high-level cards down to GeForce 2 caps:
	/// no FSAA, at most 2 layers at once, no anisotropy, minimum quality.
	if( fDbgSetupInitFlags & 0x00000004 )
	{
		fSettings.fD3DCaps &= ~kCapsFSAntiAlias;
		if( fSettings.fMaxLayersAtOnce > 2 )
			fSettings.fMaxLayersAtOnce = 2;
		fSettings.fMaxAnisotropicSamples = 0;

		plQuality::SetCapability(plQuality::kMinimum);
	}

	// There's a bug in NVidia drivers on Windows 2000 for GeForce1-4 (all flavors, including MX).
	// When the amount allocated into managed memory approaches the on board memory size, the performance
	// severely degrades, no matter how little is actually in use in the current rendering. So say all
	// our d3d textures are created into managed memory at age load. Also say you are
	// consistently viewing only 5Mb of managed materials (texture + vertex buffer). So as
	// you walk through the age, the new textures you see get loaded on demand into video memory.
	// Once you've seen enough to fill the on board memory, your frame rate starts falling and
	// continues to fall as more textures get loaded. So either the memory manager is not letting
	// go of LRU textures, or fragmentation is so horrible as to make the manager useless.
	// So on these boards and with this OS, we keep track of how much managed memory we've seen,
	// and when it reaches a threshold, we flush managed memory with an EvictManagedResources() call.
	// There's an unfortunate glitch, and then the frame rate is fine again.
	// So if we need this workaround, we set fManagedCutoff to 1 here, and then once we have our
	// D3D device, we query for the amount of memory and set the threshold for flushing memory
	// based on that.
	OSVERSIONINFO osinfo;
	// Zeroed up front, so if GetVersionEx() fails (its result isn't checked)
	// the version fields read as 0 and the win2k test below is simply false.
	memset(&osinfo, 0, sizeof(osinfo));
	osinfo.dwOSVersionInfoSize = sizeof(osinfo);
	GetVersionEx(&osinfo);
	if( (osinfo.dwMajorVersion == 5)
		&&(osinfo.dwMinorVersion == 0) )
	{
		// It's the dreaded win2k
		// Both flush flavors just arm the workaround here (non-zero cutoff).
		if( devRec.GetCap(hsG3DDeviceSelector::kCapsDoubleFlush) )
			fManagedCutoff = 1;
		else if( devRec.GetCap(hsG3DDeviceSelector::kCapsSingleFlush) )
			fManagedCutoff = 1;
	}

	//// Our temp debug flag to force z-buffering...
	// W-buffering only survives if bit 0x1 of fDbgSetupInitFlags is set.
	if( !( fDbgSetupInitFlags & 0x00000001 ) )
		fSettings.fD3DCaps &= ~kCapsWBuffer;

	/// Set up the z-bias scale values, based on z- or w-buffering
	if( fSettings.fD3DCaps & kCapsWBuffer )
		fTweaks.fDefaultPerspLayerScale = kPerspLayerScaleW;
	else
		fTweaks.fDefaultPerspLayerScale = kPerspLayerScale;


	// Less than 4 layers at once means we have to fallback on uv bumpmapping
	// (re-checked here because the layer count may have been lowered above)
	if( fSettings.fMaxLayersAtOnce < 4 )
		SetDebugFlag(plPipeDbg::kFlagBumpUV, true);

	// A device created for software vertex processing can't claim HW transform.
	if( ( fSettings.fD3DCaps & kCapsHWTransform ) && ( fCurrentMode->fDDBehavior == D3DCREATE_SOFTWARE_VERTEXPROCESSING ) )
		fSettings.fD3DCaps &= ~kCapsHWTransform;

	if( devRec.GetCap(hsG3DDeviceSelector::kCapsMaxUVWSrc2) )
		fSettings.fMaxUVWSrc = 2;

	/// Anisotropy stuff
	//if( devRec.GetMaxAnisotropicSamples() < fSettings.fMaxAnisotropicSamples )
	//	fSettings.fMaxAnisotropicSamples = devRec.GetMaxAnisotropicSamples();
	// A max of 0 or 1 samples is treated the same as "no anisotropy".
	if( devRec.GetCap(hsG3DDeviceSelector::kCapsNoAniso) || (fSettings.fMaxAnisotropicSamples <= 1) )
		fSettings.fMaxAnisotropicSamples = 0;
}

//// Get/SetZBiasScale ////////////////////////////////////////////////////////
// If the board really doesn't support Z-biasing, we adjust the perspective matrix in IGetCameraToNDC
// The layer scale and translation are tailored to the current hardware.
//
// GetZBiasScale is the inverse of SetZBiasScale: it recovers the scale from
// the ratio of the current layer scale to the default layer scale, minus one.
hsScalar	plDXPipeline::GetZBiasScale() const
{
	const hsScalar scaleRatio = fTweaks.fPerspLayerScale / fTweaks.fDefaultPerspLayerScale;
	return scaleRatio - 1.0f;
}

// Stores the z-bias scale as a perspective layer scale and translation:
// both the default layer scale and kPerspLayerTrans get multiplied by
// (scale + 1), so a scale of 0 leaves the defaults in effect.
void	plDXPipeline::SetZBiasScale( hsScalar scale )
{
	const hsScalar factor = scale + 1.0f;

	fTweaks.fPerspLayerScale = fTweaks.fDefaultPerspLayerScale * factor;
	fTweaks.fPerspLayerTrans = kPerspLayerTrans * factor;
}

// ICreateDynDeviceObjects /////////////////////////////////////////////////////
// Create all our video memory consuming D3D objects: the front/back/depth
// surfaces, the shared render target pools, the debug text manager, and
// finally the loaded resources.
// Returns true on FAILURE, false on success (same convention as the other
// ICreate* functions here).
hsBool plDXPipeline::ICreateDynDeviceObjects()
{
	// Front/Back/Depth buffers. Nothing else is worth doing without them.
	const hsBool surfacesFailed = ICreateNormalSurfaces();
	if( surfacesFailed )
		return true;

	// RenderTarget pools are shared for our shadow generation algorithm.
	// Different sizes for different resolutions.
	IMakeRenderTargetPools();

	// Create device-specific stuff
	fDebugTextMgr = TRACKED_NEW plDebugTextManager();
	if( !fDebugTextMgr )
		return true;

	// Vertex buffers, index buffers, textures, etc.
	LoadResources();

	return false;
}
//// ICreateDeviceObjects /////////////////////////////////////////////////////
//	Create all of our steady state D3D objects. More D3D objects will be created
// and destroyed as ages are loaded and unloaded, but these are the things that
// only go away when we lose the device.
//	Returns true on FAILURE, false when everything was created.

hsBool	plDXPipeline::ICreateDeviceObjects()
{
	// The D3D device comes first (windowed whenever we aren't fullscreen),
	// then most everything else D3D. The second step only runs if the first
	// one succeeded.
	if( ICreateDevice(!fSettings.fFullscreen) || ICreateDynDeviceObjects() )
		return true;

	// PlateMgr is largely for debugging and performance stats,
	// but also gets used for some things like the cursor and
	// linking fade to/from black.
	fPlateMgr = TRACKED_NEW plDXPlateManager( this, fD3DDevice );
	if( !( fPlateMgr != nil && fPlateMgr->IsValid() ) )
		return true;

	// We've got everything created now, initialize to a known state
	// and clear both color and depth.
	IInitDeviceState();
	if( FAILED( fD3DDevice->Clear( 0, nil, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, fSettings.fClearColor, 1.0f, 0L ) ) )
		return true;

	// You may be wondering what this is. It's a workaround for a GeForce2 driver bug, where
	// clears to the Zbuffer (but not color) are getting partially ignored. Don't even ask.
	// So this is just to try and get the board used to the kind of foolishness it can expect
	// from here out: three depth-only clears in a row.
	const int kWarmUpZClears = 3;
	for( int pass = 0; pass < kWarmUpZClears; pass++ )
	{
		if( FAILED( fD3DDevice->Clear( 0, nil, D3DCLEAR_ZBUFFER, fSettings.fClearColor, 1.0f, 0L ) ) )
			return true;
	}

	/// Log renderer
	fLogDrawer = TRACKED_NEW plStatusLogDrawer( this );
	plStatusLogMgr::GetInstance().SetDrawer( fLogDrawer );

	/// Ok, we're done now
#if MCN_BOUNDS_SPANS
	// Debug-only drawable and wireframe material used for drawing bounds.
	fBoundsSpans = TRACKED_NEW plDrawableSpans();
	hsgResMgr::ResMgr()->NewKey( "BoundsSpans", fBoundsSpans, plLocation::kGlobalFixedLoc );
	fBoundsSpans->SetNativeProperty( plDrawable::kPropVolatile, true );

	fBoundsMat = TRACKED_NEW hsGMaterial();
	hsgResMgr::ResMgr()->NewKey( "BoundsMaterial", fBoundsMat, plLocation::kGlobalFixedLoc );

	plLayer *boundsLayer = fBoundsMat->MakeBaseLayer();
	boundsLayer->SetMiscFlags( hsGMatState::kMiscWireFrame | hsGMatState::kMiscTwoSided );
	boundsLayer->SetShadeFlags( boundsLayer->GetShadeFlags() | hsGMatState::kShadeWhite );

	// Set up a ref to these. Since we don't have a key, we use the
	// generic RefObject() (and matching UnRefObject() when we're done).
	// If we had a key, we would use myKey->AddViaNotify(otherKey) and myKey->Release(otherKey).
	fBoundsMat->GetKey()->RefObject();
	fBoundsSpans->GetKey()->RefObject();
#endif

	return false;
}

//// ISetCurrentDriver ////////////////////////////////////////////////////////
// Copy over the driver info into a freshly allocated fCurrentDriver (any
// previous one is deleted), then look up which D3D adapter ordinal matches
// the driver's device identifier and store it in fCurrentAdapter.
// fCurrentAdapter is left at 0 when no adapter matches.
void	plDXPipeline::ISetCurrentDriver( D3DEnum_DriverInfo *driv )
{
	if( fCurrentDriver != nil )
		delete fCurrentDriver;

	fCurrentDriver = TRACKED_NEW D3DEnum_DriverInfo;

	fCurrentDriver->fGuid = driv->fGuid;
	hsStrncpy( fCurrentDriver->fStrDesc, driv->fStrDesc, 40 );
	hsStrncpy( fCurrentDriver->fStrName, driv->fStrName, 40 );

	fCurrentDriver->fDesktopMode = driv->fDesktopMode;
	fCurrentDriver->fAdapterInfo = driv->fAdapterInfo;

	// The current mode/device selections are not carried over by this copy.
	fCurrentDriver->fCurrentMode = nil;
	fCurrentDriver->fCurrentDevice = nil;

	/// Go looking for an adapter to match this one
	fCurrentAdapter = 0;
	const UINT	adapterCount = fD3DObject->GetAdapterCount();
	for( UINT iAdapter = 0; iAdapter < adapterCount; iAdapter++ )
	{
		D3DADAPTER_IDENTIFIER9		adapterInfo;

		// If the query fails, adapterInfo is never filled in. Skip this
		// adapter rather than compare against uninitialized data, which
		// could produce a bogus match.
		if( FAILED( fD3DObject->GetAdapterIdentifier( iAdapter, 0, &adapterInfo ) ) )
			continue;

		if( adapterInfo.DeviceIdentifier == fCurrentDriver->fAdapterInfo.DeviceIdentifier )
		{
			fCurrentAdapter = iAdapter;
			break;
		}
	}
}

//// ISetCurrentDevice ////////////////////////////////////////////////////////
// Copy over the device info into a freshly allocated fCurrentDevice (any
// previous one is deleted). Display modes smaller than MIN_WIDTH x MIN_HEIGHT
// are dropped along the way.
void	plDXPipeline::ISetCurrentDevice( D3DEnum_DeviceInfo *dev )
{
	if( fCurrentDevice )
		delete fCurrentDevice;
	fCurrentDevice = TRACKED_NEW D3DEnum_DeviceInfo;

	hsStrncpy( fCurrentDevice->fStrName, dev->fStrName, 40 );

	fCurrentDevice->fDDCaps					= dev->fDDCaps;
	fCurrentDevice->fDDType					= dev->fDDType;
	fCurrentDevice->fIsHardware				= dev->fIsHardware;
	fCurrentDevice->fCanWindow				= dev->fCanWindow;
//	fCurrentDevice->fCanAntialias = dev->fCanAntialias;
	fCurrentDevice->fCompatibleWithDesktop	= dev->fCompatibleWithDesktop;

	// Copy over the supported device modes, one usable mode at a time.
	// The scratch record lives outside the loop, as it always has.
	D3DEnum_ModeInfo scratch;

	for( int idx = 0; idx < dev->fModes.Count(); ++idx )
	{
		D3DEnum_ModeInfo& src = dev->fModes[idx];

		// Only modes at least MIN_WIDTH wide and MIN_HEIGHT tall are usable.
		if( src.fDDmode.Width >= MIN_WIDTH && src.fDDmode.Height >= MIN_HEIGHT )
		{
			scratch.fBitDepth = src.fBitDepth;
			scratch.fCanRenderToCubic = src.fCanRenderToCubic;
			scratch.fDDBehavior = src.fDDBehavior;
			scratch.fDepthFormats = src.fDepthFormats;
			scratch.fFSAATypes = src.fFSAATypes;
			scratch.fDDmode = src.fDDmode;
			strcpy(scratch.fStrDesc, src.fStrDesc);
			scratch.fWindowed = src.fWindowed;

			fCurrentDevice->fModes.Push(scratch);
		}
	}
}

//// ISetCurrentMode //////////////////////////////////////////////////////////
// Copy over the mode info. fCurrentMode always ends up pointing at our own
// private copy of *mode; any previous copy is deleted first.
void	plDXPipeline::ISetCurrentMode( D3DEnum_ModeInfo *mode )
{
	if( fCurrentMode )
		delete fCurrentMode;

	D3DEnum_ModeInfo *modeCopy = TRACKED_NEW D3DEnum_ModeInfo;
	*modeCopy = *mode;

	fCurrentMode = modeCopy;
}

//// IFindCompressedFormats ///////////////////////////////////////////////////
//
//	New DX Way: Check to see if each format is valid.
//	Returns true only if every compressed texture format we rely on is usable
//	on the current adapter/device/display format.

hsBool	plDXPipeline::IFindCompressedFormats()
{
	// Only DXT1 and DXT5 are checked; DXT2, DXT3 and DXT4 are not in the list.
	static const D3DFORMAT	kNeededFormats[] = { D3DFMT_DXT1, D3DFMT_DXT5 };
	const int				kNumNeeded = sizeof( kNeededFormats ) / sizeof( kNeededFormats[ 0 ] );

	for( int idx = 0; idx < kNumNeeded; idx++ )
	{
		// One missing format is enough to say no.
		if( !ITextureFormatAllowed( kNeededFormats[ idx ] ) )
			return false;
	}

	/// Got here, must have found them all
	return true;
}

//// IFindLuminanceFormats ////////////////////////////////////////////////////
//
//	New DX Way: Check to see if each format we want is valid
//	Returns true only if both the L8 and A8L8 texture formats are usable on the
//	current adapter/device/display format.

hsBool	plDXPipeline::IFindLuminanceFormats()
{
	static const D3DFORMAT	kNeededFormats[] = { D3DFMT_L8, D3DFMT_A8L8 };
	const int				kNumNeeded = sizeof( kNeededFormats ) / sizeof( kNeededFormats[ 0 ] );

	for( int idx = 0; idx < kNumNeeded; idx++ )
	{
		// One missing format is enough to say no.
		if( !ITextureFormatAllowed( kNeededFormats[ idx ] ) )
			return false;
	}

	/// Got here, must have found them all
	return true;
}

//// ITextureFormatAllowed ////////////////////////////////////////////////////
//
//	Returns true if the given format is supported on the current device and
//	mode, false if it isn't. "Supported" means CheckDeviceFormat() succeeds for
//	a plain texture resource (usage 0) against the current mode's display format.

hsBool		plDXPipeline::ITextureFormatAllowed( D3DFORMAT format )
{
	const HRESULT hr = fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType,
														fCurrentMode->fDDmode.Format,
														0, D3DRTYPE_TEXTURE, format );
	return SUCCEEDED( hr );
}

//// SetDebugFlag /////////////////////////////////////////////////////////////
// Debug flags should never be employed to do a game effect, although they can
// be useful for developing effects. Mostly they help in diagnosing problems
// in rendering or performance.
// The flag bit is always recorded; two flags additionally trigger immediate
// device work (mipmap colorizing and anisotropy on/off).
void		plDXPipeline::SetDebugFlag( UInt32 flag, hsBool on )
{
	fDebugFlags.SetBit(flag, on);

	if( flag == plPipeDbg::kFlagColorizeMipmaps )
	{
		// Force textures to reload by marking every texture ref dirty
		for( plDXTextureRef *texRef = fTextureRefList; texRef != nil; texRef = texRef->GetNext() )
			texRef->SetDirty( true );

		// Reset mipmap filtering state (usually is LINEAR, but we set it to POINT for coloring)
		// on all 8 sampler stages.
		const DWORD mipFilter = on ? D3DTEXF_POINT : D3DTEXF_LINEAR;
		for( int stage = 0; stage < 8; stage++ )
			fD3DDevice->SetSamplerState( stage, D3DSAMP_MIPFILTER, mipFilter );
	}

	// Setting the "no anisotropy" flag means anisotropy goes off, and vice versa.
	if( flag == plPipeDbg::kFlagNoAnisotropy )
		ISetAnisotropy(!on);
}

// Returns whether the given debug flag bit is currently set in fDebugFlags
// (see SetDebugFlag).
hsBool plDXPipeline::IsDebugFlagSet( UInt32 flag ) const
{
	const hsBool isSet = fDebugFlags.IsBitSet(flag);
	return isSet;
}

///////////////////////////////////////////////////////////////////////////////
//// Device Creation //////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

//// ICreateMaster ////////////////////////////////////////////////////////////
//	Creates the master Direct3D objects. I guess just in case you want
//	multiple Direct3D devices.... :~
//	The Direct3DCreate9 entry point is looked up at run time from the D3D DLL
//	rather than linked against. Returns false on success; on failure, returns
//	whatever ICreateFail() returns for the given message.

hsBool plDXPipeline::ICreateMaster()
{
	hsAssert( !fD3DObject, "ICreateMaster() should only be called for Master Direct3DDevice" );

	/// The new DirectX Way: Create a Direct3D object, out of which everything else springs
	if( !hsGDDrawDllLoad::GetD3DDll() )
		return ICreateFail( "Cannot load Direct3D driver!" );

	Direct3DCreateProc createProc =
		(Direct3DCreateProc)GetProcAddress( hsGDDrawDllLoad::GetD3DDll(), "Direct3DCreate9" );
	if( !createProc )
		return ICreateFail( "Cannot load D3D Create Proc!" );

	// Create a D3D object to use
	fD3DObject = createProc( D3D_SDK_VERSION );
	if( !fD3DObject )
		return ICreateFail( "Cannot create Direct3D object" );

	return false;
}

//// ICreateDevice ////////////////////////////////////////////////////
//
//	Creates the device. Surfaces, buffers, etc. created separately (in case of lost device).
// See ICreateDeviceObjects.
//
//	windowed - true to create a windowed device, false for fullscreen.
//	Returns false on success. Failure paths visible here return the result of
//	ICreateFail(); INIT_ERROR_CHECK is a macro defined elsewhere that presumably
//	bails out of this function the same way when the HRESULT fails - TODO confirm.
//
//	Steps:
//	 1. Record the desktop display mode in fDesktopParams.
//	 2. Fill in the D3DPRESENT_PARAMETERS (size, swap effect, vsync, AA type,
//		depth format, back buffer format).
//	 3. (Internal builds only) look for NVPerfHUD and redirect to its adapter.
//	 4. Create the device and remember the present parameters actually used.
//	 5. Turn an armed fManagedCutoff into a real byte threshold.

hsBool plDXPipeline::ICreateDevice(hsBool windowed)
{
	/// First, create the D3D Device object
	D3DPRESENT_PARAMETERS		params;
	D3DDISPLAYMODE				dispMode;
	int							i;
#ifdef DBG_WRITE_FORMATS
	char						msg[ 256 ];
#endif // DBG_WRITE_FORMATS

	INIT_ERROR_CHECK( fD3DObject->GetAdapterDisplayMode( fCurrentAdapter, &dispMode ),
		"Cannot get desktop display mode" );

	// save desktop properties
	fDesktopParams.Width = dispMode.Width;
	fDesktopParams.Height = dispMode.Height;
	fDesktopParams.ColorDepth = GetDXBitDepth( dispMode.Format );


	if( windowed )
	{
		// Reset fColor, since we're getting the desktop bitdepth
		fSettings.fColorDepth = GetDXBitDepth( dispMode.Format );
		// A window can't be larger than the desktop; if either requested
		// dimension is, fall back to the full desktop size.
		if(fSettings.fOrigWidth > fDesktopParams.Width || fSettings.fOrigHeight > fDesktopParams.Height)
		{
			fSettings.fOrigWidth = fDesktopParams.Width;
			fSettings.fOrigHeight = fDesktopParams.Height;
			IGetViewTransform().SetScreenSize(fDesktopParams.Width, fDesktopParams.Height);
		}
	}

	memset( &params, 0, sizeof( params ) );
	params.Windowed = ( windowed ? TRUE : FALSE );
	params.Flags = 0;//D3DPRESENTFLAG_LOCKABLE_BACKBUFFER;
	params.BackBufferCount = 1;
	// Back buffer matches the view transform's screen size (possibly just
	// clamped to the desktop above).
	params.BackBufferWidth = GetViewTransform().GetScreenWidth();
	params.BackBufferHeight = GetViewTransform().GetScreenHeight();
	params.EnableAutoDepthStencil = TRUE;

	// NOTE: This was changed 5.29.2001 mcn to avoid the nasty flashing bug on nVidia's 12.60 beta drivers
	// SWAPEFFECT must be _DISCARD when using antialiasing, so we'll just go with _DISCARD for the time being. mf
	params.SwapEffect = D3DSWAPEFFECT_DISCARD;
	params.FullScreen_RefreshRateInHz = ( windowed ? 0 : D3DPRESENT_RATE_DEFAULT );
	// fVSync is only consulted in fullscreen; windowed always uses the default interval.
	if(windowed)
	{
		params.PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT;
	}
	else
	{
		params.PresentationInterval = ( fVSync ? D3DPRESENT_INTERVAL_DEFAULT : D3DPRESENT_INTERVAL_IMMEDIATE );
	}

#ifdef DBG_WRITE_FORMATS
	for( i = 0; i < fCurrentMode->fDepthFormats.GetCount(); i++ )
	{
		sprintf( msg, "-- Valid depth buffer format: %s", IGetDXFormatName( fCurrentMode->fDepthFormats[ i ] ) );
		hsDebugMessage( msg, 0 );
	}
#endif

	// Attempt to find the closest AA setting we can: walk down from the
	// requested sample count to 2 and take the first type the mode lists.
	// The sample count is used directly as the D3DMULTISAMPLE_TYPE value.
	params.MultiSampleType = D3DMULTISAMPLE_NONE;
	for( i = fSettings.fNumAASamples; i >= 2; i-- )
	{
		if( fCurrentMode->fFSAATypes.Find( (D3DMULTISAMPLE_TYPE)i ) != fCurrentMode->fFSAATypes.kMissingIndex )
		{
			params.MultiSampleType = (D3DMULTISAMPLE_TYPE)i;
			break;
		}
	}

	// IFindDepthFormat fills in params.AutoDepthStencilFormat on success.
	if( !IFindDepthFormat(params) )
	{
		// If we haven't found a depth format, turn off multisampling and try it again.
		params.MultiSampleType = D3DMULTISAMPLE_NONE;
		if( !IFindDepthFormat(params) )
			// Okay, we're screwed here, we might as well bail.
			return ICreateFail( "Can't find a Depth Buffer format" );
	}

	/// TEMP HACK--if we're running 16-bit z-buffer or below, use our z-bias (go figure, it works better
	/// in 16-bit, worse in 24 and 32)
	if( params.AutoDepthStencilFormat == D3DFMT_D15S1 ||
		params.AutoDepthStencilFormat == D3DFMT_D16 ||
		params.AutoDepthStencilFormat == D3DFMT_D16_LOCKABLE )
		fSettings.fD3DCaps &= ~kCapsZBias;

#ifdef DBG_WRITE_FORMATS
	sprintf( msg, "-- Requesting depth buffer format: %s", IGetDXFormatName( params.AutoDepthStencilFormat ) );
	hsDebugMessage( msg, 0 );
#endif


	// Windowed devices use the desktop's format; fullscreen uses the selected mode's.
	params.BackBufferFormat = ( windowed ? dispMode.Format : fCurrentMode->fDDmode.Format );
#ifdef DBG_WRITE_FORMATS
	sprintf( msg, "-- Requesting back buffer format: %s", IGetDXFormatName( params.BackBufferFormat ) );
	hsDebugMessage( msg, 0 );
#endif

	params.hDeviceWindow = fSettings.fHWnd;

	// Enable this to switch to a pure device.
//	fCurrentMode->fDDBehavior |= D3DCREATE_PUREDEVICE;
//	fCurrentMode->fDDBehavior |= D3DCREATE_DISABLE_DRIVER_MANAGEMENT;

#ifndef PLASMA_EXTERNAL_RELEASE
	// Internal builds only: if an NVPerfHUD adapter is present, create the
	// device on it instead of on the adapter chosen earlier.
	// NOTE(review): the GetAdapterIdentifier() result isn't checked, so on
	// failure id.Description is read uninitialized - confirm whether that
	// can happen in practice.
	UINT adapter;
	for (adapter = 0; adapter < fD3DObject->GetAdapterCount(); adapter++)
	{
		D3DADAPTER_IDENTIFIER9 id;
		fD3DObject->GetAdapterIdentifier(adapter, 0, &id);

		// We should be matching against "NVIDIA NVPerfHUD", but the space
		// in the description seems to be bogus. This seems to be a fair
		// alternative
		if (strstr(id.Description, "NVPerfHUD"))
		{
			// This won't actually use the REF device, but we ask for
			// it as part of the handshake to let NVPerfHUD know we give
			// it permission to analyze us.
			fCurrentAdapter = adapter;
			fCurrentDevice->fDDType= D3DDEVTYPE_REF;
			SetDebugFlag(plPipeDbg::kFlagNVPerfHUD, true);
			break;
		}
	}
#endif // PLASMA_EXTERNAL_RELEASE

	INIT_ERROR_CHECK( fD3DObject->CreateDevice( fCurrentAdapter, fCurrentDevice->fDDType,
											  fSettings.fHWnd, fCurrentMode->fDDBehavior,
											  &params, &fD3DDevice ),
						"Cannot create primary display surface via CreateDevice()" );

	// Keep the parameters the device was created with.
	fSettings.fPresentParams = params;

	// This bit matches up with the fManagedCutoff workaround for a problem
	// with the NVidia drivers on win2k. Search for "GetVersionEx" in IRestrictCaps
	// for more info.
	UInt32 mem = fD3DDevice->GetAvailableTextureMem();
	plProfile_IncCount(TexTot, mem);

	// A non-zero fManagedCutoff means the workaround is wanted; replace it
	// with the actual flush threshold. Less than 64000000 bytes of available
	// texture memory gets the lower (kDoubleFlush) threshold.
	const UInt32 kSingleFlush(40000000);
	const UInt32 kDoubleFlush(24000000);
	if( fManagedCutoff )
	{
		if( mem < 64000000 )
			fManagedCutoff = kDoubleFlush;
		else
			fManagedCutoff = kSingleFlush;
	}

	return false;
}

// IFindDepthFormat //////////////////////////////////////////////////////////////
// Look through available depth formats for the closest to what we want that
// will work with the multisample type requested in params.
// On success, params.AutoDepthStencilFormat is set to the chosen format,
// fStencil.fDepth to its number of stencil bits, and true is returned.
// On failure nothing is modified and false is returned.
// The mode's depth format list is always searched from the back to the front.
hsBool plDXPipeline::IFindDepthFormat(D3DPRESENT_PARAMETERS& params)
{
	const BOOL	isWindowed = fCurrentMode->fWindowed ? TRUE : FALSE;
	int			idx;

	// Pass 1:
	// Okay, we're not using the stencil buffer right now, and it's bringing out
	// some painful driver bugs on the GeForce2. So rather than go out of our way
	// looking for trouble, we're going to look for a depth buffer with NO STENCIL.
	for( idx = fCurrentMode->fDepthFormats.GetCount() - 1; idx >= 0; idx-- )
	{
		const D3DFORMAT candidate = fCurrentMode->fDepthFormats[ idx ];
		if( candidate != D3DFMT_D32 && candidate != D3DFMT_D24X8 && candidate != D3DFMT_D16 )
			continue;

		if( FAILED( fD3DObject->CheckDeviceMultiSampleType( fCurrentAdapter,
															fCurrentDevice->fDDType,
															candidate,
															isWindowed,
															params.MultiSampleType, NULL ) ) )
			continue;

		params.AutoDepthStencilFormat = candidate;
		fStencil.fDepth = 0;
		return true;
	}

	// Pass 2: nothing stencil-free worked, so settle for a format that
	// carries stencil bits and record how many.
	for( idx = fCurrentMode->fDepthFormats.GetCount() - 1; idx >= 0; idx-- )
	{
		const D3DFORMAT candidate = fCurrentMode->fDepthFormats[ idx ];
		if( candidate != D3DFMT_D15S1 && candidate != D3DFMT_D24X4S4 && candidate != D3DFMT_D24S8 )
			continue;

		if( FAILED( fD3DObject->CheckDeviceMultiSampleType( fCurrentAdapter,
															fCurrentDevice->fDDType,
															candidate,
															isWindowed,
															params.MultiSampleType, NULL ) ) )
			continue;

		params.AutoDepthStencilFormat = candidate;
		switch( candidate )
		{
		case D3DFMT_D15S1:
			fStencil.fDepth = 1;
			break;
		case D3DFMT_D24X4S4:
			fStencil.fDepth = 4;
			break;
		default:	// D3DFMT_D24S8
			fStencil.fDepth = 8;
			break;
		}
		return true;
	}

	// Neither pass found anything usable.
	return false;
}

// ICreateNormalSurfaces //////////////////////////////////////////////////////
// Create the primary color and depth buffers.
// More precisely: fetch the back buffer, render target 0 and depth/stencil
// surface pointers from the already created device and remember them as our
// main surfaces and as the currently active ones.
// Returns false on success. INIT_ERROR_CHECK is a macro defined elsewhere that
// presumably returns failure from this function when a call fails - TODO confirm.
//
hsBool plDXPipeline::ICreateNormalSurfaces()
{
	/// Now get the backbuffer surface pointer
	INIT_ERROR_CHECK( fD3DDevice->GetBackBuffer( 0, 0, D3DBACKBUFFER_TYPE_MONO, &fD3DBackBuff ),
						"Cannot get primary surface's back buffer" );

	/// And finally, get the main D3D surfaces (for restoring after rendertargets )
	INIT_ERROR_CHECK( fD3DDevice->GetRenderTarget( 0, &fD3DMainSurface ), "Cannot capture primary surface" );
	INIT_ERROR_CHECK( fD3DDevice->GetDepthStencilSurface( &fD3DDepthSurface ), "Cannot capture primary depth surface" );

	// The main surfaces start out as the current render/depth targets.
	fSettings.fCurrD3DMainSurface = fD3DMainSurface;
	fSettings.fCurrD3DDepthSurface = fD3DDepthSurface;

	// Matched by the D3DSURF_MEMDEL calls in IReleaseDynDeviceObjects
	// (presumably memory usage bookkeeping - macro defined elsewhere).
	D3DSURF_MEMNEW( fD3DMainSurface );
	D3DSURF_MEMNEW( fD3DDepthSurface );
	D3DSURF_MEMNEW( fD3DBackBuff );

	// The descriptions fetched here are never read afterwards; each call
	// overwrites the previous result. Looks like a leftover debugging aid.
	D3DSURFACE_DESC	info;
	fD3DMainSurface->GetDesc( &info );
	fD3DDepthSurface->GetDesc( &info );
	fD3DBackBuff->GetDesc( &info );

	return false;
}

// IReleaseRenderTargetPools //////////////////////////////////////////////////
// Free up all resources associated with our pools of rendertargets of varying
// sizes. Primary user of these pools is the shadow generation.

// Helper for IReleaseRenderTargetPools: deletes every entry of one pool,
// nils out the slot, then sets the pool's count to zero.
template <class PoolType>
static void sDeleteRenderTargetPool( PoolType& pool )
{
	for( int idx = 0; idx < pool.GetCount(); idx++ )
	{
		delete pool[ idx ];
		pool[ idx ] = nil;
	}
	pool.SetCount( 0 );
}

void plDXPipeline::IReleaseRenderTargetPools()
{
	// Largest to smallest, same order as always.
	sDeleteRenderTargetPool( fRenderTargetPool512 );
	sDeleteRenderTargetPool( fRenderTargetPool256 );
	sDeleteRenderTargetPool( fRenderTargetPool128 );
	sDeleteRenderTargetPool( fRenderTargetPool64 );
	sDeleteRenderTargetPool( fRenderTargetPool32 );

	// Reset the per-size bookkeeping. The blur render target pointers are
	// only forgotten here, not deleted.
	for( int slot = 0; slot < kMaxRenderTargetNext; slot++ )
	{
		fRenderTargetNext[ slot ] = 0;
		fBlurScratchRTs[ slot ] = nil;
		fBlurDestRTs[ slot ] = nil;
	}

#ifdef MF_ENABLE_HACKOFF
	hackOffscreens.Reset();
#endif // MF_ENABLE_HACKOFF
}

// IReleaseDynDeviceObjects //////////////////////////////////////////////
// Make sure we aren't holding on to anything, and release all of
// the D3D resources that we normally hang on to forever. Meaning things
// that persist through unloading one age and loading the next.
// Within this file it is called from IReleaseDeviceObjects(), and from
// IResetDevice() and Resize() immediately before fD3DDevice->Reset().
void plDXPipeline::IReleaseDynDeviceObjects()
{
	// We should do this earlier, but the textFont objects don't remove
	// themselves from their parent objects yet
	delete fDebugTextMgr;
	fDebugTextMgr = nil;

	// Unbind stream 0 and the index buffer from the device before the
	// buffer refs below are dropped.
	if( fD3DDevice )
	{
		fD3DDevice->SetStreamSource(0, nil, 0, 0);
		fD3DDevice->SetIndices(nil);
	}

	/// Delete actual d3d objects
	hsRefCnt_SafeUnRef( fSettings.fCurrVertexBuffRef );
	fSettings.fCurrVertexBuffRef = nil;
	hsRefCnt_SafeUnRef( fSettings.fCurrIndexBuffRef );
	fSettings.fCurrIndexBuffRef = nil;

	// NOTE(review): this loop only terminates if deleting the head updates
	// fTextFontRefList (presumably the font unlinks itself on destruction) - confirm.
	while( fTextFontRefList )
		delete fTextFontRefList;

	// Release each render target ref and take it off the list; the loop
	// relies on Unlink() advancing fRenderTargetRefList.
	while( fRenderTargetRefList )
	{
		plDXRenderTargetRef* rtRef = fRenderTargetRefList;
		rtRef->Release();
		rtRef->Unlink();
	}

	// The shared dynamic vertex buffers used by things like objects skinned on CPU, or
	// particle systems.
	IReleaseDynamicBuffers();
	IReleaseAvRTPool();
	IReleaseRenderTargetPools();

	// Shared depth surfaces: release each one that exists and mark its
	// format as unknown.
	if( fSharedDepthSurface[0] )
	{
		D3DSURF_MEMDEL(fSharedDepthSurface[0]);
		ReleaseObject(fSharedDepthSurface[0]);
		fSharedDepthFormat[0] = D3DFMT_UNKNOWN;
	}
	if( fSharedDepthSurface[1] )
	{
		D3DSURF_MEMDEL(fSharedDepthSurface[1]);
		ReleaseObject(fSharedDepthSurface[1]);
		fSharedDepthFormat[1] = D3DFMT_UNKNOWN;
	}

	// Primary surfaces: memory-profile bookkeeping first (D3DSURF_MEMDEL),
	// then the actual releases.
	D3DSURF_MEMDEL( fD3DMainSurface );
	D3DSURF_MEMDEL( fD3DDepthSurface );
	D3DSURF_MEMDEL( fD3DBackBuff );

	ReleaseObject( fD3DBackBuff );
	ReleaseObject( fD3DDepthSurface );
	ReleaseObject( fD3DMainSurface );

}

// IReleaseShaders ///////////////////////////////////////////////////////////////
// Delete our vertex and pixel shaders. Releasing the plasma ref will release the
// D3D handle.
void plDXPipeline::IReleaseShaders()
{
	while( fVShaderRefList )
	{
		plDXVertexShader* ref = fVShaderRefList;
		ref->Release();
		ref->Unlink();
	}

	while( fPShaderRefList )
	{
		plDXPixelShader* ref = fPShaderRefList;
		ref->Release();
		ref->Unlink();
	}
}

//// IReleaseDeviceObjects ///////////////////////////////////////////////////////
// Release everything we've created. This is the main cleanup function.
// Order: debug/bounds helpers, log drawer, layer refs, LUT texture, the
// vertex/index/texture ref lists, shaders, lights, the "dyn" device objects,
// the plate manager, and finally the D3D device and the D3D object themselves.
void	plDXPipeline::IReleaseDeviceObjects()
{
	plDXDeviceRef	*ref;

	/// Delete d3d-dependent objects
#if MCN_BOUNDS_SPANS
	if( fBoundsSpans )
		fBoundsSpans->GetKey()->UnRefObject();
	fBoundsSpans = nil;
	if( fBoundsMat )
		fBoundsMat->GetKey()->UnRefObject();
	fBoundsMat = nil;
#endif

	// Detach the status log drawer before deleting it.
	plStatusLogMgr::GetInstance().SetDrawer( nil );
	delete fLogDrawer;
	fLogDrawer = nil;

	// NOTE(review): presumably a size of 0 frees the pixel scratch buffer - confirm.
	IGetPixelScratch( 0 );

	// Drop the cached layer refs (8 slots).
	int i;
	for( i = 0; i < 8; i++ )
	{
		if( fLayerRef[i] )
		{
			hsRefCnt_SafeUnRef(fLayerRef[i]);
			fLayerRef[i] = nil;
		}
	}

#ifdef MF_ENABLE_HACKOFF
	//WHITE
	hackOffscreens.SetCount(0);
#endif // MF_ENABLE_HACKOFF

	// The LUT texture's data array is deleted here explicitly, before the
	// ref itself is unreffed.
	if( fULutTextureRef )
		delete [] fULutTextureRef->fData;
	hsRefCnt_SafeUnRef(fULutTextureRef);
	fULutTextureRef = nil;

	// Release and unlink every vertex buffer, index buffer and texture ref.
	// Each loop relies on Unlink() advancing the list head.
	while( fVtxBuffRefList )
	{
		ref = fVtxBuffRefList;
		ref->Release();
		ref->Unlink();
	}
	while( fIdxBuffRefList )
	{
		ref = fIdxBuffRefList;
		ref->Release();
		ref->Unlink();
	}
	while( fTextureRefList )
	{
		ref = fTextureRefList;
		ref->Release();
		ref->Unlink();
	}

	IReleaseShaders();

	fLights.Release();

	IReleaseDynDeviceObjects();

	delete fPlateMgr;
	fPlateMgr = nil;

	// Keep calling Release() until it returns 0. Every non-zero return is
	// logged, since it means something still held a reference to the device.
	if( fD3DDevice != nil )
	{
		LONG ret;
		while( ret = fD3DDevice->Release() )
		{
			hsStatusMessageF("%d - Error releasing device", ret);
		}
		fD3DDevice = nil;
	}

	// Same treatment for the Direct3D object.
	if( fD3DObject != nil )
	{
		LONG ret;
		while( ret = fD3DObject->Release() )
		{
			hsStatusMessageF("%d - Error releasing Direct3D Object", ret);
		}
		fD3DObject = nil;
	}

	// Nothing is allocated any more; reset the allocation-tracking flags.
	fManagedAlloced = false;
	fAllocUnManaged = false;
}

// IReleaseDynamicBuffers /////////////////////////////////////////////////
// Release everything we've created in POOL_DEFAULT.
// This is called on shutdown or when we lose the device. Search for D3DERR_DEVICELOST.
// Refs stay on their lists; only their D3D-side resources are released here.
void plDXPipeline::IReleaseDynamicBuffers()
{
	// Workaround for ATI driver bug.
	// With fBadManaged set, every texture ref is released as well.
	if( fSettings.fBadManaged )
	{
		plDXTextureRef* tRef = fTextureRefList;
		while( tRef )
		{
			tRef->Release();
			tRef = tRef->GetNext();
		}
	}
	plDXVertexBufferRef* vbRef = fVtxBuffRefList;
	while( vbRef )
	{
		// Volatile refs with a D3D buffer: drop just the buffer pointer.
		if( vbRef->Volatile() && vbRef->fD3DBuffer )
		{
			vbRef->fD3DBuffer->Release();
			vbRef->fD3DBuffer = nil;

			// Actually, if it's volatile, it's sharing the global dynamic vertex buff, so we're already
			// accounting for the memory when we clear the global buffer.
			//PROFILE_POOL_MEM(D3DPOOL_DEFAULT, vbRef->fCount * vbRef->fVertexSize, false, "VtxBuff");
		}
		// 9600 THRASH
		// Otherwise the ref is only released when the fBadManaged workaround is on.
		else if( fSettings.fBadManaged )
		{
			vbRef->Release();
		}
		vbRef = vbRef->GetNext();
	}
	plDXIndexBufferRef* iRef = fIdxBuffRefList;
	while( iRef )
	{
		// If it's volatile, we have to release it.
		// If it's not, we want to release it so
		// we can make it volatile (D3DPOOL_DEFAULT)
		if (iRef->fD3DBuffer)
		{
			iRef->fD3DBuffer->Release();
			iRef->fD3DBuffer = nil;
			PROFILE_POOL_MEM(iRef->fPoolType, iRef->fCount * sizeof(UInt16), false, "IndexBuff");
		}
		iRef = iRef->GetNext();
	}
	// The global dynamic vertex buffer.
	if (fDynVtxBuff)
	{
		ReleaseObject(fDynVtxBuff);
		PROFILE_POOL_MEM(D3DPOOL_DEFAULT, fDynVtxSize, false, "DynVtxBuff");
		fDynVtxBuff = nil;
	}

	// Restart the dynamic vertex cursor.
	fNextDynVtx = 0;

	// NOTE(review): bumping fVtxRefTime presumably invalidates refs stamped
	// with the previous value - confirm against its readers.
	fVtxRefTime++;

	// PlateMgr has a POOL_DEFAULT vertex buffer for drawing quads.
	if( fPlateMgr )
		fPlateMgr->IReleaseGeometry();

	// Also has POOL_DEFAULT vertex buffer.
	plDXTextFont::ReleaseShared(fD3DDevice);

	IReleaseBlurVBuffers();
}

// ICreateDynamicBuffers /////////////////////////////////////////////////////
// Create the things we need in POOL_DEFAULT. We clump them into this function,
// because they must be created before anything in POOL_MANAGED.
// So we create these global POOL_DEFAULT objects here, then send out a message
// to the objects in the scene to create anything they need in POOL_DEFAULT,
// then go on to create things on POOL_MANAGED.
// Set LoadResources().
void plDXPipeline::ICreateDynamicBuffers()
{
	ICreateBlurVBuffers();

	plDXTextFont::CreateShared(fD3DDevice);

	if( fPlateMgr )
		fPlateMgr->ICreateGeometry(this);

	fNextDynVtx = 0;

	fVtxRefTime++;

	DWORD usage = D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC;
	hsAssert(!fManagedAlloced, "Alloc default with managed alloc'd");
	D3DPOOL poolType = D3DPOOL_DEFAULT;
	if( fDynVtxSize )
	{
		PROFILE_POOL_MEM(poolType, fDynVtxSize, true, "DynVtxBuff");
		if( FAILED( fD3DDevice->CreateVertexBuffer( fDynVtxSize,
													usage,
													0,
													poolType,
													&fDynVtxBuff, NULL) ) )
		{
			hsAssert(false, "Don't know what to do here.");
		}
	}
}


// IPrintDeviceInitError /////////////////////////////////////////////////////
// Show a localized error message box telling the user that video card
// initialization failed and that default settings have been restored.
// The caption and text are picked by plLocalization::GetLanguage(), with
// English as the fallback.
// NOTE(review): the accented characters in the French and Spanish strings
// appear mis-encoded in this copy of the file - confirm the source encoding
// before touching those literals.
void plDXPipeline::IPrintDeviceInitError()
{
	char str[256];	// message body
	char err[16];	// message box caption
	switch(plLocalization::GetLanguage())
	{
		case plLocalization::kFrench:	strcpy(err, "Erreur"); strcpy(str, "Erreur d'initialisation de votre carte graphique. Les valeurs par d<EFBFBD>faut de ses param<EFBFBD>tres ont <EFBFBD>t<EFBFBD> r<EFBFBD>tablis. ");	break;
		case plLocalization::kGerman:	strcpy(err, "Fehler");	strcpy(str, "Bei der Initialisierung Ihrer Grafikkarte ist ein Fehler aufgetreten. Standardeinstellungen werden wiederhergestellt."); break;
		case plLocalization::kSpanish:	strcpy(err, "Error"); strcpy(str, "Ocurri<EFBFBD> un error al inicializar tu tarjeta de v<EFBFBD>deo. Hemos restaurado los ajustes por defecto. "); break;
		case plLocalization::kItalian:	strcpy(err, "Errore");	strcpy(str, "Errore di inizializzazione della scheda video. Sono state ripristinate le impostazioni predefinite."); break;
		default:						strcpy(err, "Error"); strcpy(str, "There was an error initializing your video card. We have reset it to its Default settings."); break;
	}
	hsMessageBox(str, err, hsMessageBoxNormal, hsMessageBoxIconError);
}

// IResetToDefaults ///////////////////////////////////////////////////////////
// Overwrite the device creation parameters, and the pipeline settings that
// mirror them, with the values held in fDefaultPipeParams. Then message the
// client so it can write the defaults to the ini file and adjust the window.
//	params - present parameters that receive the default values.
void plDXPipeline::IResetToDefaults(D3DPRESENT_PARAMETERS *params)
{
	// Resolution, back buffer format and color depth.
	params->BackBufferWidth = fDefaultPipeParams.Width;
	params->BackBufferHeight = fDefaultPipeParams.Height;
	params->BackBufferFormat = D3DFMT_X8R8G8B8;
	fSettings.fOrigWidth = fDefaultPipeParams.Width;
	fSettings.fOrigHeight = fDefaultPipeParams.Height;
	fSettings.fColorDepth = fDefaultPipeParams.ColorDepth;
	IGetViewTransform().SetScreenSize(fDefaultPipeParams.Width, fDefaultPipeParams.Height);

	// Make the first 32 bit mode with the default resolution current.
	// If none matches, the current mode is left as it was.
	hsTArray<D3DEnum_ModeInfo>& modeList = fCurrentDevice->fModes;
	for( int modeIdx = 0; modeIdx < modeList.Count(); modeIdx++ )
	{
		D3DEnum_ModeInfo& candidate = modeList[modeIdx];
		const bool sameSize = candidate.fDDmode.Width == params->BackBufferWidth
							&& candidate.fDDmode.Height == params->BackBufferHeight;
		if( sameSize && candidate.fBitDepth == 32 )
		{
			ISetCurrentMode(&candidate);
			break;
		}
	}

	// Windowed / fullscreen.
	params->Windowed = fDefaultPipeParams.Windowed;
	fSettings.fFullscreen = !fDefaultPipeParams.Windowed;
	fCurrentMode->fWindowed = fDefaultPipeParams.Windowed;

	// Antialiasing: start from "none", then walk down from the default amount
	// and take the first sample count the current mode supports.
	params->MultiSampleType = D3DMULTISAMPLE_NONE;
	fSettings.fNumAASamples = 0;
	for( int samples = fDefaultPipeParams.AntiAliasingAmount; samples >= 2; samples-- )
	{
		if( fCurrentMode->fFSAATypes.Find( (D3DMULTISAMPLE_TYPE)samples ) == fCurrentMode->fFSAATypes.kMissingIndex )
			continue;

		fSettings.fNumAASamples = samples;
		params->MultiSampleType = (D3DMULTISAMPLE_TYPE)samples;
		break;
	}
	fSettings.fMaxAnisotropicSamples = fDefaultPipeParams.AnisotropicLevel;

	// VSync off, present immediately.
	fVSync = false;
	params->PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;

	// Global quality switches.
	plShadowCaster::EnableShadowCast(fDefaultPipeParams.Shadows ? true : false);
	plQuality::SetQuality(fDefaultPipeParams.VideoQuality);
	plQuality::SetCapability(fDefaultPipeParams.VideoQuality);
	plDynamicCamMap::SetEnabled(fDefaultPipeParams.PlanarReflections ? true : false);
	plBitmap::SetGlobalLevelChopCount(2 - fDefaultPipeParams.TextureQuality);

	// Camera: match the aspect ratio to the new size, then re-apply the FOV.
	plVirtualCam1::SetAspectRatio((float)fSettings.fOrigWidth / (float)fSettings.fOrigHeight);
	plVirtualCam1::SetFOV(plVirtualCam1::GetFOVw(), plVirtualCam1::GetFOVh());

	// Tell the client to write the defaults to the ini file and resize the window.
	plKey receiver = hsgResMgr::ResMgr()->FindKey( kClient_KEY );
	plClientMsg* defaultsMsg = TRACKED_NEW plClientMsg(plClientMsg::kSetGraphicsDefaults);
	defaultsMsg->Send(receiver);
}

// IResetDevice
// reset the device to its operational state.
// returns true if not ready yet, false if the reset was successful.
// All this is generally in response to a fullscreen alt-tab.
// More precisely: returns false only when fDeviceLost was not set on entry.
// Whenever fDeviceLost was set it returns true, even after a successful
// reset (see the comment at the end).
hsBool plDXPipeline::IResetDevice()
{
	// Debugging aid: set to true in a debugger/build to simulate a lost device.
	hsBool fakeDevLost(false);
	if( fakeDevLost )
		fDeviceLost = true;

	if( fDeviceLost )
	{
		IClearShadowSlaves();

		Sleep(100);
		HRESULT coopLev = fD3DDevice->TestCooperativeLevel();
		if( coopLev == D3DERR_DEVICELOST )
		{
			// Nothing to do yet.
			return true;
		}
		if( fakeDevLost )
			coopLev = D3DERR_DEVICENOTRESET;
		// Reset either because D3D asks for it, or because ResetDisplayDevice()
		// forced it via fForceDeviceReset.
		if( coopLev == D3DERR_DEVICENOTRESET || fForceDeviceReset)
		{
			plStatusLog::AddLineS("pipeline.log", 0xffff0000, "Resetting Device");
			IReleaseDynDeviceObjects();
			if( !IFindDepthFormat(fSettings.fPresentParams) )
			{
				// If we haven't found a depth format, turn off multisampling and try it again.
				// The result of this second attempt is not checked.
				fSettings.fPresentParams.MultiSampleType = D3DMULTISAMPLE_NONE;
				IFindDepthFormat(fSettings.fPresentParams);
			}
			HRESULT hr = fD3DDevice->Reset(&fSettings.fPresentParams);
			int count = 0;
			// Retry until Reset() succeeds; there is no upper bound on attempts.
			while( FAILED(hr) )
			{
				// After 25 failed retries, tell the user once and fall back to
				// the default parameters. The counter keeps rising afterwards,
				// so this happens at most once per call.
				if(count++ == 25)
				{
					IPrintDeviceInitError();
					IResetToDefaults(&fSettings.fPresentParams);
				}
				// Still not ready? This is bad.
				// Until we called Reset(), we could make any D3D call we wanted,
				// and it would turn into a no-op. But once we call Reset(), until
				// the device really is reset, anything but TestCoop/Reset/Release
				// has just become illegal. We've already released everything, Reset
				// just failed, not much to do but wait and try again.
				::Sleep(250);
				hr = fD3DDevice->Reset(&fSettings.fPresentParams);
			}
			// Cached device state is stale after a reset.
			fSettings.fCurrFVFFormat = 0;
			fSettings.fCurrVertexShader = NULL;
			fManagedAlloced = false;
			ICreateDynDeviceObjects();
			IInitDeviceState();

			/// Broadcast a message letting everyone know that we were recreated and that
			/// all device-specific stuff needs to be recreated
			plDeviceRecreateMsg* clean = TRACKED_NEW plDeviceRecreateMsg(this);
			plgDispatch::MsgSend(clean);
		}
		fDevWasLost = true;
		fDeviceLost = false;
		// NOTE(review): fForceDeviceReset is not cleared anywhere in this
		// function. Unless it is cleared elsewhere, every later device loss
		// would take the forced-reset path - confirm.

		// We return true here, even though we've successfully recreated, to take
		// another spin around the update loop and give everyone a chance to
		// get back in sync.
		return true;
	}
	return false;
}

// ResetDisplayDevice /////////////////////////////////////////////////////////
// Apply a new display configuration and force a device reset.
//	Width, Height			- requested back buffer size.
//	ColorDepth				- requested bit depth (not compared when currently windowed).
//	Windowed				- true for windowed mode, false for fullscreen.
//	NumAASamples			- requested multisample count; the closest supported
//							  count at or below this (down to 2) is used, else none.
//	MaxAnisotropicSamples	- anisotropic filtering level; 0 disables it.
//	VSync					- only consulted in fullscreen; windowed always uses
//							  D3DPRESENT_INTERVAL_DEFAULT.
// Returns immediately if nothing differs from the current settings.
void plDXPipeline::ResetDisplayDevice(int Width, int Height, int ColorDepth, hsBool Windowed, int NumAASamples, int MaxAnisotropicSamples, hsBool VSync /* = false */)
{
	if( fSettings.fPresentParams.BackBufferWidth == Width &&
		fSettings.fPresentParams.BackBufferHeight == Height &&
		(fSettings.fPresentParams.Windowed ? 1 : fSettings.fColorDepth == ColorDepth) && // if we're windowed dont check color depth we just use the desktop colordepth
		((fSettings.fPresentParams.Windowed && Windowed)  || (!fSettings.fPresentParams.Windowed && !Windowed)) &&
		fSettings.fNumAASamples == NumAASamples &&
		fSettings.fMaxAnisotropicSamples == MaxAnisotropicSamples &&
		fVSync == VSync
	)
	{
		return;		// nothing has changed
	}

	fVSync = VSync;
	int i = 0;
	hsTArray<D3DEnum_ModeInfo> *modes = &fCurrentDevice->fModes;
	// check for supported resolution if we're not going to windowed mode
	if(!Windowed)
	{
		for( i = 0; i < modes->Count(); i++ )
		{
			D3DEnum_ModeInfo *mode = &(*modes)[i];
			if(mode->fDDmode.Width == Width &&
				mode->fDDmode.Height == Height &&
				mode->fBitDepth == ColorDepth )
			{
				ISetCurrentMode(&(*modes)[i]);
				break;
			}
		}
	}
	// i == Count() means the fullscreen search above found no match, and the
	// resolution is then left unchanged. When Windowed, i is still 0, so the
	// resolution is applied whenever the mode list is non-empty.
	if(i != modes->Count())
	{
		// Set Resolution
		fSettings.fOrigWidth = Width;
		fSettings.fOrigHeight = Height;
		IGetViewTransform().SetScreenSize(Width, Height);
		fSettings.fPresentParams.BackBufferWidth = Width;
		fSettings.fPresentParams.BackBufferHeight = Height;
		fSettings.fColorDepth = ColorDepth;
		fSettings.fPresentParams.BackBufferFormat = D3DFMT_X8R8G8B8;
	}

	// set windowed/fullscreen mode
	fCurrentMode->fWindowed = Windowed;
	fSettings.fPresentParams.Windowed = Windowed;
	fSettings.fFullscreen = !Windowed;

	// set Antialiasing
	fSettings.fNumAASamples = 0;
	// Attempt to find the closest AA setting we can
	fSettings.fPresentParams.MultiSampleType = D3DMULTISAMPLE_NONE;
	for( i = NumAASamples; i >= 2; i-- )
	{
		if( fCurrentMode->fFSAATypes.Find( (D3DMULTISAMPLE_TYPE)i ) != fCurrentMode->fFSAATypes.kMissingIndex )
		{
			fSettings.fNumAASamples = i;
			fSettings.fPresentParams.MultiSampleType = (D3DMULTISAMPLE_TYPE)i;
			break;
		}
	}
	// Keep the antialias capability bit in step with the chosen sample count.
	if( fSettings.fNumAASamples > 0 )
		fSettings.fD3DCaps |= kCapsFSAntiAlias;
	else
		fSettings.fD3DCaps &= ~kCapsFSAntiAlias;

	// Set Anisotropic filtering
	fSettings.fMaxAnisotropicSamples = MaxAnisotropicSamples;
	ISetAnisotropy(MaxAnisotropicSamples > 0);
	if(Windowed)
	{
		fSettings.fPresentParams.PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT;
	}
	else
	{
		fSettings.fPresentParams.PresentationInterval = ( fVSync ? D3DPRESENT_INTERVAL_DEFAULT : D3DPRESENT_INTERVAL_IMMEDIATE );
	}

	// Force a device reset
	fDeviceLost = true;
	fForceDeviceReset = true;

	// NOTE(review): the aspect ratio uses the requested Width/Height even when
	// the resolution block above was skipped (no matching fullscreen mode), and
	// Height == 0 is not guarded - confirm callers never hit either case.
	plVirtualCam1::SetAspectRatio((float)Width / (float)Height);
	plVirtualCam1::SetFOV(plVirtualCam1::GetFOVw(), plVirtualCam1::GetFOVh());

	IResetDevice();


	return;
}

// GetSupportedColorDepths ////////////////////////////////////////////////////
// Append to ColorDepths every distinct bit depth found in the current
// device's mode list. Depths already present in ColorDepths, including any
// the caller put there beforehand, are not added a second time.
//	ColorDepths - in/out list of bit depths; existing entries are kept.
void plDXPipeline::GetSupportedColorDepths(hsTArray<int> &ColorDepths)
{
	int i, j;
	// iterate through display modes
	for( i = 0; i < fCurrentDevice->fModes.Count(); i++ )
	{
		// Check to see if color depth has been added already.
		// The collected list must be indexed by j (the inner loop variable);
		// indexing it by the mode index i compares against the wrong slot and
		// can read past the end of ColorDepths.
		for( j = 0; j < ColorDepths.Count(); j++ )
		{
			if( fCurrentDevice->fModes[i].fBitDepth == ColorDepths[j] )
				break;
		}
		// j ran off the end: this depth is not in the list yet.
		if(j == ColorDepths.Count())
		{
			//add it
			ColorDepths.Push( fCurrentDevice->fModes[i].fBitDepth );
		}
	}
}

void plDXPipeline::GetSupportedDisplayModes(std::vector<plDisplayMode> *res, int ColorDepth  )
{
	int i, j;
	std::vector<plDisplayMode> supported;
	// loop through display modes
	for( i = 0; i < fCurrentDevice->fModes.Count(); i++ )
	{
		if( fCurrentDevice->fModes[i].fBitDepth == ColorDepth )
		{
			// check for duplicate mode
			for( j = 0; j < supported.size(); j++ )
			{
				if(supported[j].Width == fCurrentDevice->fModes[i].fDDmode.Width && supported[j].Height == fCurrentDevice->fModes[i].fDDmode.Height)
					break;
			}
			if(j == supported.size())
			{
				// new mode, add it
				plDisplayMode mode;
				mode.Width = fCurrentDevice->fModes[i].fDDmode.Width;
				mode.Height = fCurrentDevice->fModes[i].fDDmode.Height;
				mode.ColorDepth = ColorDepth;
				supported.push_back(mode);
			}
		}
	}

	*res = supported;
}

// Get max anitialias for the specified displaymode
int plDXPipeline::GetMaxAntiAlias(int Width, int Height, int ColorDepth)
{
	int max = 0;
	D3DEnum_ModeInfo *pCurrMode = nil;
	hsTArray<D3DEnum_ModeInfo> *modes = &fCurrentDevice->fModes;
	for(int i = 0; i < modes->Count(); i++ )
	{
		D3DEnum_ModeInfo *mode = &(*modes)[i];
		if( mode->fDDmode.Width == Width &&
			mode->fDDmode.Height == Height &&
			mode->fBitDepth == ColorDepth )
		{
			pCurrMode = mode;
		}
	}
	if(pCurrMode)
	{
		for(int i = 0; i < pCurrMode->fFSAATypes.Count(); i++)
		{
			if(pCurrMode->fFSAATypes[i] > max)
				max = pCurrMode->fFSAATypes[i];
		}
	}
	return max;
}

// GetMaxAnisotropicSamples ///////////////////////////////////////////////////
// Return the MaxAnisotropy value from the current device's caps, or 0 when
// there is no current device.
int plDXPipeline::GetMaxAnisotropicSamples()
{
	if( !fCurrentDevice )
		return 0;

	return fCurrentDevice->fDDCaps.MaxAnisotropy;
}

//// Resize ///////////////////////////////////////////////////////////////////
// Resize is fairly obsolete, having been replaced by IResetDevice, which is
// automatically called if needed on BeginRender.
// This Resize function used to serve as both to Resize the primary buffers and
// to restore after losing the device (alt-tab). It didn't actually do either
// very well, so I'm not sure why I haven't deleted it.
//	width, height - new size. Both zero means "just restore the device": a
//					plain Reset() is tried first, and a full recreate follows
//					only if that fails.
void	plDXPipeline::Resize( UInt32 width, UInt32 height )
{
	hsMatrix44	w2c, c2w, proj;


	HRESULT coopLev = fD3DDevice->TestCooperativeLevel();
	if( coopLev == D3DERR_DEVICELOST )
	{
		/// Direct3D is reporting that we lost the device but are unable to reset
		/// it yet, so ignore.
		hsStatusMessage( "Received Resize() request at an invalid time. Ignoring...\n" );
		return;
	}
	if( !width && !height )
	{
		// Restore request, but the device is fine: nothing to do.
		if( D3D_OK == coopLev )
			return;

		// Try the cheap path: release the dynamic objects and Reset().
		IReleaseDynDeviceObjects();
		HRESULT hr = fD3DDevice->Reset(&fSettings.fPresentParams);
		fManagedAlloced = false;
		if( !FAILED(hr) )
		{
			ICreateDynDeviceObjects();
			IInitDeviceState();
			return;
		}
		// Reset() failed: fall through to the full recreate below.
	}

	// Store some states that we *want* to restore back...
	plViewTransform resetTransform = GetViewTransform();

	/// HACK: Don't recreate if we're windowed, bad things happen
	/// Comment out this if if you want to test the crashing thing in windowed alt-tabbing
#if 0
	if( ( width == 0 || height == 0 ) && !fSettings.fFullscreen )
		return;
#endif

	// Destroy old
	// This also releases fD3DDevice and fD3DObject and sets them to nil.
	IReleaseDeviceObjects();

	// Reset width and height
	if( width != 0 && height != 0 )
	{
		// Width and height of zero mean just recreate
		fSettings.fOrigWidth = width;
		fSettings.fOrigHeight = height;
		IGetViewTransform().SetScreenSize((UInt16)(fSettings.fOrigWidth), (UInt16)(fSettings.fOrigHeight));
		resetTransform.SetScreenSize((UInt16)(fSettings.fOrigWidth), (UInt16)(fSettings.fOrigHeight));
	}
	else
	{
		// Just for debug
		hsStatusMessage( "Recreating the pipeline...\n" );
	}

	// Recreate
	// ICreateMaster() and ICreateDeviceObjects() are treated as failed when
	// they return non-zero.
	if( !fD3DObject )
	{
		if( ICreateMaster() )
		{
			IShowErrorMessage( "Cannot create D3D master object" );
			return;
		}
	}

	// Go recreate surfaces and DX-dependent objects
	if( ICreateDeviceObjects() )
	{
		IShowErrorMessage( "Cannot create Direct3D device" );
		return;
	}

	// Restore states
	SetViewTransform(resetTransform);
	IProjectionMatrixToD3D();

	/// Broadcast a message letting everyone know that we were recreated and that
	/// all device-specific stuff needs to be recreated
	plDeviceRecreateMsg* clean = TRACKED_NEW plDeviceRecreateMsg(this);
	plgDispatch::MsgSend(clean);
}


///////////////////////////////////////////////////////////////////////////////
//// Debug Text ///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

//// MakeTextFont /////////////////////////////////////////////////////////////
// Allocate a plDXTextFont bound to this pipeline and its D3D device, create
// it with the given face and size, and link it onto fTextFontRefList.
// Returns the new font, or nil if the allocation yielded nil.
plTextFont	*plDXPipeline::MakeTextFont( char *face, UInt16 size )
{
	plTextFont* newFont = TRACKED_NEW plDXTextFont( this, fD3DDevice );
	if( newFont != nil )
	{
		newFont->Create( face, size );
		newFont->Link( &fTextFontRefList );
	}
	return newFont;
}


///////////////////////////////////////////////////////////////////////////////
//// Drawable Stuff ///////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

//// Draw /////////////////////////////////////////////////////////////////////

// PreRender //////////////////////////////////////////////////////////////////
// Most of this is debugging stuff, drawing the bounds, drawing the normals, etc.
// The functional part is in IGetVisibleSpans, which creates a list of the visible (non-culled)
// span indices within this drawable.
// This is called once per render, and generally well before rendering begins (as part of the
// cull phase).
//	drawable - drawable to cull; anything that is not a plDrawableSpans is rejected.
//	visList	 - filled in by IGetVisibleSpans.
//	visMgr	 - passed through to IGetVisibleSpans.
// Returns true if visList ends up non-empty. Returns false when the drawable
// is not a plDrawableSpans or its type is masked out by fView.fDrawableTypeMask.
hsBool	plDXPipeline::PreRender( plDrawable* drawable, hsTArray<Int16>& visList, plVisMgr* visMgr )
{
	plDrawableSpans	*ds = plDrawableSpans::ConvertNoRef(drawable);
	if( !ds )
		return false;
	if( ( ds->GetType() & fView.fDrawableTypeMask ) == 0 )
		return false;

	IGetVisibleSpans( ds, visList, visMgr );

	// Debug only: add a bounds span (or a normals span) to fBoundsSpans for
	// each visible span. fBoundsSpans itself is skipped.
#if MCN_BOUNDS_SPANS
	if( ( drawable != fBoundsSpans ) && IsDebugFlagSet(plPipeDbg::kFlagShowAllBounds) )
	{
		const hsTArray<plSpan *>	&spans = ds->GetSpanArray();
		int i;
		for( i = 0; i < visList.GetCount(); i++ )
		{
			/// Add a span to our boundsIce to show this
			IAddBoundsSpan( fBoundsSpans, &spans[ visList[i] ]->fWorldBounds );
		}
	}
	else if( ( drawable != fBoundsSpans ) && IsDebugFlagSet(plPipeDbg::kFlagShowNormals) )
	{
		const hsTArray<plSpan *>	&spans = ds->GetSpanArray();
		int i;
		for( i = 0; i < visList.GetCount(); i++ )
		{
			/// Add a span to our boundsIce to show this
			plIcicle	*span = (plIcicle *)spans[ visList[ i ] ];
			if( span->fTypeMask & plSpan::kIcicleSpan )
			{
				IAddNormalsSpan( fBoundsSpans, span, (plDXVertexBufferRef *)ds->GetVertexRef( span->fGroupIdx, span->fVBufferIdx ), 0xff0000ff );
			}
		}
	}
#endif
	// Debug only: draw the bounds of one level of the drawable's space tree,
	// with the color picked from the level number.
#if MF_BOUNDS_LEVEL_ICE
	if( (fSettings.fBoundsDrawLevel >= 0) && ( drawable != fBoundsSpans ) )
	{
		hsTArray<Int16> bndList;
		drawable->GetSpaceTree()->HarvestLevel(fSettings.fBoundsDrawLevel, bndList);
		int i;
		for( i = 0; i < bndList.GetCount(); i++ )
		{
			IAddBoundsSpan( fBoundsSpans, &hsBounds3Ext(drawable->GetSpaceTree()->GetNode(bndList[i]).GetWorldBounds()), 0xff000000 | (0xf << ((fSettings.fBoundsDrawLevel % 6) << 2)) );
		}
	}
#endif // MF_BOUNDS_LEVEL_ICE


	return visList.GetCount() > 0;
}

// One triangle as handled by IAvatarSort: its three vertex indices plus the
// key it is sorted by.
struct plSortFace
{
	UInt16		fIdx[3];	// the triangle's three vertex indices
	hsScalar	fDist;		// sort key; IAvatarSort stores a squared distance to the view position
};

// Ordering predicate for plSortFace: lhs goes before rhs when lhs has the
// larger fDist, so std::sort with this predicate yields descending fDist.
struct plCompSortFace : public std::binary_function<plSortFace, plSortFace, bool>
{
	bool operator()( const plSortFace& lhs, const plSortFace& rhs) const
	{
		return rhs.fDist < lhs.fDist;
	}
};

// IAvatarSort /////////////////////////////////////////////////////////////////////////
// We handle avatar sort differently from the rest of the face sort. The reason is that
// within the single avatar index buffer, we want to only sort the faces of spans requesting
// a sort, and sort them in place.
// Contrast that with the normal scene translucency sort. There, we sort all the spans in a drawable,
// then we sort all the faces in that drawable, then for each span in the sorted span list, we extract
// the faces for that span appending onto the index buffer. This gives great efficiency because
// only the visible faces are sorted and they wind up packed into the front of the index buffer, which
// permits more batching. See plDrawableSpans::SortVisibleSpans.
// For the avatar, it's generally the case that all the avatar is visible or not, and there is only
// one material, so neither of those efficiencies is helpful. Moreover, for the avatar the faces we
// want sorted are a tiny subset of the avatar's faces. Moreover, and most importantly, for the avatar, we
// want to preserve the order that spans are drawn, so, for example, the opaque base head will always be
// drawn before the translucent hair fringe, which will always be drawn before the pink clear plastic baseball cap.
//	d		- drawable whose spans are examined.
//	visList	- indices of the spans to consider; only those with
//			  plSpan::kPartialSort set have their faces sorted.
// Always returns true.
hsBool plDXPipeline::IAvatarSort(plDrawableSpans* d, const hsTArray<Int16>& visList)
{
	plProfile_BeginTiming(AvatarSort);
	int i;
	for( i = 0; i < visList.GetCount(); i++ )
	{
		hsAssert(d->GetSpan(visList[i])->fTypeMask & plSpan::kIcicleSpan, "Unknown type for sorting faces");

		plIcicle* span = (plIcicle*)d->GetSpan(visList[i]);

		if( span->fProps & plSpan::kPartialSort )
		{
			hsAssert(d->GetBufferGroup(span->fGroupIdx)->AreIdxVolatile(), "Badly setup buffer group - set PartialSort too late?");

			const hsPoint3 viewPos = GetViewPositionWorld();

			plGBufferGroup* group = d->GetBufferGroup(span->fGroupIdx);

			plDXVertexBufferRef* vRef = (plDXVertexBufferRef*)group->GetVertexBufferRef(span->fVBufferIdx);

			// Raw vertex data and stride. The code below reads an hsPoint3 at
			// the start of each vertex (vdata + idx * stride).
			const UInt8* vdata = vRef->fData;
			const UInt32 stride = vRef->fVertexSize;

			const int numTris = span->fILength/3;

			// Function-static scratch array, reused from one call to the next.
			static hsTArray<plSortFace> sortScratch;
			sortScratch.SetCount(numTris);

			plProfile_IncCount(AvatarFaces, numTris);

			plSortFace* begin = sortScratch.AcquireArray();
			plSortFace* end = begin + numTris;

			//
			// Have three very similar sorts here, differing only on where the "position" of
			// each triangle is defined, either as the center of the triangle, the nearest
			// point on the triangle, or the farthest point on the triangle.
			// Having tried all three on the avatar (the only thing this sort is used on),
			// the best results surprisingly came from using the center of the triangle.
			// Only the first (#if 1, TRICENTER) variant is compiled.
			UInt16* indices = group->GetIndexBufferData(span->fIBufferIdx) + span->fIStartIdx;
			int j;
			for( j = 0; j < numTris; j++ )
			{
#if 1 // TRICENTER
				// Sum the three corner positions, scale by roughly 1/3 to get
				// the center, and key on its squared distance to the view position.
				UInt16 idx = *indices++;
				sortScratch[j].fIdx[0] = idx;
				hsPoint3 pos = *(hsPoint3*)(vdata + idx * stride);

				idx = *indices++;
				sortScratch[j].fIdx[1] = idx;
				pos += *(hsPoint3*)(vdata + idx * stride);

				idx = *indices++;
				sortScratch[j].fIdx[2] = idx;
				pos += *(hsPoint3*)(vdata + idx * stride);

				pos *= 0.3333f;

				sortScratch[j].fDist = hsVector3(&pos, &viewPos).MagnitudeSquared();
#elif 0 // NEAREST
				UInt16 idx = *indices++;
				sortScratch[j].fIdx[0] = idx;
				hsPoint3 pos = *(hsPoint3*)(vdata + idx * stride);
				hsScalar dist = hsVector3(&pos, &viewPos).MagnitudeSquared();
				hsScalar minDist = dist;

				idx = *indices++;
				sortScratch[j].fIdx[1] = idx;
				pos = *(hsPoint3*)(vdata + idx * stride);
				dist = hsVector3(&pos, &viewPos).MagnitudeSquared();
				if( dist < minDist )
					minDist = dist;

				idx = *indices++;
				sortScratch[j].fIdx[2] = idx;
				pos = *(hsPoint3*)(vdata + idx * stride);
				dist = hsVector3(&pos, &viewPos).MagnitudeSquared();
				if( dist < minDist )
					minDist = dist;

				sortScratch[j].fDist = minDist;
#elif 1 // FURTHEST
				UInt16 idx = *indices++;
				sortScratch[j].fIdx[0] = idx;
				hsPoint3 pos = *(hsPoint3*)(vdata + idx * stride);
				hsScalar dist = hsVector3(&pos, &viewPos).MagnitudeSquared();
				hsScalar maxDist = dist;

				idx = *indices++;
				sortScratch[j].fIdx[1] = idx;
				pos = *(hsPoint3*)(vdata + idx * stride);
				dist = hsVector3(&pos, &viewPos).MagnitudeSquared();
				if( dist > maxDist )
					maxDist = dist;

				idx = *indices++;
				sortScratch[j].fIdx[2] = idx;
				pos = *(hsPoint3*)(vdata + idx * stride);
				dist = hsVector3(&pos, &viewPos).MagnitudeSquared();
				if( dist > maxDist )
					maxDist = dist;

				sortScratch[j].fDist = maxDist;
#endif // SORTTYPES
			}

			// plCompSortFace orders by descending fDist, i.e. farthest triangle first.
			std::sort(begin, end, plCompSortFace());

			// Write the sorted triangles back over the span's slice of the
			// index buffer, in place.
			indices = group->GetIndexBufferData(span->fIBufferIdx) + span->fIStartIdx;
			plSortFace* iter = sortScratch.AcquireArray();;
			for( j = 0; j < numTris; j++ )
			{
				*indices++ = iter->fIdx[0];
				*indices++ = iter->fIdx[1];
				*indices++ = iter->fIdx[2];
				iter++;
			}

			// Flag the index buffer as modified.
			group->DirtyIndexBuffer(span->fIBufferIdx);
		}
	}
	plProfile_EndTiming(AvatarSort);
	return true;
}

// PrepForRender //////////////////////////////////////////////////////////////////
// Get the given drawable, and the spans of it listed in visList, ready to be
// rendered. In order:
// a) pick the lights used for each span (ICheckLighting),
// b) face-sort if the drawable asks for it (per the original note, the spans
//		themselves are already in sorted order in visList),
// c) give the drawable its own last-minute PrepForRender pass,
// d) do any software skinning,
// e) do the avatar partial face sort, which has to come after the skinning.
// Per the original note this runs once per render, before any rendering
// starts (see plPageTreeMgr.cpp), so whatever is prepared here, such as the
// lights a span will use, must be stored on the span to last until then.
// Returns false if d is not a plDrawableSpans or if the software vertex
// blend fails, true otherwise.
hsBool plDXPipeline::PrepForRender(plDrawable* d, hsTArray<Int16>& visList, plVisMgr* visMgr)
{
	plProfile_BeginTiming(PrepDrawable);

	hsBool prepared = false;

	plDrawableSpans* spans = plDrawableSpans::ConvertNoRef(d);
	if( spans )
	{
		// Find our lights
		ICheckLighting(spans, visList, visMgr);

		// Sort our faces
		if( spans->GetNativeProperty(plDrawable::kPropSortFaces) )
			spans->SortVisibleSpans(visList, this);

		// Let the drawable do last-minute buffer updates (the original note
		// mentions generating particle tri lists).
		spans->PrepForRender( this );

		// Any skinning necessary; a failure here leaves the result false.
		if( ISoftwareVertexBlend(spans, visList) )
		{
			// Avatar face sorting happens after the software skin.
			if( spans->GetNativeProperty(plDrawable::kPropPartialSort) )
				IAvatarSort(spans, visList);

			prepared = true;
		}
	}

	plProfile_EndTiming(PrepDrawable);

	return prepared;
}

// Draw ///////////////////////////////////////////////////////////
// One-shot helper for a drawable that has to be drawn outside the normal
// scene graph render (i.e. something plPageTreeMgr does not manage): runs
// PreRender, PrepForRender and Render back to back. Per the original note
// this is much less efficient than the normal path, so special cases only.
// Does nothing if d is not a plDrawableSpans or its type is masked out by
// fView.fDrawableTypeMask.
void	plDXPipeline::Draw( plDrawable *d )
{
	plDrawableSpans* spans = plDrawableSpans::ConvertNoRef( d );
	if( !spans )
		return;

	if( !( spans->GetType() & fView.fDrawableTypeMask ) )
		return;

	// Function-static visibility list, shared by every call.
	static hsTArray<Int16> scratchVis;

	PreRender( spans, scratchVis );
	PrepForRender( spans, scratchVis );
	Render( spans, scratchVis );
}

// Render ////////////////////////////////////////////////////////////////////////////////
// The normal way to render a subset of a drawable. PreRender and
// PrepForRender are assumed to have run already.
// PreRender/PrepForRender are called once per drawable per render, with a
// visList holding every span that will be drawn. Render, however, may be
// called several times with visList subsets whose union is that full list.
// That happens with sorted spans, when spans of drawable B fall between
// spans of drawable A:
//
// PreRender(A, ATotalVisList);
// PreRender(B, BTotalVisList);
// PrepForRender(A, ATotalVisList);
// PrepForRender(B, BTotalVisList);
// Render(A, AFarHalfVisList);
// Render(B, BTotalVisList);
// Render(A, ANearHalfVisList);
//
// plPageTreeMgr is what drives that sequence.
void	plDXPipeline::Render( plDrawable *d, const hsTArray<Int16>& visList )
{
	// Render targets can be pushed/popped after BeginRender() but before we
	// get here, so flush any pending transform changes to D3D first.
	if( fView.fXformResetFlags )
		ITransformsToD3D();

	// Only plDrawableSpans are rendered; anything else is ignored.
	if( plDrawableSpans* spans = plDrawableSpans::ConvertNoRef( d ) )
		IRenderSpans( spans, visList );
}

//// BeginDrawable ////////////////////////////////////////////////////////////
// Obsolete, should be removed
hsBool plDXPipeline::BeginDrawable( plDrawable *d )
{
	// No per-drawable setup happens here: the argument is ignored and the
	// call unconditionally reports true.
	return true;
}

//// EndDrawable //////////////////////////////////////////////////////////////
// Obsolete, should be removed

hsBool plDXPipeline::EndDrawable( plDrawable *d )
{
	// No per-drawable teardown happens here: the argument is ignored and the
	// call unconditionally reports true.
	return true;
}

// IMakeLightLists ///////////////////////////////////////////////////////////
// Look through all the current lights, and fill out two lists.
// Only active lights (not disabled, not exactly black, and not
// ignored because of visibility regions by plVisMgr) will
// be considered.
// The first list is lights that will affect the avatar and similar
// indeterminately mobile (physical) objects - fLights.fCharLights.
// The second list is lights that aren't restricted by light include
// lists.
// These two abbreviated lists will be further refined for each object
// and avatar to find the strongest 8 lights which affect that object.
// A light with an include list (or LightGroup Component) has
// been explicitly told which objects it affects, so they don't
// need to be in the search lists.
// These lists are only constructed once per render, but searched
// multiple times
void plDXPipeline::IMakeLightLists(plVisMgr* visMgr)
{
	plProfile_BeginTiming(FindSceneLights);

	// Both search lists are rebuilt from scratch on every call.
	fLights.fCharLights.SetCount(0);
	fLights.fVisLights.SetCount(0);

	// One pass over the active light list serves both the visMgr and the
	// no-visMgr case; the visibility-region tests are simply skipped when
	// visMgr is nil.
	plLightInfo* light;
	for( light = fLights.fActiveList; light != nil; light = light->GetNext() )
	{
		plProfile_IncCount(LightActive, 1);

		// Idle lights never make it into either list.
		if( light->IsIdle() )
			continue;

		// With a visMgr, the light must be in its vis set and not in its vis-not set.
		if( visMgr
			&& ( light->InVisNot(visMgr->GetVisNot()) || !light->InVisSet(visMgr->GetVisSet()) ) )
			continue;

		plProfile_IncCount(LightOn, 1);

		if( !light->GetProperty(plLightInfo::kLPHasIncludes) )
		{
			// No include list: the light is a candidate for everything.
			fLights.fVisLights.Append(light);
			fLights.fCharLights.Append(light);
		}
		else if( light->GetProperty(plLightInfo::kLPIncludesChars) )
		{
			// Has an include list, but is flagged as also affecting characters.
			fLights.fCharLights.Append(light);
		}
	}

	plProfile_IncCount(LightVis, fLights.fVisLights.GetCount());
	plProfile_IncCount(LightChar, fLights.fCharLights.GetCount());

	plProfile_EndTiming(FindSceneLights);
}

// BeginVisMgr /////////////////////////////////////////////////////////
// Marks the beginning of a render with the given visibility manager.
// In particular, we cache which lights the visMgr believes to be
// currently active
void plDXPipeline::BeginVisMgr(plVisMgr* visMgr)
{
	// Rebuild fLights.fCharLights and fLights.fVisLights for this visMgr.
	// IMakeLightLists also handles a nil visMgr.
	IMakeLightLists(visMgr);
}

// EndVisMgr ///////////////////////////////////////////////////////////
// Marks the end of a render with the given visibility manager.
void plDXPipeline::EndVisMgr(plVisMgr* visMgr)
{
	// Empty both light search lists; the visMgr argument itself is not used.
	fLights.fCharLights.SetCount(0);
	fLights.fVisLights.SetCount(0);
}

// ICheckLighting ///////////////////////////////////////////////////////
// For every span in the list of visible span indices, find the list of
// lights that currently affect the span with an estimate of the strength
// of how much the light affects it. The strongest 8 lights will be used
// to illuminate that span.
// For projective lights, there is no limit on how many are supported, other
// than performance (usually fill rate limited).
// The permaLights and permaProjs are lights explicitly selected for a span
// via the LightGroup component.
// For static objects and static lights, the lighting was done offline and stored
// in the vertex diffuse color.
// So here we're only looking for:
// A) moving objects, which can't be statically lit, so are affected by all runtime lights.
// B) moving lights, which can't statically light, so affect all objects
// C) specular objects + specular lights, since specular can't be precomputed.
void plDXPipeline::ICheckLighting(plDrawableSpans* drawable, hsTArray<Int16>& visList, plVisMgr* visMgr)
{
	// Note: the visMgr parameter is not referenced anywhere in this function.

	if( fView.fRenderState & kRenderNoLights )
		return;

	if( !visList.GetCount() )
		return;

	plLightInfo		*light;
	int				j;

	// First add in the explicit lights (from LightGroups).
	// Refresh the lights as they are added (actually a lazy eval).
	plProfile_BeginTiming(FindLights);
	plProfile_BeginTiming(FindPerm);
	for( j = 0; j < visList.GetCount(); j++ )
	{
		// Every visible span gets its light list cleared, even when runtime
		// lights are disabled by the debug flag below.
		drawable->GetSpan( visList[ j ] )->ClearLights();

		if (IsDebugFlagSet(plPipeDbg::kFlagNoRuntimeLights))
			continue;

		// Set the bits for the lights added from the permanent lists (during ClearLights()).
		int k;
		const hsTArray<plLightInfo*>& permaLights = drawable->GetSpan(visList[j])->fPermaLights;
		for( k = 0; k < permaLights.GetCount(); k++ )
		{
			permaLights[k]->Refresh();
			if( permaLights[k]->GetProperty(plLightInfo::kLPShadowLightGroup) && !permaLights[k]->IsIdle() )
			{
				// If it casts a shadow, attach the shadow now.
				ISetShadowFromGroup(drawable, drawable->GetSpan(visList[j]), permaLights[k]);
			}
		}
		const hsTArray<plLightInfo*>& permaProjs = drawable->GetSpan(visList[j])->fPermaProjs;
		for( k = 0; k < permaProjs.GetCount(); k++ )
		{
			permaProjs[k]->Refresh();
			if( permaProjs[k]->GetProperty(plLightInfo::kLPShadowLightGroup) && !permaProjs[k]->IsIdle() )
			{
				// If it casts a shadow, attach the shadow now.
				ISetShadowFromGroup(drawable, drawable->GetSpan(visList[j]), permaProjs[k]);
			}
		}
	}
	plProfile_EndTiming(FindPerm);

	if (IsDebugFlagSet(plPipeDbg::kFlagNoRuntimeLights))
	{
		plProfile_EndTiming( FindLights );
		return;
	}

	// Sort the incoming spans as either
	// A) moving - affected by all lights - moveList
	// B) specular - affected by specular lights - specList
	// C) visible - affected by moving lights - visList
	// Runtime-lit spans go into both moveList and specList.
	static hsTArray<Int16> tmpList;
	static hsTArray<Int16> moveList;
	static hsTArray<Int16> specList;

	moveList.SetCount(0);
	specList.SetCount(0);

	plProfile_BeginTiming(FindSpan);
	int k;
	for( k = 0; k < visList.GetCount(); k++ )
	{
		const plSpan* span = drawable->GetSpan(visList[k]);

		if( span->fProps & plSpan::kPropRunTimeLight )
		{
			moveList.Append(visList[k]);
			specList.Append(visList[k]);
		}
		else if( span->fProps & plSpan::kPropMatHasSpecular )
			specList.Append(visList[k]);
	}
	plProfile_EndTiming(FindSpan);

	// Make a list of lights that can potentially affect spans in this drawable
	// based on the drawables bounds and properties.
	// If the drawable has the PropCharacter property, it is affected by lights
	// in fLights.fCharLights, else only by the smaller list of fLights.fVisLights.
	plProfile_BeginTiming(FindActiveLights);
	static hsTArray<plLightInfo*> lightList;
	lightList.SetCount(0);
	const hsBool isChar = 0 != drawable->GetNativeProperty(plDrawable::kPropCharacter);
	if( isChar )
	{
		int i;
		for( i = 0; i < fLights.fCharLights.GetCount(); i++ )
		{
			if( fLights.fCharLights[i]->AffectsBound(drawable->GetSpaceTree()->GetWorldBounds()) )
				lightList.Append(fLights.fCharLights[i]);
		}
	}
	else
	{
		int i;
		for( i = 0; i < fLights.fVisLights.GetCount(); i++ )
		{
			if( fLights.fVisLights[i]->AffectsBound(drawable->GetSpaceTree()->GetWorldBounds()) )
				lightList.Append(fLights.fVisLights[i]);
		}
	}
	plProfile_EndTiming(FindActiveLights);

	// Loop over the lights and for each light, extract a list of the spans that light
	// affects. Append the light to each spans list with a scalar strength of how strongly
	// the light affects it. Since the strength is based on the object's center position,
	// it's not very accurate, but good enough for selecting which lights to use.
	//
	// The per-span work is the same for every kind of light; only the candidate span
	// list and the profiling bucket differ:
	//		movable light			-> visList	(ApplyMoving)
	//		non-movable, specular	-> specList	(ApplyToSpec)
	//		everything else			-> moveList	(ApplyToMoving)
	enum ApplyMode
	{
		kModeMovingLight,
		kModeSpecLight,
		kModeOtherLight
	};

	plProfile_BeginTiming(ApplyActiveLights);
	for( k = 0; k < lightList.GetCount(); k++ )
	{
		light = lightList[k];

		tmpList.SetCount(0);

		ApplyMode			mode;
		hsTArray<Int16>*	candidates;
		if( light->GetProperty(plLightInfo::kLPMovable) )
		{
			mode = kModeMovingLight;
			candidates = &visList;
		}
		else if( light->GetProperty(plLightInfo::kLPHasSpecular) )
		{
			mode = kModeSpecLight;
			candidates = &specList;
		}
		else
		{
			mode = kModeOtherLight;
			candidates = &moveList;
		}

		// specList/moveList may well be empty, in which case this light has nothing to do.
		// (The movable-light case is deliberately not tested, matching the original flow.)
		if( (mode != kModeMovingLight) && !candidates->GetCount() )
			continue;

		switch( mode )
		{
		case kModeMovingLight:
			plProfile_BeginTiming(ApplyMoving);
			break;
		case kModeSpecLight:
			plProfile_BeginTiming(ApplyToSpec);
			break;
		default:
			plProfile_BeginTiming(ApplyToMoving);
			break;
		}

		const hsTArray<Int16>& litList = light->GetAffected(drawable->GetSpaceTree(),
			*candidates,
			tmpList,
			drawable->GetNativeProperty(plDrawable::kPropCharacter) );

		// PUT OVERRIDE FOR KILLING PROJECTORS HERE!!!!
		hsBool proj = nil != light->GetProjection();
		if( fView.fRenderState & kRenderNoProjection )
			proj = false;

		for( j = 0; j < litList.GetCount(); j++ )
		{
			const plSpan* span = drawable->GetSpan(litList[j]);

			// Spans flagged ProjAsVtx never take this light as a projection.
			hsBool currProj = proj;
			if( span->fProps & plSpan::kPropProjAsVtx )
				currProj = false;

			// A projected light is dropped entirely for spans that skip projections.
			if( currProj && (span->fProps & plSpan::kPropSkipProjection) )
				continue;

			// NOTE(review): the original stored this result in an unused local. The call is
			// kept in case GetDeviceRef() has side effects - confirm, then remove.
			light->GetDeviceRef();

			hsScalar		strength, scale;
			light->GetStrengthAndScale(span->fWorldBounds, strength, scale);

			// We can't pitch a light because its "strength" is zero, because the strength is based
			// on the center of the span and isn't conservative enough. We can pitch based on the
			// scale though, since a light scaled down to zero will have no effect anywhere.
			if( scale > 0 )
			{
				plProfile_Inc(FindLightsFound);
				span->AddLight(light, strength, scale, currProj);
			}
		}

		switch( mode )
		{
		case kModeMovingLight:
			plProfile_EndTiming(ApplyMoving);
			break;
		case kModeSpecLight:
			plProfile_EndTiming(ApplyToSpec);
			break;
		default:
			plProfile_EndTiming(ApplyToMoving);
			break;
		}
	}
	plProfile_EndTiming(ApplyActiveLights);

	IAttachShadowsToReceivers(drawable, visList);

	plProfile_EndTiming(FindLights);
}

// HarvestVisible ////////////////////////////////////////////////////////////////////////
// Construct a list of the indices of leaf nodes in the given spacetree which are currently
// visible according to the current cull tree. The cull tree factors in camera frustum and
// occluder polys, but _not_ the current visibility regions, plVisMgr.
// This is the normal path for visibility culling at a gross level (e.g. which SceneNodes
// to bother with, which drawables within the SceneNode). For finer objects, like the spans
// themselves, the culling is done via IGetVisibleSpans, which also takes the plVisMgr into
// account.
hsBool plDXPipeline::HarvestVisible(plSpaceTree* space, hsTArray<Int16>& visList)
{
	// No tree, nothing visible.
	if( !space )
	{
		return false;
	}

	// Bring the space tree up to date for the current view position.
	space->SetViewPos(GetViewPositionWorld());
	space->Refresh();

	// The cull tree is rebuilt lazily, only when it has been flagged dirty.
	if( fView.fCullTreeDirty )
	{
		IRefreshCullTree();
	}

	plProfile_BeginTiming(Harvest);
	fView.fCullTree.Harvest(space, visList);
	plProfile_EndTiming(Harvest);

	// Report whether the harvest left anything in visList.
	const hsBool foundAny = ( visList.GetCount() != 0 );
	return foundAny;
}

//// IGetVisibleSpans /////////////////////////////////////////////////////
//	Given a drawable, returns a list of visible span indices. Disabled spans will not
//	show up in the list, behaving as if they were culled.
//	See plCullTree (in plPipeline) and plSpaceTree (in plDrawable) and plVisMgr (in plScene).
void plDXPipeline::IGetVisibleSpans( plDrawableSpans* drawable, hsTArray<Int16>& visList, plVisMgr* visMgr )
{
	// Scratch list holding what the cull tree harvests, before the
	// sub-type and distance filters below are applied.
	static hsTArray<Int16> harvested;
	harvested.SetCount(0);
	visList.SetCount(0);

	drawable->GetSpaceTree()->SetViewPos(GetViewPositionWorld());

	drawable->GetSpaceTree()->Refresh();

	if( fView.fCullTreeDirty )
		IRefreshCullTree();

	const hsScalar viewDist = GetViewDirWorld().InnerProduct(GetViewPositionWorld());

	const hsTArray<plSpan *>	&spans = drawable->GetSpanArray();

	plProfile_BeginTiming(Harvest);

	// When a visMgr is supplied, it is installed on the drawable only for
	// the duration of the harvest and cleared again right after.
	if( visMgr )
		drawable->SetVisSet(visMgr);
	fView.fCullTree.Harvest(drawable->GetSpaceTree(), harvested);
	if( visMgr )
		drawable->SetVisSet(nil);

	// This is a big waste of time. As a desperate "optimization" pass, the artists
	// insist on going through and marking objects to fade or pop out of rendering
	// past a certain distance. This breaks the batching and requires more CPU to
	// check the objects by distance. Since there is no pattern to the distance at
	// which objects will be told not to draw, there's no way to make this hierarchical,
	// which is what it would take to make it a performance win. So they succeed in
	// reducing the poly count, but generally the frame rate goes _down_ as well.
	// Unfortunately, this technique actually does work in a few key areas, so
	// I haven't been able to purge it.
	const hsBool skipVisDist = IsDebugFlagSet(plPipeDbg::kFlagSkipVisDist);

	int i;
	for( i = 0; i < harvested.GetCount(); i++ )
	{
		const Int16 spanIdx = harvested[i];

		// Spans whose sub-type isn't in the current mask are never drawn.
		if( !( spans[spanIdx]->fSubType & GetSubDrawableTypeMask() ) )
			continue;

		if( !skipVisDist )
		{
			// We'll check here for spans we can discard because they've completely distance faded out.
			// Note this is based on view direction distance (because the fade is), rather than the
			// preferable distance to camera we sort by.
			hsScalar minDist, maxDist;
			if( drawable->GetSubVisDists(spanIdx, minDist, maxDist) )
			{
				const hsBounds3Ext& bnd = drawable->GetSpaceTree()->GetNode(spanIdx).fWorldBounds;
				hsPoint2 depth;
				bnd.TestPlane(GetViewDirWorld(), depth);

				// Reject when the span lies wholly outside [minDist, maxDist] along the view direction.
				// (Presumably depth.fX/fY are the near/far extents from TestPlane - confirm.)
				if( (0 < minDist + viewDist - depth.fY)
					|| (0 > maxDist + viewDist - depth.fX) )
					continue;
			}
		}

		visList.Append(spanIdx);
	}
	plProfile_EndTiming(Harvest);
}

// ISetupTransforms //////////////////////////////////////////////////////////////////////////////////
// Set the D3D world transform according to the input span.
// Engine currently supports HW vertex blending with 2 matrices,
// else a single Local To World.
// If software skinning is being used, the WORLD matrix will be identity,
// because the full local to world is folded into the skinned vertices.
void plDXPipeline::ISetupTransforms(plDrawableSpans* drawable, const plSpan& span, hsMatrix44& lastL2W)
{
	if( !span.fNumMatrices )
	{
		// No matrix palette: only re-send the transform when it differs from the
		// last one we set, otherwise just refresh the handedness flag.
		if( lastL2W != span.fLocalToWorld )
		{
			ISetLocalToWorld( span.fLocalToWorld, span.fWorldToLocal );
			lastL2W = span.fLocalToWorld;
		}
		else
		{
			fView.fLocalToWorldLeftHanded = lastL2W.GetParity();
		}
	}
	else if( span.fNumMatrices <= 2 )
	{
		// One or two matrices: always send the span's own local-to-world.
		ISetLocalToWorld( span.fLocalToWorld, span.fWorldToLocal );
		lastL2W = span.fLocalToWorld;
	}
	else
	{
		// More than two matrices: the world transform is reset and sent as-is,
		// but handedness still comes from the span's local-to-world.
		lastL2W.Reset();
		ISetLocalToWorld( lastL2W, lastL2W );
		fView.fLocalToWorldLeftHanded = span.fLocalToWorld.GetParity();
	}

	// Vertex blending is only turned on for exactly two matrices; the second
	// one is taken from the drawable's palette at fBaseMatrix+1.
	const bool useHWBlend = ( span.fNumMatrices == 2 );
	if( useHWBlend )
	{
		D3DXMATRIX	secondWorld;
		IMatrix44ToD3DMatrix(secondWorld, drawable->GetPaletteMatrix(span.fBaseMatrix+1));
		fD3DDevice->SetTransform(D3DTS_WORLDMATRIX(1), &secondWorld);
		fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_1WEIGHTS);
	}
	else
	{
		fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_DISABLE);
	}
}

// IRefreshDynVertices ////////////////////////////////////////////////////////////////////////
// All dynamic vertices share a single dynamic vertex buffer. They are cycled through
// that buffer using the NOOVERWRITE/DISCARD paradigm. Since the vertices sharing that
// buffer may be of different formats, care is taken to always start a group of vertices
// at the next available position in the buffer aligned with that vertex size.
// Only software skinned objects, dynamic decals, and particle systems currently use the
// dynamic vertex buffer.
hsBool plDXPipeline::IRefreshDynVertices(plGBufferGroup* group, plDXVertexBufferRef* vRef)
{
	// Returns false when there is no error (including "nothing to copy"),
	// and true when the vertices could not be written.

	// First, pad out our next slot to be on a vertex boundary (for this vertex size).
	fNextDynVtx = ((fNextDynVtx + vRef->fVertexSize-1) / vRef->fVertexSize) * vRef->fVertexSize;

	// Byte count of the vertex range [start, end) for this buffer.
	Int32 size = (group->GetVertBufferEnd(vRef->fIndex) - group->GetVertBufferStart(vRef->fIndex)) * vRef->fVertexSize;
	if( !size )
		return false; // No error, just nothing to do.

	hsAssert(size > 0, "Bad start and end counts in a group");

	// The assert alone doesn't stop execution, and a negative byte count would be
	// handed to Lock() and memcpy() as a size below. Treat it as an error instead.
	if( size < 0 )
		return true;

	// If we DON'T have room in our dynamic buffer
	if( fNextDynVtx + size > fDynVtxSize )
	{
		plProfile_IncCount(DynVBuffs, 1);

		// Advance the timestamp, because we're about to reuse the buffer
		fVtxRefTime++;

		// Reset next available spot index to zero
		fNextDynVtx = 0;

	}
	// Point our ref at the next available spot
	Int32 newStart = fNextDynVtx / vRef->fVertexSize;

	// fOffset is the difference between where the vertices sit in the dynamic
	// buffer and where they start in the group's own buffer, in vertices.
	vRef->fOffset = newStart - group->GetVertBufferStart(vRef->fIndex);

	// Lock the buffer
	// If index is zero, lock with discard, else with overwrite.
	DWORD lockFlag = fNextDynVtx ? D3DLOCK_NOOVERWRITE : D3DLOCK_DISCARD;
	UInt8*	destPtr = nil;
	if( FAILED( fDynVtxBuff->Lock( fNextDynVtx,
								size,
								(void **)&destPtr,
								lockFlag) ) )
	{
		hsAssert( false, "Cannot lock vertex buffer for writing" );
		return true;
	}

	// Prefer the ref's own data when it has some; otherwise read from the
	// group's buffer, starting at this ref's first vertex.
	UInt8* vData;
	if( vRef->fData )
	{
		vData = vRef->fData;
	}
	else
	{
		vData = group->GetVertBufferData(vRef->fIndex) + group->GetVertBufferStart(vRef->fIndex) * vRef->fVertexSize;
	}
	memcpy(destPtr, vData, size);

	// Unlock the buffer
	fDynVtxBuff->Unlock();

	// Advance next available spot index
	fNextDynVtx += size;

	// Set the timestamp
	vRef->fRefTime = fVtxRefTime;
	vRef->SetDirty(false);

	// The first time through, the ref takes a counted reference on the shared dynamic buffer.
	if( !vRef->fD3DBuffer )
	{
		vRef->fD3DBuffer = fDynVtxBuff;
		fDynVtxBuff->AddRef();
	}
	hsAssert(vRef->fD3DBuffer == fDynVtxBuff, "Holding on to an old dynamic buffer?");

//	vRef->SetRebuiltSinceUsed(true);

	return false;
}

// ICheckAuxBuffers ///////////////////////////////////////////////////////////////////////
// The AuxBuffers are associated with drawables for things to be drawn right after that
// drawable's contents. In particular, see the plDynaDecal, which includes things like
// water ripples, bullet hits, and footprints.
// This function just makes sure they are ready to be rendered, called right before
// the rendering.
hsBool plDXPipeline::ICheckAuxBuffers(const plAuxSpan* span)
{
	// Returns true if either device buffer ref is missing, false otherwise.
	plGBufferGroup* bufGroup = span->fGroup;

	plDXVertexBufferRef* vtxRef = (plDXVertexBufferRef*)bufGroup->GetVertexBufferRef(span->fVBufferIdx);
	if( !vtxRef )
	{
		return true;
	}

	plDXIndexBufferRef* idxRef = (plDXIndexBufferRef*)bufGroup->GetIndexBufferRef(span->fIBufferIdx);
	if( !idxRef )
	{
		return true;
	}

	// A vertex ref that has expired against the current timestamp needs
	// its vertices copied into the dynamic buffer again.
	if( vtxRef->Expired(fVtxRefTime) )
		IRefreshDynVertices(bufGroup, vtxRef);

	// Keep the index ref's offset in step with the vertex ref's, and flag
	// the index ref as rebuilt whenever the offset had to change.
	if( idxRef->fOffset != vtxRef->fOffset )
	{
		idxRef->fOffset = vtxRef->fOffset;
		idxRef->SetRebuiltSinceUsed(true);
	}

	return false; // No error
}

// ICheckDynBuffers ////////////////////////////////////////////////////////////////////////////////////////
// Make sure the buffers underlying this span are ready to be rendered. Meaning that the underlying
// D3D buffers are in sync with the plasma buffers.
hsBool plDXPipeline::ICheckDynBuffers(plDrawableSpans* drawable, plGBufferGroup* group, const plSpan* spanBase)
{
	// Returns true if either device buffer ref is missing, false otherwise.
	// The drawable argument is not used here.

	// Only spans that are both vertex spans and icicle (trilist) spans are handled;
	// anything else is reported as "no error" without touching any buffers.
	if( !(spanBase->fTypeMask & plSpan::kVertexSpan)
		|| !(spanBase->fTypeMask & plSpan::kIcicleSpan) )
	{
		return false;
	}

	plIcicle* icicle = (plIcicle*)spanBase;

	plDXVertexBufferRef* vtxRef = (plDXVertexBufferRef*)group->GetVertexBufferRef(icicle->fVBufferIdx);
	if( !vtxRef )
	{
		return true;
	}

	plDXIndexBufferRef* idxRef = (plDXIndexBufferRef*)group->GetIndexBufferRef(icicle->fIBufferIdx);
	if( !idxRef )
	{
		return true;
	}

	// A vertex ref that has expired against the current timestamp needs
	// its vertices copied into the dynamic buffer again.
	if( vtxRef->Expired(fVtxRefTime) )
		IRefreshDynVertices(group, vtxRef);

	// Keep the index ref's offset in step with the vertex ref's.
	if( idxRef->fOffset != vtxRef->fOffset )
	{
		idxRef->fOffset = vtxRef->fOffset;
		idxRef->SetRebuiltSinceUsed(true);
	}

	// A dirty index ref gets refilled from the group.
	if( idxRef->IsDirty() )
	{
		IFillIndexBufferRef(idxRef, group, icicle->fIBufferIdx);
		idxRef->SetRebuiltSinceUsed(true);
	}

	return false; // No error
}

//// IRenderSpans /////////////////////////////////////////////////////////////
// Renders an array of spans obtained from a plDrawableSpans object
// The incoming visList gives the indices of the spans which are visible and should
// be drawn now, and gives them in sorted order.
void	plDXPipeline::IRenderSpans( plDrawableSpans *drawable, const hsTArray<Int16>& visList )
{
	plProfile_BeginTiming(RenderSpan);

	const hsTArray<plSpan *>&	spans = drawable->GetSpanArray();

	plProfile_IncCount(EmptyList, !visList.GetCount());

	/// Start from a known device transform. Without this we'd have to special-case
	/// the first span, because lastL2W would be identity while the actual device
	/// transform might not be.
	hsMatrix44	lastL2W;
	lastL2W.Reset();
	ISetLocalToWorld( lastL2W, lastL2W );

	/// Walk the visible spans in order, folding each run of mergeable spans
	/// into a single draw.
	UInt32	runStart = 0;
	while( runStart < visList.GetCount() )
	{
		// An override material, when present, wins over the span's own material.
		hsGMaterial	*material = GetOverrideMaterial();
		if( !material )
			material = drawable->GetMaterial( spans[ visList[ runStart ] ]->fMaterialIdx );

		/// Work on a copy of the first span of the run; merged spans accumulate into it.
		plIcicle	merged( *( (plIcicle *)spans[ visList[ runStart ] ] ) );

		// Extend the run for as long as the following spans can merge in.
		UInt32	runEnd = runStart + 1;
		while( runEnd < visList.GetCount() )
		{
			plSpan	*candidate = spans[ visList[ runEnd ] ];

			// With an override material, adopt the candidate's material index
			// on the accumulated span before asking whether it can merge.
			if( GetOverrideMaterial() )
				merged.fMaterialIdx = candidate->fMaterialIdx;

			plProfile_BeginTiming(MergeCheck);
			const hsBool mergeable = candidate->CanMergeInto( &merged );
			plProfile_EndTiming(MergeCheck);
			if( !mergeable )
				break;

			plProfile_Inc(SpanMerge);

			plProfile_BeginTiming(MergeSpan);
			candidate->MergeInto( &merged );
			plProfile_EndTiming(MergeSpan);

			runEnd++;
		}

		// Runs without a material are skipped, not drawn.
		if( material != nil )
		{
			plProfile_BeginTiming(SpanTransforms);
			ISetupTransforms(drawable, merged, lastL2W);
			plProfile_EndTiming(SpanTransforms);

			// Turn on this span's lights and turn off the rest.
			IEnableLights( &merged );

			// Check that the underlying buffers are ready to go.
			plProfile_BeginTiming(CheckDyn);
			ICheckDynBuffers(drawable, drawable->GetBufferGroup(merged.fGroupIdx), &merged);
			plProfile_EndTiming(CheckDyn);

			plProfile_BeginTiming(CheckStat);
			CheckVertexBufferRef(drawable->GetBufferGroup(merged.fGroupIdx), merged.fVBufferIdx);
			CheckIndexBufferRef(drawable->GetBufferGroup(merged.fGroupIdx), merged.fIBufferIdx);
			plProfile_EndTiming(CheckStat);

			// Draw the whole run in one go.
			IRenderBufferSpan( merged,
								drawable->GetVertexRef( merged.fGroupIdx, merged.fVBufferIdx ),
								drawable->GetIndexRef( merged.fGroupIdx, merged.fIBufferIdx ),
								material,
								merged.fVStartIdx, merged.fVLength,	// These are used as our accumulated range
								merged.fIPackedIdx, merged.fILength );
		}

		// The next run begins at the first span that refused to merge.
		runStart = runEnd;
	}

	plProfile_EndTiming(RenderSpan);
	/// All done!
}

//// IAddBoundsSpan ///////////////////////////////////////////////////////////
//	Creates a new span for the given drawable to represent the specified
//	world bounds.
// Debugging only.

void	plDXPipeline::IAddBoundsSpan( plDrawableSpans *ice, const hsBounds3Ext *bounds, UInt32 bndColor )
{
	// The whole body is compiled only when MCN_BOUNDS_SPANS is set; otherwise
	// this function does nothing.
#if MCN_BOUNDS_SPANS
	// Function-static scratch state, shared by every call.
	static hsTArray<plGeometrySpan *>	spanArray;
	static hsMatrix44		identMatrix;
	static hsPoint3		c[ 8 ], n[ 8 ];
	// Per-corner sign triples, used below to fill in the normals n[].
	static int			nPts[ 8 ][ 3 ] = { { -1, -1, -1 }, { 1, -1, -1 }, { -1, 1, -1 }, { 1, 1, -1 },
										{ -1, -1, 1 }, { 1, -1, 1 }, { -1, 1, 1 }, { 1, 1, 1 } };
	int				i;
	plGeometrySpan	*newSpan;


	// First call: create the single array slot and do the one-time setup
	// of the identity matrix and the normals.
	if( spanArray.GetCount() == 0 )
	{
		spanArray.Reset();
		spanArray.Append( TRACKED_NEW plGeometrySpan() );
		identMatrix.Reset();

		// Make normals
		for( i = 0; i < 8; i++ )
		{
			n[ i ].fX = (float)nPts[ i ][ 0 ];
			n[ i ].fY = (float)nPts[ i ][ 1 ];
			n[ i ].fZ = (float)nPts[ i ][ 2 ];
		}
	}
	else
		// Later calls overwrite slot 0 with a fresh span. The previous pointer is not
		// freed here - presumably AppendDISpans took ownership of it; confirm.
		spanArray[ 0 ] = TRACKED_NEW plGeometrySpan();

	newSpan = spanArray[ 0 ];

	newSpan->BeginCreate( fBoundsMat, identMatrix, 0 );

	// Make corners
	// c[0] is the base corner; c[1], c[2], c[4] are the base offset by one axis each,
	// and the remaining corners are built by adding further axes.
	c[1] = c[2] = c[4] = *bounds->GetCorner(&c[0]);
	hsVector3 axes[3];
	bounds->GetAxes(axes+0, axes+1, axes+2);
	c[1] += axes[0];
	c[2] += axes[1];
	c[4] += axes[2];

	c[3] = c[1];
	c[3] += axes[1];

	c[5] = c[1];
	c[5] += axes[2];

	c[6] = c[2];
	c[6] += axes[2];

	c[7] = c[6];
	c[7] += axes[0];

	// One vertex per corner, all in the caller's color.
	for( i = 0; i < 8; i++ )
		newSpan->AddVertex( &c[ i ], &n[ i ], bndColor );

	// Six triangles are added over the eight corners.
	newSpan->AddTriIndices( 0, 1, 2 );
	newSpan->AddTriIndices( 2, 1, 3 );

	newSpan->AddTriIndices( 6, 3, 7 );
	newSpan->AddTriIndices( 7, 1, 5 );
	newSpan->AddTriIndices( 5, 0, 4 );
	newSpan->AddTriIndices( 4, 2, 6 );

	newSpan->EndCreate();

	// Hand the span to the drawable and remember what AppendDISpans returned
	// in fBSpansToDelete.
	fBSpansToDelete.Append( ice->AppendDISpans( spanArray ) );

#endif
}

//// IAddNormalsSpan //////////////////////////////////////////////////////////
//	Creates a new span for the given drawable that visualizes the vertex
//	normals of the specified span.
// Debugging only.

void	plDXPipeline::IAddNormalsSpan( plDrawableSpans *ice, plIcicle *span, plDXVertexBufferRef *vRef, UInt32 bndColor )
{
	// The whole body is compiled only when MCN_BOUNDS_SPANS is set; otherwise
	// this function does nothing.
#if MCN_BOUNDS_SPANS
	// Function-static scratch state, shared by every call.
	static hsTArray<plGeometrySpan *>	spanArray;
	static hsMatrix44		identMatrix;
	static hsPoint3		point, off, blank;
	hsVector3	b2;
	UInt16		v1, v2, v3;
	int				i;
	plGeometrySpan	*newSpan;


	// First call: create the single array slot and reset the identity matrix.
	if( spanArray.GetCount() == 0 )
	{
		spanArray.Reset();
		spanArray.Append( TRACKED_NEW plGeometrySpan() );
		identMatrix.Reset();
	}
	else
		// Later calls overwrite slot 0 with a fresh span. The previous pointer is not
		// freed here - presumably AppendDISpans took ownership of it; confirm.
		spanArray[ 0 ] = TRACKED_NEW plGeometrySpan();

	newSpan = spanArray[ 0 ];

	// The new span uses the source span's local-to-world (identMatrix is not used here).
	newSpan->BeginCreate( fBoundsMat, span->fLocalToWorld, 0 );

	// For every vertex of the source span, add a triangle running from the vertex
	// position to position + normal and back to the position.
	for( i = 0; i < span->fVLength; i++ )
	{
		point = vRef->fOwner->Position( span->fVBufferIdx, span->fCellIdx, span->fCellOffset + i );
		b2 = vRef->fOwner->Normal( span->fVBufferIdx, span->fCellIdx, span->fCellOffset + i );
		off.Set( point.fX + b2.fX, point.fY + b2.fY, point.fZ + b2.fZ );
		// 'blank' is passed as the normal for all three vertices.
		v1 = newSpan->AddVertex( &point, &blank, bndColor );
		v2 = newSpan->AddVertex( &off, &blank, bndColor );
		v3 = newSpan->AddVertex( &point, &blank, bndColor );
		newSpan->AddTriIndices( v1, v2, v3 );
	}

	newSpan->EndCreate();

	// Hand the span to the drawable and remember what AppendDISpans returned
	// in fBSpansToDelete.
	fBSpansToDelete.Append( ice->AppendDISpans( spanArray ) );

#endif
}

//// BeginRender //////////////////////////////////////////////////////////////
// Specifies the beginning of the render frame.
// If this succeeds (returns false) it must be matched with a call to EndRender.
// Normally, the main client loop will wrap the entire scene render (including
// any offscreen rendering) in a BeginRender/EndRender pair. There is no need
// for further calls for sub-renders.
hsBool plDXPipeline::BeginRender()
{
	// Device-lost handling comes first. A true result from IResetDevice means
	// this render cannot start, which we report by returning true ourselves.
	if( IResetDevice() )
	{
		return true;
	}

	// We were lost, but now we're found! Spread the good word brother!
	if( fDevWasLost )
	{
		/// Broadcast a message letting everyone know that we were recreated and that
		/// all device-specific stuff needs to be recreated
//		plDeviceRecreateMsg* clean = TRACKED_NEW plDeviceRecreateMsg(this);
//		plgDispatch::MsgSend(clean);

		fDevWasLost = false;
	}

	// One-shot debug reload: drop shaders and managed resources, then clear the flag.
	if( IsDebugFlagSet(plPipeDbg::kFlagReload) )
	{
		IReleaseShaders();
		fD3DDevice->EvictManagedResources();
		fEvictTime = fTextUseTime;
		fManagedSeen = 0;
		SetDebugFlag(plPipeDbg::kFlagReload, false);
	}

	// offset transform
	RefreshScreenMatrices();

	// Only the outermost BeginRender (scene depth going 0 -> 1) does the real start-of-frame work.
	if( fInSceneDepth++ == 0 )
	{
		// Workaround for NVidia memory manager bug. Search for "OSVERSIONINFO" to
		// find notes on the bug. This is where we purge managed memory periodically.
		plProfile_Set(ManSeen, fManagedSeen);
		if( fManagedCutoff )
		{
			plConst(UInt32) kMinEvictTime(1800); // ~2 minutes @ 15FPS

			// Evict only when we've seen more than the cutoff overall, current usage
			// is back under the cutoff, and enough time has passed since the last evict.
			const hsBool seenOverCutoff = ( fManagedSeen > fManagedCutoff );
			const hsBool usedUnderCutoff = ( fTexUsed + fVtxUsed < fManagedCutoff );
			const hsBool waitedLongEnough = ( fTextUseTime - fEvictTime > kMinEvictTime );
			if( seenOverCutoff && usedUnderCutoff && waitedLongEnough )
			{
				fD3DDevice->EvictManagedResources();
				fManagedSeen = 0;
				fEvictTime = fTextUseTime;
				plProfile_IncCount(ManEvict, 1);
			}
		}

		// Superfluous setting of Z state.
		// W-buffering is chosen for perspective views when the caps allow it.
		const DWORD zMode = ( fView.IsPerspective() && ( fSettings.fD3DCaps & kCapsWBuffer ) )
							? D3DZB_USEW : D3DZB_TRUE;
		fD3DDevice->SetRenderState( D3DRS_ZENABLE, zMode );

		/// If we have a renderTarget active, use its viewport
		ISetViewport();

		// Tell D3D we're ready to start rendering.
		// A failure here only marks the device as lost; the rest of the setup still runs.
		if( FAILED(fD3DDevice->BeginScene()) )
		{
			fDeviceLost = true;
		}

		// Reset all our buffer/image usage counters
		fNextDynVtx = 0;
		fVtxRefTime++;

		fTexUsed = 0;
		fVtxUsed = 0;
		fTextUseTime++;

		// Render any shadow maps that have been submitted for this frame.
		IPreprocessShadows();
		IPreprocessAvatarTextures();
	}
	fRenderCnt++;

	// Would probably rather this be an input.
	fTime = hsTimer::GetSysSeconds();

	return false;
}

//// ISetViewport /////////////////////////////////////////////////////////////
// Translate our viewport into a D3D viewport
void	plDXPipeline::ISetViewport()
{
	D3DVIEWPORT9 vp = { GetViewTransform().GetViewPortLeft(),
						GetViewTransform().GetViewPortTop(),
						GetViewTransform().GetViewPortWidth(),
						GetViewTransform().GetViewPortHeight(),
						0.f, 1.f };


	WEAK_ERROR_CHECK( fD3DDevice->SetViewport( &vp ) );
}

//// RenderScreenElements /////////////////////////////////////////////////////
//	Renders all the screen elements, such as debug text and plates. Also puts
//	up all the info about vertex buffers and such. Should be called right
//	before EndRender(), but only on the main surface (not on renderTargets,
//	for example).

void	plDXPipeline::RenderScreenElements()
{
	bool		reset = false;


#if MCN_BOUNDS_SPANS
	if( fBoundsSpans && fBSpansToDelete.GetCount() > 0 )
	{
		Draw( fBoundsSpans );

		int		i;
		for( i = 0; i < fBSpansToDelete.GetCount(); i++ )
			fBoundsSpans->RemoveDISpans( fBSpansToDelete[ i ] );

		fBSpansToDelete.Reset();
	}
#endif
	if( fCullProxy )
		Draw( fCullProxy );

#ifdef MF_ENABLE_HACKOFF
	//WHITE
	static plPlate* hackPlate = nil;
	if( doHackPlate < hackOffscreens.GetCount() )
	{
		if( !hackPlate )
		{
			fPlateMgr->CreatePlate(&hackPlate, 0.5f, 0.5f, 1.0f, 1.0f);
			hackPlate->CreateBlankMaterial(32, 32, false);
		}
	}
	if( hackPlate )
	{
		if( doHackPlate < hackOffscreens.GetCount() )
		{
			hsGMaterial* hackMat = hackPlate->GetMaterial();
			plLayer* lay = plLayer::ConvertNoRef(hackMat->GetLayer(0));
			if( lay )
				lay->SetTexture(hackOffscreens[doHackPlate]);
			hackPlate->SetVisible( true );
		}
		else
		{
			hackPlate->SetVisible( false );
		}
	}
#endif // MF_ENABLE_HACKOFF

	hsGMatState	tHack = PushMaterialOverride(hsGMatState::kMisc, hsGMatState::kMiscWireFrame, false);
	hsGMatState	ambHack = PushMaterialOverride(hsGMatState::kShade, hsGMatState::kShadeWhite, true);

	plProfile_BeginTiming(PlateMgr);
	/// Plates
	if( fPlateMgr )
	{
		fPlateMgr->DrawToDevice( this );
		reset = true;
	}
	plProfile_EndTiming(PlateMgr);

	PopMaterialOverride(ambHack, true);
	PopMaterialOverride(tHack, false);

	plProfile_BeginTiming(DebugText);
	/// Debug text
	if( fDebugTextMgr && plDebugText::Instance().IsEnabled() )
	{
		fDebugTextMgr->DrawToDevice( this );

		reset = true;
	}
	plProfile_EndTiming(DebugText);

	plProfile_BeginTiming(Reset);
	if( reset )
	{
		// Reset these since the drawing might have trashed them
		hsRefCnt_SafeUnRef( fSettings.fCurrVertexBuffRef );
		hsRefCnt_SafeUnRef( fSettings.fCurrIndexBuffRef );
		fSettings.fCurrVertexBuffRef = nil;
		fSettings.fCurrIndexBuffRef = nil;

		fView.fXformResetFlags = fView.kResetAll;		// Text destroys view transforms
		hsRefCnt_SafeUnRef( fLayerRef[ 0 ] );
		fLayerRef[ 0 ] = nil;		// Text destroys stage 0 texture
	}
	plProfile_EndTiming(Reset);
}

//// EndRender ////////////////////////////////////////////////////////////////
// Tell D3D we're through rendering for this frame, and flip the back buffer to front.
// Also includes a bit of making sure we're not holding onto anything that might
// get deleted before the next render.
hsBool plDXPipeline::EndRender()
{
#ifdef MF_ENABLE_HACKOFF
	hackOffscreens.SetCount(0);
#endif // MF_ENABLE_HACKOFF

	IBottomLayer();

	// Only the outermost EndRender (scene depth back to zero) actually ends
	// the D3D scene and flips. The result of the flip is what we return;
	// nested calls return false.
	hsBool flipResult = false;
	--fInSceneDepth;
	if( fInSceneDepth == 0 )
	{
		WEAK_ERROR_CHECK( fD3DDevice->EndScene() );
		flipResult = IFlipSurface();

		IClearShadowSlaves();
	}

	// Do this last, after we've drawn everything.
	// Let go of the material and layer refs we cached during the frame.
	fForceMatHandle = true;
	hsRefCnt_SafeUnRef( fCurrMaterial );
	fCurrMaterial = nil;

	for( int stage = 0; stage < 8; stage++ )
	{
		hsRefCnt_SafeUnRef( fLayerRef[stage] );
		fLayerRef[stage] = nil;
	}

	return flipResult;
}

// SetGamma ////////////////////////////////////////////////////////////
// Create and set a gamma table based on the input exponent values for
// R, G, and B. Can also set explicit table using the other SetGamma().
hsBool plDXPipeline::SetGamma(hsScalar eR, hsScalar eG, hsScalar eB)
{
	// Returns false (and does nothing) when gamma correction is disabled,
	// true once the ramp has been handed to the device.
	if( fSettings.fNoGammaCorrect )
		return false;

	// Each channel is raised to the reciprocal of its input exponent.
	// Inputs at or below kMinE are treated as kMinE.
	plConst(hsScalar) kMinE(0.1f);
	eR = 1.f / ( ( eR > kMinE ) ? eR : kMinE );
	eG = 1.f / ( ( eG > kMinE ) ? eG : kMinE );
	eB = 1.f / ( ( eB > kMinE ) ? eB : kMinE );

	D3DGAMMARAMP ramp;

	// Entry 0 is always black; the remaining 255 entries follow the curve.
	ramp.red[0] = ramp.green[0] = ramp.blue[0] = 0L;

	const hsScalar kFullScale = hsScalar(UInt16(-1));
	for( int entry = 1; entry < 256; entry++ )
	{
		const hsScalar normalized = hsScalar(entry) / 255.f;

		hsScalar level = pow(normalized, eR);
		level *= kFullScale;
		ramp.red[entry] = UInt16(level);

		level = pow(normalized, eG);
		level *= kFullScale;
		ramp.green[entry] = UInt16(level);

		level = pow(normalized, eB);
		level *= kFullScale;
		ramp.blue[entry] = UInt16(level);
	}

	fD3DDevice->SetGammaRamp(0, D3DSGR_NO_CALIBRATION, &ramp);

	return true;
}

// SetGamma
// Copy the input gamma tables and pass them to the hardware.
hsBool plDXPipeline::SetGamma(const UInt16* const tabR, const UInt16* const tabG, const UInt16* const tabB)
{
	// Returns false (and does nothing) when gamma correction is disabled.
	if( fSettings.fNoGammaCorrect )
		return false;

	// Each input table must supply a full ramp channel's worth of entries
	// (256 16-bit values); they are copied verbatim.
	D3DGAMMARAMP ramp;
	memcpy(ramp.red,	tabR, sizeof(ramp.red));
	memcpy(ramp.green,	tabG, sizeof(ramp.green));
	memcpy(ramp.blue,	tabB, sizeof(ramp.blue));

	fD3DDevice->SetGammaRamp(0, D3DSGR_NO_CALIBRATION, &ramp);

	return true;
}


//// IFlipSurface /////////////////////////////////////////////////////////////
// Initiate moving the back buffer contents to the front buffer. Will detect
// and set the device lost condition when it occurs.
hsBool	plDXPipeline::IFlipSurface()
{
	/// Works now for both fullscreen and windowed modes
	// Present only when no render target is current. A failed Present
	// latches the device-lost flag.
	if( fSettings.fCurrRenderTarget == nil
		&& FAILED( fD3DDevice->Present( nil, nil, fSettings.fHWnd, nil ) ) )
	{
		fDeviceLost = true;
	}

	// Note the return value is the (possibly already set) device-lost flag,
	// not the result of this particular Present.
	return fDeviceLost;
}

// ExtractMipMap
// This code works and is fairly fast for creating a new mipmap
// as a copy of the data in an offscreen render target. It's not
// currently used, because of driver bugs found in rendering to
// offscreen render targets.
plMipmap* plDXPipeline::ExtractMipMap(plRenderTarget* targ)
{
	// Cubic render targets are not handled.
	if( plCubicRenderTarget::ConvertNoRef(targ) )
		return nil;

	// Only 32 bit targets can be copied.
	if( targ->GetPixelSize() != 32 )
	{
		hsAssert(false, "Only RGBA8888 currently implemented");
		return nil;
	}

	// Need both a device ref and a color surface on it to read from.
	plDXRenderTargetRef* targRef = (plDXRenderTargetRef*)targ->GetDeviceRef();
	IDirect3DSurface9* colorSurf = targRef ? targRef->GetColorSurface() : nil;
	if( !colorSurf )
		return nil;

	D3DLOCKED_RECT locked;
	if( FAILED( colorSurf->LockRect(&locked, nil, D3DLOCK_READONLY) ) )
		return nil;

	const int numCols = targ->GetWidth();
	const int numRows = targ->GetHeight();

	// Caller receives the newly allocated mipmap.
	plMipmap* result = TRACKED_NEW plMipmap(numCols, numRows, plMipmap::kARGB32Config, 1);

	// Copy row by row (source rows are Pitch bytes apart), forcing the
	// top byte of every pixel to 0xff.
	const UInt32 blackOpaque = 0xff000000;
	const UInt8* const srcBase = (const UInt8*)(locked.pBits);
	for( int row = 0; row < numRows; row++ )
	{
		const UInt32* srcRow = (const UInt32*)( srcBase + row * locked.Pitch );
		UInt32* dstRow = result->GetAddr32(0, row);
		for( int col = 0; col < numCols; col++ )
			dstRow[col] = srcRow[col] | blackOpaque;
	}

	colorSurf->UnlockRect();

	return result;
}

//// CaptureScreen ////////////////////////////////////////////////////////////
// Copy the current contents of the front buffer to the destination mipmap, with optional
// rescaling. Note that the mipmap function which does this rescaling is of low quality
// (pyramid filter even though it claims a box filter) and low performance (slow).
// If it mattered, it would take about an hour to have a higher performance, higher quality,
// more robust rescale function.
// This function is fairly straightforward, the complexity only comes from making sure
// all pixels in dest get written to, even though the client window may be partially
// offscreen. If the client window is partially offscreen, there will be no values
// for the "offscreen pixels" to copy to dest, so opaque black is used.
// Parameters:
//	dest			- mipmap to receive the capture; re-created as 32 bit ARGB at the
//					  viewport size if it doesn't already match.
//	flipVertical	- if true, rows are written bottom-up into dest.
//	desiredWidth,
//	desiredHeight	- if both are non-zero, dest is rescaled to this size afterwards.
// Returns true on success, false if a D3D call failed or (windowed mode) no part
// of the client area is on screen.
hsBool	plDXPipeline::CaptureScreen( plMipmap *dest, bool flipVertical, UInt16 desiredWidth, UInt16 desiredHeight )
{
	IDirect3DSurface9	*surface = nil;
	D3DLOCKED_RECT		rect;
	RECT				rToLock;

	const UInt32 width = GetViewTransform().GetViewPortWidth();
	const UInt32 height = GetViewTransform().GetViewPortHeight();

	// [left,right) x [top,bottom) is the region of dest that has real screen
	// pixels behind it. Everything outside is filled with opaque black.
	int left = 0;
	int right = int(width);
	int top = 0;
	int bottom = int(height);

	if( fSettings.fFullscreen )
	{
		if (FAILED(fD3DDevice->CreateOffscreenPlainSurface(width, height, D3DFMT_A8R8G8B8, D3DPOOL_SCRATCH, &surface, NULL)))
			return false;

		rToLock.left = GetViewTransform().GetViewPortLeft();
		rToLock.top = GetViewTransform().GetViewPortTop();
		rToLock.right = GetViewTransform().GetViewPortRight();
		rToLock.bottom = GetViewTransform().GetViewPortBottom();
	}
	else
	{
		// Windowed: the front buffer is the whole desktop, so grab all of it
		// and lock just the part covered by our client area.
		// Keep the screen size signed, so the comparisons against the (possibly
		// negative) window coordinates below are signed comparisons.
		const int screenWidth = GetSystemMetrics( SM_CXSCREEN );
		const int screenHeight = GetSystemMetrics( SM_CYSCREEN );

		if (FAILED(fD3DDevice->CreateOffscreenPlainSurface(screenWidth, screenHeight, D3DFMT_A8R8G8B8, D3DPOOL_SCRATCH, &surface, NULL)))
			return false;

		GetClientRect( fSettings.fHWnd, &rToLock );
		MapWindowPoints( fSettings.fHWnd, nil, (POINT *)&rToLock, 2 );

		// Clip the client rect to the screen, shrinking the valid region of
		// dest by the same amount on each side.
		if( rToLock.right > screenWidth )
		{
			right -= int(rToLock.right - screenWidth);
			rToLock.right = screenWidth;
		}
		if( rToLock.bottom > screenHeight )
		{
			bottom -= int(rToLock.bottom - screenHeight);
			rToLock.bottom = screenHeight;
		}
		if( rToLock.top < 0 )
		{
			top -= int(rToLock.top);
			rToLock.top = 0;
		}
		if( rToLock.left < 0 )
		{
			left -= int(rToLock.left);
			rToLock.left = 0;
		}

		// Nothing of the client area is on screen (or the clipped region is
		// degenerate). There is nothing to lock or copy, and proceeding would
		// index outside of dest.
		if( rToLock.left >= rToLock.right || rToLock.top >= rToLock.bottom
			|| left >= right || top >= bottom )
		{
			ReleaseObject( surface );
			return false;
		}
	}

	UINT swapChain = 0;
	if( FAILED( fD3DDevice->GetFrontBufferData(swapChain, surface) ) )
	{
		ReleaseObject( surface );
		return false;
	}

	if( FAILED( surface->LockRect( &rect, &rToLock, D3DLOCK_READONLY ) ) )
	{
		ReleaseObject( surface );
		return false;
	}

	if( dest->GetWidth() != width || dest->GetHeight() != height ||
		dest->GetPixelSize() != 32 )
	{
		dest->Reset();
		dest->Create( width, height, plMipmap::kARGB32Config, 1 );
	}

	/// Copy over
	const UInt32 blackOpaque = 0xff000000;
	const int numRows = int(height);
	const int numCols = int(width);
	for( int y = 0; y < numRows; y++ )
	{
		UInt32 *destPtr = dest->GetAddr32( 0, flipVertical ? ( numRows - 1 - y ) : y );

		int x = 0;
		if( y >= top && y < bottom )
		{
			// rect.pBits already points at the top-left of the locked (clipped)
			// rect, so the first valid dest row (y == top) maps to locked row 0.
			const UInt32 *srcPtr = (const UInt32 *)( (const UInt8 *)rect.pBits + rect.Pitch * ( y - top ) );

			for( ; x < left; x++ )
				*destPtr++ = blackOpaque;

			memcpy( destPtr, srcPtr, (right - left) * sizeof( UInt32 ) );
			destPtr += (right - left);
			x = right;
		}

		// Remainder of the row (or the whole row if it is off screen).
		for( ; x < numCols; x++ )
			*destPtr++ = blackOpaque;
	}

	surface->UnlockRect();
	ReleaseObject( surface );

	if( desiredWidth != 0 && desiredHeight != 0 )
	{
		// Rescale to the right size
		dest->ResizeNicely( desiredWidth, desiredHeight, plMipmap::kDefaultFilter );
	}
	return true;
}

///////////////////////////////////////////////////////////////////////////////
//// Render Targets ///////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

//// MakeRenderTargetRef //////////////////////////////////////////////////////
// Create a Plasma render target ref, filling in the underlying D3D resources
// (e.g. color/depth buffers).
// Note that for ATI boards, we create a single depth surface for them to share.
// That can actually be 2 depth surfaces, if some color surfaces are 16 bit and
// others are 24/32 bit, since the ATI's want to match color depth with depth depth.
// Parameters:
//	owner	- the render target to create D3D resources for. If it is a face of a
//			  cubic render target (has a parent), the parent is built instead.
// Returns the device ref now attached to owner, or nil on failure.
hsGDeviceRef	*plDXPipeline::MakeRenderTargetRef( plRenderTarget *owner )
{
	plDXRenderTargetRef	*ref = nil;
	IDirect3DSurface9		*surface = nil, *depthSurface = nil;
	IDirect3DTexture9		*texture = nil;
	IDirect3DCubeTexture9	*cTexture = nil;
	D3DFORMAT				surfFormat = D3DFMT_UNKNOWN, depthFormat = D3DFMT_UNKNOWN;
	D3DRESOURCETYPE			resType;
	int						i;
	plCubicRenderTarget		*cubicRT;
	UInt16					width, height;

	hsAssert(!fManagedAlloced, "Allocating non-managed resource with managed resources alloc'd");

	/// Check--is this renderTarget really a child of a cubicRenderTarget?
	if( owner->GetParent() != nil )
	{
		/// This'll create the deviceRefs for all of its children as well
		MakeRenderTargetRef( owner->GetParent() );
		return owner->GetDeviceRef();
	}

	// If we already have a rendertargetref, we just need it filled out with D3D resources.
	if( owner->GetDeviceRef() != nil )
		ref = (plDXRenderTargetRef *)owner->GetDeviceRef();

	// Remember the ref the owner already holds (nil if none). On a creation
	// failure below we may only UnRef a ref that we allocated here; a
	// pre-existing one is released by owner->SetDeviceRef(nil) at the bottom.
	// UnRef'ing it here as well would release the owner's reference twice.
	plDXRenderTargetRef	* const ownersRef = ref;

	// Look for supported format. Note that the surfFormat and depthFormat are
	// passed in by ref, so they may be different after this function call (if
	// an exact match isn't supported, but something similar is).
	if( !IPrepRenderTargetInfo( owner, surfFormat, depthFormat, resType ) )
	{
		hsAssert( false, "Error getting renderTarget info" );
		return nil;
	}


	/// Create the render target now
	// Start with the depth surface.
	// Note that we only ever give a cubic rendertarget a single shared depth buffer,
	// since we only render one face at a time. If we were rendering part of face X, then part
	// of face Y, then more of face X, then they would all need their own depth buffers.
	if( owner->GetZDepth() && (owner->GetFlags() & ( plRenderTarget::kIsTexture | plRenderTarget::kIsOffscreen )) )
	{
		// 9600 THRASH
		if( !fSettings.fShareDepth )
		{
			/// Create the depthbuffer
			if( FAILED( fD3DDevice->CreateDepthStencilSurface(
								owner->GetWidth(), owner->GetHeight(), depthFormat,
								D3DMULTISAMPLE_NONE, 0, FALSE,
								&depthSurface, NULL ) ) )
			{
				return nil;
			}

			// See plDXRenderTargetRef::Release()
			//D3DSURF_MEMNEW(depthSurface);
		}
		else
		{
			// One shared depth surface per depth-size class: index 0 for
			// z depths below 24 bits, index 1 for 24 bits and up.
			const int iZ = owner->GetZDepth() / 24;
			if( !fSharedDepthSurface[iZ] )
			{
				plConst(DWORD) kSharedWidth(800);
				plConst(DWORD) kSharedHeight(600);
				if( FAILED( fD3DDevice->CreateDepthStencilSurface(
									kSharedWidth, kSharedHeight, depthFormat,
									D3DMULTISAMPLE_NONE, 0, FALSE,
									&fSharedDepthSurface[iZ], NULL ) ) )
				{
					return nil;
				}
				// See plDXRenderTargetRef::Release()
				//D3DSURF_MEMNEW(fSharedDepthSurface[iZ]);
				fSharedDepthFormat[iZ] = depthFormat;
			}
			hsAssert(depthFormat == fSharedDepthFormat[iZ], "Mismatch on render target types");
			// The render target ref gets its own COM reference on the shared surface.
			fSharedDepthSurface[iZ]->AddRef();
			depthSurface = fSharedDepthSurface[iZ];
		}
	}

	// See if it's a cubic render target.
	// Primary consumer here is the vertex/pixel shader water.
	cubicRT = plCubicRenderTarget::ConvertNoRef( owner );
	if( cubicRT != nil )
	{
		/// And create the ref (it'll know how to set all the flags)
		if( ref != nil )
			ref->Set( surfFormat, 0, owner );
		else
			ref = TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, owner );

		if( !FAILED( fD3DDevice->CreateCubeTexture( owner->GetWidth(), 1, D3DUSAGE_RENDERTARGET, surfFormat,
														D3DPOOL_DEFAULT, (IDirect3DCubeTexture9 **)&cTexture, NULL ) ) )
		{
			/// Create a CUBIC texture
			// Each of the six faces gets (or re-uses) its own device ref.
			for( i = 0; i < 6; i++ )
			{
				plRenderTarget			*face = cubicRT->GetFace( i );
				plDXRenderTargetRef	*fRef;

				if( face->GetDeviceRef() != nil )
				{
					fRef = (plDXRenderTargetRef *)face->GetDeviceRef();
					fRef->Set( surfFormat, 0, face );
					if( !fRef->IsLinked() )
						fRef->Link( &fRenderTargetRefList );
				}
				else
				{
					face->SetDeviceRef( TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, face, false ) );
					( (plDXRenderTargetRef *)face->GetDeviceRef())->Link( &fRenderTargetRefList );
					// Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least)
					hsRefCnt_SafeUnRef( face->GetDeviceRef() );
				}
			}

			D3DSURF_MEMNEW(cTexture);

			ref->SetTexture( cTexture, depthSurface );
		}
		else
		{
			ReleaseObject(depthSurface);
			if( ref != ownersRef )
			{
				hsRefCnt_SafeUnRef(ref);
			}
			ref = nil;
		}
	}
	// Not a cubic, is it a texture render target? These are currently used
	// primarily for shadow map generation.
	else if( owner->GetFlags() & plRenderTarget::kIsTexture )
	{
		/// Create a normal texture
		if( ref != nil )
			ref->Set( surfFormat, 0, owner );
		else
			ref = TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, owner );

		if( !FAILED( fD3DDevice->CreateTexture( owner->GetWidth(), owner->GetHeight(), 1, D3DUSAGE_RENDERTARGET, surfFormat,
														D3DPOOL_DEFAULT, (IDirect3DTexture9 **)&texture, NULL ) ) )
		{
			D3DSURF_MEMNEW(texture);

			ref->SetTexture( texture, depthSurface );
		}
		else
		{
			ReleaseObject(depthSurface);
			if( ref != ownersRef )
			{
				hsRefCnt_SafeUnRef(ref);
			}
			ref = nil;
		}
	}
	// Not a texture either, must be a plain offscreen.
	// Note that the plain offscreen code path works and was used until recently,
	// until it turned up that some hardware had bugs on rendering to
	// an offscreen.
	// Some GeForce1's had lighting anomalies, although my GeForce1 DDR didn't.
	// Some ATI's showed a momentary glitch of corrupted rendering on the frame
	// when rendering both to the primary and an offscreen (again, not mine).
	// So the Offscreen isn't currently used for anything.
	else if( owner->GetFlags() & plRenderTarget::kIsOffscreen )
	{
		/// Create a blank surface
		if( ref != nil )
			ref->Set( surfFormat, 0, owner );
		else
			ref = TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, owner );

		width = owner->GetWidth();
		height = owner->GetHeight();

		// Specify true for lockable, otherwise I'm not sure what we'd do with it. I guess we
		// could copyrect to another surface, presumably a texture. But right now the only
		// thing we use this for is to render a snapshot and copy it to sysmem, which implies
		// lockable.
		if( !FAILED( fD3DDevice->CreateRenderTarget( width, height, surfFormat,
							D3DMULTISAMPLE_NONE, 0,
							TRUE, &surface, NULL ) ) )
		{
			D3DSURF_MEMNEW(surface);

			ref->SetTexture( surface, depthSurface );
		}
		else
		{
			ReleaseObject(depthSurface);
			if( ref != ownersRef )
			{
				hsRefCnt_SafeUnRef(ref);
			}
			ref = nil;
		}

	}

	// Keep it in a linked list for ready destruction.
	if( owner->GetDeviceRef() != ref )
	{
		// Either a freshly created ref is being handed to the owner, or creation
		// failed (ref == nil) and the owner's stale ref is being dropped.
		owner->SetDeviceRef( ref );
		// Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least)
		hsRefCnt_SafeUnRef( ref );
		if( ref != nil && !ref->IsLinked() )
			ref->Link( &fRenderTargetRefList );
	}
	else
	{
		if( ref != nil && !ref->IsLinked() )
			ref->Link( &fRenderTargetRefList );
	}

	// Freshly (re)built resources: set the ref's dirty flag to false.
	if( ref != nil )
	{
		ref->SetDirty( false );
	}

	return ref;
}

//// SharedRenderTargetRef //////////////////////////////////////////////////////
// Same as MakeRenderTargetRef, except specialized for the shadow map generation.
// The shadow map pools of a given dimension (called RenderTargetPool) all share
// a single depth buffer of that size. This allows sharing on NVidia hardware
// that wants the depth buffer dimensions to match the color buffer size.
// It may be that NVidia hardware doesn't care any more. Contact Matthias
// about that.
// Parameters:
//	share	- render target whose depth surface is to be shared; if nil (or if it
//			  has no device ref yet) this falls back to MakeRenderTargetRef(owner).
//	owner	- the render target to create D3D resources for.
// Returns the device ref now attached to owner, or nil on failure.
hsGDeviceRef* plDXPipeline::SharedRenderTargetRef(plRenderTarget* share, plRenderTarget *owner)
{
	plDXRenderTargetRef*	ref = nil;
	IDirect3DSurface9*		surface = nil;
	IDirect3DSurface9*		depthSurface = nil;
	IDirect3DTexture9*		texture = nil;
	IDirect3DCubeTexture9*	cTexture = nil;
	D3DFORMAT				surfFormat = D3DFMT_UNKNOWN, depthFormat = D3DFMT_UNKNOWN;
	D3DRESOURCETYPE			resType;
	int						i;
	plCubicRenderTarget*	cubicRT;
	UInt16					width, height;

	// If we don't already have one to share from, start from scratch.
	if( !share )
		return MakeRenderTargetRef(owner);

	hsAssert(!fManagedAlloced, "Allocating non-managed resource with managed resources alloc'd");

#ifdef HS_DEBUGGING
	// Check out the validity of the match. Debug only.
	hsAssert(!owner->GetParent() == !share->GetParent(), "Mismatch on shared render target");
	hsAssert(owner->GetWidth() == share->GetWidth(), "Mismatch on shared render target");
	hsAssert(owner->GetHeight() == share->GetHeight(), "Mismatch on shared render target");
	hsAssert(owner->GetZDepth() == share->GetZDepth(), "Mismatch on shared render target");
	hsAssert(owner->GetStencilDepth() == share->GetStencilDepth(), "Mismatch on shared render target");
#endif // HS_DEBUGGING

	/// Check--is this renderTarget really a child of a cubicRenderTarget?
	if( owner->GetParent() != nil )
	{
		/// This'll create the deviceRefs for all of its children as well
		SharedRenderTargetRef(share->GetParent(), owner->GetParent());
		return owner->GetDeviceRef();
	}

	if( owner->GetDeviceRef() != nil )
		ref = (plDXRenderTargetRef *)owner->GetDeviceRef();

	// Remember the ref the owner already holds (nil if none). On a creation
	// failure below we may only UnRef a ref that we allocated here; a
	// pre-existing one is released by owner->SetDeviceRef(nil) at the bottom.
	// UnRef'ing it here as well would release the owner's reference twice.
	plDXRenderTargetRef* const ownersRef = ref;

	// Look for a good format of matching color and depth size.
	if( !IFindRenderTargetInfo(owner, surfFormat, resType) )
	{
		hsAssert( false, "Error getting renderTarget info" );
		return nil;
	}


	/// Create the render target now
	// Start with the depth. We're just going to share the depth surface on the
	// input shareRef.
	plDXRenderTargetRef* shareRef = (plDXRenderTargetRef*)share->GetDeviceRef();
	hsAssert(shareRef, "Trying to share from a render target with no ref");
	if( !shareRef )
	{
		// Nothing to share from after all (and the assert above doesn't stop us
		// in every build), so treat it like the no-share case rather than
		// dereferencing nil.
		return MakeRenderTargetRef(owner);
	}
	// The new render target ref gets its own COM reference on the shared surface.
	if( shareRef->fD3DDepthSurface )
		shareRef->fD3DDepthSurface->AddRef();
	depthSurface = shareRef->fD3DDepthSurface;

	// Check for Cubic. This is unlikely, since this function is currently only
	// used for the shadow map pools.
	cubicRT = plCubicRenderTarget::ConvertNoRef( owner );
	if( cubicRT != nil )
	{
		/// And create the ref (it'll know how to set all the flags)
		if( ref != nil )
			ref->Set( surfFormat, 0, owner );
		else
			ref = TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, owner );

		hsAssert(!fManagedAlloced, "Alloc default with managed alloc'd");
		if( !FAILED( fD3DDevice->CreateCubeTexture( owner->GetWidth(), 1, D3DUSAGE_RENDERTARGET, surfFormat,
														D3DPOOL_DEFAULT, (IDirect3DCubeTexture9 **)&cTexture, NULL ) ) )
		{

			/// Create a CUBIC texture
			// Each of the six faces gets (or re-uses) its own device ref.
			for( i = 0; i < 6; i++ )
			{
				plRenderTarget			*face = cubicRT->GetFace( i );
				plDXRenderTargetRef	*fRef;

				if( face->GetDeviceRef() != nil )
				{
					fRef = (plDXRenderTargetRef *)face->GetDeviceRef();
					fRef->Set( surfFormat, 0, face );
					if( !fRef->IsLinked() )
						fRef->Link( &fRenderTargetRefList );
				}
				else
				{
					face->SetDeviceRef( TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, face, false ) );
					( (plDXRenderTargetRef *)face->GetDeviceRef())->Link( &fRenderTargetRefList );
					// Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least)
					hsRefCnt_SafeUnRef( face->GetDeviceRef() );
				}
			}

			D3DSURF_MEMNEW(cTexture);

			ref->SetTexture( cTexture, depthSurface );
		}
		else
		{
			ReleaseObject(depthSurface);
			if( ref != ownersRef )
			{
				hsRefCnt_SafeUnRef(ref);
			}
			ref = nil;
		}
	}
	// Is it a texture render target? Probably, since shadow maps are all we use this for.
	else if( owner->GetFlags() & plRenderTarget::kIsTexture )
	{
		/// Create a normal texture
		if( ref != nil )
			ref->Set( surfFormat, 0, owner );
		else
			ref = TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, owner );

		hsAssert(!fManagedAlloced, "Alloc default with managed alloc'd");
		if( !FAILED( fD3DDevice->CreateTexture( owner->GetWidth(), owner->GetHeight(), 1, D3DUSAGE_RENDERTARGET, surfFormat,
														D3DPOOL_DEFAULT, (IDirect3DTexture9 **)&texture, NULL ) ) )
		{
			D3DSURF_MEMNEW(texture);

			ref->SetTexture( texture, depthSurface );
		}
		else
		{
			ReleaseObject(depthSurface);
			if( ref != ownersRef )
			{
				hsRefCnt_SafeUnRef(ref);
			}
			ref = nil;
		}
	}
	// Pretty sure this code path has never been followed.
	else if( owner->GetFlags() & plRenderTarget::kIsOffscreen )
	{
		/// Create a blank surface
		if( ref != nil )
			ref->Set( surfFormat, 0, owner );
		else
			ref = TRACKED_NEW plDXRenderTargetRef( surfFormat, 0, owner );

		width = owner->GetWidth();
		height = owner->GetHeight();

		// Unlike MakeRenderTargetRef, the surface is created non-lockable here.
		if( !FAILED( fD3DDevice->CreateRenderTarget( width, height, surfFormat,
							D3DMULTISAMPLE_NONE, 0,
							FALSE, &surface, NULL ) ) )
		{
			D3DSURF_MEMNEW(surface);

			ref->SetTexture( surface, depthSurface );
		}
		else
		{
			ReleaseObject(depthSurface);
			if( ref != ownersRef )
			{
				hsRefCnt_SafeUnRef(ref);
			}
			ref = nil;
		}

	}

	// Hand the ref to the owner (if it doesn't have it already) and keep it in
	// the linked list of render target refs.
	if( owner->GetDeviceRef() != ref )
	{
		owner->SetDeviceRef( ref );
		// Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least)
		hsRefCnt_SafeUnRef( ref );
		if( ref != nil && !ref->IsLinked() )
			ref->Link( &fRenderTargetRefList );
	}
	else
	{
		if( ref != nil && !ref->IsLinked() )
			ref->Link( &fRenderTargetRefList );
	}

	// Freshly (re)built resources: set the ref's dirty flag to false.
	if( ref != nil )
	{
		ref->SetDirty( false );
	}

	return ref;
}

//// IPrepRenderTargetInfo ////////////////////////////////////////////////////
//	Shared processing of render target creation parameters. Also does the
//	dirty work of finding a good surface format to use.
// Parameters:
//	owner		- render target whose flags/size/bit depths are examined.
//	surfFormat	- in/out; set from the owner's pixel size (16 -> A4R4G4B4,
//				  32 -> A8R8G8B8), possibly swapped to the other one if the device
//				  rejects the first choice. Left as passed in for other pixel sizes.
//	depthFormat	- in/out; set from the owner's z/stencil depth, possibly replaced
//				  by a 24 bit format the device does support. Set to
//				  D3DFMT_UNKNOWN if no z buffer is requested.
//	resType		- out; D3DRTYPE_TEXTURE for texture targets, else D3DRTYPE_SURFACE.
// Returns false if no workable format combination is found, or if a texture
// target has a non power-of-two dimension. If the owner has no flags set at all,
// returns true without touching any of the outputs.
hsBool	plDXPipeline::IPrepRenderTargetInfo( plRenderTarget *owner, D3DFORMAT &surfFormat,
											  D3DFORMAT &depthFormat, D3DRESOURCETYPE &resType )
{
	// 24 bit depth formats, ordered by increasing number of stencil bits.
	static const D3DFORMAT	kDepthFormats[] = { D3DFMT_D24X8, D3DFMT_D24X4S4, D3DFMT_D24S8 };
	const int				kNumDepthFormats = int( sizeof( kDepthFormats ) / sizeof( kDepthFormats[ 0 ] ) );

	const UInt16	flags = owner->GetFlags();
	const UInt16	width = owner->GetWidth();
	const UInt16	height = owner->GetHeight();
	const Int8		bitDepth = owner->GetPixelSize();
	const Int8		zDepth = owner->GetZDepth();
	const Int8		stencilDepth = owner->GetStencilDepth();

	if( flags == 0 )
		return true;

	if( flags & plRenderTarget::kIsTexture )
	{
		/// Do an extra check for width and height here: both must be powers of two
		if( width == 0 || ( width & ( width - 1 ) ) != 0 )
			return false;
		if( height == 0 || ( height & ( height - 1 ) ) != 0 )
			return false;

		resType = D3DRTYPE_TEXTURE;
	}
	else
		resType = D3DRTYPE_SURFACE;

	if( bitDepth == 16 )
		surfFormat = D3DFMT_A4R4G4B4;
	else if( bitDepth == 32 )
		surfFormat = D3DFMT_A8R8G8B8;

	// Index into kDepthFormats of the format currently being tried, used by the
	// fallback loop below. For the non-24 bit requests this starts just before
	// the first acceptable 24 bit fallback: all of them if no stencil was asked
	// for, only the ones with stencil bits otherwise. (This used to be left
	// uninitialized for those requests.)
	int stencilIndex = ( stencilDepth == 0 ) ? -1 : 0;

	/// Get the backbuffer format (if one is requested)
	if( zDepth )
	{
		if( zDepth == 16 && stencilDepth == 0 )
			depthFormat = D3DFMT_D16;
		else if( zDepth == 24 )
		{
			if( stencilDepth == 0 )
				stencilIndex = 0;
			else if( stencilDepth <= 4 )
				stencilIndex = 1;
			else
				stencilIndex = 2;

			depthFormat = kDepthFormats[ stencilIndex ];
		}
		else if( zDepth == 32 && stencilDepth == 0 )
			depthFormat = D3DFMT_D32;
		else if( zDepth == 15 && stencilDepth == 1 )
			depthFormat = D3DFMT_D15S1;

		// Anything else leaves the formats as the caller passed them in.
		if( surfFormat == D3DFMT_UNKNOWN || depthFormat == D3DFMT_UNKNOWN )
		{
			return false;
		}
	}
	else
	{
		depthFormat = D3DFMT_UNKNOWN;
	}

	/// Check the device format
	if( FAILED( fSettings.fDXError = fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType, fCurrentMode->fDDmode.Format,
												D3DUSAGE_RENDERTARGET, resType, surfFormat ) ) )
	{
		// Requested color depth isn't supported; try the other one.
		if( bitDepth == 16 )
			surfFormat = D3DFMT_A8R8G8B8;
		else if( bitDepth == 32 )
			surfFormat = D3DFMT_A4R4G4B4;

		if( FAILED( fSettings.fDXError = fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType, fCurrentMode->fDDmode.Format,
													D3DUSAGE_RENDERTARGET, resType, surfFormat ) ) )
		{
			IGetD3DError();
			return false;
		}
	}

	if( zDepth )
	{
		// Walk up through the 24 bit formats until the device accepts one.
		while( FAILED( fSettings.fDXError = fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType, fCurrentMode->fDDmode.Format,
													D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, depthFormat ) ) )
		{
			if( stencilIndex < kNumDepthFormats - 1 )
			{
				stencilIndex++;
				depthFormat = kDepthFormats[ stencilIndex ];
			}
			else
			{
				IGetD3DError();
				return false;
			}
		}

		// Finally, the chosen color and depth formats must work together.
		if( FAILED( fSettings.fDXError = fD3DObject->CheckDepthStencilMatch( fCurrentAdapter, fCurrentDevice->fDDType, fCurrentMode->fDDmode.Format,
													surfFormat, depthFormat ) ) )
		{
			IGetD3DError();
			return false;
		}
	}

	return true;
}

//// IFindRenderTargetInfo ////////////////////////////////////////////////////
//	Shared processing of render target creation parameters. Also does the
//	dirty work of finding a good surface format to use.
// Doesn't bother checking depth buffer, since this is only used for a render target
// that's going to share a depth buffer that's already been created.
// Parameters:
//	owner		- render target whose flags and pixel size are examined.
//	surfFormat	- in/out; set from the owner's pixel size (16 -> A4R4G4B4,
//				  32 -> A8R8G8B8), possibly swapped to the other one if the device
//				  rejects the first choice. Left as passed in for other pixel sizes.
//	resType		- out; D3DRTYPE_TEXTURE for texture targets, else D3DRTYPE_SURFACE.
// Returns false if no supported color format is found. If the owner has no flags
// set at all, returns true without touching the outputs.
hsBool	plDXPipeline::IFindRenderTargetInfo( plRenderTarget *owner, D3DFORMAT &surfFormat, D3DRESOURCETYPE &resType )
{
	const UInt16	flags = owner->GetFlags();
	const Int8		bitDepth = owner->GetPixelSize();

	if( flags == 0 )
		return true;

	resType = ( flags & plRenderTarget::kIsTexture ) ? D3DRTYPE_TEXTURE : D3DRTYPE_SURFACE;

	if( bitDepth == 16 )
		surfFormat = D3DFMT_A4R4G4B4;
	else if( bitDepth == 32 )
		surfFormat = D3DFMT_A8R8G8B8;

	if( surfFormat == D3DFMT_UNKNOWN )
	{
		return false;
	}

	/// Check the device format
	if( FAILED( fSettings.fDXError = fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType, fCurrentMode->fDDmode.Format,
												D3DUSAGE_RENDERTARGET, resType, surfFormat ) ) )
	{
		// Requested color depth isn't supported; try the other one.
		if( bitDepth == 16 )
			surfFormat = D3DFMT_A8R8G8B8;
		else if( bitDepth == 32 )
			surfFormat = D3DFMT_A4R4G4B4;

		if( FAILED( fSettings.fDXError = fD3DObject->CheckDeviceFormat( fCurrentAdapter, fCurrentDevice->fDDType, fCurrentMode->fDDmode.Format,
													D3DUSAGE_RENDERTARGET, resType, surfFormat ) ) )
		{
			IGetD3DError();
			return false;
		}
	}

	return true;
}

// PushRenderRequest ///////////////////////////////////////////////
// We're moving from our current render (probably to primary) onto
// another specialized render request. This may be to the primary (if req->GetRenderTarget() is nil)
// or to a texture. This function saves enough state to resume rendering on PopRenderRequest.
// The render request may just be a new camera position.
void plDXPipeline::PushRenderRequest(plRenderRequest* req)
{
	// Snapshot the pieces of the current view that must carry over into the
	// new one: the local<->world transforms and the default fog.
	hsMatrix44 savedL2W = fView.fLocalToWorld;
	hsMatrix44 savedW2L = fView.fWorldToLocal;
	plFogEnvironment savedFog = fView.fDefaultFog;

	// Stash the whole current view; PopRenderRequest restores it.
	fSettings.fViewStack.Push(fView);

	// Now start configuring fView from the request.
	SetViewTransform(req->GetViewTransform());

	PushRenderTarget(req->GetRenderTarget());
	fView.fRenderState = req->GetRenderState();

	// The view holds a reference on the request until it is popped.
	fView.fRenderRequest = req;
	hsRefCnt_SafeRef(fView.fRenderRequest);

	SetDrawableTypeMask(req->GetDrawableMask());
	SetSubDrawableTypeMask(req->GetSubDrawableMask());

	fView.fClearColor = inlGetD3DColor( req->GetClearColor() );
	fView.fClearDepth = req->GetClearDepth();

	// A negative fog start means "keep the fog we had". Otherwise build a fog
	// environment from the request's yon and clear color, and invalidate the
	// cached current fog.
	if( req->GetFogStart() < 0 )
	{
		fView.fDefaultFog = savedFog;
	}
	else
	{
		fView.fDefaultFog.Set( req->GetYon() * (1.f - req->GetFogStart()), req->GetYon(), 1.f, &req->GetClearColor());
		fCurrFog.fEnvPtr = nil;
	}

	if( req->GetOverrideMat() )
		PushOverrideMaterial(req->GetOverrideMat());

	// Put back the transforms we saved on entry.
	fView.fWorldToLocal = savedW2L;
	fView.fLocalToWorld = savedL2W;

	RefreshMatrices();

	if (req->GetIgnoreOccluders())
		fView.fCullMaxNodes = 0;

	fView.fCullTreeDirty = true;
}

// PopRenderRequest //////////////////////////////////////////////////
// Restore state to resume rendering as before the preceding PushRenderRequest.
void plDXPipeline::PopRenderRequest(plRenderRequest* req)
{
	// Undo the override material pushed by PushRenderRequest, if the request had one.
	if( req->GetOverrideMat() )
		PopOverrideMaterial(nil);

	// Drop the reference PushRenderRequest took on the request, then restore
	// the view that was current before the push. The UnRef must come first,
	// since the assignment overwrites fView.fRenderRequest.
	hsRefCnt_SafeUnRef(fView.fRenderRequest);
	fView = fSettings.fViewStack.Pop();

	// Force the next thing drawn to update the fog settings.
	fD3DDevice->SetRenderState(D3DRS_FOGENABLE, FALSE);
	fCurrFog.fEnvPtr = nil;

	PopRenderTarget();
	// Flag the projection and camera transforms for reset on the restored view.
	fView.fXformResetFlags = fView.kResetProjection | fView.kResetCamera;
}

//// PushRenderTarget /////////////////////////////////////////////////////////
// Begin rendering to the specified target. If target is nil, that's the primary surface.
void	plDXPipeline::PushRenderTarget( plRenderTarget *target )
{
	//WHITE
#ifdef MF_ENABLE_HACKOFF
	// Debug aid: remember each distinct target that gets pushed.
	if( target && (hackOffscreens.kMissingIndex == hackOffscreens.Find(target)) )
		hackOffscreens.Append(target);
#endif // MF_ENABLE_HACKOFF

	// Make the target current, along with its device ref (nil for the primary).
	fSettings.fCurrRenderTarget = target;
	hsRefCnt_SafeAssign( fSettings.fCurrRenderTargetRef, ( target != nil ) ? (plDXDeviceRef *)target->GetDeviceRef() : nil );

	// The base render target is the topmost ancestor of the target. Note that
	// for a nil target the previous base render target is left untouched.
	for( plRenderTarget *ancestor = target; ancestor != nil; ancestor = ancestor->GetParent() )
		fSettings.fCurrBaseRenderTarget = ancestor;

	fSettings.fRenderTargets.Push( fSettings.fCurrRenderTarget );
	ISetRenderTarget( fSettings.fCurrRenderTarget );
}

//// PopRenderTarget //////////////////////////////////////////////////////////
// Pop the top of the render target stack and make whatever is now on top
// (or the primary surface, if the stack is empty) current again. The cached
// device ref is released or reassigned so nothing from the popped target is
// held on to. Returns the target that was popped.
plRenderTarget		*plDXPipeline::PopRenderTarget()
{
	plRenderTarget	*popped = fSettings.fRenderTargets.Pop();
	const int		remaining = fSettings.fRenderTargets.GetCount();

	if( remaining != 0 )
	{
		// Something is still on the stack: it becomes current.
		plRenderTarget *top = fSettings.fRenderTargets[ remaining - 1 ];
		fSettings.fCurrRenderTarget = top;

		// The base target is the last non-nil entry up the parent chain.
		for( plRenderTarget *walk = top; walk != nil; walk = walk->GetParent() )
			fSettings.fCurrBaseRenderTarget = walk;

		plDXDeviceRef *topRef = nil;
		if( top != nil )
			topRef = (plDXDeviceRef *)top->GetDeviceRef();
		hsRefCnt_SafeAssign( fSettings.fCurrRenderTargetRef, topRef );
	}
	else
	{
		// Stack is empty: clear every cached render target pointer.
		fSettings.fCurrRenderTarget = nil;
		fSettings.fCurrBaseRenderTarget = nil;
		hsRefCnt_SafeUnRef( fSettings.fCurrRenderTargetRef );
		fSettings.fCurrRenderTargetRef = nil;
	}

	ISetRenderTarget( fSettings.fCurrRenderTarget );

	return popped;
}

// ISetAnisotropy ///////////////////////////////////////////////////////////
// Switch anisotropic filtering on or off for sampler stages 0-7. The request
// is forced off when no anisotropic samples are available or the
// kFlagNoAnisotropy debug flag is set, and nothing is sent to D3D when the
// requested state already matches the cached one.
void plDXPipeline::ISetAnisotropy(hsBool on)
{
	const bool forcedOff = (fSettings.fMaxAnisotropicSamples <= 0) || IsDebugFlagSet(plPipeDbg::kFlagNoAnisotropy);
	if( forcedOff )
		on = false;

	if( on == fSettings.fCurrAnisotropy )
		return;

	// GeForce cards have decided that they no longer handle anisotropic as a mag filter.
	// We could detect caps... but I don't think we'd notice if we just made the mag
	// filter always be linear. So only the min filter changes with the setting.
	const D3DTEXTUREFILTERTYPE minFilter = on ? D3DTEXF_ANISOTROPIC : D3DTEXF_LINEAR;

	for( int stage = 0; stage < 8; stage++ )
	{
		fD3DDevice->SetSamplerState( stage, D3DSAMP_MINFILTER, minFilter );
		fD3DDevice->SetSamplerState( stage, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR );
		if( on )
			fD3DDevice->SetSamplerState( stage, D3DSAMP_MAXANISOTROPY, (DWORD)fSettings.fMaxAnisotropicSamples );
	}

	fSettings.fCurrAnisotropy = on ? true : false;
}

//// ISetRenderTarget /////////////////////////////////////////////////////////
// Point the device at the given render target; nil selects the primary
// surfaces. Anisotropic filtering is requested for the primary surface and
// turned off for any other target. State is always invalidated and the
// viewport reset afterwards (as required by experience, not documentation).
void	plDXPipeline::ISetRenderTarget( plRenderTarget *target )
{
	// Fetch the target's device ref, (re)building it if missing or dirty.
	plDXRenderTargetRef	*rtRef = nil;
	if( target != nil )
	{
		rtRef = (plDXRenderTargetRef *)target->GetDeviceRef();
		if( rtRef == nil || rtRef->IsDirty() )
			rtRef = (plDXRenderTargetRef *)MakeRenderTargetRef( target );
	}

	IDirect3DSurface9	*colorSurf;
	IDirect3DSurface9	*depthSurf;

	if( rtRef != nil && rtRef->GetColorSurface() != nil )
	{
		/// Render into this target's own surfaces
		colorSurf = rtRef->GetColorSurface();
		depthSurf = rtRef->fD3DDepthSurface;
		ISetAnisotropy(false);
	}
	else
	{
		/// No usable ref or color surface: fall back to the main screen
		colorSurf = fD3DMainSurface;
		depthSurf = fD3DDepthSurface;
		ISetAnisotropy(true);
	}

	// Only touch the device when one of the surfaces actually changes.
	const bool surfacesChanged = ( colorSurf != fSettings.fCurrD3DMainSurface ) || ( depthSurf != fSettings.fCurrD3DDepthSurface );
	if( surfacesChanged )
	{
		fSettings.fCurrD3DMainSurface = colorSurf;
		fSettings.fCurrD3DDepthSurface = depthSurf;
		fD3DDevice->SetRenderTarget(0, colorSurf);
		fD3DDevice->SetDepthStencilSurface(depthSurf);
	}

	IInvalidateState();

	ISetViewport();
}

// SetClear /////////////////////////////////////////////////////////////////////
// Set the color and depth clear values.
void plDXPipeline::SetClear(const hsColorRGBA* col, const hsScalar* depth)
{
	if( col )
		fView.fClearColor = inlGetD3DColor(*col);
	if( depth )
		fView.fClearDepth = *depth;
}

// GetClearColor ////////////////////////////////////////////////////////////////
// Return the current view's clear color, converted back from the packed value
// stored in fView.fClearColor via hsColorRGBA::FromARGB32.
hsColorRGBA plDXPipeline::GetClearColor() const
{
	hsColorRGBA unpacked = hsColorRGBA();
	return unpacked.FromARGB32( fView.fClearColor );
}

// GetClearDepth ////////////////////////////////////////////////////////////////
// Return the depth value the current view clears the depth buffer to.
hsScalar plDXPipeline::GetClearDepth() const
{
	const hsScalar clearDepth = fView.fClearDepth;
	return clearDepth;
}

//// ClearRenderTarget ////////////////////////////////////////////////////////
// Clear the current render target using a drawable. If d is not a
// plDrawableSpans, this falls through to the flat-fill ClearRenderTarget().
// Otherwise the depth buffer is cleared with a normal Clear call (only when
// kRenderClearDepth is set in fView.fRenderState) and the drawable is then
// drawn with the drawable type masks temporarily forced to
// kNormal / all-sub-types.
void plDXPipeline::ClearRenderTarget( plDrawable* d )
{
	if( plDrawableSpans::ConvertNoRef(d) == nil )
	{
		ClearRenderTarget();
		return;
	}

	// Depth first, restricted to the viewport when it doesn't cover the whole target.
	if( fView.fRenderState & kRenderClearDepth )
	{
		D3DRECT r;
		if( IGetClearViewPort(r) )
		{
			WEAK_ERROR_CHECK( fD3DDevice->Clear( 1, &r, D3DCLEAR_ZBUFFER, 0, fView.fClearDepth, 0L ) );
		}
		else
		{
			WEAK_ERROR_CHECK( fD3DDevice->Clear( 0, nil, D3DCLEAR_ZBUFFER, 0, fView.fClearDepth, 0L ) );
// debug, clears to red			WEAK_ERROR_CHECK( fD3DDevice->Clear( 0, nil, D3DCLEAR_ZBUFFER | D3DCLEAR_TARGET, 0xffff0000, fView.fClearDepth, 0L ) );
		}
	}

	// Remember the view state we are about to override (or that drawing might touch).
	const UInt32 savedRenderState = fView.fRenderState;
	const UInt32 savedTypeMask = fView.fDrawableTypeMask;
	const UInt32 savedSubTypeMask = fView.fSubDrawableTypeMask;

	fView.fDrawableTypeMask = plDrawable::kNormal;
	fView.fSubDrawableTypeMask = UInt32(-1);

	BeginDrawable(d);
	Draw(d);
	EndDrawable(d);

	// Put the view back the way we found it.
	fView.fSubDrawableTypeMask = savedSubTypeMask;
	fView.fDrawableTypeMask = savedTypeMask;
	fView.fRenderState = savedRenderState;
}

// IGetClearViewPort //////////////////////////////////////////////
// Fill r with the current viewport rectangle. Returns true when that rectangle
// differs from the full extent of the current render target (or of the
// original screen size when no render target is set), i.e. when a clear
// needs to be restricted to r; returns false when it covers everything.
hsBool plDXPipeline::IGetClearViewPort(D3DRECT& r)
{
	r.x1 = GetViewTransform().GetViewPortLeft();
	r.y1 = GetViewTransform().GetViewPortTop();
	r.x2 = GetViewTransform().GetViewPortRight();
	r.y2 = GetViewTransform().GetViewPortBottom();

	// A viewport that doesn't start at the origin can't be the full surface.
	if( (r.x1 != 0) || (r.y1 != 0) )
		return true;

	if( fSettings.fCurrRenderTarget != nil )
		return ( (r.x2 != fSettings.fCurrRenderTarget->GetWidth()) || (r.y2 != fSettings.fCurrRenderTarget->GetHeight()) );

	return ( (r.x2 != fSettings.fOrigWidth) || (r.y2 != fSettings.fOrigHeight) );
}

// ClearRenderTarget //////////////////////////////////////////////////////////////////////////////
// Flat fill the current render target with the specified color and depth values.
void	plDXPipeline::ClearRenderTarget( const hsColorRGBA *col, const hsScalar* depth )
{
	if( fView.fRenderState & (kRenderClearColor | kRenderClearDepth) )
	{
		DWORD clearColor = inlGetD3DColor(col ? *col : GetClearColor());
		hsScalar clearDepth = depth ? *depth : fView.fClearDepth;

		DWORD	dwFlags = 0;//fStencil.fDepth > 0 ? D3DCLEAR_STENCIL : 0;
		if( fView.fRenderState & kRenderClearColor )
			dwFlags |= D3DCLEAR_TARGET;
		if( fView.fRenderState & kRenderClearDepth )
			dwFlags |= D3DCLEAR_ZBUFFER;

		D3DRECT r;
		hsBool useRect = IGetClearViewPort(r);
		if( useRect )
		{
			WEAK_ERROR_CHECK( fD3DDevice->Clear( 1, &r, dwFlags, clearColor, clearDepth, 0L ) );
		}
		else
		{
			WEAK_ERROR_CHECK( fD3DDevice->Clear( 0, nil, dwFlags, clearColor, clearDepth, 0L ) );
		}
	}
}


///////////////////////////////////////////////////////////////////////////////
//// Fog //////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// The current fog system sucks. It was never meant to get used this way, but
// the production artists started using it with debug commands that were around,
// and before they could be stopped it was too late.
// The good news is that there's a lot that could be done with fog here that
// would be greatly appreciated.

// IGetVSFogSet ///////////////////////////////////////////////////////////////
// Translate the current fog settings into a linear fog that the current
// vertex shaders can use.
void plDXPipeline::IGetVSFogSet(float* const set) const
{
	set[2] = 0.f;
	set[3] = 1.f;
	if( fCurrFog.fEnvPtr )
	{
		hsColorRGBA colorTrash;
		hsScalar start;
		hsScalar end;
		fCurrFog.fEnvPtr->GetPipelineParams(&start, &end, &colorTrash);
		if( end > start )
		{
			set[0] = -end;
			set[1] = 1.f / (start - end);
		}
		else
		{
			set[0] = 1.f;
			set[1] = 0.f;
		}
	}
	else
	{
		set[0] = 1.f;
		set[1] = 0.f;
	}
}

//// ISetFogParameters ////////////////////////////////////////////////////////
// So looking at this function, one might guess that fog parameters were settable
// individually for different objects, and that was the original intent, with transitions
// as something like the avatar moved from one fog region to another.
// Never happened.
// So the current state is that there is one set of fog parameters per age, and things
// are either fogged, or not fogged.
// This is complicated by the DX vertex/pixel shaders only supporting per-vertex fog,
// so the same plasma fog settings may turn into differing D3D fog state.
//
// What the function does, in order:
//	1. Picks the fog environment: the span's own, else the view default, or nil
//	   when there is no span or the layer/override flags say "really no fog".
//	2. Decides between table (pixel) fog and vertex fog; vertex fog is used when
//	   the device lacks kCapsPixelFog or the base layer has a vertex shader.
//	3. Returns early if environment, vertex-ness and shader-ness all match the
//	   cached fCurrFog values.
//	4. Otherwise pushes enable/color/mode/start/end/density to D3D, skipping
//	   individual render states whose cached value is unchanged (unless forceLoad).
void plDXPipeline::ISetFogParameters(const plSpan* span, const plLayerInterface* baseLay)
{
#ifndef PLASMA_EXTERNAL_RELEASE
	// Debug switch: kill fog entirely and forget the cached environment.
	if (IsDebugFlagSet(plPipeDbg::kFlagNoFog))
	{
		fCurrFog.fEnvPtr = nil;
		fD3DDevice->SetRenderState(D3DRS_FOGENABLE, FALSE);
		return;
	}
#endif // PLASMA_EXTERNAL_RELEASE

	// No span -> no fog. A span without its own environment uses the view's default.
	plFogEnvironment* fog = (span ? (span->fFogEnvironment ? span->fFogEnvironment : &fView.fDefaultFog) : nil);

	UInt8 isVertex = 0;
	UInt8 isShader = false;
	if (baseLay)
	{
		// The layer can opt out of fog, unless the "override off" material state masks that flag.
		if ((baseLay->GetShadeFlags() & hsGMatState::kShadeReallyNoFog) && !(fMatOverOff.fShadeFlags & hsGMatState::kShadeReallyNoFog))
			fog = nil;
		if (baseLay->GetVertexShader())
			isShader = true;
	}
	// The "override on" material state can force fog off regardless of the layer.
	if (fMatOverOn.fShadeFlags & hsGMatState::kShadeReallyNoFog)
		fog = nil;

	// forceLoad means "ignore the cached color/mode and resend them".
	bool forceLoad = false;
	D3DRENDERSTATETYPE	d3dFogType = D3DRS_FOGTABLEMODE;		// Use VERTEXMODE for vertex fog

#if !HS_BUILD_FOR_XBOX
	if (!(fSettings.fD3DCaps & kCapsPixelFog) || isShader)
	{
		d3dFogType = D3DRS_FOGVERTEXMODE;
		isVertex = true;
	}
#endif

	// Quick check
	if ((fCurrFog.fEnvPtr == fog) && (fCurrFog.fIsVertex == isVertex) && (fCurrFog.fIsShader == isShader))
		return;

	UInt8 type = ( fog == nil ) ? plFogEnvironment::kNoFog : fog->GetType();

	if (type == plFogEnvironment::kNoFog)
	{
		/// No fog, just disable
		// Note that fIsVertex / fIsShader are left at their previous cached values here.
		fD3DDevice->SetRenderState( D3DRS_FOGENABLE, FALSE );
		fCurrFog.fEnvPtr = nil;
		return;
	}
	else if( fCurrFog.fEnvPtr != fog )
	{
		// Switching to a different environment: (re)enable fog and resend everything.
		fD3DDevice->SetRenderState( D3DRS_FOGENABLE, TRUE );
		forceLoad = true;
		fCurrFog.fEnvPtr = fog;
	}

	// Vertex shader layers always get linear fog, whatever the environment's own type.
	if( isShader )
		type = plFogEnvironment::kLinearFog;

	if( fCurrFog.fIsShader != isShader )
		forceLoad = true;

	if( fCurrFog.fIsVertex != isVertex )
		forceLoad = true;

	fCurrFog.fIsShader = isShader;
	fCurrFog.fIsVertex = isVertex;

	// For linear fog the first value is the start distance; otherwise it is the density.
	// 'end' is only written (and only read below) on the linear path.
	hsScalar	startOrDensity, end;
	hsColorRGBA	color;

	/// Get params
	if( type == plFogEnvironment::kLinearFog )
	{
		fog->GetPipelineParams( &startOrDensity, &end, &color );

		if (startOrDensity == end)
		{
			// This should be legal, but some cards don't like it. Just disable. Same thing.
			// NOTE(review): fCurrFog.fEnvPtr still points at this environment when we
			// return here, so a later call with the same environment but a different
			// shader/vertex mode skips the FOGENABLE=TRUE branch above and fog looks
			// like it would stay disabled - confirm whether that can happen in practice.
			fD3DDevice->SetRenderState(D3DRS_FOGENABLE, FALSE);
			return;
		}
	}
	else
		fog->GetPipelineParams( &startOrDensity, &color );

	if( isShader )
	{
		// None of this is technically necessary, but it's to work around
		// a known goofiness in the NVidia drivers. Actually, I don't think
		// having to set the tablemode fog to linear in addition to setting
		// the vertexmode is even a "known" issue. But turns out to be
		// necessary on GeForceFX latest drivers.
		startOrDensity = 1.f;
		end = 0.f;

		// Setting FOGTABLEMODE to none seems to work on both ATI and NVidia,
		// but I haven't tried it on the GeForceFX yet.
		//		if( fCurrFog.fMode != D3DFOG_LINEAR )
		//			fD3DDevice->SetRenderState(D3DRS_FOGTABLEMODE, D3DFOG_LINEAR);
		fD3DDevice->SetRenderState(D3DRS_FOGTABLEMODE, D3DFOG_NONE);
	}

	/// Set color
	if( !( fCurrFog.fColor == color ) || forceLoad )
	{
		fCurrFog.fColor = color;
		fCurrFog.fHexColor = inlGetD3DColor( color );
		fD3DDevice->SetRenderState( D3DRS_FOGCOLOR, fCurrFog.fHexColor );
	}

	// Lookup from fog type to D3D fog mode, indexed directly by 'type'.
	// NOTE(review): this relies on the plFogEnvironment type values being 0..3 in
	// the order linear, exp, exp2, none - verify against plFogEnvironment.
	D3DFOGMODE			modes[ 4 ] = { D3DFOG_LINEAR, D3DFOG_EXP, D3DFOG_EXP2, D3DFOG_NONE };

	/// Set type
	if( fCurrFog.fMode != modes[type] || forceLoad )
	{
		// Mode changed (or forced): send the mode plus every parameter that mode uses.
		fCurrFog.fMode = modes[type];

		if( fCurrFog.fMode == D3DFOG_LINEAR )
		{
			fCurrFog.fStart = startOrDensity;
			fCurrFog.fEnd = end;

			// The float render states are passed as the raw bit pattern of the float in a DWORD.
			fD3DDevice->SetRenderState( d3dFogType, fCurrFog.fMode );
			fD3DDevice->SetRenderState( D3DRS_FOGSTART, *(DWORD *)( &fCurrFog.fStart ) );
			fD3DDevice->SetRenderState( D3DRS_FOGEND, *(DWORD *)( &fCurrFog.fEnd ) );
		}
		else
		{
			fCurrFog.fDensity = startOrDensity;

			fD3DDevice->SetRenderState( d3dFogType, fCurrFog.fMode );
			fD3DDevice->SetRenderState( D3DRS_FOGDENSITY, *(DWORD *)( &fCurrFog.fDensity ) );
		}
	}
	else
	{
		// Type is the same, but are the params?
		if( fCurrFog.fMode == D3DFOG_LINEAR )
		{
			if( fCurrFog.fStart != startOrDensity )
			{
				fCurrFog.fStart = startOrDensity;
				fD3DDevice->SetRenderState( D3DRS_FOGSTART, *(DWORD *)( &fCurrFog.fStart ) );
			}

			if( fCurrFog.fEnd != end )
			{
				fCurrFog.fEnd = end;
				fD3DDevice->SetRenderState( D3DRS_FOGEND, *(DWORD *)( &fCurrFog.fEnd ) );
			}
		}
		else
		{
			if( fCurrFog.fDensity != startOrDensity )
			{
				fCurrFog.fDensity = startOrDensity;
				fD3DDevice->SetRenderState( D3DRS_FOGDENSITY, *(DWORD *)( &fCurrFog.fDensity ) );
			}
		}
	}
}

///////////////////////////////////////////////////////////////////////////////
//// Stenciling ///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// I know that none of this stencil code has ever been used in production.
// To my knowledge, none of this stencil code was ever even tested.
// It may save you some time as a starting point, but don't trust it.

//// StencilEnable ////////////////////////////////////////////////////////////
// Turn stencil testing on or off.
// Returns true if the device is (now) in the requested state, false if
// enabling was requested but there is no stencil buffer (fStencil.fDepth == 0).
// The requested state is recorded in fStencil.fEnabled so that the
// already-in-that-state check at the top actually reflects what was last sent
// to the device; previously the cached flag was never updated here.

hsBool	plDXPipeline::StencilEnable( hsBool enable )
{
	// Compare as plain booleans so any non-zero hsBool counts as "on".
	const bool wantOn = ( enable != 0 );
	const bool isOn = ( fStencil.fEnabled != 0 );

	if( isOn == wantOn )
		return true;

	if( wantOn && fStencil.fDepth == 0 )
		return false;			// Can't enable stenciling when we don't support it!

	fD3DDevice->SetRenderState( D3DRS_STENCILENABLE, wantOn ? TRUE : FALSE );
	fStencil.fEnabled = wantOn;

	return true;
}

//// StencilSetCompareFunc ////////////////////////////////////////////////////
// Set the stencil comparison function and reference value. func is one of the
// plStencilCaps::kCmp* values; anything else asserts and changes nothing.
// Each render state is only sent when it differs from the cached value.

void	plDXPipeline::StencilSetCompareFunc( UInt8 func, UInt32 refValue )
{
	D3DCMPFUNC	d3dFunc;

	switch( func )
	{
		case plStencilCaps::kCmpNever:
			d3dFunc = D3DCMP_NEVER;
			break;
		case plStencilCaps::kCmpLessThan:
			d3dFunc = D3DCMP_LESS;
			break;
		case plStencilCaps::kCmpEqual:
			d3dFunc = D3DCMP_EQUAL;
			break;
		case plStencilCaps::kCmpLessThanOrEqual:
			d3dFunc = D3DCMP_LESSEQUAL;
			break;
		case plStencilCaps::kCmpGreaterThan:
			d3dFunc = D3DCMP_GREATER;
			break;
		case plStencilCaps::kCmpNotEqual:
			d3dFunc = D3DCMP_NOTEQUAL;
			break;
		case plStencilCaps::kCmpGreaterThanOrEqual:
			d3dFunc = D3DCMP_GREATEREQUAL;
			break;
		case plStencilCaps::kCmpAlways:
			d3dFunc = D3DCMP_ALWAYS;
			break;
		default:
			hsAssert( false, "Invalid compare function to StencilSetCompareFunc()" );
			return;
	}

	if( fStencil.fCmpFunc != d3dFunc )
	{
		fD3DDevice->SetRenderState( D3DRS_STENCILFUNC, d3dFunc );
		fStencil.fCmpFunc = d3dFunc;
	}

	if( fStencil.fRefValue != refValue )
	{
		fD3DDevice->SetRenderState( D3DRS_STENCILREF, refValue );
		fStencil.fRefValue = refValue;
	}
}

//// StencilSetMask ///////////////////////////////////////////////////////////
// Set the stencil compare mask and the stencil write mask. Each one is only
// sent to the device when it differs from the value cached in fStencil.

void	plDXPipeline::StencilSetMask( UInt32 mask, UInt32 writeMask )
{
	const bool maskChanged = ( fStencil.fMask != mask );
	if( maskChanged )
	{
		fD3DDevice->SetRenderState( D3DRS_STENCILMASK, mask );
		fStencil.fMask = mask;
	}

	const bool writeMaskChanged = ( fStencil.fWriteMask != writeMask );
	if( writeMaskChanged )
	{
		fD3DDevice->SetRenderState( D3DRS_STENCILWRITEMASK, writeMask );
		fStencil.fWriteMask = writeMask;
	}
}

//// StencilSetOps ////////////////////////////////////////////////////////////

void	plDXPipeline::StencilSetOps( UInt8 passOp, UInt8 failOp, UInt8 passButZFailOp )
{
	D3DSTENCILOP		op;


	/// Pass op
	switch( passOp )
	{
		case plStencilCaps::kOpKeep:		op = D3DSTENCILOP_KEEP; break;
		case plStencilCaps::kOpSetToZero:	op = D3DSTENCILOP_ZERO; break;
		case plStencilCaps::kOpReplace:		op = D3DSTENCILOP_REPLACE; break;
		case plStencilCaps::kOpIncClamp:	op = D3DSTENCILOP_INCRSAT; break;
		case plStencilCaps::kOpDecClamp:	op = D3DSTENCILOP_DECRSAT; break;
		case plStencilCaps::kOpInvert:		op = D3DSTENCILOP_INVERT; break;
		case plStencilCaps::kOpIncWrap:		op = D3DSTENCILOP_INCR; break;
		case plStencilCaps::kOpDecWrap:		op = D3DSTENCILOP_DECR; break;
		default: hsAssert( false, "Invalid op to StencilSetOps()" ); return;
	}

	if( fStencil.fPassOp != op )
	{
		fD3DDevice->SetRenderState( D3DRS_STENCILPASS, op );
		fStencil.fPassOp = op;
	}

	/// Fail op
	switch( failOp )
	{
		case plStencilCaps::kOpKeep:		op = D3DSTENCILOP_KEEP; break;
		case plStencilCaps::kOpSetToZero:	op = D3DSTENCILOP_ZERO; break;
		case plStencilCaps::kOpReplace:		op = D3DSTENCILOP_REPLACE; break;
		case plStencilCaps::kOpIncClamp:	op = D3DSTENCILOP_INCRSAT; break;
		case plStencilCaps::kOpDecClamp:	op = D3DSTENCILOP_DECRSAT; break;
		case plStencilCaps::kOpInvert:		op = D3DSTENCILOP_INVERT; break;
		case plStencilCaps::kOpIncWrap:		op = D3DSTENCILOP_INCR; break;
		case plStencilCaps::kOpDecWrap:		op = D3DSTENCILOP_DECR; break;
		default: hsAssert( false, "Invalid op to StencilSetOps()" ); return;
	}

	if( fStencil.fFailOp != op )
	{
		fD3DDevice->SetRenderState( D3DRS_STENCILFAIL, op );
		fStencil.fFailOp = op;
	}

	/// Pass-but-z-fail op
	switch( passButZFailOp )
	{
		case plStencilCaps::kOpKeep:		op = D3DSTENCILOP_KEEP; break;
		case plStencilCaps::kOpSetToZero:	op = D3DSTENCILOP_ZERO; break;
		case plStencilCaps::kOpReplace:		op = D3DSTENCILOP_REPLACE; break;
		case plStencilCaps::kOpIncClamp:	op = D3DSTENCILOP_INCRSAT; break;
		case plStencilCaps::kOpDecClamp:	op = D3DSTENCILOP_DECRSAT; break;
		case plStencilCaps::kOpInvert:		op = D3DSTENCILOP_INVERT; break;
		case plStencilCaps::kOpIncWrap:		op = D3DSTENCILOP_INCR; break;
		case plStencilCaps::kOpDecWrap:		op = D3DSTENCILOP_DECR; break;
		default: hsAssert( false, "Invalid op to StencilSetOps()" ); return;
	}

	if( fStencil.fPassButZFailOp != op )
	{
		fD3DDevice->SetRenderState( D3DRS_STENCILZFAIL, op );
		fStencil.fPassButZFailOp = op;
	}
}

//// StencilGetCaps ///////////////////////////////////////////////////////////
// Fill in caps with the stencil depths offered by the current display mode's
// depth formats and the stencil ops reported by the current device.
// Returns true (and caps->fIsSupported == true) when at least one stencil
// depth is available; otherwise returns false with fIsSupported == false and
// both bit fields zeroed. A nil caps pointer asserts and returns false.
//
// Every field is now given a defined value on every path: previously
// fIsSupported was never set on success and fSupportedOps was left untouched
// on failure.

hsBool	plDXPipeline::StencilGetCaps( plStencilCaps *caps )
{
	hsAssert( caps != nil, "Invalid pointer to StencilGetCaps()" );
	if( caps == nil )
		return false;	// hsAssert may be compiled out; don't dereference nil.

	int		i;

	// Start from "nothing supported" so early exits leave caps well defined.
	caps->fIsSupported = false;
	caps->fSupportedDepths = 0;
	caps->fSupportedOps = 0;

	/// Find supported depths
	for( i = 0; i < fCurrentMode->fDepthFormats.GetCount(); i++ )
	{
		switch( fCurrentMode->fDepthFormats[ i ] )
		{
			case D3DFMT_D15S1:		caps->fSupportedDepths |= plStencilCaps::kDepth1Bit; break;
			case D3DFMT_D24X4S4:	caps->fSupportedDepths |= plStencilCaps::kDepth4Bits; break;
			case D3DFMT_D24S8:		caps->fSupportedDepths |= plStencilCaps::kDepth8Bits; break;
			default:				break;	// Depth format without stencil bits
		}
	}

	if( caps->fSupportedDepths == 0 )
		return false;

	/// Get supported ops
	if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_DECR )
		caps->fSupportedOps |= plStencilCaps::kOpDecWrap;
	if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_DECRSAT )
		caps->fSupportedOps |= plStencilCaps::kOpDecClamp;
	if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_INCR )
		caps->fSupportedOps |= plStencilCaps::kOpIncWrap;
	if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_INCRSAT )
		caps->fSupportedOps |= plStencilCaps::kOpIncClamp;

	if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_INVERT )
		caps->fSupportedOps |= plStencilCaps::kOpInvert;
	if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_KEEP )
		caps->fSupportedOps |= plStencilCaps::kOpKeep;
	if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_REPLACE )
		caps->fSupportedOps |= plStencilCaps::kOpReplace;
	if( fCurrentDevice->fDDCaps.StencilCaps & D3DSTENCILCAPS_ZERO )
		caps->fSupportedOps |= plStencilCaps::kOpSetToZero;

	caps->fIsSupported = true;
	return true;
}


///////////////////////////////////////////////////////////////////////////////
//// Lighting /////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

//// IMakeLightRef ////////////////////////////////////////////////////////////
// Build a plDXLightRef for the given light: reserve a D3D light index for it,
// attach it to the light as its device ref, link it into fLights.fRefList,
// load its D3D light info, and leave the D3D light disabled.
// Returns the new ref.
hsGDeviceRef	*plDXPipeline::IMakeLightRef( plLightInfo *owner )
{
	plDXLightRef	*lightRef = TRACKED_NEW plDXLightRef();

	/// Assign stuff and update
	lightRef->fD3DIndex = fLights.ReserveD3DIndex();
	lightRef->fOwner = owner;

	// Hand the ref to the light, then drop our own reference right away: for
	// now ONLY the BG owns the ref, not us (not until we use it, at least).
	owner->SetDeviceRef( lightRef );
	hsRefCnt_SafeUnRef( lightRef );

	lightRef->Link( &fLights.fRefList );

	lightRef->UpdateD3DInfo( fD3DDevice, &fLights );

	// Neutralize it until we need it.
	fD3DDevice->LightEnable( lightRef->fD3DIndex, false );

	return lightRef;
}

//// RegisterLight ////////////////////////////////////////////////////////////
// Register a light with the pipeline: link it into the active light list,
// give it a freshly made device ref and bump the light timestamp.
// A light that is already linked is left alone.
void plDXPipeline::RegisterLight(plLightInfo* liInfo)
{
	if( !liInfo->IsLinked() )
	{
		liInfo->Link( &fLights.fActiveList );
		liInfo->SetDeviceRef( IMakeLightRef( liInfo ) );
		fLights.fTime++;
	}
}

//// UnRegisterLight //////////////////////////////////////////////////////////
// Remove a light from the pipeline: clear its device ref, unlink it from
// whatever list it is in, and bump the light timestamp.
void plDXPipeline::UnRegisterLight(plLightInfo* liInfo)
{
	// Clear the device ref before unlinking.
	liInfo->SetDeviceRef( nil );
	liInfo->Unlink();

	++fLights.fTime;
}

//// IEnableLights ////////////////////////////////////////////////////////////
//	Runs light selection for a span. The normal (non-projected) lights are
//	always selected, limited to fSettings.fMaxNumLights. Projected lights,
//	limited to fSettings.fMaxNumProjectors, are selected as well unless
//	kRenderNoProjection is set in the view's render state.
//	Each pass is wrapped in its own profile timer.

void	plDXPipeline::IEnableLights( plSpan *span )
{
	plProfile_BeginTiming(SelectLights);
	ISelectLights( span, fSettings.fMaxNumLights, false );
	plProfile_EndTiming(SelectLights);

	// Projection disabled for this view: skip the projector pass.
	if( fView.fRenderState & kRenderNoProjection )
		return;

	plProfile_BeginTiming(SelectProj);
	ISelectLights( span, fSettings.fMaxNumProjectors, true );
	plProfile_EndTiming(SelectProj);
}

// ISelectLights ///////////////////////////////////////////////////////////////
// Find the strongest numLights lights to illuminate the span with.
// Weaker lights are faded out in effect so they won't pop when the
// strongest N changes membership.
//
//	span		- span whose light list (normal or projected) is consulted
//	numLights	- maximum number of lights taken from the front of that list
//	proj		- false: normal lights, which get enabled/disabled on the device;
//				  true: projected lights, which are only sorted into
//				  fLights.fProjAll / fLights.fProjEach for later use.
//
// NOTE(review): the code takes the first numLights entries of the span's list,
// so it presumably relies on that list already being ordered strongest first -
// confirm against whoever fills the list.
void	plDXPipeline::ISelectLights( plSpan *span, int numLights, hsBool proj )
{
	int					i, startScale;
	// Function-static scratch containers, reused (and reset below) on every call.
	static hsBitVector	newFlags;
	static hsTArray<plLightInfo*>	onLights;
	plDXLightRef		*ref;
	// The 0.3 initial value of overHold is never read; it is reassigned before use.
	float				threshhold, overHold = 0.3, scale;

	/// Build new flags

	/// Step 1: Find the n strongest lights
	newFlags.Clear();
	onLights.SetCount(0);

	// Debug flags can turn off all runtime lights, or just the projected / non-projected ones.
	if	(!IsDebugFlagSet(plPipeDbg::kFlagNoRuntimeLights) &&
		!(IsDebugFlagSet(plPipeDbg::kFlagNoApplyProjLights) && proj) &&
		!(IsDebugFlagSet(plPipeDbg::kFlagOnlyApplyProjLights) && !proj))
	{
		hsTArray<plLightInfo*>& spanLights = span->GetLightList(proj);

		for( i = 0; i < spanLights.GetCount() && i < numLights; i++ )
		{
			// NOTE(review): ref is dereferenced without a nil check - assumes every
			// light in the span list has a device ref; confirm.
			ref = (plDXLightRef *)spanLights[i]->GetDeviceRef();

			if( ref->IsDirty() )
			{
				// Index 0 is treated here as "no D3D index reserved yet".
				if( ref->fD3DIndex == 0 )
					ref->fD3DIndex = fLights.ReserveD3DIndex();
				ref->UpdateD3DInfo( fD3DDevice, &fLights );
				ref->SetDirty( false );
			}

			newFlags.SetBit( ref->fD3DIndex );
			onLights.Append(spanLights[i]);
		}
		// i is now the number of lights selected; by default none of them are faded.
		startScale = i;

		/// Attempt #2: Take some of the n strongest lights (below a given threshhold) and
		/// fade them out to nothing as they get closer to the bottom. This way, they fade
		/// out of existence instead of pop out.

		// Only fade when there are lights left over beyond the selected ones.
		if( i < spanLights.GetCount() - 1 && i > 0 )
		{
			// Strength of the first light that did NOT make the cut.
			threshhold = span->GetLightStrength( i, proj );
			i--;
			// Fade band runs from threshhold up to 1.5x threshhold, capped at the strongest light.
			overHold = threshhold * 1.5f;
			if( overHold > span->GetLightStrength( 0, proj ) )
				overHold = span->GetLightStrength( 0, proj );

			// Walk back from the weakest selected light while it is inside the fade band.
			// Light 0 is never faded (loop stops at i > 0).
			for( ; i > 0 && span->GetLightStrength( i, proj ) < overHold; i-- )
			{
				// scale is 1 at the threshhold and falls to 0 at overHold, so the
				// light is attenuated by (1 - scale).
				scale = ( overHold - span->GetLightStrength( i, proj ) ) / ( overHold - threshhold );

				ref = (plDXLightRef *)spanLights[i]->GetDeviceRef();

				IScaleD3DLight( ref, (1 - scale) * span->GetLightScale(i, proj) );
			}
			startScale = i + 1;
		}

		/// Make sure those lights that aren't scaled....aren't
		for( i = 0; i < startScale; i++ )
		{
			ref = (plDXLightRef *)spanLights[i]->GetDeviceRef();
			IScaleD3DLight(ref, span->GetLightScale(i, proj) );
		}

	}

	// If these are non-projected lights, go ahead and enable them.
	// For the projected lights, don't enable, just remember who they are.
	if( !proj )
	{
		// A little change here. Some boards get sticky about exactly
		// how many lights you have enabled, whether you are currently
		// rendering or not. So if we go through enabling the lights
		// we want and disabling the ones we don't, then even though
		// at the end of the loop, less than MaxNumLights are enabled,
		// we can still wind up screwed.
		// Think about if we have 8 lights enabled, and they all happen
		// to be at the end of fLights. Now we want to enable a different
		// 8 lights, which happen to be at the beginning of the list.
		// So we loop through and enable the lights we want, and then later
		// in the loop disable the lights we don't want. Problem is that
		// when we were enabling the ones we want we went over our 8 light
		// limit, and some boards (ATI) react by ignoring the enable request.
		// So then we disable the other lights at the end of the loop, but
		// it's too late because our enable requests at the beginning of the
		// loop were ignored.
		// Solution is to go through the list twice, first disabling, then
		// enabling. mf

		// Lights that are on now but not wanted any more.
		hsBitVector newOff = fLights.fEnabledFlags - newFlags;
		hsBitIterator iterOff(newOff);
		for( iterOff.Begin(); !iterOff.End(); iterOff.Advance() )
			fD3DDevice->LightEnable(iterOff.Current(), false);

		// Lights that are wanted but not on yet.
		hsBitVector newOn = newFlags - fLights.fEnabledFlags;
		hsBitIterator iterOn(newOn);
		for( iterOn.Begin(); !iterOn.End(); iterOn.Advance() )
			fD3DDevice->LightEnable(iterOn.Current(), true);
		fLights.fEnabledFlags = newFlags;
	}
	else
	{
		// Split the selected projected lights by their OverAll() flag.
		fLights.fProjAll.SetCount(0);
		fLights.fProjEach.SetCount(0);
		for( i = 0; i < onLights.GetCount(); i++ )
		{
			if( onLights[i]->OverAll() )
				fLights.fProjAll.Append(onLights[i]);
			else
				fLights.fProjEach.Append(onLights[i]);
		}
		onLights.SetCount(0);
	}
}

// IDisableSpanLights /////////////////////////////////////////////////////
// Turn off every D3D light currently flagged as enabled, recording each one
// in fLights.fHoldFlags so IRestoreSpanLights can switch it back on.
// fLights.fEnabledFlags is empty afterwards.
void plDXPipeline::IDisableSpanLights()
{
	for( int idx = 0; idx < fLights.fLastIndex + 1; idx++ )
	{
		if( !fLights.fEnabledFlags.IsBitSet(idx) )
			continue;

		fD3DDevice->LightEnable(idx, false);
		fLights.fHoldFlags.SetBit(idx);
	}

	fLights.fEnabledFlags.Clear();
}

// IRestoreSpanLights //////////////////////////////////////////////////////
// Switch back on every light recorded in fLights.fHoldFlags (as set by the
// matching IDisableSpanLights), marking each as enabled again.
// fLights.fHoldFlags is empty afterwards.
void plDXPipeline::IRestoreSpanLights()
{
	for( int idx = 0; idx < fLights.fLastIndex + 1; idx++ )
	{
		if( !fLights.fHoldFlags.IsBitSet(idx) )
			continue;

		fD3DDevice->LightEnable(idx, true);
		fLights.fEnabledFlags.SetBit(idx);
	}

	fLights.fHoldFlags.Clear();
}

//// IScaleD3DLight ///////////////////////////////////////////////////////////
// Send the light to D3D with its diffuse, ambient and specular RGB multiplied
// by scale; used for fading lights in and out by importance. The scale is
// first truncated to steps of 0.1, and nothing is sent if the resulting value
// matches the scale already cached on the ref. The ref's stored fD3DInfo
// itself is not modified.
void	plDXPipeline::IScaleD3DLight( plDXLightRef *ref, hsScalar scale )
{
	// Quantize to tenths so tiny changes in scale don't cause a SetLight.
	scale = int(scale * 1.e1f) * 1.e-1f;
	if( ref->fScale == scale )
		return;

	// Work on a copy so the unscaled light info stays intact on the ref.
	D3DLIGHT9		scaled = ref->fD3DInfo;

	D3DCOLORVALUE	*colors[ 3 ] = { &scaled.Diffuse, &scaled.Ambient, &scaled.Specular };
	for( int c = 0; c < 3; c++ )
	{
		// Alpha is deliberately left alone.
		colors[ c ]->r *= scale;
		colors[ c ]->g *= scale;
		colors[ c ]->b *= scale;
	}

	fD3DDevice->SetLight( ref->fD3DIndex, &scaled );
	ref->fScale = scale;
}

// inlPlToDWORDColor /////////////////////////////////////////////////
// Convert a plasma floating point color to a D3D DWORD color, packed as
// 0xAARRGGBB with 8 bits per channel.
// Each channel is clamped to [0, 1] before conversion. Without the clamp a
// component above 1 produced a value wider than 8 bits that spilled into the
// neighbouring channel, and a negative component went through an undefined
// float-to-unsigned conversion. In-range colors convert exactly as before.
static inline DWORD inlPlToDWORDColor(const hsColorRGBA& c)
{
	// Ordered most significant byte first: alpha, red, green, blue.
	const float channels[4] = { c.a, c.r, c.g, c.b };

	DWORD packed = 0;
	for( int i = 0; i < 4; i++ )
	{
		float v = channels[i];
		if( !(v > 0.f) )
			v = 0.f;		// negative (or NaN) -> 0
		else if( v > 1.f )
			v = 1.f;

		// 255.99 rather than 255 so that 1.0 maps to 255 under truncation.
		packed = (packed << 8) | DWORD(v * 255.99f);
	}
	return packed;
}

// inlPlToD3DColor ////////////////////////////////////////////////////
// Build a D3D floating point color from a plasma color's r, g and b,
// using the separately supplied alpha a (the plasma color's own alpha is
// ignored).
inline D3DCOLORVALUE plDXPipeline::inlPlToD3DColor(const hsColorRGBA& c, float a) const
{
	// D3DCOLORVALUE members are laid out r, g, b, a.
	D3DCOLORVALUE d3dColor = { c.r, c.g, c.b, a };
	return d3dColor;
}

// inlEnsureLightingOn ////////////////////////////////////////////////
// Enable D3D lighting, skipping the render state call when the cached
// state says it is already on.
inline void plDXPipeline::inlEnsureLightingOn()
{
	if( fCurrD3DLiteState )
		return;

	fD3DDevice->SetRenderState( D3DRS_LIGHTING, TRUE );
	fCurrD3DLiteState = true;
}

// inlEnsureLightingOff ///////////////////////////////////////////////
// Disable D3D lighting, skipping the render state call when the cached
// state says it is already off.
inline void plDXPipeline::inlEnsureLightingOff()
{
	if( !fCurrD3DLiteState )
		return;

	fD3DDevice->SetRenderState( D3DRS_LIGHTING, FALSE );
	fCurrD3DLiteState = false;
}

// ColorMul ///////////////////////////////////////////////////////////
// Component-wise product (r, g, b and a) of a D3D floating point color and
// a plasma floating point color, returned as a D3D floating point color.
static inline D3DCOLORVALUE ColorMul(const D3DCOLORVALUE& c0, const hsColorRGBA& c1)
{
	// D3DCOLORVALUE members are laid out r, g, b, a.
	D3DCOLORVALUE product =
	{
		c0.r * c1.r,
		c0.g * c1.g,
		c0.b * c1.b,
		c0.a * c1.a
	};
	return product;
}

//// ICalcLighting ////////////////////////////////////////////////////////////
// Kind of misnamed. Sets the D3D material lighting model based on what we're
// currently doing.
//
// currLayer - layer whose preshade/runtime/ambient/specular colors, opacity and
//			   specular power are read to fill in the D3D material. It is
//			   dereferenced without a nil check in the material and
//			   vtx-non-preshaded paths.
// currSpan  - span being drawn. Its fProps (masked with plSpan::kLiteMask) picks
//			   the lighting equation; nil selects plSpan::kLiteMaterial.
//
// Side effects: issues SetMaterial/SetRenderState calls on fD3DDevice, may set
// shade flags on fLayerState[0] (bump mapping case), and records the chosen
// method in fCurrLightingMethod (except in the all-bright debug path and the
// "bad lighting type" default case, which leave it untouched).
void	plDXPipeline::ICalcLighting( const plLayerInterface *currLayer, const plSpan *currSpan )
{
	D3DMATERIAL9	mat;
	// Function-static scale factors; nothing in this function modifies them,
	// so both stay at 1.f unless changed externally (e.g. from a debugger).
	static hsScalar diffScale = 1.f;
	static hsScalar ambScale = 1.f;
	UInt32			props;


	plProfile_Inc(MatLightState);

			/// New (temporary) lighting method:
			/// The vertices now include the following:
			///		diffuse = maxVertexColor * matDiffuse + matAmbient
			///		specular = ( maxLighting + maxIllum ) * matDiffuse + matAmbient
			/// And we want the lighting set up like:
			///		L = I*v1 + v2 + (sigma)(light stuff * v3 + 0)
			///	Where I = 0 for now (will be the environmental light constant eventually),
			///	v1 is the diffuse vertex color and v2 is the specular vertex color.
			/// So it basically translates into:
			///		D3D ambient color = diffuse vertex color
			///		D3D ambient constant = environmental light constant (0 for now)
			///		D3D emissive color = specular vertex color
			///		D3D diffuse color = diffuse vertex color

	/// We now provide three lighting equations at the pipeline's disposal:
	///		Material: (the one we all know and love)
	///				MATd * VTXd + MATa + <sigma of lighting w/ MATd>
	///		Vtx preshaded: (particle systems)
	///				MATa * VTXd + 0 + <sigma of lighting w/ VTXd>
	///		Vtx non-preshaded:
	///				white * VTXd + MATa + <sigma of lighting w/ VTXd>
	///	We also have a few more for shadows and such, which are handled individually

	// Start from an all-zero (black, zero power) material; each path below
	// only fills in the members it cares about.
	memset( &mat, 0, sizeof( mat ) );

	/// Normal rendering--select the right lighting equation
	// Debug override: white diffuse/ambient/emissive material, every color
	// source taken from the material, and a white global ambient. Returns
	// before fCurrLightingMethod is touched.
	if (IsDebugFlagSet(plPipeDbg::kFlagAllBright))
	{
		inlEnsureLightingOn();
		mat.Diffuse.r = mat.Diffuse.g = mat.Diffuse.b = mat.Diffuse.a = 1.f;
		mat.Ambient.r = mat.Ambient.g = mat.Ambient.b = mat.Ambient.a = 1.f;
		mat.Emissive.r = mat.Emissive.g = mat.Emissive.b = mat.Emissive.a = 1.f;
		fD3DDevice->SetMaterial( &mat );
		fD3DDevice->SetRenderState( D3DRS_DIFFUSEMATERIALSOURCE, D3DMCS_MATERIAL );
		fD3DDevice->SetRenderState( D3DRS_SPECULARMATERIALSOURCE, D3DMCS_MATERIAL );
		fD3DDevice->SetRenderState( D3DRS_EMISSIVEMATERIALSOURCE, D3DMCS_MATERIAL );
		fD3DDevice->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_MATERIAL );
		fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0xffffffff );
		return;
	}

	// Lighting method comes from the span; with no span fall back to material lighting.
	props = ( currSpan != nil ) ? ( currSpan->fProps & plSpan::kLiteMask ) : plSpan::kLiteMaterial;

	// Any bump channel on the base layer forces material lighting and marks
	// the base layer state as NoShade + White (this modifies fLayerState[0]).
	if( fLayerState[0].fMiscFlags & hsGMatState::kMiscBumpChans )
	{
		props = plSpan::kLiteMaterial;
		fLayerState[0].fShadeFlags |= hsGMatState::kShadeNoShade | hsGMatState::kShadeWhite;
	}
	/// Select one of our three lighting methods
	switch( props )
	{
	case plSpan::kLiteMaterial:		// Material shading

		///		Material: (the one we all know and love)
		///				MATd * VTXd + MATa + <sigma of lighting w/ MATd>

		inlEnsureLightingOn();

		// D3D ambient - give it our material static diffuse, since it will be multiplied by the vertex color
		if( fLayerState[0].fShadeFlags & hsGMatState::kShadeWhite )
		{
			mat.Ambient.r = mat.Ambient.g = mat.Ambient.b = diffScale;
			mat.Ambient.a = 1.f;

		}
		else if (IsDebugFlagSet(plPipeDbg::kFlagNoPreShade))
		{
			// Debug: drop the preshade contribution entirely.
			mat.Ambient.r = mat.Ambient.g = mat.Ambient.b = 0;
			mat.Ambient.a = 1.f;
		}
		else
			mat.Ambient = inlPlToD3DColor(currLayer->GetPreshadeColor() * diffScale, 1.f);

		// D3D diffuse - give it our runtime material diffuse
		mat.Diffuse = inlPlToD3DColor(currLayer->GetRuntimeColor() * diffScale, currLayer->GetOpacity());

		// D3D emissive - give it our material ambient
		mat.Emissive = inlPlToD3DColor(currLayer->GetAmbientColor() * ambScale, 1.f);

		// Set specular properties
		// (left at the zeroed defaults when the layer is not specular)
		if( fLayerState[0].fShadeFlags & hsGMatState::kShadeSpecular )
		{
			mat.Specular = inlPlToD3DColor( currLayer->GetSpecularColor(), 1.f);
			mat.Power = currLayer->GetSpecularPower();
		}

		fD3DDevice->SetMaterial( &mat );
		fD3DDevice->SetRenderState( D3DRS_DIFFUSEMATERIALSOURCE, D3DMCS_MATERIAL );
		fD3DDevice->SetRenderState( D3DRS_SPECULARMATERIALSOURCE, D3DMCS_MATERIAL );
		fD3DDevice->SetRenderState( D3DRS_EMISSIVEMATERIALSOURCE, D3DMCS_MATERIAL );

		// Global ambient: white for kShadeWhite, otherwise the material ambient just computed.
		// NOTE(review): the cast below reinterprets a D3DCOLORVALUE as an hsColorRGBA,
		// which presumably relies on both structs sharing the same layout - confirm.
		if( fLayerState[0].fShadeFlags & hsGMatState::kShadeWhite )
			fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0xffffffff );
		else
			fD3DDevice->SetRenderState( D3DRS_AMBIENT, inlGetD3DColor( *(hsColorRGBA*)&mat.Ambient ) );

		// With kShadeNoShade the ambient term uses the material color; otherwise
		// it is taken from the vertex diffuse color (COLOR1).
		if( fLayerState[0].fShadeFlags & hsGMatState::kShadeNoShade )
			fD3DDevice->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_MATERIAL );
		else
			fD3DDevice->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_COLOR1 );

		fCurrLightingMethod = plSpan::kLiteMaterial;
		break;

	case plSpan::kLiteVtxPreshaded:  // Vtx preshaded
		//				MATa * VTXd  + 0     + <sigma of lighting w/ VTXd>
		// Mapping to:  GLa  * AMSrc + EMSrc + <.....................DMSrc>

		// The first branch below is compiled out; only the #else branch is live.
#if 0 // PARTICLESHADE
		if( fLayerState[0].fShadeFlags & hsGMatState::kShadeEmissive )
		{
			inlEnsureLightingOff();
		}
		else
		{
			inlEnsureLightingOn();

			// Set a black material (we ONLY care about vertex color when doing particles,
			//						 er I mean, vtxPreshaded)
			fD3DDevice->SetMaterial( &mat );

			fD3DDevice->SetRenderState( D3DRS_DIFFUSEMATERIALSOURCE, D3DMCS_COLOR1 );
			fD3DDevice->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_MATERIAL );
			fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0 );

			fD3DDevice->SetRenderState( D3DRS_EMISSIVEMATERIALSOURCE, D3DMCS_MATERIAL );
			fD3DDevice->SetRenderState( D3DRS_SPECULARMATERIALSOURCE, D3DMCS_MATERIAL );
		}
#else // PARTICLESHADE
		inlEnsureLightingOn();

		// MATa * white + 0 + <sigma of lighting with VTXd>

		// mat is still the zeroed material from the memset above, and the
		// global ambient is set to 0, so only the vertex color feeds lighting.
		fD3DDevice->SetMaterial( &mat );

		fD3DDevice->SetRenderState( D3DRS_DIFFUSEMATERIALSOURCE, D3DMCS_COLOR1 );
		fD3DDevice->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_MATERIAL );
		fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0 );

		fD3DDevice->SetRenderState( D3DRS_SPECULARMATERIALSOURCE, D3DMCS_MATERIAL );

		// Emissive layers take their emissive term from the vertex diffuse color;
		// otherwise it comes from the (black) material.
		if( fLayerState[0].fShadeFlags & hsGMatState::kShadeEmissive )
			fD3DDevice->SetRenderState( D3DRS_EMISSIVEMATERIALSOURCE, D3DMCS_COLOR1 );
		else
			fD3DDevice->SetRenderState( D3DRS_EMISSIVEMATERIALSOURCE, D3DMCS_MATERIAL );

#endif // PARTICLESHADE

		fCurrLightingMethod = plSpan::kLiteVtxPreshaded;
		break;


	case plSpan::kLiteVtxNonPreshaded:		// Vtx non-preshaded
		//				white * VTXd + MATa  + <sigma of lighting w/ VTXd>
		// Mapping to:  GLa  * AMSrc + EMSrc + <.....................DMSrc>

		inlEnsureLightingOn();

		// D3D emissive - give it our material ambient
		mat.Emissive = inlPlToD3DColor(currLayer->GetAmbientColor() * ambScale, 1.f);

		// Set specular properties
		if( fLayerState[0].fShadeFlags & hsGMatState::kShadeSpecular )
		{
			mat.Specular = inlPlToD3DColor( currLayer->GetSpecularColor(), 1.f);
			mat.Power = currLayer->GetSpecularPower();
		}
		fD3DDevice->SetMaterial( &mat );

		// Lightmaps want WHITE here, otherwise we want BLACK
		// The global ambient is the layer's preshade color packed into a DWORD.
		DWORD preShadeStrength;
		preShadeStrength = inlPlToDWORDColor(currLayer->GetPreshadeColor());
		fD3DDevice->SetRenderState(D3DRS_AMBIENT, preShadeStrength);

		fD3DDevice->SetRenderState( D3DRS_DIFFUSEMATERIALSOURCE, D3DMCS_COLOR1 );
		fD3DDevice->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_COLOR1 );
		fD3DDevice->SetRenderState( D3DRS_EMISSIVEMATERIALSOURCE, D3DMCS_MATERIAL );
		fD3DDevice->SetRenderState( D3DRS_SPECULARMATERIALSOURCE, D3DMCS_MATERIAL );

		fCurrLightingMethod = plSpan::kLiteVtxNonPreshaded;
		break;

	default:
		// Unknown lighting bits: no device state is changed and
		// fCurrLightingMethod keeps its previous value.
		hsAssert( false, "Bad lighting type" );
		break;
	}

}

///////////////////////////////////////////////////////////////////////////////
//// plDXLightSettings Functions /////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// Default constructor. Only the three pointers are given a value here (all
// nil); the remaining members are assigned their starting values in Reset().
plDXLightSettings::plDXLightSettings()
	: fActiveList( nil )
	, fRefList( nil )
	, fPipeline( nil )
{
}

//// Reset ////////////////////////////////////////////////////////////////////
//	Releases whatever these settings currently hold, then puts the members
//	back to their starting values and binds the settings to <pipe>.

void	plDXLightSettings::Reset( plDXPipeline *pipe )
{
	// Has to happen before fPipeline is replaced: Release() unregisters the
	// active lights through the pipeline pointer that is currently stored.
	Release();

	fPipeline = pipe;
	fRefList = nil;
	fTime = 0;

	/// Light 0 is reserved, so index allocation starts at 1.
	fLastIndex = 1;
	fNextIndex = 1;
	fNextShadowLight = 0;

	// Projection lists.
	fProjEach.Reset();
	fProjAll.Reset();

	// Per-index bookkeeping bits.
	fUsedFlags.Clear();
	fEnabledFlags.Clear();
	fHoldFlags.Clear();
}

//// Release //////////////////////////////////////////////////////////////////
//	Releases/deletes anything associated with these settings.
// This includes unregistering all lights.
void	plDXLightSettings::Release()
{
	plDXLightRef	*ref;

	fProjEach.Reset();
	fProjAll.Reset();

	while( fRefList )
	{
		ref = fRefList;
		ref->Release();
		ref->Unlink();
	}

	// Tell the light infos to unlink themselves
	while( fActiveList )
		fPipeline->UnRegisterLight( fActiveList );

	fShadowLights.SetCount(fShadowLights.GetNumAlloc());
	int i;
	for( i = 0; i < fShadowLights.GetCount(); i++ )
	{
		hsRefCnt_SafeUnRef(fShadowLights[i]);
		fShadowLights[i] = nil;
	}
	fShadowLights.SetCount(0);

}

//// ReserveD3DIndex //////////////////////////////////////////////////////////
//	Reserve a D3D light index: scans upward from fNextIndex for the first
//	index whose "used" bit is clear, marks it used, clears its enabled and
//	hold bits, and raises fLastIndex if needed. Returns the reserved index.

UInt32	plDXLightSettings::ReserveD3DIndex()
{
	// Skip past indices that are already taken (the scan stops at (UInt32)-1).
	while( fNextIndex < (UInt32)-1 && fUsedFlags.IsBitSet( fNextIndex ) )
		fNextIndex++;

	const UInt32 reserved = fNextIndex;

	fUsedFlags.SetBit( reserved );
	fEnabledFlags.ClearBit( reserved );		// a fresh index starts out disabled
	fHoldFlags.ClearBit( reserved );

	// Track the highest index handed out so far.
	if( fLastIndex < reserved )
		fLastIndex = reserved;

	return reserved;
}

//// ReleaseD3DIndex //////////////////////////////////////////////////////////
//	Release a reserved D3D light index to be reused.
//	idx - index previously returned by ReserveD3DIndex().
//
//	Light 0 is reserved (Reset() starts both fNextIndex and fLastIndex at 1),
//	so neither cursor is allowed to drop below 1 here. Previously, releasing
//	the last outstanding light walked fLastIndex down to 0 and dragged
//	fNextIndex with it, so the next ReserveD3DIndex() could hand out index 0.

void	plDXLightSettings::ReleaseD3DIndex( UInt32 idx )
{
	const UInt32 kFirstUsableIndex = 1;		// index 0 is reserved

	fUsedFlags.SetBit( idx, false );
	if( fNextIndex > idx )
		fNextIndex = idx;		// Forces search to start here next time

	// Dec down fLastIndex to the highest index still in use, but never below
	// the first usable index.
	while( fLastIndex > kFirstUsableIndex && !fUsedFlags.IsBitSet( fLastIndex ) )
		fLastIndex--;
	if( fLastIndex < kFirstUsableIndex )
		fLastIndex = kFirstUsableIndex;

	if( fNextIndex > fLastIndex )
		fNextIndex = fLastIndex;

	// Covers the case where idx itself was 0: the search must still start at 1.
	if( fNextIndex < kFirstUsableIndex )
		fNextIndex = kFirstUsableIndex;
}


///////////////////////////////////////////////////////////////////////////////
//// Materials ////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

//// ISetLayer ////////////////////////////////////////////////////////////////
// Selects the render layer. Layer 0 is handed to IBottomLayer(). A non-zero
// layer that differs from the current one is recorded and given a Z bias:
// through D3DRS_DEPTHBIAS when the device has kCapsZBias, otherwise by
// re-sending the projection matrix.
void	plDXPipeline::ISetLayer( UInt32 lay )
{
	if( !lay )
	{
		IBottomLayer();
		return;
	}

	// Same layer as last time: device state is already right.
	if( fCurrRenderLayer == lay )
		return;

	fCurrRenderLayer = lay;

	plCONST(int) kBiasMult = 8;
	if( fSettings.fD3DCaps & kCapsZBias )
		fD3DDevice->SetRenderState( D3DRS_DEPTHBIAS, kBiasMult * fCurrRenderLayer );
	else
		IProjectionMatrixToD3D();
}

//// IBottomLayer /////////////////////////////////////////////////////////////
// Return to render layer 0, i.e. no Z bias. Does nothing when layer 0 is
// already current.
void	plDXPipeline::IBottomLayer()
{
	if( fCurrRenderLayer == 0 )
		return;

	fCurrRenderLayer = 0;

	// Devices with Z bias support get the bias reset directly; the others
	// have the projection matrix re-sent instead.
	if( fSettings.fD3DCaps & kCapsZBias )
		fD3DDevice->SetRenderState( D3DRS_DEPTHBIAS, 0 );
	else
		IProjectionMatrixToD3D();
}

// Special effects /////////////////////////////////////////////////////////////

// IPushOverBaseLayer /////////////////////////////////////////////////////////
// Wraps the input layer with fOverBaseLayer, which is only applied to the
// base layer during multitexturing. The wrapper can then intercept and modify
// queries of the real layer's properties (e.g. color or state).
// The input is always pushed on fOverLayerStack. With no over-base layer set,
// the input simply becomes fOverBaseLayer; otherwise the input is attached to
// the existing one, the result is evaluated at the current time/frame and
// fForceMatHandle is raised.
// Returns the new fOverBaseLayer, or nil if <li> is nil (nothing is pushed).
// Must be matched with call to IPopOverBaseLayer.
plLayerInterface* plDXPipeline::IPushOverBaseLayer(plLayerInterface* li)
{
	if( li == nil )
		return nil;

	fOverLayerStack.Push(li);

	if( fOverBaseLayer )
	{
		fForceMatHandle = true;
		fOverBaseLayer = fOverBaseLayer->Attach(li);
		fOverBaseLayer->Eval(fTime, fFrame, 0);
	}
	else
	{
		// Nothing to wrap with; the input layer is used as is (no Eval here).
		fOverBaseLayer = li;
	}

	return fOverBaseLayer;
}

// IPopOverBaseLayer /////////////////////////////////////////////////////////
// Undoes one IPushOverBaseLayer: pops the top of fOverLayerStack, detaches it
// from fOverBaseLayer and raises fForceMatHandle.
// <li> is only tested against nil; the layer that gets detached and returned
// is whatever is on top of the stack.
// Should match calls to IPushOverBaseLayer.
plLayerInterface* plDXPipeline::IPopOverBaseLayer(plLayerInterface* li)
{
	if( li == nil )
		return nil;

	fForceMatHandle = true;

	plLayerInterface* top = fOverLayerStack.Pop();
	fOverBaseLayer = fOverBaseLayer->Detach(top);
	return top;
}

// IPushOverAllLayer ///////////////////////////////////////////////////
// Wraps the input layer with fOverAllLayer, which is applied to each layer
// during multitexturing.
// The input is always pushed on fOverLayerStack. With no over-all layer set,
// the input becomes fOverAllLayer; otherwise it is attached to the existing
// one and fForceMatHandle is raised. Either way the resulting fOverAllLayer
// is evaluated at the current time/frame and returned.
// Returns nil (and pushes nothing) if <li> is nil.
// Must be matched by call to IPopOverAllLayer
plLayerInterface* plDXPipeline::IPushOverAllLayer(plLayerInterface* li)
{
	if( li == nil )
		return nil;

	fOverLayerStack.Push(li);

	if( fOverAllLayer )
	{
		fForceMatHandle = true;
		fOverAllLayer = fOverAllLayer->Attach(li);
	}
	else
	{
		fOverAllLayer = li;
	}

	// Both paths evaluate the resulting layer before handing it back.
	fOverAllLayer->Eval(fTime, fFrame, 0);
	return fOverAllLayer;
}

// IPopOverAllLayer //////////////////////////////////////////////////
// Undoes one IPushOverAllLayer: pops the top of fOverLayerStack, detaches it
// from fOverAllLayer and raises fForceMatHandle.
// <li> is only tested against nil; the layer that gets detached and returned
// is whatever is on top of the stack.
// Should match calls to IPushOverAllLayer.
plLayerInterface* plDXPipeline::IPopOverAllLayer(plLayerInterface* li)
{
	if( li == nil )
		return nil;

	fForceMatHandle = true;

	plLayerInterface* top = fOverLayerStack.Pop();
	fOverAllLayer = fOverAllLayer->Detach(top);
	return top;
}

// PiggyBacks - used in techniques like projective lighting.
// PiggyBacks are layers appended to each drawprimitive pass.
// For example, if a material has 3 layers which will be drawn
// in 2 passes,
//		pass0: layer0+layer1
//		pass1: layer2
// Then if a piggyback layer layerPB is active, the actual rendering would be
//		pass0: layer0+layer1+layerPB
//		pass1: layer2 + layerPB

// ISetNumActivePiggyBacks /////////////////////////////////////////////
// Recompute fActivePiggyBacks as the smaller of the configured maximum
// (fSettings.fMaxPiggyBacks) and the current piggy back stack depth.
// Returns the new value.
int plDXPipeline::ISetNumActivePiggyBacks()
{
	fActivePiggyBacks = hsMinimum(fSettings.fMaxPiggyBacks, fPiggyBackStack.GetCount());
	return fActivePiggyBacks;
}

// IPushProjPiggyBack //////////////////////////////////////////////////
// Push a projected texture on as a piggy back. Ignored while the view has
// kRenderNoPiggyBacks set. The active count becomes the stack depth minus the
// material piggy backs, and fForceMatHandle is raised.
void plDXPipeline::IPushProjPiggyBack(plLayerInterface* li)
{
	if( !(fView.fRenderState & plPipeline::kRenderNoPiggyBacks) )
	{
		fPiggyBackStack.Push(li);
		fActivePiggyBacks = fPiggyBackStack.GetCount() - fMatPiggyBacks;
		fForceMatHandle = true;
	}
}

// IPopProjPiggyBacks /////////////////////////////////////////////////
// Remove projected textures from use as piggy backs by truncating the stack
// to the fMatPiggyBacks entries, then recompute the active count and raise
// fForceMatHandle. Ignored while the view has kRenderNoPiggyBacks set.
void plDXPipeline::IPopProjPiggyBacks()
{
	if( !(fView.fRenderState & plPipeline::kRenderNoPiggyBacks) )
	{
		fPiggyBackStack.SetCount(fMatPiggyBacks);
		ISetNumActivePiggyBacks();
		fForceMatHandle = true;
	}
}

// IPushPiggyBacks ////////////////////////////////////////////////////
// Push the piggy backs associated with a material (presumed to be light
// maps, because that's all they are used for) and count them in
// fMatPiggyBacks. Nil entries are skipped, as are light map layers while the
// kFlagNoLightmaps debug flag is set.
// Ignored while the view has kRenderNoPiggyBacks set.
// Matched with IPopPiggyBacks
void plDXPipeline::IPushPiggyBacks(hsGMaterial* mat)
{
	hsAssert(!fMatPiggyBacks, "Push/Pop Piggy mismatch");

	if( fView.fRenderState & plPipeline::kRenderNoPiggyBacks )
		return;

	for( int idx = 0; idx < mat->GetNumPiggyBacks(); idx++ )
	{
		plLayerInterface* piggy = mat->GetPiggyBack(idx);
		if( piggy == nil )
			continue;

		// Debug switch: leave light maps out.
		const bool isLightMap = (piggy->GetMiscFlags() & hsGMatState::kMiscLightMap) != 0;
		if( isLightMap && IsDebugFlagSet(plPipeDbg::kFlagNoLightmaps) )
			continue;

		fPiggyBackStack.Push(piggy);
		fMatPiggyBacks++;
	}

	ISetNumActivePiggyBacks();
	fForceMatHandle = true;
}

// IPopPiggyBacks ///////////////////////////////////////////////////////
// Pop the piggy backs counted by IPushPiggyBacks: shrinks the stack by
// fMatPiggyBacks entries, zeroes that counter, recomputes the active count
// and raises fForceMatHandle. Ignored while the view has kRenderNoPiggyBacks
// set.
// Matches IPushPiggyBacks.
void plDXPipeline::IPopPiggyBacks()
{
	if( !(fView.fRenderState & plPipeline::kRenderNoPiggyBacks) )
	{
		const int remaining = fPiggyBackStack.GetCount() - fMatPiggyBacks;
		fPiggyBackStack.SetCount(remaining);
		fMatPiggyBacks = 0;

		ISetNumActivePiggyBacks();
		fForceMatHandle = true;
	}
}

//// IHandleMaterial //////////////////////////////////////////////////////////
//	Takes the starting "layer" and uses as many layers as possible in the given
//	material and sets up the device to draw with it. Returns the first layer
//	index not yet used. (I.e. if we ate layers 0 and 1, it'll return 2).
// A return value of -1 means don't bother rendering.
//
//	newMat   - material to draw with. nil means nothing to draw (-1).
//	layer    - index of the first layer of newMat to consume in this pass.
//	currSpan - span being drawn (may be nil); used for bump map skipping and
//			   to select the lighting method.
//
//	Fix: the original guard was "!newMat && newMat->GetLayer(layer)", which
//	dereferenced newMat exactly when it was nil and could never reject anything
//	otherwise. It is now a plain nil test, and a nil layer is rejected once the
//	index is known to be in range.

Int32	plDXPipeline::IHandleMaterial( hsGMaterial *newMat, UInt32 layer, const plSpan *currSpan )
{
	// No material means no draw.
	if( !newMat )
		return -1;

	// If this is a bump mapping pass but the object isn't currently runtime lit, just skip.
	// Note that <layer> may change here, if we're skipping past the bump layers but there
	// are more layers (passes) to do after that.
	if( ISkipBumpMap(newMat, layer, currSpan) )
	{
		return -1;
	}

	// Workaround for the ATI Radeon 7500's inability to use uvw coordinates above 1.
	// If we have a layer trying to use uvw 2 or higher, skip it and any layers bound to
	// it.
	while( (layer < newMat->GetNumLayers())
		&& newMat->GetLayer(layer)
		&& ((newMat->GetLayer(layer)->GetUVWSrc() & 0xf) > fSettings.fMaxUVWSrc) )
	{
		if( newMat->GetLayer(layer)->GetMiscFlags() & hsGMatState::kMiscBindNext )
			layer++;
		layer++;
	}
	if( layer >= newMat->GetNumLayers() )
		return -1;

	// A missing layer can't be drawn either. Everything below dereferences the
	// layer, so bail out here now that the index is known to be valid.
	if( !newMat->GetLayer(layer) )
		return -1;

	// If nothing has changed, we don't need to recompute and set state.
	if( !fForceMatHandle && (newMat == fCurrMaterial && layer == fCurrLayerIdx) )
	{
		// Before returning, check if we have to redo our lighting
		UInt32		lightType = ( currSpan != nil ) ? ( currSpan->fProps & plSpan::kLiteMask ) : plSpan::kLiteMaterial;
		if( lightType != fCurrLightingMethod )
			ICalcLighting( fCurrLay, currSpan );

		if( fLayerState[0].fMiscFlags & (hsGMatState::kMiscBumpDu|hsGMatState::kMiscBumpDw) )
			ISetBumpMatrices(fCurrLay, currSpan);

		return layer + fCurrNumLayers;
	}

	fForceMatHandle = false;

	fCurrLayerIdx = layer;
//	fCurrNumLayers = newMat->GetNumLayers();

	if (newMat != fCurrMaterial)
		plProfile_Inc(MatChange);
	plProfile_Inc(LayChange);

	/// Test for fail states
	if (IsDebugFlagSet(plPipeDbg::kFlagNoDecals) && (newMat->GetCompositeFlags() & hsGMaterial::kCompDecal))
	{
		return -1;
	}

	/// Workaround for a D3D limitation--you're not allowed to render with a texture that you're
	/// rendering INTO. Hence we can't have self-reflecting cubicRenderTargets.
	if( fSettings.fCurrBaseRenderTarget != nil &&
		newMat->GetLayer( layer )->GetTexture() == plBitmap::ConvertNoRef( fSettings.fCurrBaseRenderTarget ) )
	{
		return -1;
	}

	/// Figure out our current states
	// Start with the base layer.
	plLayerInterface	*currLay = IPushOverBaseLayer(newMat->GetLayer(layer));

	// Debug W-only bump approximation: step past the Du layer.
	if (IsDebugFlagSet(plPipeDbg::kFlagBumpW) && (currLay->GetMiscFlags() & hsGMatState::kMiscBumpDu) )
		currLay = newMat->GetLayer(fCurrLayerIdx = ++layer);

	currLay = IPushOverAllLayer(currLay);

	/// Save stuff for next time around
	ICompositeLayerState(0, currLay);
	hsRefCnt_SafeAssign( fCurrMaterial, newMat );
	fCurrLayerIdx = layer;
	fCurrLay = currLay;

	if (IsDebugFlagSet(plPipeDbg::kFlagDisableSpecular))
		fLayerState[0].fShadeFlags &= ~hsGMatState::kShadeSpecular;

	// ZIncLayer requests Z bias for upper layers.
	if( fLayerState[0].fZFlags & hsGMatState::kZIncLayer )
		ISetLayer( 1 );
	else
		IBottomLayer();

	/// A few debugging things
	if (IsDebugFlagSet(plPipeDbg::kFlagNoAlphaBlending))
		fLayerState[0].fBlendFlags &= ~hsGMatState::kBlendMask;

	if ((IsDebugFlagSet(plPipeDbg::kFlagBumpUV) || IsDebugFlagSet(plPipeDbg::kFlagBumpW)) && (fLayerState[0].fMiscFlags & hsGMatState::kMiscBumpChans) )
	{
		// In the debug bump modes, a Dv/Dw layer is switched to MADD blending
		// unless the layer 2 (Dv) or 1 (Dw) slots earlier is additive.
		switch( fLayerState[0].fMiscFlags & hsGMatState::kMiscBumpChans )
		{
		case hsGMatState::kMiscBumpDu:
			break;
		case hsGMatState::kMiscBumpDv:
			if( !(fCurrMaterial->GetLayer(layer-2)->GetBlendFlags() & hsGMatState::kBlendAdd) )
			{
				fLayerState[0].fBlendFlags &= ~hsGMatState::kBlendMask;
				fLayerState[0].fBlendFlags |= hsGMatState::kBlendMADD;
			}
			break;
		case hsGMatState::kMiscBumpDw:
			if( !(fCurrMaterial->GetLayer(layer-1)->GetBlendFlags() & hsGMatState::kBlendAdd) )
			{
				fLayerState[0].fBlendFlags &= ~hsGMatState::kBlendMask;
				fLayerState[0].fBlendFlags |= hsGMatState::kBlendMADD;
			}
			break;
		default:
			break;
		}
	}

	/// Get the # of layers we can draw in this pass into fCurrNumLayers
	int oldNumLayers = fCurrNumLayers;
	ILayersAtOnce( newMat, layer );
	if( oldNumLayers != fCurrNumLayers )
	{
		// This hack is necessary to cover a hack necessary to cover a "limitation" in the GeForce2 drivers.
		// Basically, we have to handle NoTexAlpha/Color differently if it's stage 1 than other stages,
		// so even though the BlendFlags haven't changed, the calls to D3D are different. Another
		// way to handle this would be to have a different handler based on whether we are 2 TMU limited
		// or not, but whatever.
		// Setting the flags to UInt32(-1) makes ICompositeLayerState treat the
		// stage's blend state as changed on the next composite.
		if( fLayerState[1].fBlendFlags & (hsGMatState::kBlendNoTexAlpha | hsGMatState::kBlendNoTexColor) )
			fLayerState[1].fBlendFlags = UInt32(-1);
	}

	// Placed here, since it's material-dependent (or more accurately, current-layer-dependent)
	ICalcLighting( currLay, currSpan );

	// If we're bump mapping, compute the texture transforms.
	if( fLayerState[0].fMiscFlags & (hsGMatState::kMiscBumpDu|hsGMatState::kMiscBumpDw) )
		ISetBumpMatrices(currLay, currSpan);

	/// Transfer states to D3D now
	IHandleFirstTextureStage( currLay );

	// Unwind the wrappers in the reverse order they were pushed.
	currLay = IPopOverAllLayer(currLay);
	currLay = IPopOverBaseLayer(currLay);
	fCurrLay = currLay;

	int nextLayer = fCurrLayerIdx + fCurrNumLayers;
	if (IsDebugFlagSet(plPipeDbg::kFlagBumpW) && (fLayerState[0].fMiscFlags & hsGMatState::kMiscBumpDw) )
	{
		// Bump mapping approximation using only the W (normal direction) component of lighting.
		plLayerInterface* layPtr = IPushOverAllLayer(newMat->GetLayer(fCurrLayerIdx + 2));
		if( !layPtr )
			return -1;
		ICompositeLayerState(1, layPtr);
		IHandleTextureStage( 1, layPtr );
		layPtr = IPopOverAllLayer(layPtr);
		nextLayer = fCurrLayerIdx + 3;
	}
	else if (IsDebugFlagSet(plPipeDbg::kFlagBumpUV) && (fLayerState[0].fMiscFlags & hsGMatState::kMiscBumpDu) )
	{
		// Bump mapping approximation using only the UV (surface tangent directions) component of lighting.
		plLayerInterface* layPtr = IPushOverAllLayer(newMat->GetLayer(fCurrLayerIdx + 3));
		if( !layPtr )
			return -1;
		ICompositeLayerState(1, layPtr);
		IHandleTextureStage( 1, layPtr );
		layPtr = IPopOverAllLayer(layPtr);
		nextLayer = fCurrLayerIdx + 2;
	}
	else
	{
		// Normal multi texturing.
		/// Loop through all multitexturing layers
		int	i;
		// Base-layer-only rendering: report the whole material as consumed.
		if( fView.fRenderState & plPipeline::kRenderBaseLayerOnly )
			nextLayer = newMat->GetNumLayers();

		for( i = 1; i < fCurrNumLayers; i++ )
		{
			plLayerInterface* layPtr = newMat->GetLayer( fCurrLayerIdx + i );
			if( !layPtr )
				return -1;

			// Can't render into a render target using same rendertarget as a texture.
			if( fSettings.fCurrBaseRenderTarget
				&&
				layPtr->GetTexture() == (plBitmap*)(fSettings.fCurrBaseRenderTarget) )
			{
				// Oops, just bail
				return -1;
			}

			layPtr = IPushOverAllLayer(layPtr);
			ICompositeLayerState(i, layPtr);
			IHandleTextureStage( i, layPtr );
			layPtr = IPopOverAllLayer(layPtr);
		}

	}

	// More cleanup for the DX9.0c 2 texture limitation. See ILayersAtOnce()
	if (fSettings.fMaxLayersAtOnce == 2)
	{
		if ((fLayerState[0].fBlendFlags & hsGMatState::kBlendAdd)
			&& (newMat->GetNumLayers() > fCurrLayerIdx + 1)
			&& (newMat->GetLayer(fCurrLayerIdx + 1)->GetUVWSrc() & plLayerInterface::kUVWPosition))
		{
			// If we're doing additive blending and the next layer is based on position,
			// it's probably a distance fade. We'd rather have our diffuse color.
			// ILayersAtOnce will already have told us we can't use it this pass.
			// Skip it so it won't draw on its own next pass.
			nextLayer++;
		}
	}

	int numActivePiggyBacks = 0;
	// Piggy backs are not applied to bump channel passes or emissive layers.
	if( !(fLayerState[0].fMiscFlags & hsGMatState::kMiscBumpChans) && !(fLayerState[0].fShadeFlags & hsGMatState::kShadeEmissive) )
	{
		/// Tack lightmap onto last stage if we have one
		// Limited by the texture stages left over after the material's own layers.
		numActivePiggyBacks = fActivePiggyBacks;
		if( numActivePiggyBacks > fSettings.fMaxLayersAtOnce - fCurrNumLayers )
			numActivePiggyBacks = fSettings.fMaxLayersAtOnce - fCurrNumLayers;
		if( numActivePiggyBacks )
		{
			int i;
			for( i = 0; i < numActivePiggyBacks; i++ )
			{
				// Note that we take piggybacks off the end of fPiggyBackStack.
				plLayerInterface* layPtr = IPushOverAllLayer( fPiggyBackStack[fPiggyBackStack.GetCount()-1-i] );
				if( !layPtr )
					return -1;
				ICompositeLayerState(fCurrNumLayers+i, layPtr);
				IHandleTextureStage( fCurrNumLayers+i, layPtr );
				layPtr = IPopOverAllLayer(layPtr);
			}

			// If we've got a piggyback, plus two layers that must be drawn together, but
			// only two TMU's to work with, we're screwed. Someone has got to get skipped and
			// hope no one notices. Typically, the first (base) layer has the color info,
			// and the second the opacity. So we'll try using the projection to brighten
			// the color, ignoring the opacity.
//			if( ((fCurrNumLayers + numActivePiggyBacks) == fSettings.fMaxLayersAtOnce)
//					&& (fLayerState[0].fMiscFlags & hsGMatState::kMiscBindNext) )
			if( (fLayerState[0].fMiscFlags & hsGMatState::kMiscBindNext)
				&& (fCurrNumLayers < 2) )
				nextLayer++;
		}
	}

	// Declare we won't be using any more texture stages.
	IStageStop( fCurrNumLayers + numActivePiggyBacks );

	return nextLayer;
}

// ICompositeLayerState /////////////////////////////////////////////////////////////////
// Saves the current state of stage <which> into fOldLayerState, then rebuilds
// the current state from the input layer's state plus the material overrides.
// fMatOverOn overrides to set a state bit whether it is set in the layer or not.
// fMatOverOff overrides to clear a state bit whether it is set in the layer or not.
// Returns the newly composited state for the stage.
const hsGMatState& plDXPipeline::ICompositeLayerState(int which, plLayerInterface* layer)
{
	hsGMatState& stageState = fLayerState[which];

	fOldLayerState[which] = stageState;
	stageState.Composite(layer->GetState(), fMatOverOn, fMatOverOff);

	// UInt32(-1) in the saved blend flags is a "force refresh" marker. Replace
	// it with the complement of the new flags so that every bit differs when
	// old and new blend flags are compared.
	if( fOldLayerState[which].fBlendFlags == UInt32(-1) )
		fOldLayerState[which].fBlendFlags = ~stageState.fBlendFlags;

	return stageState;
}

//// IHandleFirstTextureStage /////////////////////////////////////////////////
// Push the base layer's state to D3D: texture mode, shade mode, Z mode (only
// when the Z flags under kZMask changed since the previous state), misc mode,
// and finally the stage 0 texture setup.
void	plDXPipeline::IHandleFirstTextureStage( plLayerInterface *layer )
{
	IHandleTextureMode(layer);
	IHandleShadeMode();

	hsGMatState& baseState = fLayerState[0];
	if( baseState.Differs( baseState.fZFlags, fOldLayerState[0].fZFlags, hsGMatState::kZMask ) )
	{
		IHandleZMode();
	}

	IHandleMiscMode();

	IHandleTextureStage( 0, layer );
}

//// IHandleShadeMode /////////////////////////////////////////////////////////
// Convert shade state into D3D settings.
void	plDXPipeline::IHandleShadeMode()
{
	if( fLayerState[0].Differs( fLayerState[0].fShadeFlags, fOldLayerState[0].fShadeFlags, hsGMatState::kShadeSpecular ) )
	{
		if( fLayerState[0].fShadeFlags & hsGMatState::kShadeSpecular )
			fD3DDevice->SetRenderState( D3DRS_SPECULARENABLE, TRUE );
		else
			fD3DDevice->SetRenderState( D3DRS_SPECULARENABLE, FALSE );
	}
}

//// IHandleZMode /////////////////////////////////////////////////////////////
// Convert Z state into D3D settings.
void	plDXPipeline::IHandleZMode()
{
	switch( fLayerState[0].fZFlags & hsGMatState::kZMask )
	{
		case hsGMatState::kZClearZ:
			fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_ALWAYS );
			fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );
			break;
		case hsGMatState::kZNoZRead:
			fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_ALWAYS );
			fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );
			break;
		case hsGMatState::kZNoZWrite:
			fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_LESSEQUAL );
			fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, FALSE );
			break;
		case hsGMatState::kZNoZRead | hsGMatState::kZClearZ:
			fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_ALWAYS );
			fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );
			break;
		case hsGMatState::kZNoZRead | hsGMatState::kZNoZWrite:
			fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, FALSE );
			fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_ALWAYS );
			break;
		case 0:
			fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_LESSEQUAL );
			fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );
			break;

		// illegal combinations
		case hsGMatState::kZClearZ | hsGMatState::kZNoZWrite:
		case hsGMatState::kZClearZ | hsGMatState::kZNoZWrite | hsGMatState::kZNoZRead:
			hsAssert(false, "Illegal combination of Z Buffer modes (Clear but don't write)");
			break;
	}
}

//// IHandleMiscMode //////////////////////////////////////////////////////////
// Convert Misc state into D3D settings.
void	plDXPipeline::IHandleMiscMode()
{
	if( fLayerState[0].Differs(fLayerState[0].fMiscFlags, fOldLayerState[0].fMiscFlags, hsGMatState::kMiscWireFrame) )
	{
		if( fLayerState[0].fMiscFlags & hsGMatState::kMiscWireFrame )
			fD3DDevice->SetRenderState( D3DRS_FILLMODE, D3DFILL_WIREFRAME );
		else
			fD3DDevice->SetRenderState( D3DRS_FILLMODE, D3DFILL_SOLID );
	}
}

//// IHandleTextureStage //////////////////////////////////////////////////////
// Issue D3D calls to enable rendering the given layer at the given texture stage.
// Blend and clamp settings are only re-sent when the stage's flags differ from
// the previous state; the UVW transform is always handled. The layer's texture
// gets a device ref (created or rebuilt if missing or dirty) which is then
// bound to the stage; with no usable ref the stage's texture is cleared.
void	plDXPipeline::IHandleTextureStage( UInt32 stage, plLayerInterface *layer )
{
	const hsGMatState& stageState = fLayerState[stage];

	// Blend mode
	if( stageState.fBlendFlags != fOldLayerState[stage].fBlendFlags )
		IHandleStageBlend(stage);

	// Texture wrap/clamp mode
	if( stageState.fClampFlags != fOldLayerState[stage].fClampFlags )
		IHandleStageClamp(stage);

	// UVW transform
	IHandleStageTransform( stage, layer );

	// Create the D3D texture (if necessary) and set it to the device.
	hsGDeviceRef* devRef = nil;
	plBitmap* bitmap = layer->GetTexture();
	if( bitmap != nil )
	{
		devRef = bitmap->GetDeviceRef();
		if( devRef == nil || devRef->IsDirty() )
		{
			// Normal textures first, then cubic environment maps. Any other
			// bitmap type keeps whatever ref it had.
			if( plMipmap* asMip = plMipmap::ConvertNoRef( bitmap ) )
				devRef = MakeTextureRef( layer, asMip );
			else if( plCubicEnvironmap* asCubic = plCubicEnvironmap::ConvertNoRef( bitmap ) )
				devRef = IMakeCubicTextureRef( layer, asCubic );
		}
	}

	if( devRef == nil )
	{
		// Nothing to bind: clear the stage and drop our reference for it.
		fD3DDevice->SetTexture( stage, NULL );
		hsRefCnt_SafeUnRef( fLayerRef[ stage ] );
		fLayerRef[ stage ] = nil;
		return;
	}

	IUseTextureRef(stage, devRef, layer);
}

// CheckTextureRef //////////////////////////////////////////////////////
// Make sure the given layer's texture has a device ref. Does nothing when the
// layer has no texture or the texture already has a ref; otherwise a ref is
// made for a mipmap or a cubic environment map (other bitmap types are left
// alone).
void plDXPipeline::CheckTextureRef(plLayerInterface* layer)
{
	plBitmap* texture = layer->GetTexture();
	if( !texture )
		return;

	hsGDeviceRef* existing = texture->GetDeviceRef();
	if( existing )
		return;

	if( plMipmap* asMip = plMipmap::ConvertNoRef(texture) )
	{
		MakeTextureRef(layer, asMip);
		return;
	}

	if( plCubicEnvironmap* asCubic = plCubicEnvironmap::ConvertNoRef(texture) )
	{
		IMakeCubicTextureRef(layer, asCubic);
	}
}

// IHandleBumpEnv //////////////////////////////////////////////////////////////
// D3D settings for BUMPENVMAPLUMINANCE.
// This has never been used in production assets, because I never got
// a good effect out of it, and BUMPENVMAPLUMINANCE isn't universally
// supported in hardware.
void plDXPipeline::IHandleBumpEnv(int stage, UInt32 blendFlags)
{
	DWORD current = stage ? D3DTA_CURRENT : D3DTA_DIFFUSE;
	UInt32 colorSrc = blendFlags & hsGMatState::kBlendInvertColor ? D3DTA_TEXTURE | D3DTA_COMPLEMENT : D3DTA_TEXTURE;

	fD3DDevice->SetTextureStageState(stage, D3DTSS_COLOROP, D3DTOP_BUMPENVMAPLUMINANCE);
	fD3DDevice->SetTextureStageState(stage, D3DTSS_COLORARG1, colorSrc);
	fD3DDevice->SetTextureStageState(stage, D3DTSS_COLORARG2, current);

	fD3DDevice->SetTextureStageState(stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2);
	fD3DDevice->SetTextureStageState(stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT);

	const hsMatrix44& envXfm = fCurrLay->GetBumpEnvMatrix();
	fD3DDevice->SetTextureStageState(stage, D3DTSS_BUMPENVMAT00, F2DW(envXfm.fMap[0][0]));
	fD3DDevice->SetTextureStageState(stage, D3DTSS_BUMPENVMAT01, F2DW(envXfm.fMap[1][0]));
	fD3DDevice->SetTextureStageState(stage, D3DTSS_BUMPENVMAT10, F2DW(envXfm.fMap[0][1]));
	fD3DDevice->SetTextureStageState(stage, D3DTSS_BUMPENVMAT11, F2DW(envXfm.fMap[1][1]));

	fD3DDevice->SetTextureStageState(stage, D3DTSS_BUMPENVLSCALE, F2DW(envXfm.fMap[2][2]));
    fD3DDevice->SetTextureStageState(stage, D3DTSS_BUMPENVLOFFSET, F2DW(envXfm.fMap[2][3]));
}

//// IHandleStageBlend ////////////////////////////////////////////////////////
// Translate current blend state for this stage into D3D settings.
// Stage 0 is handed off to IHandleFirstStageBlend. For any other stage, the
// color and alpha texture stage states are set from fLayerState[stage].fBlendFlags
// (and, for two layer setups, from the base layer's flags in fLayerState[0]).
//	stage - index of the texture stage (and of the fLayerState entry) to set up.
// Note the outer switch has no default case, so a blend mode that isn't listed
// below leaves this stage's D3D states as they were.
void	plDXPipeline::IHandleStageBlend(int stage)
{
	const UInt32 blendFlags = fLayerState[stage].fBlendFlags;
	// If it's the base layer, handle that differently, because it's not really
	// texture stage settings, but frame buffer blend settings.
	if( stage == 0 )
	{
		IHandleFirstStageBlend();
		return;
	}

	// Color source for this stage is the texture, complemented if the layer
	// asks for inverted color.
	UInt32 colorSrc = D3DTA_TEXTURE;
	if( blendFlags & hsGMatState::kBlendInvertColor )
		colorSrc |= D3DTA_COMPLEMENT ;
	// kBlendEnvBumpNext not really used.
	if( blendFlags & hsGMatState::kBlendEnvBumpNext )
	{
		IHandleBumpEnv(stage, blendFlags);
	}
	else switch( blendFlags & hsGMatState::kBlendMask )
	{
		// Alpha blending. Complicated by the ability to ignore either
		// color or alpha for any given texture. The lower end GeForces
		// don't orthogonally support settings, especially when the final
		// (3rd) stage is the diffuse color/alpha modulate and the board
		// really only wants to support 2 stages.
		// So we couldn't just translate our internal plasma stage states
		// into D3D states, we had to do some rearranging.
		// Note that by the time we get here, we _know_ that this isn't the
		// base layer (stage 0), because that's handled elsewhere.
		case hsGMatState::kBlendAlpha:
			// If the current number of layers is 2, then we've already handled the
			// base layer, so this must be layer 1 and the final layer.
			// If the base layer has NoTexColor or this layer has NoTexColor, we need
			// to do some rearranging.
			if( (fCurrNumLayers == 2)
				&&((blendFlags | fLayerState[0].fBlendFlags) & hsGMatState::kBlendNoTexColor) )
			{
				// If this layer AND base layer are NoTexColor, then we just want the diffuse color.
				if( (blendFlags & hsGMatState::kBlendNoTexColor)
					&&(fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexColor) )
				{
					// select diffuse color
					fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_DIFFUSE );
					fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP, D3DTOP_SELECTARG2 );
				}
				// If the base layer has NoTexColor but this layer doesn't, then we
				// want the output to be this texture color times diffuse (ignoring base texture color).
				else if( fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexColor )
				{
					// diffuse is arg2, modulate
					fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
					fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_DIFFUSE );
					fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP, D3DTOP_MODULATE );
				}
				// If base layer doesn't have NoTexColor, but this layer does, then
				// we want the output to be diffuse times base texture, which is in current.
				else if( blendFlags & hsGMatState::kBlendNoTexColor )
				{
					// diffuse is arg1, modulate
					fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, D3DTA_DIFFUSE );
					fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT );
					fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP, D3DTOP_MODULATE );
				}

			}
			// If we get here and this layer has NoTexColor, then we MUST be on a layer
			// above 1, which means we're on an advanced enough board to handle this orthogonally,
			// i.e. one with more than 2 texture stages.
			else if( blendFlags & hsGMatState::kBlendNoTexColor )
			{
				fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT );
				fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP, D3DTOP_SELECTARG2 );
			}
			// Finally, no NoTexColor in sight, just set it.
			else
			{
				fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
				fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT );
				fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,
												blendFlags & hsGMatState::kBlendInvertAlpha
													? D3DTOP_MODULATEINVALPHA_ADDCOLOR
													: D3DTOP_BLENDTEXTUREALPHA );
			}
			// The same ordeal for alpha, and the ability to ignore the alpha on any texture.
			// Note the additional logic for how to combine the alphas of multiple textures
			// into a final FB alpha.
			// This is orthogonal to using the alpha to combine colors of two different textures.
			// The default behavior is to use the upper texture alpha to blend the upper layer color
			// with the lower texture color, but retain the lower texture alpha (modulated by diffuse)
			// for the frame buffer alpha.
			switch( blendFlags & ( hsGMatState::kBlendAlphaAdd | hsGMatState::kBlendAlphaMult ) )
			{
				// Both flags set at once lands in default, which is treated the same as neither set.
				default:
				case 0:
					// Using alpha to blend textures, but this layer's alpha doesn't affect final FB
					// alpha.
					// Two layer setup with one or the other (or both) ignoring alpha.
					if( (fCurrNumLayers == 2)
						&&((blendFlags | fLayerState[0].fBlendFlags) & hsGMatState::kBlendNoTexAlpha) )
					{
						// Both ignoring alpha, use diffuse.
						if( (blendFlags & hsGMatState::kBlendNoTexAlpha)
							&&(fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexAlpha) )
						{
							// select diffuse alpha
							fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_DIFFUSE );
							fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP, D3DTOP_SELECTARG2 );
						}
						// Base ignoring alpha, use diffuse times this texture alpha.
						else if( fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexAlpha )
						{
							// diffuse is arg2, modulate
							fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_MODULATE );
							fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG1,
															blendFlags & hsGMatState::kBlendInvertAlpha
																? D3DTA_TEXTURE | D3DTA_COMPLEMENT
																: D3DTA_TEXTURE);
							fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_DIFFUSE );
						}
						// This ignoring alpha, use diffuse times base alpha (in current).
						else if( blendFlags & hsGMatState::kBlendNoTexAlpha )
						{
							// diffuse is arg1, modulate
							fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_MODULATE );
							fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG1, D3DTA_DIFFUSE );
							fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT );
						}
					}
					// Ignoring alpha or not, with more than 2 texture stages,
					// Either way, we'll ignore this texture's alpha, because it's an upper layer
					// and has already been used (if it's going to get used) to blend this texture's
					// color with the lower layers.
					else
					{
						fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
						fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT );
					}
					break;
					// Alpha coming out of this stage is lower stage alpha plus this texture alpha.
				case hsGMatState::kBlendAlphaAdd:
					fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_ADD );
					fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG1,
													blendFlags & hsGMatState::kBlendInvertAlpha
														? D3DTA_TEXTURE | D3DTA_COMPLEMENT
														: D3DTA_TEXTURE);
					fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT );
					break;
					// Alpha coming out of this stage is lower stage alpha times this texture alpha.
				case hsGMatState::kBlendAlphaMult:
					fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_MODULATE );
					fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG1,
													blendFlags & hsGMatState::kBlendInvertAlpha
														? D3DTA_TEXTURE | D3DTA_COMPLEMENT
														: D3DTA_TEXTURE);
					fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT );
					break;
			}
			break;

			// Add texture colors, pass through current alpha.
		case hsGMatState::kBlendAdd:
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,   D3DTOP_ADD );

			fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT );
			break;

			// Multiply texture colors, pass through current alpha
		case hsGMatState::kBlendMult:
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,   D3DTOP_MODULATE );

			fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT );

			if (fSettings.fMaxLayersAtOnce == 2 && stage == 1)
			{
				// On these boards, the only way we can do 2 textures plus diffuse is to
				// multiply it in during stage 0, but that only gives the same result
				// when doing a mult blend, which we won't know when setting up stage 0.
				// Now that we know, adjust stage 0 settings.
				fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG2, D3DTA_DIFFUSE);
				fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP, D3DTOP_MODULATE);
			}
			break;

			// Dot3 texture colors, pass through current alpha.
		case hsGMatState::kBlendDot3:
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,   D3DTOP_DOTPRODUCT3 );

			fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT );
			break;

			// Add signed texture colors, pass through current alpha.
		case hsGMatState::kBlendAddSigned:
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,   D3DTOP_ADDSIGNED );

			fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT );
			break;

			// Add signed * 2 texture colors, pass through current alpha.
		case hsGMatState::kBlendAddSigned2X:
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,   D3DTOP_ADDSIGNED2X );

			fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT );
			break;

			// kBlendAddColorTimesAlpha is only supported for the base layer.
			// Nothing is set for this stage, we just complain.
		case hsGMatState::kBlendAddColorTimesAlpha:
			hsAssert(false, "Blend mode unsupported on upper layers");
			break;

			// No blend, select this texture color and pass through current alpha
		case 0:
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG1, colorSrc );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLORARG2, D3DTA_CURRENT );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_COLOROP,   D3DTOP_SELECTARG1 );

			fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2 );
			fD3DDevice->SetTextureStageState( stage, D3DTSS_ALPHAARG2, D3DTA_CURRENT );
			break;
	}
}

//// IHandleFirstStageBlend ///////////////////////////////////////////////////
// Set frame buffer blend mode for blending the base layer
// For the case of rendering to a texture with alpha, the alpha written to
// the render target will be computed exactly as the color (limitation of D3D).
void	plDXPipeline::IHandleFirstStageBlend()
{
	// No color, just writing out Z values.
	if( fLayerState[0].fBlendFlags & hsGMatState::kBlendNoColor )
	{
		fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
		fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_ZERO );
		fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_ONE );
		fLayerState[0].fBlendFlags |= 0x80000000;
	}
	else
	{
		switch( fLayerState[0].fBlendFlags & hsGMatState::kBlendMask )
		{
			// Detail is just a special case of alpha, handled in construction of the texture
			// mip chain by making higher levels of the chain more transparent.
			case hsGMatState::kBlendDetail:
			case hsGMatState::kBlendAlpha:
				fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
				if( fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertFinalAlpha )
				{
					if( fLayerState[0].fBlendFlags & hsGMatState::kBlendAlphaPremultiplied )
					{
						fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_ONE );
					}
					else
					{
						fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_INVSRCALPHA );
					}
					fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_SRCALPHA );
				}
				else
				{
					if( fLayerState[0].fBlendFlags & hsGMatState::kBlendAlphaPremultiplied )
					{
						fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_ONE );
					}
					else
					{
						fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_SRCALPHA );
					}
					fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA );
				}
				break;
			// Multiply the final color onto the frame buffer.
			case hsGMatState::kBlendMult:
				fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
				if( fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertFinalColor )
				{
					fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_ZERO );
					fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_INVSRCCOLOR );
				}
				else
				{
					fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_ZERO );
					fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_SRCCOLOR );
				}
				break;

			// Add final color to FB.
			case hsGMatState::kBlendAdd:
				fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
				fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_ONE );
				fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_ONE );

				break;

			// Multiply final color by FB color and add it into the FB.
			case hsGMatState::kBlendMADD:
				fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
				fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_DESTCOLOR );
				fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_ONE );

				break;

			// Final color times final alpha, added into the FB.
			case hsGMatState::kBlendAddColorTimesAlpha:
				fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
				if( fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertFinalAlpha )
					fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_INVSRCALPHA );
				else
					fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_SRCALPHA );
				fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_ONE );

				break;

			// Overwrite final color onto FB
			case 0:
				fD3DDevice->SetRenderState( D3DRS_ALPHABLENDENABLE,   TRUE );
				fD3DDevice->SetRenderState( D3DRS_SRCBLEND,  D3DBLEND_ONE );
				fD3DDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_ZERO );

				break;

			default:
				{
					hsAssert(false, "Too many blend modes specified in material");
					plLayer* lay = plLayer::ConvertNoRef(fCurrMaterial->GetLayer(fCurrLayerIdx)->BottomOfStack());
					if( lay )
					{
						if( lay->GetBlendFlags() & hsGMatState::kBlendAlpha )
						{
							lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAlpha);
						}
						else
						{
							lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAdd);
						}
					}
				}
				break;
		}
	}
	// Blend ops, not currently used in production.
	if( fLayerState[0].Differs( fLayerState[0].fBlendFlags, fOldLayerState[0].fBlendFlags, (hsGMatState::kBlendSubtract | hsGMatState::kBlendRevSubtract) ) )
	{
		if( fLayerState[0].fBlendFlags & hsGMatState::kBlendSubtract )
			fD3DDevice->SetRenderState( D3DRS_BLENDOP, D3DBLENDOP_SUBTRACT );
		else if( fLayerState[0].fBlendFlags & hsGMatState::kBlendRevSubtract )
			fD3DDevice->SetRenderState( D3DRS_BLENDOP, D3DBLENDOP_REVSUBTRACT );
		else
			fD3DDevice->SetRenderState( D3DRS_BLENDOP, D3DBLENDOP_ADD );

	}

	// AlphaTestHigh is used for reducing sort artifacts on textures that are mostly opaque or transparent, but
	// have regions of translucency in transition. Like a texture for a bush billboard. It lets there be some
	// transparency falloff, but quit drawing before it gets so transparent that draw order problems (halos)
	// become apparent.
	if( fLayerState[0].Differs( fLayerState[0].fBlendFlags, fOldLayerState[0].fBlendFlags, hsGMatState::kBlendAlphaTestHigh) )
	{
		plConst(UInt32) kHighAlphaTest(0x40);
		if( fLayerState[0].fBlendFlags & hsGMatState::kBlendAlphaTestHigh )
			fD3DDevice->SetRenderState(D3DRS_ALPHAREF, kHighAlphaTest);
		else
			fD3DDevice->SetRenderState(D3DRS_ALPHAREF, 0x00000001);
	}
	// Set the alpha test function, turn on for alpha blending, else off.
	if( fLayerState[0].Differs( fLayerState[0].fBlendFlags, fOldLayerState[0].fBlendFlags, hsGMatState::kBlendAlpha | hsGMatState::kBlendTest | hsGMatState::kBlendAlphaAlways | hsGMatState::kBlendAddColorTimesAlpha) )
	{
		if( (fLayerState[0].fBlendFlags & (hsGMatState::kBlendAlpha | hsGMatState::kBlendTest | hsGMatState::kBlendAddColorTimesAlpha))
				&& !(fLayerState[0].fBlendFlags & hsGMatState::kBlendAlphaAlways) )
			fD3DDevice->SetRenderState( D3DRS_ALPHAFUNC, D3DCMP_GREATER );
		else
			fD3DDevice->SetRenderState( D3DRS_ALPHAFUNC, D3DCMP_ALWAYS );
	}
	// Adjust the fog color based on the blend mode. Setting fog color to black for additive modes is
	// an exact solution, setting it to white for multipication is as close of an approximation to correct
	// as you're going to get with DX.
	if( fLayerState[0].Differs( fLayerState[0].fBlendFlags, fOldLayerState[0].fBlendFlags, hsGMatState::kBlendAdd | hsGMatState::kBlendMult | hsGMatState::kBlendMADD | hsGMatState::kBlendAddColorTimesAlpha ) )
	{
		if( fLayerState[0].fBlendFlags & (hsGMatState::kBlendAdd | hsGMatState::kBlendMADD | hsGMatState::kBlendAddColorTimesAlpha) )
			fD3DDevice->SetRenderState( D3DRS_FOGCOLOR, 0 );
		else if( fLayerState[0].fBlendFlags & hsGMatState::kBlendMult )
			fD3DDevice->SetRenderState( D3DRS_FOGCOLOR, 0xffffffff );
		else
			fD3DDevice->SetRenderState( D3DRS_FOGCOLOR, fCurrFog.fHexColor );
	}
}

//// IHandleTextureMode ///////////////////////////////////////////////////////
// Handle the texture stage state for the base layer (stage 0).
// There are three cases:
//	- The layer has a texture: set color/alpha ops and args from the base
//	  layer blend flags (or hand off to IHandleBumpEnv for kBlendEnvBumpNext).
//	- No texture, but more than one layer: pass diffuse color and alpha
//	  through, as if there were a texture with its color and alpha ignored.
//	- No texture and a single layer: a plain diffuse only pass.
// fSettings.fVeryAnnoyingTextureInvalidFlag is always cleared on the way out.
//	layer - the base layer, whose texture (if any) picks the case above.
void	plDXPipeline::IHandleTextureMode(plLayerInterface* layer)
{
	plBitmap *bitmap = layer->GetTexture();
	if( bitmap )
	{
		// EnvBumpNext not used in production.
		if( fLayerState[0].fBlendFlags & hsGMatState::kBlendEnvBumpNext )
		{
			IHandleBumpEnv(0, fLayerState[0].fBlendFlags);
		}
		// If the texture stage settings have changed. Note that this
		// is a bad test, we should just be doing something like keeping
		// an array of D3D TextureStageStates as we set them and checking against
		// that directly rather than trying to infer from higher level state
		// whether we need to make the D3D call.
		else if( fSettings.fVeryAnnoyingTextureInvalidFlag
			|| !fTexturing
			|| ( fLayerState[ 0 ].fBlendFlags ^ fOldLayerState[0].fBlendFlags )
			|| ( fCurrNumLayers + fActivePiggyBacks != fLastEndingStage )
				)
		{
			// If we're only doing one layer, just modulate texture color by diffuse and we're done.
			if( ( fCurrNumLayers + fActivePiggyBacks ) <= 1 )
			{
				// See IHandleStageBlend for notes on NoTexColor.
				if( fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexColor )
					fD3DDevice->SetTextureStageState( 0, D3DTSS_COLOROP, D3DTOP_SELECTARG2 );
				else
					fD3DDevice->SetTextureStageState( 0, D3DTSS_COLOROP, D3DTOP_MODULATE );
				fD3DDevice->SetTextureStageState( 0, D3DTSS_COLORARG1,
					fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertColor
						? D3DTA_TEXTURE | D3DTA_COMPLEMENT
						: D3DTA_TEXTURE);
				fD3DDevice->SetTextureStageState( 0, D3DTSS_COLORARG2, D3DTA_DIFFUSE );

			}
			else
			{
				// See the check in IHandleStageBlend for fSettings.fMaxLayersAtOnce == 2.
				// It depends on these settings and adjusts what it needs.

				// Multitexturing, select texture color to make its way upstream on stages.
				fD3DDevice->SetTextureStageState( 0, D3DTSS_COLOROP,   D3DTOP_SELECTARG1 );
				fD3DDevice->SetTextureStageState( 0, D3DTSS_COLORARG1,
					fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertColor
						? D3DTA_TEXTURE | D3DTA_COMPLEMENT
						: D3DTA_TEXTURE);

				// If our NoTexColor setting has changed, force a refresh of blend state on the next stage
				// since it's affected by our NoTexColor state.
				if( fLayerState[0].Differs( fLayerState[0].fBlendFlags, fOldLayerState[0].fBlendFlags, hsGMatState::kBlendNoTexColor) )
					fLayerState[1].fBlendFlags = UInt32(-1);
			}

			// Alpha Arg1 is texture alpha (possibly complemented), and Arg2 is diffuse (possibly complemented).
			// If we want to ignore vertex alpha, select arg1
			// If we want to ignore texture alpha, select arg2
			// Otherwise (and normally) multiply the two.
			fD3DDevice->SetTextureStageState( 0, D3DTSS_ALPHAOP,
				fLayerState[0].fBlendFlags & hsGMatState::kBlendNoVtxAlpha
					? D3DTOP_SELECTARG1
					:	fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexAlpha
						? D3DTOP_SELECTARG2
						: D3DTOP_MODULATE );
			fD3DDevice->SetTextureStageState( 0, D3DTSS_ALPHAARG1,
				fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertAlpha
					? D3DTA_TEXTURE | D3DTA_COMPLEMENT
					: D3DTA_TEXTURE);
			fD3DDevice->SetTextureStageState( 0, D3DTSS_ALPHAARG2, D3DTA_DIFFUSE |
						( fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertVtxAlpha
							? D3DTA_COMPLEMENT
							: 0 ) );

			fTexturing = true;
		}
	}
	// Here we've no texture for the base layer, but we have more than one layer.
	// Select diffuse color and alpha, and pretend we have a texture but we're ignoring its
	// color and alpha.
	else if( fCurrNumLayers + fActivePiggyBacks > 1 )
	{
		fLayerState[0].fBlendFlags |= hsGMatState::kBlendNoTexColor | hsGMatState::kBlendNoTexAlpha;
		fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_SELECTARG2);
		fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2);
		// The ops above select arg2, so diffuse has to go in arg2. Putting it in
		// arg1 would leave whatever arg2 was last set to (possibly complemented
		// diffuse from an earlier pass) as what actually gets selected.
		fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG2, D3DTA_DIFFUSE);
		fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG2, D3DTA_DIFFUSE);
		// The next stage's blend setup depends on our NoTexColor/NoTexAlpha, so
		// invalidate its cached flags if ours changed.
		if( fLayerState[0].Differs( fLayerState[0].fBlendFlags, fOldLayerState[0].fBlendFlags, (hsGMatState::kBlendNoTexColor|hsGMatState::kBlendNoTexAlpha)) )
			fLayerState[1].fBlendFlags = UInt32(-1);
		fTexturing = false;
	}
	// Finally, a color only (non-textured) pass. Just select diffuse.
	else
	{
		// Only needs doing if we were texturing before, or the cached state is invalid.
		if( fTexturing || fSettings.fVeryAnnoyingTextureInvalidFlag )
		{
			fD3DDevice->SetTextureStageState( 0, D3DTSS_COLOROP,   D3DTOP_SELECTARG1 );
			fD3DDevice->SetTextureStageState( 0, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG1 );
			fD3DDevice->SetTextureStageState( 0, D3DTSS_COLORARG1, D3DTA_DIFFUSE );
			fD3DDevice->SetTextureStageState( 0, D3DTSS_ALPHAARG1, D3DTA_DIFFUSE |
						( fLayerState[0].fBlendFlags & hsGMatState::kBlendInvertVtxAlpha ? D3DTA_COMPLEMENT : 0 ) );

			fTexturing = false;
		}
	}

	fSettings.fVeryAnnoyingTextureInvalidFlag = false;
}

//// IHandleStageClamp ////////////////////////////////////////////////////////
// Translate our current wrap/clamp mode to D3D calls.
void	plDXPipeline::IHandleStageClamp(int stage)
{
	const UInt32 flags = fLayerState[stage].fClampFlags;
	switch( flags )
	{
		case 0:
			fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP  );
			fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP  );
			break;
		case hsGMatState::kClampTextureU:
			fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP );
			fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSV, D3DTADDRESS_WRAP  );
			break;
		case hsGMatState::kClampTextureV:
			fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSU, D3DTADDRESS_WRAP  );
			fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP );
			break;
		case hsGMatState::kClampTexture:
			fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP );
			fD3DDevice->SetSamplerState( stage, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP );
			break;
	}
}

// ISetBumpMatrices ///////////////////////////////////////////////////////////
// Fill in fBumpDuMatrix, fBumpDvMatrix and fBumpDwMatrix, the uvw transforms
// used for the bump map channels (see the long comment in the body for the math).
// Only row 0 and element [1][3] of each matrix are written here; the rest of
// each matrix is left as it was.
//	layer	- not referenced by this function.
//	span	- supplies the world bounds, light list and strengths, and local to
//			  world transform that the matrices are built from.
void plDXPipeline::ISetBumpMatrices(const plLayerInterface* layer, const plSpan* span)
{
//#define BUMP_COMPARE_MATH
#ifdef BUMP_COMPARE_MATH
	// This section is just debugging, to compute the matrices that will be set.
	// It's compiled out unless the define above is uncommented.
	static hsMatrix44 preMDu;
	static hsMatrix44 preMDv;
	static hsMatrix44 preMDw;
	static int preMInit = false;
	if( !preMInit )
	{
		// One time setup of the constant "pre" matrices (matrix M in the comment below,
		// with a different V offset for each channel).
		hsMatrix44 rotAndCollapseToX;
		int i, j;
		for( i = 0; i < 4; i++ )
		{
			for( j = 0; j < 4; j++ )
			{
				rotAndCollapseToX.fMap[i][j] = 0;
			}
		}
		rotAndCollapseToX.fMap[0][2] = 1.f;
		rotAndCollapseToX.fMap[3][3] = 1.f;
		rotAndCollapseToX.NotIdentity();

		hsMatrix44 offset;
		offset.Reset();
		offset.fMap[0][0] = 0.5f;
		offset.fMap[0][3] = 0.5f;
		offset.NotIdentity();

		preMDu = offset * rotAndCollapseToX;

		offset.fMap[1][3] = 0.5f;

		preMDv = offset * rotAndCollapseToX;

		offset.fMap[1][3] = 1.f;

		preMDw = offset * rotAndCollapseToX;

		preMInit = true;
	}

	// Rotation only, zero out the translation.
	hsMatrix44 localToLight = span->GetLight(0, false)->GetWorldToLight() * span->fLocalToWorld;
	localToLight.fMap[0][3] = localToLight.fMap[1][3] = localToLight.fMap[2][3] = 0;

	fBumpDuMatrix = preMDu * localToLight;
	fBumpDvMatrix = preMDv * localToLight;

	hsMatrix44 c2w = fView.fCameraToWorld;
	hsMatrix44 cameraToLight = span->GetLight(0, false)->GetWorldToLight() * c2w;
	cameraToLight.fMap[0][3] = cameraToLight.fMap[1][3] = cameraToLight.fMap[2][3] = 0;
	fBumpDwMatrix = preMDw * cameraToLight;

	// HACK PART - FOR COMPARISON
	// Keep the full-math results in locals, then clear the members so the
	// code below fills them in from scratch.
	hsMatrix44 bDu = fBumpDuMatrix;
	hsMatrix44 bDv = fBumpDvMatrix;
	hsMatrix44 bDw = fBumpDwMatrix;
	static hsMatrix44 zeroMatrix;
	fBumpDuMatrix = zeroMatrix;
	fBumpDvMatrix = zeroMatrix;
	fBumpDwMatrix = zeroMatrix;
	// HACK PART - FOR COMPARISON

#endif // BUMP_COMPARE_MATH

	// Here's the math
	// The incoming uv coordinate is either:
	//	kMiscBumpDu - dPos/dU (in other words, the direction in space from this vertex where U increases and V remains constant) in local space.
	//	kMiscBumpDv - dPos/dV (in other words, the direction in space from this vertex where V increases and U remains constant) in local space.
	//	kMiscBumpDw - the normal in camera space.
	//
	// In each case, we need to transform the vector (uvw coord) into light space, and dot it with the light direction.
	// Well, in light space, the light direction is always (0,0,1).
	// So really, we just transform the vector into light space, and the z component is what we want.
	// Then, for each of these, we take that z value (the dot product) and put it into a color channel.
	// R = dPos/dU dot liDir
	// G = dPos/dV dot liDir
	// B = dPos/dW dot liDir
	//
	// That's what we want, here's how we get it.
	// Here, Li(vec) means the vector in light space, Loc(vec) is local space, Tan(vec) is tangent space
	//
	// Li(uvw) = local2Light * Loc(uvw) (uvw comes in in local space, ie input uvw == Loc(uvw))
	// Then we want to:
	//		a) Rotate the Z component to be along X (U) axis
	//		b) Zero out the new Y and Z
	//		c) Scale and offset our new X (the old Z) so -1 => 0, 1 => 1 (scale by 0.5, add 0.5).
	// The following matrix does all this (it's just a concatenation of the above 3 simple matrices).
	//	M =	|0 0 0.5 0.5|
	//		|0 0 0   0  |
	//		|0 0 0   0  |
	//		|0 0 0   1  |
	//
	// Our lookup texture that these transformed coords will read into has three horizontal bands,
	// the bottom 3rd is a ramp along U of 0->red
	// middle 3rd is a ramp along U of 0->green
	// last third (highest V) is a ramp along U of 0->blue.
	// So we can do the conversion from our dot to a color with an appropriate V offset in the above M.
	//
	// dPos/dU and dPos/dV are both input in local space, so the transform to get them into light space is
	// the same for each, and that's obviously WorldToLight * LocalToWorld.
	// That's a little inconvenient and inefficient. It's inconvenient, because for an omni light, we
	// can easily fake a light direction (span position - light position), but the full matrix is kind
	// of arbitrary. We could fake it, but instead we move on. It's inefficient because, looking at the
	// form of matrix M, we know we'll be throwing away a lot of it anyway. So we work through the matrix
	// math and find that we're going to wind up with:
	//
	//		M1 =	|	M[0][2] * loc2li[2][0]	M[0][2] * loc2li[2][1]	M[0][2] * loc2li[2][2]	0.5	|
	//				|					0					0						0			0	|
	//				|					0					0						0			0	|
	//				|					0					0						0			1	|
	//
	// So all we really need is loc2li[2] (row 2). A little more matrix math gives us:
	//
	//		loc2li[2] = (w2li[2] dot loc2wT[0], w2li[2] dot loc2wT[1], w2li[2] dot loc2wT[2]) (where loc2wT is Transpose(loc2w))
	//
	// And hey, that's just dependent on the light's direction w2li[2]. The same thing works out for dPos/dW, except
	// substitute cam2w for loc2w (since input is in camera space instead of world space).
	//
	// And that's about it. We don't actually have to multiply all those matrices at run-time, because
	// we know what the answer will be anyway. We just construct the matrices, making sure we set the
	// appropriate translate for V to get each into the right color channel. The hardware does the three
	// uv transforms and lookups, sums the results, and the output is:
	// (dPos/dU dot liDir, dPos/dV dot liDir, dPos/dW dot liDir), which also happens to be the light direction
	// transformed into tangent space. We dot that with our bump map (which has the normals in tangent space),
	// and we've got per-pixel shading for this light direction.


	// Build a single light direction for the span, as the strength weighted sum
	// of the direction to each of the span's lights, taken at the center of the
	// span's world bounds. Track the strongest light while we're at it.
	hsPoint3 spanPos = span->fWorldBounds.GetCenter();
	hsVector3 liDir(0,0,0);
	int i;
	const hsTArray<plLightInfo*>& spanLights = span->GetLightList(false);
	hsScalar maxStrength = 0;
	for( i = 0; i < spanLights.GetCount(); i++ )
	{
		hsScalar liWgt = span->GetLightStrength(i, false);
		// A light strength of 2.f means it's from a light group, and we haven't actually calculated
		// the strength. So calculate it now.
		if( liWgt == 2.f )
		{
			// scale is required by the call, but not used here.
			hsScalar scale;
			spanLights[i]->GetStrengthAndScale(span->fWorldBounds, liWgt, scale);
		}
		if( liWgt > maxStrength )
			maxStrength = liWgt;
		liDir += spanLights[i]->GetNegativeWorldDirection(spanPos) * liWgt;
	}
	hsFastMath::NormalizeAppr(liDir);

	static hsScalar kUVWScale = 1.f;
	hsScalar uvwScale = kUVWScale;
	// With an additive base layer, fold the (normalized) camera to span vector into
	// the light direction and scale the result down.
	// NOTE(review): the constant names suggest this is the specular pass - confirm.
	if( fLayerState[0].fBlendFlags & hsGMatState::kBlendAdd )
	{
		hsVector3 cam2span(&GetViewPositionWorld(), &spanPos);
		hsFastMath::NormalizeAppr(cam2span);
		liDir += cam2span;
		hsFastMath::NormalizeAppr(liDir);
		static hsScalar kSpecularMax = 0.1f;
		static hsScalar kSpecularMaxUV = 0.5f;
		if (IsDebugFlagSet(plPipeDbg::kFlagBumpUV))
			uvwScale *= kSpecularMaxUV;
		else
			uvwScale *= kSpecularMax;
	}

	// Scale by the red runtime color of a later layer in the material. Which layer
	// that is (current index +3, +1 or +2) depends on the current layer's bump channel.
	switch( fCurrMaterial->GetLayer(fCurrLayerIdx)->GetMiscFlags() & hsGMatState::kMiscBumpChans )
	{
	case hsGMatState::kMiscBumpDu:
		uvwScale *= fCurrMaterial->GetLayer(fCurrLayerIdx+3)->GetRuntimeColor().r;
		break;
	case hsGMatState::kMiscBumpDv: // This currently should never happen
		uvwScale *= fCurrMaterial->GetLayer(fCurrLayerIdx+1)->GetRuntimeColor().r;
		break;
	case hsGMatState::kMiscBumpDw:
		uvwScale *= fCurrMaterial->GetLayer(fCurrLayerIdx+2)->GetRuntimeColor().r;
		break;
	}
	// Strongest light strength, times 20 and clamped to 1, so the effect is at
	// full strength for any light strength of 0.05 or more.
	maxStrength *= 20.f;
	if( maxStrength > 1.f )
		maxStrength = 1.f;
	liDir *= uvwScale * maxStrength;

	// The 0.5 offset on U from step (c) above.
	const hsScalar kUVWOffset = 0.5f;

	// V offsets to pick the band of the lookup texture for each color channel.
	hsScalar kOffsetToRed;
	hsScalar kOffsetToGreen;
	hsScalar kOffsetToBlue;

	// A different set of V offsets is used when either bump debug flag is set.
	if (IsDebugFlagSet(plPipeDbg::kFlagBumpUV) || IsDebugFlagSet(plPipeDbg::kFlagBumpW))
	{
		kOffsetToRed = 0.2f;
		kOffsetToGreen = 0.6f;
		kOffsetToBlue = 1.f;
	}
	else
	{
		kOffsetToRed = 0.f;
		kOffsetToGreen = 0.4f;
		kOffsetToBlue = 0.8f;
	}

	const hsMatrix44& l2w = span->fLocalToWorld;

	// Row 0 is liDir dotted with each column of the local to world matrix.
	// Du and Dv share it, they only differ in the V offset.
	fBumpDvMatrix.fMap[0][0] = fBumpDuMatrix.fMap[0][0] = (liDir.fX * l2w.fMap[0][0] + liDir.fY * l2w.fMap[1][0] + liDir.fZ * l2w.fMap[2][0]);
	fBumpDvMatrix.fMap[0][1] = fBumpDuMatrix.fMap[0][1] = (liDir.fX * l2w.fMap[0][1] + liDir.fY * l2w.fMap[1][1] + liDir.fZ * l2w.fMap[2][1]);
	fBumpDvMatrix.fMap[0][2] = fBumpDuMatrix.fMap[0][2] = (liDir.fX * l2w.fMap[0][2] + liDir.fY * l2w.fMap[1][2] + liDir.fZ * l2w.fMap[2][2]);

	fBumpDvMatrix.fMap[0][3] = fBumpDuMatrix.fMap[0][3] = kUVWOffset;

	fBumpDuMatrix.fMap[1][3] = kOffsetToRed;
	fBumpDvMatrix.fMap[1][3] = kOffsetToGreen;

	// The debug section above already declares c2w when it's compiled in.
#ifndef BUMP_COMPARE_MATH
	hsMatrix44 c2w = fView.GetCameraToWorld();
#endif // BUMP_COMPARE_MATH

	// The bump textures created so far have very strong blue components, which make anything
	// bump mapped glow. The ideal fix would be to have the artists adjust the blue component
	// to a better (lower) value, so there would be a little extra illumination where the bump
	// is straight out into the normal direction, to complement the lateral illumination.
	// Attempts so far have been unsuccessful in getting them to get a better understanding
	// of bump maps, so I've just zeroed out the contribution in the normal direction.
	plConst(int) kBumpUVOnly(true);
	if( !kBumpUVOnly )
	{
		// Same as for Du/Dv, but with camera to world in place of local to world.
		fBumpDwMatrix.fMap[0][0] = (liDir.fX * c2w.fMap[0][0] + liDir.fY * c2w.fMap[1][0] + liDir.fZ * c2w.fMap[2][0]);
		fBumpDwMatrix.fMap[0][1] = (liDir.fX * c2w.fMap[0][1] + liDir.fY * c2w.fMap[1][1] + liDir.fZ * c2w.fMap[2][1]);
		fBumpDwMatrix.fMap[0][2] = (liDir.fX * c2w.fMap[0][2] + liDir.fY * c2w.fMap[1][2] + liDir.fZ * c2w.fMap[2][2]);
	}
	else
	{
		fBumpDwMatrix.fMap[0][0] = 0;
		fBumpDwMatrix.fMap[0][1] = 0;
		fBumpDwMatrix.fMap[0][2] = 0;
	}

	fBumpDwMatrix.fMap[0][3] = kUVWOffset;
	fBumpDwMatrix.fMap[1][3] = kOffsetToBlue;
}

// IGetBumpMatrix ///////////////////////////////////////////////////////
// Pick the uvw transform for the bump channel named in miscFlags.
// Only the kMiscBumpChans bits of miscFlags are examined. Du and Dv each
// select their own matrix; Dw, and any other value, selects the Dw matrix.
// The matrices are expected to have been filled in by ISetBumpMatrices.
const hsMatrix44& plDXPipeline::IGetBumpMatrix(UInt32 miscFlags) const
{
	const UInt32 bumpChan = miscFlags & hsGMatState::kMiscBumpChans;

	if( bumpChan == hsGMatState::kMiscBumpDu )
		return fBumpDuMatrix;

	if( bumpChan == hsGMatState::kMiscBumpDv )
		return fBumpDvMatrix;

	// kMiscBumpDw, plus the fallback for anything unrecognized.
	return fBumpDwMatrix;
}

// ISkipBumpMap /////////////////////////////////////////////////////////////////////////
// Decide whether bump mapping should be skipped for this material/layer/span.
// Skipping happens only when the layer at <layer> is a bump channel layer and
// either the span reports no lights or the kFlagNoBump debug flag is set.
// When skipping, <layer> is advanced by 4 (past the bump layers).
// Returns true if, after skipping, no layers remain (caller should stop rendering
// currSpan); returns false in every other case, including nil newMat or currSpan.
hsBool plDXPipeline::ISkipBumpMap(hsGMaterial* newMat, UInt32& layer, const plSpan* currSpan) const
{
	if( !newMat || !currSpan )
		return false;

	plLayerInterface* bumpLay = newMat->GetLayer(layer);
	if( !bumpLay )
		return false;

	// Not a bump layer, nothing to skip.
	if( !(bumpLay->GetMiscFlags() & hsGMatState::kMiscBumpChans) )
		return false;

	// The span is lit and bump mapping hasn't been disabled, so keep the bump layers.
	if( currSpan->GetNumLights(false) && !IsDebugFlagSet(plPipeDbg::kFlagNoBump) )
		return false;

	layer += 4;
	return layer >= newMat->GetNumLayers();
}

//// IHandleStageTransform ////////////////////////////////////////////////////
// Compute and set the UVW transform to D3D.
// This only gets interesting if the transform is dependent on on the current camera transform,
// as is the case with Reflection, Projection, or bump mapping.
void	plDXPipeline::IHandleStageTransform( int stage, plLayerInterface *layer )
{
	if( 1
		|| !(layer->GetTransform().fFlags & hsMatrix44::kIsIdent)
		|| (fLayerState[stage].fMiscFlags & (hsGMatState::kMiscUseReflectionXform|hsGMatState::kMiscUseRefractionXform|hsGMatState::kMiscProjection|hsGMatState::kMiscBumpChans)) )
	{
		D3DXMATRIX tXfm;

		if( fLayerState[stage].fMiscFlags & (hsGMatState::kMiscUseReflectionXform | hsGMatState::kMiscUseRefractionXform) )
		{
			// Reflection - this is just the camera to world, with translation removed,
			// and rotated to match cube map conventions.
			hsMatrix44 c2env = fView.GetCameraToWorld();
			c2env = fView.GetCameraToWorld();

			c2env.fMap[0][3]
				= c2env.fMap[1][3]
				= c2env.fMap[2][3]
				= 0.f;


			if( fLayerState[stage].fMiscFlags & hsGMatState::kMiscUseReflectionXform )
			{

				// This is just a rotation about X of Pi/2 (y = z, z = -y),
				// followed by flipping Z to reflect back towards us (z = -z).
				hsScalar t = c2env.fMap[1][0];
				c2env.fMap[1][0] = c2env.fMap[2][0];
				c2env.fMap[2][0] = t;

				t = c2env.fMap[1][1];
				c2env.fMap[1][1] = c2env.fMap[2][1];
				c2env.fMap[2][1] = t;

				t = c2env.fMap[1][2];
				c2env.fMap[1][2] = c2env.fMap[2][2];
				c2env.fMap[2][2] = t;
			}
			else // must be kMiscUseRefractionXform
			{

				// Okay, I know this refraction isn't any where near
				// right, so don't sit down and try to figure out the
				// math and hook it to the refractive index.
				// It's just a hack that will fool anyone that isn't
				// really paying attention.

				// This is just a rotation about X of Pi/2 (y = z, z = -y),
				// followed by NOT flipping Z to reflect back towards us (z = -z).
				// In other words, same as reflection, but then c2env = c2env * scaleMatNegateZ.
				hsScalar t = c2env.fMap[1][0];
				c2env.fMap[1][0] = c2env.fMap[2][0];
				c2env.fMap[2][0] = t;

				t = c2env.fMap[1][1];
				c2env.fMap[1][1] = c2env.fMap[2][1];
				c2env.fMap[2][1] = t;

				t = c2env.fMap[1][2];
				c2env.fMap[1][2] = c2env.fMap[2][2];
				c2env.fMap[2][2] = t;

				c2env.fMap[0][2] = -c2env.fMap[0][2];
				c2env.fMap[1][2] = -c2env.fMap[1][2];
				c2env.fMap[2][2] = -c2env.fMap[2][2];

#if 0
				const hsScalar kFishEyeScale = 0.5f;
				// You can adjust the fish-eye-ness of this by scaling
				// X and Y as well. Eventually, you wind up with the same
				// as c2env * scaleMatXYAndNegateZ, but this is shorter.
				// kFishEyeScale gets pretty fish-eye at about 0.5, and
				// like you're looking through the wrong end of a telescope
				// at about 1.5.
				// Ideally kFishEyeScale would be a parameter of the layer.
				c2env.fMap[0][0] *= kFishEyeScale;
				c2env.fMap[1][0] *= kFishEyeScale;
				c2env.fMap[2][0] *= kFishEyeScale;

				c2env.fMap[0][1] *= kFishEyeScale;
				c2env.fMap[1][1] *= kFishEyeScale;
				c2env.fMap[2][1] *= kFishEyeScale;
#endif
			}

			IMatrix44ToD3DMatrix( tXfm, c2env );
		}
		// cam2Screen will also have the kMiscPerspProjection flag set, so this needs
		// to go before the regular kMiscProjection check.
		else if (fLayerState[stage].fMiscFlags & hsGMatState::kMiscCam2Screen )
		{
			// Still needs a bit of cleaning...
			static hsVector3 camScale(0.5f, -0.5f, 1.f);
			static hsVector3 camTrans(0.5f, 0.5f, 0.f);
			hsMatrix44 p2s;
			p2s.MakeScaleMat(&camScale);
			p2s.fMap[0][3] += camTrans.fX;
			p2s.fMap[1][3] += camTrans.fY;

			// The scale and trans move us from NDC to Screen space. We need to swap
			// the Z and W coordinates so that the texture projection will divide by W
			// and give us projected 2D coordinates.
			hsScalar temp = p2s.fMap[2][2];
			p2s.fMap[2][2] = p2s.fMap[3][2];
			p2s.fMap[3][2] = temp;

			temp = p2s.fMap[2][3];
			p2s.fMap[2][3] = p2s.fMap[3][3];
			p2s.fMap[3][3] = temp;

			IMatrix44ToD3DMatrix(tXfm, p2s * IGetCameraToNDC());
		}
		else if( fLayerState[stage].fMiscFlags & hsGMatState::kMiscProjection )
		{
			// For projection, the worldToLight transform is in the layer transform,
			// so we append the cameraToWorld, getting cameraToLight
			hsMatrix44 c2w = fView.GetCameraToWorld();
			if( !(layer->GetUVWSrc() & plLayerInterface::kUVWPosition) )
			{
				c2w.fMap[0][3] = 0;
				c2w.fMap[1][3] = 0;
				c2w.fMap[2][3] = 0;
			}

			// We've already stuffed the worldToLight transform into the layer.
			hsMatrix44 c2l = layer->GetTransform() * c2w;

			IMatrix44ToD3DMatrix(tXfm, c2l);
		}
		else if( fLayerState[stage].fMiscFlags & hsGMatState::kMiscBumpChans )
		{
			// Bump matrices are already set, just get the right one and stuff it in.
			hsMatrix44 m = IGetBumpMatrix(fLayerState[stage].fMiscFlags);

			IMatrix44ToD3DMatrix(tXfm, m);
		}
		else
		{
			// Just put take the layer transform and stuff it in.
			IMatrix44ToD3DMatrix( tXfm, layer->GetTransform() );
		}

		fD3DDevice->SetTransform( sTextureStages[ stage ], &tXfm );
		fLayerTransform[ stage ] = true;
	}
	else if( fLayerTransform[ stage ] )
	{
		// We'd like to just turn it off, but the Voodoo board freaks if the
		// texture coordinates are 3-tuple for no apparent reason.
		fD3DDevice->SetTransform( sTextureStages[ stage ], &d3dIdentityMatrix );
		fLayerTransform[ stage ] = false;
	}

	// If there's an lod bias associated with the layer, set it here.
	// There usually isn't.
	float newBias = fLayerState[stage].fZFlags & hsGMatState::kZLODBias ? layer->GetLODBias() : fTweaks.fDefaultLODBias;
	if( newBias != fLayerLODBias[ stage ] )
	{
		fLayerLODBias[ stage ] = newBias;
		fD3DDevice->SetSamplerState( stage, D3DSAMP_MIPMAPLODBIAS, *(DWORD*)(&fLayerLODBias[ stage ]) );
	}
}

//// IUseTextureRef ///////////////////////////////////////////////////////////
// Set the texturing flags and texture.
void	plDXPipeline::IUseTextureRef( int stage, hsGDeviceRef *dRef, plLayerInterface* layer )
{
	plDXTextureRef *ref = (plDXTextureRef *)dRef;
	UInt32			xformFlags;

	UInt32 uvwSrc = layer->GetUVWSrc();

	// Keep track of how much managed memory has been "seen" since the last
	// evict, for that NVidia bug. Look for OSVERSIONINFO for more notes.
	if( ref->fUseTime <= fEvictTime )
		fManagedSeen += ref->fDataSize;

	// Also used for the same thing.
	if( ref->fUseTime ^ fTextUseTime )
	{
		plProfile_NewMem(CurrTex, ref->fDataSize);
		plProfile_Inc(NumTex);
		ref->fUseTime = fTextUseTime;

		fTexUsed += ref->fDataSize;
	}

	// DX pixel shaders require the TEXCOORDINDEX to be equal to the stage,
	// even though its ignored.
	if( layer->GetPixelShader() && (stage != uvwSrc) )
		uvwSrc = stage;

	// Update our UVW source
	if( fLayerUVWSrcs[ stage ] != uvwSrc )
	{
		fD3DDevice->SetTextureStageState( stage, D3DTSS_TEXCOORDINDEX, uvwSrc );
		fLayerUVWSrcs[ stage ] = uvwSrc;
	}

	if (!layer->GetVertexShader() && !layer->GetPixelShader())
	{
		/// Set the transform flags
		/// Note: the perspective projection flag must be taken from the layer, since it's layer-specific.
		/// Storing it on the texture ref is bad, because the texture ref can be shared among layers whose
		/// projection flags might not match. This should probably be cleaned up, but for now this fixes the
		/// problem.
		if( ref->GetFlags() & plDXTextureRef::kCubicMap )
			xformFlags = D3DTTFF_COUNT3;
		else if( layer->GetMiscFlags() & hsGMatState::kMiscPerspProjection )
			xformFlags = D3DTTFF_COUNT3 | D3DTTFF_PROJECTED;
		else
			xformFlags = D3DTTFF_COUNT2;

		if( xformFlags != fLayerXformFlags[ stage ] )
		{
			fLayerXformFlags[ stage ] = xformFlags;
			fD3DDevice->SetTextureStageState( stage, D3DTSS_TEXTURETRANSFORMFLAGS, xformFlags );
		}
	}

	// Update our current ref
	if( !ref->fD3DTexture )
	{
		if( ref->fData )
			IReloadTexture( ref );
	}
	else if( dRef == fLayerRef[ stage ] )
	{
		return;
	}
	hsRefCnt_SafeAssign( fLayerRef[ stage ], dRef );

	/// Actually make it active!
	fD3DDevice->SetTexture( stage, ref->fD3DTexture );
}

//// IStageStop ///////////////////////////////////////////////////////////////
// Tell the hardware we won't be using any more stages, starting at <stage>.
// This is more complicated than it sounds. Cases:
// a) stage is 0 or 1: we're done (texture times diffuse has already been
//		set), so just disable that stage.
// b) stage is 2 (two stages active):
//		b.0) one of the two stages skips texture color, or the hardware is limited
//				to 2 layers at once. Diffuse has already been modulated in, so just
//				disable stage 2.
//		b.1) both stages 0 and 1 use texture color and diffuse still needs to be
//				modulated in. Stage 2 modulates in diffuse, stage 3 is disabled.
// c) stage is 3 or more: append a modulation by diffuse at <stage> and disable
//		the stage after it.
// Note that this only applies to color, because diffuse alpha is always modulated
// in from the start.
void	plDXPipeline::IStageStop( UInt32 stage )
{
	// First stage whose color op ends up disabled; the Intel fixup below starts here.
	int firstOff = stage;

	// Note: even if we don't have a texture, we handle it similar to if we had one,
	// so the only special case we need here is if we only had one stage to set up -mcn
	switch( stage )
	{
	case 0:
	case 1:
		fD3DDevice->SetTextureStageState(stage, D3DTSS_COLOROP, D3DTOP_DISABLE);
		fD3DDevice->SetTextureStageState(stage, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
		fLayerState[ stage ].fBlendFlags = UInt32(-1);
		break;

	case 2:
		// The fMaxLayersAtOnce == 2 check is for the DX9.0c 2 texture limitation.
		// See ILayersAtOnce()
		if ((fLayerState[0].fBlendFlags & hsGMatState::kBlendNoTexColor)
			|| (fLayerState[1].fBlendFlags & hsGMatState::kBlendNoTexColor)
			|| fSettings.fMaxLayersAtOnce == 2)
		{
			fD3DDevice->SetTextureStageState(2, D3DTSS_COLOROP, D3DTOP_DISABLE);
			firstOff = 2;
		}
		else
		{
			fD3DDevice->SetTextureStageState(2, D3DTSS_COLOROP,   D3DTOP_MODULATE);
			fD3DDevice->SetTextureStageState(2, D3DTSS_COLORARG1, D3DTA_DIFFUSE);
			fD3DDevice->SetTextureStageState(2, D3DTSS_COLORARG2, D3DTA_CURRENT);

			fD3DDevice->SetTextureStageState(3, D3DTSS_COLOROP, D3DTOP_DISABLE);
			firstOff = 3;
		}

		fD3DDevice->SetTextureStageState(2, D3DTSS_ALPHAOP, D3DTOP_DISABLE);

		// Invalidate the cached blend state of both stages, whichever path ran.
		fLayerState[2].fBlendFlags = UInt32(-1);
		fLayerState[3].fBlendFlags = UInt32(-1);
		break;

	default:
		// This is directly contrary to the DX documentation, but in line with
		// the code generated by MFCTex (which works). The docs say:
		//	"Alpha operations cannot be disabled when color operations are enabled.
		//		Setting the alpha operation to D3DTOP_DISABLE when color blending
		//		is enabled causes undefined behavior."
		// But not disabling the earliest possible alpha stage causes the driver
		// to choke.
		fD3DDevice->SetTextureStageState(stage, D3DTSS_COLOROP,   D3DTOP_MODULATE);
		fD3DDevice->SetTextureStageState(stage, D3DTSS_COLORARG1, D3DTA_DIFFUSE);
		fD3DDevice->SetTextureStageState(stage, D3DTSS_COLORARG2, D3DTA_CURRENT);

		fD3DDevice->SetTextureStageState(stage, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
		fLayerState[stage].fBlendFlags = UInt32(-1);

		fD3DDevice->SetTextureStageState(stage+1, D3DTSS_COLOROP, D3DTOP_DISABLE);
		fLayerState[stage+1].fBlendFlags = UInt32(-1);

		firstOff = stage+1;
		break;
	}

	fLastEndingStage = stage;

	if( fSettings.fIsIntel )
	{
		// On Intel hardware, keep pass-through stages alive up to the highest UVW
		// source index used by the current layers, then disable the one after.
		int highestUVW = 0;
		int k;
		for( k = 0; k < fCurrNumLayers; k++ )
		{
			const int uvwIdx = fCurrMaterial->GetLayer(k + fCurrLayerIdx)->GetUVWSrc() & 0xf;
			if( uvwIdx > highestUVW )
				highestUVW = uvwIdx;
		}
		for( k = firstOff; k <= highestUVW; k++ )
		{
			fD3DDevice->SetTextureStageState(k, D3DTSS_COLOROP,   D3DTOP_SELECTARG2);
			fD3DDevice->SetTextureStageState(k, D3DTSS_COLORARG2, D3DTA_CURRENT);
		}
		fD3DDevice->SetTextureStageState(k, D3DTSS_COLOROP, D3DTOP_DISABLE);
	}
}

// IInvalidateState /////////////////////////////////////////////////////////////
// Documentation is unclear on what state persists or becomes invalid on switching
// a render target or finishing a frame. I put into this function things that show
// up as suspect, whether they "ought" to be here or not.
void plDXPipeline::IInvalidateState()
{
	fLastEndingStage = 0;
	fTexturing = false;
	int i;
	for( i = 0; i < 8; i++ )
	{
		hsRefCnt_SafeUnRef( fLayerRef[ i ] );
		fLayerRef[ i ] = nil;
		fD3DDevice->SetTexture( i, nil );
	}

	fLayerState[ 0 ].fZFlags = 0;
	fD3DDevice->SetRenderState( D3DRS_ZFUNC,        D3DCMP_LESSEQUAL );
	fD3DDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE );

	// This is a workaround for the latest ATI drivers (6.14.10.6422).
	// They seem to be caching something on lights (possibly only specular
	// lights, but I haven't been able to prove it) the first time they
	// are used in a render, and then not letting go when the camera
	// is moved for another render in the same frame (same BeginScene/EndScene pair).
	// The effect is very incorrect lighting. Moreover, if the multiple renders
	// per frame are infrequent (e.g. refreshing an environment map every few
	// seconds), you'll get flashes after the double render frames.
	// Workaround is to Disable all lights at render target switch, although
	// a more correct workaround might be to disable all lights at camera move.
	// All of this is strictly conjecture, so I'm going with what works.
	// Note also that I'm only disabling lights that are currently enabled
	// at the time of the render target switch. Since this is dealing with
	// a driver bug, it might be safer to disable them all, but timings
	// show that looping through all the lights in a scene like Teledahn exterior,
	// with hundreds of active lights, incurs a measurable expense (some milliseconds),
	// whereas disabling only the active lights fixes the known problem but costs
	// zero.
	plProfile_BeginTiming(ClearLights);

	hsBitIterator iterOff(fLights.fEnabledFlags);
	for( iterOff.Begin(); !iterOff.End(); iterOff.Advance() )
		fD3DDevice->LightEnable(iterOff.Current(), false);
	fLights.fEnabledFlags.Clear();
	fLights.fHoldFlags.Clear();

	plProfile_EndTiming(ClearLights);

	// This is very annoying. Set fTexturing to false doesn't work if the next layer
	// we draw doesn't have a texture. So we have to set this flag instead to force
	// a state update. I have an idea about how to do all of this a lot better, but
	// it's not time to do it...not yet at least.... --mcn
	fSettings.fVeryAnnoyingTextureInvalidFlag = true;
}

//// ILayersAtOnce ////////////////////////////////////////////////////////////
// Work out how many layers of <mat>, starting at layer index <which>, can be
// rendered in a single pass on the current hardware. The answer is stored in
// fCurrNumLayers and also returned; it is always at least 1.
UInt32	plDXPipeline::ILayersAtOnce( hsGMaterial *mat, UInt32 which )
{
	fCurrNumLayers = 1;

	if( fView.fRenderState & plPipeline::kRenderBaseLayerOnly )
		return fCurrNumLayers;

	plLayerInterface *baseLay = mat->GetLayer( which );

	if( IsDebugFlagSet(plPipeDbg::kFlagNoMultitexture) )
		return fCurrNumLayers;

	// Bump debugging modes always render a bump layer as a pair.
	if( (IsDebugFlagSet(plPipeDbg::kFlagBumpUV) || IsDebugFlagSet(plPipeDbg::kFlagBumpW))
		&& (baseLay->GetMiscFlags() & hsGMatState::kMiscBumpChans) )
	{
		fCurrNumLayers = 2;
		return fCurrNumLayers;
	}

	// These base layers must go alone.
	if( baseLay->GetBlendFlags() & hsGMatState::kBlendNoColor )
		return fCurrNumLayers;
	if( baseLay->GetMiscFlags() & hsGMatState::kMiscTroubledLoner )
		return fCurrNumLayers;

	// New DX9.0c limitation for cards that can only do 2 textures per pass.
	// We used to be able to set stage 0 and 1 to textures, and set stage 2 to the
	// diffuse color. With DX9.0c we just get two texture stages. Period.
	// Either we give up a texture or the diffuse color.
	// If the next layer is just using its texture for alpha, we can multiply
	// the diffuse color in stage 1. Otherwise, save it for the next pass.
	if( (fSettings.fMaxLayersAtOnce == 2)
		&& (mat->GetNumLayers() > which + 1)
		&& !(mat->GetLayer(which + 1)->GetBlendFlags() & hsGMatState::kBlendNoTexColor) )
	{
		return fCurrNumLayers;
	}

	// Now reserve space for piggy backs, and see if there are
	// any more layers we can pick up.
	int layerBudget = fSettings.fMaxLayersAtOnce - fActivePiggyBacks;
	if( which + layerBudget > mat->GetNumLayers() )
		layerBudget = mat->GetNumLayers() - which;

	int i = fCurrNumLayers;
	while( i < layerBudget )
	{
		plLayerInterface *candidate = mat->GetLayer(which + i);

		// Hardware can't source this UVW channel.
		if( (candidate->GetUVWSrc() & 0xf) > fSettings.fMaxUVWSrc )
			break;

		// Bound to the following layer, but there's no room left for that one.
		if( (candidate->GetMiscFlags() & hsGMatState::kMiscBindNext)
				&&(i+1 >= layerBudget) )
			break;

		if( candidate->GetMiscFlags() & hsGMatState::kMiscRestartPassHere )
			break;

		// Unless the previous layer binds this one, it has to qualify as an upper layer.
		if( !(mat->GetLayer(which+i-1)->GetMiscFlags() & hsGMatState::kMiscBindNext)
				&& !ICanEatLayer(candidate) )
			break;

		fCurrNumLayers++;
		i++;
	}
	return fCurrNumLayers;
}

//// ICanEatLayer /////////////////////////////////////////////////////////////
// Decide whether this layer can ride along as an upper layer in the current
// pass (returns true), or needs to be the base layer of another pass (false).
// An upper layer must have a texture, must not be flagged NoColor,
// AddColorTimesAlpha or TroubledLoner, must not be alpha blended with an
// ambient alpha below one, and must have Z writes turned off.
hsBool	plDXPipeline::ICanEatLayer( plLayerInterface* lay )
{
	if( !lay->GetTexture() )
		return false;

	const UInt32 blend = lay->GetBlendFlags();

	if( blend & hsGMatState::kBlendNoColor )
		return false;

	// Has to be base layer.
	if( blend & hsGMatState::kBlendAddColorTimesAlpha )
		return false;

	if( lay->GetMiscFlags() & hsGMatState::kMiscTroubledLoner )
		return false;

	if( (blend & hsGMatState::kBlendAlpha) && (lay->GetAmbientColor().a < hsScalar1) )
		return false;

	return (lay->GetZFlags() & hsGMatState::kZNoZWrite) != 0;
}

///////////////////////////////////////////////////////////////////////////////
//// Textures /////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

//// IReloadTexture ///////////////////////////////////////////////////////////
// Make sure the ref has a D3D texture resource (creating it when missing) and
// copy the ref's data into it. Cube map refs go through the cube create/fill
// functions, everything else through the 2D ones. If creation fails the ref
// is left without a D3D texture and nothing is filled.
void	plDXPipeline::IReloadTexture( plDXTextureRef *ref )
{
	const bool isCubeMap = ( ref->GetFlags() & plDXTextureRef::kCubicMap ) != 0;

	if( ref->fD3DTexture == nil )
	{
		if( isCubeMap )
			ref->fD3DTexture = IMakeD3DCubeTexture( ref, ref->fFormatType );
		else
			ref->fD3DTexture = IMakeD3DTexture( ref, ref->fFormatType );
	}

	// Creation failed, so there's nothing to fill.
	if( ref->fD3DTexture == nil )
		return;

	if( isCubeMap )
		IFillD3DCubeTexture( (plDXCubeTextureRef *)ref );
	else
		IFillD3DTexture( ref );
}

//// IMakeD3DTexture //////////////////////////////////////////////////////////
//	Create a managed-pool D3D texture sized and formatted per the ref given.
//	Returns the new texture, or nil if D3D refuses, in which case the failure
//	is written to pipeline.log. On success the ref's data size is added to the
//	managed texture accounting.

IDirect3DTexture9	*plDXPipeline::IMakeD3DTexture( plDXTextureRef *ref, D3DFORMAT formatType )
{
	D3DPOOL poolType = D3DPOOL_MANAGED;
	IDirect3DTexture9	*texPtr;

	// Flagged before the attempt, regardless of whether the create succeeds.
	fManagedAlloced = true;

	fSettings.fDXError = fD3DDevice->CreateTexture( ref->fMaxWidth, ref->fMaxHeight,
													ref->fMMLvs,
													0,
													formatType,
													poolType,
													&texPtr, NULL );
	if( FAILED( fSettings.fDXError ) )
	{
		IGetD3DError();

		// Owner name for the log; empty when there's no owner or the owner has no key.
		const char *ownerName = "";
		if( ref->fOwner && ref->fOwner->GetKey() )
			ownerName = ref->fOwner->GetKey()->GetUoid().GetObjectName();

		plStatusLog::AddLineS( "pipeline.log", 0xffff0000, "Unable to create texture (%s) Owner: %s "
											"Size: %d x %d NumLvls: %d Flags: %x",
											fSettings.fErrorStr, ownerName,
											ref->fMaxWidth, ref->fMaxHeight, ref->fMMLvs, ref->GetFlags() );
		return nil;
	}

	PROFILE_POOL_MEM(poolType, ref->fDataSize, true, (ref->fOwner ? ref->fOwner->GetKey() ? ref->fOwner->GetKey()->GetUoid().GetObjectName() : "(UnknownTexture)" : "(UnknownTexture)"));
	fTexManaged += ref->fDataSize;

	return texPtr;
}

//// IFillD3DTexture //////////////////////////////////////////////////////////
// Copy the ref's data into its D3D texture, one mip level at a time. Level
// sizes come from ref->fLevelSizes and the levels are read back to back from
// ref->fData. A nil data pointer or a failed lock is logged to pipeline.log
// and ends the fill early.
void	plDXPipeline::IFillD3DTexture( plDXTextureRef *ref )
{
	UInt8		*srcBytes = (UInt8 *)ref->fData;

	if( srcBytes == nil )
	{
		plStatusLog::AddLineS( "pipeline.log", 0xffff0000, "Unable to fill texture ref (data is nil) Owner: %s",
											ref->fOwner ? ref->fOwner->GetKey() ? ref->fOwner->GetKey()->GetUoid().GetObjectName() : "" : "" );
		return;
	}

	IDirect3DTexture9 *d3dTex = (IDirect3DTexture9 *)ref->fD3DTexture;

	int level;
	for( level = 0; level < ref->fMMLvs; level++ )
	{
		D3DLOCKED_RECT		locked;

		fSettings.fDXError = d3dTex->LockRect( level, &locked, nil, 0 );
		if( FAILED( fSettings.fDXError ) )
		{
			IGetD3DError();
			plStatusLog::AddLineS( "pipeline.log", 0xffff0000, "Unable to lock texture level %d for filling (%s) Owner: %s "
												"Size: %d x %d NumLvls: %d Flags: %x",
												level, fSettings.fErrorStr, ref->fOwner ? ref->fOwner->GetKey() ? ref->fOwner->GetKey()->GetUoid().GetObjectName() : "" : "",
												ref->fMaxWidth, ref->fMaxHeight, ref->fMMLvs, ref->GetFlags() );
			return;
		}

		// The whole level is copied as one contiguous run, then we step to the next one.
		memcpy( (char *)locked.pBits, srcBytes, ref->fLevelSizes[ level ] );
		srcBytes += ref->fLevelSizes[ level ];
		d3dTex->UnlockRect( level );
	}
}

//// IMakeD3DCubeTexture //////////////////////////////////////////////////////
//	Makes a DX Cubic Texture object based on the ref given.
//	The cube is created in the managed pool with ref->fMaxWidth as its edge
//	length and ref->fMMLvs mip levels. Returns the new cube texture; texPtr
//	starts out nil, so nil is what comes back if the create doesn't fill it in.

IDirect3DCubeTexture9	*plDXPipeline::IMakeD3DCubeTexture( plDXTextureRef *ref, D3DFORMAT formatType )
{
	D3DPOOL					poolType = D3DPOOL_MANAGED;
	IDirect3DCubeTexture9	*texPtr = nil;
	// Flagged before the attempt, regardless of whether the create succeeds.
	fManagedAlloced = true;
	WEAK_ERROR_CHECK(fD3DDevice->CreateCubeTexture( ref->fMaxWidth, ref->fMMLvs, 0, formatType, poolType, &texPtr, NULL));
	// NOTE(review): unlike IMakeD3DTexture, the accounting below appears to run even
	// when creation failed - confirm against what WEAK_ERROR_CHECK does on failure.
	PROFILE_POOL_MEM(poolType, ref->fDataSize, true, (ref->fOwner ? ref->fOwner->GetKey() ? ref->fOwner->GetKey()->GetUoid().GetObjectName() : "(UnknownTexture)" : "(UnknownTexture)"));
	fTexManaged += ref->fDataSize;
	return texPtr;
}

//// IFillD3DCubeTexture //////////////////////////////////////////////////////
// Fill in all faces of the D3D cube map from the input reference.
// Face 0 is read from ref->fData and faces 1-5 from ref->fFaceData[0..4];
// each face holds its mip levels back to back, sized by ref->fLevelSizes.
// As in IFillD3DTexture, a nil data pointer or a failed lock is logged to
// pipeline.log and ends the fill early, rather than copying through an
// invalid pointer.
void	plDXPipeline::IFillD3DCubeTexture( plDXCubeTextureRef *ref )
{
	int					i, f;
	// Order in which the ref stores its faces, mapped to the D3D face enum.
	D3DCUBEMAP_FACES	faces[ 6 ] = {  D3DCUBEMAP_FACE_NEGATIVE_X,		// Left
										D3DCUBEMAP_FACE_POSITIVE_X,		// Right
										D3DCUBEMAP_FACE_POSITIVE_Z,		// Front
										D3DCUBEMAP_FACE_NEGATIVE_Z,		// Back
										D3DCUBEMAP_FACE_POSITIVE_Y,		// Top
										D3DCUBEMAP_FACE_NEGATIVE_Y };	// Bottom

	IDirect3DCubeTexture9	*lpDst = (IDirect3DCubeTexture9 *)ref->fD3DTexture;

	for( f = 0; f < 6; f++ )
	{
		UInt8	*pTexDat = ( f == 0 ) ? (UInt8 *)ref->fData : (UInt8 *)ref->fFaceData[ f - 1 ];

		if( pTexDat == nil )
		{
			plStatusLog::AddLineS( "pipeline.log", 0xffff0000, "Unable to fill cube texture ref (data for face %d is nil) Owner: %s",
												f, ref->fOwner ? ref->fOwner->GetKey() ? ref->fOwner->GetKey()->GetUoid().GetObjectName() : "" : "" );
			return;
		}

		for( i = 0; i < ref->fMMLvs; i++ )
		{
			D3DLOCKED_RECT		lockInfo;

			// lockInfo is only valid if the lock succeeded.
			if( FAILED( fSettings.fDXError = lpDst->LockRect( faces[ f ], i, &lockInfo, nil, 0 ) ) )
			{
				IGetD3DError();
				plStatusLog::AddLineS( "pipeline.log", 0xffff0000, "Unable to lock cube texture face %d level %d for filling (%s) Owner: %s "
													"Size: %d x %d NumLvls: %d Flags: %x",
													f, i, fSettings.fErrorStr, ref->fOwner ? ref->fOwner->GetKey() ? ref->fOwner->GetKey()->GetUoid().GetObjectName() : "" : "",
													ref->fMaxWidth, ref->fMaxHeight, ref->fMMLvs, ref->GetFlags() );
				return;
			}

			memcpy( (char *)lockInfo.pBits, pTexDat, ref->fLevelSizes[ i ] );
			pTexDat += ref->fLevelSizes[ i ];
			lpDst->UnlockRect( faces[ f ], i );
		}
	}
}

//// MakeTextureRef ///////////////////////////////////////////////////////////
//	Creates (or re-purposes) the hsGDeviceRef for a mipmap and loads its data
//	into a D3D texture.
//	May have to decompress the texture if the hardware doesn't support compressed
//	textures (unlikely), and may colorize a copy of it when the mipmap colorizing
//	debug flag is set. Any such temporary copies are deleted before returning.
//	layer - optional; when non-nil its projection/bump misc flags are mirrored
//			onto the ref.
//	b     - the mipmap to make a ref for; dereferenced unchecked.
//	Returns the texture ref, which is also set as the device ref of <b>.
hsGDeviceRef	*plDXPipeline::MakeTextureRef( plLayerInterface* layer, plMipmap *b )
{
	plMipmap	*original = b;

	// If the hardware doesn't support Luminance maps, we'll just treat as ARGB.
	if( !( fSettings.fD3DCaps & kCapsLuminanceTextures ) )
		b->SetFlags( b->GetFlags() & ~plMipmap::kIntensityMap );

	/// Colorize if we're supposed to (8.21.2000 mcn)
	// Debugging only.
	if (IsDebugFlagSet(plPipeDbg::kFlagColorizeMipmaps))
	{
		b = original->Clone();
		if( b != nil )
			b->Colorize();
		else
			b = original;
	}

	if( !( fSettings.fD3DCaps & kCapsCompressTextures ) && b->IsCompressed() )
	{
		plMipmap *uncompressed = hsCodecManager::Instance().CreateUncompressedMipmap( b, hsCodecManager::k16BitDepth );

		// If we were working on a colorized clone, it's superseded by the
		// uncompressed copy. Delete it here, because the cleanup at the end
		// only knows about the final temporary.
		if( b != original )
			delete b;
		b = uncompressed;
	}

	/// Set up some stuff
	UInt32		mmlvs      = 1;
	D3DFORMAT	formatType = D3DFMT_UNKNOWN;	// D3D Format
	UInt32		formatSize = 0;
	UInt32		totalSize = 0;
	UInt32*		levelSizes = nil;
	UInt32		numPix = 0;
	UInt32		externData = false;
	void		*tData = nil;
	hsBool		noMip = !(fSettings.fD3DCaps & kCapsMipmap);


	/// Convert the bitmap over
	// Select a target format
	IGetD3DTextureFormat( b, formatType, formatSize );

	// Process the texture data into a format that can be directly copied to the D3D texture.
	// externData returned as true means that tData just points directly into the mipmap's fImage,
	// so don't delete it when deleting the texture device ref. externData false means this is
	// a reformatted copy, so the ref owns it.
	externData = IProcessMipmapLevels( b, mmlvs, levelSizes, totalSize, numPix, tData, noMip );

	// If the texture has a device ref, just re-purpose it, else make one and initialize it.
	plDXTextureRef *ref = (plDXTextureRef *)b->GetDeviceRef();
	if( !ref )
	{
		ref = TRACKED_NEW plDXTextureRef( formatType,
										  mmlvs, b->GetWidth(), b->GetHeight(),
										  numPix, totalSize, totalSize, levelSizes,
										  tData, externData );
		ref->fOwner = original;
		ref->Link( &fTextureRefList );
		original->SetDeviceRef( ref );
		// Note: this is because SetDeviceRef() will ref it, and at this point,
		// only the bitmap should own the ref, not us. We ref/unref it on Use()
		hsRefCnt_SafeUnRef( ref );
	}
	else
		ref->Set( formatType, mmlvs, b->GetWidth(), b->GetHeight(),
				  numPix, totalSize, totalSize, levelSizes, tData, externData );

	// Keep the refs in a linked list for easy disposal.
	if( !ref->IsLinked() )
	{
		// Re-linking
		ref->Link( &fTextureRefList );
	}

	/// Copy the data into the ref
	IReloadTexture( ref );

	// The data has been handed to D3D; drop our pointer to it.
	ref->fData = nil;
	ref->SetDirty( false );

	// Set any implied flags.
	if (layer)
	{
		if( layer->GetMiscFlags() & hsGMatState::kMiscPerspProjection )
			ref->SetFlags(ref->GetFlags() | plDXTextureRef::kPerspProjection);
		else if( layer->GetMiscFlags() & hsGMatState::kMiscOrthoProjection )
			ref->SetFlags(ref->GetFlags() | plDXTextureRef::kOrthoProjection);

		if( layer->GetMiscFlags() & hsGMatState::kMiscBumpDw )
			ref->SetFlags(ref->GetFlags() | plDXTextureRef::kUVWNormal);
	}

	if( b != original )
		delete b;		// Delete if we created a new (temporary) one

	// Turn this on to delete the plasma system memory copy once we have a D3D managed version.
	// Currently disabled, because there are still mipmaps that are read from after their managed
	// versions are created, but aren't flagged kDontThrowAwayImage or kUserOwnsBitmap.
	if( !( original->GetFlags() & ( plMipmap::kUserOwnsBitmap | plMipmap::kDontThrowAwayImage ) )
		&& !GetProperty( kPropDontDeleteTextures ) )
	{
#ifdef MF_TOSSER
		original->Reset();
#endif // MF_TOSSER
	}

	return ref;
}

//// IMakeCubicTextureRef /////////////////////////////////////////////////////
// Same job as MakeTextureRef, but for the six faces of a cube map: gather the
// faces (decompressing them when the hardware can't take compressed textures),
// create or re-purpose the cube texture ref, and load it into D3D.
// Returns nil if <cubic> is nil or the hardware has no cube map support.
// The <layer> argument is not consulted here.
hsGDeviceRef	*plDXPipeline::IMakeCubicTextureRef( plLayerInterface* layer, plCubicEnvironmap *cubic )
{
	if( cubic == nil || !( fSettings.fD3DCaps & kCapsCubicTextures ) )
		return nil;

	plMipmap			*faces[ 6 ];
	void				*textureData[ 6 ];
	D3DFORMAT			formatType = D3DFMT_UNKNOWN;
	UInt32				formatSize = 0;
	UInt32				numLevels = 1;
	UInt32				totalSize = 0;
	UInt32				*levelSizes = nil;
	UInt32				numPixels = 0;
	UInt32				externData;
	int					face;

	// Mipmapping needs both the general and the cube map mipmap caps.
	hsBool noMip = !( (fSettings.fD3DCaps & kCapsMipmap) && (fSettings.fD3DCaps & kCapsCubicMipmap) );

	/// Get the mips, swapping in uncompressed copies where the hardware needs them
	const bool mustDecompress = !( fSettings.fD3DCaps & kCapsCompressTextures );
	for( face = 0; face < 6; face++ )
	{
		faces[ face ] = cubic->GetFace( face );
		if( mustDecompress && faces[ face ]->IsCompressed() )
			faces[ face ] = hsCodecManager::Instance().CreateUncompressedMipmap( faces[ face ], hsCodecManager::k16BitDepth );
	}

	/// Create the ref
	// Get format
	IGetD3DTextureFormat( faces[0], formatType, formatSize );

	// Process the data.
	if( faces[0]->IsCompressed() || ( faces[0]->GetPixelSize() < 32 ) )
	{
		/// For this, we just take the image data pointers directly, so only call IProcess once
		externData = IProcessMipmapLevels( faces[ 0 ], numLevels, levelSizes, totalSize, numPixels, textureData[ 0 ], noMip );
		for( face = 1; face < 6; face++ )
			textureData[ face ] = faces[ face ]->GetImage();
	}
	else
	{
		/// Some of this will be redundant, but oh well
		for( face = 0; face < 6; face++ )
			externData = IProcessMipmapLevels( faces[ face ], numLevels, levelSizes, totalSize, numPixels, textureData[ face ], noMip );
	}

	// If the cube map already has a device ref, re-purpose it, else make a new one.
	plDXCubeTextureRef *ref = (plDXCubeTextureRef *)cubic->GetDeviceRef();
	if( ref )
	{
		ref->Set( formatType, numLevels, faces[ 0 ]->GetWidth(), faces[ 0 ]->GetHeight(),
				  numPixels, totalSize, totalSize * 6, levelSizes, textureData[ 0 ], externData );

		// Face 0 lives in the ref's main data pointer; the rest go in fFaceData.
		for( face = 1; face < 6; face++ )
			ref->fFaceData[ face - 1 ] = textureData[ face ];
	}
	else
	{
		ref = TRACKED_NEW plDXCubeTextureRef( formatType,
										  numLevels, faces[ 0 ]->GetWidth(), faces[ 0 ]->GetHeight(),
										  numPixels, totalSize, totalSize * 6, levelSizes,
										  textureData[ 0 ], externData );
		ref->fOwner = cubic;
		ref->Link( &fTextureRefList );	// So we don't ref later on down
		for( face = 1; face < 6; face++ )
			ref->fFaceData[ face - 1 ] = textureData[ face ];

		cubic->SetDeviceRef( ref );
		// Note: this is because SetDeviceRef() will ref it, and at this point,
		// only the bitmap should own the ref, not us. We ref/unref it on Use()
		hsRefCnt_SafeUnRef( ref );
	}
	ref->SetFlags( ref->GetFlags() | plDXTextureRef::kCubicMap );

	// Put in linked list for easy disposal.
	if( !ref->IsLinked() )
	{
		// Re-linking
		ref->Link( &fTextureRefList );
	}

	/// Copy the data into the ref
	IReloadTexture( ref );
	ref->SetDirty( false );

	/// Cleanup
	for( face = 0; face < 6; face++ )
	{
		// Anything that isn't the cube map's own face is a temporary we made above.
		if( faces[ face ] != cubic->GetFace( face ) )
			delete faces[ face ];
		if( !( cubic->GetFace(face)->GetFlags() & (plMipmap::kUserOwnsBitmap | plMipmap::kDontThrowAwayImage) ) && !GetProperty( kPropDontDeleteTextures ) )
		{
			// Turn this on to delete the plasma system memory copy once we have a D3D managed version.
			// Currently disabled, because there are still mipmaps that are read from after their managed
			// versions are created, but aren't flagged kDontThrowAwayImage or kUserOwnsBitmap.
//			cubic->GetFace(face)->Reset();
		}
	}

	return ref;
}

//// IProcessMipmapLevels /////////////////////////////////////////////////////
// Work out how the given mipmap should be uploaded and fill in the output
// arguments accordingly:
//	numLevels	- number of mip levels to upload (1 when noMip is set).
//	levelSizes	- newly allocated array of per-level byte sizes, or nil when
//				  noMip is set. This function never frees it.
//				  NOTE(review): presumably ownership passes to the texture ref
//				  built by the caller - confirm.
//	totalSize	- total number of bytes that textureData refers to.
//	numPixels	- total pixel count for the reformatted path, 0 for the
//				  direct path.
//	textureData	- pointer to the bytes to upload.
// Return true if the data returned points directly into the mipmap data,
// return false if textureData is a reformatted copy of the mipmap's data
// living in the shared scratch buffer returned by IGetPixelScratch().
hsBool	plDXPipeline::IProcessMipmapLevels( plMipmap *mipmap, UInt32 &numLevels,
											UInt32 *&levelSizes, UInt32 &totalSize,
											UInt32 &numPixels, void *&textureData, hsBool noMip )
{
	hsBool		externData = false;
	D3DFORMAT	formatType = D3DFMT_UNKNOWN;	// D3D Format
	// NOTE(review): formatSize is only assigned inside IGetD3DTextureFormat();
	// it stays uninitialized if that function finds no usable format.
	UInt32		formatSize;


	IGetD3DTextureFormat( mipmap, formatType, formatSize );

	// Compressed or 16 bit, we can use directly.
	if( mipmap->IsCompressed() || ( mipmap->GetPixelSize() < 32 ) )
	{
		// No reformatting happens on this path, so no pixel count is reported.
		numPixels = 0;
		if( noMip )
		{
			// Only the top level is uploaded.
			numLevels = 1;
			levelSizes = nil;
			totalSize = mipmap->GetLevelSize(0);
		}
		else
		{
			// A level is rejected while its width or height has any of these
			// low bits set, i.e. is not a multiple of 4 (or of 8 below).
			UInt32			sizeMask = 0x03;

			/// 10.31.2000 - If we have this flag set, we really have to cut out
			/// sizes under 8x8. So far only true on the KYRO...
			if( fSettings.fD3DCaps & kCapsNoKindaSmallTexs )
				sizeMask = 0x07;

			int maxLevel = mipmap->GetNumLevels() - 1;

			/// 9.7.2000 - Also do this test if the card doesn't support
			/// itty bitty textures
			if( mipmap->IsCompressed() || !( fSettings.fD3DCaps & kCapsDoesSmallTextures ) )
			{
				// Walk up from the smallest level until both dimensions pass
				// the sizeMask test. This moves the mipmap's current level.
				mipmap->SetCurrLevel( maxLevel );
				while( ( mipmap->GetCurrWidth() | mipmap->GetCurrHeight() ) & sizeMask )
				{
					maxLevel--;
					// Only an assert guards against running past level 0.
					hsAssert( maxLevel >= 0, "How was this ever compressed?" );
					mipmap->SetCurrLevel( maxLevel );
				}
			}

			// Put the mipmap's current level back to the top level.
			mipmap->SetCurrLevel( 0 );
			totalSize = 0;
			numLevels = maxLevel + 1;
			levelSizes = TRACKED_NEW UInt32[ numLevels ];
			int i;
			for( i = 0; i < numLevels; i++ )
			{
				levelSizes[ i ] = mipmap->GetLevelSize( i );
				totalSize += mipmap->GetLevelSize( i );
			}
		}

		// Hand back the mipmap's own image memory, uncopied.
		textureData = mipmap->GetImage();
		externData = true;
	}
	else
	{
		// 32 bit uncompressed data. In general, we reformat to 16 bit if we're running
		// 16 bit, or if 32 bit leave it at 32. All subject to what the hardware can do
		// and what the texture is for. See IGetD3DTextureFormat.
		// Convert the destination format size from bits to bytes per pixel.
		formatSize >>= 3;

		if( !noMip )
		{
			// Total source bytes * 8 / source bits per pixel = pixels across all levels.
			numPixels = mipmap->GetTotalSize() * 8 / mipmap->GetPixelSize();
			numLevels = mipmap->GetNumLevels();

			levelSizes = TRACKED_NEW UInt32[ numLevels ];

			int		i;
			UInt32 w, h;
			for( i = 0; i < numLevels; i++ )
			{
				// Only the dimensions are used here; the returned pointer is ignored.
				mipmap->GetLevelPtr( i, &w, &h );
				// Level size in bytes in the destination format.
				levelSizes[ i ] = w * h * formatSize;
			}
		}
		else
		{
			// Top level only.
			numPixels = mipmap->GetWidth() * mipmap->GetHeight();
			numLevels = 1;
			levelSizes = nil;
		}
		totalSize = numPixels * formatSize;

		// Shared scratch space to reformat a texture before it's copied into
		// the D3D surface. The buffer is reused by every call, so the data is
		// only valid until the next request for scratch space.
		textureData = IGetPixelScratch( totalSize );

		// Convert it to the requested format.
		IFormatTextureData( formatType, numPixels, (hsRGBAColor32 *)mipmap->GetImage(), textureData );
	}

	return externData;
}

//// IGetPixelScratch /////////////////////////////////////////////////////////
// Return scratch space of at least <size> bytes, to reformat a mipmap into.
// The buffer is a single function-static allocation shared by every caller:
//	- it only ever grows; a request that fits in the current buffer returns
//	  the existing pointer unchanged,
//	- a request for 0 bytes frees the buffer and returns nil,
//	- any pointer previously returned may be invalidated by a later call.
// Returns nil only for size == 0 or if the allocation itself yields nil.
void	*plDXPipeline::IGetPixelScratch( UInt32 size )
{
	static char		*sPtr = nil;
	static UInt32	sSize = 0;

	if( size == 0 )
	{
		// Explicit request to release the scratch space.
		delete [] sPtr;
		sPtr = nil;
		sSize = 0;
	}
	else if( size > sSize )
	{
		// Drop the old buffer and clear the bookkeeping first, so the statics
		// never describe freed memory if the allocation below fails.
		delete [] sPtr;
		sPtr = nil;
		sSize = 0;

		sPtr = TRACKED_NEW char[ size ];
		// Only record the new capacity once we actually own the memory.
		if( sPtr != nil )
			sSize = size;
	}

	return sPtr;
}

//// IGetD3DTextureFormat /////////////////////////////////////////////////////
//	Given a bitmap, finds the matching D3D format.
//	b			- bitmap to examine; must not be nil.
//	formatType	- receives the chosen D3DFORMAT.
//	texSize		- receives the bits per pixel of the chosen format
//				  (8, 16 or 32), or 0 for compressed formats.
//	NOTE(review): when none of the candidate formats in a branch is allowed
//	(or the compression type is unknown), formatType and/or texSize are left
//	exactly as the caller passed them in; the final assert only catches this
//	if the caller pre-initialized formatType to zero.

void	plDXPipeline::IGetD3DTextureFormat( plBitmap *b, D3DFORMAT &formatType, UInt32& texSize )
{
	hsAssert( b, "Nil input to GetTextureFormat()" );

	// kForce32Bit stops the 16 bit color depth setting from demoting the texture.
	hsBool prefer32bit = 0 != (b->GetFlags() & plBitmap::kForce32Bit);

	if( b->IsCompressed() )
	{
		// Compressed data maps straight onto the matching DXT format.
		hsAssert( plMipmap::kDirectXCompression == b->fCompressionType, "Unsupported compression format" );
		texSize = 0;
		switch( b->fDirectXInfo.fCompressionType )
		{
			case plMipmap::DirectXInfo::kDXT1:
				formatType = D3DFMT_DXT1;
				break;
//			case plMipmap::DirectXInfo::kDXT2:
//				formatType = D3DFMT_DXT2;
//				break;
//			case plMipmap::DirectXInfo::kDXT3:
//				formatType = D3DFMT_DXT3;
//				break;
//			case plMipmap::DirectXInfo::kDXT4:
//				formatType = D3DFMT_DXT4;
//				break;
			case plMipmap::DirectXInfo::kDXT5:
				formatType = D3DFMT_DXT5;
				break;
			default:
				// formatType is not written in this case.
				hsAssert(false, "Unknown DirectX compression format");
		}
	}
	else if( b->GetFlags() & plMipmap::kBumpEnvMap )
	{
		// Bump environment maps: 16 bit signed formats, with luminance when
		// the bitmap has an alpha channel. No ITextureFormatAllowed() check here.
		texSize = 16;
		if( b->GetFlags() & plMipmap::kAlphaChannelFlag )
			formatType = D3DFMT_L6V5U5;
		else
			formatType = D3DFMT_V8U8;
	}
	else if( b->GetPixelSize() == 16 )
	{
		// Source data is already 16 bit; pick the 16 bit format by flags
		// alone, again without consulting ITextureFormatAllowed().
		texSize = 16;
		if( b->GetFlags() & plMipmap::kIntensityMap )
		{
			if( b->GetFlags() & plMipmap::kAlphaChannelFlag )
				formatType = D3DFMT_A8L8;
			else
				formatType = D3DFMT_L8;
		}
		else
		{
			if( b->GetFlags() & plMipmap::kAlphaChannelFlag )
				formatType = D3DFMT_A4R4G4B4;
			else
				formatType = D3DFMT_A1R5G5B5;
		}
	}
	else if( b->GetFlags() & plMipmap::kIntensityMap )
	{
		// Intensity maps: prefer a luminance format, then fall back to a
		// color format. In each ladder the 16 bit color format is tried first
		// only when running 16 bit color without kForce32Bit, then 32 bit,
		// then the 16 bit format as a last resort.
		if( b->GetFlags() & plMipmap::kAlphaChannelFlag )
		{
			if( ITextureFormatAllowed( D3DFMT_A8L8 ) )
			{
				formatType = D3DFMT_A8L8;
				texSize = 16;
			}
			else if( !prefer32bit && ( fSettings.fColorDepth == 16 ) && ITextureFormatAllowed( D3DFMT_A4R4G4B4 ) )
			{
				formatType = D3DFMT_A4R4G4B4;
				texSize = 16;
			}
			else if( ITextureFormatAllowed( D3DFMT_A8R8G8B8 ) )
			{
				formatType = D3DFMT_A8R8G8B8;
				texSize = 32;
			}
			else if( ITextureFormatAllowed( D3DFMT_A4R4G4B4 ) )
			{
				formatType = D3DFMT_A4R4G4B4;
				texSize = 16;
			}
		}
		else
		{
			if( ITextureFormatAllowed( D3DFMT_L8 ) )
			{
				formatType = D3DFMT_L8;
				texSize = 8;
			}
			else if( !prefer32bit && ( fSettings.fColorDepth == 16 ) && ITextureFormatAllowed( D3DFMT_A1R5G5B5 ) )
			{
				formatType = D3DFMT_A1R5G5B5;
				texSize = 16;
			}
			else if( ITextureFormatAllowed( D3DFMT_A8R8G8B8 ) )
			{
				formatType = D3DFMT_A8R8G8B8;
				texSize = 32;
			}
			else if( ITextureFormatAllowed( D3DFMT_A1R5G5B5 ) )
			{
				formatType = D3DFMT_A1R5G5B5;
				texSize = 16;
			}
		}
	}
	else
	{
		// Ordinary color textures: same ladder as above without the
		// luminance candidates. A4R4G4B4 is the 16 bit choice when there is
		// an alpha channel, A1R5G5B5 otherwise.
		if( b->GetFlags() & plMipmap::kAlphaChannelFlag )
		{
			if( !prefer32bit && ( fSettings.fColorDepth == 16 ) && ITextureFormatAllowed( D3DFMT_A4R4G4B4 ) )
			{
				formatType = D3DFMT_A4R4G4B4;
				texSize = 16;
			}
			else if( ITextureFormatAllowed( D3DFMT_A8R8G8B8 ) )
			{
				formatType = D3DFMT_A8R8G8B8;
				texSize = 32;
			}
			else if( ITextureFormatAllowed( D3DFMT_A4R4G4B4 ) )
			{
				formatType = D3DFMT_A4R4G4B4;
				texSize = 16;
			}
		}
		else
		{
			if( !prefer32bit && ( fSettings.fColorDepth == 16 ) && ITextureFormatAllowed( D3DFMT_A1R5G5B5 ) )
			{
				formatType = D3DFMT_A1R5G5B5;
				texSize = 16;
			}
			else if( ITextureFormatAllowed( D3DFMT_A8R8G8B8 ) )
			{
				formatType = D3DFMT_A8R8G8B8;
				texSize = 32;
			}
			else if( ITextureFormatAllowed( D3DFMT_A1R5G5B5 ) )
			{
				formatType = D3DFMT_A1R5G5B5;
				texSize = 16;
			}
		}
	}

	// Fires when formatType is still zero on the way out.
	hsAssert( formatType, "failing to find format type" );
}

//// IFormatTextureData ///////////////////////////////////////////////////////
// Convert the input 32 bit uncompressed RGBA data into the requested format.
//	formatType	- destination D3DFORMAT; unsupported values only assert.
//	numPix		- number of pixels to convert.
//	src			- numPix source pixels, read through their r/g/b/a members.
//	dst			- destination buffer; written as numPix elements of 8, 16 or
//				  32 bits depending on formatType. The caller must size it.
void	plDXPipeline::IFormatTextureData( UInt32 formatType, UInt32 numPix, hsRGBAColor32* const src, void *dst )
{
	switch( formatType )
	{
		case D3DFMT_L6V5U5:
			{
				UInt16 *pixels = (UInt16 *)dst;
				hsRGBAColor32* p = src;
				hsRGBAColor32* end = src + numPix;

				while( p < end )
				{
					// Top 6 bits of alpha -> bits 15..10, top 5 bits of green
					// -> bits 9..5, top 5 bits of red -> bits 4..0.
					*pixels = ((p->a << 8) & 0xfc00)
						| ((p->g << 2) & 0x03e0)
						| ((p->r >> 3) & 0x001f);
					// With HS_DEBUGGING defined, the trailing "else" below
					// binds to the pixels++ after the #endif, so every path
					// advances pixels exactly once, same as the non-debug
					// build. Presumably the separate branches exist as
					// breakpoint locations - confirm before removing.
#ifdef HS_DEBUGGING
					if( *pixels & 0xfc00 )
						pixels++;
					else if( *pixels & 0x03e0 )
						pixels++;
					else if( *pixels & 0x001f )
						pixels++;
					else
#endif // HS_DEBUGGING
					pixels++;
					p++;
				}
			}
			break;

		case D3DFMT_V8U8:
			{
				UInt16 *pixels = (UInt16 *)dst;
				hsRGBAColor32* p = src;
				hsRGBAColor32* end = src + numPix;

				while( p < end )
				{
					// Green in the high byte, red in the low byte.
					*pixels = (p->g << 8)
						| (p->r << 0);
					pixels++;
					p++;
				}
			}
			break;

		case D3DFMT_A8L8:
			{
				UInt16 *pixels = (UInt16 *)dst;
				int i;
				hsRGBAColor32* const p = src;

				// Alpha in the high byte; the red channel is used as luminance.
				for(i =0; i < numPix; i++)
					pixels[i]= ((p[i].a & 0xff) << 8) | (p[i].r & 0xff);
			}
			break;

		case D3DFMT_A4R4G4B4:
			{
				UInt16 *pixels = (UInt16 *)dst;
				int i;
				hsRGBAColor32* const p = src;

				// Keep the top 4 bits of each channel: A 15..12, R 11..8, G 7..4, B 3..0.
				for(i =0; i < numPix; i++)
				{
					pixels[i]= (((p[i].r>>4) & 0xf) << 8)
								| (((p[i].g >> 4) & 0xf) << 4)
								| (((p[i].b >> 4) & 0xf) )
								| (((p[i].a >> 4) & 0xf) << 12);
				}
			}
			break;

		case D3DFMT_A1R5G5B5:
			{
				UInt16 *pixels = (UInt16 *)dst;
				int i;
				hsRGBAColor32* const p = src;

				// Top 5 bits of each color channel; the single alpha bit is
				// set for any non-zero source alpha.
				for(i =0; i < numPix; i++)
				{
					pixels[i]= (((p[i].r>>3) & 0x1f) << 10) |
								(((p[i].g >> 3) & 0x1f) << 5) |
								((p[i].b >> 3) & 0x1f) | ((p[i].a == 0) ? 0 : 0x8000);
				}
			}
			break;

		case D3DFMT_L8:
			{
				UInt8 *pixels = (UInt8 *)dst;
				int i;
				hsRGBAColor32* const p = src;

				// The red channel alone is used as luminance.
				for(i =0; i < numPix; i++)
					pixels[i]= p[i].r;
			}
			break;

		case D3DFMT_A8R8G8B8:
			{
				UInt32 *pixels = (UInt32 *)dst;
				int i;
				hsRGBAColor32* const p = src;

				// Pack as A 31..24, R 23..16, G 15..8, B 7..0.
				for(i =0; i < numPix; i++)
					pixels[i]= ( ( p[i].a << 24 ) | ( p[i].r << 16 ) | ( p[i].g << 8 ) | p[i].b );
			}
			break;

		default:
			// dst is left untouched for any other format.
			hsAssert(false, "Unknown texture format selected");
			break;
	}
}


///////////////////////////////////////////////////////////////////////////////
//// View Stuff ///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////


//// TestVisibleWorld /////////////////////////////////////////////////////////
// Test world space bounds against the current cull tree, rebuilding the tree
// first if it has been flagged dirty. Returns the cull tree's verdict.
hsBool plDXPipeline::TestVisibleWorld( const hsBounds3Ext& wBnd )
{
	// Bring the cull tree up to date before asking it anything.
	if( fView.fCullTreeDirty )
	{
		IRefreshCullTree();
	}

	const hsBool isVisible = fView.fCullTree.BoundsVisible(wBnd);
	return isVisible;
}

// Test a scene object for visibility. Returns true as soon as one of its
// drawable spans is drawable, passes the global vis set tests and has world
// bounds that pass the cull tree; returns false if the object has no draw
// interface or no span qualifies.
hsBool plDXPipeline::TestVisibleWorld( const plSceneObject* sObj )
{
	const plDrawInterface* drawIface = sObj->GetDrawInterface();
	if( !drawIface )
		return false;

	const int drawableCount = drawIface->GetNumDrawables();
	int d;
	for( d = 0; d < drawableCount; d++ )
	{
		// Only plDrawableSpans drawables are considered.
		plDrawableSpans* drawable = plDrawableSpans::ConvertNoRef(drawIface->GetDrawable(d));
		if( !drawable )
			continue;

		plDISpanIndex& spanIndices = drawable->GetDISpans(drawIface->GetDrawableMeshIndex(d));
		if( spanIndices.IsMatrixOnly() )
			continue;

		const int spanCount = spanIndices.GetCount();
		int s;
		for( s = 0; s < spanCount; s++ )
		{
			const plSpan* span = drawable->GetSpan(spanIndices[s]);

			// Spans flagged as not drawn never count.
			if( span->fProps & plSpan::kPropNoDraw )
				continue;

			// Must overlap the global "visible" set...
			if( !span->GetVisSet().Overlap(plGlobalVisMgr::Instance()->GetVisSet()) )
				continue;

			// ...and must not overlap the global "not visible" set.
			if( span->GetVisSet().Overlap(plGlobalVisMgr::Instance()->GetVisNot()) )
				continue;

			// First span whose bounds pass the cull tree settles it.
			if( TestVisibleWorld(span->fWorldBounds) )
				return true;
		}
	}

	return false;
}

//// GetViewAxesWorld /////////////////////////////////////////////////////////
// Fill axes[0..2] with the current world space view vectors, in the order
// across, up, view direction.
void	plDXPipeline::GetViewAxesWorld(hsVector3 axes[3] /* ac,up,at */ ) const
{
	hsVector3* out = axes;

	*out++ = GetViewAcrossWorld();	// axes[0]: across
	*out++ = GetViewUpWorld();		// axes[1]: up
	*out   = GetViewDirWorld();		// axes[2]: at
}

//// GetFOV ///////////////////////////////////////////////////////////////////
// Report the current horizontal and vertical field of view, in degrees, as
// stored in the view transform.
void	plDXPipeline::GetFOV(hsScalar& fovX, hsScalar& fovY) const
{
	const plViewTransform& xform = GetViewTransform();

	fovX = xform.GetFovXDeg();
	fovY = xform.GetFovYDeg();
}

//// SetFOV ///////////////////////////////////////////////////////////////////
// Store a new field of view (in degrees) on the view transform and switch
// the view transform to perspective projection.
void	plDXPipeline::SetFOV( hsScalar fovX, hsScalar fovY )
{
	plViewTransform& xform = IGetViewTransform();

	xform.SetFovDeg(fovX, fovY);
	xform.SetPerspective(true);
}

// Get the orthogonal projection view size in world units (e.g. feet).
// NOTE(review): this reads the view transform's *screen* width/height, while
// SetSize() writes through SetWidth()/SetHeight() - confirm that these refer
// to the same quantity.
void	plDXPipeline::GetSize( hsScalar& width, hsScalar& height ) const
{
	const plViewTransform& xform = GetViewTransform();

	width = xform.GetScreenWidth();
	height = xform.GetScreenHeight();
}

// Set the orthogonal projection view size in world units (e.g. feet).
// Also switches the view transform to orthogonal projection.
void	plDXPipeline::SetSize( hsScalar width, hsScalar height )
{
	plViewTransform& xform = IGetViewTransform();

	xform.SetWidth(width);
	xform.SetHeight(height);
	xform.SetOrthogonal(true);
}

//// GetDepth /////////////////////////////////////////////////////////////////
// Report the current near (hither) and far (yon) distances from the view
// transform.
void plDXPipeline::GetDepth(hsScalar& hither, hsScalar& yon) const
{
	const plViewTransform& xform = GetViewTransform();
	xform.GetDepth(hither, yon);
}

//// SetDepth /////////////////////////////////////////////////////////////////
// Store new near (hither) and far (yon) distances on the view transform.
void plDXPipeline::SetDepth(hsScalar hither, hsScalar yon)
{
	plViewTransform& xform = IGetViewTransform();
	xform.SetDepth(hither, yon);
}

//// ISavageYonHack ///////////////////////////////////////////////////////////
//	Yon correction for the Savage4 chipset (ex. Diamond Stealth III S540):
//	a yon falling in a band around 128, 256, 512 or 1024 is pushed to just
//	past that power of two.
//	Obsolete since we don't support the Savage4 chipset any more. Note that
//	the adjusted value is only held in a local and never written back to the
//	view transform, so this function currently changes nothing.
void	plDXPipeline::ISavageYonHack()
{
	// One row per problem band: yon inside (fCenter - fBelow, fCenter + fAbove)
	// is replaced by fCenter + fAbove.
	struct YonBand
	{
		float	fCenter;
		float	fBelow;
		float	fAbove;
	};
	static const YonBand kBands[] =
	{
		{ 128.f,	5.0f,	1.01f },
		{ 256.f,	10.0f,	1.02f },
		{ 512.f,	35.0f,	1.02f },
		{ 1024.f,	120.0f,	1.f }
	};
	const int kNumBands = sizeof( kBands ) / sizeof( kBands[ 0 ] );

	hsScalar yon = GetViewTransform().GetYon();

	int band;
	for( band = 0; band < kNumBands; band++ )
	{
		const YonBand& b = kBands[ band ];
		if( ( yon > b.fCenter - b.fBelow ) && ( yon < b.fCenter + b.fAbove ) )
		{
			yon = b.fCenter + b.fAbove;
			break;
		}
	}
}

//// GetWorldToCamera /////////////////////////////////////////////////////////
// Accessor for the world to camera matrix held by the current view settings.
const hsMatrix44& plDXPipeline::GetWorldToCamera() const
{
	const hsMatrix44& worldToCam = fView.GetWorldToCamera();
	return worldToCam;
}

//// GetCameraToWorld /////////////////////////////////////////////////////////
// Accessor for the camera to world matrix held by the current view settings.
const hsMatrix44& plDXPipeline::GetCameraToWorld() const
{
	const hsMatrix44& camToWorld = fView.GetCameraToWorld();
	return camToWorld;
}

// IUpdateViewFlags /////////////////////////////////////////////////////////
// Dirty anything cached dependent on the current camera matrix.
void plDXPipeline::IUpdateViewFlags()
{
	fView.fCullTreeDirty = true;

	fView.fWorldToCamLeftHanded = fView.GetWorldToCamera().GetParity();
}
//// SetWorldToCamera /////////////////////////////////////////////////////////
// Immediate set of camera transform.
void plDXPipeline::SetWorldToCamera(const hsMatrix44& w2c, const hsMatrix44& c2w)
{
	IGetViewTransform().SetCameraTransform(w2c, c2w);

	IUpdateViewFlags();

	IWorldToCameraToD3D();
}

// IWorldToCameraToD3D ///////////////////////////////////////////////////////
// Pass the current camera transform through to D3D.
void plDXPipeline::IWorldToCameraToD3D()
{
	D3DXMATRIX	mat;

	IMatrix44ToD3DMatrix( mat, fView.GetWorldToCamera() );
	fD3DDevice->SetTransform( D3DTS_VIEW, &mat );

	fView.fXformResetFlags &= ~fView.kResetCamera;

	fFrame++;
}

// SetViewTransform ///////////////////////////////////////////////////////////
// ViewTransform encapsulates everything about the current camera, viewport and
// window necessary to render or convert from world space to pixel space. Doesn't
// include the object dependent local to world transform.
// Set plViewTransform.h
void plDXPipeline::SetViewTransform(const plViewTransform& v)
{
	fView.fTransform = v;

	if( !v.GetScreenWidth() || !v.GetScreenHeight() )
	{
		fView.fTransform.SetScreenSize((UInt16)(fSettings.fOrigWidth), (UInt16)(fSettings.fOrigHeight));
	}

	IUpdateViewFlags();

	IWorldToCameraToD3D();
}

//// GetWorldToLocal //////////////////////////////////////////////////////////
// Accessor for the most recently recorded World to Local transform. It is
// only meaningful while an object is being rendered, which makes it of
// little use to outside callers.
const hsMatrix44& plDXPipeline::GetWorldToLocal() const
{
	const hsMatrix44& worldToLocal = fView.fWorldToLocal;
	return worldToLocal;
}

//// GetLocalToWorld //////////////////////////////////////////////////////////
// Accessor for the most recently recorded Local to World transform. It is
// only meaningful while an object is being rendered, which makes it of
// little use to outside callers.
const hsMatrix44& plDXPipeline::GetLocalToWorld() const
{
	const hsMatrix44& localToWorld = fView.fLocalToWorld;
	return localToWorld;
}

//// ISetLocalToWorld /////////////////////////////////////////////////////////
// Remember the local to world transform (and its inverse) of the object about
// to be rendered, update the state derived from it, and forward it to D3D.
void	plDXPipeline::ISetLocalToWorld( const hsMatrix44& l2w, const hsMatrix44& w2l )
{
	// Cache both directions of the transform.
	fView.fLocalToWorld = l2w;
	fView.fWorldToLocal = w2l;

	// Parity of the object transform feeds the winding order culling.
	fView.fLocalToWorldLeftHanded = fView.fLocalToWorld.GetParity();

	// Cached view vectors are now stale.
	fView.fViewVectorsDirty = true;

	ILocalToWorldToD3D();
}

// ILocalToWorldToD3D ///////////////////////////////////////////////////////////
// pass the current local to world tranform on to D3D.
void plDXPipeline::ILocalToWorldToD3D()
{
	D3DXMATRIX	mat;

	if( fView.fLocalToWorld.fFlags & hsMatrix44::kIsIdent )
		fD3DDevice->SetTransform( D3DTS_WORLD, &d3dIdentityMatrix );
	else
	{
		IMatrix44ToD3DMatrix( mat, fView.fLocalToWorld );
		fD3DDevice->SetTransform( D3DTS_WORLD, &mat );
	}

	fView.fXformResetFlags &= ~fView.kResetL2W;
}

//// IIsViewLeftHanded ////////////////////////////////////////////////////////
//	Returns true when the XOR of three values is non-zero: the view
//	transform's orthogonal flag, the local2world parity and the world2camera
//	parity.

hsBool	plDXPipeline::IIsViewLeftHanded()
{
	if( fView.fTransform.GetOrthogonal() ^ ( fView.fLocalToWorldLeftHanded ^ fView.fWorldToCamLeftHanded ) )
		return true;

	return false;
}

//// ScreenToWorldPoint ///////////////////////////////////////////////////////
// Given a screen space pixel position, and a world space distance from the camera, return a
// full world space position. I.e. cast a ray through a screen pixel dist feet, and where
// is it.
void	plDXPipeline::ScreenToWorldPoint( int n, UInt32 stride, Int32 *scrX, Int32 *scrY, hsScalar dist, UInt32 strideOut, hsPoint3 *worldOut )
{
	while( n-- )
	{
		hsPoint3 scrP;
		scrP.Set(float(*scrX++), float(*scrY++), float(dist));
		*worldOut++ = GetViewTransform().ScreenToWorld(scrP);
	}
}

// IRefreshCullTree ////////////////////////////////////////////////////////////////////
// The cull tree captures the view frustum and any occluders in the scene into a single
// BSP tree. See plCullTree.h. It must be recomputed any time the camera moves.
void plDXPipeline::IRefreshCullTree()
{
	if( fView.fCullTreeDirty )
	{
		plProfile_BeginTiming(DrawOccBuild);

		fView.fCullTree.Reset();

		fView.fCullTree.SetViewPos(GetViewPositionWorld());

		if (fCullProxy && !IsDebugFlagSet(plPipeDbg::kFlagOcclusionSnap))
		{
			fCullProxy->GetKey()->UnRefObject();
			fCullProxy = nil;
			SetDrawableTypeMask(GetDrawableTypeMask() & ~plDrawable::kOccSnapProxy);
		}
		hsBool doCullSnap = IsDebugFlagSet(plPipeDbg::kFlagOcclusionSnap)&& !fCullProxy && !fSettings.fViewStack.GetCount();
		if( doCullSnap )
		{
			fView.fCullTree.BeginCapturePolys();
			fView.fCullTree.SetVisualizationYon(GetViewTransform().GetYon());
		}
		fView.fCullTree.InitFrustum(GetViewTransform().GetWorldToNDC());
		fView.fCullTreeDirty = false;

		if( fView.fCullMaxNodes )
		{
			int i;
			for( i = 0; i < fCullPolys.GetCount(); i++ )
			{
				fView.fCullTree.AddPoly(*fCullPolys[i]);
				if( fView.fCullTree.GetNumNodes() >= fView.fCullMaxNodes )
					break;
			}
			fCullPolys.SetCount(0);
			plProfile_Set(OccPolyUsed, i);

			for( i = 0; i < fCullHoles.GetCount(); i++ )
			{
				fView.fCullTree.AddPoly(*fCullHoles[i]);
			}
			fCullHoles.SetCount(0);
			plProfile_Set(OccNodeUsed, fView.fCullTree.GetNumNodes());
		}
		if( doCullSnap )
		{
			fView.fCullTree.EndCapturePolys();
			IMakeOcclusionSnap();
		}

		plProfile_EndTiming(DrawOccBuild);
	}
}

// IMakeOcclusionSnap /////////////////////////////////////////////////////////////////////
// Debugging visualization tool only. Takes a snapshot of the current occlusion
// BSP tree and renders it until told to stop.
void plDXPipeline::IMakeOcclusionSnap()
{
	hsTArray<hsPoint3>& pos = fView.fCullTree.GetCaptureVerts();
	hsTArray<hsVector3>& norm = fView.fCullTree.GetCaptureNorms();
	hsTArray<hsColorRGBA>& color = fView.fCullTree.GetCaptureColors();
	hsTArray<UInt16>& tris = fView.fCullTree.GetCaptureTris();

	if( tris.GetCount() )
	{
		hsMatrix44 ident;
		ident.Reset();

		hsGMaterial* mat = TRACKED_NEW hsGMaterial;
		hsgResMgr::ResMgr()->NewKey( "OcclusionSnapMat", mat, plLocation::kGlobalFixedLoc );
		plLayer *lay = mat->MakeBaseLayer();
		lay->SetZFlags(hsGMatState::kZNoZWrite);
		lay->SetPreshadeColor(hsColorRGBA().Set(1.f, 0.5f, 0.5f, 1.f));
		lay->SetRuntimeColor(hsColorRGBA().Set(1.f, 0.5f, 0.5f, 1.f));
		lay->SetAmbientColor(hsColorRGBA().Set(0,0,0,1.f));
		lay->SetOpacity(0.5f);
		lay->SetBlendFlags(lay->GetBlendFlags() | hsGMatState::kBlendAlpha);

		fCullProxy = plDrawableGenerator::GenerateDrawable(pos.GetCount(),
											pos.AcquireArray(),
											norm.AcquireArray(),
											nil,
											0,
											color.AcquireArray(),
											true,
											nil,
											tris.GetCount(),
											tris.AcquireArray(),
											mat,
											ident,
											true,
											nil,
											nil);

		if( fCullProxy )
		{
			fCullProxy->GetKey()->RefObject();
			fCullProxy->SetType(plDrawable::kOccSnapProxy);

			SetDrawableTypeMask(GetDrawableTypeMask() | plDrawable::kOccSnapProxy);

			fCullProxy->PrepForRender(this);
		}
	}
	fView.fCullTree.ReleaseCapture();
}

// SubmitOccluders /////////////////////////////////////////////////////////////
// Replace the pipeline's occluder lists with the input polys, sorting them
// into holes and solid occluders, and flag the cull tree for a rebuild.
// Always returns true.
hsBool plDXPipeline::SubmitOccluders(const hsTArray<const plCullPoly*>& polyList)
{
	// Start from empty lists; previous submissions are discarded.
	fCullPolys.SetCount(0);
	fCullHoles.SetCount(0);

	int idx;
	for( idx = 0; idx < polyList.GetCount(); idx++ )
	{
		const plCullPoly* poly = polyList[idx];
		if( !poly->IsHole() )
			fCullPolys.Append(poly);
		else
			fCullHoles.Append(poly);
	}

	fView.fCullTreeDirty = true;
	return true;
}

//// RefreshScreenMatrices ////////////////////////////////////////////////////
// Force a refresh of cached state when the projection matrix changes.
void	plDXPipeline::RefreshScreenMatrices()
{
	fView.fCullTreeDirty = true;
	IProjectionMatrixToD3D();
}

//// RefreshMatrices //////////////////////////////////////////////////////////
//	Just a wrapper

void	plDXPipeline::RefreshMatrices()
{
	RefreshScreenMatrices();
}


///////////////////////////////////////////////////////////////////////////////
//// Overrides ////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

//// PushOverrideMaterial /////////////////////////////////////////////////////
// Push a material to be used instead of the material associated with objects
// for rendering. The pipeline takes a ref on the pushed material.
// Returns the override that was active before the push (nil if none).
// Must be matched with a PopOverrideMaterial.
hsGMaterial	*plDXPipeline::PushOverrideMaterial( hsGMaterial *mat )
{
	// Capture the outgoing override before the stack changes.
	hsGMaterial *previous = GetOverrideMaterial();

	hsRefCnt_SafeRef( mat );
	fOverrideMat.Push( mat );

	// Make the next material setup go through even if the handle looks unchanged.
	fForceMatHandle = true;

	return previous;
}

//// PopOverrideMaterial //////////////////////////////////////////////////////
// Stop overriding with the current override material and release the ref
// taken when it was pushed. The <restore> argument is not used.
// Must match a preceding PushOverrideMaterial.
void plDXPipeline::PopOverrideMaterial( hsGMaterial *restore )
{
	hsGMaterial *popped = fOverrideMat.Pop();
	hsRefCnt_SafeUnRef( popped );

	// If the popped override is the material currently set up, force the
	// next material setup to go through.
	if( popped == fCurrMaterial )
		fForceMatHandle = true;
}

//// GetOverrideMaterial //////////////////////////////////////////////////////
// Peek at the top of the override material stack; nil when the stack is empty.
hsGMaterial	*plDXPipeline::GetOverrideMaterial() const
{
	if( !fOverrideMat.GetCount() )
		return nil;

	return fOverrideMat.Peek();
}

//// GetMaterialOverrideOn ////////////////////////////////////////////////////
// Fetch the bits currently forced on for the given state category (e.g. ZFlags).
UInt32	plDXPipeline::GetMaterialOverrideOn( hsGMatState::StateIdx category ) const
{
	const UInt32 forcedOn = fMatOverOn.Value(category);
	return forcedOn;
}

//// GetMaterialOverrideOff ///////////////////////////////////////////////////
// Fetch the bits currently forced off for the given state category (e.g. ZFlags).
UInt32	plDXPipeline::GetMaterialOverrideOff( hsGMatState::StateIdx category ) const
{
	const UInt32 forcedOff = fMatOverOff.Value(category);
	return forcedOff;
}

//// PushMaterialOverride /////////////////////////////////////////////////////
// Force the material state bits in <state> on (on == true) or off
// (on == false). The bits are added to the chosen override set and removed
// from the opposite one. Returns the chosen set as it was before the change;
// save it as input to PopMaterialOverride, to restore previous values.
hsGMatState	plDXPipeline::PushMaterialOverride( const hsGMatState& state, hsBool on )
{
	hsGMatState previous = GetMaterialOverride( on );

	// The set receiving the bits, and the opposite set losing them.
	hsGMatState& target = on ? fMatOverOn : fMatOverOff;
	hsGMatState& opposite = on ? fMatOverOff : fMatOverOn;

	target |= state;
	opposite -= state;

	fForceMatHandle = true;
	return previous;
}

// PushMaterialOverride ///////////////////////////////////////////////////////
// Single category variant (e.g. Z flags): force the bits in <which> on
// (on == true) or off (on == false) within category <cat>. The bits are set
// in the chosen override set and cleared from the opposite one. Returns the
// chosen set as it was before the change; save it as input to
// PopMaterialOverride, to restore previous values.
hsGMatState	plDXPipeline::PushMaterialOverride(hsGMatState::StateIdx cat, UInt32 which, hsBool on)
{
	hsGMatState previous = GetMaterialOverride( on );

	// The set receiving the bits, and the opposite set losing them.
	hsGMatState& target = on ? fMatOverOn : fMatOverOff;
	hsGMatState& opposite = on ? fMatOverOff : fMatOverOn;

	target[ cat ] |= which;
	opposite[ cat ] &= ~which;

	fForceMatHandle = true;
	return previous;
}

//// PopMaterialOverride //////////////////////////////////////////////////////
// Restore the previous settings returned from the matching PushMaterialOverride.
void plDXPipeline::PopMaterialOverride(const hsGMatState& restore, hsBool on)
{
	if( on )
	{
		fMatOverOn = restore;
		fMatOverOff.Clear( restore );
	}
	else
	{
		fMatOverOff = restore;
		fMatOverOn.Clear( restore );
	}
	fForceMatHandle = true;
}

//// GetMaterialOverride //////////////////////////////////////////////////////
// Accessor for the material state bits currently forced on (on == true) or
// forced off (on == false).
const hsGMatState& plDXPipeline::GetMaterialOverride(hsBool on) const
{
	if( on )
		return fMatOverOn;

	return fMatOverOff;
}

//// PushColorOverride //////////////////////////////////////////////////
// Obsolete and unused. Returns whatever GetColorOverride() reports and
// forwards <over> to PopColorOverride().
hsColorOverride plDXPipeline::PushColorOverride(const hsColorOverride& over)
{
	hsColorOverride previous = GetColorOverride();

	PopColorOverride( over );

	return previous;
}

// PopColorOverride ////////////////////////////////////////////////////////
// Obsolete and unused. Intentionally a no-op; the former implementation is
// kept below for reference only.
void plDXPipeline::PopColorOverride(const hsColorOverride& restore)
{
/*
	hsColorOverride cpy = restore;
	if( !(cpy.fFlags & hsColorOverride::kModAlpha) )
		cpy.fColor.a = 1.f;
	if( !(cpy.fFlags & (hsColorOverride::kModAlpha | hsColorOverride::kModColor)) )
		fDev->SetColorNormal();
	else
		fDev->SetColorOverride(cpy.fColor, !(cpy.fFlags & hsColorOverride::kModColor));
*/
}

//// GetColorOverride /////////////////////////////////////////////////////////
// Obsolete and unused. Always returns a reference to the same function-static,
// default-constructed hsColorOverride. The former implementation is kept
// below for reference only.
const hsColorOverride& plDXPipeline::GetColorOverride() const
{
	static hsColorOverride ret;

/*	ret.fFlags = hsColorOverride::kNone;
	if( fDev->GetDebugFlags() & hsG3DDevice::kDeviceColor )
		ret.fFlags |= hsColorOverride::kModColor;
	if( fDev->GetDebugFlags() & hsG3DDevice::kDeviceAlpha )
		ret.fFlags |= hsColorOverride::kModAlpha;

	ret.fColor = fDev->GetColorOverride();
*/
	return ret;
}

///////////////////////////////////////////////////////////////////////////////
//// Transforms ///////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

//// IMatrix44ToD3DMatrix /////////////////////////////////////////////////////
// Fill dst with the D3D equivalent of the plasma matrix src and return dst.
// A source flagged as identity copies the shared D3D identity matrix;
// otherwise dst is the transpose of src.fMap.
D3DXMATRIX&		plDXPipeline::IMatrix44ToD3DMatrix( D3DXMATRIX& dst, const hsMatrix44& src )
{
	if( src.fFlags & hsMatrix44::kIsIdent )
	{
		dst = d3dIdentityMatrix;
		return dst;
	}

	// Transpose: element [row][col] of src lands at (col, row) in dst.
	unsigned int row, col;
	for( row = 0; row < 4; row++ )
	{
		for( col = 0; col < 4; col++ )
		{
			dst(col, row) = src.fMap[row][col];
		}
	}

	return dst;
}

/////////////////////////////////////////////////////////
// IGetCameraToNDC /////////////////////////////////////////////
// Build the camera to NDC transform, starting from the view transform's
// matrix and adjusting it by the current render layer to create a Z bias
// towards the camera for cases where the D3D Z bias fails us.
hsMatrix44 plDXPipeline::IGetCameraToNDC()
{
	hsMatrix44 cam2ndc = GetViewTransform().GetCameraToNDC();

	if( !fView.IsPerspective() )
	{
		// Non-perspective: a plain Z offset per render layer.
		plConst(float) kZTrans = -1.e-4f;
		cam2ndc.fMap[2][3] += kZTrans * fCurrRenderLayer;
		return cam2ndc;
	}

	// Want to scale down W and offset in Z without
	// changing values of x/w, y/w. This is just
	// minimal math for
	// Mproj' * p = Mscaletrans * Mproj * p
	// where Mscaletrans =
	// [ s 0 0 0 ]
	// [ 0 s 0 0 ]
	// [ 0 0 s 0 ]
	// [ 0 0 t s ]
	// Resulting matrix Mproj' is not exactly "Fog Friendly",
	// but is close enough.
	// Resulting point is [sx, sy, sz + tw, sw] and after divide
	// is [x/w, y/w, z/w + t/s, 1/sw]

	// Both the w-buffer and z-buffer cases use the same per-layer scale.
	// Note: the base value for perspLayerScale should be 0.001 for w-buffering,
	// not the normal 0.00001
	const float scale = 1.f - float(fCurrRenderLayer) * fTweaks.fPerspLayerScale;

	cam2ndc.fMap[0][0] *= scale;
	cam2ndc.fMap[1][1] *= scale;
	cam2ndc.fMap[2][2] *= scale;

	// W-buffering is only true w-buffering on 3dfx cards. On everything else,
	// they REALLY base it off the Z value. So with a w-buffer we scale (but
	// do NOT translate) the Z; with a z-buffer we do it the traditional way
	// and translate as well.
	const bool usesWBuffer = ( fSettings.fD3DCaps & kCapsWBuffer ) != 0;
	if( !usesWBuffer )
	{
		const float zTrans = -scale * float(fCurrRenderLayer) * fTweaks.fPerspLayerTrans;
		// Uses fMap[3][2] before it is scaled below.
		cam2ndc.fMap[2][2] += zTrans * cam2ndc.fMap[3][2];
	}

	cam2ndc.fMap[3][2] *= scale;

	return cam2ndc;
}

// IProjectionMatrixToD3D //////////////////////////////////////////////////////////
// Send the current camera to NDC transform to D3D.
void plDXPipeline::IProjectionMatrixToD3D()
{
	D3DXMATRIX matProjection;

	IMatrix44ToD3DMatrix( matProjection, IGetCameraToNDC() );

	fD3DDevice->SetTransform( D3DTS_PROJECTION, &matProjection );
	fView.fXformResetFlags &= ~fView.kResetProjection;
}

//// ISetCullMode /////////////////////////////////////////////////////////////
//	Works out the winding order cull mode (CW, CCW, or none) and sends it to
//	D3D if it differs from the cached one. Two-sided materials get no
//	culling; otherwise the mode depends on the view handedness, reversed when
//	<flip> is set.
void	plDXPipeline::ISetCullMode(hsBool flip)
{
	D3DCULL wanted;
	if( fLayerState[0].fMiscFlags & hsGMatState::kMiscTwoSided )
	{
		wanted = D3DCULL_NONE;
	}
	else
	{
		wanted = ( !IIsViewLeftHanded() ^ !flip ) ? D3DCULL_CW : D3DCULL_CCW;
	}

	// Already in the requested mode: skip the redundant render state call.
	if( wanted == fCurrCullMode )
		return;

	fCurrCullMode = wanted;
	fD3DDevice->SetRenderState( D3DRS_CULLMODE, fCurrCullMode );
}

//// ITransformsToD3D //////////////////////////////////////////////////////////
//	Re-sends to D3D every transform whose reset flag is pending in
//	fView.fXformResetFlags (camera, local-to-world, projection). Each helper
//	called here clears its own flag. Useful after popping renderTargets :)

void plDXPipeline::ITransformsToD3D()
{
	if( fView.fXformResetFlags & fView.kResetCamera )
		IWorldToCameraToD3D();

	if( fView.fXformResetFlags & fView.kResetL2W )
		ILocalToWorldToD3D();

	if( fView.fXformResetFlags & fView.kResetProjection )
		IProjectionMatrixToD3D();
}

// ISetupVertexBufferRef /////////////////////////////////////////////////////////
// Fill in a vertex buffer ref from vertex buffer `idx` of `owner` and hand it to
// the owner via SetVertexBufferRef. No D3D buffer is created here: fD3DBuffer and
// fData are left nil and the ref is flagged dirty.
void plDXPipeline::ISetupVertexBufferRef(plGBufferGroup* owner, UInt32 idx, plDXVertexBufferRef* vRef)
{
	// Start out with no D3D buffer, in case something goes wrong.
	vRef->fD3DBuffer = nil;

	UInt8 fmt = owner->GetVertexFormat();
	if( fmt & plGBufferGroup::kSkinIndices )
	{
		// All indexed skinning is currently done on CPU, so the source data
		// will have indices, but we strip them (and the weights) out of the
		// format used for the D3D buffer.
		fmt &= ~(plGBufferGroup::kSkinWeightMask | plGBufferGroup::kSkinIndices);
		fmt |= plGBufferGroup::kSkinNoWeights;		// Should do nothing, but just in case...
		vRef->SetSkinned(true);
		vRef->SetVolatile(true);
	}

	const UInt32 stride = IGetBufferFormatSize(fmt); // bytes per vertex in the stripped format
	const UInt32 vertCount = owner->GetVertBufferCount(idx);

	// Plain bookkeeping fields.
	vRef->fDevice = fD3DDevice;
	vRef->fOwner = owner;
	vRef->fIndex = idx;
	vRef->fFormat = fmt;
	vRef->fVertexSize = stride;
	vRef->fCount = vertCount;
	vRef->fRefTime = 0;
	vRef->fData = nil;

	vRef->SetDirty(true);
	vRef->SetRebuiltSinceUsed(true);

	// Volatile if the skinning path above said so, or if the owner says so.
	const bool isVolatile = vRef->Volatile() || owner->AreVertsVolatile();
	vRef->SetVolatile(isVolatile);

	owner->SetVertexBufferRef(idx, vRef);
	hsRefCnt_SafeUnRef(vRef);
}

// ICheckStaticVertexBuffer ///////////////////////////////////////////////////////////////////////
// Make sure a static (non-volatile) vertex buffer ref has its D3D vertex buffer.
// If it has none yet, create one in D3DPOOL_MANAGED sized fCount * fVertexSize,
// fill it from the owner's data and then ask the owner to purge its copy.
// Does nothing if the D3D buffer already exists.
void plDXPipeline::ICheckStaticVertexBuffer(plDXVertexBufferRef* vRef, plGBufferGroup* owner, UInt32 idx)
{
	hsAssert(!vRef->Volatile(), "Creating a managed vertex buffer for a volatile buffer ref");

	// Already created on an earlier call.
	if( vRef->fD3DBuffer )
		return;

	const DWORD fvf = IGetBufferD3DFormat(vRef->fFormat);
	D3DPOOL pool = D3DPOOL_MANAGED;
	const DWORD usageFlags = 0;		// note: D3DUSAGE_WRITEONLY is not requested
	const int vertCount = vRef->fCount;
	const int stride = vRef->fVertexSize;
	const int byteSize = vertCount * stride;

	fManagedAlloced = true;

	HRESULT hr = fD3DDevice->CreateVertexBuffer( byteSize,
												 usageFlags,
												 fvf,
												 pool,
												 &vRef->fD3DBuffer, NULL );
	if( FAILED( hr ) )
	{
		hsAssert( false, "CreateVertexBuffer() call failed!" );
		vRef->fD3DBuffer = nil;
		return;
	}
	PROFILE_POOL_MEM(pool, byteSize, true, "VtxBuff");

	// Record that we've allocated this into managed memory, in case we're
	// fighting that NVidia driver bug. Search for OSVERSION for more info.
	AllocManagedVertex(byteSize);

	// Copy the vertex data into the new buffer.
	IFillStaticVertexBufferRef(vRef, owner, idx);

	// This is currently a no op, but this would let the buffer know it can
	// unload the system memory copy, since we have a managed version now.
	owner->PurgeVertBuffer(idx);
}

// IFillStaticVertexBufferRef //////////////////////////////////////////////////
// BufferRef is set up, just copy the data in.
// Locks the byte range [start, end) of vertex buffer `idx` in the ref's D3D buffer
// and fills it either from ref->fData (when present) or from the group's source
// data, walking the group's cells.
// This is uglied up hugely by the insane non-interleaved data case with cells
// and whatever else.
// Returns quietly, leaving the ref dirty, if there is no D3D buffer, nothing to
// copy, or the lock fails.
void plDXPipeline::IFillStaticVertexBufferRef(plDXVertexBufferRef *ref, plGBufferGroup *group, UInt32 idx)
{
	IDirect3DVertexBuffer9* vertexBuff = ref->fD3DBuffer;

	if( !vertexBuff )
	{
		// We most likely already warned about this earlier, best to just quietly return now
		return;
	}

	const UInt32 vertSize = ref->fVertexSize;
	const UInt32 vertStart = group->GetVertBufferStart(idx) * vertSize;
	const UInt32 size = group->GetVertBufferEnd(idx) * vertSize - vertStart;
	if( !size )
		return;

	/// Lock the buffer
	UInt8* ptr = nil;
	if( FAILED( vertexBuff->Lock( vertStart, size, (void **)&ptr, group->AreVertsVolatile() ? D3DLOCK_DISCARD : 0 ) ) )
	{
		// Without a successful lock, ptr is not a valid destination and there
		// is nothing to unlock, so bail out rather than copy through it.
		hsAssert( false, "Failed to lock vertex buffer for writing" );
		return;
	}

	if( ref->fData )
	{
		// The ref carries its own copy of the vertex data; straight copy.
		memcpy(ptr, ref->fData + vertStart, size);
	}
	else
	{
		hsAssert(0 == vertStart, "Offsets on non-interleaved data not supported");
		hsAssert(group->GetVertBufferCount(idx) * vertSize == size, "Trailing dead space on non-interleaved data not supported");

		// Size of what follows position + normal in a vertex whose colors are
		// stored separately (the lite stride minus the two leading hsPoint3s).
		const UInt32 vertSmallSize = group->GetVertexLiteStride() - sizeof( hsPoint3 ) * 2;
		UInt8* srcVPtr = group->GetVertBufferData(idx);
		plGBufferColor* const srcCPtr = group->GetColorBufferData( idx );

		const int numCells = group->GetNumCells(idx);
		int i;
		for( i = 0; i < numCells; i++ )
		{
			plGBufferCell	*cell = group->GetCell( idx, i );

			if( cell->fColorStart == (UInt32)-1 )
			{
				/// Interleaved, do straight copy
				memcpy( ptr, srcVPtr + cell->fVtxStart, cell->fLength * vertSize );
				ptr += cell->fLength * vertSize;
			}
			else
			{
				/// Separated, gotta interleave
				UInt8* tempVPtr = srcVPtr + cell->fVtxStart;
				plGBufferColor* tempCPtr = srcCPtr + cell->fColorStart;
				int j;
				for( j = 0; j < cell->fLength; j++ )
				{
					// Two hsPoint3-sized fields first (position and normal)...
					memcpy( ptr, tempVPtr, sizeof( hsPoint3 ) * 2 );
					ptr += sizeof( hsPoint3 ) * 2;
					tempVPtr += sizeof( hsPoint3 ) * 2;

					// ...then the diffuse and specular colors from the color stream...
					memcpy( ptr, &tempCPtr->fDiffuse, sizeof( UInt32 ) );
					ptr += sizeof( UInt32 );
					memcpy( ptr, &tempCPtr->fSpecular, sizeof( UInt32 ) );
					ptr += sizeof( UInt32 );

					// ...then the rest of the vertex.
					memcpy( ptr, tempVPtr, vertSmallSize );
					ptr += vertSmallSize;
					tempVPtr += vertSmallSize;
					tempCPtr++;
				}
			}
		}
	}

	/// Unlock and clean up
	vertexBuff->Unlock();
	ref->SetRebuiltSinceUsed(true);
	ref->SetDirty(false);
}

// OpenAccess ////////////////////////////////////////////////////////////////////////////////////////
// Lock the D3D vertex buffer behind `span` and point the access span's vertex streams
// (position, weights, weight indices, normal, diffuse, specular, UVWs) into the locked data.
// Returns false, with dst set to kUndefined, if there is no buffer ref, no D3D buffer,
// the span is empty, or the lock fails. On success the buffer stays locked until CloseAccess.
hsBool plDXPipeline::OpenAccess(plAccessSpan& dst, plDrawableSpans* drawable, const plVertexSpan* span, hsBool readOnly)
{
	plGBufferGroup* group = drawable->GetBufferGroup(span->fGroupIdx);
	hsAssert(!group->AreVertsVolatile(), "Don't ask for D3DBuffer data on a volatile buffer");

	plDXVertexBufferRef* bufRef = (plDXVertexBufferRef*)group->GetVertexBufferRef(span->fVBufferIdx);
	if( !bufRef || !bufRef->fD3DBuffer )
	{
		dst.SetType(plAccessSpan::kUndefined);
		return false;
	}
	IDirect3DVertexBuffer9* d3dBuff = bufRef->fD3DBuffer;

	// Byte range of the span within the buffer.
	const UInt32 stride = bufRef->fVertexSize;
	const UInt32 byteStart = span->fVStartIdx * stride;
	const UInt32 byteLen = span->fVLength * stride;
	if( !byteLen )
	{
		dst.SetType(plAccessSpan::kUndefined);
		return false;
	}

	UInt8* cursor;
	const DWORD flags = readOnly ? D3DLOCK_READONLY : 0;
	if( FAILED( d3dBuff->Lock(byteStart, byteLen, (void **)&cursor, flags) ) )
	{
		hsAssert( false, "Failed to lock vertex buffer for writing" );
		dst.SetType(plAccessSpan::kUndefined);
		return false;
	}

	plAccessVtxSpan& vtx = dst.AccessVtx();
	vtx.SetVertCount((UInt16)(span->fVLength));

	// Every stream gets the same stride and the same negative byte offset of
	// -(start index * stride). NOTE(review): presumably this lets the streams
	// be indexed with buffer-relative vertex indices - confirm in plAccessVtxSpan.
	const UInt16 stride16 = (UInt16)stride;
	const Int32 backOffset = (-(Int32)(span->fVStartIdx)) * ((Int32)stride);

	// Walk the cursor through one vertex, handing out each component's address.
	vtx.PositionStream(cursor, stride16, backOffset);
	cursor += sizeof(hsPoint3);

	const int weightCount = group->GetNumWeights();
	vtx.SetNumWeights(weightCount);
	if( weightCount )
	{
		vtx.WeightStream(cursor, stride16, backOffset);
		cursor += weightCount * sizeof(hsScalar);

		if( group->GetVertexFormat() & plGBufferGroup::kSkinIndices )
		{
			vtx.WgtIndexStream(cursor, stride16, backOffset);
			cursor += sizeof(UInt32);
		}
		else
		{
			vtx.WgtIndexStream(nil, 0, backOffset);
		}
	}

	vtx.NormalStream(cursor, stride16, backOffset);
	cursor += sizeof(hsVector3);

	vtx.DiffuseStream(cursor, stride16, backOffset);
	cursor += sizeof(UInt32);

	vtx.SpecularStream(cursor, stride16, backOffset);
	cursor += sizeof(UInt32);

	vtx.UVWStream(cursor, stride16, backOffset);
	vtx.SetNumUVWs(group->GetNumUVs());

	// Remember the ref so CloseAccess can find the buffer to unlock.
	vtx.SetVtxDeviceRef(bufRef);

	return true;
}

// CloseAccess /////////////////////////////////////////////////////////////////////
// Unlock the D3D vertex buffer recorded in the access span's vertex device ref,
// invalidating the accessSpan. Returns false, unlocking nothing, if the span has
// no vertex access, no device ref, or the ref has no D3D buffer.
hsBool plDXPipeline::CloseAccess(plAccessSpan& dst)
{
	if( dst.HasAccessVtx() )
	{
		plDXVertexBufferRef* bufRef = (plDXVertexBufferRef*)dst.AccessVtx().GetVtxDeviceRef();
		if( bufRef && bufRef->fD3DBuffer )
		{
			bufRef->fD3DBuffer->Unlock();
			return true;
		}
	}

	return false;
}

// CheckVertexBufferRef /////////////////////////////////////////////////////
// Make sure vertex buffer `idx` of the buffer group has a device ref, that the ref
// is linked into fVtxBuffRefList, and that it is ready for use:
//  - volatile refs get a system memory buffer (fData) when their format differs
//    from the owner's; they are filled later.
//  - static refs get their D3D buffer created and filled, unless we are currently
//    in the unmanaged allocation phase (fAllocUnManaged).
void plDXPipeline::CheckVertexBufferRef(plGBufferGroup* owner, UInt32 idx)
{
	// Fetch the existing device ref at this index, or make a blank one.
	plDXVertexBufferRef* bufRef = (plDXVertexBufferRef*)owner->GetVertexBufferRef(idx);
	if( !bufRef )
	{
		bufRef = TRACKED_NEW plDXVertexBufferRef;
		ISetupVertexBufferRef(owner, idx, bufRef);
	}

	if( !bufRef->IsLinked() )
		bufRef->Link( &fVtxBuffRefList );

	// One way or another, we now have a vbufferref[idx] in owner.
	if( bufRef->Volatile() )
	{
		// If the owner is volatile, then we hold off filling. It might not
		// be visible, and we might need to refill it again if we
		// have an overrun of our dynamic D3D buffer.
		// Just make sure we're going to be ready to fill it.
		const bool needsScratch = !bufRef->fData && (bufRef->fFormat != owner->GetVertexFormat());
		if( needsScratch )
			bufRef->fData = TRACKED_NEW UInt8[bufRef->fCount * bufRef->fVertexSize];
		return;
	}

	// Static buffers are not created during the unmanaged allocation phase.
	if( fAllocUnManaged )
		return;

	// If it's a static buffer, allocate a D3D vertex buffer for it. Otherwise, it'll
	// be sharing the global D3D dynamic buffer, and marked as volatile.
	ICheckStaticVertexBuffer(bufRef, owner, idx);

	// Might want to remove this assert, and replace it with a dirty check if
	// we have static buffers that change very seldom rather than never.
	hsAssert(!bufRef->IsDirty(), "Non-volatile vertex buffers should never get dirty");
}

// CheckIndexBufferRef /////////////////////////////////////////////////////
// Make sure index buffer `idx` of the buffer group has a device ref, that the ref
// is linked into fIdxBuffRefList, that its D3D resources exist, and that its
// contents are refilled if it is flagged dirty.
void plDXPipeline::CheckIndexBufferRef(plGBufferGroup* owner, UInt32 idx)
{
	plDXIndexBufferRef* idxRef = (plDXIndexBufferRef*)owner->GetIndexBufferRef(idx);
	if( !idxRef )
	{
		// None yet, build one from scratch.
		idxRef = TRACKED_NEW plDXIndexBufferRef;
		ISetupIndexBufferRef(owner, idx, idxRef);
	}

	if( !idxRef->IsLinked() )
		idxRef->Link(&fIdxBuffRefList);

	// Create the D3D index buffer if it doesn't exist yet.
	ICheckIndexBuffer(idxRef);

	// Stale contents get refreshed from the owner.
	if( idxRef->IsDirty() )
		IFillIndexBufferRef(idxRef, owner, idx);
}

// IFillIndexBufferRef ////////////////////////////////////////////////////////////
// Refresh the D3D index buffer from the plasma index buffer.
// Copies the index range [start, end) of index buffer `idx` (16 bit indices) into
// the same range of the ref's D3D buffer and clears the ref's dirty flag.
// Returns without clearing the dirty flag if the ref has no D3D buffer, the range
// is empty, or the lock fails.
void plDXPipeline::IFillIndexBufferRef(plDXIndexBufferRef* iRef, plGBufferGroup* owner, UInt32 idx)
{
	// ICheckIndexBuffer leaves fD3DBuffer nil when creation fails (or when the
	// ref has no indices), so there may be nothing to fill.
	if( !iRef->fD3DBuffer )
		return;

	UInt32 startIdx = owner->GetIndexBufferStart(idx);
	UInt32 size = (owner->GetIndexBufferEnd(idx) - startIdx) * sizeof(UInt16);
	if( !size )
		return;

	DWORD lockFlags = iRef->Volatile() ? D3DLOCK_DISCARD : 0;
	UInt16* destPtr = nil;
	if( FAILED( iRef->fD3DBuffer->Lock(startIdx * sizeof(UInt16), size, (void **)&destPtr, lockFlags) ) )
	{
		hsAssert( false, "Cannot lock index buffer for writing" );
		return;
	}

	// destPtr already points at the locked range, so only the source is offset.
	memcpy( destPtr, owner->GetIndexBufferData(idx) + startIdx, size );

	iRef->fD3DBuffer->Unlock();

	iRef->SetDirty( false );

}

// ICheckIndexBuffer ////////////////////////////////////////////////////////
// Make sure index buffer ref has any D3D resources it needs.
// If the ref has indices but no D3D buffer yet, create a 16 bit, write-only index
// buffer for it - in POOL_DEFAULT while fAllocUnManaged is set, POOL_MANAGED
// otherwise - and flag the ref dirty so it gets filled.
// On creation failure fD3DBuffer is left nil.
void plDXPipeline::ICheckIndexBuffer(plDXIndexBufferRef* iRef)
{
	// Nothing to do if the buffer already exists or there are no indices.
	if( iRef->fD3DBuffer || !iRef->fCount )
		return;

	D3DPOOL pool = fAllocUnManaged ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED;
	const DWORD usageFlags = D3DUSAGE_WRITEONLY;

	iRef->SetVolatile(false);

	HRESULT hr = fD3DDevice->CreateIndexBuffer( sizeof( UInt16 ) * iRef->fCount,
												usageFlags,
												D3DFMT_INDEX16,
												pool,
												&iRef->fD3DBuffer, NULL );
	if( FAILED( hr ) )
	{
		hsAssert( false, "CreateIndexBuffer() call failed!" );
		iRef->fD3DBuffer = nil;
		return;
	}
	PROFILE_POOL_MEM(pool, sizeof(UInt16) * iRef->fCount, true, "IndexBuff");

	// Brand new buffer: remember where it lives and mark it as needing a fill.
	iRef->fPoolType = pool;
	iRef->SetDirty(true);
	iRef->SetRebuiltSinceUsed(true);
}

// ISetupIndexBufferRef ////////////////////////////////////////////////////////////////
// Initialize the index buffer ref, but don't create anything for it.
// Copies the index count for index buffer `idx` from `owner`, flags the ref dirty,
// registers it with the owner, takes the owner's index volatility, and finally
// drops one reference on the ref.
void plDXPipeline::ISetupIndexBufferRef(plGBufferGroup* owner, UInt32 idx, plDXIndexBufferRef* iRef)
{
	UInt32 numIndices = owner->GetIndexBufferCount(idx);
	iRef->fCount = numIndices;
	iRef->fOwner = owner;
	iRef->fIndex = idx;
	iRef->fRefTime = 0;

	iRef->SetDirty(true);
	iRef->SetRebuiltSinceUsed(true);

	owner->SetIndexBufferRef(idx, iRef);

	iRef->SetVolatile(owner->AreIdxVolatile());

	// Drop the reference last so iRef is never touched after it has been
	// unreffed (same ordering as ISetupVertexBufferRef).
	hsRefCnt_SafeUnRef(iRef);
}

//// ISoftwareVertexBlend ///////////////////////////////////////////////////////
// Emulate matrix palette operations in software. The big difference between the hardware
// and software versions is we only want to process each vertex buffer once and blend all the
// verts we're going to in software, so the vertex blend happens once for an entire drawable.
// In hardware, we want the opposite, to break it into manageable chunks, manageable meaning
// few enough matrices to fit into hardware registers. So for hardware version, we set up
// our palette, draw a span or few, setup our matrix palette with new matrices, draw, repeat.
//
// drawable - the drawable whose skinned spans are blended.
// visList  - indices into the drawable's span array of the spans to consider.
// The blended results are written into each vertex buffer ref's system memory
// buffer (fData) and the ref is flagged dirty; no D3D buffer is locked here.
// Every return path returns true.
hsBool		plDXPipeline::ISoftwareVertexBlend( plDrawableSpans* drawable, const hsTArray<Int16>& visList )
{
	// Debug switch to turn skinning off entirely.
	if (IsDebugFlagSet(plPipeDbg::kFlagNoSkinning))
		return true;

	// The drawable's skin time already matches the current render count,
	// so there is nothing to do for it this render.
	if( drawable->GetSkinTime() == fRenderCnt )
		return true;

	const hsBitVector	&blendBits = drawable->GetBlendingSpanVector();

	if( drawable->GetBlendingSpanVector().Empty() )
	{
		// This sucker doesn't have any skinning spans anyway. Just return
		drawable->SetSkinTime( fRenderCnt );
		return true;
	}

	plProfile_BeginTiming(Skin);

	// First, figure out which buffers we need to blend.
	// blendBuffers[group][buffer] is set to 1 for every vertex buffer that holds at
	// least one visible span with its blend bit set. The table is static (shared by
	// all calls) and cleared on every call.
	const int kMaxBufferGroups = 20;
	const int kMaxVertexBuffers = 20;
	static char blendBuffers[kMaxBufferGroups][kMaxVertexBuffers];
	memset(blendBuffers, 0, kMaxBufferGroups * kMaxVertexBuffers * sizeof(**blendBuffers));

	// The asserts here and in the loop below are the only guards on the table
	// indices (fGroupIdx / fVBufferIdx).
	hsAssert(kMaxBufferGroups >= drawable->GetNumBufferGroups(), "Bigger than we counted on num groups skin.");

	const hsTArray<plSpan *>& spans = drawable->GetSpanArray();
	int i;
	for( i = 0; i < visList.GetCount(); i++ )
	{
		if( blendBits.IsBitSet( visList[ i ] ) )
		{
			const plVertexSpan &vSpan = *(plVertexSpan *)spans[visList[i]];
			hsAssert(kMaxVertexBuffers > vSpan.fVBufferIdx, "Bigger than we counted on num buffers skin.");

			// Mark the buffer and clear the span's blend bit on the drawable.
			blendBuffers[vSpan.fGroupIdx][vSpan.fVBufferIdx] = 1;
			drawable->SetBlendingSpanVectorBit( visList[ i ], false );
		}
	}

	// Now go through each of the group/buffer (= a real vertex buffer) pairs we found,
	// and blend into it. For each span that uses it, set the matrix palette and
	// then do the blend for that span into the ref's fData.
	// When we've done all the spans for a group/buffer, we move on.
	int j;
	for( i = 0; i < kMaxBufferGroups; i++ )
	{
		for( j = 0; j < kMaxVertexBuffers; j++ )
		{
			if( blendBuffers[i][j] )
			{
				// Found one. Get its device ref and destination buffer.
				plDXVertexBufferRef* vRef = (plDXVertexBufferRef*)drawable->GetVertexRef(i, j);

				hsAssert(vRef->fData, "Going into skinning with no place to put results!");

				UInt8*	destPtr = vRef->fData;

				// Note this visits every span in visList that lives in this
				// group/buffer; the blend bit is not re-tested here.
				int k;
				for( k = 0; k < visList.GetCount(); k++ )
				{
					const plIcicle& span = *(plIcicle*)spans[visList[k]];
					if( (span.fGroupIdx == i)&&(span.fVBufferIdx == j) )
					{
						plProfile_Inc(NumSkin);

						// Entry 0 of the span's palette is overwritten with the span's
						// own local to world.
						hsMatrix44* matrixPalette = drawable->GetMatrixPalette(span.fBaseMatrix);
						matrixPalette[0] = span.fLocalToWorld;

						// Source is the owner's vertex data (owner's format and stride);
						// destination is fData at the ref's stride.
						UInt8* ptr = vRef->fOwner->GetVertBufferData(vRef->fIndex);
						ptr += span.fVStartIdx * vRef->fOwner->GetVertexSize();
						IBlendVertsIntoBuffer( (plSpan*)&span,
												matrixPalette, span.fNumMatrices,
												ptr,
												vRef->fOwner->GetVertexFormat(),
												vRef->fOwner->GetVertexSize(),
												destPtr + span.fVStartIdx * vRef->fVertexSize,
												vRef->fVertexSize,
												span.fVLength,
												span.fLocalUVWChans );
						vRef->SetDirty(true);
					}
				}
				// Done with this buffer, move on.
			}
		}
	}

	plProfile_EndTiming(Skin);

	if( drawable->GetBlendingSpanVector().Empty() )
	{
		// Only do this if we've blended ALL of the spans. Thus, this becomes a trivial
		// rejection for all the skinning flags being cleared
		drawable->SetSkinTime(fRenderCnt);
	}

	return true;
}

// IBeginAllocUnManaged ///////////////////////////////////////////////////////////////////
// Before allocating anything into POOL_DEFAULT, we must evict managed memory.
// Evicts managed resources, resets the eviction bookkeeping, and switches the
// pipeline into its "allocating unmanaged" state.
// See LoadResources.
void plDXPipeline::IBeginAllocUnManaged()
{
	// Flush out all managed resources to make room for unmanaged resources.
	fD3DDevice->EvictManagedResources();

	// Restart the eviction bookkeeping.
	fManagedSeen = 0;
	fEvictTime = fTextUseTime;

	// From here on we're only allocating POOL_DEFAULT, and nothing managed
	// has been allocated yet.
	fAllocUnManaged = true;
	fManagedAlloced = false;
}

// IEndAllocUnManaged ////////////////////////////////////////////////////////////////////
// Leaves the "allocating unmanaged" state entered by IBeginAllocUnManaged, then
// evicts managed resources again and resets the eviction bookkeeping.
// See LoadResources.
void plDXPipeline::IEndAllocUnManaged()
{
	// Done with the POOL_DEFAULT allocations.
	fAllocUnManaged = false;

	// Flush the (should be empty) resource manager to reset its internal allocation pool.
	fD3DDevice->EvictManagedResources();

	// Restart the eviction bookkeeping.
	fManagedSeen = 0;
	fEvictTime = fTextUseTime;
}

// ICheckTextureUsage ////////////////////////////////////////////////////////////////////
// Obsolete, unused.
// Deletes textures LRU to try to get around NVidia memory manager bug. Found a
// more robust/efficent way. Besides, it didn't help. See OSVERSION.
void plDXPipeline::ICheckTextureUsage()
{
	plProfile_IncCount(fTexUsed, fTexUsed);
	plProfile_IncCount(fTexManaged, fTexManaged);

	plConst(UInt32) kMinTexManaged(5000000);
	if( fTexManaged < kMinTexManaged )
		return;

	plConst(UInt32) kScale(2);
	if( fTexUsed * kScale < fTexManaged )
	{
		// Find the stalest
		UInt32 stalest = fTextUseTime;
		plDXTextureRef* ref = fTextureRefList;
		while( ref )
		{
			// I don't know if render targets even get put in this list.
			if( !(ref->GetFlags() & plDXTextureRef::kRenderTarget) && (ref->fUseTime < stalest) )
				stalest = ref->fUseTime;
			ref = ref->GetNext();
		}
		stalest = fTextUseTime - stalest;

		// If the stalest is fresh, live with thrashing
		plConst(UInt32) kMinAge(60);
		if( stalest < kMinAge )
			return;

		// Kill the stalest, and everything more than half as stale
		stalest /= 2;
		if( stalest < kMinAge )
			stalest = kMinAge;

		stalest = fTextUseTime - stalest;

		// Go through again slaughtering left and right
		ref = fTextureRefList;
		while( ref )
		{
			if( !(ref->GetFlags() & plDXTextureRef::kRenderTarget) && (ref->fUseTime < stalest) )
			{
				plDXTextureRef* nuke = ref;
				ref = ref->GetNext();
				nuke->Release();
				nuke->Unlink();
			}
			else
			{
				ref = ref->GetNext();
			}
		}
	}
}

// ICheckVtxUsage ////////////////////////////////////////////////////////////////////
// Obsolete, unused.
// Deletes textures LRU to try to get around NVidia memory manager bug. Found a
// more robust/efficent way. Besides, it didn't help. See OSVERSION.
void plDXPipeline::ICheckVtxUsage()
{
	plProfile_IncCount(fVtxUsed, fVtxUsed);
	plProfile_IncCount(fVtxManaged, fVtxManaged);

	plConst(UInt32) kMinVtxManaged(5000000);
	if( fVtxManaged < kMinVtxManaged )
		return;

	plConst(UInt32) kScale(2);
	if( fVtxUsed * kScale < fVtxManaged )
	{
		// Find the stalest
		UInt32 stalest = fTextUseTime;
		plDXVertexBufferRef* ref = fVtxBuffRefList;
		while( ref )
		{
			if( !ref->Volatile() && (ref->fUseTime < stalest) )
				stalest = ref->fUseTime;
			ref = ref->GetNext();
		}
		stalest = fTextUseTime - stalest;

		// If the stalest is fresh, live with thrashing
		plConst(UInt32) kMinAge(60);
		if( stalest < kMinAge )
			return;

		// Kill the stalest, and everything more than half as stale
		stalest /= 2;
		if( stalest < kMinAge )
			stalest = kMinAge;

		stalest = fTextUseTime - stalest;

		// Go through again slaughtering left and right
		ref = fVtxBuffRefList;
		while( ref )
		{
			if( !ref->Volatile() && (ref->fUseTime < stalest) )
			{
				plDXVertexBufferRef* nuke = ref;
				ref = ref->GetNext();
				nuke->Release();
				nuke->Unlink();
			}
			else
			{
				ref = ref->GetNext();
			}
		}
	}
}

// CheckResources //////////////////////////////////////////////////////////////////
// Compares the size of fAvRTPool against the number of entries in fClothingOutfits.
// While the pool looks too big (at most one outfit but more than one pool entry,
// or at least 16 pool entries with half of them covering the outfits), returns true
// only once that has held for longer than kAvTexPoolShrinkThresh.
// Otherwise restarts that timer and returns true if the pool is smaller than the
// number of outfits.
hsBool plDXPipeline::CheckResources()
{
	const bool poolMostlyIdle = (fClothingOutfits.GetCount() <= 1) && (fAvRTPool.GetCount() > 1);
	const bool poolOversized = (fAvRTPool.GetCount() >= 16)
								&& (fAvRTPool.GetCount() / 2 >= fClothingOutfits.GetCount());

	if( !poolMostlyIdle && !poolOversized )
	{
		// No shrink candidate right now; restart the clock.
		fAvRTShrinkValidSince = hsTimer::GetSysSeconds();
		return (fAvRTPool.GetCount() < fClothingOutfits.GetCount());
	}

	return (hsTimer::GetSysSeconds() - fAvRTShrinkValidSince > kAvTexPoolShrinkThresh);
}

// LoadResources ///////////////////////////////////////////////////////////////////////
// Basically, we have to load anything that goes into POOL_DEFAULT before
// anything into POOL_MANAGED, or the memory manager gets confused.
// More precisely, we have to evict everything from POOL_MANAGED before we
// can allocate anything into POOL_DEFAULT.
// So, this function evicts managed memory, frees up everything in POOL_DEFAULT,
// calls out for anything needing to be created POOL_DEFAULT to do so,
// then we're free to load into POOL_MANAGED on demand.
// This is typically called at the beginning of the first render after loading
// a new age.
// The order of the steps below matters; do not reorder them.
void plDXPipeline::LoadResources()
{
	hsStatusMessageF("Begin Device Reload t=%f",hsTimer::GetSeconds());
	plNetClientApp::StaticDebugMsg("Begin Device Reload");

	// Just to be safe.
	IInitDeviceState(); // 9700 THRASH

	// Evict managed memory and enter the unmanaged allocation phase.
	IBeginAllocUnManaged();

	// Release everything we have in POOL_DEFAULT.
	IReleaseDynamicBuffers();
	IReleaseAvRTPool();

	// Create all RenderTargets
	plPipeRTMakeMsg* rtMake = TRACKED_NEW plPipeRTMakeMsg(this);
	rtMake->Send();

	// Create all our shadow render targets and pipeline specific POOL_DEFAULT vertex buffers.
	// This includes our single dynamic vertex buffer that we cycle through for software
	// skinned, particle systems, etc.
	ICreateDynamicBuffers();

	// Create all POOL_DEFAULT (sorted) index buffers in the scene.
	plPipeGeoMakeMsg* defMake = TRACKED_NEW plPipeGeoMakeMsg(this, true);
	defMake->Send();

	// This can be a bit of a mem hog and will use more mem if available, so keep it last in the
	// POOL_DEFAULT allocs.
	IFillAvRTPool();

	// We should have everything POOL_DEFAULT we need now.
	IEndAllocUnManaged();

	// Force a create of all our static D3D vertex buffers.
	// MF_PRELOAD_MANAGEDBUFFERS is defined right here, so this is always compiled in.
#define MF_PRELOAD_MANAGEDBUFFERS
#ifdef MF_PRELOAD_MANAGEDBUFFERS
	plPipeGeoMakeMsg* manMake = TRACKED_NEW plPipeGeoMakeMsg(this, false);
	manMake->Send();
#endif // MF_PRELOAD_MANAGEDBUFFERS

	// Forcing a preload of textures turned out to not be so great,
	// since there are typically so many in an age, it swamped out
	// VM. So it is only compiled in when MF_TOSSER is defined.
#ifdef MF_TOSSER
#define MF_PRELOAD_TEXTURES
#endif // MF_TOSSER
#ifdef MF_PRELOAD_TEXTURES
	plPipeTexMakeMsg* texMake = TRACKED_NEW plPipeTexMakeMsg(this);
	texMake->Send();
#endif // MF_PRELOAD_TEXTURES

	// Evict the managed resources once more after the preload.
	fD3DDevice->EvictManagedResources();

	// Okay, we've done it, clear the request.
	plPipeResReq::Clear();

	plProfile_IncCount(PipeReload, 1);

	hsStatusMessageF("End Device Reload t=%f",hsTimer::GetSeconds());
	plNetClientApp::StaticDebugMsg("End Device Reload");
}

// Sorry about this, but it really did speed up the skinning.
// Just some macros for the inner loop of IBlendVertsIntoBuffer.

// MATRIXMULTBEGIN
// Declares locals holding the top three rows of matrix xfm (m00..m23), the blend
// weight wgt (m_wgt), and scratch floats srcX/srcY/srcZ. It declares variables, so
// it must be used at most once per scope, and before either of the ADD macros.
#define MATRIXMULTBEGIN(xfm, wgt) \
	register float m00 = xfm.fMap[0][0]; \
	register float m01 = xfm.fMap[0][1]; \
	register float m02 = xfm.fMap[0][2]; \
	register float m03 = xfm.fMap[0][3]; \
	register float m10 = xfm.fMap[1][0]; \
	register float m11 = xfm.fMap[1][1]; \
	register float m12 = xfm.fMap[1][2]; \
	register float m13 = xfm.fMap[1][3]; \
	register float m20 = xfm.fMap[2][0]; \
	register float m21 = xfm.fMap[2][1]; \
	register float m22 = xfm.fMap[2][2]; \
	register float m23 = xfm.fMap[2][3]; \
	register float m_wgt = wgt; \
	register float srcX, srcY, srcZ;

// MATRIXMULTPOINTADD
// dst += (matrix * src, including the translation column m03/m13/m23) * m_wgt.
// Uses the locals declared by MATRIXMULTBEGIN.
#define MATRIXMULTPOINTADD(dst, src) \
	srcX = src.fX; \
	srcY = src.fY; \
	srcZ = src.fZ; \
	\
	dst.fX += (srcX * m00 + srcY * m01 + srcZ * m02 + m03) * m_wgt; \
	dst.fY += (srcX * m10 + srcY * m11 + srcZ * m12 + m13) * m_wgt; \
	dst.fZ += (srcX * m20 + srcY * m21 + srcZ * m22 + m23) * m_wgt;

// MATRIXMULTVECTORADD
// dst += (upper 3x3 of matrix * src, no translation) * m_wgt.
// Uses the locals declared by MATRIXMULTBEGIN.
#define MATRIXMULTVECTORADD(dst, src) \
	srcX = src.fX; \
	srcY = src.fY; \
	srcZ = src.fZ; \
	\
	dst.fX += (srcX * m00 + srcY * m01 + srcZ * m02) * m_wgt; \
	dst.fY += (srcX * m10 + srcY * m11 + srcZ * m12) * m_wgt; \
	dst.fZ += (srcX * m20 + srcY * m21 + srcZ * m22) * m_wgt;

// inlTESTPOINT /////////////////////////////////////////
// Grow the axis aligned box [min, max] so that it contains destP.
// The min and max of each axis are tested independently, so this also works
// when the box starts out inverted (min above max, e.g. +/-1.e33 sentinels):
// the first point then updates both the min and the max.
inline void inlTESTPOINT(const hsPoint3& destP,
						 hsScalar& minX, hsScalar& minY, hsScalar& minZ,
						 hsScalar& maxX, hsScalar& maxY, hsScalar& maxZ)
{
	if( destP.fX < minX )
		minX = destP.fX;
	if( destP.fX > maxX )
		maxX = destP.fX;

	if( destP.fY < minY )
		minY = destP.fY;
	if( destP.fY > maxY )
		maxY = destP.fY;

	if( destP.fZ < minZ )
		minZ = destP.fZ;
	if( destP.fZ > maxZ )
		maxZ = destP.fZ;
}

//// IBlendVertsIntoBuffer ////////////////////////////////////////////////////
//	Given a pointer into a buffer of verts that have blending data in the D3D
//	format, blends them into the destination buffer given without the blending
//	info.
//
//	span          - only used when MF_RECALC_BOUNDS is defined (gets new world bounds).
//	matrixPalette - matrices indexed by the per-vertex matrix indices.
//	numMatrices   - not used by this function.
//	src, format, srcStride - source verts and their plGBufferGroup format. The source
//	                is read sequentially field by field; srcStride is not used.
//	dest, destStride - destination verts, written sequentially as position, normal,
//	                diffuse, specular, UVWs; destStride is not used.
//	count         - number of verts to blend.
//	localUVWChans - if non-zero, its low and high bytes name two UVW channels that
//	                are transformed like the normal instead of copied through.

void	plDXPipeline::IBlendVertsIntoBuffer( plSpan* span,
											  hsMatrix44* matrixPalette, int numMatrices,
											  const UInt8 *src, UInt8 format, UInt32 srcStride,
											  UInt8 *dest, UInt32 destStride, UInt32 count,
											  UInt16 localUVWChans )
{
	UInt8		numUVs, numWeights;
	UInt32		i, j, indices, color, specColor, uvChanSize;
	float		weights[ 4 ], weightSum;
	hsPoint3	pt, destPt;
	hsVector3	vec, destNorm;


	/// Get some counts
	switch( format & plGBufferGroup::kSkinWeightMask )
	{
		case plGBufferGroup::kSkin1Weight:  numWeights = 1; break;
		case plGBufferGroup::kSkin2Weights: numWeights = 2; break;
		case plGBufferGroup::kSkin3Weights: numWeights = 3; break;
		default:
			hsAssert( false, "Invalid weight count in IBlendVertsIntoBuffer()" );
			// Don't leave numWeights uninitialized when asserts are compiled
			// out. With no stored weights, the single implicit weight below
			// comes out as 1.
			numWeights = 0;
			break;
	}

	numUVs = plGBufferGroup::CalcNumUVs( format );
	uvChanSize = numUVs * sizeof( float ) * 3;

//#define MF_RECALC_BOUNDS
#ifdef MF_RECALC_BOUNDS
	hsScalar minX = 1.e33f;
	hsScalar minY = 1.e33f;
	hsScalar minZ = 1.e33f;

	hsScalar maxX = -1.e33f;
	hsScalar maxY = -1.e33f;
	hsScalar maxZ = -1.e33f;
#endif // MF_RECALC_BOUNDS

	// localUVWChans is bump mapping tangent space vectors, which need to
	// be skinned like the normal, as opposed to passed through like
	// garden variety UVW coordinates.
	// There are no localUVWChans that I know of in production assets (i.e.
	// the avatar is not skinned).
	if( !localUVWChans )
	{
		/// Copy whilst blending
		for( i = 0; i < count; i++ )
		{
			// Extract data
			src = inlExtractPoint( src, pt );
			for( j = 0, weightSum = 0; j < numWeights; j++ )
			{
				src = inlExtractFloat( src, weights[ j ] );
				weightSum += weights[ j ];
			}
			// The last weight is implicit: whatever is left to sum to one.
			weights[ j ] = 1 - weightSum;

			if( format & plGBufferGroup::kSkinIndices )
			{
				src = inlExtractUInt32( src, indices );
			}
			else
			{
				// No stored indices: first weight uses matrix 0, second matrix 1.
				indices = 1 << 8;
			}
			src = inlExtractPoint( src, vec );
			src = inlExtractUInt32( src, color );
			src = inlExtractUInt32( src, specColor );

			// Blend
			destPt.Set( 0, 0, 0 );
			destNorm.Set( 0, 0, 0 );
			for( j = 0; j < numWeights + 1; j++ )
			{
				if( weights[ j ] )
				{
					// One matrix index per byte of indices, lowest byte first.
					MATRIXMULTBEGIN(matrixPalette[indices & 0xff], weights[j]);

					MATRIXMULTPOINTADD(destPt, pt);
					MATRIXMULTVECTORADD(destNorm, vec);
				}

				indices >>= 8;
			}
			// Probably don't really need to renormalize this. The errors are
			// going to be subtle and "smooth".
//			hsFastMath::NormalizeAppr(destNorm);

#ifdef MF_RECALC_BOUNDS
			inlTESTPOINT(destPt, minX, minY, minZ, maxX, maxY, maxZ);
#endif // MF_RECALC_BOUNDS

			// Slam data into position now
			dest = inlStuffPoint( dest, destPt );
			dest = inlStuffPoint( dest, destNorm );
			dest = inlStuffUInt32( dest, color );
			dest = inlStuffUInt32( dest, specColor );
			// UVWs pass straight through.
			memcpy( dest, src, uvChanSize );
			src += uvChanSize;
			dest += uvChanSize;
		}
	}
	else
	{
		UInt8 hiChan = localUVWChans >> 8;
		UInt8 loChan = localUVWChans & 0xff;
		/// Copy whilst blending
		for( i = 0; i < count; i++ )
		{
			hsVector3 srcUVWs[plGeometrySpan::kMaxNumUVChannels];
			hsVector3 dstUVWs[plGeometrySpan::kMaxNumUVChannels];

			// Extract data
			src = inlExtractPoint( src, pt );
			for( j = 0, weightSum = 0; j < numWeights; j++ )
			{
				src = inlExtractFloat( src, weights[ j ] );
				weightSum += weights[ j ];
			}
			// The last weight is implicit: whatever is left to sum to one.
			weights[ j ] = 1 - weightSum;

			if( format & plGBufferGroup::kSkinIndices )
			{
				src = inlExtractUInt32( src, indices );
			}
			else
			{
				// No stored indices: first weight uses matrix 0, second matrix 1.
				indices = 1 << 8;
			}

			src = inlExtractPoint( src, vec );
			src = inlExtractUInt32( src, color );
			src = inlExtractUInt32( src, specColor );

			UInt8 k;
			for( k = 0; k < numUVs; k++ )
			{
				src = inlExtractPoint( src, srcUVWs[k] );
			}
			// Start from a straight copy, then zero the two channels that get
			// accumulated by the blend below.
			memcpy( dstUVWs, srcUVWs, uvChanSize);
			dstUVWs[loChan].Set(0,0,0);
			dstUVWs[hiChan].Set(0,0,0);

			// Blend
			destPt.Set( 0, 0, 0 );
			destNorm.Set( 0, 0, 0 );
			for( j = 0; j < numWeights + 1; j++ )
			{
				if( weights[ j ] )
				{
					// One matrix index per byte of indices, lowest byte first.
					MATRIXMULTBEGIN(matrixPalette[indices & 0xff], weights[j]);

					MATRIXMULTPOINTADD(destPt, pt);
					MATRIXMULTVECTORADD(destNorm, vec);
					MATRIXMULTVECTORADD(dstUVWs[loChan], srcUVWs[loChan]);
					MATRIXMULTVECTORADD(dstUVWs[hiChan], srcUVWs[hiChan]);
				}

				indices >>= 8;
			}
			// Probably don't really need to renormalize this. The errors are
			// going to be subtle and "smooth".
//			hsFastMath::NormalizeAppr(destNorm);
//			hsFastMath::NormalizeAppr(dstUVWs[loChan]);
//			hsFastMath::NormalizeAppr(dstUVWs[hiChan]);

#ifdef MF_RECALC_BOUNDS
			inlTESTPOINT(destPt, minX, minY, minZ, maxX, maxY, maxZ);
#endif // MF_RECALC_BOUNDS

			// Slam data into position now
			dest = inlStuffPoint( dest, destPt );
			dest = inlStuffPoint( dest, destNorm );
			dest = inlStuffUInt32( dest, color );
			dest = inlStuffUInt32( dest, specColor );
			memcpy( dest, dstUVWs, uvChanSize );
			dest += uvChanSize;
		}
	}
#ifdef MF_RECALC_BOUNDS
	hsBounds3Ext wBnd;
	wBnd.Reset(&hsPoint3(minX, minY, minZ));
	wBnd.Union(&hsPoint3(maxX, maxY, maxZ));
	span->fWorldBounds = wBnd;
#endif // MF_RECALC_BOUNDS
}

// ISetPipeConsts //////////////////////////////////////////////////////////////////
// A shader can request that the pipeline fill in certain constants that are indeterminate
// until the pipeline is about to render the object the shader is applied to. For example,
// the object's local to world. A single shader may be used on multiple objects with
// multiple local to world transforms. This ensures the pipeline will shove the proper
// local to world into the shader immediately before the render.
// See plShader.h for the list of available pipe constants.
// Note that the lighting pipe constants are NOT implemented (they are accepted and ignored).
//
// The per-stage texture transform constants (kTex3x4_N, kTex2x4_N, kTex1x4_N) read the
// transform of layer (fCurrLayerIdx + N) of fCurrMaterial. A request for a stage beyond
// the current pass, or for a layer index the material doesn't have, asserts and is
// skipped so we never index past the end of the material's layer list.
void plDXPipeline::ISetPipeConsts(plShader* shader)
{
	int n = shader->GetNumPipeConsts();
	int i;
	for( i = 0; i < n; i++ )
	{
		const plPipeConst& pc = shader->GetPipeConst(i);
		switch( pc.fType )
		{
		case plPipeConst::kFogSet:
			{
				float set[4];
				IGetVSFogSet(set);
				shader->SetFloat4(pc.fReg, set);
			}
			break;
		case plPipeConst::kLayAmbient:
			{
				hsColorRGBA col = fCurrLay->GetAmbientColor();
				shader->SetColor(pc.fReg, col);
			}
			break;
		case plPipeConst::kLayRuntime:
			{
				// Runtime color, with the layer's opacity carried in alpha.
				hsColorRGBA col = fCurrLay->GetRuntimeColor();
				col.a = fCurrLay->GetOpacity();
				shader->SetColor(pc.fReg, col);
			}
			break;
		case plPipeConst::kLaySpecular:
			{
				hsColorRGBA col = fCurrLay->GetSpecularColor();
				shader->SetColor(pc.fReg, col);
			}
			break;
		case plPipeConst::kTex3x4_0:
		case plPipeConst::kTex3x4_1:
		case plPipeConst::kTex3x4_2:
		case plPipeConst::kTex3x4_3:
		case plPipeConst::kTex3x4_4:
		case plPipeConst::kTex3x4_5:
		case plPipeConst::kTex3x4_6:
		case plPipeConst::kTex3x4_7:
			{
				int stage = pc.fType - plPipeConst::kTex3x4_0;
				int layerIdx = int(fCurrLayerIdx) + stage;

				if( (stage > fCurrNumLayers) || (layerIdx >= int(fCurrMaterial->GetNumLayers())) )
				{
					// Ooops. This is bad, means the shader is expecting more layers than
					// we actually have (or is just bogus). Assert and quietly continue
					// with the next pipe constant (this continues the for loop).
					hsAssert(false, "Shader asking for higher stage transform than we have");
					continue;
				}
				const hsMatrix44& xfm = fCurrMaterial->GetLayer(layerIdx)->GetTransform();

				shader->SetMatrix34(pc.fReg, xfm);
			}
			break;
		case plPipeConst::kTex2x4_0:
		case plPipeConst::kTex2x4_1:
		case plPipeConst::kTex2x4_2:
		case plPipeConst::kTex2x4_3:
		case plPipeConst::kTex2x4_4:
		case plPipeConst::kTex2x4_5:
		case plPipeConst::kTex2x4_6:
		case plPipeConst::kTex2x4_7:
			{
				int stage = pc.fType - plPipeConst::kTex2x4_0;
				int layerIdx = int(fCurrLayerIdx) + stage;

				if( (stage > fCurrNumLayers) || (layerIdx >= int(fCurrMaterial->GetNumLayers())) )
				{
					// Same deal as the 3x4 case: bogus stage request, assert and skip.
					hsAssert(false, "Shader asking for higher stage transform than we have");
					continue;
				}
				const hsMatrix44& xfm = fCurrMaterial->GetLayer(layerIdx)->GetTransform();

				shader->SetMatrix24(pc.fReg, xfm);
			}
			break;
		case plPipeConst::kTex1x4_0:
		case plPipeConst::kTex1x4_1:
		case plPipeConst::kTex1x4_2:
		case plPipeConst::kTex1x4_3:
		case plPipeConst::kTex1x4_4:
		case plPipeConst::kTex1x4_5:
		case plPipeConst::kTex1x4_6:
		case plPipeConst::kTex1x4_7:
			{
				int stage = pc.fType - plPipeConst::kTex1x4_0;
				int layerIdx = int(fCurrLayerIdx) + stage;

				if( (stage > fCurrNumLayers) || (layerIdx >= int(fCurrMaterial->GetNumLayers())) )
				{
					// Same deal as the 3x4 case: bogus stage request, assert and skip.
					hsAssert(false, "Shader asking for higher stage transform than we have");
					continue;
				}
				const hsMatrix44& xfm = fCurrMaterial->GetLayer(layerIdx)->GetTransform();

				// Only the first row of the transform is sent.
				shader->SetFloat4(pc.fReg, xfm.fMap[0]);
			}
			break;
		case plPipeConst::kLocalToNDC:
			{
				hsMatrix44 cam2ndc = IGetCameraToNDC();
				hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera();

				hsMatrix44 local2ndc = cam2ndc * world2cam * GetLocalToWorld();

				shader->SetMatrix44(pc.fReg, local2ndc);
			}
			break;

		case plPipeConst::kCameraToNDC:
			{
				hsMatrix44 cam2ndc = IGetCameraToNDC();

				shader->SetMatrix44(pc.fReg, cam2ndc);
			}
			break;

		case plPipeConst::kWorldToNDC:
			{
				hsMatrix44 cam2ndc = IGetCameraToNDC();
				hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera();

				hsMatrix44 world2ndc = cam2ndc * world2cam;

				shader->SetMatrix44(pc.fReg, world2ndc);
			}
			break;

		case plPipeConst::kLocalToWorld:
			shader->SetMatrix34(pc.fReg, GetLocalToWorld());
			break;

		case plPipeConst::kWorldToLocal:
			shader->SetMatrix34(pc.fReg, GetWorldToLocal());
			break;

		case plPipeConst::kWorldToCamera:
			{
				hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera();

				shader->SetMatrix34(pc.fReg, world2cam);
			}
			break;

		case plPipeConst::kCameraToWorld:
			{
				hsMatrix44 cam2world = GetViewTransform().GetCameraToWorld();

				shader->SetMatrix34(pc.fReg, cam2world);
			}
			break;

		case plPipeConst::kLocalToCamera:
			{
				hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera();

				hsMatrix44 local2cam = world2cam * GetLocalToWorld();

				shader->SetMatrix34(pc.fReg, local2cam);
			}
			break;

		case plPipeConst::kCameraToLocal:
			{
				hsMatrix44 cam2world = GetViewTransform().GetCameraToWorld();

				hsMatrix44 cam2local = GetWorldToLocal() * cam2world;

				shader->SetMatrix34(pc.fReg, cam2local);
			}
			break;

		case plPipeConst::kCamPosWorld:
			{
				shader->SetVectorW(pc.fReg, GetViewTransform().GetCameraToWorld().GetTranslate(), 1.f);
			}
			break;

		case plPipeConst::kCamPosLocal:
			{
				// Camera position (translation of camera-to-world) taken into local space.
				hsPoint3 localCam = GetWorldToLocal() * GetViewTransform().GetCameraToWorld().GetTranslate();

				shader->SetVectorW(pc.fReg, localCam, 1.f);
			}
			break;

		case plPipeConst::kObjPosWorld:
			{
				shader->SetVectorW(pc.fReg, GetLocalToWorld().GetTranslate(), 1.f);
			}
			break;

		// UNIMPLEMENTED
		case plPipeConst::kDirLight1:
		case plPipeConst::kDirLight2:
		case plPipeConst::kDirLight3:
		case plPipeConst::kDirLight4:
		case plPipeConst::kPointLight1:
		case plPipeConst::kPointLight2:
		case plPipeConst::kPointLight3:
		case plPipeConst::kPointLight4:
			break;
		}
	}
}

// ISetShaders /////////////////////////////////////////////////////////////////////////////////////
// Make the given vertex and pixel shaders current on the device. Either (or both)
// may be nil, which selects the fixed function pipeline for that half.
// For each non-nil shader, its pipe constants are filled in first (ISetPipeConsts),
// its device ref is created on demand and linked into the pipeline's ref list, and
// the D3D shader handle is fetched from that ref.
// The device is only told about a shader change when the handle differs from the
// cached current one. Always returns S_OK; device failures are only asserted.
HRESULT plDXPipeline::ISetShaders(plShader* vShader, plShader* pShader)
{
	IDirect3DVertexShader9* vertHandle = NULL;
	IDirect3DPixelShader9* pixHandle = NULL;

	if( vShader )
	{
		hsAssert(vShader->IsVertexShader(), "Wrong type shader as vertex shader");
		ISetPipeConsts(vShader);

		plDXVertexShader* vertRef = (plDXVertexShader*)vShader->GetDeviceRef();
		if( !vertRef )
		{
			// No device ref yet, make one and drop our local reference to it.
			vertRef = TRACKED_NEW plDXVertexShader(vShader);
			hsRefCnt_SafeUnRef(vertRef);
		}
		if( !vertRef->IsLinked() )
			vertRef->Link(&fVShaderRefList);
		vertHandle = vertRef->GetShader(this);

		// This is truly obnoxious, but D3D insists that, while using the progammable pipeline,
		// all stages be set up like this, not just the ones we're going to use. We have to
		// do this if we have either a vertex or a pixel shader. See below. Whatever. mf
		int stage;
		for( stage = 0; stage < 8; stage++ )
		{
			fLayerUVWSrcs[stage] = stage;
			fD3DDevice->SetTextureStageState(stage, D3DTSS_TEXCOORDINDEX, fLayerUVWSrcs[stage]);

			fLayerXformFlags[stage] = 0;
			fD3DDevice->SetTextureStageState(stage, D3DTSS_TEXTURETRANSFORMFLAGS, fLayerXformFlags[stage]);
		}
	}

	if( pShader )
	{
		hsAssert(pShader->IsPixelShader(), "Wrong type shader as pixel shader");

		ISetPipeConsts(pShader);

		plDXPixelShader* pixRef = (plDXPixelShader*)pShader->GetDeviceRef();
		if( !pixRef )
		{
			// No device ref yet, make one and drop our local reference to it.
			pixRef = TRACKED_NEW plDXPixelShader(pShader);
			hsRefCnt_SafeUnRef(pixRef);
		}
		if( !pixRef->IsLinked() )
			pixRef->Link(&fPShaderRefList);
		pixHandle = pixRef->GetShader(this);

		// Pixel shader without a vertex shader: the stage reset above didn't happen, do it now.
		if( !vShader )
		{
			int stage;
			for( stage = 0; stage < 8; stage++ )
			{
				fLayerUVWSrcs[stage] = stage;
				fD3DDevice->SetTextureStageState(stage, D3DTSS_TEXCOORDINDEX, fLayerUVWSrcs[stage]);

				fLayerXformFlags[stage] = 0;
				fD3DDevice->SetTextureStageState(stage, D3DTSS_TEXTURETRANSFORMFLAGS, fLayerXformFlags[stage]);
			}
		}
	}

	// Only touch the device when the shader actually changes.
	if( fSettings.fCurrVertexShader != vertHandle )
	{
		fSettings.fCurrVertexShader = vertHandle;
		HRESULT hr = fD3DDevice->SetVertexShader(fSettings.fCurrVertexShader);
		hsAssert(!FAILED(hr), "Error setting vertex shader");
	}

	if( fSettings.fCurrPixelShader != pixHandle )
	{
		fSettings.fCurrPixelShader = pixHandle;
		HRESULT hr = fD3DDevice->SetPixelShader(fSettings.fCurrPixelShader);
		hsAssert(!FAILED(hr), "Error setting pixel shader");
	}

	// Handle cull mode here, because current cullmode is dependent on
	// the handedness of the LocalToCamera AND whether we are twosided.
	ISetCullMode();

	return S_OK;
}

// IRenderAuxSpan //////////////////////////////////////////////////////////
// Aux spans (auxilliary) are geometry rendered immediately after, and therefore dependent, on
// other normal geometry. They don't have SceneObjects, Drawables, DrawInterfaces or
// any of that, and therefore don't correspond to any object in the scene.
// They are dynamic procedural decals. See plDynaDecal.cpp and plDynaDecalMgr.cpp.
// This is wrapped by IRenderAuxSpans, which makes sure state is restored to resume
// normal rendering after the AuxSpan is rendered.
void plDXPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux)
{
	// Make sure the underlying resources are created and filled in with current data.
	CheckVertexBufferRef(aux->fGroup, aux->fVBufferIdx);
	CheckIndexBufferRef(aux->fGroup, aux->fIBufferIdx);
	ICheckAuxBuffers(aux);

	// Set to render from the aux spans buffers.
	plDXVertexBufferRef* vRef = (plDXVertexBufferRef*)aux->fGroup->GetVertexBufferRef(aux->fVBufferIdx);

	if( !vRef )
		return;

	plDXIndexBufferRef* iRef = (plDXIndexBufferRef*)aux->fGroup->GetIndexBufferRef(aux->fIBufferIdx);

	if( !iRef )
		return;

	HRESULT		r;

	r = fD3DDevice->SetStreamSource( 0, vRef->fD3DBuffer, 0, vRef->fVertexSize );
	hsAssert( r == D3D_OK, "Error trying to set the stream source!" );
	plProfile_Inc(VertexChange);

	fD3DDevice->SetFVF(fSettings.fCurrFVFFormat = IGetBufferD3DFormat(vRef->fFormat));

	r = fD3DDevice->SetIndices( iRef->fD3DBuffer );
	hsAssert( r == D3D_OK, "Error trying to set the indices!" );

	plRenderTriListFunc render(fD3DDevice, iRef->fOffset, aux->fVStartIdx, aux->fVLength, aux->fIStartIdx, aux->fILength/3);

	// Now just loop through the aux material, rendering in as many passes as it takes.
	hsGMaterial* material = aux->fMaterial;
	int j;
	for( j = 0; j < material->GetNumLayers(); )
	{
		int iCurrMat = j;
		j = IHandleMaterial( material, iCurrMat, &span );
		if (j == -1)
			break;

		ISetShaders(material->GetLayer(iCurrMat)->GetVertexShader(), material->GetLayer(iCurrMat)->GetPixelShader());

		if( aux->fFlags & plAuxSpan::kOverrideLiteModel )
		{
			static D3DMATERIAL9	mat;
			fD3DDevice->SetRenderState(D3DRS_AMBIENT, 0xffffffff);

			fD3DDevice->SetRenderState( D3DRS_DIFFUSEMATERIALSOURCE, D3DMCS_MATERIAL );
			fD3DDevice->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_COLOR1 );
			fD3DDevice->SetRenderState( D3DRS_EMISSIVEMATERIALSOURCE, D3DMCS_MATERIAL );
			fD3DDevice->SetRenderState( D3DRS_SPECULARMATERIALSOURCE, D3DMCS_MATERIAL );

			fD3DDevice->SetMaterial( &mat );
		}

		render.RenderPrims();
	}

}

// IRenderAuxSpans ////////////////////////////////////////////////////////////////////////////
// Save and restore render state around calls to IRenderAuxSpan. This lets
// a list of aux spans get rendered with only one save/restore state.
void plDXPipeline::IRenderAuxSpans(const plSpan& span)
{
	if (IsDebugFlagSet(plPipeDbg::kFlagNoAuxSpans))
		return;

	plDXVertexBufferRef* oldVRef = fSettings.fCurrVertexBuffRef;
	plDXIndexBufferRef* oldIRef = fSettings.fCurrIndexBuffRef;

	ISetLocalToWorld(hsMatrix44::IdentityMatrix(), hsMatrix44::IdentityMatrix());

	int i;
	for( i = 0; i < span.GetNumAuxSpans(); i++ )
		IRenderAuxSpan(span, span.GetAuxSpan(i));

	ISetLocalToWorld(span.fLocalToWorld, span.fWorldToLocal);

	HRESULT		r;

	r = fD3DDevice->SetStreamSource( 0, oldVRef->fD3DBuffer, 0, oldVRef->fVertexSize );
	hsAssert( r == D3D_OK, "Error trying to set the stream source!" );
	plProfile_Inc(VertexChange);

	r = fD3DDevice->SetFVF(fSettings.fCurrFVFFormat = IGetBufferD3DFormat(oldVRef->fFormat));

	r = fD3DDevice->SetIndices( oldIRef->fD3DBuffer );
	hsAssert( r == D3D_OK, "Error trying to set the indices!" );

}

// ICheckVBUsage //////////////////////////////////////////////////////////////
// Keep track of how much managed vertex buffer memory is being used and
// has been used since the last evict.
// Buffers whose owner reports volatile verts are not counted. A buffer is counted
// toward fVtxUsed at most once per fTextUseTime value (its fUseTime is stamped).
inline void plDXPipeline::ICheckVBUsage(plDXVertexBufferRef* vRef)
{
	// Volatile vertex data isn't part of this accounting.
	if( vRef->fOwner->AreVertsVolatile() )
		return;

	// Last used at or before the last evict: count it as seen again.
	if( vRef->fUseTime <= fEvictTime )
		fManagedSeen += vRef->fVertexSize * vRef->fCount;

	// Already stamped with the current use time, so already counted.
	if( vRef->fUseTime == fTextUseTime )
		return;

	plProfile_NewMem(CurrVB, vRef->fVertexSize * vRef->fCount);
	fVtxUsed += vRef->fVertexSize * vRef->fCount;
	vRef->fUseTime = fTextUseTime;
}

//// IRenderBufferSpan ////////////////////////////////////////////////////////
// Sets up the vertex and index buffers for a span, and then
// renders it in as many passes as it takes in ILoopOverLayers.
void	plDXPipeline::IRenderBufferSpan( const plIcicle& span,
										 hsGDeviceRef *vb, hsGDeviceRef *ib,
										 hsGMaterial *material, UInt32 vStart, UInt32 vLength,
										 UInt32 iStart, UInt32 iLength )
{
	plProfile_BeginTiming(RenderBuff);

	plDXVertexBufferRef	*vRef = (plDXVertexBufferRef *)vb;
	plDXIndexBufferRef		*iRef = (plDXIndexBufferRef *)ib;

	HRESULT		r;

	if( vRef->fD3DBuffer == nil || iRef->fD3DBuffer == nil )
	{
		plProfile_EndTiming(RenderBuff);
		hsAssert( false, "Trying to render a nil buffer pair!" );
		return;
	}

	/// Switch to the vertex buffer we want
	if( fSettings.fCurrVertexBuffRef != vRef )
	{
		hsRefCnt_SafeAssign( fSettings.fCurrVertexBuffRef, vRef );
		hsAssert( vRef->fD3DBuffer != nil, "Trying to render a buffer pair without a vertex buffer!" );
		vRef->SetRebuiltSinceUsed(true);
	}

	if( vRef->RebuiltSinceUsed() )
	{
		r = fD3DDevice->SetStreamSource( 0, vRef->fD3DBuffer, 0, vRef->fVertexSize );
		hsAssert( r == D3D_OK, "Error trying to set the stream source!" );
		plProfile_Inc(VertexChange);

		DWORD fvf = IGetBufferD3DFormat(vRef->fFormat);
		if (fSettings.fCurrFVFFormat != fvf)
			fD3DDevice->SetFVF(fSettings.fCurrFVFFormat = fvf);

		vRef->SetRebuiltSinceUsed(false);

		ICheckVBUsage(vRef);
	}

	// Note: both these stats are the same, since we don't do any culling or clipping on the tris
	if( fSettings.fCurrIndexBuffRef != iRef )
	{
		hsRefCnt_SafeAssign( fSettings.fCurrIndexBuffRef, iRef );
		hsAssert( iRef->fD3DBuffer != nil, "Trying to render with a nil index buffer" );
		iRef->SetRebuiltSinceUsed(true);
	}

	if( iRef->RebuiltSinceUsed() )
	{
		r = fD3DDevice->SetIndices( iRef->fD3DBuffer );
		hsAssert( r == D3D_OK, "Error trying to set the indices!" );
		plProfile_Inc(IndexChange);
		iRef->SetRebuiltSinceUsed(false);
	}

	plRenderTriListFunc render(fD3DDevice, iRef->fOffset, vStart, vLength, iStart, iLength/3);

	plProfile_EndTiming(RenderBuff);
	ILoopOverLayers(render, material, span);
}

// ILoopOverLayers /////////////////////////////////////////////////////////////////////////////////
// Render the input span with the input material in as many passes as it takes.
// Also handles rendering projected lights, either onto each pass or
// once onto the FB after all the passes, as appropriate.
// After the passes, any aux spans on the span are rendered, then (only if the
// material passes weren't aborted) frame buffer projections and shadows, and
// finally the debug wireframe overlay if that debug flag is set.
// Parameters:
//	inRender - functor that issues the actual draw call for the span's primitives.
//	material - material to render with; replaced by the override material if one is set.
//	span     - the span being rendered.
// Always returns false.
hsBool plDXPipeline::ILoopOverLayers(const plRenderPrimFunc& inRender, hsGMaterial* material, const plSpan& span)
{
	plProfile_BeginTiming(RenderPrim);

	// With the NoRender debug flag set, all state setup still happens but the
	// draws go through sRenderNil instead of the real render functor.
	const plRenderPrimFunc& render = IsDebugFlagSet(plPipeDbg::kFlagNoRender) ? (const plRenderPrimFunc&)sRenderNil : inRender;

	if( GetOverrideMaterial() )
		material = GetOverrideMaterial();

	IPushPiggyBacks(material);

	// Set once a per-pass projection has turned off the span's normal lights.
	hsBool normalLightsDisabled = false;

	// Loop across all the layers we need to draw
	// IHandleMaterial returns the index of the layer to start the next pass at,
	// or -1 to abort, so the loop has no increment of its own.
	int j;
	for( j = 0; j < material->GetNumLayers(); )
	{
		int iCurrMat = j;
		j = IHandleMaterial( material, iCurrMat, &span );
		if (j == -1)
			break;

		// Skip passes that are alpha blended with no opacity, they wouldn't show up anyway.
		if( (fLayerState[0].fBlendFlags & hsGMatState::kBlendAlpha)
				&&(material->GetLayer(iCurrMat)->GetOpacity() <= 0)
				&&(fCurrLightingMethod != plSpan::kLiteVtxPreshaded) ) // This opt isn't good for particles, since their
																		// material opacity is undefined/unused... -mcn
			continue;

		plProfile_BeginTiming(SpanFog);
		ISetFogParameters(&span, material->GetLayer(iCurrMat));
		plProfile_EndTiming(SpanFog);

		ISetShaders(material->GetLayer(iCurrMat)->GetVertexShader(), material->GetLayer(iCurrMat)->GetPixelShader());

		// A previous pass's projection disabled the normal lights; this pass needs them back.
		if( normalLightsDisabled )
			IRestoreSpanLights();

#ifdef HS_DEBUGGING
		// Debug builds ask D3D whether the current state can be rendered, and record the error if not.
		DWORD nPass;
		fSettings.fDXError = fD3DDevice->ValidateDevice(&nPass);
		if( fSettings.fDXError != D3D_OK )
			IGetD3DError();
#endif // HS_DEBUGGING

		// Do the regular draw.
		render.RenderPrims();

		// Take care of projections that get applied to each pass.
		if( fLights.fProjEach.GetCount() && !(fView.fRenderState & kRenderNoProjection) )
		{
			// Disable all the normal lights.
			IDisableSpanLights();
			normalLightsDisabled = true;

			IRenderProjectionEach(render, material, iCurrMat, span);

		}
		// Debug flag: stop after the first pass by jumping j to the end of the layers.
		if (IsDebugFlagSet(plPipeDbg::kFlagNoUpperLayers))
			j = material->GetNumLayers();
	}
	IPopPiggyBacks();

	// If we disabled lighting, re-enable it.
	if( normalLightsDisabled )
		IRestoreSpanLights();

	// Render any aux spans associated.
	if( span.GetNumAuxSpans() )
		IRenderAuxSpans(span);

	// Only render projections and shadows if we successfully rendered the span.
	// j == -1 means we aborted render.
	if( j >= 0 )
	{
		// Projections that get applied to the frame buffer (after all passes).
		if( fLights.fProjAll.GetCount() && !(fView.fRenderState & kRenderNoProjection) )
			IRenderProjections(render);

		// Handle render of shadows onto geometry.
		if( fShadows.GetCount() )
			IRenderShadowsOntoSpan(render, &span, material);
	}

	// Debug only
	if (IsDebugFlagSet(plPipeDbg::kFlagOverlayWire))
	{
		IRenderOverWire(render, material, span);
	}
	plProfile_EndTiming(RenderPrim);

	return false;
}

// IRenderOverWire ///////////////////////////////////////////////////////////////////////////////
// Debug only, renders wireframe on top of normal render.
void plDXPipeline::IRenderOverWire(const plRenderPrimFunc& render, hsGMaterial* material, const plSpan& span)
{
	UInt32 state = fView.fRenderState;
	fView.fRenderState |= plPipeline::kRenderBaseLayerOnly;
	static plLayerDepth depth;
	depth.SetMiscFlags(depth.GetMiscFlags() | hsGMatState::kMiscWireFrame | hsGMatState::kMiscTwoSided);
	depth.SetZFlags((depth.GetZFlags() & ~hsGMatState::kZNoZRead) | hsGMatState::kZIncLayer);

	AppendLayerInterface(&depth, false);

	if( IHandleMaterial( material, 0, &span ) >= 0 )
	{
		ISetShaders(nil, nil);
		render.RenderPrims();
	}

	RemoveLayerInterface(&depth, false)	;

	fView.fRenderState = state;
}

// IRenderProjectionEach ///////////////////////////////////////////////////////////////////////////////////////
// Render any lights that are to be projected onto each pass of the object.
// For each projector in fLights.fProjEach, its projected texture is pushed as a
// piggyback, only that light is enabled, and the layers of the current pass
// [iPass, iPass + fCurrNumLayers) are re-rendered with the light base override layer.
// Parameters:
//	render   - functor that issues the draw call.
//	material - material of the pass being projected onto.
//	iPass    - index of the first material layer of the pass just rendered.
//	span     - the span being rendered.
// If IHandleMaterial aborts (returns a negative index) while re-rendering, that
// projector's remaining re-passes are skipped, but its state is still cleaned up.
void plDXPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span)
{
	// If this is a bump map pass, forget it, we've already "done" per-pixel lighting.
	// NOTE(review): fLayerState is indexed with iPass (a material layer index) here,
	// while other code in this file checks fLayerState[0] for the current pass - confirm.
	if( fLayerState[iPass].fMiscFlags & (hsGMatState::kMiscBumpLayer | hsGMatState::kMiscBumpChans) )
		return;

	// Push the LayerShadowBase override. This sets the blend
	// to framebuffer as Add/ZNoWrite and AmbientColor = 0.
	static plLayerLightBase layLightBase;

	// One past the last layer of the current pass.
	int iNextPass = iPass + fCurrNumLayers;

	// With limited projection, bail if the layer's UVW source has anything set beyond a plain UVW index.
	if( fSettings.fLimitedProj && (material->GetLayer(iPass)->GetUVWSrc() & ~plLayerInterface::kUVWIdxMask) )
		return;

	// For each projector:
	int k;
	for( k = 0; k < fLights.fProjEach.GetCount(); k++ )
	{
		// Push it's projected texture as a piggyback.
		plLightInfo* li = fLights.fProjEach[k];

		// Lower end boards are iffy on when they'll project correctly.
		if( fSettings.fCantProj && !li->GetProperty(plLightInfo::kLPForceProj) )
			continue;

		plLayerInterface* proj = li->GetProjection();
		hsAssert(proj, "A projector with no texture to project?");
		IPushProjPiggyBack(proj);

		// Enable the projecting light only.
		plDXLightRef* ref = (plDXLightRef *)li->GetDeviceRef();
		fD3DDevice->LightEnable( ref->fD3DIndex, true );

		AppendLayerInterface(&layLightBase, false);

		// Render until it's done.
		int iRePass = iPass;
		while( iRePass < iNextPass )
		{
			iRePass = IHandleMaterial( material, iRePass, &span );

			// A negative return means the material setup aborted. Don't draw with
			// half-set state, and don't feed the negative index back into
			// IHandleMaterial (which would also never terminate this loop).
			if( iRePass < 0 )
				break;

			ISetShaders(nil, nil);

			// Do the render with projection.
			render.RenderPrims();
		}

		RemoveLayerInterface(&layLightBase, false);

		// Disable the projecting light
		fD3DDevice->LightEnable(ref->fD3DIndex, false);

		// Pop it's projected texture off piggyback
		IPopProjPiggyBacks();

	}

}

// IRenderProjections ///////////////////////////////////////////////////////////
// Render any projected lights that want to be rendered a single time after
// all passes on the object are complete.
void plDXPipeline::IRenderProjections(const plRenderPrimFunc& render)
{
	IDisableSpanLights();
	int i;
	for( i = 0; i < fLights.fProjAll.GetCount(); i++ )
	{
		plLightInfo* li = fLights.fProjAll[i];

		if( fSettings.fCantProj && !li->GetProperty(plLightInfo::kLPForceProj) )
			continue;

		IRenderProjection(render, li);
	}
	IRestoreSpanLights();
}

// IRenderProjection //////////////////////////////////////////////////////////////
// Render this light's projection onto the frame buffer.
void plDXPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightInfo* li)
{
	plDXLightRef* ref = (plDXLightRef *)li->GetDeviceRef();
	fD3DDevice->LightEnable(ref->fD3DIndex, true);

	plLayerInterface* proj = li->GetProjection();

	static D3DMATERIAL9	mat;
	mat.Diffuse.r = mat.Diffuse.g = mat.Diffuse.b = mat.Diffuse.a = 1.f;

	fD3DDevice->SetMaterial( &mat );
	fD3DDevice->SetRenderState( D3DRS_DIFFUSEMATERIALSOURCE, D3DMCS_MATERIAL );
	fD3DDevice->SetRenderState( D3DRS_SPECULARMATERIALSOURCE, D3DMCS_MATERIAL );
	fD3DDevice->SetRenderState( D3DRS_EMISSIVEMATERIALSOURCE, D3DMCS_MATERIAL );

	fD3DDevice->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_MATERIAL );

	fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0xffffffff ); //@@@

	// Set the FB blend mode, texture, all that.
	ICompositeLayerState(0, proj);
	// We should have put ZNoZWrite on during export, but we didn't.
	fLayerState[0].fZFlags = hsGMatState::kZNoZWrite;
	fCurrNumLayers = 1;
	IHandleFirstTextureStage(proj);

	if( proj->GetBlendFlags() & hsGMatState::kBlendInvertFinalColor )
	{
		fD3DDevice->SetTextureStageState( 0, D3DTSS_COLORARG2, D3DTA_DIFFUSE | D3DTA_COMPLEMENT);
	}

	// Seal it up
	fLastEndingStage = 1;
	fD3DDevice->SetTextureStageState(1, D3DTSS_COLOROP, D3DTOP_DISABLE);
	fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
	fLayerState[1].fBlendFlags = UInt32(-1);

#ifdef HS_DEBUGGING
	DWORD nPass;
	fSettings.fDXError = fD3DDevice->ValidateDevice(&nPass);
	if( fSettings.fDXError != D3D_OK )
		IGetD3DError();
#endif // HS_DEBUGGING

	// Okay, render it already.

	render.RenderPrims();

	fD3DDevice->LightEnable(ref->fD3DIndex, false);
}

//// IGetBufferD3DFormat //////////////////////////////////////////////////////
// Convert the dumbest vertex format on the planet (ours) into an FVF code.
// Note the assumption of position, normal, diffuse, and specular.
// The result is what this file hands to IDirect3DDevice9::SetFVF.
// Parameters:
//	format - plGBufferGroup vertex format byte (skin weight bits, skin indices bit, UV count).
// Returns the FVF code: a position type carrying 0-3 blend weights, normal, diffuse,
// specular, and one 3-component texture coordinate set per UV channel.
// An unrecognized skin weight value asserts and is treated as "no weights", so the
// result is always well defined.
long	plDXPipeline::IGetBufferD3DFormat( UInt8 format ) const
{
	// Everything but the position type is the same for all formats.
	long	fmt = D3DFVF_DIFFUSE | D3DFVF_SPECULAR | D3DFVF_NORMAL;
	long	i;

	// The position type encodes how many blend weights follow the position.
	switch( format & plGBufferGroup::kSkinWeightMask )
	{
		case plGBufferGroup::kSkinNoWeights:
			fmt |= D3DFVF_XYZ;
			break;
		case plGBufferGroup::kSkin1Weight:
			fmt |= D3DFVF_XYZB1;
			break;
		case plGBufferGroup::kSkin2Weights:
			fmt |= D3DFVF_XYZB2;
			break;
		case plGBufferGroup::kSkin3Weights:
			fmt |= D3DFVF_XYZB3;
			break;
		default:
			hsAssert( false, "Bad skin weight value in IGetBufferD3DFormat()" );
			// Fall back to an unweighted position rather than returning garbage.
			fmt |= D3DFVF_XYZ;
			break;
	}
	if( format & plGBufferGroup::kSkinIndices )
	{
		hsAssert(false, "Indexed skinning not supported");
		fmt |= D3DFVF_LASTBETA_UBYTE4;
	}

	const long numUVs = plGBufferGroup::CalcNumUVs( format );

	// Texture coordinate set count. Counts outside 0-8 add nothing.
	switch( numUVs )
	{
		case 0: fmt |= D3DFVF_TEX0; break;
		case 1: fmt |= D3DFVF_TEX1; break;
		case 2: fmt |= D3DFVF_TEX2; break;
		case 3: fmt |= D3DFVF_TEX3; break;
		case 4: fmt |= D3DFVF_TEX4; break;
		case 5: fmt |= D3DFVF_TEX5; break;
		case 6: fmt |= D3DFVF_TEX6; break;
		case 7: fmt |= D3DFVF_TEX7; break;
		case 8: fmt |= D3DFVF_TEX8; break;
	}

	// All of our texture coordinates are 3 component (UVW).
	for( i = 0; i < numUVs; i++ )
		fmt |= D3DFVF_TEXCOORDSIZE3( i );

	return fmt;
}

//// IGetBufferFormatSize /////////////////////////////////////////////////////
// Calculate the vertex stride from the given format.
// Parameters:
//	format - plGBufferGroup vertex format byte (skin weight bits and UV count are used).
// Returns the size in bytes of one vertex: position and normal (6 floats), two packed
// colors, one float per blend weight, and 3 floats per UV channel.
// The weight counts handled here match the position types IGetBufferD3DFormat
// produces (XYZ, XYZB1, XYZB2, XYZB3), so the stride agrees with the FVF code.
// An unrecognized skin weight value asserts and adds no weight data.
UInt32	plDXPipeline::IGetBufferFormatSize( UInt8 format ) const
{
	UInt32	size = sizeof( float ) * 6 + sizeof( UInt32 ) * 2; // Position and normal, and two packed colors


	switch( format & plGBufferGroup::kSkinWeightMask )
	{
		case plGBufferGroup::kSkinNoWeights:
			break;
		case plGBufferGroup::kSkin1Weight:
			size += sizeof(float);
			break;
		case plGBufferGroup::kSkin2Weights:
			size += sizeof(float) * 2;
			break;
		case plGBufferGroup::kSkin3Weights:
			size += sizeof(float) * 3;
			break;
		default:
			hsAssert( false, "Invalid skin weight value in IGetBufferFormatSize()" );
	}

	// Every UV channel is a full UVW triple.
	size += sizeof( float ) * 3 * plGBufferGroup::CalcNumUVs( format );

	return size;
}

///////////////////////////////////////////////////////////////////////////////
//// Plate and PlateManager Functions /////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// None of this plate code is mine, so your guess is as good as mine.
// I'll throw in comments where I happen to know what it's doing, but a lot
// of this is just ugly.
// The plates are mostly used for debugging/performance tools, but they do
// unfortunately get used for some production things like the cursor.
// By the way, a Plate is just a screen aligned textured quad that is rendered
// on top of the normal scene. mf

// ICreateGeometry /////////////////////////////////////////////////////////
// Make a quad suitable for rendering as a tristrip.
// Creates a 4 vertex, write-only vertex buffer in the default pool and fills it
// with a unit quad centered on the origin (corners at +-0.5, z = 0), white vertex
// color, and UVs running 0..1 across the quad.
// Sets fCreatedSucessfully to reflect the outcome. On any failure fVertBuffer is
// left nil, so IDrawToDevice won't try to draw from a buffer that was never filled.
void plDXPlateManager::ICreateGeometry(plDXPipeline* pipe)
{
	UInt32 fvfFormat = PLD3D_PLATEFVF;
	D3DPOOL poolType = D3DPOOL_DEFAULT;
	hsAssert(!pipe->ManagedAlloced(), "Alloc default with managed alloc'd");
	if( FAILED( fD3DDevice->CreateVertexBuffer( 4 * sizeof( plPlateVertex ),
												D3DUSAGE_WRITEONLY,
												fvfFormat,
												poolType, &fVertBuffer, NULL ) ) )
	{
		hsAssert( false, "CreateVertexBuffer() call failed!" );
		fCreatedSucessfully = false;
		return;
	}
	PROFILE_POOL_MEM(poolType, 4 * sizeof(plPlateVertex), true, "PlateMgrVtxBuff");

	/// Lock the buffer
	plPlateVertex *ptr;
	if( FAILED( fVertBuffer->Lock( 0, 0, (void **)&ptr, D3DLOCK_NOSYSLOCK ) ) )
	{
		hsAssert( false, "Failed to lock vertex buffer for writing" );
		// Don't keep a buffer we couldn't fill; this also balances the
		// pool memory accounting done just above.
		IReleaseGeometry();
		fCreatedSucessfully = false;
		return;
	}

	/// Set 'em up
	// UV of each corner, in triangle strip order. The position is just the UV
	// shifted by -0.5 so the quad is centered on the origin.
	static const float kCornerUV[ 4 ][ 2 ] =
	{
		{ 0.0f, 0.0f },
		{ 0.0f, 1.0f },
		{ 1.0f, 0.0f },
		{ 1.0f, 1.0f }
	};

	int i;
	for( i = 0; i < 4; i++ )
	{
		const float u = kCornerUV[ i ][ 0 ];
		const float v = kCornerUV[ i ][ 1 ];

		ptr[ i ].fPoint.Set( u - 0.5f, v - 0.5f, 0.0f );
		ptr[ i ].fColor = 0xffffffff;
		ptr[ i ].fUV.Set( u, v, 0.0f );
	}

	/// Unlock and we're done!
	fVertBuffer->Unlock();
	fCreatedSucessfully = true;

}

// IReleaseGeometry ////////////////////////////////////////////////////////////
// Let go of any D3D resources created for this.
// Does nothing if there is no vertex buffer. Otherwise releases it, takes its
// 4 vertices back out of the pool memory profiling, and nils the pointer.
void plDXPlateManager::IReleaseGeometry()
{
	// Nothing allocated, nothing to do.
	if( !fVertBuffer )
		return;

	ReleaseObject(fVertBuffer);
	PROFILE_POOL_MEM(D3DPOOL_DEFAULT, 4 * sizeof(plPlateVertex), false, "PlateMgrVtxBuff");
	fVertBuffer = nil;
}

//// Constructor & Destructor /////////////////////////////////////////////////

// Stores the device pointer and sets up the plate vertex format. No D3D resources are
// created here; the vertex buffer starts out nil and is made later by ICreateGeometry.
// Parameters:
//	pipe   - owning pipeline, passed on to the plPlateManager base.
//	device - D3D device used for plate rendering.
plDXPlateManager::plDXPlateManager( plDXPipeline *pipe, IDirect3DDevice9 *device ) : plPlateManager( pipe ),
			// Plate vertices are a position, a diffuse color, and one 3-component texture coordinate set.
			PLD3D_PLATEFVF( D3DFVF_XYZ | D3DFVF_DIFFUSE | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE3(0) ),
			fD3DDevice(device),
			fVertBuffer(nil)
{
}

// Releases the plate vertex buffer, if there is one.
plDXPlateManager::~plDXPlateManager()
{
	IReleaseGeometry();
}

//// IDrawToDevice ////////////////////////////////////////////////////////////
// Render all currently enabled plates to the screen.
// Sets the device up to draw from the shared plate quad (no vertex shader, plate FVF,
// plate vertex buffer as stream 0), then walks the plate list drawing each visible
// plate through the pipeline's IDrawPlate. With debug text enabled, a plate's title is
// drawn as text; graph plates also get their label key drawn.
// Does nothing if the plate vertex buffer doesn't exist.
// Parameters:
//	pipe - the pipeline to draw with; cast to plDXPipeline without checking.
void	plDXPlateManager::IDrawToDevice( plPipeline *pipe )
{
	plDXPipeline	*dxPipe = (plDXPipeline *)pipe;
	plPlate			*plate;
	// Half the screen dimensions, used to map plate space coordinates to pixels.
	// NOTE(review): these are used as divisors below; a width or height under 2 would divide by zero - confirm callers.
	UInt32			scrnWidthDiv2 = fOwner->Width() >> 1;
	UInt32			scrnHeightDiv2 = fOwner->Height() >> 1;
	D3DXMATRIX		mat;
	D3DCULL			oldCullMode;

	if( !fVertBuffer )
		return;

	// Make sure skinning is disabled.
	// The pipeline's cached vertex shader and FVF are updated along with the device.
	fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_DISABLE);
	fD3DDevice->SetVertexShader( dxPipe->fSettings.fCurrVertexShader = NULL);
	fD3DDevice->SetFVF(dxPipe->fSettings.fCurrFVFFormat = PLD3D_PLATEFVF);
	fD3DDevice->SetStreamSource( 0, fVertBuffer, 0, sizeof( plPlateVertex ) );
	plProfile_Inc(VertexChange);
	// To get plates properly pixel-aligned, we need to compensate for D3D9's weird half-pixel
	// offset (see http://drilian.com/2008/11/25/understanding-half-pixel-and-half-texel-offsets/
	// or http://msdn.microsoft.com/en-us/library/bb219690(VS.85).aspx).
	D3DXMatrixTranslation(&mat, -0.5f/scrnWidthDiv2, -0.5f/scrnHeightDiv2, 0.0f);
	fD3DDevice->SetTransform( D3DTS_VIEW, &mat );
	// IDrawPlate overrides the cull mode, remember it so it can be put back after the loop.
	oldCullMode = dxPipe->fCurrCullMode;

	for( plate = fPlates; plate != nil; plate = plate->GetNext() )
	{
		if( plate->IsVisible() )
		{
			dxPipe->IDrawPlate( plate );

			// Title text, centered horizontally on the plate space point (0, -0.5)
			// after the plate's transform and the conversion to pixels.
			const char *title = plate->GetTitle();
			if( plDebugText::Instance().IsEnabled() && title[ 0 ] != 0 )
			{
				hsPoint3 pt;
				pt.Set( 0, -0.5, 0 );
				pt = plate->GetTransform() * pt;
				pt.fX = pt.fX * scrnWidthDiv2 + scrnWidthDiv2;
				pt.fY = pt.fY * scrnHeightDiv2 + scrnHeightDiv2;
				pt.fX -= plDebugText::Instance().CalcStringWidth( title ) >> 1;
				plDebugText::Instance().DrawString( (UInt16)pt.fX, (UInt16)pt.fY + 1, title, 255, 255, 255, 255, plDebugText::kStyleBold );
			}

			if( plate->GetFlags() & plPlate::kFlagIsAGraph )
			{
				plGraphPlate	*graph = (plGraphPlate *)plate;
				hsPoint3		pt, pt2;
				int				i;

				// Only draw a key if the first label isn't empty.
				if( graph->GetLabelText( 0 )[ 0 ] != 0 )
				{
					/// Draw key
					const char *str;

					// Start from the plate space point (-0.5, -0.5), converted to pixels,
					// one font height further down.
					pt.Set( -0.5, -0.5, 0 );
					pt = plate->GetTransform() * pt;
					pt.fX = pt.fX * scrnWidthDiv2 + scrnWidthDiv2;
					pt.fY = pt.fY * scrnHeightDiv2 + scrnHeightDiv2;
					pt.fY += plDebugText::Instance().GetFontHeight();

					// Never draw more labels than there are colors to draw them in.
					UInt32 numLabels = graph->GetNumLabels();
					if (numLabels > graph->GetNumColors())
						numLabels = graph->GetNumColors();

					for( i = 0; i < numLabels; i++ )
					{
						// An empty label ends the key.
						str = graph->GetLabelText( i );
						if( str[ 0 ] == 0 )
							break;

						// Each label is right-aligned to pt.fX, one font height below the previous one.
						pt2 = pt;
						pt2.fX -= plDebugText::Instance().CalcStringWidth( str );
						plDebugText::Instance().DrawString( (UInt16)pt2.fX, (UInt16)pt2.fY, str,
															graph->GetDataColor( i ), plDebugText::kStyleBold );
						pt.fY += plDebugText::Instance().GetFontHeight();
					}
				}
			}
		}
	}

	// Put the cull mode back: none if the current first layer state is two-sided,
	// otherwise whatever it was before the plates were drawn.
	dxPipe->fCurrCullMode = ( dxPipe->fLayerState[0].fMiscFlags & hsGMatState::kMiscTwoSided ) ? D3DCULL_NONE : oldCullMode;
	fD3DDevice->SetRenderState( D3DRS_CULLMODE,	dxPipe->fCurrCullMode );
}

// IDrawPlate ///////////////////////////////////////////////////////////////////////
// Render this plate, in as many passes as it takes.
// The plate's transform goes straight to the device as the world transform, and a
// fixed projection matrix is re-applied on every pass. Each pass draws the currently
// bound stream as a 2 triangle strip with no shaders and clockwise culling.
// If IHandleMaterial aborts (returns a negative index), the remaining passes are skipped.
void	plDXPipeline::IDrawPlate( plPlate *plate )
{
	int			i;
	hsGMaterial	*material = plate->GetMaterial();
	D3DXMATRIX	mat;


	/// Set up the D3D transform directly
	IMatrix44ToD3DMatrix( mat, plate->GetTransform() );
	fD3DDevice->SetTransform( D3DTS_WORLD, &mat );

	// From here on mat is the projection that gets set for each pass below.
	mat = d3dIdentityMatrix;
	mat(1,1) = -1.0f;
	mat(2,2) = 2.0f;
	mat(2,3) = 1.0f;
	mat(3,2) = -2.0f;
	mat(3,3) = 0.0f;

	IPushPiggyBacks(material);

	/// Draw the vertex buffer once for each material pass
	for( i = 0; i < material->GetNumLayers(); )
	{
		// Stat gather adjust: since IHandleMaterial will count this in the stat gather,
		// artificially decrement here so that the plates don't skew the stat gathering
		// Taking this out. If the plates are causing more material changes, they should
		// show up in the stats. mf


		i = IHandleMaterial( material, i, nil );

		// A negative return means the material setup aborted. Don't draw with
		// half-set state, and don't go around again with a negative layer index.
		if( i < 0 )
			break;

		ISetShaders(nil, nil);

		// To override the transform done by the z-bias
		fD3DDevice->SetTransform( D3DTS_PROJECTION, &mat );
		// And this to override cullmode set based on material 2-sidedness.
		fD3DDevice->SetRenderState( D3DRS_CULLMODE,	fCurrCullMode = D3DCULL_CW );

		WEAK_ERROR_CHECK( fD3DDevice->DrawPrimitive( D3DPT_TRIANGLESTRIP, 0, 2 ) );
	}

	IPopPiggyBacks();
}


///////////////////////////////////////////////////////////////////////////////
//// Error Message Stuff //////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// IAddErrorMessage ////////////////////////////////////////////////////
// Append the error string to the current error string.
void	plDXPipeline::IAddErrorMessage( char *errStr )
{
	static char	str[ 512 ];
	if( errStr && strlen( errStr ) + strlen( fSettings.fErrorStr ) < sizeof( fSettings.fErrorStr ) - 4 )
	{
		strcpy( str, fSettings.fErrorStr );
		sprintf( fSettings.fErrorStr, "%s\n(%s)", errStr, str );
		plStatusLog::AddLineS("pipeline.log", fSettings.fErrorStr);
	}
}

// ISetErrorMessage //////////////////////////////////////////////////////////
// Replace the current error string with the input string, or clear it if the input is nil.
void	plDXPipeline::ISetErrorMessage( char *errStr )
{
	if( errStr )
	{
		// Bounded copy: fErrorStr is a fixed-size array, and an over-long message
		// must be truncated rather than written past its end. strncpy does not
		// terminate on truncation, so the last byte is set explicitly.
		const size_t	maxLen = sizeof( fSettings.fErrorStr ) - 1;
		strncpy( fSettings.fErrorStr, errStr, maxLen );
		fSettings.fErrorStr[ maxLen ] = '\0';

		plStatusLog::AddLineS("pipeline.log", fSettings.fErrorStr);
	}
	else
	{
		// nil input means "no error": leave an empty string behind.
		fSettings.fErrorStr[ 0 ] = '\0';
	}
}

// IGetD3DError /////////////////////////////////////////////////////////////////
// Convert the last D3D error code to a string (probably "Conflicting Render State").
void	plDXPipeline::IGetD3DError()
{
	// Translate the stored DirectX result code into text...
	char	*dxErrText = (char *)DXGetErrorString( fSettings.fDXError );

	// ...and overwrite (not append to) the current error string with it.
	sprintf( fSettings.fErrorStr, "D3DError : %s", dxErrText );
}

// IShowErrorMessage /////////////////////////////////////////////////////////////
// Append the string to the running error string.
void	plDXPipeline::IShowErrorMessage( char *errStr )
{
	// A nil message leaves the running error string untouched.
	if( nil == errStr )
		return;

	IAddErrorMessage( errStr );

	// Disabled: used to assert with the accumulated message.
//	hsAssert( false, fSettings.fErrorStr );
}

// ICreateFail ////////////////////////////////////////////////////////////////////
// Called on unrecoverable error during device creation. Frees up anything
// allocated so far, sets the error string, and returns true.
hsBool	plDXPipeline::ICreateFail( char *errStr )
{
	// Only seed the error string from the D3D error code when nothing has
	// been recorded yet, so an earlier message is not overwritten.
	if( 0 == fSettings.fErrorStr[ 0 ] )
		IGetD3DError();

	if( errStr != nil && errStr[ 0 ] != 0 )
	{
		// Caller supplied a non-empty message: stack it on top.
		IAddErrorMessage( errStr );
	}
	else if( 0 == fSettings.fErrorStr[ 0 ] )
	{
		// Fallback when there is still no message at all.
		// NOTE(review): IGetD3DError always writes at least the "D3DError : "
		// prefix, so this branch looks unreachable - confirm.
		IAddErrorMessage( "unknown" );
	}

	// Tear down whatever device objects were created before the failure.
	IReleaseDeviceObjects();

	// Always reports failure to the caller as 'true'.
	return true;
}

// GetErrorString ///////////////////////////////////////////////////////////////////////////
// Return the current error string.
const char	*plDXPipeline::GetErrorString()
{
	// A non-empty buffer is returned directly (pointer into fSettings, not a copy).
	if( fSettings.fErrorStr[ 0 ] != 0 )
		return fSettings.fErrorStr;

	// An empty buffer is reported as "no error" via nil.
	return nil;
}


///////////////////////////////////////////////////////////////////////////////
//// Miscellaneous Utility Functions //////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

//// GetDXBitDepth //////////////////////////////////////////////////////////
//
//	From a D3DFORMAT enumeration, return the bit depth associated with it.

short	plDXPipeline::GetDXBitDepth( D3DFORMAT format )
{
	// Formats are grouped by the bit depth reported for them. Anything not
	// listed (including the FOURCC/compressed formats such as UYVY, YUY2,
	// DXT1-5 and VERTEXDATA, which have no meaningful per-element depth, and
	// the no-longer-handled D3DFMT_W11V11U10) reports 0.
	switch( format )
	{
		// 8 bits
		case D3DFMT_R3G3B2:
		case D3DFMT_A8:
		case D3DFMT_P8:
		case D3DFMT_L8:
		case D3DFMT_A4L4:
			return 8;

		// 16 bits
		case D3DFMT_R5G6B5:
		case D3DFMT_X1R5G5B5:
		case D3DFMT_A1R5G5B5:
		case D3DFMT_A4R4G4B4:
		case D3DFMT_A8R3G3B2:
		case D3DFMT_X4R4G4B4:
		case D3DFMT_A8P8:
		case D3DFMT_A8L8:
		case D3DFMT_V8U8:
		case D3DFMT_L6V5U5:
		case D3DFMT_D16_LOCKABLE:
		case D3DFMT_D15S1:
		case D3DFMT_D16:
		case D3DFMT_INDEX16:
			return 16;

		// 24 bits
		case D3DFMT_R8G8B8:
			return 24;

		// 32 bits
		case D3DFMT_A8R8G8B8:
		case D3DFMT_X8R8G8B8:
		case D3DFMT_X8L8V8U8:
		case D3DFMT_Q8W8V8U8:
		case D3DFMT_V16U16:
		case D3DFMT_D32:
		case D3DFMT_D24S8:
		case D3DFMT_D24X8:
		case D3DFMT_D24X4S4:
		case D3DFMT_INDEX32:
			return 32;

		// Unknown or unsupported translation format--return 0
		case D3DFMT_UNKNOWN:
		default:
			return 0;
	}
}

//// IGetDXFormatName ////////////////////////////////////////////////////////
//
//	From a D3DFORMAT enumeration, return the string for it.

const char	*plDXPipeline::IGetDXFormatName( D3DFORMAT format )
{
	// Every handled format returns the exact spelling of its enumerator, so the
	// preprocessor stringizes the name instead of it being typed out twice.
#define DX_FORMAT_NAME_CASE( fmt )	case fmt: return #fmt

	switch( format )
	{
		DX_FORMAT_NAME_CASE( D3DFMT_UNKNOWN );
		DX_FORMAT_NAME_CASE( D3DFMT_R8G8B8 );
		DX_FORMAT_NAME_CASE( D3DFMT_A8R8G8B8 );
		DX_FORMAT_NAME_CASE( D3DFMT_X8R8G8B8 );
		DX_FORMAT_NAME_CASE( D3DFMT_R5G6B5 );
		DX_FORMAT_NAME_CASE( D3DFMT_X1R5G5B5 );
		DX_FORMAT_NAME_CASE( D3DFMT_A1R5G5B5 );
		DX_FORMAT_NAME_CASE( D3DFMT_A4R4G4B4 );
		DX_FORMAT_NAME_CASE( D3DFMT_R3G3B2 );
		DX_FORMAT_NAME_CASE( D3DFMT_A8 );
		DX_FORMAT_NAME_CASE( D3DFMT_A8R3G3B2 );
		DX_FORMAT_NAME_CASE( D3DFMT_X4R4G4B4 );
		DX_FORMAT_NAME_CASE( D3DFMT_A8P8 );
		DX_FORMAT_NAME_CASE( D3DFMT_P8 );
		DX_FORMAT_NAME_CASE( D3DFMT_L8 );
		DX_FORMAT_NAME_CASE( D3DFMT_A8L8 );
		DX_FORMAT_NAME_CASE( D3DFMT_A4L4 );
		DX_FORMAT_NAME_CASE( D3DFMT_V8U8 );
		DX_FORMAT_NAME_CASE( D3DFMT_L6V5U5 );
		DX_FORMAT_NAME_CASE( D3DFMT_X8L8V8U8 );
		DX_FORMAT_NAME_CASE( D3DFMT_Q8W8V8U8 );
		DX_FORMAT_NAME_CASE( D3DFMT_V16U16 );
		// D3DFMT_W11V11U10 intentionally not handled.
		DX_FORMAT_NAME_CASE( D3DFMT_UYVY );
		DX_FORMAT_NAME_CASE( D3DFMT_YUY2 );
		DX_FORMAT_NAME_CASE( D3DFMT_DXT1 );
		// D3DFMT_DXT2, D3DFMT_DXT3 and D3DFMT_DXT4 intentionally not handled.
		DX_FORMAT_NAME_CASE( D3DFMT_DXT5 );
		DX_FORMAT_NAME_CASE( D3DFMT_VERTEXDATA );
		DX_FORMAT_NAME_CASE( D3DFMT_D16_LOCKABLE );
		DX_FORMAT_NAME_CASE( D3DFMT_D32 );
		DX_FORMAT_NAME_CASE( D3DFMT_D15S1 );
		DX_FORMAT_NAME_CASE( D3DFMT_D24S8 );
		DX_FORMAT_NAME_CASE( D3DFMT_D16 );
		DX_FORMAT_NAME_CASE( D3DFMT_D24X8 );
		DX_FORMAT_NAME_CASE( D3DFMT_D24X4S4 );
		DX_FORMAT_NAME_CASE( D3DFMT_INDEX16 );
		DX_FORMAT_NAME_CASE( D3DFMT_INDEX32 );

		default:
			break;
	}

#undef DX_FORMAT_NAME_CASE

	// Anything not listed above.
	return "Bad format";
}

//// IFPUCheck ////////////////////////////////////////////////////////////////
//	Checks the FPU to make sure it's in the right mode
// This should return wSave to allow it to be restored after rendering.
// This is obsolete as of DX8
// Reads the x87 FPU control word and, if it is not already in the mode tested
// for below, rewrites it. Uses MSVC-style inline assembly.
// Note that nothing is returned: the original control word (wSave) is a local
// and this function never restores it.
void	plDXPipeline::IFPUCheck()
{
	// wSave receives the current control word; wTemp holds the modified
	// value that is loaded back.
	WORD	wSave, wTemp;
	// Store the current FPU control word into wSave.
    __asm fstcw wSave
	// Only touch the FPU if any of the three tested fields differs from the
	// desired setting.
    if (wSave & 0x300 ||            // Not single mode
        0x3f != (wSave & 0x3f) ||   // Exceptions enabled
        wSave & 0xC00)              // Not round to nearest mode
    {
		// Clear the 0x300 and 0xC00 fields, set all of the low six bits,
		// and load the result as the new control word.
		__asm
		{
			mov ax, wSave
			and ax, not 0x300    ;; single mode
			or  ax, 0x3f         ;; disable all exceptions
			and ax, not 0xC00   ;; round to nearest mode
			mov wTemp, ax
			fldcw   wTemp
		}
	}
}

// PushPiggyBackLayer /////////////////////////////////////////////////////
// Push a piggy back onto the stack.
// Pushes li onto fPiggyBackStack and returns the same pointer that was passed in.
// No nil check or duplicate check is made on li here.
plLayerInterface* plDXPipeline::PushPiggyBackLayer(plLayerInterface* li)
{
	fPiggyBackStack.Push(li);

	// Called after the stack changes; presumably recomputes the active
	// piggy-back count from the new stack contents - confirm against its definition.
	ISetNumActivePiggyBacks();

	// NOTE(review): presumably makes the next material setup run even if the
	// material itself is unchanged, since the piggy-back set differs - confirm.
	fForceMatHandle = true;

	return li;
}

// PopPiggyBackLayer ///////////////////////////////////////////////////////////////////
// Pull the piggy back out of the stack (if it's there).
plLayerInterface* plDXPipeline::PopPiggyBackLayer(plLayerInterface* li)
{
	// Locate the layer anywhere in the stack (not just on top).
	const int stackPos = fPiggyBackStack.Find(li);

	if( stackPos != fPiggyBackStack.kMissingIndex )
	{
		fPiggyBackStack.Remove(stackPos);

		ISetNumActivePiggyBacks();

		fForceMatHandle = true;

		// Found and removed: hand the layer back to the caller.
		return li;
	}

	// Not on the stack: nothing changed.
	return nil;
}

// AppendLayerInterface ///////////////////////////////////////////////////////////////////
// Setup a layer wrapper to wrap around either all layers rendered with or just the base layers.
// Note that a single material has multiple base layers if it takes multiple passes to render.
// Stays in effect until removed by RemoveLayerInterface.
plLayerInterface* plDXPipeline::AppendLayerInterface(plLayerInterface* li, hsBool onAllLayers)
{
	fForceMatHandle = true;

	if( onAllLayers )
	{
		// Attach li to the current all-layers chain; the result becomes the new
		// chain and is also what the caller gets back.
		fOverAllLayer = li->Attach(fOverAllLayer);
		return fOverAllLayer;
	}

	// Same, but for the chain applied to base layers only.
	fOverBaseLayer = li->Attach(fOverBaseLayer);
	return fOverBaseLayer;
}

// RemoveLayerInterface //////////////////////////////////////////////////////////////////
// Removes a layer wrapper installed by AppendLayerInterface.
plLayerInterface* plDXPipeline::RemoveLayerInterface(plLayerInterface* li, hsBool onAllLayers)
{
	// Set unconditionally, even when there turns out to be nothing to remove.
	fForceMatHandle = true;

	if( onAllLayers )
	{
		if( fOverAllLayer )
		{
			// Whatever Remove() returns becomes the new all-layers chain.
			fOverAllLayer = fOverAllLayer->Remove(li);
			return fOverAllLayer;
		}
		// No all-layers chain installed.
		return nil;
	}

	if( fOverBaseLayer )
	{
		// Whatever Remove() returns becomes the new base-layer chain.
		fOverBaseLayer = fOverBaseLayer->Remove(li);
		return fOverBaseLayer;
	}

	// No base-layer chain installed.
	return nil;
}

///////////////////////////////////////////////////////////////////////////////
//// ShadowSection
//// Shadow specific internal functions
///////////////////////////////////////////////////////////////////////////////
// See plGLight/plShadowMaster.cpp for more notes.

// IAttachShadowsToReceivers ///////////////////////////////////////////////////////////
// For each active shadow map (in fShadows), attach it to all of the visible spans in drawable
// that it affects. Shadows explicitly attached via light groups are handled separately in ISetShadowFromGroup.
void plDXPipeline::IAttachShadowsToReceivers(plDrawableSpans* drawable, const hsTArray<Int16>& visList)
{
	// Hand each entry of fShadows, by index, to the per-slave attach routine.
	for( int iShadow = 0; iShadow < fShadows.GetCount(); ++iShadow )
	{
		IAttachSlaveToReceivers(iShadow, drawable, visList);
	}
}

// IAttachSlaveToReceivers /////////////////////////////////////////////////////
// Find all the visible spans in this drawable affected by this shadow map,
// and attach it to them.
void plDXPipeline::IAttachSlaveToReceivers(int which, plDrawableSpans* drawable, const hsTArray<Int16>& visList)
{
	plShadowSlave* slave = fShadows[which];

	// Whether the drawable is a character affects which lights/shadows affect it.
	hsBool isChar = drawable->GetNativeProperty(plDrawable::kPropCharacter);

	// If the shadow is part of a light group, it gets handled in ISetShadowFromGroup.
	// Unless the drawable is a character (something that moves around indeterminately,
	// like the avatar or a physical object), and the shadow affects all characters.
	if( slave->ObeysLightGroups() && !(slave->IncludesChars() && isChar) )
		return;

	// Do a space tree harvest looking for spans that are visible and whose bounds
	// intercect the shadow volume.
	plSpaceTree* space = drawable->GetSpaceTree();

	static hsBitVector cache;
	cache.Clear();
	space->EnableLeaves(visList, cache);

	static hsTArray<Int16> hitList;
	hitList.SetCount(0);
	space->HarvestEnabledLeaves(slave->fIsect, cache, hitList);

	// For the visible spans that intercect the shadow volume, attach the shadow
	// to all appropriate for receiving this shadow map.
	int i;
	for( i = 0; i < hitList.GetCount(); i++ )
	{
		const plSpan* span = drawable->GetSpan(hitList[i]);
		hsGMaterial* mat = drawable->GetMaterial(span->fMaterialIdx);

		// Check that the span isn't flagged as unshadowable, or has
		// a material that we can't shadow onto.
		if( !IReceivesShadows(span, mat) )
			continue;

		// Check for self shadowing. If the shadow doesn't want self shadowing,
		// and the span is part of the shadow caster, then skip.
		if( !IAcceptsShadow(span, slave) )
			continue;

		// Add it to this span's shadow list for this frame.
		span->AddShadowSlave(fShadows[which]->fIndex);
	}

}

// ISetShadowFromGroup ////////////////////////////////////////////////////////////////////////
// The light casting this shadow has been explicitly attached to this span, so no need
// for checking bounds, but we do anyway because the artists aren't very conservative
// along those lines. The light has a bitvector indicating which of the current shadows
// are from it (there will be a shadow map for each shadow-light/shadow-caster pair),
// so we look through those shadow maps and if they are acceptable, attach them to
// the span.
// Note that a shadow slave corresponds to a shadow map.
void plDXPipeline::ISetShadowFromGroup(plDrawableSpans* drawable, const plSpan* span, plLightInfo* liInfo)
{
	// Bail out early if this span/material combination cannot receive shadows.
	hsGMaterial* spanMat = drawable->GetMaterial(span->fMaterialIdx);
	if( !IReceivesShadows(span, spanMat) )
		return;

	// Bits identifying (by fIndex) which current shadow slaves come from this light.
	const hsBitVector& slaveBits = liInfo->GetSlaveBits();

	for( int iShadow = 0; iShadow < fShadows.GetCount(); iShadow++ )
	{
		plShadowSlave* slave = fShadows[iShadow];

		// Not cast by this light.
		if( !slaveBits.IsBitSet(slave->fIndex) )
			continue;

		// Self-shadowing check.
		if( !IAcceptsShadow(span, slave) )
			continue;

		// Shadow volume and span bounds do not overlap.
		if( slave->fIsect->Test(span->fWorldBounds) == kVolumeCulled )
			continue;

		span->AddShadowSlave(slave->fIndex);
	}
}


// SubmitShadowSlave ////////////////////////////////////////////////////////
// Puts the slave in a list valid for this frame only. The list will
// be preprocessed at BeginRender. See IPreprocessShadows.

void plDXPipeline::SubmitShadowSlave(plShadowSlave* slave)
{
	// Reject anything without a caster that has a key.
	if( !slave || !slave->fCaster || !slave->fCaster->GetKey() )
		return;

	// Boards with limited (or no) projection capability can't do perspective
	// shadows, which need a count3 uvw on texture units 0 and 1 at once.
	// Drop such slaves.
	if( fSettings.fLimitedProj || fSettings.fCantProj )
	{
		if( slave->fView.GetPerspective() )
			return;
	}

	// Hold a reference on the caster so it is still around at render time.
	slave->fCaster->GetKey()->RefObject();

	// fShadows is kept sorted by priority; walk forward to the first entry whose
	// priority is not below the new slave's and insert in front of it.
	int insertAt = 0;
	while( insertAt < fShadows.GetCount()
		&& !(slave->fPriority <= fShadows[insertAt]->fPriority) )
	{
		insertAt++;
	}

	// fIndex is the list size at submission time: a unique identifier for the
	// slave, not its position in fShadows.
	slave->fIndex = fShadows.GetCount();
	fShadows.Insert(insertAt, slave);
}

// NOTE(review): not referenced anywhere in the visible part of this file;
// presumably a debug/tuning override for the shadow blur - confirm.
hsScalar blurScale = -1.f;
// Log2 of a blur sample count (1 << 3 == 8 samples). Only referenced from a
// commented-out line in IRenderBlurFromShadowMap.
static 	const int kL2NumSamples = 3; // NOTE(review): comment previously read "Log2(4)", but log2(4) is 2; 3 is log2(8) - confirm which was intended.

// IBlurShadowMap //////////////////////////////////////////////////////////////////
// For a shadow map, we've got a specific (non-general) blurring in mind.
// This could be used as a loose model for more general blurring, but you
// wouldn't want to run a generic texture or render target through this.
// Specifically, we assume:
//	Input:
//		An RGBA rendertarget with an alpha we want to preserve, and color
//			going from black (unused) to white (written).
//		A blur factor
//	Output:
//		The rendertarget with alpha preserved, and the color channel blurred
//			appropriately.
//	We'll want to minimize our render target changes, so
//		we clear our scratch render target to black/white (color/alpha), then
//		render additively the color of our input with a zero alpha. The scratch
//		accumulates the color sum, but the alpha starts and stays saturated to 100%.
//		Then we modulate that back into the input, so the alpha is unchanged, the
//		color (within the white region) falls off at the edges. The color outside the
//		white region is black and stays black, but we don't care because we'll be ignoring
//		that anyway.
//	Notice that this depends on the input, each pixel having been all black or all "white".
// Also depends on "white" having 1/N premodulated in, where N is the number of samples.
//		That's why we can just sum up the colors, without needing to do a divide. Otherwise
//		we'd saturate at 255 during the sum, and the divide would be pointless.
// One other thing we're counting on here, is that we've just been rendering to an
//		offscreen, we're done, and we're about to pop our rendertarget, which is going
//		to reset a lot of render state that we would otherwise be responsible for here.
// We're hoping that this blur function (if efficient enough) can get called enough times
//		per frame to warrant the sins described above.
void plDXPipeline::IBlurShadowMap(plShadowSlave* slave)
{
	// The slave's pipeline data is its shadow map render target.
	plRenderTarget* srcMap = (plRenderTarget*)slave->fPipeData;
	hsScalar blurAmount = slave->fBlurScale;

	// Pick the scratch/destination pair for this shadow map's size. Either may
	// be missing, in which case the map is left unblurred.
	const int slot = IGetScratchRenderTarget(srcMap);
	plRenderTarget* scratch = fBlurScratchRTs[slot];
	plRenderTarget* dest = fBlurDestRTs[slot];
	if( !scratch || !dest )
		return;

	// Pass 1 target: the scratch map, cleared to black colour / full alpha.
	IBlurSetRenderTarget(scratch);
	fD3DDevice->Clear(0, nil, D3DCLEAR_TARGET, 0xff000000L, 1.0f, 0L);

	// Select the quad to render for this shadow map.
	ISetBlurQuadToRender(srcMap);

	// Pass 1: render the shadow map into scratch, producing the blur.
	IRenderBlurFromShadowMap(scratch, srcMap, blurAmount);

	// Pass 2: switch to the destination target and combine scratch and the
	// source map into it.
	IRenderBlurBackToShadowMap(srcMap, scratch, dest);

	// Swap roles: the destination is now the slave's shadow map, and the old
	// shadow map becomes the destination slot for the next blur at this size.
	slave->fPipeData = (void*)dest;
	fBlurDestRTs[slot] = srcMap;
}

// IGetScratchRenderTarget ////////////////////////////////////////////
// Look for a render target to use as scratch space for blurring the input render target.
// Note that the whole blur process requires 3 render targets, the source,
// an intermediate, and the destination (which gets swapped with the source).
// But that's only an extra 2 render targets for all shadow maps of a given
// size.
// Note also that the intermediate is one size smaller than the source,
// to get better blurring through bilerp magnification.
int plDXPipeline::IGetScratchRenderTarget(plRenderTarget* smap)
{
	// The slot index is log2 of the shadow map height; only power-of-two
	// heights from 32 to 512 have a slot.
	int slot = -1;
	switch(smap->GetHeight())
	{
	case 32:	slot = 5;	break;
	case 64:	slot = 6;	break;
	case 128:	slot = 7;	break;
	case 256:	slot = 8;	break;
	case 512:	slot = 9;	break;
	default:
		// Unsupported size. This has always returned 0 (it was written as
		// 'false'), not -1.
		// NOTE(review): IBlurShadowMap uses the result as an array index without
		// a range check and bails if the slot is nil, so slot 0 presumably has
		// to stay empty for this to act as a failure code - confirm before
		// changing the value.
		return 0;
	}

	const UInt32 orthoFlag = smap->GetFlags() & plRenderTarget::kIsOrtho;

	if( !fBlurScratchRTs[slot] )
	{
		// The intermediate is one power of 2 smaller than the source (unless the
		// source is already at the 32 minimum).
		// We may or may not get back the size we requested, but if we didn't we
		// won't later either, so the smaller target just lives in the bigger
		// slot. Downside: a later request for that smaller size won't look in
		// this slot, so we could wind up with e.g. two 128x128's (one in the
		// 256 slot, one in the 128 slot).
		UInt32 scratchW = smap->GetWidth();
		UInt32 scratchH = smap->GetHeight();
		if( scratchW > 32 )
		{
			scratchW >>= 1;
			scratchH >>= 1;
		}
		fBlurScratchRTs[slot] = IFindRenderTarget(scratchW, scratchH, orthoFlag);
	}

	if( !fBlurDestRTs[slot] )
	{
		// The destination matches the source size exactly.
		UInt32 destW = smap->GetWidth();
		UInt32 destH = smap->GetHeight();
		fBlurDestRTs[slot] = IFindRenderTarget(destW, destH, orthoFlag);
	}

#ifdef MF_ENABLE_HACKOFF
	if( hackOffscreens.kMissingIndex == hackOffscreens.Find(fBlurScratchRTs[slot]) )
		hackOffscreens.Append(fBlurScratchRTs[slot]);
	if( hackOffscreens.kMissingIndex == hackOffscreens.Find(fBlurDestRTs[slot]) )
		hackOffscreens.Append(fBlurDestRTs[slot]);
#endif // MF_ENABLE_HACKOFF

	return slot;
}

// IBlurSetRenderTarget /////////////////////////////////////////////////////////////////////
// Set the input render target up to be rendered into. This abbreviated version
// of PushRenderTarget is possible because of the special case of the state coming
// in, and that we know we're going to immediately pop the previous render target
// when we're done.
void plDXPipeline::IBlurSetRenderTarget(plRenderTarget* rt)
{
	// Pull the D3D colour and depth surfaces out of the target's device ref.
	plDXRenderTargetRef* rtRef = (plDXRenderTargetRef *)rt->GetDeviceRef();
	IDirect3DSurface9* colorSurf = rtRef->GetColorSurface();
	IDirect3DSurface9* depthSurf = rtRef->fD3DDepthSurface;

	// Record them as current, then bind them on the device.
	fSettings.fCurrD3DMainSurface = colorSurf;
	fSettings.fCurrD3DDepthSurface = depthSurf;
	fD3DDevice->SetRenderTarget(0, colorSurf);
	fD3DDevice->SetDepthStencilSurface(depthSurf);

	// Viewport covers the whole target with the full [0,1] depth range.
	D3DVIEWPORT9 viewport;
	viewport.X = 0;
	viewport.Y = 0;
	viewport.Width = rt->GetWidth();
	viewport.Height = rt->GetHeight();
	viewport.MinZ = 0.f;
	viewport.MaxZ = 1.f;

	WEAK_ERROR_CHECK( fD3DDevice->SetViewport( &viewport ) );
}


// IRenderBlurFromShadowMap ////////////////////////////////////////////////////////////////////////////////
// Render a shadow map into a scratch render target multiple times offset slightly to create a blur
// in the color, preserving alpha exactly. It's just rendering a single quad with slight offsets
// in the UVW transform.
// Parameters:
//	scratchRT - only its width/height are read here, to scale the UV offsets.
//	smap      - the shadow map; its device ref is bound as the texture on every stage used.
//	scale     - blur scale; multiplies the per-sample UV offsets.
// The current render target and the quad's vertex stream are not set here.
void plDXPipeline::IRenderBlurFromShadowMap(plRenderTarget* scratchRT, plRenderTarget* smap, hsScalar scale)
{
	// Quad is set up in camera space.
	fD3DDevice->SetTransform(D3DTS_VIEW, &d3dIdentityMatrix);
	fD3DDevice->SetTransform(D3DTS_WORLD, &d3dIdentityMatrix);
	fD3DDevice->SetTransform(D3DTS_PROJECTION, &d3dIdentityMatrix);

	// Figure out how many passes we'll need.
	// Total samples is 4 or 8 depending on mfCurrentTest; each pass uses one
	// texture stage per sample, up to fSettings.fMaxLayersAtOnce stages.
	// NOTE(review): assumes fSettings.fMaxLayersAtOnce is non-zero - confirm.
//	const int kNumSamples = 1 << kL2NumSamples; // HACKSAMPLE
	const int kNumSamples = mfCurrentTest > 101 ? 8 : 4;
	int nPasses = (int)hsCeil(float(kNumSamples) / fSettings.fMaxLayersAtOnce);
	// Integer division: when kNumSamples is not a multiple of nPasses, fewer
	// than kNumSamples samples are actually taken.
	int nSamplesPerPass = kNumSamples / nPasses;

	// Attenuate by number of passes, to average as we sum.
	// The same 8-bit value is replicated into all four channels of the DWORD.
	DWORD atten = 255 / nPasses;
	plConst(float) kAtten(1.f);
	atten = DWORD(atten * kAtten);
	atten = (atten << 24)
		| (atten << 16)
		| (atten << 8)
		| (atten << 0);

	// Disable skinning
	fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_DISABLE);
	//
	//	AlphaEnable = true
	//	AlphaTest OFF
	// Additive blend scaled by source alpha (SRCALPHA, ONE); the source alpha
	// is the TFACTOR attenuation selected on stage 0 below.
	fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE);
	fD3DDevice->SetRenderState(D3DRS_SRCBLEND,  D3DBLEND_SRCALPHA);
	fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ONE);
	fD3DDevice->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_ALWAYS);

	//	ZBUFFER disabled
	// The cached layer-0 Z flags are updated to match what was just set on the device.
	fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_ALWAYS);
	fD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, FALSE);
	fLayerState[0].fZFlags &= ~hsGMatState::kZMask;
	fLayerState[0].fZFlags |= hsGMatState::kZNoZWrite | hsGMatState::kZNoZRead;
	//
	//	Cullmode is NONE
	fCurrCullMode = D3DCULL_NONE;
	fD3DDevice->SetRenderState( D3DRS_CULLMODE,	fCurrCullMode );

	// Note: on a missing ref we return with the render state changes above
	// already applied and nothing drawn.
	plDXTextureRef* ref = (plDXTextureRef*)smap->GetDeviceRef();
	hsAssert(ref, "Shadow map ref should have been made when it was rendered");
	if( !ref )
		return;

	// TFactor contains the attenuation
	fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, atten);

	// Set the N texture stages all to use the same
	// src rendertarget texture.
	// Blend modes are:
	//	Stage0:
	//		Color
	//		Arg1 = texture
	//		Op = selectArg1
	//		Alpha
	//		Arg1 = TFACTOR (the per-pass attenuation set above)
	//		Op = selectArg1
	//	Stage[1..n-1]
	//		Color
	//		Arg1 = texture
	//		Arg2 = current
	//		Op = AddSigned
	//		Alpha
	//		Arg1 = texture
	//		Arg2 = current
	//		Op = SelectArg2
	//	StageN
	//		Color/Alpha
	//		Op = disable
	//
	// Frame buffer blend is
	//		SRCBLEND = SRCALPHA
	//		DSTBLEND = ONE
	//	All texture stages are clamped
	//
	// Set stage0, then loop over the rest
	fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
	fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
	fLayerState[0].fClampFlags = hsGMatState::kClampTexture;

	fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TEXTURE);
	fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_SELECTARG1);

	fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG1, D3DTA_TFACTOR);
	fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG1);
	// NOTE(review): UInt32(-1) presumably marks the cached blend flags as
	// invalid so later material setup re-sends them - confirm.
	fLayerState[0].fBlendFlags = UInt32(-1);

	hsRefCnt_SafeAssign( fLayerRef[0], ref );
	fD3DDevice->SetTexture( 0, ref->fD3DTexture );

	// Only touch the device when the cached transform flags differ.
	if( D3DTTFF_COUNT2 != fLayerXformFlags[0] )
	{
		fLayerXformFlags[0] = D3DTTFF_COUNT2;
		fD3DDevice->SetTextureStageState(0, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_COUNT2);
	}
	fD3DDevice->SetTextureStageState(0, D3DTSS_TEXCOORDINDEX, 0);
	fLayerUVWSrcs[0] = 0;

	// Remaining stages: same texture and texcoord set 0, colour accumulated
	// with ADDSIGNED, alpha passed through from the previous stage.
	int i;
	for( i = 1; i < nSamplesPerPass; i++ )
	{
		fD3DDevice->SetSamplerState(i, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
		fD3DDevice->SetSamplerState(i, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
		fLayerState[i].fClampFlags = hsGMatState::kClampTexture;

		fD3DDevice->SetTextureStageState(i, D3DTSS_COLORARG1, D3DTA_TEXTURE);
		fD3DDevice->SetTextureStageState(i, D3DTSS_COLORARG2, D3DTA_CURRENT);
		fD3DDevice->SetTextureStageState(i, D3DTSS_COLOROP,   D3DTOP_ADDSIGNED);

		fD3DDevice->SetTextureStageState(i, D3DTSS_ALPHAARG1, D3DTA_TEXTURE);
		fD3DDevice->SetTextureStageState(i, D3DTSS_ALPHAARG2, D3DTA_CURRENT);
		fD3DDevice->SetTextureStageState(i, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2);
		fLayerState[i].fBlendFlags = UInt32(-1);

		hsRefCnt_SafeAssign( fLayerRef[i], ref );
		fD3DDevice->SetTexture( i, ref->fD3DTexture );

		if( D3DTTFF_COUNT2 != fLayerXformFlags[i] )
		{
			fLayerXformFlags[i] = D3DTTFF_COUNT2;
			fD3DDevice->SetTextureStageState(i, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_COUNT2);
		}
		fD3DDevice->SetTextureStageState(i, D3DTSS_TEXCOORDINDEX, 0);
		fLayerUVWSrcs[i] = 0;
	}
	// Terminate the stage cascade right after the last stage used.
	// NOTE(review): assumes stage index nSamplesPerPass is a valid stage on the device - confirm.
	fD3DDevice->SetTextureStageState(nSamplesPerPass, D3DTSS_COLOROP, D3DTOP_DISABLE);
	fD3DDevice->SetTextureStageState(nSamplesPerPass, D3DTSS_ALPHAOP, D3DTOP_DISABLE);

	// N offsets are { (-1,-1), (1, -1), (1, 1), (-1, 1) } * offsetScale / size, with
	// useful offsetScales probably going from 0.5 to 1.5, but we'll just have
	// to experiment and see. Larger values likely to require more than the current
	// 4 samples
	struct offsetStruct
	{
		float	fU;
		float	fV;
	};
	// Per-axis scale: blur scale divided by the scratch target's dimensions.
	offsetStruct offsetScale = { scale / scratchRT->GetWidth(), scale / scratchRT->GetHeight() };
	// First four entries are the diagonal offsets described above; the last
	// four are half-step axis-aligned offsets, reached only in the 8-sample case.
	static offsetStruct offsets[8] = {
		{-1.f,	-1.f},
		{1.f,	-1.f},
		{1.f,	1.f},
		{-1.f,	1.f},
		{0.f,	-0.5f},
		{0.f,	0.5f},
		{-0.5f,	0.f},
		{0.5f,	0.f}
	};

	// iSample runs across passes, so each pass picks up where the last left off
	// in the offsets table.
	int iSample = 0;
	// For each pass,
	for( i = 0; i < nPasses; i++ )
	{
		// Set the N texture stage uv transforms to the
		// next N offsets.
		int j;
		for( j = 0; j < nSamplesPerPass; j++ )
		{
			// Identity texture matrix with the offset written into elements (2,0) and (2,1).
			D3DXMATRIX offXfm = d3dIdentityMatrix;
			offXfm(2,0) = offsets[iSample].fU * offsetScale.fU;
			offXfm(2,1) = offsets[iSample].fV * offsetScale.fV;
			fD3DDevice->SetTransform(sTextureStages[j], &offXfm);
			fLayerTransform[j] = true;

			iSample++;
		}

		// Render our quad
		fD3DDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2);

//		fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, 0L);
//		fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TEXTURE);
//		fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG2, D3DTA_TFACTOR);
//		fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_ADDSIGNED);
//		fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_SELECTARG2);

	}
}

// IRenderBlurBackToShadowMap /////////////////////////////////////////////////////////////////////
// Render our intermediate blurred map back into a useable shadow map.
// Parameters:
//	smap    - the original shadow map; bound as texture 1 and supplies the alpha.
//	scratch - the blurred intermediate; bound as texture 0 and supplies the color.
//	dst     - the render target drawn into.
// Several states (clamping, alpha-blend enable, cull mode, the quad itself) are
// not set here and are relied on as left by earlier calls.
void plDXPipeline::IRenderBlurBackToShadowMap(plRenderTarget* smap, plRenderTarget* scratch, plRenderTarget* dst)
{
	// Set the rendertarget
	IBlurSetRenderTarget(dst);

	// Clear it appropriately. This might not be necessary, since we're just going to overwrite.
	fD3DDevice->Clear(0, nil, D3DCLEAR_TARGET, 0xff000000L, 1.0f, 0L);

	// Scratch has an all white alpha, and the blurred color from smap. But the color
	// is a signed biased color. We need to remap [128..255] from scratch into [0..255]
	// on dst. Plus, we need to copy the alpha as is from smap into dst.
	// So, scratch is texture0, smap is texture1. TFACTOR is 0.
	// Color is ADDSIGNED2X(TFACTOR, texture0).
	// Alpha is SELECTARG1(texture1, current).
	// Then FB blend is just opaque copy.

	// Set Stage0 texture transform
	// Clamp still on (from RBFSM, i.e. IRenderBlurFromShadowMap)
	// Both stages get an identity texture transform, removing the blur offsets.
	// NOTE(review): stage 1's clamp, texcoord index and transform flags are only
	// set by IRenderBlurFromShadowMap when it used two or more samples per pass -
	// confirm they are valid here otherwise.
	D3DXMATRIX offXfm = d3dIdentityMatrix;
	fD3DDevice->SetTransform(sTextureStages[0], &offXfm);
	fD3DDevice->SetTransform(sTextureStages[1], &offXfm);
	fLayerTransform[0] = false;
	fLayerTransform[1] = false;

	// Texture 0 = scratch. A missing ref returns with the target already
	// switched and cleared but nothing drawn.
	plDXTextureRef* ref = (plDXTextureRef*)scratch->GetDeviceRef();
	hsAssert(ref, "Blur scratch map ref should have been made when it was rendered");
	if( !ref )
		return;
	hsRefCnt_SafeAssign( fLayerRef[0], ref );
	fD3DDevice->SetTexture( 0, ref->fD3DTexture );

	// Texture 1 = original shadow map (alpha source).
	ref = (plDXTextureRef*)smap->GetDeviceRef();
	hsAssert(ref, "Blur src map ref should have been made when it was rendered");
	if( !ref )
		return;
	hsRefCnt_SafeAssign( fLayerRef[1], ref );
	fD3DDevice->SetTexture( 1, ref->fD3DTexture );

	// Stage0:
	//		Color
	//		Arg1 = TFACTOR = black
	//		Arg2 = texture
	//		Op = ADDSIGNED2X
	//		Alpha
	//		Arg1 = texture
	//		Op = selectArg1
	//	Texture = scratch
	// Stage1:
	//		Color
	//		Arg1 = texture
	//		Arg2 = current
	//		Op = selectArg2
	//		Alpha
	//		Arg1 = texture
	//		Op = selectArg1
	//	Texture = smap
	// FB blend
	//		SRCBLEND = ONE
	//		DSTBLEND = ZERO

	fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, 0L);
	fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TFACTOR);
	fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG2, D3DTA_TEXTURE);
	fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_ADDSIGNED2X);

	// This alpha will be ignored, because in the next stage we select texture alpha.
	fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG1, D3DTA_TEXTURE);
	fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG1);

	fLayerState[0].fBlendFlags = UInt32(-1);

	// Stage 1 color passes stage 0's result through (only ARG2 is set, since
	// SELECTARG2 is the op).
	fD3DDevice->SetTextureStageState(1, D3DTSS_COLORARG2, D3DTA_CURRENT);
	fD3DDevice->SetTextureStageState(1, D3DTSS_COLOROP,   D3DTOP_SELECTARG2);

	// Stage 1 alpha comes from texture 1 (the original shadow map).
	fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAARG1, D3DTA_TEXTURE);
	fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG1);

	fLayerState[1].fBlendFlags = UInt32(-1);

	// End the cascade after two stages and record that in the cached state.
	fD3DDevice->SetTextureStageState(2, D3DTSS_COLOROP, D3DTOP_DISABLE);
	fD3DDevice->SetTextureStageState(2, D3DTSS_ALPHAOP, D3DTOP_DISABLE);

	fLastEndingStage = 2;

	// ONE/ZERO: the source replaces the destination.
	fD3DDevice->SetRenderState(D3DRS_SRCBLEND,  D3DBLEND_ONE);
	fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ZERO);

	// Our quad should still be setup to go.
	fD3DDevice->DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2);

}

// Vertex layout for the blur quads: three position floats followed by two
// texture-coordinate floats (five floats per vertex). The blur vertex buffer
// code sizes its buffers as four of these.
struct plShadowVertStruct
{
	float		fPos[3];	// position x, y, z
	float		fUv[2];		// texture coordinates u, v
};

// IReleaseBlurVBuffers //////////////////////////////////////////////////////////
// Free up our blur quad vertex buffers. Note these are in POOL_DEFAULT
void plDXPipeline::IReleaseBlurVBuffers()
{
	const UInt32 kVSize = sizeof(plShadowVertStruct);
	int i;
	for( i = 0; i < kMaxRenderTargetNext; i++ )
	{
		if (fBlurVBuffers[i])
		{
			ReleaseObject(fBlurVBuffers[i]);
			PROFILE_POOL_MEM(D3DPOOL_DEFAULT, 4 * kVSize, false, "BlurVtxBuff");
			fBlurVBuffers[i] = nil;
		}
	}
}

// ICreateBlurVBuffers //////////////////////////////////////////////////////////////////
// We need a quad for each size of shadow map, because there's a slight dependency
// of UVW coordinates on size of render target. Sucks but it's true.
// One quad is created for each square size from 32x32 (1<<5) through 512x512 (1<<9),
// and stored in fBlurVBuffers at the slot matching the shift (5..9).
// Slots that already hold a buffer are left untouched, so calling this again
// (e.g. after an earlier call failed part way through) doesn't leak the buffers
// that were already created.
// Returns false as soon as a buffer can't be created or locked, true otherwise.
hsBool plDXPipeline::ICreateBlurVBuffers()
{
	// Each quad is 4 verts, each with 3 floats for position and 2 floats for uv.
	const UInt32 kVSize = sizeof(plShadowVertStruct);
	const UInt32 kVFormat = D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE2(0) ;

	// Only these sizes (as powers of two) get a blur quad.
	const int kMinSizeShift = 5;	// 32x32
	const int kMaxSizeShift = 9;	// 512x512

	int i;
	for( i = kMinSizeShift; (i <= kMaxSizeShift) && (i < kMaxRenderTargetNext); i++ )
	{
		// Already have this one, don't clobber (and leak) it.
		if( fBlurVBuffers[i] )
			continue;

		// Shadow maps are square.
		const int width = 1 << i;
		const int height = width;

		// Positions cover the whole target in projected space at z = 0.5:
		//		P0 = (-1,-1), P1 = (1,-1), P2 = (1,1), P3 = (-1,1)
		// UVs are offset by half a texel (0.5 / size), with V running from
		// 1 at the y = -1 edge down to 0 at the y = +1 edge:
		//		P0 = (0,1), P1 = (1,1), P2 = (1,0), P3 = (0,0)	(+ half texel)
		// So we won't have to bother with indices, we'll put them in as
		// p1, p2, p0, p3 and render tristrip

		// Create the buffer.
		IDirect3DVertexBuffer9* vBuffer = nil;

		hsAssert(!ManagedAlloced(), "Alloc default with managed alloc'd");
		if( FAILED( fD3DDevice->CreateVertexBuffer( 4 * kVSize,
													D3DUSAGE_WRITEONLY,
													kVFormat,
													D3DPOOL_DEFAULT,
													&vBuffer, NULL) ) )
		{
			hsAssert( false, "CreateVertexBuffer() call failed!" );
			return false;
		}
		plShadowVertStruct* ptr = nil;

		/// Lock the buffer and fill it in.
		if( FAILED( vBuffer->Lock( 0, 0, (void **)&ptr, 0 ) ) )
		{
			hsAssert( false, "Failed to lock vertex buffer for writing" );
			vBuffer->Release();
			return false;
		}
		PROFILE_POOL_MEM(D3DPOOL_DEFAULT, 4 * kVSize, true, "BlurVtxBuff");

		// Start from P0, the other three corners are offsets from it.
		plShadowVertStruct vert;
		vert.fPos[0] = -1.f;
		vert.fPos[1] = -1.f;
		vert.fPos[2] = 0.5f;

		vert.fUv[0] = 0.5f / width;
		vert.fUv[1] = 1.f + 0.5f / height;

		// P0
		ptr[2] = vert;

		// P1
		ptr[0] = vert;
		ptr[0].fPos[0] += 2.f;
		ptr[0].fUv[0] += 1.f;

		// P2
		ptr[1] = vert;
		ptr[1].fPos[0] += 2.f;
		ptr[1].fUv[0] += 1.f;
		ptr[1].fPos[1] += 2.f;
		ptr[1].fUv[1] -= 1.f;

		// P3
		ptr[3] = vert;
		ptr[3].fPos[1] += 2.f;
		ptr[3].fUv[1] -= 1.f;

		vBuffer->Unlock();

		fBlurVBuffers[i] = vBuffer;
	}
	return true;
}

// ISetBlurQuadToRender ////////////////////////////////////////////////////
// Select the appropriate blur quad (based on size of shadow map) and set it up to render.
// The quad is chosen by the height of smap (32, 64, 128, 256 or 512). On success the
// device is left on the fixed function vertex path with the quad's FVF and the quad
// bound to stream 0, ready for a direct DrawPrimitive.
// Returns false if smap isn't one of the supported sizes, or if the quad for that
// size doesn't exist and can't be created. In that case no device state is touched.
hsBool plDXPipeline::ISetBlurQuadToRender(plRenderTarget* smap)
{
	const UInt32 kVSize = sizeof(plShadowVertStruct);
	const UInt32 kVFormat = D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE2(0) ;

	// Each vb will be rendertarget size specific, so select one based on input rendertarget
	int which = -1;
	switch(smap->GetHeight())
	{
	case 512:
		which = 9;
		break;
	case 256:
		which = 8;
		break;
	case 128:
		which = 7;
		break;
	case 64:
		which = 6;
		break;
	case 32:
		which = 5;
		break;
	default:
		return false;
	}

	// If we haven't created (or have lost) our d3d resources, make them
	IDirect3DVertexBuffer9* vBuffer = fBlurVBuffers[which];
	if( !vBuffer )
	{
		ICreateBlurVBuffers();
		vBuffer = fBlurVBuffers[which];
		if( !vBuffer )
		{
			// Still nothing. Don't go binding a nil stream and claiming success.
			hsAssert(false, "AllocBlurVBuffers failed");
			return false;
		}
	}

	// Fixed function, with the blur quad's vertex format.
	HRESULT r = fD3DDevice->SetVertexShader(fSettings.fCurrVertexShader = NULL);
	hsAssert( r == D3D_OK, "Error trying to set the vertex shader!" );
	fD3DDevice->SetFVF(fSettings.fCurrFVFFormat = kVFormat);

	// We're bypassing the usual vertex buffer refs, so forget whichever one
	// was current.
	hsRefCnt_SafeUnRef(fSettings.fCurrVertexBuffRef);
	fSettings.fCurrVertexBuffRef = nil;

	r = fD3DDevice->SetStreamSource(0, vBuffer, 0, kVSize);
	hsAssert( r == D3D_OK, "Error trying to set the stream source!" );
	plProfile_Inc(VertexChange);

	// No SetIndices, we'll do a direct DrawPrimitive (not DrawIndexedPrimitive)

	// No transforms, we're supplying screen ready verts.

	return true;
}

// IRenderShadowCasterSpan //////////////////////////////////////////////////////////////////////
// Render the span into a rendertarget of the correct size, generating
// a depth map from this light to that span.
// Binds the span's vertex and index buffers (only re-sending them to the device
// if they changed or were rebuilt), sets up the span's transforms and the cull
// mode the slave wants, then draws the span as an indexed triangle list.
// If the span has no usable vertex/index buffer pair it asserts and draws nothing.
void plDXPipeline::IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSpans* drawable, const plIcicle& span)
{
	// Check that it's ready to render.
	plProfile_BeginTiming(CheckDyn);
	ICheckDynBuffers(drawable, drawable->GetBufferGroup(span.fGroupIdx), &span);
	plProfile_EndTiming(CheckDyn);

	plDXVertexBufferRef*	vRef = (plDXVertexBufferRef *)drawable->GetVertexRef(span.fGroupIdx, span.fVBufferIdx);
	plDXIndexBufferRef*	iRef = (plDXIndexBufferRef *)drawable->GetIndexRef(span.fGroupIdx, span.fIBufferIdx);

	// Check the refs themselves before looking inside them.
	if( vRef == nil || iRef == nil || vRef->fD3DBuffer == nil || iRef->fD3DBuffer == nil )
	{
		hsAssert( false, "Trying to render a nil buffer pair!" );
		return;
	}

	HRESULT		r;

	/// Switch to the vertex buffer we want
	if( fSettings.fCurrVertexBuffRef != vRef )
	{
		hsRefCnt_SafeAssign( fSettings.fCurrVertexBuffRef, vRef );
		// Flag it so the stream source gets sent below.
		vRef->SetRebuiltSinceUsed(true);
	}

	if( vRef->RebuiltSinceUsed() )
	{
		r = fD3DDevice->SetStreamSource( 0, vRef->fD3DBuffer, 0, vRef->fVertexSize );
		hsAssert( r == D3D_OK, "Error trying to set the stream source!" );
		plProfile_Inc(VertexChange);

		// Fixed function, with the FVF matching this buffer's format.
		fSettings.fCurrFVFFormat = IGetBufferD3DFormat(vRef->fFormat);
		r = fD3DDevice->SetVertexShader(fSettings.fCurrVertexShader = NULL);
		fD3DDevice->SetFVF(fSettings.fCurrFVFFormat);
		hsAssert( r == D3D_OK, "Error trying to set the vertex shader!" );

		vRef->SetRebuiltSinceUsed(false);
	}

	/// Same deal for the index buffer
	if( fSettings.fCurrIndexBuffRef != iRef )
	{
		hsRefCnt_SafeAssign( fSettings.fCurrIndexBuffRef, iRef );
		iRef->SetRebuiltSinceUsed(true);
	}

	if( iRef->RebuiltSinceUsed() )
	{
		r = fD3DDevice->SetIndices( iRef->fD3DBuffer );
		hsAssert( r == D3D_OK, "Error trying to set the indices!" );
		plProfile_Inc(IndexChange);
		iRef->SetRebuiltSinceUsed(false);
	}

	UInt32					vStart = span.fVStartIdx;
	UInt32					vLength = span.fVLength;
	UInt32					iStart = span.fIPackedIdx;
	UInt32					iLength= span.fILength;

	// Three indices per triangle.
	plRenderTriListFunc render(fD3DDevice, iRef->fOffset, vStart, vLength, iStart, iLength/3);

	static hsMatrix44 emptyMatrix;
	hsMatrix44 m = emptyMatrix;

	ISetupTransforms(drawable, span, m);

	hsBool flip = slave->ReverseCull();
	ISetCullMode(flip);

	render.RenderPrims();
}

// IGetULutTextureRef ///////////////////////////////////////////////////////////
// The ULut just translates a U coordinate in range [0..1] into
// color and alpha of U * 255.9f. We just have the one we keep
// lying around.
// It's a 256x1 A8R8G8B8 texture where texel i has all four channels set to i.
// Built on first request, linked into fTextureRefList and cached in
// fULutTextureRef, then handed back on every later call.
plDXTextureRef* plDXPipeline::IGetULutTextureRef()
{
	if( fULutTextureRef )
		return fULutTextureRef;

	const int width = 256;
	const int height = 1;

	UInt32* tData = TRACKED_NEW UInt32[width * height];

	UInt32* pData = tData;
	int j;
	for( j = 0; j < height; j++ )
	{
		int i;
		for( i = 0; i < width; i++ )
		{
			// Do the packing unsigned. Shifting a signed int of 128 or more
			// up by 24 overflows it.
			const UInt32 v = UInt32(i);
			*pData = (v << 24)
				| (v << 16)
				| (v << 8)
				| (v << 0);
			pData++;
		}
	}

	// tData is passed as the ref's data with externData false.
	// NOTE(review): presumably that makes the ref own (and free) tData - confirm.
	plDXTextureRef* ref = TRACKED_NEW plDXTextureRef( D3DFMT_A8R8G8B8,
										  1, // Num mip levels
										  width, height, // width by height
										  width * height, // numpix
										  width*height*sizeof(UInt32), // totalsize
										  width*height*sizeof(UInt32),
										  nil, // levels data
										  tData,
										  false // externData
										  );
	ref->Link(&fTextureRefList);

	fULutTextureRef = ref;

	return fULutTextureRef;
}

// IFindRenderTarget //////////////////////////////////////////////////////////////////
// Find a matching render target from the pools. We prefer the requested size, but
// will look for a smaller size if there isn't one available.
// Width and height are in/out: each time we drop down a size, both are halved, so
// on return they describe the size of the render target actually handed out.
// Param ortho indicates whether it will be used for orthogonal projection as opposed
// to perspective (directional light vs. point light), but is no longer used.
// Returns nil if height isn't one of the pooled sizes (32..512), or if this size
// and every smaller size is used up.
plRenderTarget* plDXPipeline::IFindRenderTarget(UInt32& width, UInt32& height, hsBool ortho)
{
	for( ;; )
	{
		hsTArray<plRenderTarget*>* pool = nil;
		UInt32* iNext = nil;
		// NOT CURRENTLY SUPPORTING NON-SQUARE SHADOWS. IF WE DO, CHANGE THIS.
		switch(height)
		{
		case 512:
			pool = &fRenderTargetPool512;
			iNext = &fRenderTargetNext[9];
			break;
		case 256:
			pool = &fRenderTargetPool256;
			iNext = &fRenderTargetNext[8];
			break;
		case 128:
			pool = &fRenderTargetPool128;
			iNext = &fRenderTargetNext[7];
			break;
		case 64:
			pool = &fRenderTargetPool64;
			iNext = &fRenderTargetNext[6];
			break;
		case 32:
			pool = &fRenderTargetPool32;
			iNext = &fRenderTargetNext[5];
			break;
		default:
			return nil;
		}

		// A nil entry marks the end of the usable targets. Also treat running off
		// the end of the array (e.g. pools never made, or released) as being out,
		// rather than indexing out of bounds.
		plRenderTarget* rt = nil;
		if( *iNext < (UInt32)pool->GetCount() )
			rt = (*pool)[*iNext];

		if( rt )
		{
			// Got one, it's taken until the pools get reset.
			(*iNext)++;
			return rt;
		}

		// We must be totally out. Oh well.
		if( height <= 32 )
			return nil;

		// We didn't find one, try again the next size down.
		width >>= 1;
		height >>= 1;
	}
}

// IPushShadowCastState ////////////////////////////////////////////////////////////////////////////////
// Push all the state necessary to start rendering this shadow map, but independent of the
// actual shadow caster to be rendered into the map.
// Grabs a render target from the pools (possibly shrinking slave->fWidth/fHeight),
// pushes the current view and the render target, sets up texture stage 0 to write
// light space distance (via the ULut) into alpha, clears the target and assigns the
// slave its shadow light.
// Returns false, with nothing pushed, if no render target is available or the slave
// can't set up its view transform. On true, the caller must balance this with
// IPopShadowCastState().
hsBool plDXPipeline::IPushShadowCastState(plShadowSlave* slave)
{
	plRenderTarget* renderTarg = IFindRenderTarget(slave->fWidth, slave->fHeight, slave->fView.GetOrthogonal());
	if( !renderTarg )
		return false;

	// Let the slave setup the transforms, viewport, etc. necessary to render its shadow
	// map. This just goes into a plViewTransform, we translate that into D3D state ourselves below.
	if (!slave->SetupViewTransform(this))
		return false;

	// Turn off fogging and specular.
	fD3DDevice->SetRenderState(D3DRS_FOGENABLE, FALSE);
	fCurrFog.fEnvPtr = nil;
	fD3DDevice->SetRenderState(D3DRS_SPECULARENABLE, FALSE);
	fLayerState[0].fShadeFlags &= ~hsGMatState::kShadeSpecular;

	// Push the shadow slave's view transform as our current render state.
	fSettings.fViewStack.Push(fView);
	fView.fCullMaxNodes = 0;
	SetViewTransform(slave->fView);
	IProjectionMatrixToD3D();

	// Push the shadow map as the current render target
	PushRenderTarget(renderTarg);

	// We'll be rendering the light space distance to the span fragment into
	// alpha (color is white), so our camera space position, transformed into light space
	// and then converted to [0..255] via our ULut.

	// For stage 0:
	// Set uvw src
	if( fLayerUVWSrcs[0] != D3DTSS_TCI_CAMERASPACEPOSITION )
	{
		fD3DDevice->SetTextureStageState(0, D3DTSS_TEXCOORDINDEX, D3DTSS_TCI_CAMERASPACEPOSITION);
		fLayerUVWSrcs[0] = D3DTSS_TCI_CAMERASPACEPOSITION;
	}
	UInt32 xformFlags = D3DTTFF_COUNT3;

	if( xformFlags != fLayerXformFlags[0] )
	{
		fLayerXformFlags[0] = xformFlags;
		fD3DDevice->SetTextureStageState(0, D3DTSS_TEXTURETRANSFORMFLAGS, xformFlags);
	}

	// Set texture transform to slave's lut transform. See plShadowMaster::IComputeLUT().
	hsMatrix44 castLUT = slave->fCastLUT;
	if( slave->fFlags & plShadowSlave::kCastInCameraSpace )
	{
		hsMatrix44 c2w = GetCameraToWorld();

		castLUT = castLUT * c2w;
	}

	D3DXMATRIX tXfm;
	IMatrix44ToD3DMatrix(tXfm, castLUT);

	fD3DDevice->SetTransform( sTextureStages[0], &tXfm );
	fLayerTransform[0] = true;

	// Set texture to clamp
	fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
	fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
	fLayerState[0].fClampFlags = hsGMatState::kClampTexture;

	// Without blurring, the map color is full white over a clear of black.
	// With blurring, the color and the clear sit k above and below mid-gray (128),
	// where k depends on how many blur samples get accumulated per pass.
	DWORD clearColor = 0xff000000L;
	DWORD intens = 0xffffffff;
	if( slave->fBlurScale > 0 )
	{
		const int kNumSamples = mfCurrentTest > 101 ? 8 : 4;
		int nPasses = (int)hsCeil(float(kNumSamples) / fSettings.fMaxLayersAtOnce);
		int nSamplesPerPass = kNumSamples / nPasses;
		DWORD k = int(128.f / float(nSamplesPerPass));

		// Keep each channel within 8 bits. With a single sample per pass k is 128,
		// and an unclamped 128 + k = 256 would spill over into the next channel up.
		DWORD hi = 128 + k;
		if( hi > 255 )
			hi = 255;
		DWORD lo = (k < 128) ? (128 - k) : 0;

		// Pack as unsigned, 0xff << 24 doesn't fit in a signed int.
		const DWORD kOpaque = DWORD(0xff) << 24;
		intens = kOpaque
			| (hi << 16)
			| (hi << 8)
			| (hi << 0);
		clearColor = kOpaque
			| (lo << 16)
			| (lo << 8)
			| (lo << 0);
	}

	// Note that we discard the shadow caster's alpha here, although we don't
	// need to. Even on a 2 texture stage system, we could include the diffuse
	// alpha and the texture alpha from the base texture. But we don't.

	// Set color to white. We could accomplish this easier by making the color
	// in our ULut white.
	fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, intens);

	fSettings.fVeryAnnoyingTextureInvalidFlag = true;
	fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TFACTOR);
	fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_SELECTARG1);

	fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG1, D3DTA_TEXTURE);
	fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG1);
	fLayerState[0].fBlendFlags = UInt32(-1);

	// For stage 1 - disable
	fLastEndingStage = 1;
	fD3DDevice->SetTextureStageState(1, D3DTSS_COLOROP, D3DTOP_DISABLE);
	fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
	fLayerState[1].fBlendFlags = UInt32(-1);

	// Set texture to U_LUT
	plDXTextureRef* ref = IGetULutTextureRef();

	// The D3D texture may not have been made yet (or was lost), rebuild it from
	// the ref's data if we have any.
	if( !ref->fD3DTexture )
	{
		if( ref->fData )
			IReloadTexture( ref );
	}

	hsRefCnt_SafeAssign( fLayerRef[0], ref );
	fD3DDevice->SetTexture( 0, ref->fD3DTexture );

	// Straight overwrite of the destination, no alpha testing.
	fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE);
	fD3DDevice->SetRenderState(D3DRS_SRCBLEND,  D3DBLEND_ONE);
	fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ZERO);

	fD3DDevice->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_ALWAYS);

	// The slave remembers which render target holds its shadow map.
	slave->fPipeData = renderTarg;

	// Enable ZBuffering w/ write
	fD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, TRUE);
	fLayerState[0].fZFlags &= ~hsGMatState::kZMask;

	// Clear the render target:
	// alpha to white ensures no shadow where there's no caster
	// color to black in case we ever get blurring going
	// Z to 1 (or to 0 when the slave wants a reversed Z test)
	// Stencil ignored
	if( slave->ReverseZ() )
	{
		fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_GREATEREQUAL);
		fD3DDevice->Clear(0, nil, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, clearColor, 0.0f, 0L);
	}
	else
	{
		fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_LESSEQUAL);
		fD3DDevice->Clear(0, nil, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, clearColor, 1.0f, 0L);
	}

	// Bring the viewport in (AFTER THE CLEAR) to protect the alpha boundary.
	fView.fTransform.SetViewPort(1, 1, (float)(slave->fWidth-2), (float)(slave->fHeight-2), false);
	ISetViewport();

	inlEnsureLightingOff();

	// See ISetupShadowLight below for how the shadow light is used.
	// The shadow light isn't used in generating the shadow map, it's used
	// in projecting the shadow map onto the scene.
	ISetupShadowLight(slave);

	return true;
}

// ISetupShadowLight //////////////////////////////////////////////////////////////////
// The shadow light modulates the shadow effect in two ways while the shadow map
// is being projected onto the scene.
// One, the shadow's intensity follows N dot L of the light on the receiving
// surface. On a sphere, that means the darkening falls off as the normals swing
// from facing the light to being 90 degrees off of it.
// Two, the whole effect is attenuated through the light's diffuse color. That's
// how we account for things like the intensity of the light, or fading a shadow
// out as it gets too far away to matter.
// Here we grab a scratch light for the slave, fill it in as a point light (for a
// positional slave) or a directional light, hand it to D3D, and record its D3D
// index on the slave. The slave starts out flagged as not self shadowing.
void plDXPipeline::ISetupShadowLight(plShadowSlave* slave)
{
	plDXLightRef* light = INextShadowLight(slave);

	// Uniform diffuse at the slave's shadow power.
	light->fD3DInfo.Diffuse.b = slave->fPower;
	light->fD3DInfo.Diffuse.g = light->fD3DInfo.Diffuse.b;
	light->fD3DInfo.Diffuse.r = light->fD3DInfo.Diffuse.g;

	if( !slave->Positional() )
	{
		// Directional, all we need is which way it points.
		light->fD3DInfo.Type = D3DLIGHT_DIRECTIONAL;

		light->fD3DInfo.Direction.x = slave->fLightDir.fX;
		light->fD3DInfo.Direction.y = slave->fLightDir.fY;
		light->fD3DInfo.Direction.z = slave->fLightDir.fZ;
	}
	else
	{
		// Point light at the slave's light position, with constant
		// attenuation out to a very large range.
		light->fD3DInfo.Type = D3DLIGHT_POINT;

		light->fD3DInfo.Position.x = slave->fLightPos.fX;
		light->fD3DInfo.Position.y = slave->fLightPos.fY;
		light->fD3DInfo.Position.z = slave->fLightPos.fZ;

		const float kMaxRange = 32767.f;
		light->fD3DInfo.Range = kMaxRange;
		light->fD3DInfo.Attenuation0 = 1.f;
		light->fD3DInfo.Attenuation1 = 0;
		light->fD3DInfo.Attenuation2 = 0;
	}

	fD3DDevice->SetLight( light->fD3DIndex, &light->fD3DInfo );

	// The diffuse above is the regular (non self shadowing) power.
	slave->fSelfShadowOn = false;
	slave->fLightIndex = light->fD3DIndex;
}

// INextShadowLight /////////////////////////////////////////////////////
// Hand out the next scratch shadow light and record its slot on the slave
// (slave->fLightRefIdx). The slave only keeps it for this render frame.
// Light refs are created the first time a slot is needed and then kept around
// in fLights.fShadowLights. Each call advances fLights.fNextShadowLight.
plDXLightRef* plDXPipeline::INextShadowLight(plShadowSlave* slave)
{
	// Make sure there's a (possibly still empty) slot for this light.
	fLights.fShadowLights.ExpandAndZero(fLights.fNextShadowLight+1);

	plDXLightRef* lightRef = fLights.fShadowLights[fLights.fNextShadowLight];
	if( !lightRef )
	{
		// First time this slot is used, build the light ref for it.
		lightRef = TRACKED_NEW plDXLightRef();

		lightRef->fD3DIndex = fLights.ReserveD3DIndex();
		lightRef->fOwner = nil;
		lightRef->fD3DDevice = fD3DDevice;

		lightRef->Link( &fLights.fRefList );

		fLights.fShadowLights[fLights.fNextShadowLight] = lightRef;

		// Keep it switched off until something actually wants it.
		fD3DDevice->LightEnable(lightRef->fD3DIndex, false);

		// Start from an all zero light. Ambient and specular stay black from
		// here on, the rest gets filled in per use.
		memset(&lightRef->fD3DInfo, 0, sizeof(lightRef->fD3DInfo));
		lightRef->fD3DInfo.Ambient.r = lightRef->fD3DInfo.Ambient.g = lightRef->fD3DInfo.Ambient.b = 0;
		lightRef->fD3DInfo.Specular.r = lightRef->fD3DInfo.Specular.g = lightRef->fD3DInfo.Specular.b = 0;
	}

	slave->fLightRefIdx = fLights.fNextShadowLight;
	fLights.fNextShadowLight++;

	return lightRef;
}

// IPopShadowCastState ///////////////////////////////////////////////////
// Pop the state set to render this shadow caster, so we're ready to render
// a different shadow caster, or go on to our main render.
// Restores the view saved on fSettings.fViewStack and pops the render target.
// The slave parameter is not used here. Always returns true.
hsBool plDXPipeline::IPopShadowCastState(plShadowSlave* slave)
{
	// Back to the view we had before the shadow slave's view was set.
	fView = fSettings.fViewStack.Pop();

	PopRenderTarget();
	// Flag both the projection and camera transforms as needing a reset.
	fView.fXformResetFlags = fView.kResetProjection | fView.kResetCamera;

	return true;
}

// IMakeRenderTargetPools /////////////////////////////////////////////////////////////
// These are actually only used as shadow map pools, but they could be used for other
// render targets.
// All these are created here in a single call because they go in POOL_DEFAULT, so they
// must be created before we start creating things in POOL_MANAGED.
void plDXPipeline::IMakeRenderTargetPools()
{
	hsAssert(!fManagedAlloced, "Allocating rendertargets with managed resources alloced");
	IReleaseRenderTargetPools(); // Just to be sure.

	// Numbers of render targets to be created for each size.
	// These numbers were set with multi-player in mind, so should be reconsidered.
	// But do keep in mind that there are many things in production assets that cast
	// shadows besides the avatar.
	plConst(hsScalar)	kCount[kMaxRenderTargetNext] = {
		0, // 1x1
		0, // 2x2
		0, // 4x4
		0, // 8x8
		0, // 16x16
		32, // 32x32
		16, // 64x64
		8, // 128x128
		4, // 256x256
		0 // 512x512
	};
	int i;
	for( i = 0; i < kMaxRenderTargetNext; i++ )
	{
		hsTArray<plRenderTarget*>* pool = nil;
		switch( i )
		{
		default:
		case 0:
		case 1:
		case 2:
		case 3:
		case 4:
			break;

		case 5:
			pool = &fRenderTargetPool32;
			break;
		case 6:
			pool = &fRenderTargetPool64;
			break;
		case 7:
			pool = &fRenderTargetPool128;
			break;
		case 8:
			pool = &fRenderTargetPool256;
			break;
		case 9:
			pool = &fRenderTargetPool512;
			break;
		}
		if( pool )
		{
			pool->SetCount((int)(kCount[i]+1));
			(*pool)[0] = nil;
			(*pool)[(int)(kCount[i])] = nil;
			int j;
			for( j = 0; j < kCount[i]; j++ )
			{
				UInt16 flags = plRenderTarget::kIsTexture | plRenderTarget::kIsProjected;
				UInt8 bitDepth = 32;
				UInt8 zDepth = 24;
				UInt8 stencilDepth = 0;

				// If we ever allow non-square shadows, change this.
				int width = 1 << i;
				int height = width;

				plRenderTarget* rt = TRACKED_NEW plRenderTarget(flags, width, height, bitDepth, zDepth, stencilDepth);

				// If we've failed to create our render target ref, we're probably out of
				// video memory. We'll return nil, and this guy just doesn't get a shadow
				// until more video memory turns up (not likely).
				if( !SharedRenderTargetRef((*pool)[0], rt) )
				{
					delete rt;
					pool->SetCount(j+1);
					(*pool)[j] = nil;
					break;
				}
				(*pool)[j] = rt;
			}
		}
	}
}

// IResetRenderTargetPools /////////////////////////////////////////////////////////////////
// Start of frame reset, nothing gets released here. Every pool's "next free"
// index goes back to zero, the blur scratch/dest render target slots are cleared,
// and the scratch shadow lights start over from the first one. So whatever render
// target a shadow slave got from a pool, it no longer owns once this is called
// (conceptually at the end of the frame).
void plDXPipeline::IResetRenderTargetPools()
{
	// Shadow lights get handed out from the beginning again.
	fLights.fNextShadowLight = 0;

	int slot = 0;
	while( slot < kMaxRenderTargetNext )
	{
		fRenderTargetNext[slot] = 0;
		fBlurScratchRTs[slot] = nil;
		fBlurDestRTs[slot] = nil;
		slot++;
	}
}

// IPrepShadowCaster ////////////////////////////////////////////////////////////////////////
// Get all of this shadow caster's geometry ready to be rendered.
// A single shadow caster may be made of multiple spans, possibly spread over
// multiple drawables. Each drawable involved needs to be prepped, but only
// once. Say the caster is composed of:
// drawableA, span0
// drawableA, span1
// drawableB, span0
// Then plDrawable::PrepForRender() gets called ONCE on drawableA and once on
// drawableB, followed by any necessary CPU skinning through
// ISoftwareVertexBlend(drawableA, visList={0,1}) and
// ISoftwareVertexBlend(drawableB, visList={0}).
// Returns false as soon as the software skinning fails for a drawable.
hsBool plDXPipeline::IPrepShadowCaster(const plShadowCaster* caster)
{
	// Scratch storage, kept static and emptied on each use.
	static hsBitVector handled;
	static hsTArray<Int16> visList;

	handled.Clear();
	const hsTArray<plShadowCastSpan>& castSpans = caster->Spans();

	int first;
	for( first = 0; first < castSpans.GetCount(); first++ )
	{
		// Already swept up along with an earlier span from the same drawable.
		if( handled.IsBitSet(first) )
			continue;

		plDrawableSpans* drawable = castSpans[first].fDraw;

		// Begin this drawable's visList with the current span.
		visList.SetCount(0);
		visList.Append((Int16)(castSpans[first].fIndex));
		handled.SetBit(first);

		// Gather up every later span that lives in the same drawable,
		// so the whole drawable gets dealt with in one go.
		int other;
		for( other = first+1; other < castSpans.GetCount(); other++ )
		{
			if( handled.IsBitSet(other) || (castSpans[other].fDraw != drawable) )
				continue;

			visList.Append((Int16)(castSpans[other].fIndex));
			handled.SetBit(other);
		}

		// Got them all, prep the drawable.
		drawable->PrepForRender( this );

		// And do whatever software skinning it needs.
		if( !ISoftwareVertexBlend(drawable, visList) )
			return false;
	}

	return true;
}

// IRenderShadowCaster ////////////////////////////////////////////////
// Render the shadow caster into the slave's render target, creating a shadow map.
// Pushes the shadow cast state, preps the caster's geometry, renders each of the
// caster's spans (except those flagged kPropNoShadowCast), optionally blurs the
// result, and pops the shadow cast state again.
// Returns false if the state couldn't be pushed or the caster couldn't be prepped.
// In both cases the pipeline is left with nothing pushed.
hsBool plDXPipeline::IRenderShadowCaster(plShadowSlave* slave)
{
	const plShadowCaster* caster = slave->fCaster;

	// Setup to render into the slave's render target.
	if( !IPushShadowCastState(slave) )
		return false;

	// Get the shadow caster ready to render.
	if( !IPrepShadowCaster(caster) )
	{
		// The push above succeeded, so undo it. Otherwise the view and render
		// target stacks are left with the shadow map state still on them.
		IPopShadowCastState(slave);
		return false;
	}

	// for each shadowCaster.fSpans
	int iSpan;
	for( iSpan = 0; iSpan < caster->Spans().GetCount(); iSpan++ )
	{
		plDrawableSpans* dr = caster->Spans()[iSpan].fDraw;
		const plSpan* sp = caster->Spans()[iSpan].fSpan;

		hsAssert(sp->fTypeMask & plSpan::kIcicleSpan, "Shadow casting from non-trimeshes not currently supported");

		// render shadowcaster.fSpans[i] to rendertarget
		if( !(sp->fProps & plSpan::kPropNoShadowCast) )
			IRenderShadowCasterSpan(slave, dr, *(const plIcicle*)sp);

		// Keep track of which shadow slaves this span was rendered into.
		// If self-shadowing is off, we use that to determine not to
		// project the shadow map onto its source geometry.
		sp->SetShadowBit(slave->fIndex); //index set in SubmitShadowSlave
	}

	// Debug only. A non-negative blurScale overrides the slave's own.
	if( blurScale >= 0.f )
		slave->fBlurScale = blurScale;

	// If this shadow requests being blurred, do it.
	if( slave->fBlurScale > 0.f )
		IBlurShadowMap(slave);

	// Finished up, restore previous state.
	IPopShadowCastState(slave);

#if MCN_BOUNDS_SPANS
	if (IsDebugFlagSet(plPipeDbg::kFlagShowShadowBounds))
	{
		/// Add a span to our boundsIce to show this
		IAddBoundsSpan(fBoundsSpans, &slave->fWorldBounds);
	}
#endif // MCN_BOUNDS_SPANS

	return true;
}

// IPreprocessShadows ///////////////////////////////////////////////////////////////////////////
// We have a (possibly empty) list of shadows submitted for this frame.
// At BeginRender, we need to accomplish:
//	Find render targets for each shadow request of the requested size.
//	Render the associated spans into the render targets.
// Any slave we fail to generate a shadow map for is dropped from the list
// for this frame.
void plDXPipeline::IPreprocessShadows()
{
	plProfile_BeginTiming(PrepShadows);

	// Mark our shared resources as free to be used.
	IResetRenderTargetPools();

	// Some board (possibly the Parhelia) freaked if anisotropic filtering
	// was enabled when rendering to a render target. We never need it for
	// shadow maps, and it is slower, so we just kill it here.
	ISetAnisotropy(false);

	// Generate a shadow map for each submitted shadow slave.
	// Shadow slave corresponds to one shadow caster paired
	// with one shadow light that affects it. So a single caster
	// may be in multiple slaves (from different lights), or a
	// single light may be in different slaves (affecting different
	// casters). The overall number is low in spite of the possible
	// permutation explosion, because a slave is only generated
	// for a caster being affected (in range etc.) by a light.
	int iSlave;
	for( iSlave = 0; iSlave < fShadows.GetCount(); iSlave++ )
	{
		plShadowSlave* slave = fShadows[iSlave];

		// Any trouble, remove it from the list for this frame.
		if( !IRenderShadowCaster(slave) )
		{
			// IClearShadowSlaves() unrefs the caster's key for each slave still in
			// the list at the end of the frame. This one won't be in the list by
			// then, so let go of its caster here or that ref never gets released.
			// NOTE(review): assumes the matching RefObject() was taken when the
			// slave was added to fShadows - confirm against the submit code.
			slave->fCaster->GetKey()->UnRefObject();

			fShadows.Remove(iSlave);
			// Whatever followed it just slid down into this index, so look here again.
			iSlave--;
			continue;
		}

	}

	// Restore
	ISetAnisotropy(true);

	plProfile_EndTiming(PrepShadows);
}

// IClearShadowSlaves ///////////////////////////////////////////////////////////////////////////
// Shadow slaves are only good for a single frame, so at EndRender() we let go of
// each one's shadow caster (unref'ing the caster's key) and empty the list.
void plDXPipeline::IClearShadowSlaves()
{
	int iSlave = 0;
	while( iSlave < fShadows.GetCount() )
	{
		fShadows[iSlave]->fCaster->GetKey()->UnRefObject();
		iSlave++;
	}
	fShadows.SetCount(0);
}


// IRenderShadowsOntoSpan /////////////////////////////////////////////////////////////////////
// After doing the usual render for a span (all passes), we call the following.
// If the span accepts shadows, this will loop over all the shadows active this
// frame, and apply the ones that intersect this spans bounds. See below for details.
void plDXPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, const plSpan* span, hsGMaterial* mat)
{
	// We've already computed which shadows affect this span. That's recorded in slaveBits.
	const hsBitVector& slaveBits = span->GetShadowSlaves();

	hsBool first = true;

	int i;
	for( i = 0; i < fShadows.GetCount(); i++ )
	{
		if( slaveBits.IsBitSet(fShadows[i]->fIndex) )
		{
			// This slave affects this span.
			if( first )
			{
				// On the first, we do all the setup that is independent of
				// the shadow slave, so state that needs to get set once before
				// projecting any number of shadow maps.
				ISetupShadowRcvTextureStages(mat);

				first = false;

			}

			// Now setup any state specific to this shadow slave.
			ISetupShadowSlaveTextures(fShadows[i]);

			int selfShadowNow = span->IsShadowBitSet(fShadows[i]->fIndex);

			// We vary the shadow intensity when self shadowing (see below),
			// so we cache whether the shadow light is set for regular or
			// self shadowing intensity. If what we're doing now is different
			// than what we're currently set for, set it again.
			if( selfShadowNow != fShadows[i]->fSelfShadowOn )
			{
				plDXLightRef* lRef = fLights.fShadowLights[fShadows[i]->fLightRefIdx];

				// We lower the power on self shadowing, because the artists like to
				// crank up the shadow strength to huge values to get a darker shadow
				// on the environment, which causes the shadow on the avatar to get
				// way too dark. Another way to look at it is when self shadowing,
				// the surface being projected onto is going to be very close to
				// the surface casting the shadow (because they are the same object).
				if( selfShadowNow )
				{
					plConst(hsScalar) kMaxSelfPower = 0.3f;
					hsScalar power = fShadows[i]->fPower > kMaxSelfPower ? kMaxSelfPower : fShadows[i]->fPower;
					lRef->fD3DInfo.Diffuse.r
						= lRef->fD3DInfo.Diffuse.g
						= lRef->fD3DInfo.Diffuse.b
						= power;
				}
				else
				{
					lRef->fD3DInfo.Diffuse.r
						= lRef->fD3DInfo.Diffuse.g
						= lRef->fD3DInfo.Diffuse.b
						= fShadows[i]->fPower;
				}
				fD3DDevice->SetLight(lRef->fD3DIndex, &lRef->fD3DInfo);

				// record which our intensity is now set for.
				fShadows[i]->fSelfShadowOn = selfShadowNow;
			}

			// Enable the light.
			fD3DDevice->LightEnable(fShadows[i]->fLightIndex, true);

#ifdef HS_DEBUGGING
			DWORD nPass;
			fSettings.fDXError = fD3DDevice->ValidateDevice(&nPass);
			if( fSettings.fDXError != D3D_OK )
				IGetD3DError();
#endif // HS_DEBUGGING

#ifndef PLASMA_EXTERNAL_RELEASE
			if (!IsDebugFlagSet(plPipeDbg::kFlagNoShadowApply))
#endif // PLASMA_EXTERNAL_RELEASE
				render.RenderPrims();

			// Disable it again.
			fD3DDevice->LightEnable(fShadows[i]->fLightIndex, false);

		}
	}

}

// ISetupShadowRcvTextureStages ////////////////////////////////////////////
// Set the generic stage states used to project shadow maps onto a surface
// drawn with material `mat`. We'll fill in the specific textures (and the
// stage 0/1 texture transforms) for each slave later.
//
// Summary of what gets configured here:
//	- fixed function pipeline (nil shaders) and the shadow lighting model
//	- read-only Z buffering
//	- stage 0: shadow map (texture bound per slave), modulated by diffuse
//	- stage 1: the ULut texture, complemented and modulated with current
//	- optionally stages 2 and 3: a material layer's texture alpha and the
//	  diffuse alpha, when more than 3 layers at once are available and
//	  the base layer is alpha blended
//	- the stage after the last used one disabled
//	- frame buffer blend, alpha test, specular off, black fog color
void plDXPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat)
{
	// Setup for nil shaders to get us back to fixed function pipeline.
	ISetShaders(nil, nil);

	// We're whacking about with renderstate independent of current material,
	// so make sure the next span processes its material, even if it's the
	// same one.
	fForceMatHandle = true;

	// Set the D3D lighting/material model
	ISetShadowLightState(mat);

	// Zbuffering on read-only
	fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_LESSEQUAL);
	fD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, FALSE);
	fLayerState[0].fZFlags &= ~hsGMatState::kZMask;
	fLayerState[0].fZFlags |= hsGMatState::kZNoZWrite;

	// Stage 0:
	// Texture is slave specific
	// Texture transform is slave specific
	// ColorArg1 = texture
	// ColorArg2 = diffuse
	// ColorOp = modulate
	// AlphaArg1 = texture
	// AlphaOp = SelectArg1
	fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TEXTURE);
	fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG2, D3DTA_DIFFUSE);
	fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP,   D3DTOP_MODULATE);

	fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG1, D3DTA_TEXTURE);
	fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG1);

	// Texture coordinates come from the camera space position. Only touch
	// the device if our cached value says it isn't already set that way.
	if( fLayerUVWSrcs[0] != D3DTSS_TCI_CAMERASPACEPOSITION )
	{
		fD3DDevice->SetTextureStageState(0, D3DTSS_TEXCOORDINDEX, D3DTSS_TCI_CAMERASPACEPOSITION);
		fLayerUVWSrcs[0] = D3DTSS_TCI_CAMERASPACEPOSITION;
	}

	// Set texture to clamp
	fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
	fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
	fLayerState[0].fClampFlags = hsGMatState::kClampTexture;

	// Stage 1:
	// Set texture to ULut
	// Texture transform is slave specific
	// *** With the optional texture blurring, the state here becomes
	// *** partially slave dependent. Specifically, if we've done a blur,
	// *** then we want to modulate the lut color value by current (which is
	// *** the blurred color), else just select the lut. So we'll just move
	// *** the ColorOp down to the slave specific section.
	// %%% Okay, get this. The GeForce2 won't take a SelectArg1 on Stage1 if
	// %%% we're also trying to use Stage2 to modulate in the diffuse. But
	// %%% it WILL let us do a modulate on Stage1. So we're going to make sure
	// %%% that our shadowmap texture is white, then we can just modulate them
	// %%% with no effect. If we're blurring, we already wanted to modulate, so
	// %%% no change there. This means we can set the ColorOp now, rather than
	// %%% having to wait for the Slave specific section later.
	// ColorArg1 = 1 - ULut
	// ColorArg2 = Current
	// ColorOp = Modulate
	// AlphaArg1 = ULut
	// AlphaArg2 = Current
	// AlphaOp = Subtract
	// NOTE(review): ref is dereferenced without a nil check; this assumes
	// IGetULutTextureRef() always returns a valid ref -- TODO confirm.
	plDXTextureRef* ref = IGetULutTextureRef();
	if( !ref->fD3DTexture )
	{
		// No device texture yet; reload it if we still have the source data.
		if( ref->fData )
			IReloadTexture(ref);
	}
	hsRefCnt_SafeAssign(fLayerRef[1], ref);
	fD3DDevice->SetTexture(1, ref->fD3DTexture);

	// The following commented out block is kind of cool, because it
	// bases the darkness of the shadow on the distance between the
	// shadow caster and the point receiving the shadow. So, for example,
	// the hand's shadow would get darker as it reaches for the lever.
	// Unfortunately, it doesn't guarantee that the shadow will completely
	// attenuate out at the fAttenDist (in fact, it pretty much guarantees
	// that it won't), so shadows will pop in and out. So instead, we'll
	// base the color on the distance from the start of the slave. The
	// difference is subtle, and usually unnoticeable, and we get no popping.
	// NOTE(review): no commented out block actually follows this comment;
	// it appears to have been removed at some point.
	fD3DDevice->SetTextureStageState(1, D3DTSS_COLORARG1, D3DTA_TEXTURE | D3DTA_COMPLEMENT);
	fD3DDevice->SetTextureStageState(1, D3DTSS_COLORARG2, D3DTA_CURRENT);
	fD3DDevice->SetTextureStageState(1, D3DTSS_COLOROP,   D3DTOP_MODULATE);

	fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAARG1, D3DTA_TEXTURE);
	fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAARG2, D3DTA_CURRENT);
	fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP,   D3DTOP_SUBTRACT);
	// Our cached blend flags no longer describe what's on the device.
	fLayerState[1].fBlendFlags = UInt32(-1);

	if( fLayerUVWSrcs[1] != D3DTSS_TCI_CAMERASPACEPOSITION )
	{
		fD3DDevice->SetTextureStageState(1, D3DTSS_TEXCOORDINDEX, D3DTSS_TCI_CAMERASPACEPOSITION);
		fLayerUVWSrcs[1] = D3DTSS_TCI_CAMERASPACEPOSITION;
	}
	if( D3DTTFF_COUNT3 != fLayerXformFlags[1] )
	{
		fLayerXformFlags[1] = D3DTTFF_COUNT3;
		fD3DDevice->SetTextureStageState(1, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_COUNT3);
	}

	// Set texture to clamp
	fD3DDevice->SetSamplerState(1, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
	fD3DDevice->SetSamplerState(1, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
	fLayerState[1].fClampFlags = hsGMatState::kClampTexture;

	// First stage not yet spoken for. Bumped below if we add alpha stages.
	int iNextStage = 2;

	// If mat's base layer is alpha'd, and we have > 3 TMU's factor
	// in the base layer's alpha.
	// NOTE(review): mat and mat->GetLayer(0) are dereferenced unchecked here,
	// although ISetShadowLightState() tolerates a nil mat -- TODO confirm
	// callers never pass nil.
	if( (fSettings.fMaxLayersAtOnce > 3) && mat->GetLayer(0)->GetTexture() && (mat->GetLayer(0)->GetBlendFlags() & hsGMatState::kBlendAlpha) )
	{
		plLayerInterface* layer = mat->GetLayer(0);

		// If the following conditions are met, it means that layer 1 is a better choice to
		// get the transparency from. The specific case we're looking for is vertex alpha
		// simulated by an invisible second layer alpha LUT (known as the alpha hack).
		if( (layer->GetMiscFlags() & hsGMatState::kMiscBindNext)
			&& mat->GetLayer(1)
			&& !(mat->GetLayer(1)->GetMiscFlags() & hsGMatState::kMiscNoShadowAlpha)
			&& !(mat->GetLayer(1)->GetBlendFlags() & hsGMatState::kBlendNoTexAlpha)
			&& mat->GetLayer(1)->GetTexture() )
				layer = mat->GetLayer(1);

		// Take the texture alpha and modulate the color so far with it. In
		// the final shadow map, black will have no effect, white will be maximal
		// darkening.
		fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLORARG1, D3DTA_TEXTURE | D3DTA_ALPHAREPLICATE);
		fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLORARG2, D3DTA_CURRENT);
		fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLOROP,   D3DTOP_MODULATE);

		// Alpha just passes through from the previous stage.
		fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_ALPHAARG2, D3DTA_CURRENT);
		fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_ALPHAOP,   D3DTOP_SELECTARG2);

		// Blend flags to layer blend (alpha +- complement)
		fLayerState[iNextStage].fBlendFlags = UInt32(-1);

		// Clamp to whatever the texture wants.
		if( fLayerState[iNextStage].fClampFlags ^ layer->GetClampFlags() )
		{
			fLayerState[iNextStage].fClampFlags = layer->GetClampFlags();
			IHandleStageClamp(iNextStage);
		}

		// Shade to 0
		fLayerState[iNextStage].fShadeFlags = 0;

		// ZFlags to ZNoZWrite
		fLayerState[iNextStage].fZFlags = hsGMatState::kZNoZWrite;

		// MiscFlags to layer's misc flags
		fLayerState[iNextStage].fMiscFlags = layer->GetMiscFlags();

		// Set up whatever UVW transform the layer normally uses.
		IHandleStageTransform(iNextStage, layer);

		// Normal UVW source.
		UInt32 uvwSrc = layer->GetUVWSrc();

		if( fLayerUVWSrcs[ iNextStage ] != uvwSrc )
		{
			fD3DDevice->SetTextureStageState( iNextStage, D3DTSS_TEXCOORDINDEX, uvwSrc );
			fLayerUVWSrcs[ iNextStage ] = uvwSrc;
		}

		// Pick the transform flags: projected 3 component for perspective
		// projection layers, 3 component for generated (normal/position/
		// reflect) coordinates, otherwise plain 2 component UVs.
		UInt32 xformFlags;
		if( layer->GetMiscFlags() & hsGMatState::kMiscPerspProjection )
			xformFlags = D3DTTFF_COUNT3 | D3DTTFF_PROJECTED;
		else if( uvwSrc & (plLayerInterface::kUVWNormal | plLayerInterface::kUVWPosition | plLayerInterface::kUVWReflect) )
			xformFlags = D3DTTFF_COUNT3;
		else
			xformFlags = D3DTTFF_COUNT2;

		if( xformFlags != fLayerXformFlags[iNextStage] )
		{
			fLayerXformFlags[iNextStage] = xformFlags;
			fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_TEXTURETRANSFORMFLAGS, xformFlags);
		}

		// This ref should be pretty safe to use, because we just rendered it.
		// NOTE(review): it is not checked for nil before being dereferenced.
		ref = (plDXTextureRef*)layer->GetTexture()->GetDeviceRef();

		hsRefCnt_SafeAssign( fLayerRef[iNextStage], ref );
		fD3DDevice->SetTexture( iNextStage, ref->fD3DTexture );

		iNextStage++;

		// One more stage, modulating the color so far by the diffuse alpha.
		fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLORARG1, D3DTA_DIFFUSE | D3DTA_ALPHAREPLICATE);
		fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLORARG2, D3DTA_CURRENT);
		fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLOROP,   D3DTOP_MODULATE);

		fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_ALPHAOP,   D3DTOP_DISABLE);

		fLayerState[iNextStage].fBlendFlags = UInt32(-1);

		iNextStage++;
	}

	// NOTE(review): this assignment is repeated verbatim a few lines below.
	fLayerState[iNextStage].fBlendFlags = UInt32(-1);

	// And seal it up
	fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_COLOROP, D3DTOP_DISABLE);
	fD3DDevice->SetTextureStageState(iNextStage, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
	fLayerState[iNextStage].fBlendFlags = UInt32(-1);

	fLastEndingStage = 0;

	// Now set the frame buffer blend
	// Remember that white darkens and black is no effect.
	// Form is Src * SrcBlend + Dst * DstBlend
	// We want inverse Src * Dst, so
	// Src * ZERO + Dst * InvSrc
	fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE);
	fD3DDevice->SetRenderState(D3DRS_SRCBLEND,  D3DBLEND_ZERO);
	fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_INVSRCCOLOR);

	fLayerState[0].fBlendFlags = UInt32(-1);

	// Turn on alpha test. Alpha of zero means the shadow map depth
	// is greater or equal to the surface depth, i.e. the surface
	// is between the shadow caster and the light and doesn't receive
	// shadow.
	fD3DDevice->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_GREATEREQUAL);
	fD3DDevice->SetRenderState(D3DRS_ALPHAREF, 0x00000001);
	fLayerState[0].fBlendFlags |= hsGMatState::kBlendTest;

	fD3DDevice->SetRenderState(D3DRS_SPECULARENABLE, FALSE);
	fLayerState[0].fShadeFlags &= ~hsGMatState::kShadeSpecular;

	// Set fog color to black
	// We should automatically reset it, because our blend mode is -1'd.
	fD3DDevice->SetRenderState(D3DRS_FOGCOLOR, 0);

#ifdef HS_DEBUGGING
	// Debug builds only: ask the device whether it can render with the
	// state we just set, and report the error if it can't.
	DWORD nPass;
	fSettings.fDXError = fD3DDevice->ValidateDevice(&nPass);
	if( fSettings.fDXError != D3D_OK )
		IGetD3DError();
#endif // HS_DEBUGGING
}

// ISetupShadowSlaveTextures //////////////////////////////////////////////
// Bind the state that differs per shadow slave when projecting its shadow
// map onto a surface: the shadow map texture and camera-to-texture
// transform on stage 0, and the camera-to-LUT transform on stage 1.
// Does nothing (beyond asserting) if the slave has no rendered shadow map.
void plDXPipeline::ISetupShadowSlaveTextures(plShadowSlave* slave)
{
	D3DXMATRIX d3dXfm;

	hsMatrix44 camToWorld = GetCameraToWorld();

	// The slave's pipe data is the render target its shadow map went into.
	plRenderTarget* shadowMap = (plRenderTarget*)slave->fPipeData;
	hsAssert(shadowMap, "Processing a slave that hasn't been rendered");
	if( shadowMap == nil )
		return;

	plDXTextureRef* mapRef = (plDXTextureRef*)shadowMap->GetDeviceRef();
	hsAssert(mapRef, "Shadow map ref should have been made when it was rendered");
	if( mapRef == nil )
		return;

	// Stage 0: sample the shadow map, with texture coordinates generated by
	// taking camera space back to world and then into the slave's texture space.
	hsRefCnt_SafeAssign( fLayerRef[0], mapRef );
	fD3DDevice->SetTexture( 0, mapRef->fD3DTexture );

	hsMatrix44 camToShadowMap = slave->fWorldToTexture * camToWorld;
	IMatrix44ToD3DMatrix(d3dXfm, camToShadowMap);

	fD3DDevice->SetTransform( sTextureStages[0], &d3dXfm );
	fLayerTransform[0] = true;

	// Orthogonal views (directional lights) need only two coordinates;
	// anything else (point lights) needs three with the projective divide.
	UInt32 stage0Flags;
	if( slave->fView.GetOrthogonal() )
		stage0Flags = D3DTTFF_COUNT2;
	else
		stage0Flags = D3DTTFF_COUNT3 | D3DTTFF_PROJECTED;

	if( fLayerXformFlags[0] != stage0Flags )
	{
		fD3DDevice->SetTextureStageState(0, D3DTSS_TEXTURETRANSFORMFLAGS, stage0Flags);
		fLayerXformFlags[0] = stage0Flags;
	}

	// Stage 1: the LUT, addressed through the slave's fRcvLUT transform.
	hsMatrix44 camToLut = slave->fRcvLUT * camToWorld;
	IMatrix44ToD3DMatrix(d3dXfm, camToLut);

	fD3DDevice->SetTransform( sTextureStages[1], &d3dXfm );
	fLayerTransform[1] = true;
}

// ISetShadowLightState //////////////////////////////////////////////////////////////////
// Put the D3D lighting/material model into the state used for projecting a
// shadow map onto this material: all lights disabled, lighting enabled, no
// ambient, and a grey diffuse material whose level is the base layer's
// opacity (or full white when there is no material or base layer).
void plDXPipeline::ISetShadowLightState(hsGMaterial* mat)
{
	IDisableLightsForShadow();
	inlEnsureLightingOn();

	fCurrLightingMethod = plSpan::kLiteShadow;

	// Default to full intensity; use the base layer's opacity when we have one.
	hsScalar diffuseLevel = 1.f;
	if( mat && mat->GetNumLayers() && mat->GetLayer(0) )
		diffuseLevel = mat->GetLayer(0)->GetOpacity();

	// Static, so every member other than Diffuse stays zero initialized.
	static D3DMATERIAL9	shadowMat;
	shadowMat.Diffuse.r = diffuseLevel;
	shadowMat.Diffuse.g = diffuseLevel;
	shadowMat.Diffuse.b = diffuseLevel;
	shadowMat.Diffuse.a = 1.f;

	fD3DDevice->SetMaterial(&shadowMat);
	fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0 );
}

// IDisableLightsForShadow ///////////////////////////////////////////////////////////
// Turn off every light we currently have flagged as enabled, and clear the
// enabled flags. Only the shadow light should illuminate the surface.
void plDXPipeline::IDisableLightsForShadow()
{
	int lightIdx;
	for( lightIdx = 0; lightIdx < fLights.fLastIndex + 1; ++lightIdx )
	{
		// Skip anything that isn't on, so we only talk to the device when needed.
		if( !fLights.fEnabledFlags.IsBitSet(lightIdx) )
			continue;

		fD3DDevice->LightEnable(lightIdx, false);
	}

	fLights.fEnabledFlags.Clear();
}

// IEnableShadowLight ///////////////////////////////////////////////
// Switch on the D3D light belonging to this shadow slave.
// NOT USED.
void plDXPipeline::IEnableShadowLight(plShadowSlave* slave)
{
	const bool turnOn = true;
	fD3DDevice->LightEnable(slave->fLightIndex, turnOn);
}

// IAcceptsShadow ////////////////////////////////////////////////////////////////
// Decide whether this span may receive the slave's shadow. A span that was
// itself rendered into the slave's shadow map only receives it when the
// slave has self shadowing turned on.
hsBool plDXPipeline::IAcceptsShadow(const plSpan* span, plShadowSlave* slave)
{
	// Self shadowing slaves shadow everything, casters included.
	if( slave->SelfShadow() )
		return true;

	// Otherwise the span's shadow bits tell us which shadow maps it was
	// rendered into; it can only receive from maps it didn't cast into.
	return !span->IsShadowBitSet(slave->fIndex);
}

// IReceivesShadows ////////////////////////////////////////////////////////////////////
// Lets artists simply disable shadows on spans where they would either look
// goofy or not contribute. An explicit "no shadow" property wins over an
// explicit "force shadow" one.
// Also, with fewer than 3 simultaneous textures, anything with an alpha'd
// base layer is skipped, unless that has been overridden by forcing.
hsBool plDXPipeline::IReceivesShadows(const plSpan* span, hsGMaterial* mat)
{
	// Explicit per span overrides are checked first.
	const bool shadowVetoed = (span->fProps & plSpan::kPropNoShadow) != 0;
	const bool shadowForced = (span->fProps & plSpan::kPropForceShadow) != 0;
	if( shadowVetoed || shadowForced )
		return !shadowVetoed;

	// Projection spans don't take shadows.
	const bool isProjection = (span->fProps & (plSpan::kPropSkipProjection | plSpan::kPropProjAsVtx)) != 0;
	if( isProjection )
		return false;

	// Not enough texture stages to factor in the base layer's alpha.
	if( fSettings.fMaxLayersAtOnce < 3 )
	{
		plLayerInterface* baseLayer = mat->GetLayer(0);
		if( baseLayer->GetTexture() && (baseLayer->GetBlendFlags() & hsGMatState::kBlendAlpha) )
			return false;
	}

#ifdef ENABLE_INTEL_SHADOWS
	// Shouldn't hit this, since we're disabling shadows on the Intel chips,
	// but just in case.
	// To enable this, you'll need to start passing in the drawable as well.
	if( fSettings.fIsIntel )
	{
		const plVertexSpan* vertSpan = static_cast<const plVertexSpan*>(span);
		plGBufferGroup* group = drawable->GetBufferGroup(vertSpan->fGroupIdx);
		if( !group->GetNumUVs() )
			return false;
	}
#endif // ENABLE_INTEL_SHADOWS

	return true;
}

// Register a clothing outfit as needing its texture this frame. Submitting
// the same outfit again is a no-op. An outfit that was not in the previous
// frame's list gets its key ref'd; one carried over from the previous frame
// is just moved across, keeping the ref it already has.
void plDXPipeline::SubmitClothingOutfit(plClothingOutfit* co)
{
	// Already submitted this frame.
	if (fClothingOutfits.Find(co) != fClothingOutfits.kMissingIndex)
		return;

	fClothingOutfits.Append(co);

	if (fPrevClothingOutfits.RemoveItem(co))
		return;

	co->GetKey()->RefObject();
}

// Empty the given outfit list, working from the back. For each outfit:
// hand its render target back to the avatar RT pool, detach the texture
// from its target layer, and drop the key ref held on it.
void plDXPipeline::IClearClothingOutfits(hsTArray<plClothingOutfit*>* outfits)
{
	int idx = outfits->GetCount();
	while (idx-- > 0)
	{
		plClothingOutfit* outfit = outfits->Get(idx);
		outfits->Remove(idx);

		IFreeAvRT((plRenderTarget*)outfit->fTargetLayer->GetTexture());
		outfit->fTargetLayer->SetTexture(nil);
		outfit->GetKey()->UnRefObject();
	}
}

// (Re)build the avatar render target pool, sized for the outfits currently
// submitted. Starts at the highest resolution the global mip chop allows and
// halves it until the whole pool can be created, or gives up below 32.
void plDXPipeline::IFillAvRTPool()
{
	fAvNextFreeRT = 0;
	fAvRTShrinkValidSince = hsTimer::GetSysSeconds();

	// One outfit needs one target. For more, go straight to 8 so we don't
	// have to refresh for the 2nd, 4th, AND 8th player, then keep doubling
	// until everyone fits.
	int poolSize = 1;
	if (fClothingOutfits.GetCount() > 1)
	{
		for (poolSize = 8; poolSize < fClothingOutfits.GetCount(); poolSize *= 2)
		{
		}
	}

	// I could see a 32MB video card going down to 64x64 RTs in extreme cases
	// (over 100 players onscreen at once), but really, if such hardware is ever trying to push
	// that, the low texture resolution is not going to be your major concern.
	fAvRTWidth = 1024 >> plMipmap::GetGlobalLevelChopCount();
	while (fAvRTWidth >= 32)
	{
		if (IFillAvRTPool(poolSize, fAvRTWidth))
			return;

		// Didn't fit. Drop the resolution and have another go.
		fAvRTWidth /= 2;
	}
}

// Try to fill the avatar render target pool with numRTs square, 32 bit,
// texture-usable render targets of the given width (no Z or stencil).
// Returns true if every target and its device ref could be created.
// On failure, everything created so far is deleted, the pool is left
// empty, and false is returned, so the caller can retry at a lower
// resolution.
hsBool plDXPipeline::IFillAvRTPool(UInt16 numRTs, UInt16 width)
{
	fAvRTPool.SetCount(numRTs);
	int i;
	for (i = 0; i < numRTs; i++)
	{
		UInt16 flags = plRenderTarget::kIsTexture | plRenderTarget::kIsProjected;
		UInt8 bitDepth = 32;
		UInt8 zDepth = 0;
		UInt8 stencilDepth = 0;
		fAvRTPool[i] = TRACKED_NEW plRenderTarget(flags, width, width, bitDepth, zDepth, stencilDepth);

		// If anyone fails, release everyone we've created.
		if (!MakeRenderTargetRef(fAvRTPool[i]))
		{
			int j;
			for (j = 0; j <= i; j++)
			{
				delete fAvRTPool[j];
			}

			// Don't leave the pool holding pointers we just deleted (or
			// entries past i that were never assigned). If no retry ever
			// succeeds, IReleaseAvRTPool() would otherwise delete them again.
			fAvRTPool.Reset();
			return false;
		}
	}
	return true;
}

// Tear down the avatar render target pool. Every outfit from this frame
// and the previous one has the texture detached from its target layer
// first, since those textures come from the pool we are about to delete.
void plDXPipeline::IReleaseAvRTPool()
{
	int curr;
	for (curr = 0; curr < fClothingOutfits.GetCount(); ++curr)
		fClothingOutfits[curr]->fTargetLayer->SetTexture(nil);

	int prev;
	for (prev = 0; prev < fPrevClothingOutfits.GetCount(); ++prev)
		fPrevClothingOutfits[prev]->fTargetLayer->SetTexture(nil);

	int slot;
	for (slot = 0; slot < fAvRTPool.GetCount(); ++slot)
		delete fAvRTPool[slot];

	fAvRTPool.Reset();
}

// Hand out the next unused render target from the avatar RT pool and mark
// it as in use by advancing the free index. No bounds check is done here.
plRenderTarget *plDXPipeline::IGetNextAvRT()
{
	plRenderTarget* nextFree = fAvRTPool[fAvNextFreeRT];
	fAvNextFreeRT++;
	return nextFree;
}

// Return a render target to the avatar RT pool. In-use targets occupy the
// slots below fAvNextFreeRT, so the freed one swaps places with the last
// in-use target and the free index steps back by one. Targets that aren't
// in the pool are ignored.
void plDXPipeline::IFreeAvRT(plRenderTarget* tex)
{
	UInt32 slot = fAvRTPool.Find(tex);
	if (slot == fAvRTPool.kMissingIndex)
		return;

	hsAssert(slot < fAvNextFreeRT, "Freeing an avatar RT that's already free?");

	// After this, fAvNextFreeRT indexes what was the last in-use slot.
	fAvNextFreeRT--;
	fAvRTPool[slot] = fAvRTPool[fAvNextFreeRT];
	fAvRTPool[fAvNextFreeRT] = tex;
}

// Vertex layout for the clothing quads drawn with DrawPrimitiveUP: three
// position floats followed by one 2D texture coordinate set. The field
// order and sizes have to stay in step with the FVF used for those quads
// (D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE2(0)), and
// sizeof(plAVTexVert) is passed to the device as the vertex stride.
struct plAVTexVert
{
	float fPos[3];	// x, y, z
	float fUv[2];	// u, v
};

// Composite the clothing textures for every outfit submitted this frame.
// Each outfit gets a render target (taken from the avatar RT pool if it
// doesn't already have one on its target layer); the outfit's base texture
// is copied into it, then each item texture is drawn over the top as a
// tinted quad positioned according to the clothing layout. Outfits that
// still have a render target and no dirty items are left untouched.
//
// On the way in, last frame's leftover outfits are released; on the way
// out, this frame's list is swapped into fPrevClothingOutfits.
void plDXPipeline::IPreprocessAvatarTextures()
{
	plProfile_Set(AvRTPoolUsed, fClothingOutfits.GetCount());
	plProfile_Set(AvRTPoolCount, fAvRTPool.GetCount());
	plProfile_Set(AvRTPoolRes, fAvRTWidth);
	plProfile_Set(AvRTShrinkTime, UInt32(hsTimer::GetSysSeconds() - fAvRTShrinkValidSince));

	IClearClothingOutfits(&fPrevClothingOutfits); // Frees anyone used last frame that we don't need this frame

	// Position plus a single 2 component texture coordinate; matches plAVTexVert.
	const UInt32 kVFormat = D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE2(0);

	// Nothing submitted this frame, so no device state needs touching.
	if (fClothingOutfits.GetCount() == 0)
		return;

	plMipmap *itemBufferTex = nil;

	fForceMatHandle = true;
	ISetShaders(nil, nil); // Has a side effect of futzing with our cull settings...

	// Even though we're going to use DrawPrimitiveUP, we explicitly set the current VB ref to nil,
	// otherwise we might try and use the same VB ref later, think it hasn't changed, and
	// not update our FVF.
	hsRefCnt_SafeUnRef(fSettings.fCurrVertexBuffRef);
	fSettings.fCurrVertexBuffRef = nil;
	fD3DDevice->SetStreamSource(0, NULL, 0, 0);
	fD3DDevice->SetFVF(fSettings.fCurrFVFFormat = kVFormat);
	// All transforms to identity, so the quad positions we pass are used as is.
	fD3DDevice->SetTransform(D3DTS_VIEW, &d3dIdentityMatrix);
	fD3DDevice->SetTransform(D3DTS_WORLD, &d3dIdentityMatrix);
	fD3DDevice->SetTransform(D3DTS_PROJECTION, &d3dIdentityMatrix);
	fD3DDevice->SetRenderState(D3DRS_CULLMODE, fCurrCullMode = D3DCULL_NONE);
	fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_DISABLE);
	// No Z test and no Z write, with the cached Z flags updated to match.
	fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_ALWAYS);
	fD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, FALSE);
	fLayerState[0].fZFlags &= ~hsGMatState::kZMask;
	fLayerState[0].fZFlags |= hsGMatState::kZNoZWrite | hsGMatState::kZNoZRead;
	// Stage 0 reads texture coordinate set 0 from the vertex.
	if (fLayerUVWSrcs[0] != 0)
	{
		fD3DDevice->SetTextureStageState(0, D3DTSS_TEXCOORDINDEX, 0);
		fLayerUVWSrcs[0] = 0;
	}
	fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
	fD3DDevice->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
	fLayerState[0].fClampFlags = hsGMatState::kClampTexture;
	// No texture transform on stage 0.
	if (D3DTTFF_DISABLE != fLayerXformFlags[0])
	{
		fLayerXformFlags[0] = D3DTTFF_DISABLE;
		fD3DDevice->SetTextureStageState(0, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_DISABLE);
	}
	fD3DDevice->SetRenderState(D3DRS_SPECULARENABLE, FALSE);
	fLayerState[0].fShadeFlags &= ~hsGMatState::kShadeSpecular;
	fD3DDevice->SetRenderState(D3DRS_FOGENABLE, FALSE);
	fCurrFog.fEnvPtr = nil;
	// Stage 0 output is texture * texture factor, for both color and alpha.
	// The texture factor carries the tint set per quad below.
	fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP, D3DTOP_MODULATE);
	fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TFACTOR);
	fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG2, D3DTA_TEXTURE);
	fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP, D3DTOP_MODULATE);
	fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG1, D3DTA_TFACTOR);
	fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG2, D3DTA_TEXTURE);
	fD3DDevice->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_ALWAYS);
	fLayerState[0].fBlendFlags = UInt32(-1);
	// Stage 1 disabled; only a single texture stage is used.
	fD3DDevice->SetTextureStageState(1, D3DTSS_COLOROP, D3DTOP_DISABLE);
	fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
	fLayerState[1].fBlendFlags = UInt32(-1);
	inlEnsureLightingOff();

	int oIdx;
	for (oIdx = 0; oIdx < fClothingOutfits.GetCount(); oIdx++)
	{
		plClothingOutfit *co = fClothingOutfits[oIdx];
		// Can't composite anything without a base texture.
		if (co->fBase == nil || co->fBase->fBaseTexture == nil)
			continue;

		plRenderTarget *rt = plRenderTarget::ConvertNoRef(co->fTargetLayer->GetTexture());
		if (rt != nil && co->fDirtyItems.Empty())
		{
			// we've still got our valid RT from last frame and we have nothing to do.
			continue;
		}

		if (rt == nil)
		{
			// NOTE(review): IGetNextAvRT() does no bounds check; this assumes the
			// pool has been sized for the submitted outfits -- TODO confirm.
			rt = IGetNextAvRT();
			co->fTargetLayer->SetTexture(rt);
		}

		PushRenderTarget(rt);
		// Viewport covering the whole render target.
		D3DVIEWPORT9 vp = {0, 0, rt->GetWidth(), rt->GetHeight(), 0.f, 1.f};
		WEAK_ERROR_CHECK(fD3DDevice->SetViewport(&vp));

		// UV offsets of half a render target pixel, added to every quad's
		// texture coordinates by IDrawClothingQuad.
		hsScalar uOff = 0.5f / rt->GetWidth();
		hsScalar vOff = 0.5f / rt->GetHeight();

		// Copy over the base
		fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, FALSE);
		fD3DDevice->SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE | D3DCOLORWRITEENABLE_ALPHA);
		fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, 0xffffffff);
		fLayerState[0].fBlendFlags = UInt32(-1);
		// Full target quad: (-1,-1) to (1,1) with the identity transforms set above.
		IDrawClothingQuad(-1.f, -1.f, 2.f, 2.f, uOff, vOff, co->fBase->fBaseTexture);
		// NOTE(review): layout is dereferenced below without a nil check; this
		// assumes GetLayout() always finds the base's layout -- TODO confirm.
		plClothingLayout *layout = plClothingMgr::GetClothingMgr()->GetLayout(co->fBase->fLayoutName);

		int i, j, k;
		for (i = 0; i < co->fItems.GetCount(); i++)
		{
			plClothingItem *item = co->fItems[i];
			//if (!co->fDirtyItems.IsBitSet(item->fTileset))
			//	continue; // Not dirty, don't update

			for (j = 0; j < item->fElements.GetCount(); j++)
			{
				for (k = 0; k < plClothingElement::kLayerMax; k++)
				{
					if (item->fTextures[j][k] == nil)
						continue;

					itemBufferTex = item->fTextures[j][k];
					hsColorRGBA tint = co->GetItemTint(item, k);
					// Skin blend layers take their alpha from the outfit's blend values.
					if (k >= plClothingElement::kLayerSkinBlend1 &&	k <= plClothingElement::kLayerSkinLast)
						tint.a = co->fSkinBlends[k - plClothingElement::kLayerSkinBlend1];

					if (k == plClothingElement::kLayerBase)
					{
						// The base layer overwrites color and alpha outright.
						fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, FALSE);
						fD3DDevice->SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE | D3DCOLORWRITEENABLE_ALPHA);
					}
					else
					{
						// Every other layer is alpha blended over what's there,
						// writing color only so the target's alpha is left alone.
						fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE);
						fD3DDevice->SetRenderState(D3DRS_SRCBLEND,  D3DBLEND_SRCALPHA);
						fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA);
						fD3DDevice->SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE);
					}
					fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, tint.ToARGB32());
					fLayerState[0].fBlendFlags = UInt32(-1);
					// Map the element's rectangle in layout units onto the -1..1
					// range, flipping Y (layout Y grows in the opposite direction).
					// NOTE(review): the vertical terms are also scaled by fOrigWidth,
					// which presumably relies on the layout being square -- TODO confirm.
					hsScalar screenW = (hsScalar)item->fElements[j]->fWidth / layout->fOrigWidth * 2.f;
					hsScalar screenH = (hsScalar)item->fElements[j]->fHeight / layout->fOrigWidth * 2.f;
					hsScalar screenX = (hsScalar)item->fElements[j]->fXPos / layout->fOrigWidth * 2.f - 1.f;
					hsScalar screenY = (1.f - (hsScalar)item->fElements[j]->fYPos / layout->fOrigWidth) * 2.f - 1.f - screenH;
					IDrawClothingQuad(screenX, screenY, screenW, screenH, uOff, vOff, itemBufferTex);
				}
			}
		}
		PopRenderTarget();
		co->fDirtyItems.Clear();
	}
	// Nothing else sets this render state, so let's just set it back to the default to be safe
	fD3DDevice->SetRenderState(D3DRS_COLORWRITEENABLE, D3DCOLORWRITEENABLE_RED | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_BLUE | D3DCOLORWRITEENABLE_ALPHA);
	// We overwrote the view/world/projection transforms above, so flag them all for reset.
	fView.fXformResetFlags = fView.kResetAll;

	// This frame's outfits become the "previous" list; the current list
	// takes over the (just cleared) previous one.
	fClothingOutfits.Swap(fPrevClothingOutfits);
}

// Draw one textured quad for the clothing compositor, using stage 0 and
// DrawPrimitiveUP with plAVTexVert vertices.
//	x, y		- corner of the quad with the smallest coordinates
//	w, h		- extent of the quad along x and y
//	uOff, vOff	- offsets added to every texture coordinate
//	tex			- texture to draw; must not be nil
// The whole texture is mapped across the quad, with v running from
// 1 at y down to 0 at y + h (before the offsets). A device ref is made
// for the texture if it has none or it is dirty; if one still can't be
// had, nothing is drawn.
void plDXPipeline::IDrawClothingQuad(hsScalar x, hsScalar y, hsScalar w, hsScalar h,
									 hsScalar uOff, hsScalar vOff, plMipmap *tex)
{
	const UInt32 kVSize = sizeof(plAVTexVert);
	plDXTextureRef* ref = (plDXTextureRef*)tex->GetDeviceRef();
	if (!ref || ref->IsDirty())
	{
		MakeTextureRef(nil, tex);
		ref = (plDXTextureRef*)tex->GetDeviceRef();
	}

	// Making the ref can fail, so don't dereference it blindly.
	hsAssert(ref, "Failed to make a texture ref for a clothing texture");
	if (!ref)
		return;

	if (!ref->fD3DTexture)
	{
		// No device texture yet; reload it if we still have the source data.
		if (ref->fData)
			IReloadTexture(ref);
	}
	hsRefCnt_SafeAssign( fLayerRef[0], ref );
	fD3DDevice->SetTexture(0, ref->fD3DTexture);

	// Template vertex: the (x, y) corner, sampling (0, 1) plus the offsets.
	plAVTexVert ptr[4];
	plAVTexVert vert;
	vert.fPos[0] = x;
	vert.fPos[1] = y;
	vert.fPos[2] = 0.5f;
	vert.fUv[0] = uOff;
	vert.fUv[1] = 1.f + vOff;

	// Strip vertex 2: (x, y)
	ptr[2] = vert;

	// Strip vertex 0: (x + w, y)
	ptr[0] = vert;
	ptr[0].fPos[0] += w;
	ptr[0].fUv[0] += 1.f;

	// Strip vertex 1: (x + w, y + h)
	ptr[1] = vert;
	ptr[1].fPos[0] += w;
	ptr[1].fUv[0] += 1.f;
	ptr[1].fPos[1] += h;
	ptr[1].fUv[1] -= 1.f;

	// Strip vertex 3: (x, y + h)
	ptr[3] = vert;
	ptr[3].fPos[1] += h;
	ptr[3].fUv[1] -= 1.f;

#ifdef HS_DEBUGGING
	// Debug builds only: check the device can render with the current state.
	DWORD nPass;
	fSettings.fDXError = fD3DDevice->ValidateDevice(&nPass);
	if( fSettings.fDXError != D3D_OK )
		IGetD3DError();
#endif // HS_DEBUGGING
	// Two triangles from the four strip vertices.
	fD3DDevice->DrawPrimitiveUP(D3DPT_TRIANGLESTRIP, 2, ptr, kVSize);
}

///////////////////////////////////////////////////////////////////////////////
// Test hackery as R&D for water
///////////////////////////////////////////////////////////////////////////////


///////////////////////////////////////////////////////////////////////////////
// End Test hackery as R&D for water
///////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////////
//// Functions from Other Classes That Need to Be Here to Compile Right ///////
///////////////////////////////////////////////////////////////////////////////

// Create a DX pipeline for the given window and device mode.
// The pipeline is returned even when it came up with an error (the check
// that deleted it and returned nil was taken out 8.1.2001 mcn), so the
// client can still grab the error string from it.
plPipeline	*plPipelineCreate::ICreateDXPipeline( hsWinRef hWnd, const hsG3DDeviceModeRecord *devMode )
{
	return TRACKED_NEW plDXPipeline( hWnd, devMode );
}