Fix line endings and tabs

2025-07-14 02:27:40 -04:00 · 2011-04-11 16:27:55 -07:00
parent d4250e19b5
commit 908aaeb6f6
2738 changed files with 702562 additions and 702562 deletions
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/AssShader.zip
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/AssShader.zip
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_BiasNormals.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_BiasNormals.inl
@@ -1,17 +1,17 @@
-
-
-// Grab noise texture,
-// modulate biased version by vtx color 0,
-// add to vtx color 1
-
-ps.1.1
-
-tex		t0;
-tex		t1;
-
-add		r0.rgb, t0_bias, t1_bias;
-+add	r0.a, t0, t1;
-//mov		r0, t1_bias;
-mad		r0.rgb, r0, v0, v1;
-//mov r0, v1;
-
+
+
+// Grab noise texture,
+// modulate biased version by vtx color 0,
+// add to vtx color 1
+
+ps.1.1
+
+tex     t0;
+tex     t1;
+
+add     r0.rgb, t0_bias, t1_bias;
++add    r0.a, t0, t1;
+//mov       r0, t1_bias;
+mad     r0.rgb, r0, v0, v1;
+//mov r0, v1;
+
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CaddAadd.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CaddAadd.inl
@@ -1,14 +1,14 @@
-
-ps.1.1
-
-// Add blend color, output sum of alpha
-
-// Color is t0 + t1
-// Alpha is t0.a + t1.a
-
-tex		t0;
-tex		t1;
-
-add		r0.rgb, t0, t1;
-+add		r0.a, t0, t1;
-mul		r0, r0, v0;
+
+ps.1.1
+
+// Add blend color, output sum of alpha
+
+// Color is t0 + t1
+// Alpha is t0.a + t1.a
+
+tex     t0;
+tex     t1;
+
+add     r0.rgb, t0, t1;
++add        r0.a, t0, t1;
+mul     r0, r0, v0;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CaddAbase.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CaddAbase.inl
@@ -1,14 +1,14 @@
-
-ps.1.1
-
-// Add blend color, output base alpha
-
-// Color is t0 + t1
-// Alpha is t0.a
-
-tex		t0;
-tex		t1;
-
-add		r0.rgb, t0, t1;
-+mov		r0.a, t0;
-mul		r0, r0, v0;
+
+ps.1.1
+
+// Add blend color, output base alpha
+
+// Color is t0 + t1
+// Alpha is t0.a
+
+tex     t0;
+tex     t1;
+
+add     r0.rgb, t0, t1;
++mov        r0.a, t0;
+mul     r0, r0, v0;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CaddAmult.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CaddAmult.inl
@@ -1,14 +1,14 @@
-
-ps.1.1
-
-// Add blend color, output product of alpha
-
-// Color is t0 + t1
-// Alpha is t0.a * t1.a
-
-tex		t0;
-tex		t1;
-
-add		r0.rgb, t0, t1;
-+mul		r0.a, t0, t1;
-mul		r0, r0, v0;
+
+ps.1.1
+
+// Add blend color, output product of alpha
+
+// Color is t0 + t1
+// Alpha is t0.a * t1.a
+
+tex     t0;
+tex     t1;
+
+add     r0.rgb, t0, t1;
++mul        r0.a, t0, t1;
+mul     r0, r0, v0;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CalphaAadd.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CalphaAadd.inl
@@ -1,14 +1,14 @@
-
-ps.1.1
-
-// Alpha blend color, output sum of alphas
-
-// Color is t0 * (1 - t1.a) + t1 * t1.a
-// Alpha is t0.a + t1.a
-
-tex		t0
-tex		t1
-
-lrp		r0.rgb, t1.a, t1, t0
-add		r0.a, t0, t1;
-mul		r0, r0, v0;
+
+ps.1.1
+
+// Alpha blend color, output sum of alphas
+
+// Color is t0 * (1 - t1.a) + t1 * t1.a
+// Alpha is t0.a + t1.a
+
+tex     t0
+tex     t1
+
+lrp     r0.rgb, t1.a, t1, t0
+add     r0.a, t0, t1;
+mul     r0, r0, v0;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CalphaAbase.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CalphaAbase.inl
@@ -1,14 +1,14 @@
-
-ps.1.1
-
-// Alpha blend layers, output base alpha
-//
-// Color is t0 * (1 - t1.a) + t1 * t1.a
-// Alpha is t0.a
-
-tex		t0
-tex		t1
-
-lrp		r0.rgb, t1.a, t1, t0
-mov		r0.a, t0;
-mul		r0, r0, v0;
+
+ps.1.1
+
+// Alpha blend layers, output base alpha
+//
+// Color is t0 * (1 - t1.a) + t1 * t1.a
+// Alpha is t0.a
+
+tex     t0
+tex     t1
+
+lrp     r0.rgb, t1.a, t1, t0
+mov     r0.a, t0;
+mul     r0, r0, v0;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CalphaAmult.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CalphaAmult.inl
@@ -1,14 +1,14 @@
-
-ps.1.1
-
-// Alpha blend color, output product of alphas
-
-// Color is t0 * (1 - t1.a) + t1 * t1.a
-// Alpha is t0.a * t1.a
-
-tex		t0
-tex		t1
-
-lrp		r0.rgb, t1.a, t1, t0
-mul		r0.a, t0, t1;
-mul		r0, r0, v0;
+
+ps.1.1
+
+// Alpha blend color, output product of alphas
+
+// Color is t0 * (1 - t1.a) + t1 * t1.a
+// Alpha is t0.a * t1.a
+
+tex     t0
+tex     t1
+
+lrp     r0.rgb, t1.a, t1, t0
+mul     r0.a, t0, t1;
+mul     r0, r0, v0;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CbaseAbase.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CbaseAbase.inl
@@ -1,9 +1,9 @@
-
-ps.1.1
-
-// Single layer, just modulate by vertex color and emit
-//
-
-tex		t0
-
-mul		r0, t0, v0;
+
+ps.1.1
+
+// Single layer, just modulate by vertex color and emit
+//
+
+tex     t0
+
+mul     r0, t0, v0;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CmultAadd.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CmultAadd.inl
@@ -1,14 +1,14 @@
-
-ps.1.1
-
-// Multiply blend color, output sum of alpha
-
-// Color is t0 * t1
-// Alpha is t0.a + t1.a
-
-tex		t0;
-tex		t1;
-
-mul		r0.rgb, t0, t1;
-+add		r0.a, t0, t1;
-mul		r0, r0, v0;
+
+ps.1.1
+
+// Multiply blend color, output sum of alpha
+
+// Color is t0 * t1
+// Alpha is t0.a + t1.a
+
+tex     t0;
+tex     t1;
+
+mul     r0.rgb, t0, t1;
++add        r0.a, t0, t1;
+mul     r0, r0, v0;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CmultAbase.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CmultAbase.inl
@@ -1,14 +1,14 @@
-
-ps.1.1
-
-// Multiply blend color, output base alpha
-
-// Color is t0 * t1
-// Alpha is t0.a
-
-tex		t0;
-tex		t1;
-
-mul		r0.rgb, t0, t1;
-+mov		r0.a, t0;
-mul		r0, r0, v0;
+
+ps.1.1
+
+// Multiply blend color, output base alpha
+
+// Color is t0 * t1
+// Alpha is t0.a
+
+tex     t0;
+tex     t1;
+
+mul     r0.rgb, t0, t1;
++mov        r0.a, t0;
+mul     r0, r0, v0;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CmultAmult.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CmultAmult.inl
@@ -1,14 +1,14 @@
-
-ps.1.1
-
-// Multiply blend color, output product of alpha
-
-// Color is t0 * t1
-// Alpha is t0.a * t1.a
-
-tex		t0;
-tex		t1;
-
-mul		r0.rgb, t0, t1;
-+mul		r0.a, t0, t1;
-mul		r0, r0, v0;
+
+ps.1.1
+
+// Multiply blend color, output product of alpha
+
+// Color is t0 * t1
+// Alpha is t0.a * t1.a
+
+tex     t0;
+tex     t1;
+
+mul     r0.rgb, t0, t1;
++mul        r0.a, t0, t1;
+mul     r0, r0, v0;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CompCosines.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_CompCosines.inl
@@ -1,31 +1,31 @@
-
-// Composite the cosines together.
-// Input map is cosine(pix) for each of
-// the 4 waves.
-//
-// The constants are set up so:
-//		Nx = -freq * amp * dirX * cos(pix);
-//		Ny = -freq * amp * dirY * cos(pix);
-//	So c[i].x = -freq[i] * amp[i] * dirX[i]
-//	etc.
-// All textures are:
-//		(r,g,b,a) = (cos(), cos(), 1, 1)
-//
-// So c[0].z = 1, but all other c[i].z = 0
-// Note also the c4 used for biasing back at the end.
-
-ps.1.1
-
-tex		t0;
-tex		t1;
-tex		t2;
-tex		t3;
-
-mul		r0, t0_bx2, c0;
-mad		r0, t1_bx2, c1, r0;
-mad		r0, t2_bx2, c2, r0;
-mad		r0, t3_bx2, c3, r0;
-// Now bias it back into range [0..1] for output.
-mul		r0, r0, c4;		// c4 = (0.5, 0.5, 0.5, 1)
-add		r0, r0, c4;
-//mov		r0, c4;
+
+// Composite the cosines together.
+// Input map is cosine(pix) for each of
+// the 4 waves.
+//
+// The constants are set up so:
+//      Nx = -freq * amp * dirX * cos(pix);
+//      Ny = -freq * amp * dirY * cos(pix);
+//  So c[i].x = -freq[i] * amp[i] * dirX[i]
+//  etc.
+// All textures are:
+//      (r,g,b,a) = (cos(), cos(), 1, 1)
+//
+// So c[0].z = 1, but all other c[i].z = 0
+// Note also the c4 used for biasing back at the end.
+
+ps.1.1
+
+tex     t0;
+tex     t1;
+tex     t2;
+tex     t3;
+
+mul     r0, t0_bx2, c0;
+mad     r0, t1_bx2, c1, r0;
+mad     r0, t2_bx2, c2, r0;
+mad     r0, t3_bx2, c3, r0;
+// Now bias it back into range [0..1] for output.
+mul     r0, r0, c4;     // c4 = (0.5, 0.5, 0.5, 1)
+add     r0, r0, c4;
+//mov       r0, c4;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_GrassShader.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_GrassShader.inl
@@ -1,6 +1,6 @@
-ps.1.1
-
-// Grass shader. Just does a simple tex mult
-
-tex t0
-mul r0, t0, v0
+ps.1.1
+
+// Grass shader. Just does a simple tex mult
+
+tex t0
+mul r0, t0, v0
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_MoreCosines.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_MoreCosines.inl
@@ -1,35 +1,35 @@
-
-
-// Composite the cosines together.
-// Input map is cosine(pix) for each of
-// the 4 waves.
-//
-// The constants are set up so:
-//		Nx = -freq * amp * dirX * cos(pix);
-//		Ny = -freq * amp * dirY * cos(pix);
-//	So c[i].x = -freq[i] * amp[i] * dirX[i]
-//	etc.
-// All textures are:
-//		(r,g,b,a) = (cos(), cos(), 1, 1)
-//
-// Here all c[i].z = 0, because we're accumulating ontop
-// of layers that have been primed with z = 1.
-// Note also the c4 used for biasing back at the end.
-
-ps.1.1
-
-tex		t0;
-tex		t1;
-tex		t2;
-tex		t3;
-
-mul		r0, t0_bx2, c0;
-mad		r0, t1_bx2, c1, r0;
-mad		r0, t2_bx2, c2, r0;
-mad		r0, t3_bx2, c3, r0;
-
-// Now bias it back into range [0..1] for output.
-mul		r0.rgb, r0, c4;
-+mov	r0.a, c4;
-add		r0.rgb, r0, c5;
-//mov		r0, c4;
+
+
+// Composite the cosines together.
+// Input map is cosine(pix) for each of
+// the 4 waves.
+//
+// The constants are set up so:
+//      Nx = -freq * amp * dirX * cos(pix);
+//      Ny = -freq * amp * dirY * cos(pix);
+//  So c[i].x = -freq[i] * amp[i] * dirX[i]
+//  etc.
+// All textures are:
+//      (r,g,b,a) = (cos(), cos(), 1, 1)
+//
+// Here all c[i].z = 0, because we're accumulating ontop
+// of layers that have been primed with z = 1.
+// Note also the c4 used for biasing back at the end.
+
+ps.1.1
+
+tex     t0;
+tex     t1;
+tex     t2;
+tex     t3;
+
+mul     r0, t0_bx2, c0;
+mad     r0, t1_bx2, c1, r0;
+mad     r0, t2_bx2, c2, r0;
+mad     r0, t3_bx2, c3, r0;
+
+// Now bias it back into range [0..1] for output.
+mul     r0.rgb, r0, c4;
++mov    r0.a, c4;
+add     r0.rgb, r0, c5;
+//mov       r0, c4;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_ShoreLeave6.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_ShoreLeave6.inl
@@ -1,21 +1,21 @@
-
-ps.1.1
-
-def c0, 1.0, 1.0, 1.0, 1.0       // Temp Hack
-
-tex		t0;
-tex		t1;
-tex		t2;
-
-mov		r1.a, t1;
-lrp		r0.rgb, r1.a, t1, t0;
-+mul	r0.a, 1-t1, 1-t0;
-lrp		r0.rgb, t2.a, t2, r0;
-+mul	r0.a, 1-t2, r0;
-mul		r0.rgb, r0, v0;
-+mul		r0.a, 1-r0, v0;
-
-//mov		r0.a, c1;
-
-//mov r0.rgb, t2;
-//+mov r0.a, 1-t2;
+
+ps.1.1
+
+def c0, 1.0, 1.0, 1.0, 1.0       // Temp Hack
+
+tex     t0;
+tex     t1;
+tex     t2;
+
+mov     r1.a, t1;
+lrp     r0.rgb, r1.a, t1, t0;
++mul    r0.a, 1-t1, 1-t0;
+lrp     r0.rgb, t2.a, t2, r0;
++mul    r0.a, 1-t2, r0;
+mul     r0.rgb, r0, v0;
++mul        r0.a, 1-r0, v0;
+
+//mov       r0.a, c1;
+
+//mov r0.rgb, t2;
+//+mov r0.a, 1-t2;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_WaveDecEnv.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_WaveDecEnv.inl
@@ -1,35 +1,35 @@
-
-// Very simular to ps_WaveFixed.inl. Only the final coloring is different.
-// Even though so far they are identical.
-
-ps.1.1
-
-//def c0, 1.0, 0.0, 0.0, 1.0       // Temp Hack
-
-
-tex t0                  // Bind texture in stage 0 to register t0.
-texm3x3pad   t1,  t0_bx2   // First row of matrix multiply.
-texm3x3pad   t2,  t0_bx2   // Second row of matrix multiply.
-texm3x3vspec t3,  t0_bx2   // Third row of matrix multiply to get a 3-vector.
-                      // Reflect 3-vector by the eye-ray vector.
-                      // Use reflected vector to do a texture lookup
-                      // at stage 3.
-
-// t3 now has our reflected environment map value
-// We've (presumably) attenuated the effect on a vertex basis
-// and have our color w/ attenuated alpha in v0. So all we need
-// is to multiply t3 by v0 into r0 and we're done.
-mul			r0.rgb, t3, v0;
-+mul		r0.a, t0, v0;
-
-// mov r0, t0;
-
-/*
-tex t0;
-texcoord t1;
-texcoord t2;
-texcoord t3;
-
-mov	r0.rgb, t3;
-+mov r0.a, c0;
-*/
+
+// Very simular to ps_WaveFixed.inl. Only the final coloring is different.
+// Even though so far they are identical.
+
+ps.1.1
+
+//def c0, 1.0, 0.0, 0.0, 1.0       // Temp Hack
+
+
+tex t0                  // Bind texture in stage 0 to register t0.
+texm3x3pad   t1,  t0_bx2   // First row of matrix multiply.
+texm3x3pad   t2,  t0_bx2   // Second row of matrix multiply.
+texm3x3vspec t3,  t0_bx2   // Third row of matrix multiply to get a 3-vector.
+                      // Reflect 3-vector by the eye-ray vector.
+                      // Use reflected vector to do a texture lookup
+                      // at stage 3.
+
+// t3 now has our reflected environment map value
+// We've (presumably) attenuated the effect on a vertex basis
+// and have our color w/ attenuated alpha in v0. So all we need
+// is to multiply t3 by v0 into r0 and we're done.
+mul         r0.rgb, t3, v0;
++mul        r0.a, t0, v0;
+
+// mov r0, t0;
+
+/*
+tex t0;
+texcoord t1;
+texcoord t2;
+texcoord t3;
+
+mov r0.rgb, t3;
++mov r0.a, c0;
+*/
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_WaveFixed.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_WaveFixed.inl
@@ -1,77 +1,77 @@
-//ps.1.1
-
-// def c0, 1.0, 0.0, 0.0, 1.0
-
-// mov r0, c0
-
-// Short pixel shader. Use the texm3x3vspec to do a per-pixel
-// reflected lookup into our environment map.
-// Input:
-//    t0    - Normal map in tangent space. Apply _bx2 modifier to shift
-//             [0..255] -> [-1..1]
-//    t1    - UVW = tangent + eye2pos.x, map ignored.
-//    t2    - UVW = binormal + eye2pos.y, map ignored
-//    t3    - UVW = normal + eye2pos.z, map = environment cube map
-//    v0    - attenuating color/alpha.
-//    See docs on texm3x3vspec for explanation of the eye2pos wackiness.
-// Output:
-//    r0 = reflected lookup from environment map X input v0.
-//    Since environment map has alpha = 255, the output of this
-//    shader can be used for either alpha or additive blending,
-//    as long as v0 is fed in appropriately.
-
-ps.1.1
-
-def c0, 1.0, 0.0, 0.0, 1.0       // Temp Hack
-/*
-def c1, 0.0, 1.0, 0.0, 1.0
-def c2, 0.0, 0.0, 1.0, 1.0
-*/
-
-
-tex t0                  // Bind texture in stage 0 to register t0.
-texm3x3pad   t1,  t0_bx2   // First row of matrix multiply.
-texm3x3pad   t2,  t0_bx2   // Second row of matrix multiply.
-texm3x3vspec t3,  t0_bx2   // Third row of matrix multiply to get a 3-vector.
-                      // Reflect 3-vector by the eye-ray vector.
-                      // Use reflected vector to do a texture lookup
-                      // at stage 3.
-
-// t3 now has our reflected environment map value
-// We've (presumably) attenuated the effect on a vertex basis
-// and have our color w/ attenuated alpha in v0. So all we need
-// is to multiply t3 by v0 into r0, add our base color from v1 and we're done.
-mad			r0.rgb, t3, v0, v1;
-/* HACKAGE
-//+mul			r0.a, v1, v0;
-HACKAGE */
-mov r0.a, v0; //HACKAGE
-/*
-mov	r0.rgb, v0;
-mov r0.a, v0;
-*/
-
-/*
-tex t0;
-texcoord t1;
-texcoord t2;
-texcoord t3;
-
-mov r0.rgb, t3;
-
-+mov	r0.a, c0;
-*/
-
-
-
-
-
-/*
-tex t0;
-texcoord t1;
-texcoord t2;
-texcoord t3;
-
-mul r0.rgb, t0_bx2, c1;
-+mov r0.a, c2;
-*/
+//ps.1.1
+
+// def c0, 1.0, 0.0, 0.0, 1.0
+
+// mov r0, c0
+
+// Short pixel shader. Use the texm3x3vspec to do a per-pixel
+// reflected lookup into our environment map.
+// Input:
+//    t0    - Normal map in tangent space. Apply _bx2 modifier to shift
+//             [0..255] -> [-1..1]
+//    t1    - UVW = tangent + eye2pos.x, map ignored.
+//    t2    - UVW = binormal + eye2pos.y, map ignored
+//    t3    - UVW = normal + eye2pos.z, map = environment cube map
+//    v0    - attenuating color/alpha.
+//    See docs on texm3x3vspec for explanation of the eye2pos wackiness.
+// Output:
+//    r0 = reflected lookup from environment map X input v0.
+//    Since environment map has alpha = 255, the output of this
+//    shader can be used for either alpha or additive blending,
+//    as long as v0 is fed in appropriately.
+
+ps.1.1
+
+def c0, 1.0, 0.0, 0.0, 1.0       // Temp Hack
+/*
+def c1, 0.0, 1.0, 0.0, 1.0
+def c2, 0.0, 0.0, 1.0, 1.0
+*/
+
+
+tex t0                  // Bind texture in stage 0 to register t0.
+texm3x3pad   t1,  t0_bx2   // First row of matrix multiply.
+texm3x3pad   t2,  t0_bx2   // Second row of matrix multiply.
+texm3x3vspec t3,  t0_bx2   // Third row of matrix multiply to get a 3-vector.
+                      // Reflect 3-vector by the eye-ray vector.
+                      // Use reflected vector to do a texture lookup
+                      // at stage 3.
+
+// t3 now has our reflected environment map value
+// We've (presumably) attenuated the effect on a vertex basis
+// and have our color w/ attenuated alpha in v0. So all we need
+// is to multiply t3 by v0 into r0, add our base color from v1 and we're done.
+mad         r0.rgb, t3, v0, v1;
+/* HACKAGE
+//+mul          r0.a, v1, v0;
+HACKAGE */
+mov r0.a, v0; //HACKAGE
+/*
+mov r0.rgb, v0;
+mov r0.a, v0;
+*/
+
+/*
+tex t0;
+texcoord t1;
+texcoord t2;
+texcoord t3;
+
+mov r0.rgb, t3;
+
++mov    r0.a, c0;
+*/
+
+
+
+
+
+/*
+tex t0;
+texcoord t1;
+texcoord t2;
+texcoord t3;
+
+mul r0.rgb, t0_bx2, c1;
++mov r0.a, c2;
+*/
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_WaveGraph.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_WaveGraph.inl
@@ -1,30 +1,30 @@
-
-ps.1.1
-
-// Have a couple extra textures to burn here. Only thing
-// I've thought of is to have an additional texture to
-// make the front of the wave solid. So it's UVW would be
-// the same as the base texture, but the texture itself would
-// be just a thin horizontal band of alpha. Then just add that
-// alpha to the output alpha.
-//
-// Let's get the first cut running first.
-
-tex		t0;
-tex		t1;
-tex		t2;
-
-//mul		r0, v0, t0;
-//mul		r0, r0, t1;
-//add		r0.a, r0, t2;
-
-// 1.0 mov		r0, t0;
-// 1.0 mul		r0, r0, t1;
-mul		r0, t0, t1;
-// TEST add		r0.a, r0, t2; // TEST
-add		r0, r0, t2; // TEST
-mul		r0, r0, v0;
-
-//mul		r0.rgb, r0, r0.a; // TEST
-
-//mov r0, t1;
+
+ps.1.1
+
+// Have a couple extra textures to burn here. Only thing
+// I've thought of is to have an additional texture to
+// make the front of the wave solid. So it's UVW would be
+// the same as the base texture, but the texture itself would
+// be just a thin horizontal band of alpha. Then just add that
+// alpha to the output alpha.
+//
+// Let's get the first cut running first.
+
+tex     t0;
+tex     t1;
+tex     t2;
+
+//mul       r0, v0, t0;
+//mul       r0, r0, t1;
+//add       r0.a, r0, t2;
+
+// 1.0 mov      r0, t0;
+// 1.0 mul      r0, r0, t1;
+mul     r0, t0, t1;
+// TEST add     r0.a, r0, t2; // TEST
+add     r0, r0, t2; // TEST
+mul     r0, r0, v0;
+
+//mul       r0.rgb, r0, r0.a; // TEST
+
+//mov r0, t1;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_WaveGrid.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_WaveGrid.inl
@@ -1,63 +1,63 @@
-//ps.1.1
-
-// def c0, 1.0, 0.0, 0.0, 1.0
-
-// mov r0, c0
-
-// Short pixel shader. Use the texm3x3vspec to do a per-pixel
-// reflected lookup into our environment map.
-// Input:
-//    t0    - Normal map in tangent space. Apply _bx2 modifier to shift
-//             [0..255] -> [-1..1]
-//    t1    - UVW = tangent + eye2pos.x, map ignored.
-//    t2    - UVW = binormal + eye2pos.y, map ignored
-//    t3    - UVW = normal + eye2pos.z, map = environment cube map
-//    v0    - attenuating color/alpha.
-//    See docs on texm3x3vspec for explanation of the eye2pos wackiness.
-// Output:
-//    r0 = reflected lookup from environment map X input v0.
-//    Since environment map has alpha = 255, the output of this
-//    shader can be used for either alpha or additive blending,
-//    as long as v0 is fed in appropriately.
-
-ps.1.1
-
-//def c0, 1.0, 1.0, 1.0, 1.0       // Temp Hack
-//def c1, 2.0, 2.0, 2.0, 1.0
-
-//texcoord t0;
-//texcoord t1;
-//texcoord t2;
-//texcoord t3;
-
-tex t0                  // Bind texture in stage 0 to register t0.
-texm3x3pad   t1,  t0_bx2   // First row of matrix multiply.
-texm3x3pad   t2,  t0_bx2   // Second row of matrix multiply.
-texm3x3vspec t3,  t0_bx2   // Third row of matrix multiply to get a 3-vector.
-                      // Reflect 3-vector by the eye-ray vector.
-                      // Use reflected vector to do a texture lookup
-                      // at stage 3.
-
-// t3 now has our reflected environment map value
-// We've (presumably) attenuated the effect on a vertex basis
-// and have our color w/ attenuated alpha in v0. So all we need
-// is to multiply t3 by v0 into r0 and we're done.
-mad			r0.rgb, t3, v1, v0;
-//add			r0.rgb, t3, v0;
-+mov			r0.a, v1;
-
-//mov r0.rgb, v1.a; // HACKAGE
-//mov r0.a, v1.a; // HACKAGE
-//mov r0, v1; // HACKAGE
-
-//mov r0, c0
-
-//mul r0, r0, t0;
-
-//mov		r0, v1;
-//mov			r0, t3;
-
-//mov	r0.rgb, t3;
-//+mov r0.a, c0;
-
-
+//ps.1.1
+
+// def c0, 1.0, 0.0, 0.0, 1.0
+
+// mov r0, c0
+
+// Short pixel shader. Use the texm3x3vspec to do a per-pixel
+// reflected lookup into our environment map.
+// Input:
+//    t0    - Normal map in tangent space. Apply _bx2 modifier to shift
+//             [0..255] -> [-1..1]
+//    t1    - UVW = tangent + eye2pos.x, map ignored.
+//    t2    - UVW = binormal + eye2pos.y, map ignored
+//    t3    - UVW = normal + eye2pos.z, map = environment cube map
+//    v0    - attenuating color/alpha.
+//    See docs on texm3x3vspec for explanation of the eye2pos wackiness.
+// Output:
+//    r0 = reflected lookup from environment map X input v0.
+//    Since environment map has alpha = 255, the output of this
+//    shader can be used for either alpha or additive blending,
+//    as long as v0 is fed in appropriately.
+
+ps.1.1
+
+//def c0, 1.0, 1.0, 1.0, 1.0       // Temp Hack
+//def c1, 2.0, 2.0, 2.0, 1.0
+
+//texcoord t0;
+//texcoord t1;
+//texcoord t2;
+//texcoord t3;
+
+tex t0                  // Bind texture in stage 0 to register t0.
+texm3x3pad   t1,  t0_bx2   // First row of matrix multiply.
+texm3x3pad   t2,  t0_bx2   // Second row of matrix multiply.
+texm3x3vspec t3,  t0_bx2   // Third row of matrix multiply to get a 3-vector.
+                      // Reflect 3-vector by the eye-ray vector.
+                      // Use reflected vector to do a texture lookup
+                      // at stage 3.
+
+// t3 now has our reflected environment map value
+// We've (presumably) attenuated the effect on a vertex basis
+// and have our color w/ attenuated alpha in v0. So all we need
+// is to multiply t3 by v0 into r0 and we're done.
+mad         r0.rgb, t3, v1, v0;
+//add           r0.rgb, t3, v0;
++mov            r0.a, v1;
+
+//mov r0.rgb, v1.a; // HACKAGE
+//mov r0.a, v1.a; // HACKAGE
+//mov r0, v1; // HACKAGE
+
+//mov r0, c0
+
+//mul r0, r0, t0;
+
+//mov       r0, v1;
+//mov           r0, t3;
+
+//mov   r0.rgb, t3;
+//+mov r0.a, c0;
+
+
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_WaveRip.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/ps_WaveRip.inl
@@ -1,21 +1,21 @@
-
-ps.1.1
-
-//def c0, 1.0, 0.0, 0.0, 1.0       // Temp Hack
-
-// Want
-// Color: vert.rgb * t0.rgb
-// Alpha: vert.a * t0.a * t1.a
-
-tex t0;
-//tex t1;
-
-//mul		r0.rgb, v0, t0;
-//+mul	r0.a, v0.a, t0.a;
-//mul		r0.a, r0.a, t1.a;
-
-//mul r0, t0, t1;
-
-mul	r0, t0, v0;
-
-//mov r0, t0;
+
+ps.1.1
+
+//def c0, 1.0, 0.0, 0.0, 1.0       // Temp Hack
+
+// Want
+// Color: vert.rgb * t0.rgb
+// Alpha: vert.a * t0.a * t1.a
+
+tex t0;
+//tex t1;
+
+//mul       r0.rgb, v0, t0;
+//+mul  r0.a, v0.a, t0.a;
+//mul       r0.a, r0.a, t1.a;
+
+//mul r0, t0, t1;
+
+mul r0, t0, v0;
+
+//mov r0, t0;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_BiasNormals.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_BiasNormals.inl
@@ -1,34 +1,34 @@
-
-
-vs.1.1
-
-dcl_position v0
-dcl_texcoord0 v7
-
-// Take in a screen space position,
-// transform the UVW,
-// and spit it out.
-// c0 = uvXform0[0]
-// c1 = uvXform0[1]
-// c2 = uvXform1[0]
-// c3 = uvXform1[1]
-// c4 = (0,0.5,1.0,2.0)
-// c5 = (noiseScale, bias, 0, 1)
-
-mov oPos, v0;
-
-mov r0.zw, c4.xxxz; // yzw will stay constant (0,0,1);
-
-dp4 r0.x, v7, c0;
-dp4 r0.y, v7, c1;
-
-mov oT0, r0;
-
-dp4 r0.x, v7, c2;
-dp4 r0.y, v7, c3;
-
-mov oT1, r0;
-
-mov oD0, c5.xxzz;
-mov oD1, c5.yyzz;
-
+
+
+vs.1.1
+
+dcl_position v0
+dcl_texcoord0 v7
+
+// Take in a screen space position,
+// transform the UVW,
+// and spit it out.
+// c0 = uvXform0[0]
+// c1 = uvXform0[1]
+// c2 = uvXform1[0]
+// c3 = uvXform1[1]
+// c4 = (0,0.5,1.0,2.0)
+// c5 = (noiseScale, bias, 0, 1)
+
+mov oPos, v0;
+
+mov r0.zw, c4.xxxz; // yzw will stay constant (0,0,1);
+
+dp4 r0.x, v7, c0;
+dp4 r0.y, v7, c1;
+
+mov oT0, r0;
+
+dp4 r0.x, v7, c2;
+dp4 r0.y, v7, c3;
+
+mov oT1, r0;
+
+mov oD0, c5.xxzz;
+mov oD1, c5.yyzz;
+
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_CompCosines.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_CompCosines.inl
@@ -1,31 +1,31 @@
-vs.1.1
-
-dcl_position v0
-dcl_texcoord0 v7
-
-// Take in a screen space position,
-// transform the UVW,
-// and spit it out.
-// c4 = (0,0.5,1.0,2.0)
-
-//mov r0, v0;
-//mov r0.w, c4.zzzz;
-//mov oPos, r0;
-mov oPos, v0;
-
-dp4 r0.x, v7, c0;
-mov r0.yzw, c4.xxxz; // yzw will stay constant (0,0,1);
-
-mov oT0, r0;
-
-dp4 r0.x, v7, c1;
-
-mov oT1, r0;
-
-dp4 r0.x, v7, c2;
-
-mov oT2, r0;
-
-dp4 r0.x, v7, c3;
-
-mov oT3, r0;
+vs.1.1
+
+dcl_position v0
+dcl_texcoord0 v7
+
+// Take in a screen space position,
+// transform the UVW,
+// and spit it out.
+// c4 = (0,0.5,1.0,2.0)
+
+//mov r0, v0;
+//mov r0.w, c4.zzzz;
+//mov oPos, r0;
+mov oPos, v0;
+
+dp4 r0.x, v7, c0;
+mov r0.yzw, c4.xxxz; // yzw will stay constant (0,0,1);
+
+mov oT0, r0;
+
+dp4 r0.x, v7, c1;
+
+mov oT1, r0;
+
+dp4 r0.x, v7, c2;
+
+mov oT2, r0;
+
+dp4 r0.x, v7, c3;
+
+mov oT3, r0;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_GrassShader.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_GrassShader.inl
@@ -1,60 +1,60 @@
-vs.1.1
-
-// Grass shader. Moves verts according sine waves seeded by position
-// Based on the article "Animated Grass with Pixel and Vertex Shaders"
-// by John Isidoro and Drew Card, in the book
-// "Direct3D ShaderX Vertex and Pixel Shader Tips and Tricks"
-
-// c0 = Local2NDC
-// c4 = (0.0, 0.5, 1.0, 2.0)
-// c5 = (time, X, X, X)
-// c6 = Pi constants
-// c7 = Sin constants (-1/3!, 1/!5, -1/7!, 1/9!)
-// c8 = waveDistortX
-// c9 = waveDistortY
-// c10 = waveDistortZ
-// c11 = waveDirX (0.25, 0.0, -0.7, -0.8)
-// c12 = waveDirY (0.0, 0.15, -0.7, 0.1)
-// c13 = waveSpeed (0.2, 0.15, 0.4, 0.4)
-
-dcl_position v0
-dcl_color v5
-dcl_texcoord0 v7
-
-mul r0, c11, v0.x		// pos X,Y input to waves
-mad r0, c12, v0.y, r0
-
-mov r1, c5.x			// time
-mad r0, r1, c13, r0		// scale by speed and add to X,Y input
-frc r0.xy, r0
-frc r1.xy, r0.zwzw
-mov r0.zw, r1.xyxy
-
-sub r0, r0, c4.y		// - 0.5
-mul r1, r0, c6.w		// *= 2 pi
-
-mul r2, r1, r1			// ^2
-mul r3, r2, r1			// ^3
-mul r5, r3, r2			// ^5
-mul r7, r5, r2			// ^7
-mul r9, r7, r2			// ^9
-
-mad r0, r3, c7.x, r1	// - r1^3 / 3!
-mad r0, r5, c7.y, r0	// + r1^5 / 5!
-mad r0, r7, c7.z, r0	// - r1^7 / 7!
-mad r0, r9, c7.w, r0	// + r1^9 / 9!
-
-dp4 r3.x, r0, c8
-dp4 r3.y, r0, c9
-dp4 r3.zw, r0, c10
-
-sub r4, c4.z, v7.y
-mul r3, r3, r4		// mult by Y tex coord. So the waves only affect the top verts
-mov r2.w, v0			//
-add r2.xyz, r3, v0		// add offset to position
-
-m4x4 oPos, r2, c0		// trans to NDC
-
-mov oFog, c4.z		// no fog
-mov oD0, v5
-mov oT0, v7
+vs.1.1
+
+// Grass shader. Moves verts according sine waves seeded by position
+// Based on the article "Animated Grass with Pixel and Vertex Shaders"
+// by John Isidoro and Drew Card, in the book
+// "Direct3D ShaderX Vertex and Pixel Shader Tips and Tricks"
+
+// c0 = Local2NDC
+// c4 = (0.0, 0.5, 1.0, 2.0)
+// c5 = (time, X, X, X)
+// c6 = Pi constants
+// c7 = Sin constants (-1/3!, 1/!5, -1/7!, 1/9!)
+// c8 = waveDistortX
+// c9 = waveDistortY
+// c10 = waveDistortZ
+// c11 = waveDirX (0.25, 0.0, -0.7, -0.8)
+// c12 = waveDirY (0.0, 0.15, -0.7, 0.1)
+// c13 = waveSpeed (0.2, 0.15, 0.4, 0.4)
+
+dcl_position v0
+dcl_color v5
+dcl_texcoord0 v7
+
+mul r0, c11, v0.x       // pos X,Y input to waves
+mad r0, c12, v0.y, r0
+
+mov r1, c5.x            // time
+mad r0, r1, c13, r0     // scale by speed and add to X,Y input
+frc r0.xy, r0
+frc r1.xy, r0.zwzw
+mov r0.zw, r1.xyxy
+
+sub r0, r0, c4.y        // - 0.5
+mul r1, r0, c6.w        // *= 2 pi
+
+mul r2, r1, r1          // ^2
+mul r3, r2, r1          // ^3
+mul r5, r3, r2          // ^5
+mul r7, r5, r2          // ^7
+mul r9, r7, r2          // ^9
+
+mad r0, r3, c7.x, r1    // - r1^3 / 3!
+mad r0, r5, c7.y, r0    // + r1^5 / 5!
+mad r0, r7, c7.z, r0    // - r1^7 / 7!
+mad r0, r9, c7.w, r0    // + r1^9 / 9!
+
+dp4 r3.x, r0, c8
+dp4 r3.y, r0, c9
+dp4 r3.zw, r0, c10
+
+sub r4, c4.z, v7.y
+mul r3, r3, r4      // mult by Y tex coord. So the waves only affect the top verts
+mov r2.w, v0            //
+add r2.xyz, r3, v0      // add offset to position
+
+m4x4 oPos, r2, c0       // trans to NDC
+
+mov oFog, c4.z      // no fog
+mov oD0, v5
+mov oT0, v7
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_ShoreLeave6.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_ShoreLeave6.inl
@ -1,245 +1,245 @@
-vs.1.1
-
-dcl_position v0
-dcl_color v5
-dcl_texcoord0 v7
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c25; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c16.z;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = reflection strength (transparency)
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c30 = waterlevel + offset
-//	c31 = (maxAtten - minAtten) / depthFalloff
-//	c32 = minAtten.
-// And in particular:
-//	c30.w = waterlevel
-//	c31.w = 1.f;
-//	c32.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c8, r6.xxxx;
-mad		r0, c9, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c5;
-add			r0, r0, c6;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c15.wwww;
-add			r0, r0, c15.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c15.wwww;
-//    dist += -kPi;
-sub         r0, r0, c15.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c13.wwww, r2;
-mad         r1, r4, c14.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c30, r6.zzzz;
-mul			r4, r4, c31;
-add			r4, r4, c32;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c16.zzzz;
-max			r4.xyz, r4, c16.xxxx;
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c29;
-max			r11, r11, c16.xxxx;
-min			r11, r11, c16.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r2, r2, c7;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r2, c16.zzzz;
-
-// Smooth the approach to the shore.
-sub		r10.x, r6.z, c30.w;			// r10.x = height
-mul		r10.x, r10.x, r10.x;		// r10.x = h^2
-mul		r10.x, r10.x, c10.x;		// r10.x = -h^2 * k1 / k2^2
-add		r10.x, r10.x, c10.y;		// r10.x = k1 + -h^2 * k1 / k2^2
-max		r10.x, r10.x, c16.xxxx;		// Clamp to >= zero
-add		r8.x, r8.x, r10.x;			// r8.x += del
-
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c30.w;
-max			r6.z, r6.z, r8.z;
-add			r6.z, r6.z, c12.z;
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= kFreq.xyzw;
-mul         r1, r1, c5;
-//    cosDist *= kAmplitude.xyzw; // Combine?
-mul         r1, r1, c7;
-//    cosDist *= filter;
-mul         r1, r1, r11;
-//
-// accumCos = (0, 0, 0, 0);
-mov         r7, c16.xxxx;
-//    temp = dp4( cosDist, toCenter_X );
-//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
-dp4         r7.x, r1, -c8
-//
-//    temp = dp4( cosDist, toCenter_Y );
-//    accumCos.y += temp.xxxx;
-dp4         r7.y, r1, -c9
-//
-// }
-//
-// accumBin = (1, 0, -accumCos.x);
-// accumTan = (0, 1, -accumCos.y);
-// accumNorm = (accumCos.x, accumCos.y, 1);
-mov         r11, c16.xxzx;
-add         r11, r11, r7;
-dp3         r10.x, r11, r11;
-rsq         r10.x, r10.x;
-mul         r11, r11, r10.xxxx;
-
-//
-// Add in our scrunch (offset in X/Y plane).
-// Scale down our scrunch amount by the wave scaling
-mul			r10.x, c12.y, r4.z;
-mad         r6.xy, r11.xy, r10.xx, r6.xy;
-
-//   mul			r6.z, r6.z, r10.xxxx; DEBUG
-
-//   mad         r6, r11, c12.yyzz, r6;
-
-// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
-// accumCos *= (scrunchScale, scrunchScale, 0, 0);
-
-//##mul			r2.x, r6.z, c12.x;
-//##add			r2.x, r2.x, c16.z;
-
-//##mul			r7.xy, r7.xy, r2.xx;
-
-// This is actually wrong, but useful right now for visualizing the generated coords.
-// See below for correct version.
-
-//##sub			r3, c16.xxzx, r7.xyzz;
-
-// Normalize?
-
-
-// Now rotate our normal vector into the wind
-//##dp3		r0.x, r3, c18.xyww;
-//##dp3		r0.y, r3, c18.zxww;
-//##mov		r3.xy, r0;
-
-// Initialize r0.w
-mov			r0.w, c16.zzzz;
-
-//##dp3         r0.x, r3, r3;
-//##rsq         r0.x, r0.x;
-//##mul			r3, r3, r0.xxxw;
-
-
-//
-// // Transform position to screen
-//
-//
-//m4x3	r6, v0, c25; // HACKAGE
-//mov		r6.w, c16.z; // HACKAGE
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c11.x;
-mul			oFog, r10.x, c11.y;
-mov			oPos, r9;
-
-
-// Color
-mul	oD0,	c4, v5.xxxx;
-
-// UVW0
-// This layer just stays put. The motion's in the texture
-// U = transformed U
-// V = transformed V
-dp4			r0.x, v7, c19;
-dp4			r0.y, v7, c20;
-//mul			r0.y, r0.y, -c16.z;
-//add			r0.y, r0.y, c16.z;
-//add			r0.y, r0.y, c16.z;
-//add			r0.y, r0.y, c16.y;
-mov			oT0, r0.xyww;
-mov			oT1, r0.xyww;
-mov			oT2, r0.xyww;
-
+vs.1.1
+
+dcl_position v0      // v0 = input vertex position (local space)
+dcl_color v5         // v5 = input diffuse color; per-channel usage is described in the comment block below
+dcl_texcoord0 v7     // v7 = input texture coords, transformed by c19/c20 at the end of the shader
+
+// Store our input position in world space in r6
+m4x3        r6, v0, c25; // v0 * l2w (matrix rows start at c25)
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c16.z; // c16.z is the constant this shader uses as 1 (see kOne / the [0..1] clamps below)
+
+//
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = reflection strength (transparency)
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c30 = waterlevel + offset
+//  c31 = (maxAtten - minAtten) / depthFalloff
+//  c32 = minAtten.
+// And in particular:
+//  c30.w = waterlevel
+//  c31.w = 1.f;
+//  c32.w = 0;
+// So r4.w is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+mul     r0, c8, r6.xxxx;     // one wave per channel: c8 * world x
+mad     r0, c9, r6.yyyy, r0; // + c9 * world y  => r0 = dist for each of the four waves
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+mul         r0, r0, c5;      // c5 = kFreq
+add         r0, r0, c6;      // c6 = kPhase
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist *= rcp(kTwoPi);
+rcp         r4, c15.wwww;    // r4 = 1 / kTwoPi (c15.w = kTwoPi)
+add         r0, r0, c15.zzzz; // bias by +kPi (c15.z) before wrapping; it is subtracted again after the frac
+mul         r0, r0, r4;
+//    dist = frac(dist);  (done one channel at a time: expp leaves the fractional part in .y)
+expp     r1.y, r0.xxxx      // r1.y = frac(r0.x)
+mov      r1.x, r1.yyyy      // r1.x = frac(r0.x)
+expp     r1.y, r0.zzzz      // r1.y = frac(r0.z)
+mov      r1.z, r1.yyyy      // r1.z = frac(r0.z)
+expp     r1.y, r0.wwww      // r1.y = frac(r0.w)
+mov      r1.w, r1.yyyy      // r1.w = frac(r0.w)
+expp     r1.y, r0.yyyy      // y is done last so frac(r0.y) is left in r1.y with no extra mov
+//    dist *= kTwoPi;
+mul         r0, r1, c15.wwww;
+//    dist += -kPi;
+sub         r0, r0, c15.zzzz;
+
+//
+//    sincos(dist, sinDist, cosDist);
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w   (vSin = c13)
+// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w    (vCos = c14, with c14.x supplying the leading 1)
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5 (overwrites the 1/kTwoPi held in r4, which is no longer needed)
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y
+mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6 (r4 held r0^5, whose last use was the sin term above)
+mad         r2, r5, c13.wwww, r2; // r2 += r0^7 * vSin.w  => r2 = sinDist
+mad         r1, r4, c14.wwww, r1; // r1 += r0^6 * vCos.w  => r1 = cosDist
+
+// Calc our depth based filtering here into r4 (because we don't use it again
+// after here, and we need our filtering shortly).
+sub         r4, c30, r6.zzzz;      // depth = (waterlevel + offset) - world z, per channel
+mul         r4, r4, c31;           // *= (maxAtten - minAtten) / depthFalloff
+add         r4, r4, c32;           // += minAtten
+// Clamp .xyz to range [0..1]
+min         r4.xyz, r4, c16.zzzz;  // upper bound (c16.z)
+max         r4.xyz, r4, c16.xxxx;  // lower bound (c16.x); only r4.z (wave scaling) is read later in this shader
+
+// Calc our filter (see above).
+mul         r11, v5.wwww, c29;     // NOTE(review): c29 is presumably the per-wave length term of the formula above; no explicit "- 1" is applied here - confirm against the code that sets c29
+max         r11, r11, c16.xxxx;    // clamp the per-wave filter from below (c16.x)
+min         r11, r11, c16.zzzz;    // clamp the per-wave filter from above (c16.z)
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+mul         r2, r2, c7;            // c7 = kAmplitude
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r2, c16.zzzz;    // r8.x = sum of the four filtered, scaled sines
+
+// Smooth the approach to the shore.
+sub     r10.x, r6.z, c30.w;         // r10.x = h = world z minus the water level (c30.w)
+mul     r10.x, r10.x, r10.x;        // r10.x = h^2
+mul     r10.x, r10.x, c10.x;        // r10.x = -h^2 * k1 / k2^2
+add     r10.x, r10.x, c10.y;        // r10.x = k1 + -h^2 * k1 / k2^2
+max     r10.x, r10.x, c16.xxxx;     // Clamp to >= zero
+add     r8.x, r8.x, r10.x;          // r8.x += del
+
+mul         r8.y, r8.x, r4.z;      // scale the wave by the depth-based wave scaling (r4.z)
+add         r8.z, r8.y, c30.w;     // offset by the water level to get a world-space height
+max         r6.z, r6.z, r8.z;      // never drop below the original world z of the vertex
+add         r6.z, r6.z, c12.z;     // extra constant z offset (NOTE(review): meaning of c12.z is not documented in this file)
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level (plus the c12.z offset)
+//
+//    cosDist *= kFreq.xyzw;
+mul         r1, r1, c5;
+//    cosDist *= kAmplitude.xyzw; // Combine?
+mul         r1, r1, c7;
+//    cosDist *= filter;
+mul         r1, r1, r11;           // last read of the filter; r11 is reused for the normal below
+//
+// accumCos = (0, 0, 0, 0);
+mov         r7, c16.xxxx;
+//    temp = dp4( cosDist, toCenter_X );
+//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0))
+dp4         r7.x, r1, -c8          // accumCos.x = -sum(cosDist * c8)
+//
+//    temp = dp4( cosDist, toCenter_Y );
+//    accumCos.y += temp.xxxx;
+dp4         r7.y, r1, -c9          // accumCos.y = -sum(cosDist * c9)
+//
+// }
+//
+// accumBin = (1, 0, -accumCos.x);
+// accumTan = (0, 1, -accumCos.y);
+// accumNorm = (accumCos.x, accumCos.y, 1);
+mov         r11, c16.xxzx;         // r11 = (c16.x, c16.x, c16.z, c16.x), i.e. the constant part of accumNorm
+add         r11, r11, r7;          // r11 = accumNorm (r7.zw are still the c16.x written above)
+dp3         r10.x, r11, r11;       // squared length of accumNorm
+rsq         r10.x, r10.x;          // 1 / length
+mul         r11, r11, r10.xxxx;    // r11 = normalized accumNorm
+
+//
+// Add in our scrunch (offset in X/Y plane).
+// Scale down our scrunch amount by the wave scaling
+mul         r10.x, c12.y, r4.z;            // r10.x = c12.y * depth-based wave scaling (r4.z)
+mad         r6.xy, r11.xy, r10.xx, r6.xy;  // world xy += normal.xy * r10.x
+
+//   mul            r6.z, r6.z, r10.xxxx; DEBUG
+
+//   mad         r6, r11, c12.yyzz, r6;
+
+// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
+// accumCos *= (scrunchScale, scrunchScale, 0, 0);
+
+//##mul         r2.x, r6.z, c12.x;
+//##add         r2.x, r2.x, c16.z;
+
+//##mul         r7.xy, r7.xy, r2.xx;
+
+// This is actually wrong, but useful right now for visualizing the generated coords.
+// See below for correct version.
+
+//##sub         r3, c16.xxzx, r7.xyzz;
+
+// Normalize?
+
+
+// Now rotate our normal vector into the wind
+//##dp3     r0.x, r3, c18.xyww;
+//##dp3     r0.y, r3, c18.zxww;
+//##mov     r3.xy, r0;
+
+// Initialize r0.w (it supplies the z and w of the r0.xyww texture coords written at the end)
+mov         r0.w, c16.zzzz;
+
+//##dp3         r0.x, r3, r3;
+//##rsq         r0.x, r0.x;
+//##mul         r3, r3, r0.xxxw;
+
+
+//
+// // Transform position to screen
+//
+//
+//m4x3  r6, v0, c25; // HACKAGE
+//mov       r6.w, c16.z; // HACKAGE
+//m4x4     oPos, r6, c0; // ADDFOG
+m4x4        r9, r6, c0;            // r9 = displaced world position transformed by the matrix at c0
+add         r10.x, r9.w, c11.x;    // fog is derived from the transformed w:
+mul         oFog, r10.x, c11.y;    //   oFog = (r9.w + c11.x) * c11.y
+mov         oPos, r9;
+
+
+// Color
+mul oD0,    c4, v5.xxxx;           // c4 scaled by v5.r (the "overall transparency" channel described above)
+
+// UVW0
+// This layer just stays put. The motion's in the texture
+// U = transformed U
+// V = transformed V
+dp4         r0.x, v7, c19;         // U = v7 . c19
+dp4         r0.y, v7, c20;         // V = v7 . c20
+//mul           r0.y, r0.y, -c16.z;
+//add           r0.y, r0.y, c16.z;
+//add           r0.y, r0.y, c16.z;
+//add           r0.y, r0.y, c16.y;
+mov         oT0, r0.xyww;          // all three texture stages receive the same (U, V, r0.w, r0.w)
+mov         oT1, r0.xyww;
+mov         oT2, r0.xyww;
+
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_ShoreLeave7.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_ShoreLeave7.inl
@ -1,203 +1,203 @@
-
-vs.1.1
-
-dcl_position v0
-dcl_color v5
-dcl_texcoord0 v7
-
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c25; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c16.z;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = reflection strength (transparency)
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c30 = waterlevel + offset
-//	c31 = (maxAtten - minAtten) / depthFalloff
-//	c32 = minAtten.
-// And in particular:
-//	c30.w = waterlevel
-//	c31.w = 1.f;
-//	c32.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c8, r6.xxxx;
-mad		r0, c9, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c5;
-add			r0, r0, c6;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c15.wwww;
-add			r0, r0, c15.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c15.wwww;
-//    dist += -kPi;
-sub         r0, r0, c15.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c13.wwww, r2;
-mad         r1, r4, c14.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c30, r6.zzzz;
-mul			r4, r4, c31;
-add			r4, r4, c32;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c16.zzzz;
-max			r4.xyz, r4, c16.xxxx;
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c29;
-max			r11, r11, c16.xxxx;
-min			r11, r11, c16.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r2, r2, c7;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r2, c16.zzzz;
-
-// Smooth the approach to the shore.
-/*
-sub		r10.x, r6.z, c30.w;			// r10.x = height
-mul		r10.x, r10.x, r10.x;		// r10.x = h^2
-mul		r10.x, r10.x, c10.x;		// r10.x = -h^2 * k1 / k2^2
-add		r10.x, r10.x, c10.y;		// r10.x = k1 + -h^2 * k1 / k2^2
-max		r10.x, r10.x, c16.xxxx;		// Clamp to >= zero
-add		r8.x, r8.x, r10.x;			// r8.x += del
-*/
-
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c30.w;
-max			r6.z, r6.z, r8.z;
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= filter;
-mul         r1, r1, r11;
-
-// Pos = (in.x + S, in.y + R, r6.z)
-// S = sum(k Dir.x A cos())
-// R = sum(k Dir.y A cos())
-// c17 = k Dir.x A
-// c18 = k Dir.y A
-//    S = sum(cosDist * c17);
-dp4         r7.x, r1, c17;
-dp4			r7.y, r1, c18;
-
-add			r6.xy, r6.xy, r7.xy;
-
-// Initialize r0.w
-mov			r0.w, c16.zzzz;
-
-//##dp3         r0.x, r3, r3;
-//##rsq         r0.x, r0.x;
-//##mul			r3, r3, r0.xxxw;
-
-
-//
-// // Transform position to screen
-//
-//
-//m4x3	r6, v0, c25; // HACKAGE
-//mov		r6.w, c16.z; // HACKAGE
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c11.x;
-mul			oFog, r10.x, c11.y;
-mov			oPos, r9;
-
-
-// Color
-mul	oD0,	c4, v5.xxxx;
-
-// UVW0
-// This layer just stays put. The motion's in the texture
-// U = transformed U
-// V = transformed V
-dp4			r0.x, v7, c19;
-dp4			r0.y, v7, c20;
-//mul			r0.y, r0.y, -c16.z;
-//add			r0.y, r0.y, c16.z;
-//add			r0.y, r0.y, c16.z;
-//add			r0.y, r0.y, c16.y;
-mov			oT0, r0.xyww;
-mov			oT1, r0.xyww;
-mov			oT2, r0.xyww;
-
+
+vs.1.1
+
+dcl_position v0      // v0 = input vertex position (local space)
+dcl_color v5         // v5 = input diffuse color; per-channel usage is described in the comment block below
+dcl_texcoord0 v7     // v7 = input texture coords, transformed by c19/c20 at the end of the shader
+
+
+// Store our input position in world space in r6
+m4x3        r6, v0, c25; // v0 * l2w (matrix rows start at c25)
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c16.z; // c16.z is the constant this shader uses as 1 (see kOne / the [0..1] clamps below)
+
+//
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = reflection strength (transparency)
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c30 = waterlevel + offset
+//  c31 = (maxAtten - minAtten) / depthFalloff
+//  c32 = minAtten.
+// And in particular:
+//  c30.w = waterlevel
+//  c31.w = 1.f;
+//  c32.w = 0;
+// So r4.w is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+mul     r0, c8, r6.xxxx;     // one wave per channel: c8 * world x
+mad     r0, c9, r6.yyyy, r0; // + c9 * world y  => r0 = dist for each of the four waves
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+mul         r0, r0, c5;      // c5 = kFreq
+add         r0, r0, c6;      // c6 = kPhase
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist *= rcp(kTwoPi);
+rcp         r4, c15.wwww;    // r4 = 1 / kTwoPi (c15.w = kTwoPi)
+add         r0, r0, c15.zzzz; // bias by +kPi (c15.z) before wrapping; it is subtracted again after the frac
+mul         r0, r0, r4;
+//    dist = frac(dist);  (done one channel at a time: expp leaves the fractional part in .y)
+expp     r1.y, r0.xxxx      // r1.y = frac(r0.x)
+mov      r1.x, r1.yyyy      // r1.x = frac(r0.x)
+expp     r1.y, r0.zzzz      // r1.y = frac(r0.z)
+mov      r1.z, r1.yyyy      // r1.z = frac(r0.z)
+expp     r1.y, r0.wwww      // r1.y = frac(r0.w)
+mov      r1.w, r1.yyyy      // r1.w = frac(r0.w)
+expp     r1.y, r0.yyyy      // y is done last so frac(r0.y) is left in r1.y with no extra mov
+//    dist *= kTwoPi;
+mul         r0, r1, c15.wwww;
+//    dist += -kPi;
+sub         r0, r0, c15.zzzz;
+
+//
+//    sincos(dist, sinDist, cosDist);
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w   (vSin = c13)
+// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w    (vCos = c14, with c14.x supplying the leading 1)
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5 (overwrites the 1/kTwoPi held in r4, which is no longer needed)
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y
+mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6 (r4 held r0^5, whose last use was the sin term above)
+mad         r2, r5, c13.wwww, r2; // r2 += r0^7 * vSin.w  => r2 = sinDist
+mad         r1, r4, c14.wwww, r1; // r1 += r0^6 * vCos.w  => r1 = cosDist
+
+// Calc our depth based filtering here into r4 (because we don't use it again
+// after here, and we need our filtering shortly).
+sub         r4, c30, r6.zzzz;      // depth = (waterlevel + offset) - world z, per channel
+mul         r4, r4, c31;           // *= (maxAtten - minAtten) / depthFalloff
+add         r4, r4, c32;           // += minAtten
+// Clamp .xyz to range [0..1]
+min         r4.xyz, r4, c16.zzzz;  // upper bound (c16.z)
+max         r4.xyz, r4, c16.xxxx;  // lower bound (c16.x); only r4.z (wave scaling) is read later in this shader
+
+// Calc our filter (see above).
+mul         r11, v5.wwww, c29;     // NOTE(review): c29 is presumably the per-wave length term of the formula above; no explicit "- 1" is applied here - confirm against the code that sets c29
+max         r11, r11, c16.xxxx;    // clamp the per-wave filter from below (c16.x)
+min         r11, r11, c16.zzzz;    // clamp the per-wave filter from above (c16.z)
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+mul         r2, r2, c7;            // c7 = kAmplitude
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r2, c16.zzzz;    // r8.x = sum of the four filtered, scaled sines
+
+// Smooth the approach to the shore. (Disabled in this shader: the whole block is commented out.)
+/*
+sub     r10.x, r6.z, c30.w;         // r10.x = height
+mul     r10.x, r10.x, r10.x;        // r10.x = h^2
+mul     r10.x, r10.x, c10.x;        // r10.x = -h^2 * k1 / k2^2
+add     r10.x, r10.x, c10.y;        // r10.x = k1 + -h^2 * k1 / k2^2
+max     r10.x, r10.x, c16.xxxx;     // Clamp to >= zero
+add     r8.x, r8.x, r10.x;          // r8.x += del
+*/
+
+mul         r8.y, r8.x, r4.z;      // scale the wave by the depth-based wave scaling (r4.z)
+add         r8.z, r8.y, c30.w;     // offset by the water level (c30.w) to get a world-space height
+max         r6.z, r6.z, r8.z;      // never drop below the original world z of the vertex
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level
+//
+//    cosDist *= filter;
+mul         r1, r1, r11;           // same per-wave filter that was applied to sinDist
+
+// Pos = (in.x + S, in.y + R, r6.z)
+// S = sum(k Dir.x A cos())
+// R = sum(k Dir.y A cos())
+// c17 = k Dir.x A
+// c18 = k Dir.y A
+//    S = sum(cosDist * c17);
+dp4         r7.x, r1, c17;         // r7.x = S
+dp4         r7.y, r1, c18;         // r7.y = R = sum(cosDist * c18)
+
+add         r6.xy, r6.xy, r7.xy;   // world xy += (S, R)
+
+// Initialize r0.w (it supplies the z and w of the r0.xyww texture coords written at the end)
+mov         r0.w, c16.zzzz;
+
+//##dp3         r0.x, r3, r3;
+//##rsq         r0.x, r0.x;
+//##mul         r3, r3, r0.xxxw;
+
+
+//
+// // Transform position to screen
+//
+//
+//m4x3  r6, v0, c25; // HACKAGE
+//mov       r6.w, c16.z; // HACKAGE
+//m4x4     oPos, r6, c0; // ADDFOG
+m4x4        r9, r6, c0;            // r9 = displaced world position transformed by the matrix at c0
+add         r10.x, r9.w, c11.x;    // fog is derived from the transformed w:
+mul         oFog, r10.x, c11.y;    //   oFog = (r9.w + c11.x) * c11.y
+mov         oPos, r9;
+
+
+// Color
+mul oD0,    c4, v5.xxxx;           // c4 scaled by v5.r (the "overall transparency" channel described above)
+
+// UVW0
+// This layer just stays put. The motion's in the texture
+// U = transformed U
+// V = transformed V
+dp4         r0.x, v7, c19;         // U = v7 . c19
+dp4         r0.y, v7, c20;         // V = v7 . c20
+//mul           r0.y, r0.y, -c16.z;
+//add           r0.y, r0.y, c16.z;
+//add           r0.y, r0.y, c16.z;
+//add           r0.y, r0.y, c16.y;
+mov         oT0, r0.xyww;          // all three texture stages receive the same (U, V, r0.w, r0.w)
+mov         oT1, r0.xyww;
+mov         oT2, r0.xyww;
+
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDec1Lay.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDec1Lay.inl
@ -1,207 +1,207 @@
-vs.1.1
-
-dcl_position v0
-dcl_color v5
-dcl_texcoord0 v7
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c18; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c13.z;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = illumination
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c22 = waterlevel + offset
-//	c23 = (maxAtten - minAtten) / depthFalloff
-//	c24 = minAtten.
-// And in particular:
-//	c22.w = waterlevel
-//	c23.w = 1.f;
-//	c24.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c7, r6.xxxx;
-mad		r0, c8, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c4;
-add			r0, r0, c5;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c12.wwww;
-add			r0, r0, c12.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c12.wwww;
-//    dist += -kPi;
-sub         r0, r0, c12.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c10.wwww, r2;
-mad         r1, r4, c11.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c22, r6.zzzz;
-mul			r4, r4, c23;
-add			r4, r4, c24;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c13.zzzz;
-max			r4.xyz, r4, c13.xxxx;
-//mov r4.xyz, c13.xxx; // HACKTEST
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c21;
-max			r11, r11, c13.xxxx;
-min			r11, r11, c13.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r2, r2, c6;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r2, c13.zzzz;
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c22.w;
-max			r6.z, r6.z, r8.z;
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= kFreq.xyzw;
-mul         r1, r1, c4;
-//    cosDist *= kAmplitude.xyzw; // Combine?
-mul         r1, r1, c6;
-//    cosDist *= filter;
-mul         r1, r1, r11;
-//
-// accumCos = (0, 0, 0, 0);
-mov         r7, c13.xxxx;
-//    temp = dp4( cosDist, toCenter_X );
-//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
-dp4         r7.x, r1, -c7
-//
-//    temp = dp4( cosDist, toCenter_Y );
-//    accumCos.y += temp.xxxx;
-dp4         r7.y, r1, -c8
-//
-// }
-//
-// accumBin = (1, 0, -accumCos.x);
-// accumTan = (0, 1, -accumCos.y);
-// accumNorm = (accumCos.x, accumCos.y, 1);
-mov         r11, c13.xxzx;
-add         r11, r11, r7;
-dp3         r10.x, r11, r11;
-rsq         r10.x, r10.x;
-mul         r11, r11, r10.xxxx;
-
-//
-// Add in our scrunch (offset in X/Y plane).
-// Scale down our scrunch amount by the wave scaling
-mul			r10.x, c9.y, r4.z;
-mad         r6.xy, r11.xy, r10.xx, r6.xy;
-
-// Bias our vert up a bit to compensate for precision errors.
-// In particular, our filter coefficients are coming in as
-// interpolated bytes, so there's bound to be a lot of slop
-// from that. We've got a free slot in c25.x, so we'll use that.
-// A better implementation would be to bias and scale our screen
-// vert, effectively pushing the vert toward the camera without
-// actually moving it, but this is easier and might work just
-// as well.
-add			r6.z, r6.z, c25.x;
-
-//
-// // Transform position to screen
-//
-//
-//m4x3	r6, v0, c18; // HACKAGE
-//mov		r6.w, c13.z; // HACKAGE
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c29.x;
-mul			oFog, r10.x, c29.y;
-mov			oPos, r9;
-
-
-// Output color is vertex green
-// Output alpha is vertex red (vtx alpha is used for wave filtering)
-// Whole thing modulated by material color/opacity.
-mul		oD0, v5.yyyx, c26;
-
-// Usual texture transform
-mov		r11.zw, c13.zzzz;
-dp4		r11.x, v7, c14;
-dp4		r11.y, v7, c15;
-mov		oT0, r11;
-
+vs.1.1
+
+dcl_position v0
+dcl_color v5
+dcl_texcoord0 v7
+
+// Store our input position in world space in r6
+m4x3        r6, v0, c18; // v0 * l2w (m4x3 reads three rows starting at c18)
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c13.z; // c13.z is the constant one (it is used as kOne further down)
+
+// Overview: displace the world-space vertex with a sum of sine waves evaluated four at a time (one per vector component), then emit position, fog, color and one UV set.
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = illumination
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c22 = waterlevel + offset
+//  c23 = (maxAtten - minAtten) / depthFalloff
+//  c24 = minAtten.
+// And in particular:
+//  c22.w = waterlevel
+//  c23.w = 1.f;
+//  c24.w = 0;
+// So r4.w is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+mul     r0, c7, r6.xxxx; // r0 = c7 * pos.x
+mad     r0, c8, r6.yyyy, r0; // r0 = c7 * pos.x + c8 * pos.y (one 2D dot product per component)
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+mul         r0, r0, c4; // c4 = kFreq
+add         r0, r0, c5; // c5 = kPhase
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist *= rcp(kTwoPi);
+rcp         r4, c12.wwww; // r4 = 1 / kTwoPi (c12.w)
+add         r0, r0, c12.zzzz; // pre-bias by kPi (c12.z); the sub after the frac removes it again
+mul         r0, r0, r4;
+//    dist = frac(dist); expp leaves the fractional part in .y, so r1.y is the scratch lane and y itself is done last.
+expp     r1.y, r0.xxxx // r1.y = frac(r0.x)
+mov      r1.x, r1.yyyy
+expp     r1.y, r0.zzzz // r1.y = frac(r0.z)
+mov      r1.z, r1.yyyy
+expp     r1.y, r0.wwww // r1.y = frac(r0.w)
+mov      r1.w, r1.yyyy
+expp     r1.y, r0.yyyy // r1.y = frac(r0.y), already in its final lane
+//    dist *= kTwoPi;
+mul         r0, r1, c12.wwww;
+//    dist += -kPi;
+sub         r0, r0, c12.zzzz;
+
+//
+//    sincos(dist, sinDist, cosDist);
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z   (the r0^7 term is added further down)
+// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z    (the r0^6 term is added further down)
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
+mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6
+mad         r2, r5, c10.wwww, r2; // r2 (sin) += r0^7 * vSin.w
+mad         r1, r4, c11.wwww, r1; // r1 (cos) += r0^6 * vCos.w
+
+// Calc our depth based filtering here into r4 (its old contents, r0^6, aren't used again
+// after here, and we need our filtering shortly).
+sub         r4, c22, r6.zzzz; // r4 = (waterlevel + offset) - z
+mul         r4, r4, c23; // r4 *= (maxAtten - minAtten) / depthFalloff
+add         r4, r4, c24; // r4 += minAtten
+// Clamp .xyz to range [0..1]
+min         r4.xyz, r4, c13.zzzz;
+max         r4.xyz, r4, c13.xxxx;
+//mov r4.xyz, c13.xxx; // HACKTEST
+
+// Calc our filter (see above): r11 = clamp(v5.w * c21, 0, 1).
+mul         r11, v5.wwww, c21; // NOTE(review): no "- 1" term here, unlike the formula above; presumably c21 carries waveLen -- confirm
+max         r11, r11, c13.xxxx;
+min         r11, r11, c13.zzzz;
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+mul         r2, r2, c6;
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r2, c13.zzzz;
+mul         r8.y, r8.x, r4.z; // dampen by the depth-based factor in r4.z
+add         r8.z, r8.y, c22.w; // c22.w = waterlevel
+max         r6.z, r6.z, r8.z;
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level
+//
+//    cosDist *= kFreq.xyzw;
+mul         r1, r1, c4;
+//    cosDist *= kAmplitude.xyzw; // Combine?
+mul         r1, r1, c6;
+//    cosDist *= filter;
+mul         r1, r1, r11;
+//
+// accumCos = (0, 0, 0, 0);
+mov         r7, c13.xxxx;
+//    temp = dp4( cosDist, toCenter_X );
+//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0))
+dp4         r7.x, r1, -c7
+//
+//    temp = dp4( cosDist, toCenter_Y );
+//    accumCos.y += temp.xxxx;
+dp4         r7.y, r1, -c8
+//
+// }
+// Of the three vectors listed next, only accumNorm is actually built (in r11) and normalized.
+// accumBin = (1, 0, -accumCos.x);
+// accumTan = (0, 1, -accumCos.y);
+// accumNorm = (accumCos.x, accumCos.y, 1);
+mov         r11, c13.xxzx; // r11 = (0, 0, 1, 0)
+add         r11, r11, r7;
+dp3         r10.x, r11, r11;
+rsq         r10.x, r10.x;
+mul         r11, r11, r10.xxxx;
+
+//
+// Add in our scrunch (offset in X/Y plane).
+// Scale down our scrunch amount by the wave scaling
+mul         r10.x, c9.y, r4.z;
+mad         r6.xy, r11.xy, r10.xx, r6.xy; // pos.xy += normal.xy * scaled scrunch
+
+// Bias our vert up a bit to compensate for precision errors.
+// In particular, our filter coefficients are coming in as
+// interpolated bytes, so there's bound to be a lot of slop
+// from that. We've got a free slot in c25.x, so we'll use that.
+// A better implementation would be to bias and scale our screen
+// vert, effectively pushing the vert toward the camera without
+// actually moving it, but this is easier and might work just
+// as well.
+add         r6.z, r6.z, c25.x;
+
+//
+// // Transform position to screen
+//
+// Position goes through r9 first so that its w can also feed the fog term.
+//m4x3  r6, v0, c18; // HACKAGE
+//mov       r6.w, c13.z; // HACKAGE
+//m4x4     oPos, r6, c0; // ADDFOG
+m4x4        r9, r6, c0;
+add         r10.x, r9.w, c29.x;
+mul         oFog, r10.x, c29.y; // oFog = (r9.w + c29.x) * c29.y
+mov         oPos, r9;
+
+
+// Output color is vertex green
+// Output alpha is vertex red (vtx alpha is used for wave filtering)
+// Whole thing modulated by material color/opacity.
+mul     oD0, v5.yyyx, c26;
+
+// Usual texture transform
+mov     r11.zw, c13.zzzz; // z = w = 1
+dp4     r11.x, v7, c14;
+dp4     r11.y, v7, c15;
+mov     oT0, r11;
+
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDec1Lay_7.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDec1Lay_7.inl
@ -1,189 +1,189 @@
-
-vs.1.1
-
-dcl_position v0
-dcl_color v5
-dcl_texcoord0 v7
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c18; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c13.z;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = illumination
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c22 = waterlevel + offset
-//	c23 = (maxAtten - minAtten) / depthFalloff
-//	c24 = minAtten.
-// And in particular:
-//	c22.w = waterlevel
-//	c23.w = 1.f;
-//	c24.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c7, r6.xxxx;
-mad		r0, c8, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c4;
-add			r0, r0, c5;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c12.wwww;
-add			r0, r0, c12.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c12.wwww;
-//    dist += -kPi;
-sub         r0, r0, c12.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c10.wwww, r2;
-mad         r1, r4, c11.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c22, r6.zzzz;
-mul			r4, r4, c23;
-add			r4, r4, c24;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c13.zzzz;
-max			r4.xyz, r4, c13.xxxx;
-//mov r4.xyz, c13.xxx; // HACKTEST
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c21;
-max			r11, r11, c13.xxxx;
-min			r11, r11, c13.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r2, r2, c6;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r2, c13.zzzz;
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c22.w;
-max			r6.z, r6.z, r8.z;
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= filter;
-mul         r1, r1, r11;
-
-// Pos = (in.x + S, in.y + R, r6.z)
-// S = sum(k Dir.x A cos())
-// R = sum(k Dir.y A cos())
-// c30 = k Dir.x A
-// c31 = k Dir.y A
-//    S = sum(cosDist * c30);
-dp4         r7.x, r1, c30;
-//    R = sum(cosDist * c31);
-dp4			r7.y, r1, c31;
-
-add			r6.xy, r6.xy, r7.xy;
-
-// Bias our vert up a bit to compensate for precision errors.
-// In particular, our filter coefficients are coming in as
-// interpolated bytes, so there's bound to be a lot of slop
-// from that. We've got a free slot in c25.x, so we'll use that.
-// A better implementation would be to bias and scale our screen
-// vert, effectively pushing the vert toward the camera without
-// actually moving it, but this is easier and might work just
-// as well.
-add			r6.z, r6.z, c25.x;
-
-//
-// // Transform position to screen
-//
-//
-//m4x3	r6, v0, c18; // HACKAGE
-//mov		r6.w, c13.z; // HACKAGE
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c29.x;
-mul			oFog, r10.x, c29.y;
-mov			oPos, r9;
-
-
-// Output color is vertex green
-// Output alpha is vertex red (vtx alpha is used for wave filtering)
-// Whole thing modulated by material color/opacity.
-mul		oD0, v5.yyyx, c26;
-
-// Usual texture transform
-mov		r11.zw, c13.zzzz;
-dp4		r11.x, v7, c14;
-dp4		r11.y, v7, c15;
-mov		oT0, r11;
-
+
+vs.1.1
+
+dcl_position v0
+dcl_color v5
+dcl_texcoord0 v7
+
+// Store our input position in world space in r6
+m4x3        r6, v0, c18; // v0 * l2w (m4x3 reads three rows starting at c18)
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c13.z; // c13.z is the constant one (it is used as kOne further down)
+
+// Overview: displace the world-space vertex with a sum of sine waves evaluated four at a time (one per vector component), then emit position, fog, color and one UV set.
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = illumination
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c22 = waterlevel + offset
+//  c23 = (maxAtten - minAtten) / depthFalloff
+//  c24 = minAtten.
+// And in particular:
+//  c22.w = waterlevel
+//  c23.w = 1.f;
+//  c24.w = 0;
+// So r4.w is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+mul     r0, c7, r6.xxxx; // r0 = c7 * pos.x
+mad     r0, c8, r6.yyyy, r0; // r0 = c7 * pos.x + c8 * pos.y (one 2D dot product per component)
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+mul         r0, r0, c4; // c4 = kFreq
+add         r0, r0, c5; // c5 = kPhase
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist *= rcp(kTwoPi);
+rcp         r4, c12.wwww; // r4 = 1 / kTwoPi (c12.w)
+add         r0, r0, c12.zzzz; // pre-bias by kPi (c12.z); the sub after the frac removes it again
+mul         r0, r0, r4;
+//    dist = frac(dist); expp leaves the fractional part in .y, so r1.y is the scratch lane and y itself is done last.
+expp     r1.y, r0.xxxx // r1.y = frac(r0.x)
+mov      r1.x, r1.yyyy
+expp     r1.y, r0.zzzz // r1.y = frac(r0.z)
+mov      r1.z, r1.yyyy
+expp     r1.y, r0.wwww // r1.y = frac(r0.w)
+mov      r1.w, r1.yyyy
+expp     r1.y, r0.yyyy // r1.y = frac(r0.y), already in its final lane
+//    dist *= kTwoPi;
+mul         r0, r1, c12.wwww;
+//    dist += -kPi;
+sub         r0, r0, c12.zzzz;
+
+//
+//    sincos(dist, sinDist, cosDist);
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z   (the r0^7 term is added further down)
+// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z    (the r0^6 term is added further down)
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
+mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6
+mad         r2, r5, c10.wwww, r2; // r2 (sin) += r0^7 * vSin.w
+mad         r1, r4, c11.wwww, r1; // r1 (cos) += r0^6 * vCos.w
+
+// Calc our depth based filtering here into r4 (its old contents, r0^6, aren't used again
+// after here, and we need our filtering shortly).
+sub         r4, c22, r6.zzzz; // r4 = (waterlevel + offset) - z
+mul         r4, r4, c23; // r4 *= (maxAtten - minAtten) / depthFalloff
+add         r4, r4, c24; // r4 += minAtten
+// Clamp .xyz to range [0..1]
+min         r4.xyz, r4, c13.zzzz;
+max         r4.xyz, r4, c13.xxxx;
+//mov r4.xyz, c13.xxx; // HACKTEST
+
+// Calc our filter (see above): r11 = clamp(v5.w * c21, 0, 1).
+mul         r11, v5.wwww, c21; // NOTE(review): no "- 1" term here, unlike the formula above; presumably c21 carries waveLen -- confirm
+max         r11, r11, c13.xxxx;
+min         r11, r11, c13.zzzz;
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+mul         r2, r2, c6;
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r2, c13.zzzz;
+mul         r8.y, r8.x, r4.z; // dampen by the depth-based factor in r4.z
+add         r8.z, r8.y, c22.w; // c22.w = waterlevel
+max         r6.z, r6.z, r8.z;
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level
+//
+//    cosDist *= filter;
+mul         r1, r1, r11;
+
+// Pos = (in.x + S, in.y + R, r6.z)
+// S = sum(k Dir.x A cos())
+// R = sum(k Dir.y A cos())
+// c30 = k Dir.x A
+// c31 = k Dir.y A
+//    S = sum(cosDist * c30);
+dp4         r7.x, r1, c30;
+//    R = sum(cosDist * c31);
+dp4         r7.y, r1, c31;
+
+add         r6.xy, r6.xy, r7.xy; // pos.xy += (S, R)
+
+// Bias our vert up a bit to compensate for precision errors.
+// In particular, our filter coefficients are coming in as
+// interpolated bytes, so there's bound to be a lot of slop
+// from that. We've got a free slot in c25.x, so we'll use that.
+// A better implementation would be to bias and scale our screen
+// vert, effectively pushing the vert toward the camera without
+// actually moving it, but this is easier and might work just
+// as well.
+add         r6.z, r6.z, c25.x;
+
+//
+// // Transform position to screen
+//
+// Position goes through r9 first so that its w can also feed the fog term.
+//m4x3  r6, v0, c18; // HACKAGE
+//mov       r6.w, c13.z; // HACKAGE
+//m4x4     oPos, r6, c0; // ADDFOG
+m4x4        r9, r6, c0;
+add         r10.x, r9.w, c29.x;
+mul         oFog, r10.x, c29.y; // oFog = (r9.w + c29.x) * c29.y
+mov         oPos, r9;
+
+
+// Output color is vertex green
+// Output alpha is vertex red (vtx alpha is used for wave filtering)
+// Whole thing modulated by material color/opacity.
+mul     oD0, v5.yyyx, c26;
+
+// Usual texture transform
+mov     r11.zw, c13.zzzz; // z = w = 1
+dp4     r11.x, v7, c14;
+dp4     r11.y, v7, c15;
+mov     oT0, r11;
+
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDec2Lay11.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDec2Lay11.inl
@ -1,209 +1,209 @@
-vs.1.1
-
-dcl_position v0
-dcl_color v5
-dcl_texcoord0 v7
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c18; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c13.z;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = illumination
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c22 = waterlevel + offset
-//	c23 = (maxAtten - minAtten) / depthFalloff
-//	c24 = minAtten.
-// And in particular:
-//	c22.w = waterlevel
-//	c23.w = 1.f;
-//	c24.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c7, r6.xxxx;
-mad		r0, c8, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c4;
-add			r0, r0, c5;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c12.wwww;
-add			r0, r0, c12.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c12.wwww;
-//    dist += -kPi;
-sub         r0, r0, c12.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c10.wwww, r2;
-mad         r1, r4, c11.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c22, r6.zzzz;
-mul			r4, r4, c23;
-add			r4, r4, c24;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c13.zzzz;
-max			r4.xyz, r4, c13.xxxx;
-//mov r4.xyz, c13.xxx; // HACKTEST
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c21;
-max			r11, r11, c13.xxxx;
-min			r11, r11, c13.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r2, r2, c6;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r2, c13.zzzz;
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c22.w;
-max			r6.z, r6.z, r8.z;
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= kFreq.xyzw;
-mul         r1, r1, c4;
-//    cosDist *= kAmplitude.xyzw; // Combine?
-mul         r1, r1, c6;
-//    cosDist *= filter;
-mul         r1, r1, r11;
-//
-// accumCos = (0, 0, 0, 0);
-mov         r7, c13.xxxx;
-//    temp = dp4( cosDist, toCenter_X );
-//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
-dp4         r7.x, r1, -c7
-//
-//    temp = dp4( cosDist, toCenter_Y );
-//    accumCos.y += temp.xxxx;
-dp4         r7.y, r1, -c8
-//
-// }
-//
-// accumBin = (1, 0, -accumCos.x);
-// accumTan = (0, 1, -accumCos.y);
-// accumNorm = (accumCos.x, accumCos.y, 1);
-mov         r11, c13.xxzx;
-add         r11, r11, r7;
-dp3         r10.x, r11, r11;
-rsq         r10.x, r10.x;
-mul         r11, r11, r10.xxxx;
-
-//
-// Add in our scrunch (offset in X/Y plane).
-// Scale down our scrunch amount by the wave scaling
-mul			r10.x, c9.y, r4.z;
-mad         r6.xy, r11.xy, r10.xx, r6.xy;
-
-// Bias our vert up a bit to compensate for precision errors.
-// In particular, our filter coefficients are coming in as
-// interpolated bytes, so there's bound to be a lot of slop
-// from that. We've got a free slot in c25.x, so we'll use that.
-// A better implementation would be to bias and scale our screen
-// vert, effectively pushing the vert toward the camera without
-// actually moving it, but this is easier and might work just
-// as well.
-add			r6.z, r6.z, c25.x;
-
-//
-// // Transform position to screen
-//
-//
-//m4x3	r6, v0, c18; // HACKAGE
-//mov		r6.w, c13.z; // HACKAGE
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c29.x;
-mul			oFog, r10.x, c29.y;
-mov			oPos, r9;
-
-// Output color is vertex green
-// Output alpha is vertex red (vtx alpha is used for wave filtering)
-// Whole thing modulated by material color/opacity.
-mul		oD0, v5.yyyx, c26;
-
-// Usual texture transform
-mov		r11.zw, c13.zzzz;
-dp4		r11.x, v7, c14;
-dp4		r11.y, v7, c15;
-mov		oT0, r11;
-
-dp4		r11.x, v7, c16;
-dp4		r11.y, v7, c17;
-mov		oT1, r11;
+vs.1.1
+// Wave vertex shader: offsets the world-space vertex by a filtered sum of four sine waves, scrunches x/y along the wave normal, then writes oPos, oFog, oD0, oT0 and oT1.
+dcl_position v0
+dcl_color v5
+dcl_texcoord0 v7
+
+// Store our input position in world space in r6
+m4x3        r6, v0, c18; // v0 * l2w
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c13.z; // w = c13.z (the same constant is used as kOne below)
+
+//
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = illumination
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c22 = waterlevel + offset
+//  c23 = (maxAtten - minAtten) / depthFalloff
+//  c24 = minAtten.
+// And in particular:
+//  c22.w = waterlevel
+//  c23.w = 1.f;
+//  c24.w = 0;
+// So r4.w = (c22.w - r6.z) * 1 + 0 is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+mul     r0, c7, r6.xxxx;
+mad     r0, c8, r6.yyyy, r0; // r0 = c7 * pos.x + c8 * pos.y
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+mul         r0, r0, c4; // c4 = kFreq
+add         r0, r0, c5; // c5 = kPhase
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist *= rcp(kTwoPi);
+rcp         r4, c12.wwww; // r4 = 1 / kTwoPi (c12.w)
+add         r0, r0, c12.zzzz; // bias by +kPi (c12.z) before wrapping; subtracted again below
+mul         r0, r0, r4;
+//    dist = frac(dist); (expp with a .y write mask yields the fractional part; r1.y doubles as scratch, so the y lane is done last)
+expp     r1.y, r0.xxxx
+mov      r1.x, r1.yyyy
+expp     r1.y, r0.zzzz
+mov      r1.z, r1.yyyy
+expp     r1.y, r0.wwww
+mov      r1.w, r1.yyyy
+expp     r1.y, r0.yyyy
+//    dist *= kTwoPi;
+mul         r0, r1, c12.wwww;
+//    dist += -kPi;
+sub         r0, r0, c12.zzzz;
+
+//
+//    sincos(dist, sinDist, cosDist);
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w   (vSin = c10)
+// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w    (vCos = c11)
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
+mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6
+mad         r2, r5, c10.wwww, r2;   // r2 += r0^7 * vSin.w
+mad         r1, r4, c11.wwww, r1;   // r1 += r0^6 * vCos.w
+
+// Calc our depth based filtering here into r4 (its old contents, r0^6, aren't used again
+// after here, and we need our filtering shortly).
+sub         r4, c22, r6.zzzz; // (waterlevel + offset) - r6.z
+mul         r4, r4, c23; // * (maxAtten - minAtten) / depthFalloff
+add         r4, r4, c24; // + minAtten
+// Clamp .xyz to range [0..1]
+min         r4.xyz, r4, c13.zzzz;
+max         r4.xyz, r4, c13.xxxx;
+//mov r4.xyz, c13.xxx; // HACKTEST
+
+// Calc our filter (see above). NOTE(review): this is clamp(v5.w * c21, 0, 1) with no explicit "- 1" term -- confirm against the formula above.
+mul         r11, v5.wwww, c21;
+max         r11, r11, c13.xxxx;
+min         r11, r11, c13.zzzz;
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+mul         r2, r2, c6;
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r2, c13.zzzz;
+mul         r8.y, r8.x, r4.z;
+add         r8.z, r8.y, c22.w;
+max         r6.z, r6.z, r8.z;
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level
+//
+//    cosDist *= kFreq.xyzw;
+mul         r1, r1, c4;
+//    cosDist *= kAmplitude.xyzw; // Combine?
+mul         r1, r1, c6;
+//    cosDist *= filter;
+mul         r1, r1, r11;
+//
+// accumCos = (0, 0, 0, 0);
+mov         r7, c13.xxxx;
+//    temp = dp4( cosDist, toCenter_X );
+//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0))
+dp4         r7.x, r1, -c7
+//
+//    temp = dp4( cosDist, toCenter_Y );
+//    accumCos.y += temp.xxxx;
+dp4         r7.y, r1, -c8
+//
+// }
+//
+// accumBin = (1, 0, -accumCos.x);
+// accumTan = (0, 1, -accumCos.y);
+// accumNorm = (accumCos.x, accumCos.y, 1);
+mov         r11, c13.xxzx; // r11 = (c13.x, c13.x, c13.z, c13.x), i.e. (0, 0, 1, 0) per the comments above
+add         r11, r11, r7; // r11 = accumNorm (only this vector is built; accumBin/accumTan are not computed here)
+dp3         r10.x, r11, r11;
+rsq         r10.x, r10.x;
+mul         r11, r11, r10.xxxx; // normalize accumNorm
+
+//
+// Add in our scrunch (offset in X/Y plane).
+// Scale down our scrunch amount by the wave scaling
+mul         r10.x, c9.y, r4.z; // scrunch amount (c9.y) * depth-based wave scaling (r4.z)
+mad         r6.xy, r11.xy, r10.xx, r6.xy; // pos.xy += normal.xy * scaled scrunch
+
+// Bias our vert up a bit to compensate for precision errors.
+// In particular, our filter coefficients are coming in as
+// interpolated bytes, so there's bound to be a lot of slop
+// from that. We've got a free slot in c25.x, so we'll use that.
+// A better implementation would be to bias and scale our screen
+// vert, effectively pushing the vert toward the camera without
+// actually moving it, but this is easier and might work just
+// as well.
+add         r6.z, r6.z, c25.x;
+
+//
+// // Transform position to screen
+//
+//
+//m4x3  r6, v0, c18; // HACKAGE
+//mov       r6.w, c13.z; // HACKAGE
+//m4x4     oPos, r6, c0; // ADDFOG
+m4x4        r9, r6, c0;
+add         r10.x, r9.w, c29.x;
+mul         oFog, r10.x, c29.y; // oFog = (r9.w + c29.x) * c29.y
+mov         oPos, r9;
+
+// Output color is vertex green
+// Output alpha is vertex red (vtx alpha is used for wave filtering)
+// Whole thing modulated by material color/opacity.
+mul     oD0, v5.yyyx, c26;
+
+// Usual texture transform
+mov     r11.zw, c13.zzzz; // z = w = c13.z; left untouched for the second layer below
+dp4     r11.x, v7, c14;
+dp4     r11.y, v7, c15;
+mov     oT0, r11;
+
+dp4     r11.x, v7, c16; // second layer: same v7 input, transformed by c16/c17
+dp4     r11.y, v7, c17;
+mov     oT1, r11;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDec2Lay11_7.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDec2Lay11_7.inl
@ -1,191 +1,191 @@
-
-vs.1.1
-
-dcl_position v0
-dcl_color v5
-dcl_texcoord0 v7
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c18; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c13.z;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = illumination
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c22 = waterlevel + offset
-//	c23 = (maxAtten - minAtten) / depthFalloff
-//	c24 = minAtten.
-// And in particular:
-//	c22.w = waterlevel
-//	c23.w = 1.f;
-//	c24.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c7, r6.xxxx;
-mad		r0, c8, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c4;
-add			r0, r0, c5;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c12.wwww;
-add			r0, r0, c12.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c12.wwww;
-//    dist += -kPi;
-sub         r0, r0, c12.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c10.wwww, r2;
-mad         r1, r4, c11.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c22, r6.zzzz;
-mul			r4, r4, c23;
-add			r4, r4, c24;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c13.zzzz;
-max			r4.xyz, r4, c13.xxxx;
-//mov r4.xyz, c13.xxx; // HACKTEST
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c21;
-max			r11, r11, c13.xxxx;
-min			r11, r11, c13.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r2, r2, c6;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r2, c13.zzzz;
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c22.w;
-max			r6.z, r6.z, r8.z;
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= filter;
-mul         r1, r1, r11;
-
-// Pos = (in.x + S, in.y + R, r6.z)
-// S = sum(k Dir.x A cos())
-// R = sum(k Dir.y A cos())
-// c30 = k Dir.x A
-// c31 = k Dir.y A
-//    S = sum(cosDist * c30);
-dp4         r7.x, r1, c30;
-//    R = sum(cosDist * c31);
-dp4			r7.y, r1, c31;
-
-add			r6.xy, r6.xy, r7.xy;
-
-// Bias our vert up a bit to compensate for precision errors.
-// In particular, our filter coefficients are coming in as
-// interpolated bytes, so there's bound to be a lot of slop
-// from that. We've got a free slot in c25.x, so we'll use that.
-// A better implementation would be to bias and scale our screen
-// vert, effectively pushing the vert toward the camera without
-// actually moving it, but this is easier and might work just
-// as well.
-add			r6.z, r6.z, c25.x;
-
-//
-// // Transform position to screen
-//
-//
-//m4x3	r6, v0, c18; // HACKAGE
-//mov		r6.w, c13.z; // HACKAGE
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c29.x;
-mul			oFog, r10.x, c29.y;
-mov			oPos, r9;
-
-// Output color is vertex green
-// Output alpha is vertex red (vtx alpha is used for wave filtering)
-// Whole thing modulated by material color/opacity.
-mul		oD0, v5.yyyx, c26;
-
-// Usual texture transform
-mov		r11.zw, c13.zzzz;
-dp4		r11.x, v7, c14;
-dp4		r11.y, v7, c15;
-mov		oT0, r11;
-
-dp4		r11.x, v7, c16;
-dp4		r11.y, v7, c17;
-mov		oT1, r11;
+
+vs.1.1
+// Wave vertex shader: offsets the world-space vertex by a filtered sum of four sine waves, shifts x/y by the c30/c31-weighted cosine sums, then writes oPos, oFog, oD0, oT0 and oT1.
+dcl_position v0
+dcl_color v5
+dcl_texcoord0 v7
+
+// Store our input position in world space in r6
+m4x3        r6, v0, c18; // v0 * l2w
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c13.z; // w = c13.z (the same constant is used as kOne below)
+
+//
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = illumination
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c22 = waterlevel + offset
+//  c23 = (maxAtten - minAtten) / depthFalloff
+//  c24 = minAtten.
+// And in particular:
+//  c22.w = waterlevel
+//  c23.w = 1.f;
+//  c24.w = 0;
+// So r4.w = (c22.w - r6.z) * 1 + 0 is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+mul     r0, c7, r6.xxxx;
+mad     r0, c8, r6.yyyy, r0; // r0 = c7 * pos.x + c8 * pos.y
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+mul         r0, r0, c4; // c4 = kFreq
+add         r0, r0, c5; // c5 = kPhase
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist *= rcp(kTwoPi);
+rcp         r4, c12.wwww; // r4 = 1 / kTwoPi (c12.w)
+add         r0, r0, c12.zzzz; // bias by +kPi (c12.z) before wrapping; subtracted again below
+mul         r0, r0, r4;
+//    dist = frac(dist); (expp with a .y write mask yields the fractional part; r1.y doubles as scratch, so the y lane is done last)
+expp     r1.y, r0.xxxx
+mov      r1.x, r1.yyyy
+expp     r1.y, r0.zzzz
+mov      r1.z, r1.yyyy
+expp     r1.y, r0.wwww
+mov      r1.w, r1.yyyy
+expp     r1.y, r0.yyyy
+//    dist *= kTwoPi;
+mul         r0, r1, c12.wwww;
+//    dist += -kPi;
+sub         r0, r0, c12.zzzz;
+
+//
+//    sincos(dist, sinDist, cosDist);
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w   (vSin = c10)
+// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w    (vCos = c11)
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
+mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6
+mad         r2, r5, c10.wwww, r2;   // r2 += r0^7 * vSin.w
+mad         r1, r4, c11.wwww, r1;   // r1 += r0^6 * vCos.w
+
+// Calc our depth based filtering here into r4 (its old contents, r0^6, aren't used again
+// after here, and we need our filtering shortly).
+sub         r4, c22, r6.zzzz; // (waterlevel + offset) - r6.z
+mul         r4, r4, c23; // * (maxAtten - minAtten) / depthFalloff
+add         r4, r4, c24; // + minAtten
+// Clamp .xyz to range [0..1]
+min         r4.xyz, r4, c13.zzzz;
+max         r4.xyz, r4, c13.xxxx;
+//mov r4.xyz, c13.xxx; // HACKTEST
+
+// Calc our filter (see above). NOTE(review): this is clamp(v5.w * c21, 0, 1) with no explicit "- 1" term -- confirm against the formula above.
+mul         r11, v5.wwww, c21;
+max         r11, r11, c13.xxxx;
+min         r11, r11, c13.zzzz;
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+mul         r2, r2, c6;
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r2, c13.zzzz;
+mul         r8.y, r8.x, r4.z;
+add         r8.z, r8.y, c22.w;
+max         r6.z, r6.z, r8.z;
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level
+//
+//    cosDist *= filter;
+mul         r1, r1, r11;
+
+// Pos = (in.x + S, in.y + R, r6.z)
+// S = sum(k Dir.x A cos())
+// R = sum(k Dir.y A cos())
+// c30 = k Dir.x A
+// c31 = k Dir.y A
+//    S = sum(cosDist * c30);
+dp4         r7.x, r1, c30;
+//    R = sum(cosDist * c31);
+dp4         r7.y, r1, c31;
+
+add         r6.xy, r6.xy, r7.xy; // pos.xy += (S, R)
+
+// Bias our vert up a bit to compensate for precision errors.
+// In particular, our filter coefficients are coming in as
+// interpolated bytes, so there's bound to be a lot of slop
+// from that. We've got a free slot in c25.x, so we'll use that.
+// A better implementation would be to bias and scale our screen
+// vert, effectively pushing the vert toward the camera without
+// actually moving it, but this is easier and might work just
+// as well.
+add         r6.z, r6.z, c25.x;
+
+//
+// // Transform position to screen
+//
+//
+//m4x3  r6, v0, c18; // HACKAGE
+//mov       r6.w, c13.z; // HACKAGE
+//m4x4     oPos, r6, c0; // ADDFOG
+m4x4        r9, r6, c0;
+add         r10.x, r9.w, c29.x;
+mul         oFog, r10.x, c29.y; // oFog = (r9.w + c29.x) * c29.y
+mov         oPos, r9;
+
+// Output color is vertex green
+// Output alpha is vertex red (vtx alpha is used for wave filtering)
+// Whole thing modulated by material color/opacity.
+mul     oD0, v5.yyyx, c26;
+
+// Usual texture transform
+mov     r11.zw, c13.zzzz; // z = w = c13.z; left untouched for the second layer below
+dp4     r11.x, v7, c14;
+dp4     r11.y, v7, c15;
+mov     oT0, r11;
+
+dp4     r11.x, v7, c16; // second layer: same v7 input, transformed by c16/c17
+dp4     r11.y, v7, c17;
+mov     oT1, r11;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDec2Lay12.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDec2Lay12.inl
@ -1,210 +1,210 @@
-vs.1.1
-
-dcl_position v0
-dcl_color v5
-dcl_texcoord0 v7
-dcl_texcoord1 v8
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c18; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c13.z;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = illumination
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c22 = waterlevel + offset
-//	c23 = (maxAtten - minAtten) / depthFalloff
-//	c24 = minAtten.
-// And in particular:
-//	c22.w = waterlevel
-//	c23.w = 1.f;
-//	c24.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c7, r6.xxxx;
-mad		r0, c8, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c4;
-add			r0, r0, c5;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c12.wwww;
-add			r0, r0, c12.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c12.wwww;
-//    dist += -kPi;
-sub         r0, r0, c12.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c10.wwww, r2;
-mad         r1, r4, c11.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c22, r6.zzzz;
-mul			r4, r4, c23;
-add			r4, r4, c24;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c13.zzzz;
-max			r4.xyz, r4, c13.xxxx;
-//mov r4.xyz, c13.xxx; // HACKTEST
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c21;
-max			r11, r11, c13.xxxx;
-min			r11, r11, c13.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r2, r2, c6;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r2, c13.zzzz;
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c22.w;
-max			r6.z, r6.z, r8.z;
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= kFreq.xyzw;
-mul         r1, r1, c4;
-//    cosDist *= kAmplitude.xyzw; // Combine?
-mul         r1, r1, c6;
-//    cosDist *= filter;
-mul         r1, r1, r11;
-//
-// accumCos = (0, 0, 0, 0);
-mov         r7, c13.xxxx;
-//    temp = dp4( cosDist, toCenter_X );
-//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
-dp4         r7.x, r1, -c7
-//
-//    temp = dp4( cosDist, toCenter_Y );
-//    accumCos.y += temp.xxxx;
-dp4         r7.y, r1, -c8
-//
-// }
-//
-// accumBin = (1, 0, -accumCos.x);
-// accumTan = (0, 1, -accumCos.y);
-// accumNorm = (accumCos.x, accumCos.y, 1);
-mov         r11, c13.xxzx;
-add         r11, r11, r7;
-dp3         r10.x, r11, r11;
-rsq         r10.x, r10.x;
-mul         r11, r11, r10.xxxx;
-
-//
-// Add in our scrunch (offset in X/Y plane).
-// Scale down our scrunch amount by the wave scaling
-mul			r10.x, c9.y, r4.z;
-mad         r6.xy, r11.xy, r10.xx, r6.xy;
-
-// Bias our vert up a bit to compensate for precision errors.
-// In particular, our filter coefficients are coming in as
-// interpolated bytes, so there's bound to be a lot of slop
-// from that. We've got a free slot in c25.x, so we'll use that.
-// A better implementation would be to bias and scale our screen
-// vert, effectively pushing the vert toward the camera without
-// actually moving it, but this is easier and might work just
-// as well.
-add			r6.z, r6.z, c25.x;
-
-//
-// // Transform position to screen
-//
-//
-//m4x3	r6, v0, c18; // HACKAGE
-//mov		r6.w, c13.z; // HACKAGE
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c29.x;
-mul			oFog, r10.x, c29.y;
-mov			oPos, r9;
-
-// Output color is vertex green
-// Output alpha is vertex red (vtx alpha is used for wave filtering)
-// Whole thing modulated by material color/opacity.
-mul		oD0, v5.yyyx, c26;
-
-// Usual texture transform
-mov		r11.zw, c13.zzzz;
-dp4		r11.x, v7, c14;
-dp4		r11.y, v7, c15;
-mov		oT0, r11;
-
-dp4		r11.x, v8, c16;
-dp4		r11.y, v8, c17;
-mov		oT1, r11;
+vs.1.1
+//
+// Wave vertex shader, two texture layers (oT0 from v7, oT1 from v8).
+// Moves v0 into world space, raises z by the sum of four filtered sine
+// waves (one wave per vector component), scrunches x/y along the
+// normalized cosine-derived normal, then projects to screen and emits
+// fog, a material-modulated vertex color and two transformed UV sets.
+//
+// Constants as read by this shader:
+//  c0-c3   world-to-screen matrix (m4x4)
+//  c4      kFreq, c5 kPhase, c6 kAmplitude
+//  c7/c8   x / y components of the wave direction vectors
+//  c9.y    scrunch amount
+//  c10/c11 sine / cosine polynomial coefficients (vSin / vCos)
+//  c12.z   kPi, c12.w kTwoPi
+//  c13.x   used as 0, c13.z used as 1
+//  c14-c17 texture transform rows (c14/c15 -> oT0, c16/c17 -> oT1)
+//  c18-c20 local-to-world matrix (m4x3)
+//  c21     scale applied to v5.w for the per-wave filter
+//  c22-c24 depth attenuation constants (described below)
+//  c25.x   z bias
+//  c26     material color/opacity
+//  c29.x   fog offset, c29.y fog scale
+
+dcl_position v0
+dcl_color v5
+dcl_texcoord0 v7
+dcl_texcoord1 v8
+
+// Store our input position in world space in r6
+m4x3        r6, v0, c18; // v0 * l2w
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c13.z;
+
+//
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = illumination
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c22 = waterlevel + offset
+//  c23 = (maxAtten - minAtten) / depthFalloff
+//  c24 = minAtten.
+// And in particular:
+//  c22.w = waterlevel
+//  c23.w = 1.f;
+//  c24.w = 0;
+// So r4.w is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+// r0 = c7 * pos.x + c8 * pos.y, one wave per component.
+mul     r0, c7, r6.xxxx;
+mad     r0, c8, r6.yyyy, r0;
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+mul         r0, r0, c4;
+add         r0, r0, c5;
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist = (dist + kPi) * rcp(kTwoPi);
+rcp         r4, c12.wwww;
+add         r0, r0, c12.zzzz;
+mul         r0, r0, r4;
+//    dist = frac(dist);
+// expp with a .y write mask yields the fractional part of its source, so
+// x, z and w are each routed through r1.y and copied out; y is done last
+// so its fraction is left in place in r1.y.
+expp     r1.y, r0.xxxx
+mov      r1.x, r1.yyyy
+expp     r1.y, r0.zzzz
+mov      r1.z, r1.yyyy
+expp     r1.y, r0.wwww
+mov      r1.w, r1.yyyy
+expp     r1.y, r0.yyyy
+//    dist *= kTwoPi;
+mul         r0, r1, c12.wwww;
+//    dist += -kPi;
+sub         r0, r0, c12.zzzz;
+
+//
+//    sincos(dist, sinDist, cosDist);
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
+// cos = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
+mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6
+mad         r2, r5, c10.wwww, r2;   // r2 += r0^7 * vSin.w
+mad         r1, r4, c11.wwww, r1;   // r1 += r0^6 * vCos.w
+
+// Calc our depth based filtering here into r4 (its r0^6 contents aren't used
+// again after here, and we need our filtering shortly).
+// r4 = (c22 - r6.z) * c23 + c24
+sub         r4, c22, r6.zzzz;
+mul         r4, r4, c23;
+add         r4, r4, c24;
+// Clamp .xyz to range [0..1]
+min         r4.xyz, r4, c13.zzzz;
+max         r4.xyz, r4, c13.xxxx;
+//mov r4.xyz, c13.xxx; // HACKTEST
+
+// Calc our filter (see above): r11 = clamp(v5.w * c21, 0, 1).
+// NOTE(review): the formula in the comment above subtracts 1 before clamping,
+// but no -1 term appears here; confirm which is intended.
+mul         r11, v5.wwww, c21;
+max         r11, r11, c13.xxxx;
+min         r11, r11, c13.zzzz;
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+mul         r2, r2, c6;
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r2, c13.zzzz;
+mul         r8.y, r8.x, r4.z;
+add         r8.z, r8.y, c22.w;
+max         r6.z, r6.z, r8.z;
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level
+//
+//    cosDist *= kFreq.xyzw;
+mul         r1, r1, c4;
+//    cosDist *= kAmplitude.xyzw; // Combine?
+mul         r1, r1, c6;
+//    cosDist *= filter;
+mul         r1, r1, r11;
+//
+// accumCos = (0, 0, 0, 0);
+mov         r7, c13.xxxx;
+//    temp = dp4( cosDist, toCenter_X );
+//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0))
+dp4         r7.x, r1, -c7
+//
+//    temp = dp4( cosDist, toCenter_Y );
+//    accumCos.y += temp.xxxx;
+dp4         r7.y, r1, -c8
+//
+// }
+//
+// accumBin = (1, 0, -accumCos.x);
+// accumTan = (0, 1, -accumCos.y);
+// accumNorm = (accumCos.x, accumCos.y, 1);
+// Only accumNorm is built here: r11 = (0, 0, 1, 0) + r7, then scaled by
+// 1/sqrt(dot3(r11, r11)). The filter held in r11 is overwritten.
+mov         r11, c13.xxzx;
+add         r11, r11, r7;
+dp3         r10.x, r11, r11;
+rsq         r10.x, r10.x;
+mul         r11, r11, r10.xxxx;
+
+//
+// Add in our scrunch (offset in X/Y plane).
+// Scale down our scrunch amount by the wave scaling
+mul         r10.x, c9.y, r4.z;
+mad         r6.xy, r11.xy, r10.xx, r6.xy;
+
+// Bias our vert up a bit to compensate for precision errors.
+// In particular, our filter coefficients are coming in as
+// interpolated bytes, so there's bound to be a lot of slop
+// from that. We've got a free slot in c25.x, so we'll use that.
+// A better implementation would be to bias and scale our screen
+// vert, effectively pushing the vert toward the camera without
+// actually moving it, but this is easier and might work just
+// as well.
+add         r6.z, r6.z, c25.x;
+
+//
+// // Transform position to screen
+//
+// oFog = (screenPos.w + c29.x) * c29.y
+//m4x3  r6, v0, c18; // HACKAGE
+//mov       r6.w, c13.z; // HACKAGE
+//m4x4     oPos, r6, c0; // ADDFOG
+m4x4        r9, r6, c0;
+add         r10.x, r9.w, c29.x;
+mul         oFog, r10.x, c29.y;
+mov         oPos, r9;
+
+// Output color is vertex green
+// Output alpha is vertex red (vtx alpha is used for wave filtering)
+// Whole thing modulated by material color/opacity.
+mul     oD0, v5.yyyx, c26;
+
+// Usual texture transform
+// z and w of both outputs are set to c13.z; x/y come from the dp4 rows.
+mov     r11.zw, c13.zzzz;
+dp4     r11.x, v7, c14;
+dp4     r11.y, v7, c15;
+mov     oT0, r11;
+
+// Second layer uses the second UV set (v8).
+dp4     r11.x, v8, c16;
+dp4     r11.y, v8, c17;
+mov     oT1, r11;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDec2Lay12_7.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDec2Lay12_7.inl
@ -1,192 +1,192 @@
-
-vs.1.1
-
-dcl_position v0
-dcl_color v5
-dcl_texcoord0 v7
-dcl_texcoord1 v8
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c18; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c13.z;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = illumination
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c22 = waterlevel + offset
-//	c23 = (maxAtten - minAtten) / depthFalloff
-//	c24 = minAtten.
-// And in particular:
-//	c22.w = waterlevel
-//	c23.w = 1.f;
-//	c24.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c7, r6.xxxx;
-mad		r0, c8, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c4;
-add			r0, r0, c5;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c12.wwww;
-add			r0, r0, c12.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c12.wwww;
-//    dist += -kPi;
-sub         r0, r0, c12.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c10.wwww, r2;
-mad         r1, r4, c11.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c22, r6.zzzz;
-mul			r4, r4, c23;
-add			r4, r4, c24;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c13.zzzz;
-max			r4.xyz, r4, c13.xxxx;
-//mov r4.xyz, c13.xxx; // HACKTEST
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c21;
-max			r11, r11, c13.xxxx;
-min			r11, r11, c13.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r2, r2, c6;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r2, c13.zzzz;
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c22.w;
-max			r6.z, r6.z, r8.z;
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= filter;
-mul         r1, r1, r11;
-
-// Pos = (in.x + S, in.y + R, r6.z)
-// S = sum(k Dir.x A cos())
-// R = sum(k Dir.y A cos())
-// c30 = k Dir.x A
-// c31 = k Dir.y A
-//    S = sum(cosDist * c30);
-dp4         r7.x, r1, c30;
-//    R = sum(cosDist * c31);
-dp4			r7.y, r1, c31;
-
-add			r6.xy, r6.xy, r7.xy;
-
-// Bias our vert up a bit to compensate for precision errors.
-// In particular, our filter coefficients are coming in as
-// interpolated bytes, so there's bound to be a lot of slop
-// from that. We've got a free slot in c25.x, so we'll use that.
-// A better implementation would be to bias and scale our screen
-// vert, effectively pushing the vert toward the camera without
-// actually moving it, but this is easier and might work just
-// as well.
-add			r6.z, r6.z, c25.x;
-
-//
-// // Transform position to screen
-//
-//
-//m4x3	r6, v0, c18; // HACKAGE
-//mov		r6.w, c13.z; // HACKAGE
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c29.x;
-mul			oFog, r10.x, c29.y;
-mov			oPos, r9;
-
-// Output color is vertex green
-// Output alpha is vertex red (vtx alpha is used for wave filtering)
-// Whole thing modulated by material color/opacity.
-mul		oD0, v5.yyyx, c26;
-
-// Usual texture transform
-mov		r11.zw, c13.zzzz;
-dp4		r11.x, v7, c14;
-dp4		r11.y, v7, c15;
-mov		oT0, r11;
-
-dp4		r11.x, v8, c16;
-dp4		r11.y, v8, c17;
-mov		oT1, r11;
+
+vs.1.1
+//
+// Wave vertex shader, two texture layers (oT0 from v7, oT1 from v8).
+// Moves v0 into world space, raises z by the sum of four filtered sine
+// waves (one wave per vector component), offsets x/y by the filtered
+// cosines weighted with c30/c31, then projects to screen and emits fog,
+// a material-modulated vertex color and two transformed UV sets.
+//
+// Constants as read by this shader:
+//  c0-c3   world-to-screen matrix (m4x4)
+//  c4      kFreq, c5 kPhase, c6 kAmplitude
+//  c7/c8   x / y components of the wave direction vectors
+//  c10/c11 sine / cosine polynomial coefficients (vSin / vCos)
+//  c12.z   kPi, c12.w kTwoPi
+//  c13.x   used as 0, c13.z used as 1
+//  c14-c17 texture transform rows (c14/c15 -> oT0, c16/c17 -> oT1)
+//  c18-c20 local-to-world matrix (m4x3)
+//  c21     scale applied to v5.w for the per-wave filter
+//  c22-c24 depth attenuation constants (described below)
+//  c25.x   z bias
+//  c26     material color/opacity
+//  c29.x   fog offset, c29.y fog scale
+//  c30/c31 k Dir.x A / k Dir.y A weights for the x/y offset
+
+dcl_position v0
+dcl_color v5
+dcl_texcoord0 v7
+dcl_texcoord1 v8
+
+// Store our input position in world space in r6
+m4x3        r6, v0, c18; // v0 * l2w
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c13.z;
+
+//
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = illumination
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c22 = waterlevel + offset
+//  c23 = (maxAtten - minAtten) / depthFalloff
+//  c24 = minAtten.
+// And in particular:
+//  c22.w = waterlevel
+//  c23.w = 1.f;
+//  c24.w = 0;
+// So r4.w is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+// r0 = c7 * pos.x + c8 * pos.y, one wave per component.
+mul     r0, c7, r6.xxxx;
+mad     r0, c8, r6.yyyy, r0;
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+mul         r0, r0, c4;
+add         r0, r0, c5;
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist = (dist + kPi) * rcp(kTwoPi);
+rcp         r4, c12.wwww;
+add         r0, r0, c12.zzzz;
+mul         r0, r0, r4;
+//    dist = frac(dist);
+// expp with a .y write mask yields the fractional part of its source, so
+// x, z and w are each routed through r1.y and copied out; y is done last
+// so its fraction is left in place in r1.y.
+expp     r1.y, r0.xxxx
+mov      r1.x, r1.yyyy
+expp     r1.y, r0.zzzz
+mov      r1.z, r1.yyyy
+expp     r1.y, r0.wwww
+mov      r1.w, r1.yyyy
+expp     r1.y, r0.yyyy
+//    dist *= kTwoPi;
+mul         r0, r1, c12.wwww;
+//    dist += -kPi;
+sub         r0, r0, c12.zzzz;
+
+//
+//    sincos(dist, sinDist, cosDist);
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
+// cos = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
+mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6
+mad         r2, r5, c10.wwww, r2;   // r2 += r0^7 * vSin.w
+mad         r1, r4, c11.wwww, r1;   // r1 += r0^6 * vCos.w
+
+// Calc our depth based filtering here into r4 (its r0^6 contents aren't used
+// again after here, and we need our filtering shortly).
+// r4 = (c22 - r6.z) * c23 + c24
+sub         r4, c22, r6.zzzz;
+mul         r4, r4, c23;
+add         r4, r4, c24;
+// Clamp .xyz to range [0..1]
+min         r4.xyz, r4, c13.zzzz;
+max         r4.xyz, r4, c13.xxxx;
+//mov r4.xyz, c13.xxx; // HACKTEST
+
+// Calc our filter (see above): r11 = clamp(v5.w * c21, 0, 1).
+// NOTE(review): the formula in the comment above subtracts 1 before clamping,
+// but no -1 term appears here; confirm which is intended.
+mul         r11, v5.wwww, c21;
+max         r11, r11, c13.xxxx;
+min         r11, r11, c13.zzzz;
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+mul         r2, r2, c6;
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r2, c13.zzzz;
+mul         r8.y, r8.x, r4.z;
+add         r8.z, r8.y, c22.w;
+max         r6.z, r6.z, r8.z;
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level
+//
+//    cosDist *= filter;
+mul         r1, r1, r11;
+
+// Pos = (in.x + S, in.y + R, r6.z)
+// S = sum(k Dir.x A cos())
+// R = sum(k Dir.y A cos())
+// c30 = k Dir.x A
+// c31 = k Dir.y A
+//    S = sum(cosDist * c30);
+dp4         r7.x, r1, c30;
+//    R = sum(cosDist * c31);
+dp4         r7.y, r1, c31;
+
+// Apply the horizontal offset (S, R) to the world-space x/y.
+add         r6.xy, r6.xy, r7.xy;
+
+// Bias our vert up a bit to compensate for precision errors.
+// In particular, our filter coefficients are coming in as
+// interpolated bytes, so there's bound to be a lot of slop
+// from that. We've got a free slot in c25.x, so we'll use that.
+// A better implementation would be to bias and scale our screen
+// vert, effectively pushing the vert toward the camera without
+// actually moving it, but this is easier and might work just
+// as well.
+add         r6.z, r6.z, c25.x;
+
+//
+// // Transform position to screen
+//
+// oFog = (screenPos.w + c29.x) * c29.y
+//m4x3  r6, v0, c18; // HACKAGE
+//mov       r6.w, c13.z; // HACKAGE
+//m4x4     oPos, r6, c0; // ADDFOG
+m4x4        r9, r6, c0;
+add         r10.x, r9.w, c29.x;
+mul         oFog, r10.x, c29.y;
+mov         oPos, r9;
+
+// Output color is vertex green
+// Output alpha is vertex red (vtx alpha is used for wave filtering)
+// Whole thing modulated by material color/opacity.
+mul     oD0, v5.yyyx, c26;
+
+// Usual texture transform
+// z and w of both outputs are set to c13.z; x/y come from the dp4 rows.
+mov     r11.zw, c13.zzzz;
+dp4     r11.x, v7, c14;
+dp4     r11.y, v7, c15;
+mov     oT0, r11;
+
+// Second layer uses the second UV set (v8).
+dp4     r11.x, v8, c16;
+dp4     r11.y, v8, c17;
+mov     oT1, r11;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDecEnv.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDecEnv.inl
@ -1,298 +1,298 @@
-vs.1.1
-
-dcl_position v0
-dcl_color v5
-dcl_texcoord0 v7
-dcl_texcoord1 v8
-dcl_texcoord2 v9
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c18; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c13.z;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = illumination
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c22 = waterlevel + offset
-//	c23 = (maxAtten - minAtten) / depthFalloff
-//	c24 = minAtten.
-// And in particular:
-//	c22.w = waterlevel
-//	c23.w = 1.f;
-//	c24.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c7, r6.xxxx;
-mad		r0, c8, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c4;
-add			r0, r0, c5;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c12.wwww;
-add			r0, r0, c12.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c12.wwww;
-//    dist += -kPi;
-sub         r0, r0, c12.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c10.wwww, r2;
-mad         r1, r4, c11.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c22, r6.zzzz;
-mul			r4, r4, c23;
-add			r4, r4, c24;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c13.zzzz;
-max			r4.xyz, r4, c13.xxxx;
-//mov r4.xyz, c13.xxx; // HACKTEST
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c21;
-max			r11, r11, c13.xxxx;
-min			r11, r11, c13.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r2, r2, c6;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r2, c13.zzzz;
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c22.w;
-max			r6.z, r6.z, r8.z;
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= kFreq.xyzw;
-mul         r1, r1, c4;
-//    cosDist *= kAmplitude.xyzw; // Combine?
-mul         r1, r1, c6;
-//    cosDist *= filter;
-mul         r1, r1, r11;
-//
-// accumCos = (0, 0, 0, 0);
-mov         r7, c13.xxxz;
-//    temp = dp4( cosDist, toCenter_X );
-//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
-dp4         r7.x, r1, -c7
-//
-//    temp = dp4( cosDist, toCenter_Y );
-//    accumCos.y += temp.xxxx;
-dp4         r7.y, r1, -c8
-//
-// }
-//
-// accumBin = (1, 0, -accumCos.x);
-// accumTan = (0, 1, -accumCos.y);
-// accumNorm = (accumCos.x, accumCos.y, 1);
-mov         r11, c13.xxzx;
-add         r11, r11, r7.xyzz;
-dp3         r10.x, r11, r11;
-rsq         r10.x, r10.x;
-mul         r11, r11, r10.xxxx;
-
-//
-// Add in our scrunch (offset in X/Y plane).
-// Scale down our scrunch amount by the wave scaling
-mul			r10.x, c9.y, r4.z;
-mad         r6.xy, r11.xy, r10.xx, r6.xy;
-
-// Bias our vert up a bit to compensate for precision errors.
-// In particular, our filter coefficients are coming in as
-// interpolated bytes, so there's bound to be a lot of slop
-// from that. We've got a free slot in c25.x, so we'll use that.
-// A better implementation would be to bias and scale our screen
-// vert, effectively pushing the vert toward the camera without
-// actually moving it, but this is easier and might work just
-// as well.
-add			r6.z, r6.z, c25.x;
-
-//
-// // Transform position to screen
-//
-//
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c29.x;
-mul			oFog, r10.x, c29.y;
-//mov			oFog.x, c13.y;
-mov			oPos, r9;
-
-// Calculate our normal scrunch and apply to our cosines.
-mul			r2.x, r6.z, c9.x;
-add			r2.x, r2.x, c13.z;
-mul			r2.x, r2.x, r4.z;
-mul			r7.xy, r7.xy, r2.xx;
-
-// Now onto texture coordinate generation.
-//
-// First is the usual texture transform
-mov		r11.zw, c13.zzzz;
-dp4		r11.x, v7, c14;
-dp4		r11.y, v7, c15;
-mov		oT0, r11;
-
-// Calculate our basis vectors as input into our tex3x3vspec
-// This would be like:
-//add			r1, c13.zxxx, r7.zzxz;
-//add			r2, c13.xzxx, r7.zzyz;
-//sub			r3, c13.xxzz, r7.xyzz;
-// BUT =>
-// Now r1-r3 are surface2world, but we still need to fold
-// in texture2surface. That's imbedded in our uv's v8,v9, plus
-// the normal we just computed into r11.
-// So the full matrix multiply surface2world * texture2surface would be:
-//	| r1.v8		r1.v9		r1.(0,0,1) |
-//	| r2.v8		r2.v9		r2.(0,0,1) |
-//	| r3.v8		r3.v9		r3.(0,0,1) |
-// But we notice that
-//	r1 = (1, 0, r7.x)
-//	r2 = (0, 1, r7.y)
-//	r3 = (-r7.x, -r7.y, 1)
-// and also:
-//	r7.z == v8.z == v9.z == 0
-// and r7.w == 1.0
-//
-// Considering the zeros, and doing the matrix multiply by hand, we get
-// the final matrix of
-//	|	v8.x		v9.x		r7.x	|
-//	|	v8.y		v9.y		r7.y	|
-//	|	-dp3(r7,v8)	-dp3(r7,v9)	1		|
-// So we wind up not needing r1-r3 at all
-add			r1, v8.xzzz, r7.zzxw;
-mov			r1.y, v9.x;
-
-add			r2, v8.yzzz, r7.zzxw;
-mov			r2.y, v9.y;
-
-dp3			r3.x, -r7, v8;
-dp3			r3.y, -r7, v9;
-mov			r3.zw, r7.ww;
-
-// Following section is debug only to skip the per-vert tangent space axes.
-//add r1, c13.zxxx, r7.zzxw;
-//add r2, c13.xzxx, r7.zzyw;
-//
-//mov r3.x, -r7.x;
-//mov r3.y, -r7.y;
-//mov r3.zw, c13.zz;
-
-// See vs_WaveFixedFin6.inl for derivation of the following
-sub			r0, r6, c27; // c27 is camera position.
-dp3			r10.x, r0, r0;
-rsq			r10.x, r10.x;
-mul			r0, r0, r10.xxxx;
-
-dp3			r10.x, r0, c28; // c28 is kEnvAdjust
-mad			r10.y, r10.x, r10.x, -c28.w;
-
-rsq			r9.x, r10.y;
-
-mad			r10.z, r10.y, r9.x, r10.x;
-
-mad			r0.xyz, r0, r10.zzz, -c28.xyz;
-
-mov			r1.w, -r0.x;
-mov			r2.w, -r0.y;
-mov			r3.w, -r0.z;
-
-// Now r1-r3 are texture2world, with the eye-ray vector in .w. We just
-// need to normalize them and bung them into output UV's 1-3.
-// Note we're accounting for our environment map being flipped from
-// D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2.
-mov r10.w, c13.z;
-dp3			r10.x, r1, r1;
-rsq			r10.x, r10.x;
-mul			oT1, r1, r10.xxxw;
-
-dp3			r10.x, r3, r3;
-rsq			r10.x, r10.x;
-mul			oT2, r3, r10.xxxw;
-//mul			oT3, r3, r10.xxxw; // YZHACK
-
-dp3			r10.x, r2, r2;
-rsq			r10.x, r10.x;
-mul			oT3, r2, r10.xxxw;
-//mul			oT2, r2, r10.xxxw;
-
-// Output color is vertex green
-// Output alpha is vertex red (vtx alpha is used for wave filtering)
-// Whole thing modulated by material color/opacity.
-mul		oD0, v5.yyyx, c26;
-
+vs.1.1
+// Water wave vertex shader: offsets the vertex by a sum of four sine waves, then emits fog, base UVs, an env-map basis and color.
+dcl_position v0 // local-space position (taken to world space by c18 below)
+dcl_color v5 // per-vertex controls; .w drives the wave filter, .yyyx feeds the output color
+dcl_texcoord0 v7 // base UVs, transformed by c14/c15 into oT0
+dcl_texcoord1 v8 // texture-to-surface axis, dotted against the wave slopes further down
+dcl_texcoord2 v9 // second texture-to-surface axis, used the same way as v8
+
+// Store our input position in world space in r6
+m4x3        r6, v0, c18; // v0 * l2w
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c13.z; // c13.z is the constant 1 (it doubles as kOne in the height dp4 later)
+
+//
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = illumination
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c22 = waterlevel + offset
+//  c23 = (maxAtten - minAtten) / depthFalloff
+//  c24 = minAtten.
+// And in particular:
+//  c22.w = waterlevel
+//  c23.w = 1.f;
+//  c24.w = 0;
+// So r4.w is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+mul     r0, c7, r6.xxxx; // one wave per component: c7 = direction x terms
+mad     r0, c8, r6.yyyy, r0; // c8 = direction y terms; r0 = dir . pos.xy for each wave
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+mul         r0, r0, c4; // c4 = kFreq
+add         r0, r0, c5; // c5 = kPhase
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist *= rcp(kTwoPi);
+rcp         r4, c12.wwww; // r4 = 1 / kTwoPi (c12.w)
+add         r0, r0, c12.zzzz; // pre-add kPi (c12.z) so the -kPi after frac() lands on the same angle
+mul         r0, r0, r4;
+//    dist = frac(dist);
+expp     r1.y, r0.xxxx // expp with a .y write mask yields the fractional part of the source
+mov      r1.x, r1.yyyy // r1.y is only scratch here; park frac(r0.x) in r1.x
+expp     r1.y, r0.zzzz
+mov      r1.z, r1.yyyy
+expp     r1.y, r0.wwww
+mov      r1.w, r1.yyyy
+expp     r1.y, r0.yyyy // done last so r1.y ends up holding frac(r0.y)
+//    dist *= kTwoPi;
+mul         r0, r1, c12.wwww;
+//    dist += -kPi;
+sub         r0, r0, c12.zzzz;
+
+// Polynomial sine/cosine of the wrapped angles in r0 (c10 = vSin, c11 = vCos coefficients).
+//    sincos(dist, sinDist, cosDist);
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
+// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
+mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6 (r0^5 was already consumed by the sine mad above)
+mad         r2, r5, c10.wwww, r2; // r2 += r0^7 * vSin.w  => sinDist
+mad         r1, r4, c11.wwww, r1; // r1 += r0^6 * vCos.w  => cosDist
+
+// Calc our depth based filtering here into r4 (its old contents, r0^6, are not
+// needed after this point, and we need our filtering shortly).
+sub         r4, c22, r6.zzzz; // (waterlevel + offset) - vertex z, per channel
+mul         r4, r4, c23; // * (maxAtten - minAtten) / depthFalloff
+add         r4, r4, c24; // + minAtten
+// Clamp .xyz to range [0..1]
+min         r4.xyz, r4, c13.zzzz; // c13.z = 1; r4.w is deliberately left unclamped
+max         r4.xyz, r4, c13.xxxx; // c13.x = 0
+//mov r4.xyz, c13.xxx; // HACKTEST
+
+// Calc our filter (see above).
+mul         r11, v5.wwww, c21; // one filter value per wave
+max         r11, r11, c13.xxxx; // clamp to [0..1]
+min         r11, r11, c13.zzzz;
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+mul         r2, r2, c6;
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r2, c13.zzzz; // sum the four waves
+mul         r8.y, r8.x, r4.z; // r4.z is the depth-based wave scaling computed above
+add         r8.z, r8.y, c22.w; // c22.w = waterlevel
+max         r6.z, r6.z, r8.z; // keep whichever is higher: the input z or the wave surface
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level
+//
+//    cosDist *= kFreq.xyzw;
+mul         r1, r1, c4;
+//    cosDist *= kAmplitude.xyzw; // Combine?
+mul         r1, r1, c6;
+//    cosDist *= filter;
+mul         r1, r1, r11;
+//
+// accumCos = (0, 0, 0, 1); .z stays 0 and .w stays 1, both are relied on later.
+mov         r7, c13.xxxz;
+//    temp = dp4( cosDist, toCenter_X );
+//    accumCos.x += temp.xxxx; (but accumCos.x is currently 0)
+dp4         r7.x, r1, -c7
+//
+//    temp = dp4( cosDist, toCenter_Y );
+//    accumCos.y += temp.xxxx;
+dp4         r7.y, r1, -c8
+//
+// }
+//
+// accumBin = (1, 0, -accumCos.x);
+// accumTan = (0, 1, -accumCos.y);
+// accumNorm = (accumCos.x, accumCos.y, 1);
+mov         r11, c13.xxzx; // (0, 0, 1, 0)
+add         r11, r11, r7.xyzz; // r11 = (accumCos.x, accumCos.y, 1, 0) since r7.z == 0
+dp3         r10.x, r11, r11;
+rsq         r10.x, r10.x;
+mul         r11, r11, r10.xxxx; // r11 = normalized accumNorm
+
+//
+// Add in our scrunch (offset in X/Y plane).
+// Scale down our scrunch amount by the wave scaling
+mul         r10.x, c9.y, r4.z; // c9.y = scrunch amount, r4.z = wave scaling
+mad         r6.xy, r11.xy, r10.xx, r6.xy; // slide the vertex along the normal's xy
+
+// Bias our vert up a bit to compensate for precision errors.
+// In particular, our filter coefficients are coming in as
+// interpolated bytes, so there's bound to be a lot of slop
+// from that. We've got a free slot in c25.x, so we'll use that.
+// A better implementation would be to bias and scale our screen
+// vert, effectively pushing the vert toward the camera without
+// actually moving it, but this is easier and might work just
+// as well.
+add         r6.z, r6.z, c25.x;
+
+//
+// // Transform position to screen
+//
+// The result goes through r9 (not straight to oPos) so that its w can feed the fog term.
+//m4x4     oPos, r6, c0; // ADDFOG
+m4x4        r9, r6, c0;
+add         r10.x, r9.w, c29.x; // fog = (screen w + c29.x) * c29.y
+mul         oFog, r10.x, c29.y;
+//mov           oFog.x, c13.y;
+mov         oPos, r9;
+
+// Calculate our normal scrunch and apply to our cosines.
+mul         r2.x, r6.z, c9.x; // note r6.z already includes the c25.x bias here
+add         r2.x, r2.x, c13.z; // + 1
+mul         r2.x, r2.x, r4.z; // scaled by the depth-based wave scaling
+mul         r7.xy, r7.xy, r2.xx; // r7.xy = scrunched slopes used for the basis below
+
+// Now onto texture coordinate generation.
+//
+// First is the usual texture transform
+mov     r11.zw, c13.zzzz; // z = w = 1
+dp4     r11.x, v7, c14;
+dp4     r11.y, v7, c15;
+mov     oT0, r11;
+
+// Calculate our basis vectors as input into our tex3x3vspec
+// This would be like:
+//add           r1, c13.zxxx, r7.zzxz;
+//add           r2, c13.xzxx, r7.zzyz;
+//sub           r3, c13.xxzz, r7.xyzz;
+// BUT =>
+// Now r1-r3 are surface2world, but we still need to fold
+// in texture2surface. That's imbedded in our uv's v8,v9, plus
+// the normal we just computed into r11.
+// So the full matrix multiply surface2world * texture2surface would be:
+//  | r1.v8     r1.v9       r1.(0,0,1) |
+//  | r2.v8     r2.v9       r2.(0,0,1) |
+//  | r3.v8     r3.v9       r3.(0,0,1) |
+// But we notice that
+//  r1 = (1, 0, r7.x)
+//  r2 = (0, 1, r7.y)
+//  r3 = (-r7.x, -r7.y, 1)
+// and also:
+//  r7.z == v8.z == v9.z == 0
+// and r7.w == 1.0
+//
+// Considering the zeros, and doing the matrix multiply by hand, we get
+// the final matrix of
+//  |   v8.x        v9.x        r7.x    |
+//  |   v8.y        v9.y        r7.y    |
+//  |   -dp3(r7,v8) -dp3(r7,v9) 1       |
+// So we wind up not needing r1-r3 at all
+add         r1, v8.xzzz, r7.zzxw; // r1 = (v8.x, 0, r7.x, 1), given v8.z == r7.z == 0 and r7.w == 1
+mov         r1.y, v9.x; // r1 = (v8.x, v9.x, r7.x, 1)
+
+add         r2, v8.yzzz, r7.zzyw; // r2 = (v8.y, 0, r7.y, 1); .z must be r7.y (was r7.x), per the matrix above
+mov         r2.y, v9.y; // r2 = (v8.y, v9.y, r7.y, 1)
+
+dp3         r3.x, -r7, v8; // -dp3(r7, v8)
+dp3         r3.y, -r7, v9; // -dp3(r7, v9)
+mov         r3.zw, r7.ww; // z = w = 1; the .w of r1-r3 is overwritten with the eye ray later
+
+// Following section is debug only to skip the per-vert tangent space axes.
+//add r1, c13.zxxx, r7.zzxw;
+//add r2, c13.xzxx, r7.zzyw;
+//
+//mov r3.x, -r7.x;
+//mov r3.y, -r7.y;
+//mov r3.zw, c13.zz;
+
+// See vs_WaveFixedFin6.inl for derivation of the following
+sub         r0, r6, c27; // c27 is camera position.
+dp3         r10.x, r0, r0;
+rsq         r10.x, r10.x;
+mul         r0, r0, r10.xxxx; // r0 = unit vector from camera to vertex
+
+dp3         r10.x, r0, c28; // c28 is kEnvAdjust
+mad         r10.y, r10.x, r10.x, -c28.w; // r10.y = r10.x^2 - c28.w
+
+rsq         r9.x, r10.y; // r9 is free scratch again: oPos was already written from it
+
+mad         r10.z, r10.y, r9.x, r10.x; // r10.y * rsq(r10.y) == sqrt(r10.y), so r10.z = sqrt(r10.y) + r10.x
+
+mad         r0.xyz, r0, r10.zzz, -c28.xyz; // adjusted eye ray = r0 * r10.z - c28.xyz
+
+mov         r1.w, -r0.x; // stash the negated ray, one component per basis row
+mov         r2.w, -r0.y;
+mov         r3.w, -r0.z;
+
+// Now r1-r3 are texture2world, with the eye-ray vector in .w. We just
+// need to normalize them and bung them into output UV's 1-3.
+// Note we're accounting for our environment map being flipped from
+// D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2.
+mov r10.w, c13.z; // r10.w = 1 so the .xxxw multiplies below scale xyz but pass .w (eye ray) through
+dp3         r10.x, r1, r1;
+rsq         r10.x, r10.x;
+mul         oT1, r1, r10.xxxw;
+
+dp3         r10.x, r3, r3;
+rsq         r10.x, r10.x;
+mul         oT2, r3, r10.xxxw; // r3 goes to UV2 (deliberate swap, see note above)
+//mul           oT3, r3, r10.xxxw; // YZHACK
+
+dp3         r10.x, r2, r2;
+rsq         r10.x, r10.x;
+mul         oT3, r2, r10.xxxw; // r2 goes to UV3
+//mul           oT2, r2, r10.xxxw;
+
+// Output color is vertex green
+// Output alpha is vertex red (vtx alpha is used for wave filtering)
+// Whole thing modulated by material color/opacity.
+mul     oD0, v5.yyyx, c26; // rgb = v5.g, a = v5.r, times c26
+
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDecEnv_7.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveDecEnv_7.inl
@ -1,331 +1,331 @@
-
-
-vs.1.0
-
-dcl_position v0
-dcl_color v5
-dcl_texcoord0 v7
-dcl_texcoord1 v8
-dcl_texcoord2 v9
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c18; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c13.z;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = illumination
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c22 = waterlevel + offset
-//	c23 = (maxAtten - minAtten) / depthFalloff
-//	c24 = minAtten.
-// And in particular:
-//	c22.w = waterlevel
-//	c23.w = 1.f;
-//	c24.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c7, r6.xxxx;
-mad		r0, c8, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c4;
-add			r0, r0, c5;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c12.wwww;
-add			r0, r0, c12.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c12.wwww;
-//    dist += -kPi;
-sub         r0, r0, c12.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c10.wwww, r2;
-mad         r1, r4, c11.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c22, r6.zzzz;
-mul			r4, r4, c23;
-add			r4, r4, c24;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c13.zzzz;
-max			r4.xyz, r4, c13.xxxx;
-//mov r4.xyz, c13.xxx; // HACKTEST
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c21;
-max			r11, r11, c13.xxxx;
-min			r11, r11, c13.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r2, r2, c6;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r2, c13.zzzz;
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c22.w;
-max			r6.z, r6.z, r8.z;
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= filter;
-mul         r1, r1, r11;
-// Pos = (in.x + S, in.y + R, r6.z)
-// S = sum(k Dir.x A cos())
-// R = sum(k Dir.y A cos())
-// c30 = k Dir.x A
-// c31 = k Dir.y A
-//    S = sum(cosDist * c30);
-dp4         r7.x, r1, c30;
-//    R = sum(cosDist * c31);
-dp4			r7.y, r1, c31;
-
-add			r6.xy, r6.xy, r7.xy;
-
-
-// Bias our vert up a bit to compensate for precision errors.
-// In particular, our filter coefficients are coming in as
-// interpolated bytes, so there's bound to be a lot of slop
-// from that. We've got a free slot in c25.x, so we'll use that.
-// A better implementation would be to bias and scale our screen
-// vert, effectively pushing the vert toward the camera without
-// actually moving it, but this is easier and might work just
-// as well.
-add			r6.z, r6.z, c25.x;
-
-//
-// // Transform position to screen
-//
-//
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c29.x;
-mul			oFog, r10.x, c29.y;
-//mov			oFog, c13.y;
-mov			oPos, r9;
-
-// Now onto texture coordinate generation.
-//
-// First is the usual texture transform
-mov		r11.zw, c13.zzzz;
-dp4		r11.x, v7, c14;
-dp4		r11.y, v7, c15;
-mov		oT0, r11;
-
-// Calculate our basis vectors as input into our tex3x3vspec
-// First we get our basis set off our surface. This is
-// Okay, here we go:
-// W == sum(k w Dir.x^2 A sin()) x
-// V == sum(k w Dir.x Dir.y A sin()) x
-// U == sum(k w Dir.y^2 A sin()) x
-//
-// T == sum(A sin())
-//
-// S == sum(k Dir.x A cos())
-// R == sum(k Dir.y A cos())
-//
-// Q == sum(k w A cos()) x
-//
-// M == sum(A cos())
-//
-// P == sum(w Dir.x A cos()) x
-// N == sum(w Dir.y A cos()) x
-//
-// Then:
-// Pos = (in.x + S, in.y + R, waterheight + T) // Already done above.
-//
-// Bin = (1 - W, -V, P)
-// Tan = (-V, 1 - U, N)
-// Nor = (-P, -N, 1 - Q)
-//
-// The matrix
-//		|Bx, Tx, Nx|
-//		|By, Ty, Ny|
-//		|Bz, Tz, Nz|
-// is surface2world, but we still need to fold in
-// texture2surface. We'll go with the generalized
-// (not assuming a flat surface) partials of dPos/dU and dPos/dV
-// as coming in as uv coords v8 and v9.
-// Then, if r5 = v8 X v9, then texture to surface is
-//		|v8.x, v9.x, r5.x|
-//		|v8.y, v9.y, r5.y|
-//		|v8.z, v9.z, r5.z|
-//
-// So, let's say we calc 3 vectors,
-//		r7 = (Bx, Tx, Nx)
-//		r8 = (By, Ty, Ny)
-//		r9 = (Bz, Tz, Nz)
-//
-// Then surface2world * texture2surface =
-//		|r7 dot v8, r7 dot v9, r7 dot r5|
-//		|r8 dot v8, r8 dot v9, r8 dot r5|
-//		|r9 dot v8, r9 dot v9, r9 dot r5|
-//
-// We will need r5 as v8 X v9
-mov			r7, v8;
-mul			r5.xyz, r7.yzx, v9.zxy;
-mad			r5.xyz, r7.zxy, -v9.yzx, r5.xyz;
-
-// Okay, r1 currently has the vector of cosines, and r2 has vector of sines.
-// Everything will want that times amplitude, so go ahead and fold that in.
-mul			r1, r1, c6; // r1 = A cos() = M
-// Sines already have amplitude folded in, so r2 = A sin() = T.
-// Now just compute r7-9 one element at a time.
-dp4			r7.x, r2, -c35; // r7.x = -W
-dp4			r7.y, r2, -c36; // r7.y = -V
-dp4			r7.z, r1, -c32; // r7.z = -P
-add			r7.x, r7.x, c13.z; // r7.x = 1 - W;
-
-dp4			r8.x, r2, -c36; // r8.x = -V
-dp4			r8.y, r2, -c37; // r8.y = -U
-dp4			r8.z, r1, -c33; // r8.z = -N
-add			r8.y, r8.y, c13.z; // r8.y = 1 - U
-
-dp4			r9.z, r2, -c34; // r9.z = -Q
-mov			r9.x, -r7.z; // r9.x = P = -r7.z
-mov			r9.y, -r8.z; // r9.y = N = -r8.z
-add			r9.z, r9.z, c13.z; // r9.z = 1 - Q
-
-// Okay, got everything we need, construct r1-3 as surface2world*texture2surface.
-dp3			r1.x, r7, v8;
-dp3			r1.y, r7, v9;
-dp3			r1.z, r7, r5;
-
-dp3			r2.x, r8, v8;
-dp3			r2.y, r8, v9;
-dp3			r2.z, r8, r5;
-
-dp3			r3.x, r9, v8;
-dp3			r3.y, r9, v9;
-dp3			r3.z, r9, r5;
-
-// Following section is debug only to skip the per-vert tangent space axes.
-//add r1, c13.zxxx, r7.zzxw;
-//add r2, c13.xzxx, r7.zzyw;
-//
-//mov r3.x, -r7.x;
-//mov r3.y, -r7.y;
-//mov r3.zw, c13.zz;
-
-// See vs_WaveFixedFin6.inl for derivation of the following
-sub			r0, r6, c27; // c27 is camera position.
-dp3			r10.x, r0, r0;
-rsq			r10.x, r10.x;
-mul			r0, r0, r10.xxxx;
-
-dp3			r10.x, r0, c28; // c28 is kEnvAdjust
-mad			r10.y, r10.x, r10.x, -c28.w;
-
-rsq			r9.x, r10.y;
-
-mad			r10.z, r10.y, r9.x, r10.x;
-
-mad			r0.xyz, r0, r10.zzz, -c28.xyz;
-
-// ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump.
-dp3			r10.x, r0, r0;
-rsq			r9.x, r10.x;
-mul			r0.xyz, r0.xyz, r9.xxx;
-
-mov			r1.w, -r0.x;
-mov			r2.w, -r0.y;
-mov			r3.w, -r0.z;
-
-
-// Now r1-r3 are texture2world, with the eye-ray vector in .w. We just
-// need to normalize them and bung them into output UV's 1-3.
-// Note we're accounting for our environment map being flipped from
-// D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2.
-mov r10.w, c13.z;
-dp3			r10.x, r1, r1;
-rsq			r10.x, r10.x;
-mul			oT1, r1, r10.xxxw;
-
-dp3			r10.x, r3, r3;
-rsq			r10.x, r10.x;
-mul			oT2, r3, r10.xxxw;
-//mul			oT3, r3, r10.xxxw; // YZHACK
-
-dp3			r10.x, r2, r2;
-rsq			r10.x, r10.x;
-mul			oT3, r2, r10.xxxw;
-//mul			oT2, r2, r10.xxxw;
-
-// Output color is vertex green
-// Output alpha is vertex red (vtx alpha is used for wave filtering)
-// Whole thing modulated by material color/opacity.
-mul		oD0, v5.yyyx, c26;
-
+
+
+vs.1.0
+
+dcl_position v0
+dcl_color v5
+dcl_texcoord0 v7
+dcl_texcoord1 v8
+dcl_texcoord2 v9
+
+// Store our input position in world space in r6
+m4x3        r6, v0, c18; // v0 * l2w
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c13.z;
+
+//
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = illumination
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c22 = waterlevel + offset
+//  c23 = (maxAtten - minAtten) / depthFalloff
+//  c24 = minAtten.
+// And in particular:
+//  c22.w = waterlevel
+//  c23.w = 1.f;
+//  c24.w = 0;
+// So r4.w is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+mul     r0, c7, r6.xxxx;
+mad     r0, c8, r6.yyyy, r0;
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+mul         r0, r0, c4;
+add         r0, r0, c5;
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist *= rcp(kTwoPi);
+rcp         r4, c12.wwww;
+add         r0, r0, c12.zzzz;
+mul         r0, r0, r4;
+//    dist = frac(dist);
+expp     r1.y, r0.xxxx
+mov      r1.x, r1.yyyy
+expp     r1.y, r0.zzzz
+mov      r1.z, r1.yyyy
+expp     r1.y, r0.wwww
+mov      r1.w, r1.yyyy
+expp     r1.y, r0.yyyy
+//    dist *= kTwoPi;
+mul         r0, r1, c12.wwww;
+//    dist += -kPi;
+sub         r0, r0, c12.zzzz;
+
+//
+//    sincos(dist, sinDist, cosDist);
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c11.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c10.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c11.xxxx;       // r1 = 1 + r0^2 * vCos.y
+mad         r2, r4, c10.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c11.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6
+mad         r2, r5, c10.wwww, r2;
+mad         r1, r4, c11.wwww, r1;
+
+// Calc our depth based filtering here into r4 (because we don't use it again
+// after here, and we need our filtering shortly).
+sub         r4, c22, r6.zzzz;
+mul         r4, r4, c23;
+add         r4, r4, c24;
+// Clamp .xyz to range [0..1]
+min         r4.xyz, r4, c13.zzzz;
+max         r4.xyz, r4, c13.xxxx;
+//mov r4.xyz, c13.xxx; // HACKTEST
+
+// Calc our filter (see above).
+mul         r11, v5.wwww, c21;
+max         r11, r11, c13.xxxx;
+min         r11, r11, c13.zzzz;
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+mul         r2, r2, c6;
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r2, c13.zzzz;
+mul         r8.y, r8.x, r4.z;
+add         r8.z, r8.y, c22.w;
+max         r6.z, r6.z, r8.z;
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level
+//
+//    cosDist *= filter;
+mul         r1, r1, r11;
+// Pos = (in.x + S, in.y + R, r6.z)
+// S = sum(k Dir.x A cos())
+// R = sum(k Dir.y A cos())
+// c30 = k Dir.x A
+// c31 = k Dir.y A
+//    S = sum(cosDist * c30);
+dp4         r7.x, r1, c30;
+//    R = sum(cosDist * c31);
+dp4         r7.y, r1, c31;
+
+add         r6.xy, r6.xy, r7.xy;
+
+
+// Bias our vert up a bit to compensate for precision errors.
+// In particular, our filter coefficients are coming in as
+// interpolated bytes, so there's bound to be a lot of slop
+// from that. We've got a free slot in c25.x, so we'll use that.
+// A better implementation would be to bias and scale our screen
+// vert, effectively pushing the vert toward the camera without
+// actually moving it, but this is easier and might work just
+// as well.
+add         r6.z, r6.z, c25.x;
+
+//
+// // Transform position to screen
+//
+//
+//m4x4     oPos, r6, c0; // ADDFOG
+m4x4        r9, r6, c0;
+add         r10.x, r9.w, c29.x;
+mul         oFog, r10.x, c29.y;
+//mov           oFog, c13.y;
+mov         oPos, r9;
+
+// Now onto texture coordinate generation.
+//
+// First is the usual texture transform
+mov     r11.zw, c13.zzzz;
+dp4     r11.x, v7, c14;
+dp4     r11.y, v7, c15;
+mov     oT0, r11;
+
+// Calculate our basis vectors as input into our tex3x3vspec
+// First we get our basis set off our surface. This is
+// Okay, here we go:
+// W == sum(k w Dir.x^2 A sin()) x
+// V == sum(k w Dir.x Dir.y A sin()) x
+// U == sum(k w Dir.y^2 A sin()) x
+//
+// T == sum(A sin())
+//
+// S == sum(k Dir.x A cos())
+// R == sum(k Dir.y A cos())
+//
+// Q == sum(k w A sin()) x
+//
+// M == sum(A cos())
+//
+// P == sum(w Dir.x A cos()) x
+// N == sum(w Dir.y A cos()) x
+//
+// Then:
+// Pos = (in.x + S, in.y + R, waterheight + T) // Already done above.
+//
+// Bin = (1 - W, -V, P)
+// Tan = (-V, 1 - U, N)
+// Nor = (-P, -N, 1 - Q)
+//
+// The matrix
+//      |Bx, Tx, Nx|
+//      |By, Ty, Ny|
+//      |Bz, Tz, Nz|
+// is surface2world, but we still need to fold in
+// texture2surface. We'll go with the generalized
+// (not assuming a flat surface) partials of dPos/dU and dPos/dV
+// as coming in as uv coords v8 and v9.
+// Then, if r5 = v8 X v9, then texture to surface is
+//      |v8.x, v9.x, r5.x|
+//      |v8.y, v9.y, r5.y|
+//      |v8.z, v9.z, r5.z|
+//
+// So, let's say we calc 3 vectors,
+//      r7 = (Bx, Tx, Nx)
+//      r8 = (By, Ty, Ny)
+//      r9 = (Bz, Tz, Nz)
+//
+// Then surface2world * texture2surface =
+//      |r7 dot v8, r7 dot v9, r7 dot r5|
+//      |r8 dot v8, r8 dot v9, r8 dot r5|
+//      |r9 dot v8, r9 dot v9, r9 dot r5|
+//
+// We will need r5 as v8 X v9
+mov         r7, v8;
+mul         r5.xyz, r7.yzx, v9.zxy;
+mad         r5.xyz, r7.zxy, -v9.yzx, r5.xyz;
+
+// Okay, r1 currently has the vector of cosines, and r2 has vector of sines.
+// Everything will want that times amplitude, so go ahead and fold that in.
+mul         r1, r1, c6; // r1 = A cos() = M
+// Sines already have amplitude folded in, so r2 = A sin() = T.
+// Now just compute r7-9 one element at a time.
+dp4         r7.x, r2, -c35; // r7.x = -W
+dp4         r7.y, r2, -c36; // r7.y = -V
+dp4         r7.z, r1, -c32; // r7.z = -P
+add         r7.x, r7.x, c13.z; // r7.x = 1 - W;
+
+dp4         r8.x, r2, -c36; // r8.x = -V
+dp4         r8.y, r2, -c37; // r8.y = -U
+dp4         r8.z, r1, -c33; // r8.z = -N
+add         r8.y, r8.y, c13.z; // r8.y = 1 - U
+
+dp4         r9.z, r2, -c34; // r9.z = -Q
+mov         r9.x, -r7.z; // r9.x = P = -r7.z
+mov         r9.y, -r8.z; // r9.y = N = -r8.z
+add         r9.z, r9.z, c13.z; // r9.z = 1 - Q
+
+// Okay, got everything we need, construct r1-3 as surface2world*texture2surface.
+dp3         r1.x, r7, v8;
+dp3         r1.y, r7, v9;
+dp3         r1.z, r7, r5;
+
+dp3         r2.x, r8, v8;
+dp3         r2.y, r8, v9;
+dp3         r2.z, r8, r5;
+
+dp3         r3.x, r9, v8;
+dp3         r3.y, r9, v9;
+dp3         r3.z, r9, r5;
+
+// Following section is debug only to skip the per-vert tangent space axes.
+//add r1, c13.zxxx, r7.zzxw;
+//add r2, c13.xzxx, r7.zzyw;
+//
+//mov r3.x, -r7.x;
+//mov r3.y, -r7.y;
+//mov r3.zw, c13.zz;
+
+// See vs_WaveFixedFin6.inl for derivation of the following
+sub         r0, r6, c27; // c27 is camera position.
+dp3         r10.x, r0, r0;
+rsq         r10.x, r10.x;
+mul         r0, r0, r10.xxxx;
+
+dp3         r10.x, r0, c28; // c28 is kEnvAdjust
+mad         r10.y, r10.x, r10.x, -c28.w;
+
+rsq         r9.x, r10.y;
+
+mad         r10.z, r10.y, r9.x, r10.x;
+
+mad         r0.xyz, r0, r10.zzz, -c28.xyz;
+
+// ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump.
+dp3         r10.x, r0, r0;
+rsq         r9.x, r10.x;
+mul         r0.xyz, r0.xyz, r9.xxx;
+
+mov         r1.w, -r0.x;
+mov         r2.w, -r0.y;
+mov         r3.w, -r0.z;
+
+
+// Now r1-r3 are texture2world, with the eye-ray vector in .w. We just
+// need to normalize them and bung them into output UV's 1-3.
+// Note we're accounting for our environment map being flipped from
+// D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2.
+mov r10.w, c13.z;
+dp3         r10.x, r1, r1;
+rsq         r10.x, r10.x;
+mul         oT1, r1, r10.xxxw;
+
+dp3         r10.x, r3, r3;
+rsq         r10.x, r10.x;
+mul         oT2, r3, r10.xxxw;
+//mul           oT3, r3, r10.xxxw; // YZHACK
+
+dp3         r10.x, r2, r2;
+rsq         r10.x, r10.x;
+mul         oT3, r2, r10.xxxw;
+//mul           oT2, r2, r10.xxxw;
+
+// Output color is vertex green
+// Output alpha is vertex red (vtx alpha is used for wave filtering)
+// Whole thing modulated by material color/opacity.
+mul     oD0, v5.yyyx, c26;
+
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveFixedFin6.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveFixedFin6.inl
@ -1,449 +1,449 @@
-vs.1.1
-
-dcl_position v0
-dcl_color v5
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c21; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c16.zzzz;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = reflection strength (transparency)
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c25 = waterlevel + offset
-//	c26 = (maxAtten - minAtten) / depthFalloff
-//	c27 = minAtten.
-// And in particular:
-//	c25.w = waterlevel
-//	c26.w = 1.f;
-//	c27.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c8, r6.xxxx;
-mad		r0, c9, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c5;
-add			r0, r0, c6;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c15.wwww;
-add			r0, r0, c15.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c15.wwww;
-//    dist += -kPi;
-sub         r0, r0, c15.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c13.wwww, r2;
-mad         r1, r4, c14.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c25, r6.zzzz;
-mul			r4, r4, c26;
-add			r4, r4, c27;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c16.zzzz;
-max			r4.xyz, r4, c16.xxxx;
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c24;
-max			r11, r11, c16.xxxx;
-min			r11, r11, c16.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r2, r2, c7;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r2, c16.zzzz;
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c25.w;
-max			r6.z, r6.z, r8.z; // CLAMP
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= kFreq.xyzw;
-mul         r1, r1, c5;
-//    cosDist *= kAmplitude.xyzw; // Combine?
-mul         r1, r1, c7;
-//    cosDist *= filter;
-mul         r1, r1, r11;
-//
-// accumCos = (0, 0, 0, 0);
-mov         r7, c16.xxxx;
-//    temp = dp4( cosDist, toCenter_X );
-//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
-dp4         r7.x, r1, -c8
-//
-//    temp = dp4( cosDist, toCenter_Y );
-//    accumCos.y += temp.xxxx;
-dp4         r7.y, r1, -c9
-//
-// }
-//
-// accumBin = (1, 0, -accumCos.x);
-// accumTan = (0, 1, -accumCos.y);
-// accumNorm = (accumCos.x, accumCos.y, 1);
-mov         r11, c16.xxzx;
-add         r11, r11, r7;
-dp3         r10.x, r11, r11;
-rsq         r10.x, r10.x;
-mul         r11, r11, r10.xxxx;
-
-//
-// // Scrunch in based on computed (normalized) normal
-// temp = mul( accumNorm, kNegScrunchScale ); // kNegScrunchScale = (-scrunchScale, -scrunchScale, 0, 0);
-// accumPos += temp;
-//dp3			r10.x, r11, c18.zxw; // winddir.x, winddir.y, 0, 0 // NUKE
-// r10.x tells us whether our normal is opposed to the wind.
-// If opposed, r10.x = 0, else r10.x = 1.f;
-// We'll use this to kill the Scrunch on the back sides of waves.
-// We use it for position right here, and then again for the
-// normal just down a bit further.
-//slt			r10.x, r10.x, c16.x; // NUKE
-//mov			r10.x, c16.z; // HACKAGE NUKE
-//mul			r9, r10.xxxx, r11; // NUKE
-
-// Add in our scrunch (offset in X/Y plane).
-// Scale down our scrunch amount by the wave scaling
-mul			r10.x, c12.y, r4.z;
-//mov	r10.x, c12.y; // NUKETEST TAKEOUT
-mad         r6.xy, r11.xy, r10.xx, r6.xy;
-
-//   mul			r6.z, r6.z, r10.xxxx; DEBUG
-
-//   mad         r6, r11, c12.yyzz, r6;
-
-// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
-// accumCos *= (scrunchScale, scrunchScale, 0, 0);
-
-mul			r2.x, r6.z, c12.x;
-//mad			r2.x, r2.x, r10.x, c16.z; NUKE
-add			r2.x, r2.x, c16.z;
-mul			r2.x, r2.x, r4.z; // HACKAGE // NUKETEST BACKIN
-
-//   mul         r7, r7, c12.xxzz;
-mul			r7.xy, r7.xy, r2.xx;
-
-// This is actually wrong, but useful right now for visualizing the generated coords.
-// See below for correct version.
-
-sub			r3, c16.xxzz, r7.xyzz;
-
-//mov			oD0, r3; // SEENORM
-
-dp3			r8.x, r3, c18.zxww; // WAVEFACE
-mul			r8.x, r8.x, c12.w; // WAVEFACE
-max			r8.x, r8.x, c16.x; // WAVEFACE
-min			r8.x, r8.x, c16.z; // WAVEFACE
-//mov			r9.x, c12.z;
-//add			r9.x, r9.x, -c16.z;
-//mad			r8.x, r9.x, r8.x, c16.z; // WAVEFACE
-mul			r8.x, r8.x, -c16.z;
-add			r8.x, r8.x, c16.z;
-
-// Normalize?
-
-// We can either calculate an orthonormal basis from the
-// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
-// or compute our basis directly from the partial derivatives, with
-// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
-//
-// These work out to identically the same result, so we'll compute directly
-// from the partials because it takes 2 fewer instructions.
-//
-// Note that our basis is NOT orthonormal. The Normal is equal to
-// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
-// are both correct tangents to the surface, and their projections on the XY plane
-// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
-// Not really. I'm actually not really sure which is more "proper" for bump mapping.
-//
-// Note also that we add when we should subtract and subtract when we should
-// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
-// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
-// explanation.
-//
-// Binormal = Y % Normal
-// Cross product3 is:
-//	mul		res.xyz, a.yzx, b.zxy
-//	mad		res.xyz, -a.zxy, b.yzx, res.xyz
-//   mul			r1.xyz, c16.zxx, r3.zxy;
-//   mad			r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
-
-// Tangent = Normal % X
-//   mul			r2.xyz, r3.yzx, c16.xzx;
-//   mad			r2.xyz, -r3.zxy, c16.xxz, r2;
-
-add			r1, c16.zxxx, r7.zzxz;
-add			r2, c16.xzxx, r7.zzyz;
-
-
-// Note that we're swapping z and y to match our environment map tools in max.
-// We do this through our normal map transform (oT1, oT2, oT3), making it
-// a concatenation of:
-//
-//	rotate about Z (blue) to turn our map into the wind
-//	windRot =	|	dirY	-dirX	0 |
-//				|	dirX	dirY	0 |
-//				|	0		0		1 |
-//
-//	swap our Y and Z axes to match our environment map
-//	swapYZ	=	|	1		0		0 |
-//				|	0		0		1 |
-//				|	0		1		0 |
-//
-//	rotate the normal into the surface's tangent space basis
-//	basis	=	|	Bx		Tx		Nx |
-//				|	By		Ty		Ny |
-//				|	Bz		Tz		Nz |
-//
-//	Note that we've constucted the basis by taking advantage of the
-//	matrix being a pure rotation, as noted below, so r1, r2 and r3
-//	are actually constructed as:
-//	basis	=	|	Bx		-By		-Bz |
-//				|	-Tx		Ty		-Tz |
-//				|	-Nx		-Ny		-Nz |
-//
-//	Then the final normal map transform is:
-//
-//		basis * swapYZ * windRot [ * normal ]
-
-
-//   sub         r1.w, c17.x, r6.x;
-//   sub         r2.w, c17.z, r6.z;
-//   sub         r3.w, c17.y, r6.y;
-
-// Big note here. All this math can blow up if the camera position
-// is outside the environment sphere. It's assumed that's dealt
-// with in the app setting up the constants. For that reason, the
-// camera position used here might not be the real local camera position,
-// which is needed for the angular attenuation, so we burn another constant
-// with our pseudo-camera position. To restrain the pseudo-camera from
-// leaving the sphere, we make:
-//	pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
-// where dist = |realPos - envCenter|
-
-// So, our "finitized" eyeray is:
-//	camPos + D * t - envCenter = D * t - (envCenter - camPos)
-// with
-//	D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
-// and
-//	t = D dot F + sqrt( (D dot F)^2 - G )
-// with
-//	F = (envCenter - camPos)	=> c19.xyz
-//	G = F^2 - R^2				=> c19.w
-//	R = environment radius.		=> unused
-//
-// This all derives from the positive root of equation
-//	(camPos + (pos - camPos) * t - envCenter)^2 = R^2,
-// In other words, where on a sphere of radius R centered about envCenter
-// does the ray from the real camera position through this point hit.
-//
-// Note that F, G, and R are all constants (one point, two scalars).
-//
-// So first we calculate D into r0,
-// then D dot F into r10.x,
-// then (D dot F)^2 - G into r10.y
-// then rsq( (D dot F)^2 - G ) into r9.x;
-// then t = r10.z = r10.x + r10.y * r9.x;
-// and
-// r0 = D * t - (envCenter - camPos)
-//		= r0 * r10.zzzz - F;
-//
-sub			r0, r6, c17;
-dp3			r10.x, r0, r0;
-rsq			r10.x, r10.x;
-mul			r0, r0, r10.xxxx; // r0 = D
-
-dp3			r10.x, r0, c19; // r10.x = D dot F
-mad			r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G
-
-rsq			r9.x, r10.y; // r9.x = 1/SQRT((D dot F)^2 - G)
-
-mad			r10.z, r10.y, r9.x, r10.x; // r10.z = D dot F + SQRT((D dot F)^2 - G)
-
-mad			r0.xyz, r0, r10.zzz, -c19.xyz; // r0.xyz = D * t - (envCenter - camPos)
-
-mov			r1.w, -r0.x;
-mov			r2.w, -r0.y;
-mov			r3.w, -r0.z;
-
-// Now rotate our basis vectors into the wind
-// This should be redone, and put our wind direction into
-// the water texture.
-dp3		r0.x, r1, c18.xyww;
-dp3		r0.y, r1, c18.zxww;
-mov		r1.xy, r0;
-
-dp3		r0.x, r2, c18.xyww;
-dp3		r0.y, r2, c18.zxww;
-mov		r2.xy, r0;
-
-dp3		r0.x, r3, c18.xyww;
-dp3		r0.y, r3, c18.zxww;
-mov		r3.xy, r0;
-
-mov			r0.zw, c16.zzxz;
-
-dp3         r0.x, r1, r1;
-rsq         r0.x, r0.x;
-mul         oT1, r1.xyzw, r0.xxxw;
-//   mul			r8, r1.xyzw, r0.xxxw; // VISUAL
-
-dp3         r0.x, r2, r2;
-rsq         r0.x, r0.x;
-mul         oT3, r2.xyzw, r0.xxxw;
-//   mul			r9, r2.xyzw, r0.xxxw; // VISUAL
-
-dp3         r0.x, r3, r3;
-rsq         r0.x, r0.x;
-mul         oT2, r3.xyzw, r0.xxxw;
-//   mul			r9, r3.xyzw, r0.xxxw; // VISUAL
-
-//	mul		   r3, r3.xzyw, r0.xxxw;
-//	mul			r3.xy, r3, -c16.zzzz;
-
-
-/*
-// Want:
-//    oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
-//    oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
-//    ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
-// with BIN, TAN, and NORM normalized.
-// Unnormalized, we have
-//    BIN = (1, 0, -r7.x) where r7 == accumCos
-//    TAN = (0, 1, -r7.y)
-//    NORM= (r7.x, r7.y, 1)
-// So, unnormalized, we have
-//    oT1 = (1, 0, r7.x, view2pos.x)
-//    oT2 = (0, 1, r7.y, view2pos.y)
-//    oT3 = (-r7.x, -r7.y, 1, view2pos.z)
-// which is just reversing the signs on the accumCos
-// terms above. So the normalized version is just
-// reversing the signs on the normalized version above.
-*/
-//mov oT3, r4;
-
-//
-// // Transform position to screen
-//
-//
-//m4x3	r6, v0, c21; // HACKAGE
-//mov		r6.w, c16.z; // HACKAGE
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c28.x;
-mul			oFog, r10.x, c28.y;
-//mov			oFog, c16.y; // TESTFOGHACK
-mov			oPos, r9;
-
-mov         oD0, c4; // SEENORM
-
-// Transform our uvw
-dp4			r0.x, v0, c10;
-dp4			r0.y, v0, c11;
-
-//mov			r0.zw, c16.xxxz;
-mov			oT0, r0
-
-// Questionble attenuation follows
-// Find vector from this point to camera and normalize
-sub			r0, c17, r6;
-dp3			r1.x, r0, r0;
-rsq			r1.x, r1.x;
-mul			r0, r0, r1.xxxx;
-// Dot that with the computed normal
-dp3			r1.x, r0, r11;
-mul			r1.x, r1.x, v5.z;
-//	dp3			r1.x, r0, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
-// Map dot=1 => 0, dot=0 => 1
-sub			r1.xyzw, c16.zzzz, r1.xxxx;
-add			r1.w, r1.wwww, c16.zzzz;
-mul			r1.w, r1.wwww, c16.yyyy;
-// No need to clamp, since the destination register (in the pixel shader)
-// will saturate [0..1] anyway.
-//%%% mul			r1.w, r1.w, r4.x;
-//%%% mul			r1.xyz, r1.xyz, r4.yyy;
-mul r1, r1, r4.yyyx; // HACKTESTCOLOR
-mul	r1.xyz, r1, r8.xxx; // WAVEFACE
-mul r1.w,	r1.wwww, v5.xxxx;
-mul			oD1, r1, c20;
-
-// mov oD1, r4.yyyy;
-
-//mov			oD1, c16.zzzz; // HACKAGE
-//	mov			oD1, r9;
-//	mov			oD1, r8.xzyw;
+vs.1.1
+
+dcl_position v0
+dcl_color v5
+
+// Store our input position in world space in r6
+m4x3        r6, v0, c21; // v0 * l2w
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c16.zzzz;
+
+//
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = reflection strength (transparency)
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c25 = waterlevel + offset
+//  c26 = (maxAtten - minAtten) / depthFalloff
+//  c27 = minAtten.
+// And in particular:
+//  c25.w = waterlevel
+//  c26.w = 1.f;
+//  c27.w = 0;
+// So r4.w is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+mul     r0, c8, r6.xxxx;
+mad     r0, c9, r6.yyyy, r0;
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+mul         r0, r0, c5;
+add         r0, r0, c6;
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist *= rcp(kTwoPi);
+rcp         r4, c15.wwww;
+add         r0, r0, c15.zzzz;
+mul         r0, r0, r4;
+//    dist = frac(dist);
+expp     r1.y, r0.xxxx
+mov      r1.x, r1.yyyy
+expp     r1.y, r0.zzzz
+mov      r1.z, r1.yyyy
+expp     r1.y, r0.wwww
+mov      r1.w, r1.yyyy
+expp     r1.y, r0.yyyy
+//    dist *= kTwoPi;
+mul         r0, r1, c15.wwww;
+//    dist += -kPi;
+sub         r0, r0, c15.zzzz;
+
+//
+//    sincos(dist, sinDist, cosDist);
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
+// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y
+mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6
+mad         r2, r5, c13.wwww, r2;
+mad         r1, r4, c14.wwww, r1;
+
+// Calc our depth based filtering here into r4 (because we don't use it again
+// after here, and we need our filtering shortly).
+sub         r4, c25, r6.zzzz;
+mul         r4, r4, c26;
+add         r4, r4, c27;
+// Clamp .xyz to range [0..1]
+min         r4.xyz, r4, c16.zzzz;
+max         r4.xyz, r4, c16.xxxx;
+
+// Calc our filter (see above).
+mul         r11, v5.wwww, c24;
+max         r11, r11, c16.xxxx;
+min         r11, r11, c16.zzzz;
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+mul         r2, r2, c7;
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r2, c16.zzzz;
+mul         r8.y, r8.x, r4.z;
+add         r8.z, r8.y, c25.w;
+max         r6.z, r6.z, r8.z; // CLAMP
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level
+//
+//    cosDist *= kFreq.xyzw;
+mul         r1, r1, c5;
+//    cosDist *= kAmplitude.xyzw; // Combine?
+mul         r1, r1, c7;
+//    cosDist *= filter;
+mul         r1, r1, r11;
+//
+// accumCos = (0, 0, 0, 0);
+mov         r7, c16.xxxx;
+//    temp = dp4( cosDist, toCenter_X );
+//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0))
+dp4         r7.x, r1, -c8
+//
+//    temp = dp4( cosDist, toCenter_Y );
+//    accumCos.y += temp.xxxx;
+dp4         r7.y, r1, -c9
+//
+// }
+//
+// accumBin = (1, 0, -accumCos.x);
+// accumTan = (0, 1, -accumCos.y);
+// accumNorm = (accumCos.x, accumCos.y, 1);
+mov         r11, c16.xxzx;
+add         r11, r11, r7;
+dp3         r10.x, r11, r11;
+rsq         r10.x, r10.x;
+mul         r11, r11, r10.xxxx;
+
+//
+// // Scrunch in based on computed (normalized) normal
+// temp = mul( accumNorm, kNegScrunchScale ); // kNegScrunchScale = (-scrunchScale, -scrunchScale, 0, 0);
+// accumPos += temp;
+//dp3           r10.x, r11, c18.zxw; // winddir.x, winddir.y, 0, 0 // NUKE
+// r10.x tells us whether our normal is opposed to the wind.
+// If opposed, r10.x = 0, else r10.x = 1.f;
+// We'll use this to kill the Scrunch on the back sides of waves.
+// We use it for position right here, and then again for the
+// normal just down a bit further.
+//slt           r10.x, r10.x, c16.x; // NUKE
+//mov           r10.x, c16.z; // HACKAGE NUKE
+//mul           r9, r10.xxxx, r11; // NUKE
+
+// Add in our scrunch (offset in X/Y plane).
+// Scale down our scrunch amount by the wave scaling
+mul         r10.x, c12.y, r4.z;
+//mov   r10.x, c12.y; // NUKETEST TAKEOUT
+mad         r6.xy, r11.xy, r10.xx, r6.xy;
+
+//   mul            r6.z, r6.z, r10.xxxx; DEBUG
+
+//   mad         r6, r11, c12.yyzz, r6;
+
+// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
+// accumCos *= (scrunchScale, scrunchScale, 0, 0);
+
+mul         r2.x, r6.z, c12.x;
+//mad           r2.x, r2.x, r10.x, c16.z; NUKE
+add         r2.x, r2.x, c16.z;
+mul         r2.x, r2.x, r4.z; // HACKAGE // NUKETEST BACKIN
+
+//   mul         r7, r7, c12.xxzz;
+mul         r7.xy, r7.xy, r2.xx;
+
+// This is actually wrong, but useful right now for visualizing the generated coords.
+// See below for correct version.
+
+sub         r3, c16.xxzz, r7.xyzz;
+
+//mov           oD0, r3; // SEENORM
+
+dp3         r8.x, r3, c18.zxww; // WAVEFACE
+mul         r8.x, r8.x, c12.w; // WAVEFACE
+max         r8.x, r8.x, c16.x; // WAVEFACE
+min         r8.x, r8.x, c16.z; // WAVEFACE
+//mov           r9.x, c12.z;
+//add           r9.x, r9.x, -c16.z;
+//mad           r8.x, r9.x, r8.x, c16.z; // WAVEFACE
+mul         r8.x, r8.x, -c16.z;
+add         r8.x, r8.x, c16.z;
+
+// Normalize?
+
+// We can either calculate an orthonormal basis from the
+// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
+// or compute our basis directly from the partial derivatives, with
+// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
+//
+// These work out to identically the same result, so we'll compute directly
+// from the partials because it takes 2 fewer instructions.
+//
+// Note that our basis is NOT orthonormal. The Normal is equal to
+// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
+// are both correct tangents to the surface, and their projections on the XY plane
+// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
+// Not really. I'm actually not really sure which is more "proper" for bump mapping.
+//
+// Note also that we add when we should subtract and subtract when we should
+// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
+// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
+// explanation.
+//
+// Binormal = Y % Normal
+// Cross product3 is:
+//  mul     res.xyz, a.yzx, b.zxy
+//  mad     res.xyz, -a.zxy, b.yzx, res.xyz
+//   mul            r1.xyz, c16.zxx, r3.zxy;
+//   mad            r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
+
+// Tangent = Normal % X
+//   mul            r2.xyz, r3.yzx, c16.xzx;
+//   mad            r2.xyz, -r3.zxy, c16.xxz, r2;
+
+add         r1, c16.zxxx, r7.zzxz;
+add         r2, c16.xzxx, r7.zzyz;
+
+
+// Note that we're swapping z and y to match our environment map tools in max.
+// We do this through our normal map transform (oT1, oT2, oT3), making it
+// a concatenation of:
+//
+//  rotate about Z (blue) to turn our map into the wind
+//  windRot =   |   dirY    -dirX   0 |
+//              |   dirX    dirY    0 |
+//              |   0       0       1 |
+//
+//  swap our Y and Z axes to match our environment map
+//  swapYZ  =   |   1       0       0 |
+//              |   0       0       1 |
+//              |   0       1       0 |
+//
+//  rotate the normal into the surface's tangent space basis
+//  basis   =   |   Bx      Tx      Nx |
+//              |   By      Ty      Ny |
+//              |   Bz      Tz      Nz |
+//
+//  Note that we've constructed the basis by taking advantage of the
+//  matrix being a pure rotation, as noted below, so r1, r2 and r3
+//  are actually constructed as:
+//  basis   =   |   Bx      -By     -Bz |
+//              |   -Tx     Ty      -Tz |
+//              |   -Nx     -Ny     -Nz |
+//
+//  Then the final normal map transform is:
+//
+//      basis * swapYZ * windRot [ * normal ]
+
+
+//   sub         r1.w, c17.x, r6.x;
+//   sub         r2.w, c17.z, r6.z;
+//   sub         r3.w, c17.y, r6.y;
+
+// Big note here. All this math can blow up if the camera position
+// is outside the environment sphere. It's assumed that's dealt
+// with in the app setting up the constants. For that reason, the
+// camera position used here might not be the real local camera position,
+// which is needed for the angular attenuation, so we burn another constant
+// with our pseudo-camera position. To restrain the pseudo-camera from
+// leaving the sphere, we make:
+//  pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
+// where dist = |realPos - envCenter|
+
+// So, our "finitized" eyeray is:
+//  camPos + D * t - envCenter = D * t - (envCenter - camPos)
+// with
+//  D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
+// and
+//  t = D dot F + sqrt( (D dot F)^2 - G )
+// with
+//  F = (envCenter - camPos)    => c19.xyz
+//  G = F^2 - R^2               => c19.w
+//  R = environment radius.     => unused
+//
+// This all derives from the positive root of equation
+//  (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
+// In other words, where on a sphere of radius R centered about envCenter
+// does the ray from the real camera position through this point hit.
+//
+// Note that F, G, and R are all constants (one point, two scalars).
+//
+// So first we calculate D into r0,
+// then D dot F into r10.x,
+// then (D dot F)^2 - G into r10.y
+// then rsq( (D dot F)^2 - G ) into r9.x;
+// then t = r10.z = r10.x + r10.y * r9.x;
+// and
+// r0 = D * t - (envCenter - camPos)
+//      = r0 * r10.zzzz - F;
+//
+sub         r0, r6, c17;
+dp3         r10.x, r0, r0;
+rsq         r10.x, r10.x;
+mul         r0, r0, r10.xxxx; // r0 = D
+
+dp3         r10.x, r0, c19; // r10.x = D dot F
+mad         r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G
+
+rsq         r9.x, r10.y; // r9.x = 1/SQRT((D dot F)^2 - G)
+
+mad         r10.z, r10.y, r9.x, r10.x; // r10.z = D dot F + SQRT((D dot F)^2 - G)
+
+mad         r0.xyz, r0, r10.zzz, -c19.xyz; // r0.xyz = D * t - (envCenter - camPos)
+
+mov         r1.w, -r0.x;
+mov         r2.w, -r0.y;
+mov         r3.w, -r0.z;
+
+// Now rotate our basis vectors into the wind
+// This should be redone, and put our wind direction into
+// the water texture.
+dp3     r0.x, r1, c18.xyww;
+dp3     r0.y, r1, c18.zxww;
+mov     r1.xy, r0;
+
+dp3     r0.x, r2, c18.xyww;
+dp3     r0.y, r2, c18.zxww;
+mov     r2.xy, r0;
+
+dp3     r0.x, r3, c18.xyww;
+dp3     r0.y, r3, c18.zxww;
+mov     r3.xy, r0;
+
+mov         r0.zw, c16.zzxz;
+
+dp3         r0.x, r1, r1;
+rsq         r0.x, r0.x;
+mul         oT1, r1.xyzw, r0.xxxw;
+//   mul            r8, r1.xyzw, r0.xxxw; // VISUAL
+
+dp3         r0.x, r2, r2;
+rsq         r0.x, r0.x;
+mul         oT3, r2.xyzw, r0.xxxw;
+//   mul            r9, r2.xyzw, r0.xxxw; // VISUAL
+
+dp3         r0.x, r3, r3;
+rsq         r0.x, r0.x;
+mul         oT2, r3.xyzw, r0.xxxw;
+//   mul            r9, r3.xyzw, r0.xxxw; // VISUAL
+
+//  mul        r3, r3.xzyw, r0.xxxw;
+//  mul         r3.xy, r3, -c16.zzzz;
+
+
+/*
+// Want:
+//    oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
+//    oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
+//    ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
+// with BIN, TAN, and NORM normalized.
+// Unnormalized, we have
+//    BIN = (1, 0, -r7.x) where r7 == accumCos
+//    TAN = (0, 1, -r7.y)
+//    NORM= (r7.x, r7.y, 1)
+// So, unnormalized, we have
+//    oT1 = (1, 0, r7.x, view2pos.x)
+//    oT2 = (0, 1, r7.y, view2pos.y)
+//    oT3 = (-r7.x, -r7.y, 1, view2pos.z)
+// which is just reversing the signs on the accumCos
+// terms above. So the normalized version is just
+// reversing the signs on the normalized version above.
+*/
+//mov oT3, r4;
+
+//
+// // Transform position to screen
+//
+//
+//m4x3  r6, v0, c21; // HACKAGE
+//mov       r6.w, c16.z; // HACKAGE
+//m4x4     oPos, r6, c0; // ADDFOG
+m4x4        r9, r6, c0;
+add         r10.x, r9.w, c28.x;
+mul         oFog, r10.x, c28.y;
+//mov           oFog, c16.y; // TESTFOGHACK
+mov         oPos, r9;
+
+mov         oD0, c4; // SEENORM
+
+// Transform our uvw
+dp4         r0.x, v0, c10;
+dp4         r0.y, v0, c11;
+
+//mov           r0.zw, c16.xxxz;
+mov         oT0, r0
+
+// Questionable attenuation follows
+// Find vector from this point to camera and normalize
+sub         r0, c17, r6;
+dp3         r1.x, r0, r0;
+rsq         r1.x, r1.x;
+mul         r0, r0, r1.xxxx;
+// Dot that with the computed normal
+dp3         r1.x, r0, r11;
+mul         r1.x, r1.x, v5.z;
+//  dp3         r1.x, r0, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
+// Map dot=1 => 0, dot=0 => 1
+sub         r1.xyzw, c16.zzzz, r1.xxxx;
+add         r1.w, r1.wwww, c16.zzzz;
+mul         r1.w, r1.wwww, c16.yyyy;
+// No need to clamp, since the destination register (in the pixel shader)
+// will saturate [0..1] anyway.
+//%%% mul           r1.w, r1.w, r4.x;
+//%%% mul           r1.xyz, r1.xyz, r4.yyy;
+mul r1, r1, r4.yyyx; // HACKTESTCOLOR
+mul r1.xyz, r1, r8.xxx; // WAVEFACE
+mul r1.w,   r1.wwww, v5.xxxx;
+mul         oD1, r1, c20;
+
+// mov oD1, r4.yyyy;
+
+//mov           oD1, c16.zzzz; // HACKAGE
+//  mov         oD1, r9;
+//  mov         oD1, r8.xzyw;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveFixedFin7.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveFixedFin7.inl
@ -1,437 +1,437 @@
-
-vs.1.1
-
-dcl_position v0
-dcl_color v5
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c21; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c16.zzzz;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = reflection strength (transparency)
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c25 = waterlevel + offset
-//	c26 = (maxAtten - minAtten) / depthFalloff
-//	c27 = minAtten.
-// And in particular:
-//	c25.w = waterlevel
-//	c26.w = 1.f;
-//	c27.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c8, r6.xxxx;
-mad		r0, c9, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c5;
-add			r0, r0, c6;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c15.wwww;
-add			r0, r0, c15.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c15.wwww;
-//    dist += -kPi;
-sub         r0, r0, c15.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c13.wwww, r2;
-mad         r1, r4, c14.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c25, r6.zzzz;
-mul			r4, r4, c26;
-add			r4, r4, c27;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c16.zzzz;
-max			r4.xyz, r4, c16.xxxx;
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c24;
-max			r11, r11, c16.xxxx;
-min			r11, r11, c16.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r5, r2, c7;
-// r5 is now T = sum(Ai * sin())
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r5, c16.zzzz;
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c25.w;
-max			r6.z, r6.z, r8.z; // CLAMP
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= kAmplitude.xyzw; // Combine?
-mul         r7, r1, c7;
-//    cosDist *= filter;
-mul         r7, r7, r11;
-// r7 is now M = sum(Ai * cos())
-
-// Okay, here we go:
-// W == sum(k w Dir.x^2 A sin())
-// V == sum(k w Dir.x Dir.y A sin())
-// U == sum(k w Dir.y^2 A sin())
-//
-// T == sum(A sin())
-//
-// S == sum(k Dir.x A cos())
-// R == sum(k Dir.y A cos())
-//
-// Q == sum(k w A cos())
-//
-// M == sum(A cos())
-//
-// P == sum(w Dir.x A cos())
-// N == sum(w Dir.y A cos())
-//
-// Then:
-// Pos = (in.x + S, in.y + R, waterheight + T)
-//
-// Bin = (1 - W, -V, P)
-// Tan = (-V, 1 - U, N)
-// Nor = (-P, -N, 1 - Q)
-//
-// But we want the transpose of that to go into r1-r3
-
-dp4			r10.x, r7, c29;
-add			r6.x, r6.x, r10.x;
-dp4			r10.x, r7, c30;
-add			r6.y, r6.y, r10.x;
-
-dp4			r1.x, r5, -c34;
-dp4			r2.x, r5, -c35;
-dp4			r3.x, r7, c31;
-add			r1.x, r1.xxxx, c16.zzzz;
-
-dp4			r1.y, r5, -c35;
-dp4			r2.y, r5, -c36;
-dp4			r3.y, r7, c32;
-add			r2.y, r2.yyyy, c16.zzzz;
-
-dp4			r1.z, r7, -c31;
-dp4			r2.z, r7, -c32;
-dp4			r3.z, r5, -c33;
-add			r3.z, r3.zzzz, c16.zzzz;
-
-
-// Calculate our normalized vector from camera to vtx.
-// We'll use that a couple of times coming up.
-sub			r5, r6, c17;
-dp3			r10.x, r5, r5;
-rsq			r10.x, r10.x;
-mul			r5, r5, r10.xxxx; // r0 = D
-rcp			r5.w, r10.x;
-
-// Calculate our specular attenuation from and into r5.w.
-// r5.w starts off the distance from vtx to camera.
-// Once we've turned it into an attenuation factor, we
-// scale the x and y of our normal map (through the transform bases)
-// so that in the distance, the normal map is flat. Note that the
-// geometry in the distance isn't necessarily flat. We want to apply
-// this scale to the normal read from the normal map before it is
-// transformed into surface space.
-add			r5.w, r5.w, c11.x;
-mul			r5.w, r5.w, c11.y;
-min			r5.w, r5.w, c16.z;
-max			r5.w, r5.w, c16.x;
-mul			r5.w, r5.w, r5.w; // Square it to account for perspective
-mul			r5.w, r5.w, c11.z;
-
-
-// Normalize?
-
-// We can either calculate an orthonormal basis from the
-// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
-// or compute our basis directly from the partial derivatives, with
-// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
-//
-// These work out to identically the same result, so we'll compute directly
-// from the partials because it takes 2 fewer instructions.
-//
-// Note that our basis is NOT orthonormal. The Normal is equal to
-// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
-// are both correct tangents to the surface, and their projections on the XY plane
-// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
-// Not really. I'm actually not really sure which is more "proper" for bump mapping.
-//
-// Note also that we add when we should subtract and subtract when we should
-// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
-// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
-// explanation.
-//
-// Binormal = Y % Normal
-// Cross product3 is:
-//	mul		res.xyz, a.yzx, b.zxy
-//	mad		res.xyz, -a.zxy, b.yzx, res.xyz
-//   mul			r1.xyz, c16.zxx, r3.zxy;
-//   mad			r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
-
-// Tangent = Normal % X
-//   mul			r2.xyz, r3.yzx, c16.xzx;
-//   mad			r2.xyz, -r3.zxy, c16.xxz, r2;
-
-//mad			r1, r5.wwww, c16.zxxx, r7.zzxz;
-//mad			r2, r5.wwww, c16.xzxx, r7.zzyz;
-//mul			r3.xy, r3.xy, r5.wwww;
-
-
-// Note that we're swapping z and y to match our environment map tools in max.
-// We do this through our normal map transform (oT1, oT2, oT3), making it
-// a concatenation of:
-//
-//	rotate about Z (blue) to turn our map into the wind
-//	windRot =	|	dirY	-dirX	0 |
-//				|	dirX	dirY	0 |
-//				|	0		0		1 |
-//
-//	swap our Y and Z axes to match our environment map
-//	swapYZ	=	|	1		0		0 |
-//				|	0		0		1 |
-//				|	0		1		0 |
-//
-//	rotate the normal into the surface's tangent space basis
-//	basis	=	|	Bx		Tx		Nx |
-//				|	By		Ty		Ny |
-//				|	Bz		Tz		Nz |
-//
-//	Note that we've constucted the basis by taking advantage of the
-//	matrix being a pure rotation, as noted below, so r1, r2 and r3
-//	are actually constructed as:
-//	basis	=	|	Bx		-By		-Bz |
-//				|	-Tx		Ty		-Tz |
-//				|	-Nx		-Ny		-Nz |
-//
-//	Then the final normal map transform is:
-//
-//		basis * swapYZ * windRot [ * normal ]
-
-
-//   sub         r1.w, c17.x, r6.x;
-//   sub         r2.w, c17.z, r6.z;
-//   sub         r3.w, c17.y, r6.y;
-
-// Big note here. All this math can blow up if the camera position
-// is outside the environment sphere. It's assumed that's dealt
-// with in the app setting up the constants. For that reason, the
-// camera position used here might not be the real local camera position,
-// which is needed for the angular attenuation, so we burn another constant
-// with our pseudo-camera position. To restrain the pseudo-camera from
-// leaving the sphere, we make:
-//	pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
-// where dist = |realPos - envCenter|
-
-// So, our "finitized" eyeray is:
-//	camPos + D * t - envCenter = D * t - (envCenter - camPos)
-// with
-//	D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
-// and
-//	t = D dot F + sqrt( (D dot F)^2 - G )
-// with
-//	F = (envCenter - camPos)	=> c19.xyz
-//	G = F^2 - R^2				=> c19.w
-//	R = environment radius.		=> unused
-//
-// This all derives from the positive root of equation
-//	(camPos + (pos - camPos) * t - envCenter)^2 = R^2,
-// In other words, where on a sphere of radius R centered about envCenter
-// does the ray from the real camera position through this point hit.
-//
-// Note that F, G, and R are all constants (one point, two scalars).
-//
-// So first we calculate D into r0,
-// then D dot F into r10.x,
-// then (D dot F)^2 - G into r10.y
-// then rsq( (D dot F)^2 - G ) into r9.x;
-// then t = r10.z = r10.x + r10.y * r9.x;
-// and
-// r0 = D * t - (envCenter - camPos)
-//		= r0 * r10.zzzz - F;
-//
-mov			r0, r5; // r0 = D
-
-dp3			r10.x, r0, c19; // r10.x = D dot F
-mad			r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G
-
-rsq			r9.x, r10.y; // r9.x = 1/SQRT((D dot F)^2 - G)
-
-mad			r10.z, r10.y, r9.x, r10.x; // r10.z = D dot F + SQRT((D dot F)^2 - G)
-
-mad			r0.xyz, r0, r10.zzz, -c19.xyz; // r0.xyz = D * t - (envCenter - camPos)
-
-// ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump.
-dp3			r10.x, r0, r0;
-rsq			r9.x, r10.x;
-mul			r0.xyz, r0.xyz, r9.xxx;
-
-mov			r1.w, -r0.x;
-mov			r2.w, -r0.y;
-mov			r3.w, -r0.z;
-
-mov			r0.zw, c16.zzxz;
-
-dp3         r0.x, r1, r1;
-rsq         r0.xy, r0.x;
-mul			r0.x, r0.x, r5.w;
-mul         oT1, r1.xyzw, r0.xxyw;
-//   mul			r8, r1.xyzw, r0.xxxw; // VISUAL
-mul			r11.x, r1.z, r0.y;
-
-
-dp3         r0.x, r2, r2;
-rsq         r0.xy, r0.x;
-mul			r0.x, r0.x, r5.w;
-mul         oT3, r2.xyzw, r0.xxyw;
-//   mul			r9, r2.xyzw, r0.xxxw; // VISUAL
-mul			r11.y, r2.z, r0.y;
-
-dp3         r0.x, r3, r3;
-rsq         r0.xy, r0.x;
-mul			r0.x, r0.x, r5.w;
-mul         oT2, r3.xyzw, r0.xxyw;
-//   mul			r9, r3.xyzw, r0.xxxw; // VISUAL
-mul			r11.z, r3.z, r0.y;
-
-
-/*
-// Want:
-//    oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
-//    oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
-//    ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
-// with BIN, TAN, and NORM normalized.
-// Unnormalized, we have
-//    BIN = (1, 0, -r7.x) where r7 == accumCos
-//    TAN = (0, 1, -r7.y)
-//    NORM= (r7.x, r7.y, 1)
-// So, unnormalized, we have
-//    oT1 = (1, 0, r7.x, view2pos.x)
-//    oT2 = (0, 1, r7.y, view2pos.y)
-//    oT3 = (-r7.x, -r7.y, 1, view2pos.z)
-// which is just reversing the signs on the accumCos
-// terms above. So the normalized version is just
-// reversing the signs on the normalized version above.
-*/
-//mov oT3, r4;
-
-//
-// // Transform position to screen
-//
-//
-//m4x3	r6, v0, c21; // HACKAGE
-//mov		r6.w, c16.z; // HACKAGE
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c28.x;
-mul			oFog, r10.x, c28.y;
-//mov			oFog, c16.zzzz; // TESTFOGHACK
-mov			oPos, r9;
-
-// Transform our uvw
-mul			r0.x, v0.xxxx, c10.xxxx;
-mul			r0.y, v0.yyyy, c10.xxxx;
-
-//mov			r0.zw, c16.xxxz;
-mov			oT0, r0
-
-// Questionble attenuation follows
-// vector from this point to camera and normalize stashed in r5
-// Dot that with the computed normal
-dp3			r1.x, -r5, r11;
-mul			r1.x, r1.x, v5.z;
-//	dp3			r1.x, r5, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
-// Map dot=1 => 0, dot=0 => 1
-sub			r1.xyzw, c16.zzzz, r1.xxxx;
-add			r1.w, r1.wwww, c16.zzzz;
-mul			r1.w, r1.wwww, c16.yyyy;
-// No need to clamp, since the destination register (in the pixel shader)
-// will saturate [0..1] anyway.
-//%%% mul			r1.w, r1.w, r4.x;
-//%%% mul			r1.xyz, r1.xyz, r4.yyy;
-mul r1, r1, r4.yyyx; // HACKTESTCOLOR
-//mul	r1.xyz, r1, r8.xxx; // WAVEFACE
-mul r1.w,	r1.wwww, v5.xxxx;
-mul r1.w,	r1.wwww, c4.wwww;
-mul			oD0, r1, c20;
-
-mov         oD1, c4; // SEENORM
-//mov oD1, c16.xxxx;
-// mov oD1, r4.yyyy;
-
-//mov			oD1, c16.zzzz; // HACKAGE
-//	mov			oD1, r9;
-//	mov			oD1, r8.xzyw;
+
+vs.1.1
+
+// Water vertex shader. In order, it:
+//  - moves the input position into world space (r6),
+//  - evaluates four waves at once, one wave per register component,
+//  - displaces the world position by the filtered wave sums,
+//  - writes the normal-map transform rows (with an eye ray in .w) to
+//    oT1..oT3, the base uv to oT0, fog to oFog and colors to oD0/oD1.
+// c16 supplies numeric constants; the [0..1] clamps further down use
+// c16.x as the lower bound and c16.z as the upper bound.
+
+dcl_position v0
+dcl_color v5
+
+// Store our input position in world space in r6
+m4x3        r6, v0, c21; // v0 * l2w
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c16.zzzz;
+
+//
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = reflection strength (transparency)
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c25 = waterlevel + offset
+//  c26 = (maxAtten - minAtten) / depthFalloff
+//  c27 = minAtten.
+// And in particular:
+//  c25.w = waterlevel
+//  c26.w = 1.f;
+//  c27.w = 0;
+// So r4.w is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+// Each component of r0 belongs to one wave:
+//    r0 = c8 * worldPos.x + c9 * worldPos.y
+// so c8 and c9 carry the per-wave direction x and y terms.
+mul     r0, c8, r6.xxxx;
+mad     r0, c9, r6.yyyy, r0;
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+// (c5 is used as kFreq, c6 as kPhase.)
+mul         r0, r0, c5;
+add         r0, r0, c6;
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist *= rcp(kTwoPi);
+// c15.w is used as kTwoPi and c15.z as kPi. kPi is added before the divide
+// and subtracted again after the frac, which centers the wrapped range on 0.
+rcp         r4, c15.wwww;
+add         r0, r0, c15.zzzz;
+mul         r0, r0, r4;
+//    dist = frac(dist);
+// expp works on one scalar at a time; its .y result is what is used as the
+// fraction here. r1.y is the scratch destination for every component, so
+// r0.y is processed last and its result is simply left in r1.y.
+expp     r1.y, r0.xxxx
+mov      r1.x, r1.yyyy
+expp     r1.y, r0.zzzz
+mov      r1.z, r1.yyyy
+expp     r1.y, r0.wwww
+mov      r1.w, r1.yyyy
+expp     r1.y, r0.yyyy
+//    dist *= kTwoPi;
+mul         r0, r1, c15.wwww;
+//    dist += -kPi;
+sub         r0, r0, c15.zzzz;
+
+//
+//    sincos(dist, sinDist, cosDist);
+// Power-series approximation, evaluated for all four waves at once
+// (c13 is used as vSin, c14 as vCos):
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
+// cos = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
+// (the original notes write the leading cosine term vCos.x as 1)
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y
+mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6
+mad         r2, r5, c13.wwww, r2;   // r2 += r0^7 * vSin.w  => r2 == sinDist
+mad         r1, r4, c14.wwww, r1;   // r1 += r0^6 * vCos.w  => r1 == cosDist
+
+// Calc our depth based filtering here into r4 (because we don't use it again
+// after here, and we need our filtering shortly).
+// r4 last held the r0^6 series term, which has already been consumed.
+// Per channel this evaluates r4 = (c25 - worldPos.z) * c26 + c27.
+sub         r4, c25, r6.zzzz;
+mul         r4, r4, c26;
+add         r4, r4, c27;
+// Clamp .xyz to range [0..1]
+// (r4.w is deliberately left unclamped by the write mask.)
+min         r4.xyz, r4, c16.zzzz;
+max         r4.xyz, r4, c16.xxxx;
+
+// Calc our filter (see above).
+// What is computed is r11 = clamp(v5.w * c24, 0, 1), one value per wave.
+// NOTE(review): the formula in the header comments also subtracts 1 before
+// clamping; no such subtraction is performed here - confirm which is intended.
+mul         r11, v5.wwww, c24;
+max         r11, r11, c16.xxxx;
+min         r11, r11, c16.zzzz;
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+// (c7 is used as kAmplitude.)
+mul         r5, r2, c7;
+// r5 now holds the four per-wave terms Ai * sin(); the dp4 below adds them
+// up to give T = sum(Ai * sin()).
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r5, c16.zzzz;
+// Scale the summed height by the clamped depth factor in r4.z, then offset
+// it by c25.w.
+mul         r8.y, r8.x, r4.z;
+add         r8.z, r8.y, c25.w;
+// Keep whichever is higher: the incoming vertex height or the wave height.
+max         r6.z, r6.z, r8.z; // CLAMP
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level
+//
+//    cosDist *= kAmplitude.xyzw; // Combine?
+mul         r7, r1, c7;
+//    cosDist *= filter;
+mul         r7, r7, r11;
+// r7 now holds the four per-wave terms Ai * cos(); dotting it with a vector
+// of ones would give M = sum(Ai * cos()).
+
+// Okay, here we go:
+// W == sum(k w Dir.x^2 A sin())
+// V == sum(k w Dir.x Dir.y A sin())
+// U == sum(k w Dir.y^2 A sin())
+//
+// T == sum(A sin())
+//
+// S == sum(k Dir.x A cos())
+// R == sum(k Dir.y A cos())
+//
+// Q == sum(k w A cos())
+// NOTE(review): the instruction that produces Q below dots against r5 (the
+// sin terms), exactly like W, V and U, and not against r7 (the cos terms).
+// Confirm whether the cos() in the definition of Q is a typo for sin().
+//
+// M == sum(A cos())
+//
+// P == sum(w Dir.x A cos())
+// N == sum(w Dir.y A cos())
+//
+// Then:
+// Pos = (in.x + S, in.y + R, waterheight + T)
+//
+// Bin = (1 - W, -V, P)
+// Tan = (-V, 1 - U, N)
+// Nor = (-P, -N, 1 - Q)
+//
+// But we want the transpose of that to go into r1-r3, i.e.
+//    r1 = (Bin.x, Tan.x, Nor.x)
+//    r2 = (Bin.y, Tan.y, Nor.y)
+//    r3 = (Bin.z, Tan.z, Nor.z)
+// In every dp4 below, r5 carries the per-wave A*sin() terms and r7 the
+// per-wave A*cos() terms; the constant operand supplies the remaining
+// per-wave factors of the sum being formed.
+
+// Horizontal displacement: r6.x += S (via c29), r6.y += R (via c30).
+dp4         r10.x, r7, c29;
+add         r6.x, r6.x, r10.x;
+dp4         r10.x, r7, c30;
+add         r6.y, r6.y, r10.x;
+
+// .x of each row: r1.x = 1 - W (c34), r2.x = -V (c35), r3.x = P (c31).
+dp4         r1.x, r5, -c34;
+dp4         r2.x, r5, -c35;
+dp4         r3.x, r7, c31;
+add         r1.x, r1.xxxx, c16.zzzz;
+
+// .y of each row: r1.y = -V (c35), r2.y = 1 - U (c36), r3.y = N (c32).
+dp4         r1.y, r5, -c35;
+dp4         r2.y, r5, -c36;
+dp4         r3.y, r7, c32;
+add         r2.y, r2.yyyy, c16.zzzz;
+
+// .z of each row: r1.z = -P (c31), r2.z = -N (c32), r3.z = 1 - Q (c33).
+dp4         r1.z, r7, -c31;
+dp4         r2.z, r7, -c32;
+dp4         r3.z, r5, -c33;
+add         r3.z, r3.zzzz, c16.zzzz;
+
+
+// Calculate our normalized vector from camera to vtx.
+// We'll use that a couple of times coming up.
+// c17 is the camera position this vector is measured from; r6 is the
+// displaced world-space vertex.
+sub         r5, r6, c17;
+dp3         r10.x, r5, r5;
+rsq         r10.x, r10.x;
+mul         r5, r5, r10.xxxx; // r5 = D (copied into r0 further down)
+// r10.x is 1/length, so its reciprocal is the camera-to-vertex distance.
+rcp         r5.w, r10.x;
+
+// Calculate our specular attenuation from and into r5.w.
+// r5.w starts off the distance from vtx to camera.
+// Once we've turned it into an attenuation factor, we
+// scale the x and y of our normal map (through the transform bases)
+// so that in the distance, the normal map is flat. Note that the
+// geometry in the distance isn't necessarily flat. We want to apply
+// this scale to the normal read from the normal map before it is
+// transformed into surface space.
+// In formula form: r5.w = clamp((dist + c11.x) * c11.y, 0, 1)^2 * c11.z
+add         r5.w, r5.w, c11.x;
+mul         r5.w, r5.w, c11.y;
+min         r5.w, r5.w, c16.z;
+max         r5.w, r5.w, c16.x;
+mul         r5.w, r5.w, r5.w; // Square it to account for perspective
+mul         r5.w, r5.w, c11.z;
+
+
+// Normalize?
+
+// We can either calculate an orthonormal basis from the
+// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
+// or compute our basis directly from the partial derivatives, with
+// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
+//
+// These work out to identically the same result, so we'll compute directly
+// from the partials because it takes 2 fewer instructions.
+//
+// Note that our basis is NOT orthonormal. The Normal is equal to
+// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
+// are both correct tangents to the surface, and their projections on the XY plane
+// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
+// Not really. I'm actually not really sure which is more "proper" for bump mapping.
+//
+// Note also that we add when we should subtract and subtract when we should
+// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
+// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
+// explanation.
+//
+// Binormal = Y % Normal
+// Cross product3 is:
+//  mul     res.xyz, a.yzx, b.zxy
+//  mad     res.xyz, -a.zxy, b.yzx, res.xyz
+//   mul            r1.xyz, c16.zxx, r3.zxy;
+//   mad            r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
+
+// Tangent = Normal % X
+//   mul            r2.xyz, r3.yzx, c16.xzx;
+//   mad            r2.xyz, -r3.zxy, c16.xxz, r2;
+
+//mad           r1, r5.wwww, c16.zxxx, r7.zzxz;
+//mad           r2, r5.wwww, c16.xzxx, r7.zzyz;
+//mul           r3.xy, r3.xy, r5.wwww;
+
+
+// Note that we're swapping z and y to match our environment map tools in max.
+// We do this through our normal map transform (oT1, oT2, oT3), making it
+// a concatenation of:
+//
+//  rotate about Z (blue) to turn our map into the wind
+//  windRot =   |   dirY    -dirX   0 |
+//              |   dirX    dirY    0 |
+//              |   0       0       1 |
+//
+//  swap our Y and Z axes to match our environment map
+//  swapYZ  =   |   1       0       0 |
+//              |   0       0       1 |
+//              |   0       1       0 |
+//
+//  rotate the normal into the surface's tangent space basis
+//  basis   =   |   Bx      Tx      Nx |
+//              |   By      Ty      Ny |
+//              |   Bz      Tz      Nz |
+//
+//  Note that we've constructed the basis by taking advantage of the
+//  matrix being a pure rotation, as noted below, so r1, r2 and r3
+//  are actually constructed as:
+//  basis   =   |   Bx      -By     -Bz |
+//              |   -Tx     Ty      -Tz |
+//              |   -Nx     -Ny     -Nz |
+//
+//  Then the final normal map transform is:
+//
+//      basis * swapYZ * windRot [ * normal ]
+
+
+//   sub         r1.w, c17.x, r6.x;
+//   sub         r2.w, c17.z, r6.z;
+//   sub         r3.w, c17.y, r6.y;
+
+// Big note here. All this math can blow up if the camera position
+// is outside the environment sphere. It's assumed that's dealt
+// with in the app setting up the constants. For that reason, the
+// camera position used here might not be the real local camera position,
+// which is needed for the angular attenuation, so we burn another constant
+// with our pseudo-camera position. To restrain the pseudo-camera from
+// leaving the sphere, we make:
+//  pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
+// where dist = |realPos - envCenter|
+
+// So, our "finitized" eyeray is:
+//  camPos + D * t - envCenter = D * t - (envCenter - camPos)
+// with
+//  D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
+// and
+//  t = D dot F + sqrt( (D dot F)^2 - G )
+// with
+//  F = (envCenter - camPos)    => c19.xyz
+//  G = F^2 - R^2               => c19.w
+//  R = environment radius.     => unused
+//
+// This all derives from the positive root of equation
+//  (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
+// In other words, where on a sphere of radius R centered about envCenter
+// does the ray from the real camera position through this point hit.
+//
+// Note that F, G, and R are all constants (one point, two scalars).
+//
+// So first we calculate D into r0,
+// then D dot F into r10.x,
+// then (D dot F)^2 - G into r10.y
+// then rsq( (D dot F)^2 - G ) into r9.x;
+// then t = r10.z = r10.x + r10.y * r9.x;
+// and
+// r0 = D * t - (envCenter - camPos)
+//      = r0 * r10.zzzz - F;
+//
+// Intersect the eye ray with the environment sphere. Only r0.xyz matters
+// from here on; the .w copied along from r5 is not read by this section.
+mov         r0, r5; // r0 = D
+
+dp3         r10.x, r0, c19; // r10.x = D dot F
+mad         r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G
+
+rsq         r9.x, r10.y; // r9.x = 1/SQRT((D dot F)^2 - G)
+
+// SQRT(y) is formed as y * (1/SQRT(y)).
+mad         r10.z, r10.y, r9.x, r10.x; // r10.z = D dot F + SQRT((D dot F)^2 - G)
+
+mad         r0.xyz, r0, r10.zzz, -c19.xyz; // r0.xyz = D * t - (envCenter - camPos)
+
+// ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump.
+dp3         r10.x, r0, r0;
+rsq         r9.x, r10.x;
+mul         r0.xyz, r0.xyz, r9.xxx;
+
+// Stash the negated, normalized ray one component per basis row, in .w.
+mov         r1.w, -r0.x;
+mov         r2.w, -r0.y;
+mov         r3.w, -r0.z;
+
+// Prepare the scale register used by the three row outputs below:
+// r0.z = c16.x and r0.w = c16.z. Only r0.w is read by the .xxyw swizzles,
+// and it scales the eye-ray component held in each row's .w.
+mov         r0.zw, c16.zzxz;
+
+// Each row gets the same treatment:
+//    r0.x = r0.y = 1 / |row.xyz|
+//    r0.x *= r5.w                  (distance attenuation, applied to x and y only)
+//    oTn  = row * (r0.x, r0.x, r0.y, r0.w)
+//    r11.n = row.z * r0.y          (row.z normalized without the attenuation)
+// r11 is read later as the "computed normal".
+
+// Row r1 -> oT1
+dp3         r0.x, r1, r1;
+rsq         r0.xy, r0.x;
+mul         r0.x, r0.x, r5.w;
+mul         oT1, r1.xyzw, r0.xxyw;
+//   mul            r8, r1.xyzw, r0.xxxw; // VISUAL
+mul         r11.x, r1.z, r0.y;
+
+
+// Row r2 -> oT3. Note r2 and r3 are written to oT3 and oT2 respectively,
+// presumably the Y/Z swap described in the comments above - confirm.
+dp3         r0.x, r2, r2;
+rsq         r0.xy, r0.x;
+mul         r0.x, r0.x, r5.w;
+mul         oT3, r2.xyzw, r0.xxyw;
+//   mul            r9, r2.xyzw, r0.xxxw; // VISUAL
+mul         r11.y, r2.z, r0.y;
+
+// Row r3 -> oT2
+dp3         r0.x, r3, r3;
+rsq         r0.xy, r0.x;
+mul         r0.x, r0.x, r5.w;
+mul         oT2, r3.xyzw, r0.xxyw;
+//   mul            r9, r3.xyzw, r0.xxxw; // VISUAL
+mul         r11.z, r3.z, r0.y;
+
+
+/*
+// Want:
+//    oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
+//    oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
+//    ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
+// with BIN, TAN, and NORM normalized.
+// Unnormalized, we have
+//    BIN = (1, 0, -r7.x) where r7 == accumCos
+//    TAN = (0, 1, -r7.y)
+//    NORM= (r7.x, r7.y, 1)
+// So, unnormalized, we have
+//    oT1 = (1, 0, r7.x, view2pos.x)
+//    oT2 = (0, 1, r7.y, view2pos.y)
+//    oT3 = (-r7.x, -r7.y, 1, view2pos.z)
+// which is just reversing the signs on the accumCos
+// terms above. So the normalized version is just
+// reversing the signs on the normalized version above.
+*/
+//mov oT3, r4;
+
+//
+// // Transform position to screen
+//
+//
+//m4x3  r6, v0, c21; // HACKAGE
+//mov       r6.w, c16.z; // HACKAGE
+//m4x4     oPos, r6, c0; // ADDFOG
+// The projected position goes through r9 first so that its w can feed the
+// fog calculation: oFog = (r9.w + c28.x) * c28.y.
+m4x4        r9, r6, c0;
+add         r10.x, r9.w, c28.x;
+mul         oFog, r10.x, c28.y;
+//mov           oFog, c16.zzzz; // TESTFOGHACK
+mov         oPos, r9;
+
+// Transform our uvw
+// The uv is taken from the undisplaced input position v0.xy, both scaled
+// by c10.x. r0.zw are not written here, so oT0.zw receive whatever r0.zw
+// were last set to (c16.x and c16.z from the earlier mov r0.zw).
+mul         r0.x, v0.xxxx, c10.xxxx;
+mul         r0.y, v0.yyyy, c10.xxxx;
+
+//mov           r0.zw, c16.xxxz;
+mov         oT0, r0
+
+// Questionable attenuation follows
+// r5.xyz holds the normalized vector from the camera to this point; it is
+// negated in the dp3 to get the vector from this point to the camera.
+// Dot that with the computed normal (r11, built while writing oT1..oT3),
+// then scale the result by v5.z.
+dp3         r1.x, -r5, r11;
+mul         r1.x, r1.x, v5.z;
+//  dp3         r1.x, r5, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
+// Map dot=1 => 0, dot=0 => 1
+sub         r1.xyzw, c16.zzzz, r1.xxxx;
+// Alpha only: r1.w = (r1.w + c16.z) * c16.y
+add         r1.w, r1.wwww, c16.zzzz;
+mul         r1.w, r1.wwww, c16.yyyy;
+// No need to clamp, since the destination register (in the pixel shader)
+// will saturate [0..1] anyway.
+//%%% mul           r1.w, r1.w, r4.x;
+//%%% mul           r1.xyz, r1.xyz, r4.yyy;
+// Apply the depth-based factors: rgb *= r4.y, alpha *= r4.x.
+mul r1, r1, r4.yyyx; // HACKTESTCOLOR
+//mul   r1.xyz, r1, r8.xxx; // WAVEFACE
+// Alpha is further scaled by the vertex color's x channel and by c4.w.
+mul r1.w,   r1.wwww, v5.xxxx;
+mul r1.w,   r1.wwww, c4.wwww;
+mul         oD0, r1, c20;
+
+// oD1 is a straight copy of the constant c4.
+mov         oD1, c4; // SEENORM
+//mov oD1, c16.xxxx;
+// mov oD1, r4.yyyy;
+
+//mov           oD1, c16.zzzz; // HACKAGE
+//  mov         oD1, r9;
+//  mov         oD1, r8.xzyw;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveGraph2.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveGraph2.inl
@ -1,166 +1,166 @@
-
-vs.1.1
-
-dcl_position v0
-dcl_normal v3
-
-// c0 = (0,0.5,1.0,2.0) (aka NumericConsts)
-// c1 = frequencies
-// c2 = phases
-// c3 = amplitudes
-
-// c4 = PiConsts = (1/(2PI), PI/2, PI, 2*PI) // NOTE THIS IS DIFFERENT
-//		because we don't need oonsqpi here but do want 1/2Pi.
-// c5 = cosConsts = (1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
-
-// c6 = ((cMax - cMin), cMin, 2ndLayerVOffset, 2ndLayerScale);
-// c7 = overall color, including current opacity. Will
-//		probably only use the opacity, which we could stuff into
-//		the free slot of c6, but we're a wuss.
-
-// First, "move" the position to oPos
-mov r0, v0;
-//mov r0.y, -r0.yyyy;
-mov r0.w, c0.zzzz;
-mov oPos, r0;
-
-// Now the tricky part.
-
-// The base layer defines the shape of the incoming wave
-// The next layer has bubbles (noise) and moves in when the
-//		wave is moving in, moves out when wave is moving out.
-// So calculate uvw for first layer, second uvw shares u val
-//		and v val is const
-
-// The .x component of the normal
-// tells us how much to shift this vert based on the
-// cumulative cosine wave.
-
-// Figure c = Sigma((cosine(v0.x * freq + phase) + 1) * amp);
-// Note that range c must be [0..1]
-// Also, c(-1) must equal c(1) so it will wrap.
-// That implies freq = k * 2 * PI, where k is an integer.
-// To keep c >= 0, we can add 1 to each term in the sigma BEFORE
-// modulating by the amplitude.
-// That puts our range at [0..2*sigma(amp)], so as long as
-// sigma(amp) <= 0.5, we're fine.
-
-// Get our input to cosine value (v0.x * freq + phase).
-add		r0, v0.xxxx, c0.zzzz;
-mul		r0, r0, c1;
-add		r0, r0, c2;
-
-// Get it into range [-Pi..Pi]
-// First divide out the 2PI
-// add			r0, r0, c4.zzzz; HACKOUT
-mul         r0, r0, c4.xxxx;
-
-// Do an integer mod
-expp		r1.y, r0.xxxx
-mov			r1.x, r1.yyyy
-expp		r1.y, r0.zzzz
-mov			r1.z, r1.yyyy
-expp		r1.y, r0.wwww
-mov			r1.w, r1.yyyy
-expp		r1.y, r0.yyyy
-
-//mov oD1, r1; // HACKTEST
-//mov oD1.w, c0.zzzz; // HACKTEST
-
-// Move back into PI space, w/ *= 2P, -= PI
-mul         r0, r1, c4.wwww;
-sub         r0, r0, c4.zzzz;
-
-// Okay, compute cosine here.
-// cos = 1 + r0^2 * kCos.y + r0^4 * kCos.Z + r0^6 * kCos.w
-// Note: could pare off an instr by putting 1/kCos.w in kCos.x,
-// then doing a mad to get r3=(1/kCos.w + r0^6), then mad that
-// into the accum by kCos.w to get (1 + r0^6*kCos.x). But who cares.
-mul			r1, r0, r0; // r0^2
-mul			r2, r1, r1; // r0^4
-mul			r3, r1, r2; // r0^6
-
-mov			r4, c5.xxxx;			// r4 = 1
-mad			r4, r1, c5.yyyy, r4;	// r4 += r0^2 * kCos.y
-mad			r4, r2, c5.zzzz, r4;	// r4 += r0^4 * kCos.z
-mad			r4, r3, c5.wwww, r4;	// r4 += r0^6 * kCos.w
-
-add			r4, r4, c0.zzzz;	// shift from [-1..1] to [0..2]
-//mov	r4, c0.xxxx; // HACKLAST
-mul			r4, r4, c3;			// times amplitude
-
-dp4			r5.y, r4, c0.zzzz; // r5.x = sigma((cos() + 1) * amp);
-
-// V calculation, goes something like:
-// For layers 0 and 2:
-//		V = { 1 + c6.z	<= r5.y = 0 } * norm.x // norm.x == v3.x
-//			{ 1 + 0		<= r5.y = 1 }
-// For layer 1:
-//		V = (norm.x + c6.z) * c6.w // Scaled like U
-//
-// Another way to formulate that is
-// baseV = cMin + sinAge * (cMax-cMin) where
-//		cMin = 2
-//		cMax = 1
-//		sinAge = color.a = c7.w
-// delV = sigma(cos) = r5.y
-// Then
-//		V0 = V2 = (baseV + delV) * v3.x
-//		V1 = (norm.x + baseV + delV) * c6.w
-//
-// If we're sure we want cMin = 2 and cMax = 1, then it simplifies to:
-//	baseV = 2 - sinAge = c0.w - c7.w
-//	delV = r5.y
-//  (baseV + delV) = c0.w - c7.w + r5.y
-//
-// If we want to stay general, then
-//	baseV = c6.x * c7.w + c6.y
-//	delV = -r5.y
-//	(baseV + delV) = constant + r5.y
-//
-
-// make r5.y = (baseV + delV)
-add			r5.y, c6.xxxx, r5.yyyy;
-
-//mov oD1, r5.yyyy; // HACKLAST
-//mov oD1.w, c0.zzzz; // HACKLAST
-
-// U is input U (or v0.x * 0.5f + 0.5f)
-mul			r5.x, v0.x, c0.y;
-add			r5.x, r5.x, c0.y;
-
-// Fill out wq.
-mov			r5.zw, c0.xz;
-
-mul			oT0, r5, v3.wxww;
-// mov oD1, r5.yyyw; // HACKTEST
-mul			oT2, r5, v3.wxww;
-
-// Second uv shares u, but v is norm.x + c6.x;
-// Then we scale it.
-// If we want the bubble texture to move with the
-// wave front, we want the second UV calc (RESCALE1).
-// But it looks better to have the bubbles moving
-// slightly faster than the wave front. RESCALE0
-// happens to do that, because we're scaling the
-// texture by a factor of 2, but we should probably
-// supply an independent scale of the motion vs. the
-// scale of the texture.
-// Let's move c6 to r6 for ease of use.
-mov				r6, c6;
-// add			r5.x, r5.x, c6.y;
-// add			r5.y, c6.xxxx, v3.xxxx; // RESCALE0
-// mul			r5.xy, r5, c6.wwww;		// RESCALE0
-add			r5.x, r5.x, r6.y;	// RESCALE1 // offset U
-mov			r5.y, v3.xx;		// RESCALE1 // Init V to value stashed in normal.x
-mul			r5.xy, r5, r6.wwww;	// RESCALE1 // scale them by single scale value
-mad			r5.y, r6.xx, r6.zz, r5.yy;	// RESCALE1 // add in our scaled V offset (sinage * vScale)
-mov			oT1, r5;
-
-//mov	oT0, v7; // HACKTEST
-//mov oT1, v7; // HACKTEST
-//mov oT2, v7; // HACKTEST
-
-// Just slam in the constant color (includes our current opacity).
-mov			oD0, c7;
-//mov	oD0, c0.zzzz; // HACKTEST
+
+vs.1.1
+
+dcl_position v0
+dcl_normal v3
+
+// c0 = (0,0.5,1.0,2.0) (aka NumericConsts)
+// c1 = frequencies
+// c2 = phases
+// c3 = amplitudes
+
+// c4 = PiConsts = (1/(2PI), PI/2, PI, 2*PI) // NOTE THIS IS DIFFERENT
+//      because we don't need oonsqpi here but do want 1/2Pi.
+// c5 = cosConsts = (1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
+
+// c6 = ((cMax - cMin), cMin, 2ndLayerVOffset, 2ndLayerScale);
+// c7 = overall color, including current opacity. Will
+//      probably only use the opacity, which we could stuff into
+//      the free slot of c6, but we're a wuss.
+
+// First, "move" the position to oPos
+mov r0, v0;
+//mov r0.y, -r0.yyyy;
+mov r0.w, c0.zzzz;
+mov oPos, r0;
+
+// Now the tricky part.
+
+// The base layer defines the shape of the incoming wave
+// The next layer has bubbles (noise) and moves in when the
+//      wave is moving in, moves out when wave is moving out.
+// So calculate uvw for first layer, second uvw shares u val
+//      and v val is const
+
+// The .x component of the normal
+// tells us how much to shift this vert based on the
+// cumulative cosine wave.
+
+// Figure c = Sigma((cosine(v0.x * freq + phase) + 1) * amp);
+// Note that range c must be [0..1]
+// Also, c(-1) must equal c(1) so it will wrap.
+// That implies freq = k * 2 * PI, where k is an integer.
+// To keep c >= 0, we can add 1 to each term in the sigma BEFORE
+// modulating by the amplitude.
+// That puts our range at [0..2*sigma(amp)], so as long as
+// sigma(amp) <= 0.5, we're fine.
+
+// Get our input to cosine value (v0.x * freq + phase).
+add     r0, v0.xxxx, c0.zzzz;
+mul     r0, r0, c1;
+add     r0, r0, c2;
+
+// Get it into range [-Pi..Pi]
+// First divide out the 2PI
+// add          r0, r0, c4.zzzz; HACKOUT
+mul         r0, r0, c4.xxxx;
+
+// Do an integer mod
+expp        r1.y, r0.xxxx
+mov         r1.x, r1.yyyy
+expp        r1.y, r0.zzzz
+mov         r1.z, r1.yyyy
+expp        r1.y, r0.wwww
+mov         r1.w, r1.yyyy
+expp        r1.y, r0.yyyy
+
+//mov oD1, r1; // HACKTEST
+//mov oD1.w, c0.zzzz; // HACKTEST
+
+// Move back into PI space, w/ *= 2PI, -= PI
+mul         r0, r1, c4.wwww;
+sub         r0, r0, c4.zzzz;
+
+// Okay, compute cosine here.
+// cos = 1 + r0^2 * kCos.y + r0^4 * kCos.z + r0^6 * kCos.w
+// Note: could pare off an instr by putting 1/kCos.w in kCos.x,
+// then doing a mad to get r3=(1/kCos.w + r0^6), then mad that
+// into the accum by kCos.w to get (1 + r0^6*kCos.x). But who cares.
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r1; // r0^4
+mul         r3, r1, r2; // r0^6
+
+mov         r4, c5.xxxx;            // r4 = 1
+mad         r4, r1, c5.yyyy, r4;    // r4 += r0^2 * kCos.y
+mad         r4, r2, c5.zzzz, r4;    // r4 += r0^4 * kCos.z
+mad         r4, r3, c5.wwww, r4;    // r4 += r0^6 * kCos.w
+
+add         r4, r4, c0.zzzz;    // shift from [-1..1] to [0..2]
+//mov   r4, c0.xxxx; // HACKLAST
+mul         r4, r4, c3;         // times amplitude
+
+dp4         r5.y, r4, c0.zzzz; // r5.y = sigma((cos() + 1) * amp);
+
+// V calculation, goes something like:
+// For layers 0 and 2:
+//      V = { 1 + c6.z  <= r5.y = 0 } * norm.x // norm.x == v3.x
+//          { 1 + 0     <= r5.y = 1 }
+// For layer 1:
+//      V = (norm.x + c6.z) * c6.w // Scaled like U
+//
+// Another way to formulate that is
+// baseV = cMin + sinAge * (cMax-cMin) where
+//      cMin = 2
+//      cMax = 1
+//      sinAge = color.a = c7.w
+// delV = sigma(cos) = r5.y
+// Then
+//      V0 = V2 = (baseV + delV) * v3.x
+//      V1 = (norm.x + baseV + delV) * c6.w
+//
+// If we're sure we want cMin = 2 and cMax = 1, then it simplifies to:
+//  baseV = 2 - sinAge = c0.w - c7.w
+//  delV = r5.y
+//  (baseV + delV) = c0.w - c7.w + r5.y
+//
+// If we want to stay general, then
+//  baseV = c6.x * c7.w + c6.y
+//  delV = -r5.y
+//  (baseV + delV) = constant + r5.y
+//
+
+// make r5.y = (baseV + delV)
+add         r5.y, c6.xxxx, r5.yyyy;
+
+//mov oD1, r5.yyyy; // HACKLAST
+//mov oD1.w, c0.zzzz; // HACKLAST
+
+// U is input U (or v0.x * 0.5f + 0.5f)
+mul         r5.x, v0.x, c0.y;
+add         r5.x, r5.x, c0.y;
+
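+// Fill out wq. NOTE(review): if a two-component swizzle replicates its last component (.xz => .xzzz), this writes z = w = 1 rather than (0, 1) - confirm.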
+mov         r5.zw, c0.xz;
+
+mul         oT0, r5, v3.wxww;
+// mov oD1, r5.yyyw; // HACKTEST
+mul         oT2, r5, v3.wxww;
+
+// Second uv shares u, but v is norm.x + c6.x;
+// Then we scale it.
+// If we want the bubble texture to move with the
+// wave front, we want the second UV calc (RESCALE1).
+// But it looks better to have the bubbles moving
+// slightly faster than the wave front. RESCALE0
+// happens to do that, because we're scaling the
+// texture by a factor of 2, but we should probably
+// supply an independent scale of the motion vs. the
+// scale of the texture.
+// Let's move c6 to r6 for ease of use.
+mov             r6, c6;
+// add          r5.x, r5.x, c6.y;
+// add          r5.y, c6.xxxx, v3.xxxx; // RESCALE0
+// mul          r5.xy, r5, c6.wwww;     // RESCALE0
+add         r5.x, r5.x, r6.y;   // RESCALE1 // offset U
+mov         r5.y, v3.xx;        // RESCALE1 // Init V to value stashed in normal.x
+mul         r5.xy, r5, r6.wwww; // RESCALE1 // scale them by single scale value
+mad         r5.y, r6.xx, r6.zz, r5.yy;  // RESCALE1 // add in our scaled V offset (sinage * vScale)
+mov         oT1, r5;
+
+//mov   oT0, v7; // HACKTEST
+//mov oT1, v7; // HACKTEST
+//mov oT2, v7; // HACKTEST
+
+// Just slam in the constant color (includes our current opacity).
+mov         oD0, c7;
+//mov   oD0, c0.zzzz; // HACKTEST
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveGridFin.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveGridFin.inl
@ -1,471 +1,471 @@
-vs.1.1
-
-dcl_position v0
-
-//m4x4 oPos, v0, c0
-
-
-/*
-In fact, I was trying to understand how it was possible to expand FRC into 4
-instructions...
-Actually, I can do it in 7 instructions :)
-
-EXPP r0.y, r1.xxxx
-MOV r0.x, r0.y
-EXPP r0.y, r1.zzzz
-MOV r0.z, r0.y
-EXPP r0.y, r1.wwww
-MOV r0.w, r0.y
-EXPP r0.y, r1.yyyy
-*/
-
-/*
-   // Constants for sin and cos. 3 term approximation seems plenty
-   // (it's what i used for software sim, and had no visibly different
-   // results than the math library functions).
-   // When doing sin/cos together, some speedup might be obtained
-   // with good pairing of ops doing them simultaneously. Also save
-   // an instruction calculating r0^3.
-        D3DXVECTOR4 vSin( 1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f );
-        D3DXVECTOR4 vCos( 1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f );
-*/
-
-/*
-Cos():
-
-
-  r1 = mul(r0, r0);     // r0^2
-  r2 = mul(r1, r1);     // r0^4
-
-  //cos
-  r3 = mad( r1, vCos.yyyy, vCos.xxxx );
-  r3 = mad( r2, vCos.zzzz, r3 );
-*/
-
-/*
-Sin();
-  r1 = mul(r0, r0);     // r0^3
-  r1 = mul(r0, r1);
-  r2 = mul(r1, r1);     // r0^6
-
-  r3 = mad( r1, vSin.yyyy, r0 );
-  r3 = mad( r2, vSin.zzzz, r3 );
-*/
-
-/*
-SinCos():
-
-  r1 = mul(r0, r0);     // r0^2
-  r2 = mul(r1, r0);     // r0^3 // probably stall
-  r3 = mul(r1, r1);     // r0^4
-  r4 = mul(r2, r2);     // r0^6
-
-  r5 = mad( r1, vCos.yyyy, vCos.xxxx );
-  r6 = mad( r2, vSin.yyyy, r0 );
-  r5 = mad( r3, vCos.zzzz, r5 );
-  r6 = mad( r4, vSin.zzzz, r6 );
-
-*/
-
-/*
-consts
-   kOneOverEightNsqPi      = 1.f / ( 8.f * Pi * 4.f * 4.f );
-   kPiOverTwo           = Pi / 2.f;
-   kTwoPi               = Pi * 2.f;
-   kPi                  = Pi;
-*/
-/*
-CONSTANT REGISTERS
-VOLATILE CONSTS - change per invocation
-C0-C3 local2proj matrix
-C4    color
-C5    freq vector
-C6    phase vector
-C7    amplitude vector
-C8    center0
-C9    center1
-C10      center2
-C11      center3
-C12      scrunch = (scrunch, -scrunch, 0, 1);
-CONSTANT CONSTS - forever more
-C13      SinConsts = (1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f);
-C14      CosConsts = (1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
-C15      PiConsts = (1.f / 8*Pi*N^2, Pi/2, Pi, 2*Pi);
-C16      numberConsts = (0.f, 0.5f, 1.f, 2.f);
-//=====================================
-TEMP REGISTERS
-r6    accumPos
-r7    accumCos
-r8    toCenter_Y
-r9    toCenter_X
-r11      filter
-r10      tempFloat
-*/
-// const float4 kCosConsts = float4(1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
-// const float4 kSinConsts = float4(1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f);
-
-// const float4 kPiConsts = float4(1.f / (8.f * 3.1415f * 16f), 3.1415f*0.5f, 3.1415f, 3.1515f*2.f);
-// const float4 k0512 = float4(0.f, 0.5f, 1.f, 2.f);
-
-// accumPos = inPos;
-   mov         r6, v0;
-//
-// For each wave
-// {
-//    // First, we want to filter out waves based on distance from the local origin
-//    dist = dp3(inPos, inPos);
-   dp3         r0, r6, r6;
-//    dist *= kFreqSq.xyzw;
-   mul         r0, r0, c5;
-   mul         r0, r0, c5;
-//    dist *= kOneOverEightNsqPi; // combine this into kFreqSq?
-   mul         r0, r0, c15.xxxx;
-//    dist = min(dist, kPiOverTwo);
-   min         r0, r0, c15.yyyy;
-//    filter = cos(dist);
-   mul         r1, r0, r0;    // r0^2
-   mul         r2, r1, r1;    // r1^2
-   mul         r1, r1, c14.yyyy;
-   add         r11, r1, c14.xxxx;
-   mad         r11, r2, c14.zzzz, r11;
-
-
-//    filter *= kAmplitude.xyzw;
-//   mul         r11, r11, c7;
-//    // Notice that if dist is a 4vec, all this can be simultaneously done for 4 waves at a time.
-//
-//    Find the x/y distances and stuff them into r9(x) and r8(y) respectively
-   // toCenter_X.x = dir0.x * pos.x;
-   // toCenter_Y.x = dir0.y * pos.y;
-   mul		r0, c8, r6.xxxx;
-   mad		r0, c9, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-   mul         r0, r0, c5;
-   add			r0, r0, c6;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-   rcp         r4, c15.wwww;
-   add			r0, r0, c15.zzzz;
-   mul         r0, r0, r4;
-//    dist = frac(dist);
-   expp     r1.y, r0.xxxx
-   mov      r1.x, r1.yyyy
-   expp     r1.y, r0.zzzz
-   mov      r1.z, r1.yyyy
-   expp     r1.y, r0.wwww
-   mov      r1.w, r1.yyyy
-   expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-   mul         r0, r1, c15.wwww;
-//    dist += -kPi;
-   sub         r0, r0, c15.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-   // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-   // cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-   mul         r1, r0, r0; // r0^2
-   mul         r2, r1, r0; // r0^3 - probably stall
-   mul         r3, r1, r1; // r0^4
-   mul         r4, r1, r2; // r0^5
-   mul         r5, r2, r3; // r0^7
-
-   mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
-   mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-   add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y
-   mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-   mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-   // r0^7 & r0^6 terms
-   mul         r4, r4, r0; // r0^6
-   mad         r2, r5, c13.wwww, r2;
-   mad         r1, r4, c14.wwww, r1;
-
-//mov    r2, r1;
-   // r2 == sinDist
-   // r1 == cosDist
-//    sinDist *= filter;
-   mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-   mul         r2, r2, c7;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-   dp4         r6.z, r2, c16.zzzz;
-//
-//    cosDist *= kFreq.xyzw;
-   mul         r1, r1, c5;
-//    cosDist *= kAmplitude.xyzw; // Combine?
-   mul         r1, r1, c7;
-//    cosDist *= filter;
-   mul         r1, r1, r11;
-//
-// accumCos = (0, 0, 0, 0);
-   mov         r7, c16.xxxx;
-//    temp = dp4( cosDist, toCenter_X );
-//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
-   dp4         r7.x, r1, -c8
-//
-//    temp = dp4( cosDist, toCenter_Y );
-//    accumCos.y += temp.xxxx;
-   dp4         r7.y, r1, -c9
-//
-// }
-//
-// accumBin = (1, 0, -accumCos.x);
-// accumTan = (0, 1, -accumCos.y);
-// accumNorm = (accumCos.x, accumCos.y, 1);
-   mov         r11, c16.xxzx;
-   add         r11, r11, r7;
-   dp3         r10.x, r11, r11;
-   rsq         r10.x, r10.x;
-   mul         r11, r11, r10.xxxx;
-
-//
-// // Scrunch in based on computed (normalized) normal
-// temp = mul( accumNorm, kNegScrunchScale ); // kNegScrunchScale = (-scrunchScale, -scrunchScale, 0, 0);
-// accumPos += temp;
-   dp3			r10.x, r11, c18.zxw; // winddir.x, winddir.y, 0, 0
-   // r10.x tells us whether our normal is opposed to the wind.
-   // If opposed, r10.x = 0, else r10.x = 1.f;
-   // We'll use this to kill the Scrunch on the back sides of waves.
-   // We use it for position right here, and then again for the
-   // normal just down a bit further.
-   slt			r10.x, r10.x, c16.x;
-   mul			r9, r10.xxxx, r11;
-
-   mad         r6, r9, c12.yyzz, r6;
-
-//   mul			r6.z, r6.z, r10.xxxx; DEBUG
-
-//   mad         r6, r11, c12.yyzz, r6;
-
-// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
-   // accumCos *= (scrunchScale, scrunchScale, 0, 0);
-
-   mul			r2.x, r6.z, c12.x;
-   mul			r2.x, r2.x, r10.x; // ???
-   add			r2.x, r2.x, c16.z;
-
-//   mul         r7, r7, c12.xxzz;
-   mul			r7.xy, r7.xy, r2.xx;
-
-// This is actually wrong, but useful right now for visualizing the generated coords.
-// See below for correct version.
-
-   sub			r3, c16.xxzx, r7.xyzz;
-
-   // Normalize?
-
-   // We can either calculate an orthonormal basis from the
-   // computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
-   // or compute our basis directly from the partial derivatives, with
-   // Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
-   //
-   // These work out to identically the same result, so we'll compute directly
-   // from the partials because it takes 2 fewer instructions.
-   //
-   // Note that our basis is NOT orthonormal. The Normal is equal to
-   // Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
-   // are both correct tangents to the surface, and their projections on the XY plane
-   // are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
-   // Not really. I'm actually not really sure which is more "proper" for bump mapping.
-   //
-   // Note also that we add when we should subtract and subtract when we should
-   // add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
-   // of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
-   // explanation.
-   //
-   // Binormal = Y % Normal
-   // Cross product3 is:
-   //	mul		res.xyz, a.yzx, b.zxy
-   //	mad		res.xyz, -a.zxy, b.yzx, res.xyz
-//   mul			r1.xyz, c16.zxx, r3.zxy;
-//   mad			r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
-
-   // Tangent = Normal % X
-//   mul			r2.xyz, r3.yzx, c16.xzx;
-//   mad			r2.xyz, -r3.zxy, c16.xxz, r2;
-
-   add			r1, c16.zxxx, r7.zzxz;
-   add			r2, c16.xzxx, r7.zzyz;
-
-   // Note that we're swapping z and y to match our environment map tools in max.
-   // We do this through our normal map transform (oT1, oT2, oT3), making it
-   // a concatenation of:
-   //
-   //	rotate about Z (blue) to turn our map into the wind
-   //	windRot =	|	dirY	-dirX	0 |
-   //				|	dirX	dirY	0 |
-   //				|	0		0		1 |
-   //
-   //	swap our Y and Z axes to match our environment map
-   //	swapYZ	=	|	1		0		0 |
-   //				|	0		0		1 |
-   //				|	0		1		0 |
-   //
-   //	rotate the normal into the surface's tangent space basis
-   //	basis	=	|	Bx		Tx		Nx |
-   //				|	By		Ty		Ny |
-   //				|	Bz		Tz		Nz |
-   //
-   //	Note that we've constucted the basis by taking advantage of the
-   //	matrix being a pure rotation, as noted below, so r1, r2 and r3
-   //	are actually constructed as:
-   //	basis	=	|	Bx		-By		-Bz |
-   //				|	-Tx		Ty		-Tz |
-   //				|	-Nx		-Ny		-Nz |
-   //
-   //	Then the final normal map transform is:
-   //
-   //		basis * swapYZ * windRot [ * normal ]
-
-
-//   sub         r1.w, c17.x, r6.x;
-//   sub         r2.w, c17.z, r6.z;
-//   sub         r3.w, c17.y, r6.y;
-
-  // Big note here. All this math can blow up if the camera position
-   // is outside the environment sphere. It's assumed that's dealt
-   // with in the app setting up the constants. For that reason, the
-   // camera position used here might not be the real local camera position,
-   // which is needed for the angular attenuation, so we burn another constant
-   // with our pseudo-camera position. To restrain the pseudo-camera from
-   // leaving the sphere, we make:
-   //	pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
-   // where dist = |realPos - envCenter|
-
-   // So, our "finitized" eyeray is:
-   //	camPos + D * t - envCenter = D * t - (envCenter - camPos)
-   // with
-   //	D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
-   // and
-   //	t = D dot F + sqrt( (D dot F)^2 - G )
-   // with
-   //	F = (envCenter - camPos)	=> c19.xyz
-   //	G = F^2 - R^2				=> c19.w
-   //	R = environment radius.		=> unused
-   //
-   // This all derives from the positive root of equation
-   //	(camPos + (pos - camPos) * t - envCenter)^2 = R^2,
-   // In other words, where on a sphere of radius R centered about envCenter
-   // does the ray from the real camera position through this point hit.
-   //
-   // Note that F, G, and R are all constants (one point, two scalars).
-   //
-   // So first we calculate D into r0,
-   // then D dot F into r10.x,
-   // then (D dot F)^2 - G into r10.y
-   // then rsq( (D dot F)^2 - G ) into r9.x;
-   // then t = r10.z = r10.x + r10.y * r9.x;
-   // and
-   // r0 = D * t - (envCenter - camPos)
-   //		= r0 * r10.zzzz - F;
-   //
-   sub			r0, r6, c17;
-   dp3			r10.x, r0, r0;
-   rsq			r10.x, r10.x;
-   mul			r0, r0, r10.xxxx;
-
-   dp3			r10.x, r0, c19;
-   mad			r10.y, r10.x, r10.x, -c19.w;
-
-   rsq			r9.x, r10.y;
-
-   mad			r10.z, r10.y, r9.x, r10.x;
-
-   mad			r0.xyz, r0, r10.zzz, -c19.xyz;
-
-   mov			r1.w, -r0.x;
-   mov			r2.w, -r0.y;
-   mov			r3.w, -r0.z;
-
-   // Now rotate our basis vectors into the wind
-	dp3		r0.x, r1, c18.xyww;
-	dp3		r0.y, r1, c18.zxww;
-	mov		r1.xy, r0;
-
-	dp3		r0.x, r2, c18.xyww;
-	dp3		r0.y, r2, c18.zxww;
-	mov		r2.xy, r0;
-
-	dp3		r0.x, r3, c18.xyww;
-	dp3		r0.y, r3, c18.zxww;
-	mov		r3.xy, r0;
-
-   mov			r0.w, c16.zzzz;
-
-   dp3         r0.x, r1, r1;
-   rsq         r0.x, r0.x;
-   mul         oT1, r1.xyzw, r0.xxxw;
-//   mul			r8, r1.xyzw, r0.xxxw; // VISUAL
-
-   dp3         r0.x, r2, r2;
-   rsq         r0.x, r0.x;
-   mul         oT3, r2.xyzw, r0.xxxw;
-//   mul			r9, r2.xyzw, r0.xxxw; // VISUAL
-
-   dp3         r0.x, r3, r3;
-   rsq         r0.x, r0.x;
-   mul         oT2, r3.xyzw, r0.xxxw;
-//   mul			r9, r3.xyzw, r0.xxxw; // VISUAL
-
-//	mul		   r3, r3.xzyw, r0.xxxw;
-//	mul			r3.xy, r3, -c16.zzzz;
-
-/*
-   // Want:
-   //    oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
-   //    oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
-   //    ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
-   // with BIN, TAN, and NORM normalized.
-   // Unnormalized, we have
-   //    BIN = (1, 0, -r7.x) where r7 == accumCos
-   //    TAN = (0, 1, -r7.y)
-   //    NORM= (r7.x, r7.y, 1)
-   // So, unnormalized, we have
-   //    oT1 = (1, 0, r7.x, view2pos.x)
-   //    oT2 = (0, 1, r7.y, view2pos.y)
-   //    oT3 = (-r7.x, -r7.y, 1, view2pos.z)
-   // which is just reversing the signs on the accumCos
-   // terms above. So the normalized version is just
-   // reversing the signs on the normalized version above.
-*/
-//mov oT3, r4;
-
-//
-// // Transform position to screen
-//
-//
-   m4x4     oPos, r6, c0;
-
-// Still need to attenuate based on position
-   mov         oD0, c4;
-
-// This should be in local space after xforming v0
-   dp4			r0.x, v0, c10;
-   dp4			r0.y, v0, c11;
-   mov			r0.zw, c16.xxxz;
-   mov			oT0, r0
-//   mov			oT0, v7;
-
-// Questionble attenuation follows
-	// Find vector from this point to camera and normalize
-	sub			r0, c17, r6;
-	dp3			r1.x, r0, r0;
-    rsq			r1.x, r1.x;
-	mul			r0, r0, r1.xxxx;
-	// Dot that with the computed normal
-	dp3			r1.x, r0, r11;
-//	dp3			r1.x, r0, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
-	// Map dot=1 => 0, dot=0 => 1
-	sub			r1.xyzw, c16.zzzz, r1.xxxx;
-	add			r1.w, r1.wwww, c16.zzzz;
-	mul			r1.w, r1.wwww, c16.yyyy;
-	// No need to clamp, since the destination register (in the pixel shader)
-	// will saturate [0..1] anyway.
-	mul			oD1, r1, c20;
-//	mov			oD1, r9;
-//	mov			oD1, r8.xzyw;
+vs.1.1
+
+dcl_position v0
+
+//m4x4 oPos, v0, c0
+
+
+/*
+In fact, I was trying to understand how it was possible to expand FRC into 4
+instructions...
+Actually, I can do it in 7 instructions :)
+
+EXPP r0.y, r1.xxxx
+MOV r0.x, r0.y
+EXPP r0.y, r1.zzzz
+MOV r0.z, r0.y
+EXPP r0.y, r1.wwww
+MOV r0.w, r0.y
+EXPP r0.y, r1.yyyy
+*/
+
+/*
+   // Constants for sin and cos. 3 term approximation seems plenty
+   // (it's what i used for software sim, and had no visibly different
+   // results than the math library functions).
+   // When doing sin/cos together, some speedup might be obtained
+   // with good pairing of ops doing them simultaneously. Also save
+   // an instruction calculating r0^3.
+        D3DXVECTOR4 vSin( 1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f );
+        D3DXVECTOR4 vCos( 1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f );
+*/
+
+/*
+Cos():
+
+
+  r1 = mul(r0, r0);     // r0^2
+  r2 = mul(r1, r1);     // r0^4
+
+  //cos
+  r3 = mad( r1, vCos.yyyy, vCos.xxxx );
+  r3 = mad( r2, vCos.zzzz, r3 );
+*/
+
+/*
+Sin();
+  r1 = mul(r0, r0);     // r0^2
+  r1 = mul(r0, r1);     // r0^3
+  r2 = mul(r1, r1);     // r0^6
+
+  r3 = mad( r1, vSin.yyyy, r0 );
+  r3 = mad( r2, vSin.zzzz, r3 );
+*/
+
+/*
+SinCos():
+
+  r1 = mul(r0, r0);     // r0^2
+  r2 = mul(r1, r0);     // r0^3 // probably stall
+  r3 = mul(r1, r1);     // r0^4
+  r4 = mul(r2, r2);     // r0^6
+
+  r5 = mad( r1, vCos.yyyy, vCos.xxxx );
+  r6 = mad( r2, vSin.yyyy, r0 );
+  r5 = mad( r3, vCos.zzzz, r5 );
+  r6 = mad( r4, vSin.zzzz, r6 );
+
+*/
+
+/*
+consts
+   kOneOverEightNsqPi      = 1.f / ( 8.f * Pi * 4.f * 4.f );
+   kPiOverTwo           = Pi / 2.f;
+   kTwoPi               = Pi * 2.f;
+   kPi                  = Pi;
+*/
+/*
+CONSTANT REGISTERS
+VOLATILE CONSTS - change per invocation
+C0-C3 local2proj matrix
+C4    color
+C5    freq vector
+C6    phase vector
+C7    amplitude vector
+C8    center0
+C9    center1
+C10      center2
+C11      center3
+C12      scrunch = (scrunch, -scrunch, 0, 1);
+CONSTANT CONSTS - forever more
+C13      SinConsts = (1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f);
+C14      CosConsts = (1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
+C15      PiConsts = (1.f / (8*Pi*N^2), Pi/2, Pi, 2*Pi);
+C16      numberConsts = (0.f, 0.5f, 1.f, 2.f);
+//=====================================
+TEMP REGISTERS
+r6    accumPos
+r7    accumCos
+r8    toCenter_Y
+r9    toCenter_X
+r11      filter
+r10      tempFloat
+*/
+// const float4 kCosConsts = float4(1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
+// const float4 kSinConsts = float4(1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f);
+
+// const float4 kPiConsts = float4(1.f / (8.f * 3.1415f * 16f), 3.1415f*0.5f, 3.1415f, 3.1415f*2.f);
+// const float4 k0512 = float4(0.f, 0.5f, 1.f, 2.f);
+
+// accumPos = inPos;
+   mov         r6, v0;
+//
+// For each wave
+// {
+//    // First, we want to filter out waves based on distance from the local origin
+//    dist = dp3(inPos, inPos);
+   dp3         r0, r6, r6;
+//    dist *= kFreqSq.xyzw;
+   mul         r0, r0, c5;
+   mul         r0, r0, c5;
+//    dist *= kOneOverEightNsqPi; // combine this into kFreqSq?
+   mul         r0, r0, c15.xxxx;
+//    dist = min(dist, kPiOverTwo);
+   min         r0, r0, c15.yyyy;
+//    filter = cos(dist);
+   mul         r1, r0, r0;    // r0^2
+   mul         r2, r1, r1;    // r1^2
+   mul         r1, r1, c14.yyyy;
+   add         r11, r1, c14.xxxx;
+   mad         r11, r2, c14.zzzz, r11;
+
+
+//    filter *= kAmplitude.xyzw;
+//   mul         r11, r11, c7;
+//    // Notice that if dist is a 4vec, all this can be simultaneously done for 4 waves at a time.
+//
+//    Find the x/y distances and stuff them into r9(x) and r8(y) respectively
+   // toCenter_X.x = dir0.x * pos.x;
+   // toCenter_Y.x = dir0.y * pos.y;
+   mul      r0, c8, r6.xxxx;
+   mad      r0, c9, r6.yyyy, r0;
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+   mul         r0, r0, c5;
+   add          r0, r0, c6;
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist *= rcp(kTwoPi);
+   rcp         r4, c15.wwww;
+   add          r0, r0, c15.zzzz;
+   mul         r0, r0, r4;
+//    dist = frac(dist);
+   expp     r1.y, r0.xxxx
+   mov      r1.x, r1.yyyy
+   expp     r1.y, r0.zzzz
+   mov      r1.z, r1.yyyy
+   expp     r1.y, r0.wwww
+   mov      r1.w, r1.yyyy
+   expp     r1.y, r0.yyyy
+//    dist *= kTwoPi;
+   mul         r0, r1, c15.wwww;
+//    dist += -kPi;
+   sub         r0, r0, c15.zzzz;
+
+//
+//    sincos(dist, sinDist, cosDist);
+   // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+   // cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+   mul         r1, r0, r0; // r0^2
+   mul         r2, r1, r0; // r0^3 - probably stall
+   mul         r3, r1, r1; // r0^4
+   mul         r4, r1, r2; // r0^5
+   mul         r5, r2, r3; // r0^7
+
+   mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
+   mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+   add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y
+   mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+   mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+   // r0^7 & r0^6 terms
+   mul         r4, r4, r0; // r0^6
+   mad         r2, r5, c13.wwww, r2;
+   mad         r1, r4, c14.wwww, r1;
+
+//mov    r2, r1;
+   // r2 == sinDist
+   // r1 == cosDist
+//    sinDist *= filter;
+   mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+   mul         r2, r2, c7;
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+   dp4         r6.z, r2, c16.zzzz;
+//
+//    cosDist *= kFreq.xyzw;
+   mul         r1, r1, c5;
+//    cosDist *= kAmplitude.xyzw; // Combine?
+   mul         r1, r1, c7;
+//    cosDist *= filter;
+   mul         r1, r1, r11;
+//
+// accumCos = (0, 0, 0, 0);
+   mov         r7, c16.xxxx;
+//    temp = dp4( cosDist, toCenter_X );
+//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
+   dp4         r7.x, r1, -c8
+//
+//    temp = dp4( cosDist, toCenter_Y );
+//    accumCos.y += temp.xxxx;
+   dp4         r7.y, r1, -c9
+//
+// }
+//
+// accumBin = (1, 0, -accumCos.x);
+// accumTan = (0, 1, -accumCos.y);
+// accumNorm = (accumCos.x, accumCos.y, 1);
+   mov         r11, c16.xxzx;
+   add         r11, r11, r7;
+   dp3         r10.x, r11, r11;
+   rsq         r10.x, r10.x;
+   mul         r11, r11, r10.xxxx;
+
+//
+// // Scrunch in based on computed (normalized) normal
+// temp = mul( accumNorm, kNegScrunchScale ); // kNegScrunchScale = (-scrunchScale, -scrunchScale, 0, 0);
+// accumPos += temp;
+   dp3          r10.x, r11, c18.zxw; // winddir.x, winddir.y, 0, 0
+   // r10.x tells us whether our normal is opposed to the wind.
+   // If opposed, r10.x = 0, else r10.x = 1.f;
+   // We'll use this to kill the Scrunch on the back sides of waves.
+   // We use it for position right here, and then again for the
+   // normal just down a bit further.
+   slt          r10.x, r10.x, c16.x;
+   mul          r9, r10.xxxx, r11;
+
+   mad         r6, r9, c12.yyzz, r6;
+
+//   mul            r6.z, r6.z, r10.xxxx; DEBUG
+
+//   mad         r6, r11, c12.yyzz, r6;
+
+// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
+   // accumCos *= (scrunchScale, scrunchScale, 0, 0);
+
+   mul          r2.x, r6.z, c12.x;
+   mul          r2.x, r2.x, r10.x; // ???
+   add          r2.x, r2.x, c16.z;
+
+//   mul         r7, r7, c12.xxzz;
+   mul          r7.xy, r7.xy, r2.xx;
+
+// This is actually wrong, but useful right now for visualizing the generated coords.
+// See below for correct version.
+
+   sub          r3, c16.xxzx, r7.xyzz;
+
+   // Normalize?
+
+   // We can either calculate an orthonormal basis from the
+   // computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
+   // or compute our basis directly from the partial derivatives, with
+   // Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
+   //
+   // These work out to identically the same result, so we'll compute directly
+   // from the partials because it takes 2 fewer instructions.
+   //
+   // Note that our basis is NOT orthonormal. The Normal is equal to
+   // Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
+   // are both correct tangents to the surface, and their projections on the XY plane
+   // are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
+   // Not really. I'm actually not really sure which is more "proper" for bump mapping.
+   //
+   // Note also that we add when we should subtract and subtract when we should
+   // add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
+   // of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
+   // explanation.
+   //
+   // Binormal = Y % Normal
+   // Cross product3 is:
+   //   mul     res.xyz, a.yzx, b.zxy
+   //   mad     res.xyz, -a.zxy, b.yzx, res.xyz
+//   mul            r1.xyz, c16.zxx, r3.zxy;
+//   mad            r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
+
+   // Tangent = Normal % X
+//   mul            r2.xyz, r3.yzx, c16.xzx;
+//   mad            r2.xyz, -r3.zxy, c16.xxz, r2;
+
+   add          r1, c16.zxxx, r7.zzxz;
+   add          r2, c16.xzxx, r7.zzyz;
+
+   // Note that we're swapping z and y to match our environment map tools in max.
+   // We do this through our normal map transform (oT1, oT2, oT3), making it
+   // a concatenation of:
+   //
+   //   rotate about Z (blue) to turn our map into the wind
+   //   windRot =   |   dirY    -dirX   0 |
+   //               |   dirX    dirY    0 |
+   //               |   0       0       1 |
+   //
+   //   swap our Y and Z axes to match our environment map
+   //   swapYZ  =   |   1       0       0 |
+   //               |   0       0       1 |
+   //               |   0       1       0 |
+   //
+   //   rotate the normal into the surface's tangent space basis
+   //   basis   =   |   Bx      Tx      Nx |
+   //               |   By      Ty      Ny |
+   //               |   Bz      Tz      Nz |
+   //
+   //   Note that we've constructed the basis by taking advantage of the
+   //   matrix being a pure rotation, as noted below, so r1, r2 and r3
+   //   are actually constructed as:
+   //   basis   =   |   Bx      -By     -Bz |
+   //               |   -Tx     Ty      -Tz |
+   //               |   -Nx     -Ny     -Nz |
+   //
+   //   Then the final normal map transform is:
+   //
+   //       basis * swapYZ * windRot [ * normal ]
+
+
+//   sub         r1.w, c17.x, r6.x;
+//   sub         r2.w, c17.z, r6.z;
+//   sub         r3.w, c17.y, r6.y;
+
+  // Big note here. All this math can blow up if the camera position
+   // is outside the environment sphere. It's assumed that's dealt
+   // with in the app setting up the constants. For that reason, the
+   // camera position used here might not be the real local camera position,
+   // which is needed for the angular attenuation, so we burn another constant
+   // with our pseudo-camera position. To restrain the pseudo-camera from
+   // leaving the sphere, we make:
+   //   pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
+   // where dist = |realPos - envCenter|
+
+   // So, our "finitized" eyeray is:
+   //   camPos + D * t - envCenter = D * t - (envCenter - camPos)
+   // with
+   //   D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
+   // and
+   //   t = D dot F + sqrt( (D dot F)^2 - G )
+   // with
+   //   F = (envCenter - camPos)    => c19.xyz
+   //   G = F^2 - R^2               => c19.w
+   //   R = environment radius.     => unused
+   //
+   // This all derives from the positive root of equation
+   //   (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
+   // In other words, where on a sphere of radius R centered about envCenter
+   // does the ray from the real camera position through this point hit.
+   //
+   // Note that F, G, and R are all constants (one point, two scalars).
+   //
+   // So first we calculate D into r0,
+   // then D dot F into r10.x,
+   // then (D dot F)^2 - G into r10.y
+   // then rsq( (D dot F)^2 - G ) into r9.x;
+   // then t = r10.z = r10.x + r10.y * r9.x;
+   // and
+   // r0 = D * t - (envCenter - camPos)
+   //       = r0 * r10.zzzz - F;
+   //
+   sub          r0, r6, c17;
+   dp3          r10.x, r0, r0;
+   rsq          r10.x, r10.x;
+   mul          r0, r0, r10.xxxx;
+
+   dp3          r10.x, r0, c19;
+   mad          r10.y, r10.x, r10.x, -c19.w;
+
+   rsq          r9.x, r10.y;
+
+   mad          r10.z, r10.y, r9.x, r10.x;
+
+   mad          r0.xyz, r0, r10.zzz, -c19.xyz;
+
+   mov          r1.w, -r0.x;
+   mov          r2.w, -r0.y;
+   mov          r3.w, -r0.z;
+
+   // Now rotate our basis vectors into the wind
+    dp3     r0.x, r1, c18.xyww;
+    dp3     r0.y, r1, c18.zxww;
+    mov     r1.xy, r0;
+
+    dp3     r0.x, r2, c18.xyww;
+    dp3     r0.y, r2, c18.zxww;
+    mov     r2.xy, r0;
+
+    dp3     r0.x, r3, c18.xyww;
+    dp3     r0.y, r3, c18.zxww;
+    mov     r3.xy, r0;
+
+   mov          r0.w, c16.zzzz;
+
+   dp3         r0.x, r1, r1;
+   rsq         r0.x, r0.x;
+   mul         oT1, r1.xyzw, r0.xxxw;
+//   mul            r8, r1.xyzw, r0.xxxw; // VISUAL
+
+   dp3         r0.x, r2, r2;
+   rsq         r0.x, r0.x;
+   mul         oT3, r2.xyzw, r0.xxxw;
+//   mul            r9, r2.xyzw, r0.xxxw; // VISUAL
+
+   dp3         r0.x, r3, r3;
+   rsq         r0.x, r0.x;
+   mul         oT2, r3.xyzw, r0.xxxw;
+//   mul            r9, r3.xyzw, r0.xxxw; // VISUAL
+
+//  mul        r3, r3.xzyw, r0.xxxw;
+//  mul         r3.xy, r3, -c16.zzzz;
+
+/*
+   // Want:
+   //    oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
+   //    oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
+   //    ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
+   // with BIN, TAN, and NORM normalized.
+   // Unnormalized, we have
+   //    BIN = (1, 0, -r7.x) where r7 == accumCos
+   //    TAN = (0, 1, -r7.y)
+   //    NORM= (r7.x, r7.y, 1)
+   // So, unnormalized, we have
+   //    oT1 = (1, 0, r7.x, view2pos.x)
+   //    oT2 = (0, 1, r7.y, view2pos.y)
+   //    oT3 = (-r7.x, -r7.y, 1, view2pos.z)
+   // which is just reversing the signs on the accumCos
+   // terms above. So the normalized version is just
+   // reversing the signs on the normalized version above.
+*/
+//mov oT3, r4;
+
+//
+// // Transform position to screen
+//
+//
+   m4x4     oPos, r6, c0;
+
+// Still need to attenuate based on position
+   mov         oD0, c4;
+
+// This should be in local space after xforming v0
+   dp4          r0.x, v0, c10;
+   dp4          r0.y, v0, c11;
+   mov          r0.zw, c16.xxxz;
+   mov          oT0, r0
+//   mov            oT0, v7;
+
+// Questionable attenuation follows
+    // Find vector from this point to camera and normalize
+    sub         r0, c17, r6;
+    dp3         r1.x, r0, r0;
+    rsq         r1.x, r1.x;
+    mul         r0, r0, r1.xxxx;
+    // Dot that with the computed normal
+    dp3         r1.x, r0, r11;
+//  dp3         r1.x, r0, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
+    // Map dot=1 => 0, dot=0 => 1
+    sub         r1.xyzw, c16.zzzz, r1.xxxx;
+    add         r1.w, r1.wwww, c16.zzzz;
+    mul         r1.w, r1.wwww, c16.yyyy;
+    // No need to clamp, since the destination register (in the pixel shader)
+    // will saturate [0..1] anyway.
+    mul         oD1, r1, c20;
+//  mov         oD1, r9;
+//  mov         oD1, r8.xzyw;
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveRip.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveRip.inl
@ -1,243 +1,243 @@
-vs.1.1
-dcl_position v0
-dcl_color v5
-dcl_texcoord0 v7
-
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c25; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c16.z;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = reflection strength (transparency)
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c30 = waterlevel + offset
-//	c31 = (maxAtten - minAtten) / depthFalloff
-//	c32 = minAtten.
-// And in particular:
-//	c30.w = waterlevel
-//	c31.w = 1.f;
-//	c32.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c8, r6.xxxx;
-mad		r0, c9, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c5;
-add			r0, r0, c6;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c15.wwww;
-add			r0, r0, c15.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c15.wwww;
-//    dist += -kPi;
-sub         r0, r0, c15.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c13.wwww, r2;
-mad         r1, r4, c14.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c30, r6.zzzz;
-mul			r4, r4, c31;
-add			r4, r4, c32;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c16.zzzz;
-max			r4.xyz, r4, c16.xxxx;
-//mov r4.xyz, c16.xxx; // HACKTEST
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c29;
-max			r11, r11, c16.xxxx;
-min			r11, r11, c16.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r2, r2, c7;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r2, c16.zzzz;
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c30.w;
-max			r6.z, r6.z, r8.z;
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= kFreq.xyzw;
-mul         r1, r1, c5;
-//    cosDist *= kAmplitude.xyzw; // Combine?
-mul         r1, r1, c7;
-//    cosDist *= filter;
-mul         r1, r1, r11;
-//
-// accumCos = (0, 0, 0, 0);
-mov         r7, c16.xxxx;
-//    temp = dp4( cosDist, toCenter_X );
-//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
-dp4         r7.x, r1, -c8
-//
-//    temp = dp4( cosDist, toCenter_Y );
-//    accumCos.y += temp.xxxx;
-dp4         r7.y, r1, -c9
-//
-// }
-//
-// accumBin = (1, 0, -accumCos.x);
-// accumTan = (0, 1, -accumCos.y);
-// accumNorm = (accumCos.x, accumCos.y, 1);
-mov         r11, c16.xxzx;
-add         r11, r11, r7;
-dp3         r10.x, r11, r11;
-rsq         r10.x, r10.x;
-mul         r11, r11, r10.xxxx;
-
-//
-// Add in our scrunch (offset in X/Y plane).
-// Scale down our scrunch amount by the wave scaling
-mul			r10.x, c12.y, r4.z;
-mad         r6.xy, r11.xy, r10.xx, r6.xy;
-
-// Bias our vert up a bit to compensate for precision errors.
-// In particular, our filter coefficients are coming in as
-// interpolated bytes, so there's bound to be a lot of slop
-// from that. We've got a free slot in c35.z, so we'll use that.
-// A better implementation would be to bias and scale our screen
-// vert, effectively pushing the vert toward the camera without
-// actually moving it, but this is easier and might work just
-// as well.
-add			r6.z, r6.z, c35.z;
-
-//
-// // Transform position to screen
-//
-//
-//m4x3	r6, v0, c25; // HACKAGE
-//mov		r6.w, c16.z; // HACKAGE
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c4.x;
-mul			oFog, r10.x, c4.y;
-mov			oPos, r9;
-
-
-// Dyna Stuff
-// Constants
-// c33 = fC1U, fC2U, fC1V, fC2V
-// c34 = fInitAtten, t, life, 1.f / (life-decay)
-// c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE
-//
-// Vertex Info
-// v7.z = fBirth (because we don't use it for anything else).
-//
-// Initialize r1.zw to 0,1
-mov		r1, c16.xxxz;
-// Calc r1.x = age, r1.y = atten
-// age = t - birth.
-sub		r1.x, c34.y, v7.z;
-// atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay));
-// first clamp0_1(age/ramp)
-mul		r1.y, r1.x, c35.y;
-min		r1.y, r1.y, c16.z; // Clamp to one (can't go negative).
-// now clamp0_1((life-age) / (life-decay));
-sub		r1.z, c34.z, r1.x;
-mul		r1.z, r1.z, c34.w;
-min		r1.z, r1.z, c16.z; // Clamp to one
-max		r1.z, r1.z, c16.x; // Clamp to zero
-mul		r1.y, r1.y, r1.z; // atten is the product of the two terms.
-
-// color is (atten, atten, atten, 1.f)
-// Need to calculate opacity we would have had from vs_WaveFixedFin6.inl
-// Right now that's just modulating by r4.y.
-mul		r0.y, r4.y, c34.x;
-mul		oD0, r0.yyyy, r1.yyyw;
-//mov oD0, c16.zzzz; // HACKTEST
-
-// UVW = (inUVW - 0.5) * scale + 0.5
-// where:
-// scale = (fC1U / (age * fC2U + 1.f)), fC1V / (age * fC2U + 1.f), 1.f, 1.f
-mov		r2, c16.xxxz;
-mul		r2.xy, r1.xx, c33.yw;
-add		r2.xy, r2.xy, c16.zz;
-rcp		r2.x, r2.x;
-rcp		r2.y, r2.y;
-mul		r2.xy, r2.xy, c33.xz;
-sub		r1.xy, v7.xy, c16.yy;
-mul		r1.xy, r1.xy, r2.xy;
-add		r1.xy, r1.xy, c16.yy;
-mov		oT0, r1;
-
-
+vs.1.1
+dcl_position v0
+dcl_color v5
+dcl_texcoord0 v7
+// Wave vertex shader: moves the world-space vertex by a sum of four filtered sine waves, then
+// writes clip position (oPos), fog (oFog), an age-faded color (oD0) and age-scaled UVs (oT0).
+// Store our input position in world space in r6
+m4x3        r6, v0, c25; // v0 * l2w
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c16.z;
+
+//
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = reflection strength (transparency)
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max( (waveLen * v5.wwww) - 1, 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+// NOTE(review): the filter code below is clamp(v5.wwww * c29, 0, 1) with no "- 1" term -- confirm which is intended.
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c30 = waterlevel + offset
+//  c31 = (maxAtten - minAtten) / depthFalloff
+//  c32 = minAtten.
+// And in particular:
+//  c30.w = waterlevel
+//  c31.w = 1.f;
+//  c32.w = 0;
+// So r4.w is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+mul     r0, c8, r6.xxxx;
+mad     r0, c9, r6.yyyy, r0;
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+mul         r0, r0, c5;
+add         r0, r0, c6;
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist *= rcp(kTwoPi);
+rcp         r4, c15.wwww;
+add         r0, r0, c15.zzzz;
+mul         r0, r0, r4;
+//    dist = frac(dist);  (expp writes the fractional part to .y; r1.y is the scratch slot, so y is done last)
+expp     r1.y, r0.xxxx
+mov      r1.x, r1.yyyy
+expp     r1.y, r0.zzzz
+mov      r1.z, r1.yyyy
+expp     r1.y, r0.wwww
+mov      r1.w, r1.yyyy
+expp     r1.y, r0.yyyy
+//    dist *= kTwoPi;
+mul         r0, r1, c15.wwww;
+//    dist += -kPi;
+sub         r0, r0, c15.zzzz;
+
+//
+//    sincos(dist, sinDist, cosDist);  (polynomial approximation; coefficients vSin = c13, vCos = c14)
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y
+mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6
+mad         r2, r5, c13.wwww, r2;
+mad         r1, r4, c14.wwww, r1;
+
+// Calc our depth based filtering here into r4 (because we don't use it again
+// after here, and we need our filtering shortly).
+sub         r4, c30, r6.zzzz;
+mul         r4, r4, c31;
+add         r4, r4, c32;
+// Clamp .xyz to range [0..1]
+min         r4.xyz, r4, c16.zzzz;
+max         r4.xyz, r4, c16.xxxx;
+//mov r4.xyz, c16.xxx; // HACKTEST
+
+// Calc our filter (see above): r11 = clamp(v5.w * c29, 0, 1), one value per wave.
+mul         r11, v5.wwww, c29;
+max         r11, r11, c16.xxxx;
+min         r11, r11, c16.zzzz;
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+mul         r2, r2, c7;
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r2, c16.zzzz;
+mul         r8.y, r8.x, r4.z;
+add         r8.z, r8.y, c30.w;
+max         r6.z, r6.z, r8.z;
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level
+//
+//    cosDist *= kFreq.xyzw;
+mul         r1, r1, c5;
+//    cosDist *= kAmplitude.xyzw; // Combine?
+mul         r1, r1, c7;
+//    cosDist *= filter;
+mul         r1, r1, r11;
+//
+// accumCos = (0, 0, 0, 0);
+mov         r7, c16.xxxx;
+//    temp = dp4( cosDist, toCenter_X );
+//    accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
+dp4         r7.x, r1, -c8
+//
+//    temp = dp4( cosDist, toCenter_Y );
+//    accumCos.y += temp.xxxx;
+dp4         r7.y, r1, -c9
+//
+// }
+//
+// accumBin = (1, 0, -accumCos.x);
+// accumTan = (0, 1, -accumCos.y);
+// accumNorm = (accumCos.x, accumCos.y, 1);
+mov         r11, c16.xxzx;
+add         r11, r11, r7;
+dp3         r10.x, r11, r11;
+rsq         r10.x, r10.x;
+mul         r11, r11, r10.xxxx;
+
+//
+// Add in our scrunch (offset in X/Y plane).
+// Scale down our scrunch amount by the wave scaling
+mul         r10.x, c12.y, r4.z;
+mad         r6.xy, r11.xy, r10.xx, r6.xy;
+
+// Bias our vert up a bit to compensate for precision errors.
+// In particular, our filter coefficients are coming in as
+// interpolated bytes, so there's bound to be a lot of slop
+// from that. We've got a free slot in c35.z, so we'll use that.
+// A better implementation would be to bias and scale our screen
+// vert, effectively pushing the vert toward the camera without
+// actually moving it, but this is easier and might work just
+// as well.
+add         r6.z, r6.z, c35.z;
+
+//
+// // Transform position to screen
+//
+//
+//m4x3  r6, v0, c25; // HACKAGE
+//mov       r6.w, c16.z; // HACKAGE
+//m4x4     oPos, r6, c0; // ADDFOG
+m4x4        r9, r6, c0;
+add         r10.x, r9.w, c4.x;
+mul         oFog, r10.x, c4.y;
+mov         oPos, r9;
+
+
+// Dyna Stuff
+// Constants
+// c33 = fC1U, fC2U, fC1V, fC2V
+// c34 = fInitAtten, t, life, 1.f / (life-decay)
+// c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE
+//
+// Vertex Info
+// v7.z = fBirth (because we don't use it for anything else).
+//
+// Initialize r1 to (0, 0, 0, 1). NOTE(review): r1.z is reused as scratch below, so oT0.z gets the decay term, not 0 -- confirm intended.
+mov     r1, c16.xxxz;
+// Calc r1.x = age, r1.y = atten
+// age = t - birth.
+sub     r1.x, c34.y, v7.z;
+// atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay));
+// first clamp0_1(age/ramp)
+mul     r1.y, r1.x, c35.y;
+min     r1.y, r1.y, c16.z; // Clamp to one (can't go negative).
+// now clamp0_1((life-age) / (life-decay));
+sub     r1.z, c34.z, r1.x;
+mul     r1.z, r1.z, c34.w;
+min     r1.z, r1.z, c16.z; // Clamp to one
+max     r1.z, r1.z, c16.x; // Clamp to zero
+mul     r1.y, r1.y, r1.z; // atten is the product of the two terms.
+
+// color is (atten, atten, atten, 1.f)
+// Need to calculate opacity we would have had from vs_WaveFixedFin6.inl
+// Right now that's just modulating by r4.y.
+mul     r0.y, r4.y, c34.x;
+mul     oD0, r0.yyyy, r1.yyyw;
+//mov oD0, c16.zzzz; // HACKTEST
+
+// UVW = (inUVW - 0.5) * scale + 0.5
+// where:
+// scale = (fC1U / (age * fC2U + 1.f), fC1V / (age * fC2V + 1.f), 1.f, 1.f)
+mov     r2, c16.xxxz;
+mul     r2.xy, r1.xx, c33.yw;
+add     r2.xy, r2.xy, c16.zz;
+rcp     r2.x, r2.x;
+rcp     r2.y, r2.y;
+mul     r2.xy, r2.xy, c33.xz;
+sub     r1.xy, v7.xy, c16.yy;
+mul     r1.xy, r1.xy, r2.xy;
+add     r1.xy, r1.xy, c16.yy;
+mov     oT0, r1;
+
+
--- a/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveRip7.inl
+++ b/Sources/Plasma/PubUtilLib/plSurface/ShaderSrc/vs_WaveRip7.inl
@ -1,226 +1,226 @@
-
-vs.1.1
-
-dcl_position v0
-dcl_color v5
-dcl_texcoord0 v7
-
-// Store our input position in world space in r6
-m4x3		r6, v0, c25; // v0 * l2w
-// Fill out our w (m4x3 doesn't touch w).
-mov			r6.w, c16.z;
-
-//
-
-// Input diffuse v5 color is:
-// v5.r = overall transparency
-// v5.g = reflection strength (transparency)
-// v5.b = overall wave scaling
-//
-// v5.a is:
-// v5.w = 1/(2.f * edge length)
-// So per wave filtering is:
-// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
-// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
-// and is completely filtered at 2 times sampling frequency.
-
-// We'd like to make this autocalculated based on the depth of the water.
-// The frequency filtering (v5.w) still needs to be calculated offline, because
-// it's dependent on edge length, but the first 3 filterings can be calculated
-// based on this vertex.
-// Basically, we want the transparency, reflection strength, and wave scaling
-// to go to zero as the water depth goes to zero. Linear falloffs are as good
-// a place to start as any.
-//
-// depth = waterlevel - r6.z		=> depth in feet (may be negative)
-// depthNorm = depth / depthFalloff	=> zero at watertable, one at depthFalloff beneath
-// atten = minAtten + depthNorm * (maxAtten - minAtten);
-// These are all vector ops.
-// This provides separate ramp ups for each of the channels (they reach full unfiltered
-// values at different depths), but doesn't provide separate controls for where they
-// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
-// in feet (depth) is probably the most intuitive. So that changes the first calculation
-// of depth to:
-// depth = waterlevel - r6.z + offset
-//		= (waterlevel + offset) - r6.z
-// And since we only need offsets for 3 channels, we can make the waterlevel constant
-// waterlevel[chan] = watertableheight + offset[chan],
-// with waterlevel.w = watertableheight.
-//
-// So:
-//	c30 = waterlevel + offset
-//	c31 = (maxAtten - minAtten) / depthFalloff
-//	c32 = minAtten.
-// And in particular:
-//	c30.w = waterlevel
-//	c31.w = 1.f;
-//	c32.w = 0;
-// So r4.w is the depth of this vertex in feet.
-
-// Dot our position with our direction vectors.
-mul		r0, c8, r6.xxxx;
-mad		r0, c9, r6.yyyy, r0;
-
-//
-//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
-mul         r0, r0, c5;
-add			r0, r0, c6;
-//
-//    // Now we need dist mod'd into range [-Pi..Pi]
-//    dist *= rcp(kTwoPi);
-rcp         r4, c15.wwww;
-add			r0, r0, c15.zzzz;
-mul         r0, r0, r4;
-//    dist = frac(dist);
-expp     r1.y, r0.xxxx
-mov      r1.x, r1.yyyy
-expp     r1.y, r0.zzzz
-mov      r1.z, r1.yyyy
-expp     r1.y, r0.wwww
-mov      r1.w, r1.yyyy
-expp     r1.y, r0.yyyy
-//    dist *= kTwoPi;
-mul         r0, r1, c15.wwww;
-//    dist += -kPi;
-sub         r0, r0, c15.zzzz;
-
-//
-//    sincos(dist, sinDist, cosDist);
-// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-mul         r1, r0, r0; // r0^2
-mul         r2, r1, r0; // r0^3 - probably stall
-mul         r3, r1, r1; // r0^4
-mul         r4, r1, r2; // r0^5
-mul         r5, r2, r3; // r0^7
-
-mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
-mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
-add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y
-mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
-mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
-
-// r0^7 & r0^6 terms
-mul         r4, r4, r0; // r0^6
-mad         r2, r5, c13.wwww, r2;
-mad         r1, r4, c14.wwww, r1;
-
-// Calc our depth based filtering here into r4 (because we don't use it again
-// after here, and we need our filtering shortly).
-sub			r4, c30, r6.zzzz;
-mul			r4, r4, c31;
-add			r4, r4, c32;
-// Clamp .xyz to range [0..1]
-min			r4.xyz, r4, c16.zzzz;
-max			r4.xyz, r4, c16.xxxx;
-//mov r4.xyz, c16.xxx; // HACKTEST
-
-// Calc our filter (see above).
-mul			r11, v5.wwww, c29;
-max			r11, r11, c16.xxxx;
-min			r11, r11, c16.zzzz;
-
-//mov    r2, r1;
-// r2 == sinDist
-// r1 == cosDist
-//    sinDist *= filter;
-mul         r2, r2, r11;
-//    sinDist *= kAmplitude.xyzw
-mul         r2, r2, c7;
-//    height = dp4(sinDist, kOne);
-//    accumPos.z += height; (but accumPos.z is currently 0).
-dp4         r8.x, r2, c16.zzzz;
-mul			r8.y, r8.x, r4.z;
-add			r8.z, r8.y, c30.w;
-max			r6.z, r6.z, r8.z;
-// r8.x == wave height relative to 0
-// r8.y == dampened wave relative to 0
-// r8.z == dampened wave height in world space
-// r6.z == wave height clamped to never go beneath ground level
-//
-//    cosDist *= filter;
-mul         r1, r1, r11;
-
-// Pos = (in.x + S, in.y + R, r6.z)
-// S = sum(k Dir.x A cos())
-// R = sum(k Dir.y A cos())
-// c10 = k Dir.x A
-// c11 = k Dir.y A
-//    S = sum(cosDist * c10);
-dp4         r7.x, r1, c10;
-//	  R = sum(cosDist * c11);
-dp4			r7.y, r1, c11;
-
-add			r6.xy, r6.xy, r7.xy;
-
-
-// Bias our vert up a bit to compensate for precision errors.
-// In particular, our filter coefficients are coming in as
-// interpolated bytes, so there's bound to be a lot of slop
-// from that. We've got a free slot in c35.z, so we'll use that.
-// A better implementation would be to bias and scale our screen
-// vert, effectively pushing the vert toward the camera without
-// actually moving it, but this is easier and might work just
-// as well.
-add			r6.z, r6.z, c35.z;
-
-//
-// // Transform position to screen
-//
-//
-//m4x3	r6, v0, c25; // HACKAGE
-//mov		r6.w, c16.z; // HACKAGE
-//m4x4     oPos, r6, c0; // ADDFOG
-m4x4		r9, r6, c0;
-add			r10.x, r9.w, c4.x;
-mul			oFog, r10.x, c4.y;
-mov			oPos, r9;
-
-
-// Dyna Stuff
-// Constants
-// c33 = fC1U, fC2U, fC1V, fC2V
-// c34 = fInitAtten, t, life, 1.f / (life-decay)
-// c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE
-//
-// Vertex Info
-// v7.z = fBirth (because we don't use it for anything else).
-//
-// Initialize r1.zw to 0,1
-mov		r1, c16.xxxz;
-// Calc r1.x = age, r1.y = atten
-// age = t - birth.
-sub		r1.x, c34.y, v7.z;
-// atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay));
-// first clamp0_1(age/ramp)
-mul		r1.y, r1.x, c35.y;
-min		r1.y, r1.y, c16.z; // Clamp to one (can't go negative).
-// now clamp0_1((life-age) / (life-decay));
-sub		r1.z, c34.z, r1.x;
-mul		r1.z, r1.z, c34.w;
-min		r1.z, r1.z, c16.z; // Clamp to one
-max		r1.z, r1.z, c16.x; // Clamp to zero
-mul		r1.y, r1.y, r1.z; // atten is the product of the two terms.
-
-// color is (atten, atten, atten, 1.f)
-// Need to calculate opacity we would have had from vs_WaveFixedFin7.inl
-// Right now that's just modulating by r4.y.
-mul		r0.y, r4.y, c34.x;
-mul		oD0, r0.yyyy, r1.yyyw;
-//mov oD0, c16.zzzz; // HACKTEST
-
-// UVW = (inUVW - 0.5) * scale + 0.5
-// where:
-// scale = (fC1U / (age * fC2U + 1.f)), fC1V / (age * fC2U + 1.f), 1.f, 1.f
-mov		r2, c16.xxxz;
-mul		r2.xy, r1.xx, c33.yw;
-add		r2.xy, r2.xy, c16.zz;
-rcp		r2.x, r2.x;
-rcp		r2.y, r2.y;
-mul		r2.xy, r2.xy, c33.xz;
-sub		r1.xy, v7.xy, c16.yy;
-mul		r1.xy, r1.xy, r2.xy;
-add		r1.xy, r1.xy, c16.yy;
-mov		oT0, r1;
-
-
+
+vs.1.1
+
+dcl_position v0
+dcl_color v5
+dcl_texcoord0 v7
+
+// Store our input position in world space in r6
+m4x3        r6, v0, c25; // v0 * l2w (the local-to-world matrix rows start at c25)
+// Fill out our w (m4x3 doesn't touch w).
+mov         r6.w, c16.z; // c16.z is the constant 1 (c16.x/.y/.z are used as 0, 0.5, 1 throughout this shader)
+
+//
+
+// Input diffuse v5 color is:
+// v5.r = overall transparency
+// v5.g = reflection strength (transparency)
+// v5.b = overall wave scaling
+//
+// v5.a is:
+// v5.w = 1/(2.f * edge length)
+// So per wave filtering is:
+// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
+// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
+// and is completely filtered at 2 times sampling frequency.
+
+// We'd like to make this autocalculated based on the depth of the water.
+// The frequency filtering (v5.w) still needs to be calculated offline, because
+// it's dependent on edge length, but the first 3 filterings can be calculated
+// based on this vertex.
+// Basically, we want the transparency, reflection strength, and wave scaling
+// to go to zero as the water depth goes to zero. Linear falloffs are as good
+// a place to start as any.
+//
+// depth = waterlevel - r6.z        => depth in feet (may be negative)
+// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
+// atten = minAtten + depthNorm * (maxAtten - minAtten);
+// These are all vector ops.
+// This provides separate ramp ups for each of the channels (they reach full unfiltered
+// values at different depths), but doesn't provide separate controls for where they
+// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
+// in feet (depth) is probably the most intuitive. So that changes the first calculation
+// of depth to:
+// depth = waterlevel - r6.z + offset
+//      = (waterlevel + offset) - r6.z
+// And since we only need offsets for 3 channels, we can make the waterlevel constant
+// waterlevel[chan] = watertableheight + offset[chan],
+// with waterlevel.w = watertableheight.
+//
+// So:
+//  c30 = waterlevel + offset
+//  c31 = (maxAtten - minAtten) / depthFalloff
+//  c32 = minAtten.
+// And in particular:
+//  c30.w = waterlevel
+//  c31.w = 1.f;
+//  c32.w = 0;
+// So r4.w is the depth of this vertex in feet.
+
+// Dot our position with our direction vectors.
+mul     r0, c8, r6.xxxx; // r0 = c8 * worldPos.x
+mad     r0, c9, r6.yyyy, r0; // r0 += c9 * worldPos.y
+
+//
+//    dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
+mul         r0, r0, c5; // c5 = kFreq
+add         r0, r0, c6; // c6 = kPhase
+//
+//    // Now we need dist mod'd into range [-Pi..Pi]
+//    dist *= rcp(kTwoPi);
+rcp         r4, c15.wwww; // r4 = 1 / kTwoPi (c15.w = kTwoPi)
+add         r0, r0, c15.zzzz; // shift by +kPi (c15.z) before the frac; undone by the final sub
+mul         r0, r0, r4; // dist = (dist + kPi) / kTwoPi
+//    dist = frac(dist); (expp with a .y write mask is used to get the fractional part of one lane at a time; the y lane goes last so the scratch writes to r1.y don't clobber it)
+expp     r1.y, r0.xxxx
+mov      r1.x, r1.yyyy
+expp     r1.y, r0.zzzz
+mov      r1.z, r1.yyyy
+expp     r1.y, r0.wwww
+mov      r1.w, r1.yyyy
+expp     r1.y, r0.yyyy
+//    dist *= kTwoPi;
+mul         r0, r1, c15.wwww; // back to the range [0..kTwoPi]
+//    dist += -kPi;
+sub         r0, r0, c15.zzzz; // recenter to [-kPi..kPi] for the polynomial sin/cos
+
+//
+//    sincos(dist, sinDist, cosDist);
+// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
+// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
+mul         r1, r0, r0; // r0^2
+mul         r2, r1, r0; // r0^3 - probably stall
+mul         r3, r1, r1; // r0^4
+mul         r4, r1, r2; // r0^5
+mul         r5, r2, r3; // r0^7
+
+mul         r1, r1, c14.yyyy;       // r1 = r0^2 * vCos.y
+mad         r2, r2, c13.yyyy, r0;   // r2 = r0 + r0^3 * vSin.y
+add         r1, r1, c14.xxxx;       // r1 = 1 + r0^2 * vCos.y (c14.x supplies the constant term)
+mad         r2, r4, c13.zzzz, r2;   // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
+mad         r1, r3, c14.zzzz, r1;   // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
+
+// r0^7 & r0^6 terms
+mul         r4, r4, r0; // r0^6 (r4 held r0^5, which the sin series has already consumed)
+mad         r2, r5, c13.wwww, r2; // r2 += r0^7 * vSin.w  => r2 is sinDist
+mad         r1, r4, c14.wwww, r1; // r1 += r0^6 * vCos.w  => r1 is cosDist
+
+// Calc our depth based filtering here into r4 (because we don't use it again
+// after here, and we need our filtering shortly).
+sub         r4, c30, r6.zzzz; // depth = (waterlevel + offset) - worldPos.z, per channel
+mul         r4, r4, c31; // * (maxAtten - minAtten) / depthFalloff
+add         r4, r4, c32; // + minAtten; r4.w stays the raw depth (c31.w = 1, c32.w = 0)
+// Clamp .xyz to range [0..1]
+min         r4.xyz, r4, c16.zzzz;
+max         r4.xyz, r4, c16.xxxx;
+//mov r4.xyz, c16.xxx; // HACKTEST
+
+// Calc our filter (see above).
+mul         r11, v5.wwww, c29; // NOTE(review): c29 presumably carries the per-wave length term of the filter formula - confirm; no explicit "- 1" is applied here
+max         r11, r11, c16.xxxx; // clamp to >= 0
+min         r11, r11, c16.zzzz; // clamp to <= 1
+
+//mov    r2, r1;
+// r2 == sinDist
+// r1 == cosDist
+//    sinDist *= filter;
+mul         r2, r2, r11;
+//    sinDist *= kAmplitude.xyzw
+mul         r2, r2, c7; // c7 = kAmplitude
+//    height = dp4(sinDist, kOne);
+//    accumPos.z += height; (but accumPos.z is currently 0).
+dp4         r8.x, r2, c16.zzzz; // dot with (1,1,1,1) sums the four lanes
+mul         r8.y, r8.x, r4.z; // scale by the depth-based filter term in r4.z
+add         r8.z, r8.y, c30.w; // c30.w = waterlevel
+max         r6.z, r6.z, r8.z; // keep the higher of the input z and the wave surface
+// r8.x == wave height relative to 0
+// r8.y == dampened wave relative to 0
+// r8.z == dampened wave height in world space
+// r6.z == wave height clamped to never go beneath ground level
+//
+//    cosDist *= filter;
+mul         r1, r1, r11;
+
+// Pos = (in.x + S, in.y + R, r6.z)
+// S = sum(k Dir.x A cos())
+// R = sum(k Dir.y A cos())
+// c10 = k Dir.x A
+// c11 = k Dir.y A
+//    S = sum(cosDist * c10);
+dp4         r7.x, r1, c10;
+//    R = sum(cosDist * c11);
+dp4         r7.y, r1, c11;
+
+add         r6.xy, r6.xy, r7.xy; // displace world x,y by (S, R)
+
+
+// Bias our vert up a bit to compensate for precision errors.
+// In particular, our filter coefficients are coming in as
+// interpolated bytes, so there's bound to be a lot of slop
+// from that. We've got a free slot in c35.z, so we'll use that.
+// A better implementation would be to bias and scale our screen
+// vert, effectively pushing the vert toward the camera without
+// actually moving it, but this is easier and might work just
+// as well.
+add         r6.z, r6.z, c35.z; // c35.z = BIAS (positive is up)
+
+//
+// // Transform position to screen
+//
+//
+//m4x3  r6, v0, c25; // HACKAGE
+//mov       r6.w, c16.z; // HACKAGE
+//m4x4     oPos, r6, c0; // ADDFOG
+m4x4        r9, r6, c0; // result kept in r9 (not written straight to oPos) so its w can feed the fog calc
+add         r10.x, r9.w, c4.x; // fog = (w + c4.x) * c4.y
+mul         oFog, r10.x, c4.y;
+mov         oPos, r9;
+
+
+// Dyna Stuff
+// Constants
+// c33 = fC1U, fC2U, fC1V, fC2V
+// c34 = fInitAtten, t, life, 1.f / (life-decay)
+// c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE
+//
+// Vertex Info
+// v7.z = fBirth (because we don't use it for anything else).
+//
+// Initialize r1.zw to 0,1
+mov     r1, c16.xxxz;
+// Calc r1.x = age, r1.y = atten
+// age = t - birth.
+sub     r1.x, c34.y, v7.z;
+// atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay));
+// first clamp0_1(age/ramp)
+mul     r1.y, r1.x, c35.y; // age * (1 / ramp)
+min     r1.y, r1.y, c16.z; // Clamp to one (can't go negative).
+// now clamp0_1((life-age) / (life-decay));
+sub     r1.z, c34.z, r1.x; // life - age
+mul     r1.z, r1.z, c34.w; // * 1 / (life - decay)
+min     r1.z, r1.z, c16.z; // Clamp to one
+max     r1.z, r1.z, c16.x; // Clamp to zero
+mul     r1.y, r1.y, r1.z; // atten is the product of the two terms.
+
+// color is (atten, atten, atten, 1.f)
+// Need to calculate opacity we would have had from vs_WaveFixedFin7.inl
+// Right now that's just modulating by r4.y.
+mul     r0.y, r4.y, c34.x; // r4.y (depth filter, clamped earlier) * fInitAtten
+mul     oD0, r0.yyyy, r1.yyyw; // rgb = r0.y * atten, a = r0.y * 1 (r1.w is still 1 from the init)
+//mov oD0, c16.zzzz; // HACKTEST
+
+// UVW = (inUVW - 0.5) * scale + 0.5
+// where:
+// scale = (fC1U / (age * fC2U + 1.f), fC1V / (age * fC2V + 1.f), 1.f, 1.f)
+mov     r2, c16.xxxz; // r2 = (0, 0, 0, 1); only .xy is used afterwards
+mul     r2.xy, r1.xx, c33.yw; // age * (fC2U, fC2V)
+add     r2.xy, r2.xy, c16.zz; // + 1
+rcp     r2.x, r2.x; // rcp is done one component at a time
+rcp     r2.y, r2.y;
+mul     r2.xy, r2.xy, c33.xz; // * (fC1U, fC1V) => scale.xy
+sub     r1.xy, v7.xy, c16.yy; // inUV - 0.5 (c16.y); overwrites age/atten, which are no longer needed
+mul     r1.xy, r1.xy, r2.xy; // * scale
+add     r1.xy, r1.xy, c16.yy; // + 0.5
+mov     oT0, r1; // NOTE(review): only r1.xy is rewritten here, so oT0.z carries the decay term left in r1.z and oT0.w = 1 - confirm that is intended
+
+