2
3
mirror of https://foundry.openuru.org/gitblit/r/CWE-ou-minkata.git synced 2025-07-14 02:27:40 -04:00

Fix line endings and tabs

This commit is contained in:
Branan Purvine-Riley
2011-04-11 16:27:55 -07:00
parent d4250e19b5
commit 908aaeb6f6
2738 changed files with 702562 additions and 702562 deletions

View File

@ -1,17 +1,17 @@
// Grab noise texture,
// modulate biased version by vtx color 0,
// add to vtx color 1
//
// Result of the active instructions:
//   r0.rgb = ((t0 - 0.5) + (t1 - 0.5)) * v0 + v1
//   r0.a   = t0.a + t1.a
ps.1.1
tex t0;
tex t1;
// _bias subtracts 0.5 from each sample before the sum.
// The '+' prefix co-issues the alpha op with the color op before it.
add r0.rgb, t0_bias, t1_bias;
+add r0.a, t0, t1;
//mov r0, t1_bias;
mad r0.rgb, r0, v0, v1;
//mov r0, v1;
// Grab noise texture,
// modulate biased version by vtx color 0,
// add to vtx color 1
//
// Result of the active instructions:
//   r0.rgb = ((t0 - 0.5) + (t1 - 0.5)) * v0 + v1
//   r0.a   = t0.a + t1.a
ps.1.1
tex t0;
tex t1;
// _bias subtracts 0.5 from each sample before the sum.
// The '+' prefix co-issues the alpha op with the color op before it.
add r0.rgb, t0_bias, t1_bias;
+add r0.a, t0, t1;
//mov r0, t1_bias;
mad r0.rgb, r0, v0, v1;
//mov r0, v1;

View File

@ -1,14 +1,14 @@
ps.1.1
// Add blend color, output sum of alpha
// Color is t0 + t1
// Alpha is t0.a + t1.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0;
tex t1;
// The '+' prefix co-issues the alpha op with the color op before it.
add r0.rgb, t0, t1;
+add r0.a, t0, t1;
mul r0, r0, v0;
ps.1.1
// Add blend color, output sum of alpha
// Color is t0 + t1
// Alpha is t0.a + t1.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0;
tex t1;
// The '+' prefix co-issues the alpha op with the color op before it.
add r0.rgb, t0, t1;
+add r0.a, t0, t1;
mul r0, r0, v0;

View File

@ -1,14 +1,14 @@
ps.1.1
// Add blend color, output base alpha
// Color is t0 + t1
// Alpha is t0.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0;
tex t1;
// The '+' prefix co-issues the alpha op with the color op before it.
add r0.rgb, t0, t1;
+mov r0.a, t0;
mul r0, r0, v0;
ps.1.1
// Add blend color, output base alpha
// Color is t0 + t1
// Alpha is t0.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0;
tex t1;
// The '+' prefix co-issues the alpha op with the color op before it.
add r0.rgb, t0, t1;
+mov r0.a, t0;
mul r0, r0, v0;

View File

@ -1,14 +1,14 @@
ps.1.1
// Add blend color, output product of alpha
// Color is t0 + t1
// Alpha is t0.a * t1.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0;
tex t1;
// The '+' prefix co-issues the alpha op with the color op before it.
add r0.rgb, t0, t1;
+mul r0.a, t0, t1;
mul r0, r0, v0;
ps.1.1
// Add blend color, output product of alpha
// Color is t0 + t1
// Alpha is t0.a * t1.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0;
tex t1;
// The '+' prefix co-issues the alpha op with the color op before it.
add r0.rgb, t0, t1;
+mul r0.a, t0, t1;
mul r0, r0, v0;

View File

@ -1,14 +1,14 @@
ps.1.1
// Alpha blend color, output sum of alphas
// Color is t0 * (1 - t1.a) + t1 * t1.a
// Alpha is t0.a + t1.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0
tex t1
// lrp d, s0, s1, s2 computes s0 * s1 + (1 - s0) * s2,
// so t1.a is the weight of t1 over t0.
lrp r0.rgb, t1.a, t1, t0
add r0.a, t0, t1;
mul r0, r0, v0;
ps.1.1
// Alpha blend color, output sum of alphas
// Color is t0 * (1 - t1.a) + t1 * t1.a
// Alpha is t0.a + t1.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0
tex t1
// lrp d, s0, s1, s2 computes s0 * s1 + (1 - s0) * s2,
// so t1.a is the weight of t1 over t0.
lrp r0.rgb, t1.a, t1, t0
add r0.a, t0, t1;
mul r0, r0, v0;

View File

@ -1,14 +1,14 @@
ps.1.1
// Alpha blend layers, output base alpha
//
// Color is t0 * (1 - t1.a) + t1 * t1.a
// Alpha is t0.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0
tex t1
// lrp d, s0, s1, s2 computes s0 * s1 + (1 - s0) * s2,
// so t1.a is the weight of t1 over t0.
lrp r0.rgb, t1.a, t1, t0
mov r0.a, t0;
mul r0, r0, v0;
ps.1.1
// Alpha blend layers, output base alpha
//
// Color is t0 * (1 - t1.a) + t1 * t1.a
// Alpha is t0.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0
tex t1
// lrp d, s0, s1, s2 computes s0 * s1 + (1 - s0) * s2,
// so t1.a is the weight of t1 over t0.
lrp r0.rgb, t1.a, t1, t0
mov r0.a, t0;
mul r0, r0, v0;

View File

@ -1,14 +1,14 @@
ps.1.1
// Alpha blend color, output product of alphas
// Color is t0 * (1 - t1.a) + t1 * t1.a
// Alpha is t0.a * t1.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0
tex t1
// lrp d, s0, s1, s2 computes s0 * s1 + (1 - s0) * s2,
// so t1.a is the weight of t1 over t0.
lrp r0.rgb, t1.a, t1, t0
mul r0.a, t0, t1;
mul r0, r0, v0;
ps.1.1
// Alpha blend color, output product of alphas
// Color is t0 * (1 - t1.a) + t1 * t1.a
// Alpha is t0.a * t1.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0
tex t1
// lrp d, s0, s1, s2 computes s0 * s1 + (1 - s0) * s2,
// so t1.a is the weight of t1 over t0.
lrp r0.rgb, t1.a, t1, t0
mul r0.a, t0, t1;
mul r0, r0, v0;

View File

@ -1,9 +1,9 @@
ps.1.1
// Single layer, just modulate by vertex color and emit
//
// r0 = t0 * v0 on all four channels (color and alpha).
tex t0
mul r0, t0, v0;
ps.1.1
// Single layer, just modulate by vertex color and emit
//
// r0 = t0 * v0 on all four channels (color and alpha).
tex t0
mul r0, t0, v0;

View File

@ -1,14 +1,14 @@
ps.1.1
// Multiply blend color, output sum of alpha
// Color is t0 * t1
// Alpha is t0.a + t1.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0;
tex t1;
// The '+' prefix co-issues the alpha op with the color op before it.
mul r0.rgb, t0, t1;
+add r0.a, t0, t1;
mul r0, r0, v0;
ps.1.1
// Multiply blend color, output sum of alpha
// Color is t0 * t1
// Alpha is t0.a + t1.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0;
tex t1;
// The '+' prefix co-issues the alpha op with the color op before it.
mul r0.rgb, t0, t1;
+add r0.a, t0, t1;
mul r0, r0, v0;

View File

@ -1,14 +1,14 @@
ps.1.1
// Multiply blend color, output base alpha
// Color is t0 * t1
// Alpha is t0.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0;
tex t1;
// The '+' prefix co-issues the alpha op with the color op before it.
mul r0.rgb, t0, t1;
+mov r0.a, t0;
mul r0, r0, v0;
ps.1.1
// Multiply blend color, output base alpha
// Color is t0 * t1
// Alpha is t0.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0;
tex t1;
// The '+' prefix co-issues the alpha op with the color op before it.
mul r0.rgb, t0, t1;
+mov r0.a, t0;
mul r0, r0, v0;

View File

@ -1,14 +1,14 @@
ps.1.1
// Multiply blend color, output product of alpha
// Color is t0 * t1
// Alpha is t0.a * t1.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0;
tex t1;
// The '+' prefix co-issues the alpha op with the color op before it.
mul r0.rgb, t0, t1;
+mul r0.a, t0, t1;
mul r0, r0, v0;
ps.1.1
// Multiply blend color, output product of alpha
// Color is t0 * t1
// Alpha is t0.a * t1.a
// Color and alpha are then both modulated by the vertex color v0.
tex t0;
tex t1;
// The '+' prefix co-issues the alpha op with the color op before it.
mul r0.rgb, t0, t1;
+mul r0.a, t0, t1;
mul r0, r0, v0;

View File

@ -1,31 +1,31 @@
// Composite the cosines together.
// Input map is cosine(pix) for each of
// the 4 waves.
//
// The constants are set up so:
// Nx = -freq * amp * dirX * cos(pix);
// Ny = -freq * amp * dirY * cos(pix);
// So c[i].x = -freq[i] * amp[i] * dirX[i]
// etc.
// All textures are:
// (r,g,b,a) = (cos(), cos(), 1, 1)
//
// So c[0].z = 1, but all other c[i].z = 0
// Note also the c4 used for biasing back at the end.
ps.1.1
tex t0;
tex t1;
tex t2;
tex t3;
// Weighted sum of the four samples: r0 = sum(t[i]_bx2 * c[i]).
// _bx2 expands each sample from [0..1] to [-1..1], i.e. (x - 0.5) * 2.
mul r0, t0_bx2, c0;
mad r0, t1_bx2, c1, r0;
mad r0, t2_bx2, c2, r0;
mad r0, t3_bx2, c3, r0;
// Now bias it back into range [0..1] for output.
// c4 is used as both the scale and the offset: r0 = r0 * c4 + c4.
mul r0, r0, c4; // c4 = (0.5, 0.5, 0.5, 1)
add r0, r0, c4;
//mov r0, c4;
// Composite the cosines together.
// Input map is cosine(pix) for each of
// the 4 waves.
//
// The constants are set up so:
// Nx = -freq * amp * dirX * cos(pix);
// Ny = -freq * amp * dirY * cos(pix);
// So c[i].x = -freq[i] * amp[i] * dirX[i]
// etc.
// All textures are:
// (r,g,b,a) = (cos(), cos(), 1, 1)
//
// So c[0].z = 1, but all other c[i].z = 0
// Note also the c4 used for biasing back at the end.
ps.1.1
tex t0;
tex t1;
tex t2;
tex t3;
// Weighted sum of the four samples: r0 = sum(t[i]_bx2 * c[i]).
// _bx2 expands each sample from [0..1] to [-1..1], i.e. (x - 0.5) * 2.
mul r0, t0_bx2, c0;
mad r0, t1_bx2, c1, r0;
mad r0, t2_bx2, c2, r0;
mad r0, t3_bx2, c3, r0;
// Now bias it back into range [0..1] for output.
// c4 is used as both the scale and the offset: r0 = r0 * c4 + c4.
mul r0, r0, c4; // c4 = (0.5, 0.5, 0.5, 1)
add r0, r0, c4;
//mov r0, c4;

View File

@ -1,6 +1,6 @@
ps.1.1
// Grass shader. Just does a simple tex mult
// r0 = t0 * v0 on all four channels (texture modulated by vertex color).
tex t0
mul r0, t0, v0
ps.1.1
// Grass shader. Just does a simple tex mult
// r0 = t0 * v0 on all four channels (texture modulated by vertex color).
tex t0
mul r0, t0, v0

View File

@ -1,35 +1,35 @@
// Composite the cosines together.
// Input map is cosine(pix) for each of
// the 4 waves.
//
// The constants are set up so:
// Nx = -freq * amp * dirX * cos(pix);
// Ny = -freq * amp * dirY * cos(pix);
// So c[i].x = -freq[i] * amp[i] * dirX[i]
// etc.
// All textures are:
// (r,g,b,a) = (cos(), cos(), 1, 1)
//
// Here all c[i].z = 0, because we're accumulating ontop
// of layers that have been primed with z = 1.
// Note also the c4 used for biasing back at the end.
// NOTE: in this variant c4 only scales the color and supplies the output
// alpha; the offset that is added afterwards comes from c5.
ps.1.1
tex t0;
tex t1;
tex t2;
tex t3;
// Weighted sum of the four samples: r0 = sum(t[i]_bx2 * c[i]).
// _bx2 expands each sample from [0..1] to [-1..1], i.e. (x - 0.5) * 2.
mul r0, t0_bx2, c0;
mad r0, t1_bx2, c1, r0;
mad r0, t2_bx2, c2, r0;
mad r0, t3_bx2, c3, r0;
// Now bias it back into range [0..1] for output.
// r0.rgb = r0.rgb * c4.rgb + c5.rgb, r0.a = c4.a
// (the '+' prefix co-issues the alpha mov with the color mul).
mul r0.rgb, r0, c4;
+mov r0.a, c4;
add r0.rgb, r0, c5;
//mov r0, c4;
// Composite the cosines together.
// Input map is cosine(pix) for each of
// the 4 waves.
//
// The constants are set up so:
// Nx = -freq * amp * dirX * cos(pix);
// Ny = -freq * amp * dirY * cos(pix);
// So c[i].x = -freq[i] * amp[i] * dirX[i]
// etc.
// All textures are:
// (r,g,b,a) = (cos(), cos(), 1, 1)
//
// Here all c[i].z = 0, because we're accumulating ontop
// of layers that have been primed with z = 1.
// Note also the c4 used for biasing back at the end.
// NOTE: in this variant c4 only scales the color and supplies the output
// alpha; the offset that is added afterwards comes from c5.
ps.1.1
tex t0;
tex t1;
tex t2;
tex t3;
// Weighted sum of the four samples: r0 = sum(t[i]_bx2 * c[i]).
// _bx2 expands each sample from [0..1] to [-1..1], i.e. (x - 0.5) * 2.
mul r0, t0_bx2, c0;
mad r0, t1_bx2, c1, r0;
mad r0, t2_bx2, c2, r0;
mad r0, t3_bx2, c3, r0;
// Now bias it back into range [0..1] for output.
// r0.rgb = r0.rgb * c4.rgb + c5.rgb, r0.a = c4.a
// (the '+' prefix co-issues the alpha mov with the color mul).
mul r0.rgb, r0, c4;
+mov r0.a, c4;
add r0.rgb, r0, c5;
//mov r0, c4;

View File

@ -1,21 +1,21 @@
ps.1.1
// Three layer alpha blend, modulated by the vertex color.
// Color: t1 is blended over t0 by t1.a, then t2 is blended over that
//        result by t2.a, and the result is multiplied by v0.
// Alpha: (1 - (1 - t0.a) * (1 - t1.a) * (1 - t2.a)) * v0.a
// c0 is defined below but is not read by any active instruction.
def c0, 1.0, 1.0, 1.0, 1.0 // Temp Hack
tex t0;
tex t1;
tex t2;
// r1.a = t1.a, used as the blend factor of the first lrp.
mov r1.a, t1;
// Each color op is co-issued ('+') with an alpha op that accumulates the
// product of the (1 - alpha) terms.
lrp r0.rgb, r1.a, t1, t0;
+mul r0.a, 1-t1, 1-t0;
lrp r0.rgb, t2.a, t2, r0;
+mul r0.a, 1-t2, r0;
// Final modulate; the alpha op inverts the accumulated product first.
mul r0.rgb, r0, v0;
+mul r0.a, 1-r0, v0;
//mov r0.a, c1;
//mov r0.rgb, t2;
//+mov r0.a, 1-t2;
ps.1.1
// Three layer alpha blend, modulated by the vertex color.
// Color: t1 is blended over t0 by t1.a, then t2 is blended over that
//        result by t2.a, and the result is multiplied by v0.
// Alpha: (1 - (1 - t0.a) * (1 - t1.a) * (1 - t2.a)) * v0.a
// c0 is defined below but is not read by any active instruction.
def c0, 1.0, 1.0, 1.0, 1.0 // Temp Hack
tex t0;
tex t1;
tex t2;
// r1.a = t1.a, used as the blend factor of the first lrp.
mov r1.a, t1;
// Each color op is co-issued ('+') with an alpha op that accumulates the
// product of the (1 - alpha) terms.
lrp r0.rgb, r1.a, t1, t0;
+mul r0.a, 1-t1, 1-t0;
lrp r0.rgb, t2.a, t2, r0;
+mul r0.a, 1-t2, r0;
// Final modulate; the alpha op inverts the accumulated product first.
mul r0.rgb, r0, v0;
+mul r0.a, 1-r0, v0;
//mov r0.a, c1;
//mov r0.rgb, t2;
//+mov r0.a, 1-t2;

View File

@ -1,35 +1,35 @@
// Very similar to ps_WaveFixed.inl. Only the final coloring is different.
// Even though so far they are identical.
//
// Output of the active instructions:
//   r0.rgb = t3.rgb * v0.rgb   (reflected environment lookup times vertex color)
//   r0.a   = t0.a * v0.a       (alpha of the stage 0 texture times vertex alpha)
ps.1.1
//def c0, 1.0, 0.0, 0.0, 1.0 // Temp Hack
tex t0 // Bind texture in stage 0 to register t0.
texm3x3pad t1, t0_bx2 // First row of matrix multiply.
texm3x3pad t2, t0_bx2 // Second row of matrix multiply.
texm3x3vspec t3, t0_bx2 // Third row of matrix multiply to get a 3-vector.
// Reflect 3-vector by the eye-ray vector.
// Use reflected vector to do a texture lookup
// at stage 3.
// t3 now has our reflected environment map value
// We've (presumably) attenuated the effect on a vertex basis
// and have our color w/ attenuated alpha in v0. So all we need
// is to multiply t3 by v0 into r0 and we're done.
// The '+' prefix co-issues the alpha op with the color op before it.
mul r0.rgb, t3, v0;
+mul r0.a, t0, v0;
// mov r0, t0;
/*
tex t0;
texcoord t1;
texcoord t2;
texcoord t3;
mov r0.rgb, t3;
+mov r0.a, c0;
*/
// Very similar to ps_WaveFixed.inl. Only the final coloring is different.
// Even though so far they are identical.
//
// Output of the active instructions:
//   r0.rgb = t3.rgb * v0.rgb   (reflected environment lookup times vertex color)
//   r0.a   = t0.a * v0.a       (alpha of the stage 0 texture times vertex alpha)
ps.1.1
//def c0, 1.0, 0.0, 0.0, 1.0 // Temp Hack
tex t0 // Bind texture in stage 0 to register t0.
texm3x3pad t1, t0_bx2 // First row of matrix multiply.
texm3x3pad t2, t0_bx2 // Second row of matrix multiply.
texm3x3vspec t3, t0_bx2 // Third row of matrix multiply to get a 3-vector.
// Reflect 3-vector by the eye-ray vector.
// Use reflected vector to do a texture lookup
// at stage 3.
// t3 now has our reflected environment map value
// We've (presumably) attenuated the effect on a vertex basis
// and have our color w/ attenuated alpha in v0. So all we need
// is to multiply t3 by v0 into r0 and we're done.
// The '+' prefix co-issues the alpha op with the color op before it.
mul r0.rgb, t3, v0;
+mul r0.a, t0, v0;
// mov r0, t0;
/*
tex t0;
texcoord t1;
texcoord t2;
texcoord t3;
mov r0.rgb, t3;
+mov r0.a, c0;
*/

View File

@ -1,77 +1,77 @@
//ps.1.1
// def c0, 1.0, 0.0, 0.0, 1.0
// mov r0, c0
// Short pixel shader. Use the texm3x3vspec to do a per-pixel
// reflected lookup into our environment map.
// Input:
// t0 - Normal map in tangent space. Apply _bx2 modifier to shift
// [0..255] -> [-1..1]
// t1 - UVW = tangent + eye2pos.x, map ignored.
// t2 - UVW = binormal + eye2pos.y, map ignored
// t3 - UVW = normal + eye2pos.z, map = environment cube map
// v0 - attenuating color/alpha.
// See docs on texm3x3vspec for explanation of the eye2pos wackiness.
// Output:
// r0 = reflected lookup from environment map X input v0.
// Since environment map has alpha = 255, the output of this
// shader can be used for either alpha or additive blending,
// as long as v0 is fed in appropriately.
// NOTE: the active code additionally adds v1 to the color and takes the
// output alpha straight from v0:
//   r0.rgb = t3.rgb * v0.rgb + v1.rgb
//   r0.a   = v0.a
ps.1.1
// c0 is only referenced from commented-out code below.
def c0, 1.0, 0.0, 0.0, 1.0 // Temp Hack
/*
def c1, 0.0, 1.0, 0.0, 1.0
def c2, 0.0, 0.0, 1.0, 1.0
*/
tex t0 // Bind texture in stage 0 to register t0.
texm3x3pad t1, t0_bx2 // First row of matrix multiply.
texm3x3pad t2, t0_bx2 // Second row of matrix multiply.
texm3x3vspec t3, t0_bx2 // Third row of matrix multiply to get a 3-vector.
// Reflect 3-vector by the eye-ray vector.
// Use reflected vector to do a texture lookup
// at stage 3.
// t3 now has our reflected environment map value
// We've (presumably) attenuated the effect on a vertex basis
// and have our color w/ attenuated alpha in v0. So all we need
// is to multiply t3 by v0 into r0, add our base color from v1 and we're done.
mad r0.rgb, t3, v0, v1;
/* HACKAGE
//+mul r0.a, v1, v0;
HACKAGE */
mov r0.a, v0; //HACKAGE
/*
mov r0.rgb, v0;
mov r0.a, v0;
*/
/*
tex t0;
texcoord t1;
texcoord t2;
texcoord t3;
mov r0.rgb, t3;
+mov r0.a, c0;
*/
/*
tex t0;
texcoord t1;
texcoord t2;
texcoord t3;
mul r0.rgb, t0_bx2, c1;
+mov r0.a, c2;
*/
//ps.1.1
// def c0, 1.0, 0.0, 0.0, 1.0
// mov r0, c0
// Short pixel shader. Use the texm3x3vspec to do a per-pixel
// reflected lookup into our environment map.
// Input:
// t0 - Normal map in tangent space. Apply _bx2 modifier to shift
// [0..255] -> [-1..1]
// t1 - UVW = tangent + eye2pos.x, map ignored.
// t2 - UVW = binormal + eye2pos.y, map ignored
// t3 - UVW = normal + eye2pos.z, map = environment cube map
// v0 - attenuating color/alpha.
// See docs on texm3x3vspec for explanation of the eye2pos wackiness.
// Output:
// r0 = reflected lookup from environment map X input v0.
// Since environment map has alpha = 255, the output of this
// shader can be used for either alpha or additive blending,
// as long as v0 is fed in appropriately.
// NOTE: the active code additionally adds v1 to the color and takes the
// output alpha straight from v0:
//   r0.rgb = t3.rgb * v0.rgb + v1.rgb
//   r0.a   = v0.a
ps.1.1
// c0 is only referenced from commented-out code below.
def c0, 1.0, 0.0, 0.0, 1.0 // Temp Hack
/*
def c1, 0.0, 1.0, 0.0, 1.0
def c2, 0.0, 0.0, 1.0, 1.0
*/
tex t0 // Bind texture in stage 0 to register t0.
texm3x3pad t1, t0_bx2 // First row of matrix multiply.
texm3x3pad t2, t0_bx2 // Second row of matrix multiply.
texm3x3vspec t3, t0_bx2 // Third row of matrix multiply to get a 3-vector.
// Reflect 3-vector by the eye-ray vector.
// Use reflected vector to do a texture lookup
// at stage 3.
// t3 now has our reflected environment map value
// We've (presumably) attenuated the effect on a vertex basis
// and have our color w/ attenuated alpha in v0. So all we need
// is to multiply t3 by v0 into r0, add our base color from v1 and we're done.
mad r0.rgb, t3, v0, v1;
/* HACKAGE
//+mul r0.a, v1, v0;
HACKAGE */
mov r0.a, v0; //HACKAGE
/*
mov r0.rgb, v0;
mov r0.a, v0;
*/
/*
tex t0;
texcoord t1;
texcoord t2;
texcoord t3;
mov r0.rgb, t3;
+mov r0.a, c0;
*/
/*
tex t0;
texcoord t1;
texcoord t2;
texcoord t3;
mul r0.rgb, t0_bx2, c1;
+mov r0.a, c2;
*/

View File

@ -1,30 +1,30 @@
ps.1.1
// Have a couple extra textures to burn here. Only thing
// I've thought of is to have an additional texture to
// make the front of the wave solid. So its UVW would be
// the same as the base texture, but the texture itself would
// be just a thin horizontal band of alpha. Then just add that
// alpha to the output alpha.
//
// Let's get the first cut running first.
//
// Active code: r0 = (t0 * t1 + t2) * v0, on all four channels.
tex t0;
tex t1;
tex t2;
//mul r0, v0, t0;
//mul r0, r0, t1;
//add r0.a, r0, t2;
// 1.0 mov r0, t0;
// 1.0 mul r0, r0, t1;
mul r0, t0, t1;
// TEST add r0.a, r0, t2; // TEST
// The TEST variant below adds t2 to color as well as alpha.
add r0, r0, t2; // TEST
mul r0, r0, v0;
//mul r0.rgb, r0, r0.a; // TEST
//mov r0, t1;
ps.1.1
// Have a couple extra textures to burn here. Only thing
// I've thought of is to have an additional texture to
// make the front of the wave solid. So its UVW would be
// the same as the base texture, but the texture itself would
// be just a thin horizontal band of alpha. Then just add that
// alpha to the output alpha.
//
// Let's get the first cut running first.
//
// Active code: r0 = (t0 * t1 + t2) * v0, on all four channels.
tex t0;
tex t1;
tex t2;
//mul r0, v0, t0;
//mul r0, r0, t1;
//add r0.a, r0, t2;
// 1.0 mov r0, t0;
// 1.0 mul r0, r0, t1;
mul r0, t0, t1;
// TEST add r0.a, r0, t2; // TEST
// The TEST variant below adds t2 to color as well as alpha.
add r0, r0, t2; // TEST
mul r0, r0, v0;
//mul r0.rgb, r0, r0.a; // TEST
//mov r0, t1;

View File

@ -1,63 +1,63 @@
//ps.1.1
// def c0, 1.0, 0.0, 0.0, 1.0
// mov r0, c0
// Short pixel shader. Use the texm3x3vspec to do a per-pixel
// reflected lookup into our environment map.
// Input:
// t0 - Normal map in tangent space. Apply _bx2 modifier to shift
// [0..255] -> [-1..1]
// t1 - UVW = tangent + eye2pos.x, map ignored.
// t2 - UVW = binormal + eye2pos.y, map ignored
// t3 - UVW = normal + eye2pos.z, map = environment cube map
// v0 - attenuating color/alpha.
// See docs on texm3x3vspec for explanation of the eye2pos wackiness.
// Output:
// r0 = reflected lookup from environment map X input v0.
// Since environment map has alpha = 255, the output of this
// shader can be used for either alpha or additive blending,
// as long as v0 is fed in appropriately.
// NOTE: the description above does not match the active code, which
// scales the lookup by v1, adds v0, and takes the alpha from v1:
//   r0.rgb = t3.rgb * v1.rgb + v0.rgb
//   r0.a   = v1.a
ps.1.1
//def c0, 1.0, 1.0, 1.0, 1.0 // Temp Hack
//def c1, 2.0, 2.0, 2.0, 1.0
//texcoord t0;
//texcoord t1;
//texcoord t2;
//texcoord t3;
tex t0 // Bind texture in stage 0 to register t0.
texm3x3pad t1, t0_bx2 // First row of matrix multiply.
texm3x3pad t2, t0_bx2 // Second row of matrix multiply.
texm3x3vspec t3, t0_bx2 // Third row of matrix multiply to get a 3-vector.
// Reflect 3-vector by the eye-ray vector.
// Use reflected vector to do a texture lookup
// at stage 3.
// t3 now has our reflected environment map value
// We've (presumably) attenuated the effect on a vertex basis
// and have our color w/ attenuated alpha in v0. So all we need
// is to multiply t3 by v0 into r0 and we're done.
// (Actual operation: t3 * v1 + v0; the alpha mov further down is
// co-issued with this mad via its '+' prefix.)
mad r0.rgb, t3, v1, v0;
//add r0.rgb, t3, v0;
+mov r0.a, v1;
//mov r0.rgb, v1.a; // HACKAGE
//mov r0.a, v1.a; // HACKAGE
//mov r0, v1; // HACKAGE
//mov r0, c0
//mul r0, r0, t0;
//mov r0, v1;
//mov r0, t3;
//mov r0.rgb, t3;
//+mov r0.a, c0;
//ps.1.1
// def c0, 1.0, 0.0, 0.0, 1.0
// mov r0, c0
// Short pixel shader. Use the texm3x3vspec to do a per-pixel
// reflected lookup into our environment map.
// Input:
// t0 - Normal map in tangent space. Apply _bx2 modifier to shift
// [0..255] -> [-1..1]
// t1 - UVW = tangent + eye2pos.x, map ignored.
// t2 - UVW = binormal + eye2pos.y, map ignored
// t3 - UVW = normal + eye2pos.z, map = environment cube map
// v0 - attenuating color/alpha.
// See docs on texm3x3vspec for explanation of the eye2pos wackiness.
// Output:
// r0 = reflected lookup from environment map X input v0.
// Since environment map has alpha = 255, the output of this
// shader can be used for either alpha or additive blending,
// as long as v0 is fed in appropriately.
// NOTE: the description above does not match the active code, which
// scales the lookup by v1, adds v0, and takes the alpha from v1:
//   r0.rgb = t3.rgb * v1.rgb + v0.rgb
//   r0.a   = v1.a
ps.1.1
//def c0, 1.0, 1.0, 1.0, 1.0 // Temp Hack
//def c1, 2.0, 2.0, 2.0, 1.0
//texcoord t0;
//texcoord t1;
//texcoord t2;
//texcoord t3;
tex t0 // Bind texture in stage 0 to register t0.
texm3x3pad t1, t0_bx2 // First row of matrix multiply.
texm3x3pad t2, t0_bx2 // Second row of matrix multiply.
texm3x3vspec t3, t0_bx2 // Third row of matrix multiply to get a 3-vector.
// Reflect 3-vector by the eye-ray vector.
// Use reflected vector to do a texture lookup
// at stage 3.
// t3 now has our reflected environment map value
// We've (presumably) attenuated the effect on a vertex basis
// and have our color w/ attenuated alpha in v0. So all we need
// is to multiply t3 by v0 into r0 and we're done.
// (Actual operation: t3 * v1 + v0; the alpha mov further down is
// co-issued with this mad via its '+' prefix.)
mad r0.rgb, t3, v1, v0;
//add r0.rgb, t3, v0;
+mov r0.a, v1;
//mov r0.rgb, v1.a; // HACKAGE
//mov r0.a, v1.a; // HACKAGE
//mov r0, v1; // HACKAGE
//mov r0, c0
//mul r0, r0, t0;
//mov r0, v1;
//mov r0, t3;
//mov r0.rgb, t3;
//+mov r0.a, c0;

View File

@ -1,21 +1,21 @@
ps.1.1
//def c0, 1.0, 0.0, 0.0, 1.0 // Temp Hack
// Want
// Color: vert.rgb * t0.rgb
// Alpha: vert.a * t0.a * t1.a
// NOTE: the t1 term is currently disabled (tex t1 is commented out);
// the active code only computes r0 = t0 * v0 on all four channels.
tex t0;
//tex t1;
//mul r0.rgb, v0, t0;
//+mul r0.a, v0.a, t0.a;
//mul r0.a, r0.a, t1.a;
//mul r0, t0, t1;
mul r0, t0, v0;
//mov r0, t0;
ps.1.1
//def c0, 1.0, 0.0, 0.0, 1.0 // Temp Hack
// Want
// Color: vert.rgb * t0.rgb
// Alpha: vert.a * t0.a * t1.a
// NOTE: the t1 term is currently disabled (tex t1 is commented out);
// the active code only computes r0 = t0 * v0 on all four channels.
tex t0;
//tex t1;
//mul r0.rgb, v0, t0;
//+mul r0.a, v0.a, t0.a;
//mul r0.a, r0.a, t1.a;
//mul r0, t0, t1;
mul r0, t0, v0;
//mov r0, t0;

View File

@ -1,34 +1,34 @@
vs.1.1
dcl_position v0
dcl_texcoord0 v7
// Take in a screen space position,
// transform the UVW,
// and spit it out.
// c0 = uvXform0[0]
// c1 = uvXform0[1]
// c2 = uvXform1[0]
// c3 = uvXform1[1]
// c4 = (0,0.5,1.0,2.0)
// c5 = (noiseScale, bias, 0, 1)
//
// Outputs (given the constant layout above):
//   oPos = v0, passed through untransformed
//   oT0  = (dp4(v7, c0), dp4(v7, c1), 0, 1)
//   oT1  = (dp4(v7, c2), dp4(v7, c3), 0, 1)
//   oD0  = c5.xxzz = (noiseScale, noiseScale, 0, 0)
//   oD1  = c5.yyzz = (bias, bias, 0, 0)
mov oPos, v0;
mov r0.zw, c4.xxxz; // zw will stay constant (0,1); xy are rewritten per UV set below.
dp4 r0.x, v7, c0;
dp4 r0.y, v7, c1;
mov oT0, r0;
dp4 r0.x, v7, c2;
dp4 r0.y, v7, c3;
mov oT1, r0;
mov oD0, c5.xxzz;
mov oD1, c5.yyzz;
vs.1.1
dcl_position v0
dcl_texcoord0 v7
// Take in a screen space position,
// transform the UVW,
// and spit it out.
// c0 = uvXform0[0]
// c1 = uvXform0[1]
// c2 = uvXform1[0]
// c3 = uvXform1[1]
// c4 = (0,0.5,1.0,2.0)
// c5 = (noiseScale, bias, 0, 1)
//
// Outputs (given the constant layout above):
//   oPos = v0, passed through untransformed
//   oT0  = (dp4(v7, c0), dp4(v7, c1), 0, 1)
//   oT1  = (dp4(v7, c2), dp4(v7, c3), 0, 1)
//   oD0  = c5.xxzz = (noiseScale, noiseScale, 0, 0)
//   oD1  = c5.yyzz = (bias, bias, 0, 0)
mov oPos, v0;
mov r0.zw, c4.xxxz; // zw will stay constant (0,1); xy are rewritten per UV set below.
dp4 r0.x, v7, c0;
dp4 r0.y, v7, c1;
mov oT0, r0;
dp4 r0.x, v7, c2;
dp4 r0.y, v7, c3;
mov oT1, r0;
mov oD0, c5.xxzz;
mov oD1, c5.yyzz;

View File

@ -1,31 +1,31 @@
vs.1.1
dcl_position v0
dcl_texcoord0 v7
// Take in a screen space position,
// transform the UVW,
// and spit it out.
// c4 = (0,0.5,1.0,2.0)
//
// Outputs (given the c4 layout above):
//   oPos = v0, passed through untransformed
//   oTn  = (dp4(v7, cn), 0, 0, 1) for n = 0..3
// Only the first coordinate differs between the four texture coordinate sets.
//mov r0, v0;
//mov r0.w, c4.zzzz;
//mov oPos, r0;
mov oPos, v0;
dp4 r0.x, v7, c0;
mov r0.yzw, c4.xxxz; // yzw will stay constant (0,0,1);
mov oT0, r0;
dp4 r0.x, v7, c1;
mov oT1, r0;
dp4 r0.x, v7, c2;
mov oT2, r0;
dp4 r0.x, v7, c3;
mov oT3, r0;
vs.1.1
dcl_position v0
dcl_texcoord0 v7
// Take in a screen space position,
// transform the UVW,
// and spit it out.
// c4 = (0,0.5,1.0,2.0)
//
// Outputs (given the c4 layout above):
//   oPos = v0, passed through untransformed
//   oTn  = (dp4(v7, cn), 0, 0, 1) for n = 0..3
// Only the first coordinate differs between the four texture coordinate sets.
//mov r0, v0;
//mov r0.w, c4.zzzz;
//mov oPos, r0;
mov oPos, v0;
dp4 r0.x, v7, c0;
mov r0.yzw, c4.xxxz; // yzw will stay constant (0,0,1);
mov oT0, r0;
dp4 r0.x, v7, c1;
mov oT1, r0;
dp4 r0.x, v7, c2;
mov oT2, r0;
dp4 r0.x, v7, c3;
mov oT3, r0;

View File

@ -1,60 +1,60 @@
vs.1.1
// Grass shader. Moves verts according sine waves seeded by position
// Based on the article "Animated Grass with Pixel and Vertex Shaders"
// by John Isidoro and Drew Card, in the book
// "Direct3D ShaderX Vertex and Pixel Shader Tips and Tricks"
// c0 = Local2NDC
// c4 = (0.0, 0.5, 1.0, 2.0)
// c5 = (time, X, X, X)
// c6 = Pi constants
// c7 = Sin constants (-1/3!, 1/5!, -1/7!, 1/9!)
// c8 = waveDistortX
// c9 = waveDistortY
// c10 = waveDistortZ
// c11 = waveDirX (0.25, 0.0, -0.7, -0.8)
// c12 = waveDirY (0.0, 0.15, -0.7, 0.1)
// c13 = waveSpeed (0.2, 0.15, 0.4, 0.4)
//
// Outputs: oPos = displaced position * c0, oFog = c4.z, oD0 = v5, oT0 = v7.
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Four wave phases, one per channel: dirX * x + dirY * y + time * speed.
mul r0, c11, v0.x // pos X,Y input to waves
mad r0, c12, v0.y, r0
mov r1, c5.x // time
mad r0, r1, c13, r0 // scale by speed and add to X,Y input
// Take the fractional part of all four phases, two channels per frc.
frc r0.xy, r0
frc r1.xy, r0.zwzw
mov r0.zw, r1.xyxy
sub r0, r0, c4.y // - 0.5
mul r1, r0, c6.w // *= 2 pi
// Odd powers of the angle for the sine series
// sin(x) ~= x - x^3/3! + x^5/5! - x^7/7! + x^9/9!
mul r2, r1, r1 // ^2
mul r3, r2, r1 // ^3
mul r5, r3, r2 // ^5
mul r7, r5, r2 // ^7
mul r9, r7, r2 // ^9
mad r0, r3, c7.x, r1 // - r1^3 / 3!
mad r0, r5, c7.y, r0 // + r1^5 / 5!
mad r0, r7, c7.z, r0 // - r1^7 / 7!
mad r0, r9, c7.w, r0 // + r1^9 / 9!
// Combine the four sines into an offset, one distortion vector per axis.
dp4 r3.x, r0, c8
dp4 r3.y, r0, c9
dp4 r3.zw, r0, c10
// The weight is (c4.z - v7.y), i.e. 1 - V given the c4 layout above.
sub r4, c4.z, v7.y
mul r3, r3, r4 // mult by Y tex coord. So the waves only affect the top verts
mov r2.w, v0 //
add r2.xyz, r3, v0 // add offset to position
m4x4 oPos, r2, c0 // trans to NDC
mov oFog, c4.z // no fog
mov oD0, v5
mov oT0, v7
vs.1.1
// Grass shader. Moves verts according sine waves seeded by position
// Based on the article "Animated Grass with Pixel and Vertex Shaders"
// by John Isidoro and Drew Card, in the book
// "Direct3D ShaderX Vertex and Pixel Shader Tips and Tricks"
// c0 = Local2NDC
// c4 = (0.0, 0.5, 1.0, 2.0)
// c5 = (time, X, X, X)
// c6 = Pi constants
// c7 = Sin constants (-1/3!, 1/5!, -1/7!, 1/9!)
// c8 = waveDistortX
// c9 = waveDistortY
// c10 = waveDistortZ
// c11 = waveDirX (0.25, 0.0, -0.7, -0.8)
// c12 = waveDirY (0.0, 0.15, -0.7, 0.1)
// c13 = waveSpeed (0.2, 0.15, 0.4, 0.4)
//
// Outputs: oPos = displaced position * c0, oFog = c4.z, oD0 = v5, oT0 = v7.
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Four wave phases, one per channel: dirX * x + dirY * y + time * speed.
mul r0, c11, v0.x // pos X,Y input to waves
mad r0, c12, v0.y, r0
mov r1, c5.x // time
mad r0, r1, c13, r0 // scale by speed and add to X,Y input
// Take the fractional part of all four phases, two channels per frc.
frc r0.xy, r0
frc r1.xy, r0.zwzw
mov r0.zw, r1.xyxy
sub r0, r0, c4.y // - 0.5
mul r1, r0, c6.w // *= 2 pi
// Odd powers of the angle for the sine series
// sin(x) ~= x - x^3/3! + x^5/5! - x^7/7! + x^9/9!
mul r2, r1, r1 // ^2
mul r3, r2, r1 // ^3
mul r5, r3, r2 // ^5
mul r7, r5, r2 // ^7
mul r9, r7, r2 // ^9
mad r0, r3, c7.x, r1 // - r1^3 / 3!
mad r0, r5, c7.y, r0 // + r1^5 / 5!
mad r0, r7, c7.z, r0 // - r1^7 / 7!
mad r0, r9, c7.w, r0 // + r1^9 / 9!
// Combine the four sines into an offset, one distortion vector per axis.
dp4 r3.x, r0, c8
dp4 r3.y, r0, c9
dp4 r3.zw, r0, c10
// The weight is (c4.z - v7.y), i.e. 1 - V given the c4 layout above.
sub r4, c4.z, v7.y
mul r3, r3, r4 // mult by Y tex coord. So the waves only affect the top verts
mov r2.w, v0 //
add r2.xyz, r3, v0 // add offset to position
m4x4 oPos, r2, c0 // trans to NDC
mov oFog, c4.z // no fog
mov oD0, v5
mov oT0, v7

View File

@ -1,245 +1,245 @@
vs.1.1
// Water surface vertex shader. Overview of the active code:
//   1. Transform the vertex to world space (r6).
//   2. Evaluate sin/cos of four waves from the world space X/Y position.
//   3. Raise Z by the filtered, depth-attenuated wave height and offset
//      X/Y along the wave normal ("scrunch").
//   4. Project to the screen; emit fog, color and three copies of the
//      transformed UV.
// Constants read by active instructions (names taken from the comments below):
//   c0..c3   world -> screen transform (m4x4)
//   c4       color, scaled by v5.x into oD0
//   c5 / c6 / c7   kFreq / kPhase / kAmplitude, one wave per channel
//   c8 / c9  wave direction X / Y, one wave per channel
//   c10.xy   shore smoothing scale / offset
//   c11.xy   fog offset / scale applied to the projected w
//   c12.yz   scrunch scale / constant added to world Z
//   c13 / c14  vSin / vCos series coefficients
//   c15.zw   kPi / kTwoPi
//   c16      .x is used as zero and .z as one
//   c19, c20 UV transform rows
//   c25..    local -> world transform (m4x3)
//   c29      per-wave filter scale applied to v5.w
//   c30..c32 depth attenuation, described below
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c25; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c16.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = reflection strength (transparency)
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c30 = waterlevel + offset
// c31 = (maxAtten - minAtten) / depthFalloff
// c32 = minAtten.
// And in particular:
// c30.w = waterlevel
// c31.w = 1.f;
// c32.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// (kPi is added before the divide so that the later "- kPi" recentres the range.)
rcp r4, c15.wwww;
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp leaves the fractional part of its source in the .y result, so each
// channel is done separately and copied out; y goes last so it is not clobbered.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2;
mad r1, r4, c14.wwww, r1;
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
sub r4, c30, r6.zzzz;
mul r4, r4, c31;
add r4, r4, c32;
// Clamp .xyz to range [0..1]
min r4.xyz, r4, c16.zzzz;
max r4.xyz, r4, c16.xxxx;
// Calc our filter (see above).
// NOTE(review): the code below has no "- 1" term, unlike the formula in the
// header comment - confirm which one is intended.
mul r11, v5.wwww, c29;
max r11, r11, c16.xxxx;
min r11, r11, c16.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c7;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c16.zzzz;
// Smooth the approach to the shore.
sub r10.x, r6.z, c30.w; // r10.x = height
mul r10.x, r10.x, r10.x; // r10.x = h^2
mul r10.x, r10.x, c10.x; // r10.x = -h^2 * k1 / k2^2
add r10.x, r10.x, c10.y; // r10.x = k1 + -h^2 * k1 / k2^2
max r10.x, r10.x, c16.xxxx; // Clamp to >= zero
add r8.x, r8.x, r10.x; // r8.x += del
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c30.w;
max r6.z, r6.z, r8.z;
add r6.z, r6.z, c12.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c5;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c7;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c16.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
dp4 r7.x, r1, -c8
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c9
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
// Build accumNorm in r11 and normalize it (dp3 / rsq / mul).
mov r11, c16.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
mul r10.x, c12.y, r4.z;
mad r6.xy, r11.xy, r10.xx, r6.xy;
// mul r6.z, r6.z, r10.xxxx; DEBUG
// mad r6, r11, c12.yyzz, r6;
// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
// accumCos *= (scrunchScale, scrunchScale, 0, 0);
//##mul r2.x, r6.z, c12.x;
//##add r2.x, r2.x, c16.z;
//##mul r7.xy, r7.xy, r2.xx;
// This is actually wrong, but useful right now for visualizing the generated coords.
// See below for correct version.
//##sub r3, c16.xxzx, r7.xyzz;
// Normalize?
// Now rotate our normal vector into the wind
//##dp3 r0.x, r3, c18.xyww;
//##dp3 r0.y, r3, c18.zxww;
//##mov r3.xy, r0;
// Initialize r0.w
mov r0.w, c16.zzzz;
//##dp3 r0.x, r3, r3;
//##rsq r0.x, r0.x;
//##mul r3, r3, r0.xxxw;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c25; // HACKAGE
//mov r6.w, c16.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// Fog is a linear function of the projected w: (w + c11.x) * c11.y.
m4x4 r9, r6, c0;
add r10.x, r9.w, c11.x;
mul oFog, r10.x, c11.y;
mov oPos, r9;
// Color
mul oD0, c4, v5.xxxx;
// UVW0
// This layer just stays put. The motion's in the texture
// U = transformed U
// V = transformed V
dp4 r0.x, v7, c19;
dp4 r0.y, v7, c20;
//mul r0.y, r0.y, -c16.z;
//add r0.y, r0.y, c16.z;
//add r0.y, r0.y, c16.z;
//add r0.y, r0.y, c16.y;
// The same (U, V, 1, 1) is written to all three texture coordinate sets
// (r0.w was set to c16.z above).
mov oT0, r0.xyww;
mov oT1, r0.xyww;
mov oT2, r0.xyww;
vs.1.1
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c25; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c16.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = reflection strength (transparency)
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c30 = waterlevel + offset
// c31 = (maxAtten - minAtten) / depthFalloff
// c32 = minAtten.
// And in particular:
// c30.w = waterlevel
// c31.w = 1.f;
// c32.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
rcp r4, c15.wwww;
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2;
mad r1, r4, c14.wwww, r1;
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
sub r4, c30, r6.zzzz;
mul r4, r4, c31;
add r4, r4, c32;
// Clamp .xyz to range [0..1]
min r4.xyz, r4, c16.zzzz;
max r4.xyz, r4, c16.xxxx;
// Calc our filter (see above).
mul r11, v5.wwww, c29;
max r11, r11, c16.xxxx;
min r11, r11, c16.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c7;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c16.zzzz;
// Smooth the approach to the shore.
sub r10.x, r6.z, c30.w; // r10.x = height
mul r10.x, r10.x, r10.x; // r10.x = h^2
mul r10.x, r10.x, c10.x; // r10.x = -h^2 * k1 / k2^2
add r10.x, r10.x, c10.y; // r10.x = k1 + -h^2 * k1 / k2^2
max r10.x, r10.x, c16.xxxx; // Clamp to >= zero
add r8.x, r8.x, r10.x; // r8.x += del
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c30.w;
max r6.z, r6.z, r8.z;
add r6.z, r6.z, c12.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c5;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c7;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c16.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
dp4 r7.x, r1, -c8
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c9
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
mov r11, c16.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
mul r10.x, c12.y, r4.z;
mad r6.xy, r11.xy, r10.xx, r6.xy;
// mul r6.z, r6.z, r10.xxxx; DEBUG
// mad r6, r11, c12.yyzz, r6;
// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
// accumCos *= (scrunchScale, scrunchScale, 0, 0);
//##mul r2.x, r6.z, c12.x;
//##add r2.x, r2.x, c16.z;
//##mul r7.xy, r7.xy, r2.xx;
// This is actually wrong, but useful right now for visualizing the generated coords.
// See below for correct version.
//##sub r3, c16.xxzx, r7.xyzz;
// Normalize?
// Now rotate our normal vector into the wind
//##dp3 r0.x, r3, c18.xyww;
//##dp3 r0.y, r3, c18.zxww;
//##mov r3.xy, r0;
// Initialize r0.w
mov r0.w, c16.zzzz;
//##dp3 r0.x, r3, r3;
//##rsq r0.x, r0.x;
//##mul r3, r3, r0.xxxw;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c25; // HACKAGE
//mov r6.w, c16.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
m4x4 r9, r6, c0;
add r10.x, r9.w, c11.x;
mul oFog, r10.x, c11.y;
mov oPos, r9;
// Color
mul oD0, c4, v5.xxxx;
// UVW0
// This layer just stays put. The motion's in the texture
// U = transformed U
// V = transformed V
dp4 r0.x, v7, c19;
dp4 r0.y, v7, c20;
//mul r0.y, r0.y, -c16.z;
//add r0.y, r0.y, c16.z;
//add r0.y, r0.y, c16.z;
//add r0.y, r0.y, c16.y;
mov oT0, r0.xyww;
mov oT1, r0.xyww;
mov oT2, r0.xyww;

View File

@ -1,203 +1,203 @@
vs.1.1
// Water surface vertex shader.
// Takes the vertex to world space, sums four sine waves (one per xyzw
// channel) into a height that is scaled down with water depth, offsets
// X/Y by the matching cosine sums, then writes screen position, fog,
// color and one transformed UV set copied to three texture stages.
//
// Constant registers as used below:
// c0 = world to screen transform (m4x4 reads c0 through c3)
// c4 = output color, scaled by v5.x
// c5 = kFreq, c6 = kPhase, c7 = kAmplitude (one wave per channel)
// c8, c9 = X and Y components of the four wave directions
// c11 = fog terms: oFog = (screen w + c11.x) * c11.y
// c13 = vSin coefficients, c14 = vCos coefficients
// c15.z = kPi, c15.w = kTwoPi
// c16.x = 0, c16.z = 1
// c17 = k Dir.x A, c18 = k Dir.y A
// c19, c20 = U and V texture transform rows
// c25 = local to world transform (m4x3 reads c25 through c27)
// c29 = per-wave scale for the frequency filter
// c30, c31, c32 = depth attenuation terms (described below)
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c25; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c16.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = reflection strength (transparency)
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c30 = waterlevel + offset
// c31 = (maxAtten - minAtten) / depthFalloff
// c32 = minAtten.
// And in particular:
// c30.w = waterlevel
// c31.w = 1.f;
// c32.w = 0;
// So r4.w is the depth of this vertex in feet (once r4 is computed
// in the depth based filtering step further down).
// Dot our position with our direction vectors.
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
rcp r4, c15.wwww;
// Bias by kPi before dividing, so that after the frac and rescale below
// subtracting kPi lands the result in [-Pi..Pi].
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp reads the .w of its swizzled source and, with a .y write mask,
// stores the fractional part (src - floor(src)) in r1.y. r1.y doubles as
// the scratch slot, so the y channel is done last and left in place.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// The r0^7 (vSin.w) and r0^6 (vCos.w) terms are added right after.
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2;
mad r1, r4, c14.wwww, r1;
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c30 - r6.z) * c31 + c32, evaluated per channel.
sub r4, c30, r6.zzzz;
mul r4, r4, c31;
add r4, r4, c32;
// Clamp .xyz to range [0..1]
// (.w is not clamped; per the notes above it holds the raw depth.)
min r4.xyz, r4, c16.zzzz;
max r4.xyz, r4, c16.xxxx;
// Calc our filter (see above).
// r11 = clamp(v5.w * c29, 0, 1) per wave.
// NOTE(review): the formula in the notes above subtracts 1 before the
// clamp, but no subtraction happens here - confirm whether it is folded
// into the constants elsewhere or the note is stale.
mul r11, v5.wwww, c29;
max r11, r11, c16.xxxx;
min r11, r11, c16.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c7;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c16.zzzz;
// Smooth the approach to the shore.
// (Disabled in this shader: the whole step is inside a block comment.)
/*
sub r10.x, r6.z, c30.w; // r10.x = height
mul r10.x, r10.x, r10.x; // r10.x = h^2
mul r10.x, r10.x, c10.x; // r10.x = -h^2 * k1 / k2^2
add r10.x, r10.x, c10.y; // r10.x = k1 + -h^2 * k1 / k2^2
max r10.x, r10.x, c16.xxxx; // Clamp to >= zero
add r8.x, r8.x, r10.x; // r8.x += del
*/
// Scale the summed wave height by the depth based wave scaling (r4.z),
// add the water level (c30.w), and keep whichever of that and the
// incoming vertex height is larger.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c30.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= filter;
mul r1, r1, r11;
// Pos = (in.x + S, in.y + R, r6.z)
// S = sum(k Dir.x A cos())
// R = sum(k Dir.y A cos())
// c17 = k Dir.x A
// c18 = k Dir.y A
// S = sum(cosDist * c17);
dp4 r7.x, r1, c17;
// R = sum(cosDist * c18);
dp4 r7.y, r1, c18;
add r6.xy, r6.xy, r7.xy;
// Initialize r0.w
// (set to 1; it supplies the z and w of the texture coords written below)
mov r0.w, c16.zzzz;
//##dp3 r0.x, r3, r3;
//##rsq r0.x, r0.x;
//##mul r3, r3, r0.xxxw;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c25; // HACKAGE
//mov r6.w, c16.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The transform goes through r9 so that its w can feed the fog term:
// oFog = (r9.w + c11.x) * c11.y
m4x4 r9, r6, c0;
add r10.x, r9.w, c11.x;
mul oFog, r10.x, c11.y;
mov oPos, r9;
// Color
mul oD0, c4, v5.xxxx;
// UVW0
// This layer just stays put. The motion's in the texture
// U = transformed U
// V = transformed V
dp4 r0.x, v7, c19;
dp4 r0.y, v7, c20;
//mul r0.y, r0.y, -c16.z;
//add r0.y, r0.y, c16.z;
//add r0.y, r0.y, c16.z;
//add r0.y, r0.y, c16.y;
// All three stages get the same coords: (U, V, 1, 1).
mov oT0, r0.xyww;
mov oT1, r0.xyww;
mov oT2, r0.xyww;
vs.1.1
// Water surface vertex shader.
// Takes the vertex to world space, sums four sine waves (one per xyzw
// channel) into a height that is scaled down with water depth, offsets
// X/Y by the matching cosine sums, then writes screen position, fog,
// color and one transformed UV set copied to three texture stages.
//
// Constant registers as used below:
// c0 = world to screen transform (m4x4 reads c0 through c3)
// c4 = output color, scaled by v5.x
// c5 = kFreq, c6 = kPhase, c7 = kAmplitude (one wave per channel)
// c8, c9 = X and Y components of the four wave directions
// c11 = fog terms: oFog = (screen w + c11.x) * c11.y
// c13 = vSin coefficients, c14 = vCos coefficients
// c15.z = kPi, c15.w = kTwoPi
// c16.x = 0, c16.z = 1
// c17 = k Dir.x A, c18 = k Dir.y A
// c19, c20 = U and V texture transform rows
// c25 = local to world transform (m4x3 reads c25 through c27)
// c29 = per-wave scale for the frequency filter
// c30, c31, c32 = depth attenuation terms (described below)
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c25; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c16.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = reflection strength (transparency)
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c30 = waterlevel + offset
// c31 = (maxAtten - minAtten) / depthFalloff
// c32 = minAtten.
// And in particular:
// c30.w = waterlevel
// c31.w = 1.f;
// c32.w = 0;
// So r4.w is the depth of this vertex in feet (once r4 is computed
// in the depth based filtering step further down).
// Dot our position with our direction vectors.
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
rcp r4, c15.wwww;
// Bias by kPi before dividing, so that after the frac and rescale below
// subtracting kPi lands the result in [-Pi..Pi].
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp reads the .w of its swizzled source and, with a .y write mask,
// stores the fractional part (src - floor(src)) in r1.y. r1.y doubles as
// the scratch slot, so the y channel is done last and left in place.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// The r0^7 (vSin.w) and r0^6 (vCos.w) terms are added right after.
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2;
mad r1, r4, c14.wwww, r1;
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c30 - r6.z) * c31 + c32, evaluated per channel.
sub r4, c30, r6.zzzz;
mul r4, r4, c31;
add r4, r4, c32;
// Clamp .xyz to range [0..1]
// (.w is not clamped; per the notes above it holds the raw depth.)
min r4.xyz, r4, c16.zzzz;
max r4.xyz, r4, c16.xxxx;
// Calc our filter (see above).
// r11 = clamp(v5.w * c29, 0, 1) per wave.
// NOTE(review): the formula in the notes above subtracts 1 before the
// clamp, but no subtraction happens here - confirm whether it is folded
// into the constants elsewhere or the note is stale.
mul r11, v5.wwww, c29;
max r11, r11, c16.xxxx;
min r11, r11, c16.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c7;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c16.zzzz;
// Smooth the approach to the shore.
// (Disabled in this shader: the whole step is inside a block comment.)
/*
sub r10.x, r6.z, c30.w; // r10.x = height
mul r10.x, r10.x, r10.x; // r10.x = h^2
mul r10.x, r10.x, c10.x; // r10.x = -h^2 * k1 / k2^2
add r10.x, r10.x, c10.y; // r10.x = k1 + -h^2 * k1 / k2^2
max r10.x, r10.x, c16.xxxx; // Clamp to >= zero
add r8.x, r8.x, r10.x; // r8.x += del
*/
// Scale the summed wave height by the depth based wave scaling (r4.z),
// add the water level (c30.w), and keep whichever of that and the
// incoming vertex height is larger.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c30.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= filter;
mul r1, r1, r11;
// Pos = (in.x + S, in.y + R, r6.z)
// S = sum(k Dir.x A cos())
// R = sum(k Dir.y A cos())
// c17 = k Dir.x A
// c18 = k Dir.y A
// S = sum(cosDist * c17);
dp4 r7.x, r1, c17;
// R = sum(cosDist * c18);
dp4 r7.y, r1, c18;
add r6.xy, r6.xy, r7.xy;
// Initialize r0.w
// (set to 1; it supplies the z and w of the texture coords written below)
mov r0.w, c16.zzzz;
//##dp3 r0.x, r3, r3;
//##rsq r0.x, r0.x;
//##mul r3, r3, r0.xxxw;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c25; // HACKAGE
//mov r6.w, c16.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The transform goes through r9 so that its w can feed the fog term:
// oFog = (r9.w + c11.x) * c11.y
m4x4 r9, r6, c0;
add r10.x, r9.w, c11.x;
mul oFog, r10.x, c11.y;
mov oPos, r9;
// Color
mul oD0, c4, v5.xxxx;
// UVW0
// This layer just stays put. The motion's in the texture
// U = transformed U
// V = transformed V
dp4 r0.x, v7, c19;
dp4 r0.y, v7, c20;
//mul r0.y, r0.y, -c16.z;
//add r0.y, r0.y, c16.z;
//add r0.y, r0.y, c16.z;
//add r0.y, r0.y, c16.y;
// All three stages get the same coords: (U, V, 1, 1).
mov oT0, r0.xyww;
mov oT1, r0.xyww;
mov oT2, r0.xyww;

View File

@ -1,207 +1,207 @@
vs.1.1
// Water surface vertex shader.
// Takes the vertex to world space, sums four sine waves (one per xyzw
// channel) into a height that is scaled down with water depth, builds a
// normalized surface normal from the cosine terms and offsets X/Y along
// it, then writes screen position, fog, color and one transformed UV set.
//
// Constant registers as used below:
// c0 = world to screen transform (m4x4 reads c0 through c3)
// c4 = kFreq, c5 = kPhase, c6 = kAmplitude (one wave per channel)
// c7, c8 = X and Y components of the four wave directions
// c9.y = scrunch (X/Y offset) amount
// c10 = vSin coefficients, c11 = vCos coefficients
// c12.z = kPi, c12.w = kTwoPi
// c13.x = 0, c13.z = 1
// c14, c15 = U and V texture transform rows
// c18 = local to world transform (m4x3 reads c18 through c20)
// c21 = per-wave scale for the frequency filter
// c22, c23, c24 = depth attenuation terms (described below)
// c25.x = upward bias added to the vertex height
// c26 = material color/opacity
// c29 = fog terms: oFog = (screen w + c29.x) * c29.y
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet (once r4 is computed
// in the depth based filtering step further down).
// Dot our position with our direction vectors.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
rcp r4, c12.wwww;
// Bias by kPi before dividing, so that after the frac and rescale below
// subtracting kPi lands the result in [-Pi..Pi].
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp reads the .w of its swizzled source and, with a .y write mask,
// stores the fractional part (src - floor(src)) in r1.y. r1.y doubles as
// the scratch slot, so the y channel is done last and left in place.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// The r0^7 (vSin.w) and r0^6 (vCos.w) terms are added right after.
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2;
mad r1, r4, c11.wwww, r1;
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c22 - r6.z) * c23 + c24, evaluated per channel.
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
// (.w is not clamped; per the notes above it holds the raw depth.)
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c21, 0, 1) per wave.
// NOTE(review): the formula in the notes above subtracts 1 before the
// clamp, but no subtraction happens here - confirm whether it is folded
// into the constants elsewhere or the note is stale.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Scale the summed wave height by the depth based wave scaling (r4.z),
// add the water level (c22.w), and keep whichever of that and the
// incoming vertex height is larger.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c4;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c6;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c13.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
dp4 r7.x, r1, -c7
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c8
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
// Only accumNorm is actually built here: r11 = (0, 0, 1, 0) + r7, then
// normalized over xyz with dp3/rsq/mul.
mov r11, c13.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
// r10.x = c9.y * r4.z; X/Y then move along the normal's xy by that amount.
mul r10.x, c9.y, r4.z;
mad r6.xy, r11.xy, r10.xx, r6.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c18; // HACKAGE
//mov r6.w, c13.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The transform goes through r9 so that its w can feed the fog term:
// oFog = (r9.w + c29.x) * c29.y
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
mov oPos, r9;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
// Usual texture transform
// z and w are set to 1, so the output coords are (U, V, 1, 1).
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
vs.1.1
// Water surface vertex shader.
// Takes the vertex to world space, sums four sine waves (one per xyzw
// channel) into a height that is scaled down with water depth, builds a
// normalized surface normal from the cosine terms and offsets X/Y along
// it, then writes screen position, fog, color and one transformed UV set.
//
// Constant registers as used below:
// c0 = world to screen transform (m4x4 reads c0 through c3)
// c4 = kFreq, c5 = kPhase, c6 = kAmplitude (one wave per channel)
// c7, c8 = X and Y components of the four wave directions
// c9.y = scrunch (X/Y offset) amount
// c10 = vSin coefficients, c11 = vCos coefficients
// c12.z = kPi, c12.w = kTwoPi
// c13.x = 0, c13.z = 1
// c14, c15 = U and V texture transform rows
// c18 = local to world transform (m4x3 reads c18 through c20)
// c21 = per-wave scale for the frequency filter
// c22, c23, c24 = depth attenuation terms (described below)
// c25.x = upward bias added to the vertex height
// c26 = material color/opacity
// c29 = fog terms: oFog = (screen w + c29.x) * c29.y
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet (once r4 is computed
// in the depth based filtering step further down).
// Dot our position with our direction vectors.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
rcp r4, c12.wwww;
// Bias by kPi before dividing, so that after the frac and rescale below
// subtracting kPi lands the result in [-Pi..Pi].
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp reads the .w of its swizzled source and, with a .y write mask,
// stores the fractional part (src - floor(src)) in r1.y. r1.y doubles as
// the scratch slot, so the y channel is done last and left in place.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// The r0^7 (vSin.w) and r0^6 (vCos.w) terms are added right after.
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2;
mad r1, r4, c11.wwww, r1;
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c22 - r6.z) * c23 + c24, evaluated per channel.
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
// (.w is not clamped; per the notes above it holds the raw depth.)
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c21, 0, 1) per wave.
// NOTE(review): the formula in the notes above subtracts 1 before the
// clamp, but no subtraction happens here - confirm whether it is folded
// into the constants elsewhere or the note is stale.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Scale the summed wave height by the depth based wave scaling (r4.z),
// add the water level (c22.w), and keep whichever of that and the
// incoming vertex height is larger.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c4;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c6;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c13.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
dp4 r7.x, r1, -c7
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c8
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
// Only accumNorm is actually built here: r11 = (0, 0, 1, 0) + r7, then
// normalized over xyz with dp3/rsq/mul.
mov r11, c13.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
// r10.x = c9.y * r4.z; X/Y then move along the normal's xy by that amount.
mul r10.x, c9.y, r4.z;
mad r6.xy, r11.xy, r10.xx, r6.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c18; // HACKAGE
//mov r6.w, c13.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The transform goes through r9 so that its w can feed the fog term:
// oFog = (r9.w + c29.x) * c29.y
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
mov oPos, r9;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
// Usual texture transform
// z and w are set to 1, so the output coords are (U, V, 1, 1).
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;

View File

@ -1,189 +1,189 @@
vs.1.1
// Water surface vertex shader.
// Takes the vertex to world space, sums four sine waves (one per xyzw
// channel) into a height that is scaled down with water depth, offsets
// X/Y by the matching cosine sums, then writes screen position, fog,
// color and one transformed UV set.
//
// Constant registers as used below:
// c0 = world to screen transform (m4x4 reads c0 through c3)
// c4 = kFreq, c5 = kPhase, c6 = kAmplitude (one wave per channel)
// c7, c8 = X and Y components of the four wave directions
// c10 = vSin coefficients, c11 = vCos coefficients
// c12.z = kPi, c12.w = kTwoPi
// c13.x = 0, c13.z = 1
// c14, c15 = U and V texture transform rows
// c18 = local to world transform (m4x3 reads c18 through c20)
// c21 = per-wave scale for the frequency filter
// c22, c23, c24 = depth attenuation terms (described below)
// c25.x = upward bias added to the vertex height
// c26 = material color/opacity
// c29 = fog terms: oFog = (screen w + c29.x) * c29.y
// c30 = k Dir.x A, c31 = k Dir.y A
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet (once r4 is computed
// in the depth based filtering step further down).
// Dot our position with our direction vectors.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
rcp r4, c12.wwww;
// Bias by kPi before dividing, so that after the frac and rescale below
// subtracting kPi lands the result in [-Pi..Pi].
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp reads the .w of its swizzled source and, with a .y write mask,
// stores the fractional part (src - floor(src)) in r1.y. r1.y doubles as
// the scratch slot, so the y channel is done last and left in place.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// The r0^7 (vSin.w) and r0^6 (vCos.w) terms are added right after.
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2;
mad r1, r4, c11.wwww, r1;
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c22 - r6.z) * c23 + c24, evaluated per channel.
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
// (.w is not clamped; per the notes above it holds the raw depth.)
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c21, 0, 1) per wave.
// NOTE(review): the formula in the notes above subtracts 1 before the
// clamp, but no subtraction happens here - confirm whether it is folded
// into the constants elsewhere or the note is stale.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Scale the summed wave height by the depth based wave scaling (r4.z),
// add the water level (c22.w), and keep whichever of that and the
// incoming vertex height is larger.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= filter;
mul r1, r1, r11;
// Pos = (in.x + S, in.y + R, r6.z)
// S = sum(k Dir.x A cos())
// R = sum(k Dir.y A cos())
// c30 = k Dir.x A
// c31 = k Dir.y A
// S = sum(cosDist * c30);
dp4 r7.x, r1, c30;
// R = sum(cosDist * c31);
dp4 r7.y, r1, c31;
add r6.xy, r6.xy, r7.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c18; // HACKAGE
//mov r6.w, c13.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The transform goes through r9 so that its w can feed the fog term:
// oFog = (r9.w + c29.x) * c29.y
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
mov oPos, r9;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
// Usual texture transform
// z and w are set to 1, so the output coords are (U, V, 1, 1).
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
vs.1.1
// Water surface vertex shader (Direct3D vs.1.1 assembly).
// Displaces a world-space vertex with a sum of four sine waves (one wave per
// register component), attenuates the waves by water depth and by the
// per-vertex alpha filter, then writes oPos, oFog, oD0 and oT0.
//
// Constant registers as read by the code below (names follow the comments in
// this file; the app-side layout is not visible here):
//   c0..c3    matrix applied by m4x4 to produce the screen position
//   c4/c5/c6  kFreq / kPhase / kAmplitude, one wave per component
//   c7/c8     x / y components of the four wave directions
//   c10/c11   vSin / vCos polynomial coefficients
//   c12.z/.w  kPi / kTwoPi
//   c13.x/.z  0 / 1
//   c14/c15   texture transform rows for oT0
//   c18..c20  local-to-world matrix applied by m4x3
//   c21       per-wave scale for the v5.w frequency filter
//   c22..c24  depth filter constants (derivation below)
//   c25.x     upward z bias
//   c26       material color/opacity
//   c29.x/.y  fog offset / fog scale -- NOTE(review): inferred from use, confirm
//   c30/c31   k Dir.x A / k Dir.y A
//
// Temporaries are recycled: r4 holds 1/kTwoPi, then r0^5 and r0^6, then the depth
// filter; r11 holds the wave filter and later the texture coordinates.
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the filter code further down computes clamp(v5.w * c21, 0, 1)
// with no "- 1" term; confirm whether this formula or the code is stale.
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// Each component of r0 is one wave: dir.x * pos.x + dir.y * pos.y.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// kPi is added before the scale so that frac(dist) * kTwoPi - kPi (below)
// ends up in [-Pi..Pi].
rcp r4, c12.wwww;
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp with a .y write mask stores the fractional part of its scalar source.
// r1.y doubles as the scratch slot, so the real y component is computed last.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (the r0^7 * vSin.w and r0^6 * vCos.w terms are added a few lines further down)
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2; // sinDist += r0^7 * vSin.w
mad r1, r4, c11.wwww, r1; // cosDist += r0^6 * vCos.w
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c22 - r6.z) * c23 + c24, i.e. the atten formula derived above.
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
// (.w is left unclamped; per the derivation above it is the raw depth.)
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c21, 0, 1), one filter value per wave.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Dampen by r4.z, the clamped depth attenuation of the third channel
// (presumably the "overall wave scaling" channel described above), then move
// into world space by adding the water level c22.w.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= filter;
mul r1, r1, r11;
// Pos = (in.x + S, in.y + R, r6.z)
// S = sum(k Dir.x A cos())
// R = sum(k Dir.y A cos())
// c30 = k Dir.x A
// c31 = k Dir.y A
// S = sum(cosDist * c30);
dp4 r7.x, r1, c30;
// R = sum(cosDist * c31);
dp4 r7.y, r1, c31;
add r6.xy, r6.xy, r7.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c18; // HACKAGE
//mov r6.w, c13.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The transformed position goes through r9 first so its w can feed the fog
// term: oFog = (r9.w + c29.x) * c29.y.
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
mov oPos, r9;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
// Usual texture transform
// r11 (the wave filter until now) is recycled: z and w are forced to 1 and
// x/y are the input UV transformed by c14/c15.
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;

View File

@ -1,209 +1,209 @@
vs.1.1
// Water surface vertex shader (Direct3D vs.1.1 assembly).
// Raises a world-space vertex by a sum of four sine waves (one wave per
// register component), attenuated by water depth and by the per-vertex alpha
// filter, then "scrunches" the vertex in X/Y along the normalized wave normal
// and writes oPos, oFog, oD0, oT0 and oT1.
//
// Constant registers as read by the code below (names follow the comments in
// this file; the app-side layout is not visible here):
//   c0..c3    matrix applied by m4x4 to produce the screen position
//   c4/c5/c6  kFreq / kPhase / kAmplitude, one wave per component
//   c7/c8     x / y components of the four wave directions
//   c9.y      scrunch amount -- NOTE(review): inferred from use, confirm
//   c10/c11   vSin / vCos polynomial coefficients
//   c12.z/.w  kPi / kTwoPi
//   c13.x/.z  0 / 1
//   c14/c15   texture transform rows for oT0
//   c16/c17   texture transform rows for oT1
//   c18..c20  local-to-world matrix applied by m4x3
//   c21       per-wave scale for the v5.w frequency filter
//   c22..c24  depth filter constants (derivation below)
//   c25.x     upward z bias
//   c26       material color/opacity
//   c29.x/.y  fog offset / fog scale -- NOTE(review): inferred from use, confirm
//
// Temporaries are recycled: r4 holds 1/kTwoPi, then r0^5 and r0^6, then the depth
// filter; r11 holds the wave filter, then the normal, then the texture coordinates.
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the filter code further down computes clamp(v5.w * c21, 0, 1)
// with no "- 1" term; confirm whether this formula or the code is stale.
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// Each component of r0 is one wave: dir.x * pos.x + dir.y * pos.y.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// kPi is added before the scale so that frac(dist) * kTwoPi - kPi (below)
// ends up in [-Pi..Pi].
rcp r4, c12.wwww;
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp with a .y write mask stores the fractional part of its scalar source.
// r1.y doubles as the scratch slot, so the real y component is computed last.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (the r0^7 * vSin.w and r0^6 * vCos.w terms are added a few lines further down)
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2; // sinDist += r0^7 * vSin.w
mad r1, r4, c11.wwww, r1; // cosDist += r0^6 * vCos.w
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c22 - r6.z) * c23 + c24, i.e. the atten formula derived above.
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
// (.w is left unclamped; per the derivation above it is the raw depth.)
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c21, 0, 1), one filter value per wave.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Dampen by r4.z, the clamped depth attenuation of the third channel
// (presumably the "overall wave scaling" channel described above), then move
// into world space by adding the water level c22.w.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c4;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c6;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c13.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
// The direction constants are negated in the dot products.
dp4 r7.x, r1, -c7
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c8
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
// Only accumNorm is built here: r11 = normalize(accumCos.x, accumCos.y, 1),
// with r11.w left at 0. This overwrites the wave filter held in r11.
mov r11, c13.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
// r6.xy += normal.xy * (c9.y * r4.z)
mul r10.x, c9.y, r4.z;
mad r6.xy, r11.xy, r10.xx, r6.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c18; // HACKAGE
//mov r6.w, c13.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The transformed position goes through r9 first so its w can feed the fog
// term: oFog = (r9.w + c29.x) * c29.y.
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
mov oPos, r9;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
// Usual texture transform
// r11 (the normal until now) is recycled: z and w are forced to 1 and
// x/y are the input UV transformed by c14/c15.
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
// Second UV set: the same input UV through c16/c17 (r11.zw are still 1).
dp4 r11.x, v7, c16;
dp4 r11.y, v7, c17;
mov oT1, r11;
vs.1.1
// Water surface vertex shader (Direct3D vs.1.1 assembly).
// Raises a world-space vertex by a sum of four sine waves (one wave per
// register component), attenuated by water depth and by the per-vertex alpha
// filter, then "scrunches" the vertex in X/Y along the normalized wave normal
// and writes oPos, oFog, oD0, oT0 and oT1.
//
// Constant registers as read by the code below (names follow the comments in
// this file; the app-side layout is not visible here):
//   c0..c3    matrix applied by m4x4 to produce the screen position
//   c4/c5/c6  kFreq / kPhase / kAmplitude, one wave per component
//   c7/c8     x / y components of the four wave directions
//   c9.y      scrunch amount -- NOTE(review): inferred from use, confirm
//   c10/c11   vSin / vCos polynomial coefficients
//   c12.z/.w  kPi / kTwoPi
//   c13.x/.z  0 / 1
//   c14/c15   texture transform rows for oT0
//   c16/c17   texture transform rows for oT1
//   c18..c20  local-to-world matrix applied by m4x3
//   c21       per-wave scale for the v5.w frequency filter
//   c22..c24  depth filter constants (derivation below)
//   c25.x     upward z bias
//   c26       material color/opacity
//   c29.x/.y  fog offset / fog scale -- NOTE(review): inferred from use, confirm
//
// Temporaries are recycled: r4 holds 1/kTwoPi, then r0^5 and r0^6, then the depth
// filter; r11 holds the wave filter, then the normal, then the texture coordinates.
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the filter code further down computes clamp(v5.w * c21, 0, 1)
// with no "- 1" term; confirm whether this formula or the code is stale.
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// Each component of r0 is one wave: dir.x * pos.x + dir.y * pos.y.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// kPi is added before the scale so that frac(dist) * kTwoPi - kPi (below)
// ends up in [-Pi..Pi].
rcp r4, c12.wwww;
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp with a .y write mask stores the fractional part of its scalar source.
// r1.y doubles as the scratch slot, so the real y component is computed last.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (the r0^7 * vSin.w and r0^6 * vCos.w terms are added a few lines further down)
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2; // sinDist += r0^7 * vSin.w
mad r1, r4, c11.wwww, r1; // cosDist += r0^6 * vCos.w
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c22 - r6.z) * c23 + c24, i.e. the atten formula derived above.
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
// (.w is left unclamped; per the derivation above it is the raw depth.)
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c21, 0, 1), one filter value per wave.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Dampen by r4.z, the clamped depth attenuation of the third channel
// (presumably the "overall wave scaling" channel described above), then move
// into world space by adding the water level c22.w.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c4;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c6;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c13.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
// The direction constants are negated in the dot products.
dp4 r7.x, r1, -c7
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c8
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
// Only accumNorm is built here: r11 = normalize(accumCos.x, accumCos.y, 1),
// with r11.w left at 0. This overwrites the wave filter held in r11.
mov r11, c13.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
// r6.xy += normal.xy * (c9.y * r4.z)
mul r10.x, c9.y, r4.z;
mad r6.xy, r11.xy, r10.xx, r6.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c18; // HACKAGE
//mov r6.w, c13.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The transformed position goes through r9 first so its w can feed the fog
// term: oFog = (r9.w + c29.x) * c29.y.
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
mov oPos, r9;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
// Usual texture transform
// r11 (the normal until now) is recycled: z and w are forced to 1 and
// x/y are the input UV transformed by c14/c15.
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
// Second UV set: the same input UV through c16/c17 (r11.zw are still 1).
dp4 r11.x, v7, c16;
dp4 r11.y, v7, c17;
mov oT1, r11;

View File

@ -1,191 +1,191 @@
vs.1.1
// Water surface vertex shader (Direct3D vs.1.1 assembly).
// Displaces a world-space vertex with a sum of four sine waves (one wave per
// register component), attenuates the waves by water depth and by the
// per-vertex alpha filter, then writes oPos, oFog, oD0, oT0 and oT1.
//
// Constant registers as read by the code below (names follow the comments in
// this file; the app-side layout is not visible here):
//   c0..c3    matrix applied by m4x4 to produce the screen position
//   c4/c5/c6  kFreq / kPhase / kAmplitude, one wave per component
//   c7/c8     x / y components of the four wave directions
//   c10/c11   vSin / vCos polynomial coefficients
//   c12.z/.w  kPi / kTwoPi
//   c13.x/.z  0 / 1
//   c14/c15   texture transform rows for oT0
//   c16/c17   texture transform rows for oT1
//   c18..c20  local-to-world matrix applied by m4x3
//   c21       per-wave scale for the v5.w frequency filter
//   c22..c24  depth filter constants (derivation below)
//   c25.x     upward z bias
//   c26       material color/opacity
//   c29.x/.y  fog offset / fog scale -- NOTE(review): inferred from use, confirm
//   c30/c31   k Dir.x A / k Dir.y A
//
// Temporaries are recycled: r4 holds 1/kTwoPi, then r0^5 and r0^6, then the depth
// filter; r11 holds the wave filter and later the texture coordinates.
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the filter code further down computes clamp(v5.w * c21, 0, 1)
// with no "- 1" term; confirm whether this formula or the code is stale.
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// Each component of r0 is one wave: dir.x * pos.x + dir.y * pos.y.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// kPi is added before the scale so that frac(dist) * kTwoPi - kPi (below)
// ends up in [-Pi..Pi].
rcp r4, c12.wwww;
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp with a .y write mask stores the fractional part of its scalar source.
// r1.y doubles as the scratch slot, so the real y component is computed last.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (the r0^7 * vSin.w and r0^6 * vCos.w terms are added a few lines further down)
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2; // sinDist += r0^7 * vSin.w
mad r1, r4, c11.wwww, r1; // cosDist += r0^6 * vCos.w
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c22 - r6.z) * c23 + c24, i.e. the atten formula derived above.
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
// (.w is left unclamped; per the derivation above it is the raw depth.)
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c21, 0, 1), one filter value per wave.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Dampen by r4.z, the clamped depth attenuation of the third channel
// (presumably the "overall wave scaling" channel described above), then move
// into world space by adding the water level c22.w.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= filter;
mul r1, r1, r11;
// Pos = (in.x + S, in.y + R, r6.z)
// S = sum(k Dir.x A cos())
// R = sum(k Dir.y A cos())
// c30 = k Dir.x A
// c31 = k Dir.y A
// S = sum(cosDist * c30);
dp4 r7.x, r1, c30;
// R = sum(cosDist * c31);
dp4 r7.y, r1, c31;
add r6.xy, r6.xy, r7.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c18; // HACKAGE
//mov r6.w, c13.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The transformed position goes through r9 first so its w can feed the fog
// term: oFog = (r9.w + c29.x) * c29.y.
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
mov oPos, r9;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
// Usual texture transform
// r11 (the wave filter until now) is recycled: z and w are forced to 1 and
// x/y are the input UV transformed by c14/c15.
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
// Second UV set: the same input UV through c16/c17 (r11.zw are still 1).
dp4 r11.x, v7, c16;
dp4 r11.y, v7, c17;
mov oT1, r11;
vs.1.1
// Water surface vertex shader (Direct3D vs.1.1 assembly).
// Displaces a world-space vertex with a sum of four sine waves (one wave per
// register component), attenuates the waves by water depth and by the
// per-vertex alpha filter, then writes oPos, oFog, oD0, oT0 and oT1.
//
// Constant registers as read by the code below (names follow the comments in
// this file; the app-side layout is not visible here):
//   c0..c3    matrix applied by m4x4 to produce the screen position
//   c4/c5/c6  kFreq / kPhase / kAmplitude, one wave per component
//   c7/c8     x / y components of the four wave directions
//   c10/c11   vSin / vCos polynomial coefficients
//   c12.z/.w  kPi / kTwoPi
//   c13.x/.z  0 / 1
//   c14/c15   texture transform rows for oT0
//   c16/c17   texture transform rows for oT1
//   c18..c20  local-to-world matrix applied by m4x3
//   c21       per-wave scale for the v5.w frequency filter
//   c22..c24  depth filter constants (derivation below)
//   c25.x     upward z bias
//   c26       material color/opacity
//   c29.x/.y  fog offset / fog scale -- NOTE(review): inferred from use, confirm
//   c30/c31   k Dir.x A / k Dir.y A
//
// Temporaries are recycled: r4 holds 1/kTwoPi, then r0^5 and r0^6, then the depth
// filter; r11 holds the wave filter and later the texture coordinates.
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the filter code further down computes clamp(v5.w * c21, 0, 1)
// with no "- 1" term; confirm whether this formula or the code is stale.
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// Each component of r0 is one wave: dir.x * pos.x + dir.y * pos.y.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// kPi is added before the scale so that frac(dist) * kTwoPi - kPi (below)
// ends up in [-Pi..Pi].
rcp r4, c12.wwww;
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp with a .y write mask stores the fractional part of its scalar source.
// r1.y doubles as the scratch slot, so the real y component is computed last.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (the r0^7 * vSin.w and r0^6 * vCos.w terms are added a few lines further down)
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2; // sinDist += r0^7 * vSin.w
mad r1, r4, c11.wwww, r1; // cosDist += r0^6 * vCos.w
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c22 - r6.z) * c23 + c24, i.e. the atten formula derived above.
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
// (.w is left unclamped; per the derivation above it is the raw depth.)
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c21, 0, 1), one filter value per wave.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Dampen by r4.z, the clamped depth attenuation of the third channel
// (presumably the "overall wave scaling" channel described above), then move
// into world space by adding the water level c22.w.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= filter;
mul r1, r1, r11;
// Pos = (in.x + S, in.y + R, r6.z)
// S = sum(k Dir.x A cos())
// R = sum(k Dir.y A cos())
// c30 = k Dir.x A
// c31 = k Dir.y A
// S = sum(cosDist * c30);
dp4 r7.x, r1, c30;
// R = sum(cosDist * c31);
dp4 r7.y, r1, c31;
add r6.xy, r6.xy, r7.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c18; // HACKAGE
//mov r6.w, c13.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The transformed position goes through r9 first so its w can feed the fog
// term: oFog = (r9.w + c29.x) * c29.y.
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
mov oPos, r9;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
// Usual texture transform
// r11 (the wave filter until now) is recycled: z and w are forced to 1 and
// x/y are the input UV transformed by c14/c15.
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
// Second UV set: the same input UV through c16/c17 (r11.zw are still 1).
dp4 r11.x, v7, c16;
dp4 r11.y, v7, c17;
mov oT1, r11;

View File

@ -1,210 +1,210 @@
vs.1.1
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
dcl_texcoord1 v8
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
rcp r4, c12.wwww;
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2;
mad r1, r4, c11.wwww, r1;
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c4;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c6;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c13.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
dp4 r7.x, r1, -c7
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c8
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
mov r11, c13.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
mul r10.x, c9.y, r4.z;
mad r6.xy, r11.xy, r10.xx, r6.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c18; // HACKAGE
//mov r6.w, c13.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
mov oPos, r9;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
// Usual texture transform
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
dp4 r11.x, v8, c16;
dp4 r11.y, v8, c17;
mov oT1, r11;
vs.1.1
// Water wave vertex shader.
// Moves the vertex to world space, sums four sine waves (one per register
// channel) into a height, "scrunches" the vertex in X/Y along the normalized
// wave normal, then outputs screen position, fog, diffuse color and two
// transformed texture coordinates.
//
// Constant registers as read below:
// c0-c3 = world to screen transform (m4x4)
// c4 = kFreq, c5 = kPhase, c6 = kAmplitude (one wave per channel)
// c7, c8 = X and Y components of the four wave directions
// c9.y = scrunch scale
// c10 = vSin polynomial coefficients, c11 = vCos polynomial coefficients
// c12.z = kPi, c12.w = kTwoPi
// c13.x = 0, c13.z = 1
// c14, c15 = texture transform rows for v7; c16, c17 = rows for v8
// c18-c20 = local to world transform (m4x3)
// c21 = per wave filter scale (presumably "waveLen" in the notes below - confirm)
// c22-c24 = depth attenuation constants (described below)
// c25.x = Z bias
// c26 = material color/opacity
// c29 = fog constants: oFog = (screen w + c29.x) * c29.y
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
dcl_texcoord1 v8
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the instructions below compute min(max(v5.wwww * c21, 0), 1)
// with no "- 1" term - confirm which of the two is intended.
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// r0 = c7 * pos.x + c8 * pos.y, i.e. one dot product per wave channel.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist = (dist + kPi) * rcp(kTwoPi);
// (kPi is added here so that frac() followed by "* kTwoPi - kPi" below
// lands in [-Pi..Pi].)
rcp r4, c12.wwww;
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp leaves the fractional part of its input in .y, so r1.y is used as the
// scratch slot for the x, z and w channels; the y channel is done last so the
// final write leaves the correct value in r1.y.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
// cos = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = vCos.x + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2; // r2 += r0^7 * vSin.w
mad r1, r4, c11.wwww, r1; // r1 += r0^6 * vCos.w
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c22 - pos.z) * c23 + c24
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c21, 0, 1), one filter value per wave.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Dampen the height by the depth attenuation in r4.z, move it to the water
// level (c22.w), and never let the result drop below the input vertex height.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c4;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c6;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c13.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
dp4 r7.x, r1, -c7
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c8
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
// r11 = normalize(accumNorm); only the normal is built here.
mov r11, c13.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
// pos.xy += normal.xy * (c9.y * r4.z)
mul r10.x, c9.y, r4.z;
mad r6.xy, r11.xy, r10.xx, r6.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c18; // HACKAGE
//mov r6.w, c13.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The screen position goes through r9 so its w can feed the fog term:
// oFog = (r9.w + c29.x) * c29.y
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
mov oPos, r9;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
// Usual texture transform
// z and w of both output UVs are set to 1 (c13.z); x and y are dp4's of the
// input UV with the transform rows.
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
dp4 r11.x, v8, c16;
dp4 r11.y, v8, c17;
mov oT1, r11;

View File

@ -1,192 +1,192 @@
vs.1.1
// Water wave vertex shader, horizontal displacement variant.
// Moves the vertex to world space, sums four sine waves (one per register
// channel) into a height, offsets X/Y by cosine sums weighted with c30/c31,
// then outputs screen position, fog, diffuse color and two transformed
// texture coordinates.
//
// Constant registers as read below:
// c0-c3 = world to screen transform (m4x4)
// c4 = kFreq, c5 = kPhase, c6 = kAmplitude (one wave per channel)
// c7, c8 = X and Y components of the four wave directions
// c10 = vSin polynomial coefficients, c11 = vCos polynomial coefficients
// c12.z = kPi, c12.w = kTwoPi
// c13.x = 0, c13.z = 1
// c14, c15 = texture transform rows for v7; c16, c17 = rows for v8
// c18-c20 = local to world transform (m4x3)
// c21 = per wave filter scale (presumably "waveLen" in the notes below - confirm)
// c22-c24 = depth attenuation constants (described below)
// c25.x = Z bias
// c26 = material color/opacity
// c29 = fog constants: oFog = (screen w + c29.x) * c29.y
// c30 = k Dir.x A, c31 = k Dir.y A (per wave X/Y displacement weights)
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
dcl_texcoord1 v8
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the instructions below compute min(max(v5.wwww * c21, 0), 1)
// with no "- 1" term - confirm which of the two is intended.
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// r0 = c7 * pos.x + c8 * pos.y, i.e. one dot product per wave channel.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist = (dist + kPi) * rcp(kTwoPi);
// (kPi is added here so that frac() followed by "* kTwoPi - kPi" below
// lands in [-Pi..Pi].)
rcp r4, c12.wwww;
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp leaves the fractional part of its input in .y, so r1.y is used as the
// scratch slot for the x, z and w channels; the y channel is done last so the
// final write leaves the correct value in r1.y.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
// cos = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = vCos.x + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2; // r2 += r0^7 * vSin.w
mad r1, r4, c11.wwww, r1; // r1 += r0^6 * vCos.w
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c22 - pos.z) * c23 + c24
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c21, 0, 1), one filter value per wave.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Dampen the height by the depth attenuation in r4.z, move it to the water
// level (c22.w), and never let the result drop below the input vertex height.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= filter;
mul r1, r1, r11;
// Pos = (in.x + S, in.y + R, r6.z)
// S = sum(k Dir.x A cos())
// R = sum(k Dir.y A cos())
// c30 = k Dir.x A
// c31 = k Dir.y A
// S = sum(cosDist * c30);
dp4 r7.x, r1, c30;
// R = sum(cosDist * c31);
dp4 r7.y, r1, c31;
// Only r7.xy are written and only r7.xy are read here.
add r6.xy, r6.xy, r7.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c18; // HACKAGE
//mov r6.w, c13.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The screen position goes through r9 so its w can feed the fog term:
// oFog = (r9.w + c29.x) * c29.y
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
mov oPos, r9;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
// Usual texture transform
// z and w of both output UVs are set to 1 (c13.z); x and y are dp4's of the
// input UV with the transform rows.
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
dp4 r11.x, v8, c16;
dp4 r11.y, v8, c17;
mov oT1, r11;
vs.1.1
// Water wave vertex shader, horizontal displacement variant.
// Moves the vertex to world space, sums four sine waves (one per register
// channel) into a height, offsets X/Y by cosine sums weighted with c30/c31,
// then outputs screen position, fog, diffuse color and two transformed
// texture coordinates.
//
// Constant registers as read below:
// c0-c3 = world to screen transform (m4x4)
// c4 = kFreq, c5 = kPhase, c6 = kAmplitude (one wave per channel)
// c7, c8 = X and Y components of the four wave directions
// c10 = vSin polynomial coefficients, c11 = vCos polynomial coefficients
// c12.z = kPi, c12.w = kTwoPi
// c13.x = 0, c13.z = 1
// c14, c15 = texture transform rows for v7; c16, c17 = rows for v8
// c18-c20 = local to world transform (m4x3)
// c21 = per wave filter scale (presumably "waveLen" in the notes below - confirm)
// c22-c24 = depth attenuation constants (described below)
// c25.x = Z bias
// c26 = material color/opacity
// c29 = fog constants: oFog = (screen w + c29.x) * c29.y
// c30 = k Dir.x A, c31 = k Dir.y A (per wave X/Y displacement weights)
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
dcl_texcoord1 v8
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the instructions below compute min(max(v5.wwww * c21, 0), 1)
// with no "- 1" term - confirm which of the two is intended.
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// r0 = c7 * pos.x + c8 * pos.y, i.e. one dot product per wave channel.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist = (dist + kPi) * rcp(kTwoPi);
// (kPi is added here so that frac() followed by "* kTwoPi - kPi" below
// lands in [-Pi..Pi].)
rcp r4, c12.wwww;
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp leaves the fractional part of its input in .y, so r1.y is used as the
// scratch slot for the x, z and w channels; the y channel is done last so the
// final write leaves the correct value in r1.y.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
// cos = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = vCos.x + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2; // r2 += r0^7 * vSin.w
mad r1, r4, c11.wwww, r1; // r1 += r0^6 * vCos.w
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c22 - pos.z) * c23 + c24
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c21, 0, 1), one filter value per wave.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Dampen the height by the depth attenuation in r4.z, move it to the water
// level (c22.w), and never let the result drop below the input vertex height.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= filter;
mul r1, r1, r11;
// Pos = (in.x + S, in.y + R, r6.z)
// S = sum(k Dir.x A cos())
// R = sum(k Dir.y A cos())
// c30 = k Dir.x A
// c31 = k Dir.y A
// S = sum(cosDist * c30);
dp4 r7.x, r1, c30;
// R = sum(cosDist * c31);
dp4 r7.y, r1, c31;
// Only r7.xy are written and only r7.xy are read here.
add r6.xy, r6.xy, r7.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c18; // HACKAGE
//mov r6.w, c13.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The screen position goes through r9 so its w can feed the fog term:
// oFog = (r9.w + c29.x) * c29.y
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
mov oPos, r9;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
// Usual texture transform
// z and w of both output UVs are set to 1 (c13.z); x and y are dp4's of the
// input UV with the transform rows.
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
dp4 r11.x, v8, c16;
dp4 r11.y, v8, c17;
mov oT1, r11;

View File

@ -1,298 +1,298 @@
vs.1.1
// Water wave vertex shader, environment mapped variant.
// Moves the vertex to world space, sums four sine waves (one per register
// channel) into a height, "scrunches" the vertex in X/Y along the normalized
// wave normal, then outputs screen position, fog, the base texture UV in oT0,
// a texture-to-world basis with the adjusted eye ray in .w in oT1-oT3, and
// the diffuse color.
//
// Constant registers as read below:
// c0-c3 = world to screen transform (m4x4)
// c4 = kFreq, c5 = kPhase, c6 = kAmplitude (one wave per channel)
// c7, c8 = X and Y components of the four wave directions
// c9.x = normal scrunch scale, c9.y = position scrunch scale
// c10 = vSin polynomial coefficients, c11 = vCos polynomial coefficients
// c12.z = kPi, c12.w = kTwoPi
// c13.x = 0, c13.z = 1
// c14, c15 = texture transform rows for v7
// c18-c20 = local to world transform (m4x3)
// c21 = per wave filter scale (presumably "waveLen" in the notes below - confirm)
// c22-c24 = depth attenuation constants (described below)
// c25.x = Z bias
// c26 = material color/opacity
// c27 = camera position
// c28 = kEnvAdjust
// c29 = fog constants: oFog = (screen w + c29.x) * c29.y
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
dcl_texcoord1 v8
dcl_texcoord2 v9
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the instructions below compute min(max(v5.wwww * c21, 0), 1)
// with no "- 1" term - confirm which of the two is intended.
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// r0 = c7 * pos.x + c8 * pos.y, i.e. one dot product per wave channel.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist = (dist + kPi) * rcp(kTwoPi);
// (kPi is added here so that frac() followed by "* kTwoPi - kPi" below
// lands in [-Pi..Pi].)
rcp r4, c12.wwww;
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp leaves the fractional part of its input in .y, so r1.y is used as the
// scratch slot for the x, z and w channels; the y channel is done last so the
// final write leaves the correct value in r1.y.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
// cos = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = vCos.x + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2; // r2 += r0^7 * vSin.w
mad r1, r4, c11.wwww, r1; // r1 += r0^6 * vCos.w
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c22 - pos.z) * c23 + c24
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c21, 0, 1), one filter value per wave.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Dampen the height by the depth attenuation in r4.z, move it to the water
// level (c22.w), and never let the result drop below the input vertex height.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c4;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c6;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 1);
// (w is set to 1 on purpose: the texture basis setup further down reads
// r7.z as 0 and r7.w as 1.)
mov r7, c13.xxxz;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos.x is currently 0)
dp4 r7.x, r1, -c7
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c8
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
// r11 = normalize(accumNorm). The .xyzz swizzle keeps r7.w (== 1) out of r11.w.
mov r11, c13.xxzx;
add r11, r11, r7.xyzz;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
// pos.xy += normal.xy * (c9.y * r4.z)
mul r10.x, c9.y, r4.z;
mad r6.xy, r11.xy, r10.xx, r6.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x4 oPos, r6, c0; // ADDFOG
// The screen position goes through r9 so its w can feed the fog term:
// oFog = (r9.w + c29.x) * c29.y
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
//mov oFog.x, c13.y;
mov oPos, r9;
// Calculate our normal scrunch and apply to our cosines.
// r7.xy *= (r6.z * c9.x + 1) * r4.z
mul r2.x, r6.z, c9.x;
add r2.x, r2.x, c13.z;
mul r2.x, r2.x, r4.z;
mul r7.xy, r7.xy, r2.xx;
// Now onto texture coordinate generation.
//
// First is the usual texture transform
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
// Calculate our basis vectors as input into our tex3x3vspec
// This would be like:
//add r1, c13.zxxx, r7.zzxz;
//add r2, c13.xzxx, r7.zzyz;
//sub r3, c13.xxzz, r7.xyzz;
// BUT =>
// Now r1-r3 are surface2world, but we still need to fold
// in texture2surface. That's imbedded in our uv's v8,v9, plus
// the normal we just computed into r11.
// So the full matrix multiply surface2world * texture2surface would be:
// | r1.v8 r1.v9 r1.(0,0,1) |
// | r2.v8 r2.v9 r2.(0,0,1) |
// | r3.v8 r3.v9 r3.(0,0,1) |
// But we notice that
// r1 = (1, 0, r7.x)
// r2 = (0, 1, r7.y)
// r3 = (-r7.x, -r7.y, 1)
// and also:
// r7.z == v8.z == v9.z == 0
// and r7.w == 1.0
//
// Considering the zeros, and doing the matrix multiply by hand, we get
// the final matrix of
// | v8.x v9.x r7.x |
// | v8.y v9.y r7.y |
// | -dp3(r7,v8) -dp3(r7,v9) 1 |
// So we wind up not needing r1-r3 at all
// Row 1: (v8.x, v9.x, r7.x). The add fills .x and .z (v8.z and r7.z are 0),
// the mov then overwrites .y.
add r1, v8.xzzz, r7.zzxw;
mov r1.y, v9.x;
// Row 2: (v8.y, v9.y, r7.y). The third column must come from r7.y, hence
// the zzyw swizzle here where row 1 uses zzxw.
add r2, v8.yzzz, r7.zzyw;
mov r2.y, v9.y;
// Row 3: (-dp3(r7,v8), -dp3(r7,v9), 1); r7.w supplies the 1.
dp3 r3.x, -r7, v8;
dp3 r3.y, -r7, v9;
mov r3.zw, r7.ww;
// Following section is debug only to skip the per-vert tangent space axes.
//add r1, c13.zxxx, r7.zzxw;
//add r2, c13.xzxx, r7.zzyw;
//
//mov r3.x, -r7.x;
//mov r3.y, -r7.y;
//mov r3.zw, c13.zz;
// See vs_WaveFixedFin6.inl for derivation of the following
// r0 = normalize(pos - camera)
sub r0, r6, c27; // c27 is camera position.
dp3 r10.x, r0, r0;
rsq r10.x, r10.x;
mul r0, r0, r10.xxxx;
// r10.x = dp3(r0, c28)
// r10.y = r10.x^2 - c28.w
// r10.z = sqrt(r10.y) + r10.x (sqrt computed as r10.y * rsq(r10.y))
// r0.xyz = r0 * r10.z - c28.xyz
dp3 r10.x, r0, c28; // c28 is kEnvAdjust
mad r10.y, r10.x, r10.x, -c28.w;
rsq r9.x, r10.y;
mad r10.z, r10.y, r9.x, r10.x;
mad r0.xyz, r0, r10.zzz, -c28.xyz;
// Store the negated adjusted eye ray in the .w of each basis row.
mov r1.w, -r0.x;
mov r2.w, -r0.y;
mov r3.w, -r0.z;
// Now r1-r3 are texture2world, with the eye-ray vector in .w. We just
// need to normalize them and bung them into output UV's 1-3.
// Note we're accounting for our environment map being flipped from
// D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2.
// r10.w = 1 so the multiply by r10.xxxw scales xyz by 1/length and passes
// the eye-ray component in .w through unchanged.
mov r10.w, c13.z;
dp3 r10.x, r1, r1;
rsq r10.x, r10.x;
mul oT1, r1, r10.xxxw;
dp3 r10.x, r3, r3;
rsq r10.x, r10.x;
mul oT2, r3, r10.xxxw;
//mul oT3, r3, r10.xxxw; // YZHACK
dp3 r10.x, r2, r2;
rsq r10.x, r10.x;
mul oT3, r2, r10.xxxw;
//mul oT2, r2, r10.xxxw;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
vs.1.1
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
dcl_texcoord1 v8
dcl_texcoord2 v9
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
rcp r4, c12.wwww;
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2;
mad r1, r4, c11.wwww, r1;
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c4;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c6;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c13.xxxz;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
dp4 r7.x, r1, -c7
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c8
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
mov r11, c13.xxzx;
add r11, r11, r7.xyzz;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
mul r10.x, c9.y, r4.z;
mad r6.xy, r11.xy, r10.xx, r6.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
//m4x4 oPos, r6, c0; // ADDFOG
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
//mov oFog.x, c13.y;
mov oPos, r9;
// Calculate our normal scrunch and apply to our cosines.
mul r2.x, r6.z, c9.x;
add r2.x, r2.x, c13.z;
mul r2.x, r2.x, r4.z;
mul r7.xy, r7.xy, r2.xx;
// Now onto texture coordinate generation.
//
// First is the usual texture transform
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
// Calculate our basis vectors as input into our tex3x3vspec
// This would be like:
//add r1, c13.zxxx, r7.zzxz;
//add r2, c13.xzxx, r7.zzyz;
//sub r3, c13.xxzz, r7.xyzz;
// BUT =>
// Now r1-r3 are surface2world, but we still need to fold
// in texture2surface. That's imbedded in our uv's v8,v9, plus
// the normal we just computed into r11.
// So the full matrix multiply surface2world * texture2surface would be:
// | r1.v8 r1.v9 r1.(0,0,1) |
// | r2.v8 r2.v9 r2.(0,0,1) |
// | r3.v8 r3.v9 r3.(0,0,1) |
// But we notice that
// r1 = (1, 0, r7.x)
// r2 = (0, 1, r7.y)
// r3 = (-r7.x, -r7.y, 1)
// and also:
// r7.z == v8.z == v9.z == 0
// and r7.w == 1.0
//
// Considering the zeros, and doing the matrix multiply by hand, we get
// the final matrix of
// | v8.x v9.x r7.x |
// | v8.y v9.y r7.y |
// | -dp3(r7,v8) -dp3(r7,v9) 1 |
// So we wind up not needing r1-r3 at all
add r1, v8.xzzz, r7.zzxw;
mov r1.y, v9.x;
add r2, v8.yzzz, r7.zzxw;
mov r2.y, v9.y;
dp3 r3.x, -r7, v8;
dp3 r3.y, -r7, v9;
mov r3.zw, r7.ww;
// Following section is debug only to skip the per-vert tangent space axes.
//add r1, c13.zxxx, r7.zzxw;
//add r2, c13.xzxx, r7.zzyw;
//
//mov r3.x, -r7.x;
//mov r3.y, -r7.y;
//mov r3.zw, c13.zz;
// See vs_WaveFixedFin6.inl for derivation of the following
sub r0, r6, c27; // c27 is camera position.
dp3 r10.x, r0, r0;
rsq r10.x, r10.x;
mul r0, r0, r10.xxxx;
dp3 r10.x, r0, c28; // c28 is kEnvAdjust
mad r10.y, r10.x, r10.x, -c28.w;
rsq r9.x, r10.y;
mad r10.z, r10.y, r9.x, r10.x;
mad r0.xyz, r0, r10.zzz, -c28.xyz;
mov r1.w, -r0.x;
mov r2.w, -r0.y;
mov r3.w, -r0.z;
// Now r1-r3 are texture2world, with the eye-ray vector in .w. We just
// need to normalize them and bung them into output UV's 1-3.
// Note we're accounting for our environment map being flipped from
// D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2.
mov r10.w, c13.z;
dp3 r10.x, r1, r1;
rsq r10.x, r10.x;
mul oT1, r1, r10.xxxw;
dp3 r10.x, r3, r3;
rsq r10.x, r10.x;
mul oT2, r3, r10.xxxw;
//mul oT3, r3, r10.xxxw; // YZHACK
dp3 r10.x, r2, r2;
rsq r10.x, r10.x;
mul oT3, r2, r10.xxxw;
//mul oT2, r2, r10.xxxw;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;

View File

@ -1,331 +1,331 @@
vs.1.0
// Water-surface vertex shader.
//
// Displaces each vertex by a sum of waves (one wave per xyzw channel of the
// per-wave constants), then generates fog, the base texture coordinate and a
// texture-to-world basis for the pixel stage.
//
// Reads:  v0 = position, v5 = vertex color (filter inputs, see below),
//         v7 = base UV, v8/v9 = texcoords used below as dPos/dU and dPos/dV.
// Writes: oPos, oFog, oT0 (transformed base UV), oT1-oT3 (texture-to-world rows
//         with the eye-ray vector in .w), oD0 (vertex color modulated by c26).
//
// Constant registers, as they are used by the instructions below:
//   c0-c3    world-to-screen matrix (m4x4)
//   c4       kFreq            c5   kPhase          c6   kAmplitude
//   c7, c8   x and y components of the wave direction vectors
//   c10      vSin polynomial coefficients (.y .z .w are read)
//   c11      vCos polynomial coefficients (.x .y .z .w are read)
//   c12      .z = Pi, .w = 2 * Pi
//   c13      .x is used as 0 and .z is used as 1 throughout
//   c14, c15 rows of the transform applied to v7 to produce oT0.xy
//   c18-c20  local-to-world matrix (m4x3)
//   c21      scale applied to v5.w to build the per-wave filter
//   c22-c24  depth attenuation constants (described below)
//   c25.x    z bias added to the displaced vertex
//   c26      material color/opacity
//   c27      camera position   c28  kEnvAdjust
//   c29      fog: oFog = (screen-space w + c29.x) * c29.y
//   c30, c31 k Dir.x A and k Dir.y A
//   c32-c37  factors for the P, N, Q, W, V, U sums (see the basis section)
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
dcl_texcoord1 v8
dcl_texcoord2 v9
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// r0 = c7 * pos.x + c8 * pos.y, one result per channel.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// The add of Pi (c12.z) before the divide is undone by the subtract of Pi after
// the frac below, so the result is dist wrapped into [-Pi..Pi).
rcp r4, c12.wwww;
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp with a .y write mask leaves the fractional part of its source in r1.y.
// The x, z and w channels are processed first and copied out of r1.y; the y
// channel is processed last so that its result simply stays in r1.y.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (the r0^7 and r0^6 terms are added a few instructions further down)
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2;
mad r1, r4, c11.wwww, r1;
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// ("it" is r4's previous content, the r0^6 power, whose last use is the mad above.)
// r4 = (c22 - pos.z) * c23 + c24
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
// (.w is left unclamped.)
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// NOTE(review): these three instructions compute clamp(v5.w * c21, 0, 1); there
// is no "- 1" term as in the formula in the v5.a comment above - confirm which
// of the two is current.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Scale the summed height by the depth attenuation in r4.z, lift it to the
// water level (c22.w), and keep whichever of that and the input z is higher.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= filter;
mul r1, r1, r11;
// Pos = (in.x + S, in.y + R, r6.z)
// S = sum(k Dir.x A cos())
// R = sum(k Dir.y A cos())
// c30 = k Dir.x A
// c31 = k Dir.y A
// S = sum(cosDist * c30);
dp4 r7.x, r1, c30;
// R = sum(cosDist * c31);
dp4 r7.y, r1, c31;
add r6.xy, r6.xy, r7.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
// The transform goes through r9 rather than straight to oPos so that the
// screen-space w can be read back for the fog value.
//m4x4 oPos, r6, c0; // ADDFOG
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
//mov oFog, c13.y;
mov oPos, r9;
// Now onto texture coordinate generation.
//
// First is the usual texture transform
// oT0 = (v7 . c14, v7 . c15, 1, 1)
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
// Calculate our basis vectors as input into our tex3x3vspec
// First we get our basis set off our surface.
// Okay, here we go:
// W == sum(k w Dir.x^2 A sin()) x
// V == sum(k w Dir.x Dir.y A sin()) x
// U == sum(k w Dir.y^2 A sin()) x
//
// T == sum(A sin())
//
// S == sum(k Dir.x A cos())
// R == sum(k Dir.y A cos())
//
// Q == sum(k w A cos()) x
//
// M == sum(A cos())
//
// P == sum(w Dir.x A cos()) x
// N == sum(w Dir.y A cos()) x
//
// Then:
// Pos = (in.x + S, in.y + R, waterheight + T) // Already done above.
//
// Bin = (1 - W, -V, P)
// Tan = (-V, 1 - U, N)
// Nor = (-P, -N, 1 - Q)
//
// The matrix
// |Bx, Tx, Nx|
// |By, Ty, Ny|
// |Bz, Tz, Nz|
// is surface2world, but we still need to fold in
// texture2surface. We'll go with the generalized
// (not assuming a flat surface) partials of dPos/dU and dPos/dV
// as coming in as uv coords v8 and v9.
// Then, if r5 = v8 X v9, then texture to surface is
// |v8.x, v9.x, r5.x|
// |v8.y, v9.y, r5.y|
// |v8.z, v9.z, r5.z|
//
// So, let's say we calc 3 vectors,
// r7 = (Bx, Tx, Nx)
// r8 = (By, Ty, Ny)
// r9 = (Bz, Tz, Nz)
//
// Then surface2world * texture2surface =
// |r7 dot v8, r7 dot v9, r7 dot r5|
// |r8 dot v8, r8 dot v9, r8 dot r5|
// |r9 dot v8, r9 dot v9, r9 dot r5|
//
// We will need r5 as v8 X v9
// v8 is copied into r7 first so each cross-product instruction names only one
// v# register (presumably because of the vs.1.x limit on vertex input reads
// per instruction - confirm). r7 is overwritten with the real basis row below.
mov r7, v8;
mul r5.xyz, r7.yzx, v9.zxy;
mad r5.xyz, r7.zxy, -v9.yzx, r5.xyz;
// Okay, r1 currently has the vector of cosines, and r2 has vector of sines.
// Everything will want that times amplitude, so go ahead and fold that in.
mul r1, r1, c6; // r1 = A cos() = M
// Sines already have amplitude folded in, so r2 = A sin() = T.
// Now just compute r7-9 one element at a time.
dp4 r7.x, r2, -c35; // r7.x = -W
dp4 r7.y, r2, -c36; // r7.y = -V
dp4 r7.z, r1, -c32; // r7.z = -P
add r7.x, r7.x, c13.z; // r7.x = 1 - W;
dp4 r8.x, r2, -c36; // r8.x = -V
dp4 r8.y, r2, -c37; // r8.y = -U
dp4 r8.z, r1, -c33; // r8.z = -N
add r8.y, r8.y, c13.z; // r8.y = 1 - U
// NOTE(review): the table above lists Q as a cosine sum, but the sine vector
// (r2) is what is dotted with c34 here - confirm which is intended.
dp4 r9.z, r2, -c34; // r9.z = -Q
mov r9.x, -r7.z; // r9.x = P = -r7.z
mov r9.y, -r8.z; // r9.y = N = -r8.z
add r9.z, r9.z, c13.z; // r9.z = 1 - Q
// Okay, got everything we need, construct r1-3 as surface2world*texture2surface.
// (r1 and r2 are overwritten here; the sine/cosine vectors are not needed again.)
dp3 r1.x, r7, v8;
dp3 r1.y, r7, v9;
dp3 r1.z, r7, r5;
dp3 r2.x, r8, v8;
dp3 r2.y, r8, v9;
dp3 r2.z, r8, r5;
dp3 r3.x, r9, v8;
dp3 r3.y, r9, v9;
dp3 r3.z, r9, r5;
// Following section is debug only to skip the per-vert tangent space axes.
//add r1, c13.zxxx, r7.zzxw;
//add r2, c13.xzxx, r7.zzyw;
//
//mov r3.x, -r7.x;
//mov r3.y, -r7.y;
//mov r3.zw, c13.zz;
// See vs_WaveFixedFin6.inl for derivation of the following
// In terms of the registers used here:
//   r0     = normalize(pos - c27)
//   r10.x  = r0 dot c28.xyz
//   r10.y  = r10.x^2 - c28.w
//   r10.z  = r10.x + sqrt(r10.y)     (sqrt formed as r10.y * rsq(r10.y))
//   r0.xyz = r0 * r10.z - c28.xyz
sub r0, r6, c27; // c27 is camera position.
dp3 r10.x, r0, r0;
rsq r10.x, r10.x;
mul r0, r0, r10.xxxx;
dp3 r10.x, r0, c28; // c28 is kEnvAdjust
mad r10.y, r10.x, r10.x, -c28.w;
rsq r9.x, r10.y;
mad r10.z, r10.y, r9.x, r10.x;
mad r0.xyz, r0, r10.zzz, -c28.xyz;
// ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump.
dp3 r10.x, r0, r0;
rsq r9.x, r10.x;
mul r0.xyz, r0.xyz, r9.xxx;
// Store the negated eye vector in the .w of the three basis rows.
mov r1.w, -r0.x;
mov r2.w, -r0.y;
mov r3.w, -r0.z;
// Now r1-r3 are texture2world, with the eye-ray vector in .w. We just
// need to normalize them and bung them into output UV's 1-3.
// Note we're accounting for our environment map being flipped from
// D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2.
// r10.w is set to 1 so the .xxxw multiplies below scale xyz by the reciprocal
// length while passing the eye-vector component in .w through unchanged.
mov r10.w, c13.z;
dp3 r10.x, r1, r1;
rsq r10.x, r10.x;
mul oT1, r1, r10.xxxw;
dp3 r10.x, r3, r3;
rsq r10.x, r10.x;
mul oT2, r3, r10.xxxw;
//mul oT3, r3, r10.xxxw; // YZHACK
dp3 r10.x, r2, r2;
rsq r10.x, r10.x;
mul oT3, r2, r10.xxxw;
//mul oT2, r2, r10.xxxw;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;
vs.1.0
// Water-surface vertex shader.
//
// Displaces each vertex by a sum of waves (one wave per xyzw channel of the
// per-wave constants), then generates fog, the base texture coordinate and a
// texture-to-world basis for the pixel stage.
//
// Reads:  v0 = position, v5 = vertex color (filter inputs, see below),
//         v7 = base UV, v8/v9 = texcoords used below as dPos/dU and dPos/dV.
// Writes: oPos, oFog, oT0 (transformed base UV), oT1-oT3 (texture-to-world rows
//         with the eye-ray vector in .w), oD0 (vertex color modulated by c26).
//
// Constant registers, as they are used by the instructions below:
//   c0-c3    world-to-screen matrix (m4x4)
//   c4       kFreq            c5   kPhase          c6   kAmplitude
//   c7, c8   x and y components of the wave direction vectors
//   c10      vSin polynomial coefficients (.y .z .w are read)
//   c11      vCos polynomial coefficients (.x .y .z .w are read)
//   c12      .z = Pi, .w = 2 * Pi
//   c13      .x is used as 0 and .z is used as 1 throughout
//   c14, c15 rows of the transform applied to v7 to produce oT0.xy
//   c18-c20  local-to-world matrix (m4x3)
//   c21      scale applied to v5.w to build the per-wave filter
//   c22-c24  depth attenuation constants (described below)
//   c25.x    z bias added to the displaced vertex
//   c26      material color/opacity
//   c27      camera position   c28  kEnvAdjust
//   c29      fog: oFog = (screen-space w + c29.x) * c29.y
//   c30, c31 k Dir.x A and k Dir.y A
//   c32-c37  factors for the P, N, Q, W, V, U sums (see the basis section)
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
dcl_texcoord1 v8
dcl_texcoord2 v9
// Store our input position in world space in r6
m4x3 r6, v0, c18; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c13.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = illumination
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c22 = waterlevel + offset
// c23 = (maxAtten - minAtten) / depthFalloff
// c24 = minAtten.
// And in particular:
// c22.w = waterlevel
// c23.w = 1.f;
// c24.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// r0 = c7 * pos.x + c8 * pos.y, one result per channel.
mul r0, c7, r6.xxxx;
mad r0, c8, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c4;
add r0, r0, c5;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// The add of Pi (c12.z) before the divide is undone by the subtract of Pi after
// the frac below, so the result is dist wrapped into [-Pi..Pi).
rcp r4, c12.wwww;
add r0, r0, c12.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp with a .y write mask leaves the fractional part of its source in r1.y.
// The x, z and w channels are processed first and copied out of r1.y; the y
// channel is processed last so that its result simply stays in r1.y.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c12.wwww;
// dist += -kPi;
sub r0, r0, c12.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (the r0^7 and r0^6 terms are added a few instructions further down)
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c10.wwww, r2;
mad r1, r4, c11.wwww, r1;
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// ("it" is r4's previous content, the r0^6 power, whose last use is the mad above.)
// r4 = (c22 - pos.z) * c23 + c24
sub r4, c22, r6.zzzz;
mul r4, r4, c23;
add r4, r4, c24;
// Clamp .xyz to range [0..1]
// (.w is left unclamped.)
min r4.xyz, r4, c13.zzzz;
max r4.xyz, r4, c13.xxxx;
//mov r4.xyz, c13.xxx; // HACKTEST
// Calc our filter (see above).
// NOTE(review): these three instructions compute clamp(v5.w * c21, 0, 1); there
// is no "- 1" term as in the formula in the v5.a comment above - confirm which
// of the two is current.
mul r11, v5.wwww, c21;
max r11, r11, c13.xxxx;
min r11, r11, c13.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c6;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c13.zzzz;
// Scale the summed height by the depth attenuation in r4.z, lift it to the
// water level (c22.w), and keep whichever of that and the input z is higher.
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c22.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= filter;
mul r1, r1, r11;
// Pos = (in.x + S, in.y + R, r6.z)
// S = sum(k Dir.x A cos())
// R = sum(k Dir.y A cos())
// c30 = k Dir.x A
// c31 = k Dir.y A
// S = sum(cosDist * c30);
dp4 r7.x, r1, c30;
// R = sum(cosDist * c31);
dp4 r7.y, r1, c31;
add r6.xy, r6.xy, r7.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c25.x, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c25.x;
//
// // Transform position to screen
//
//
// The transform goes through r9 rather than straight to oPos so that the
// screen-space w can be read back for the fog value.
//m4x4 oPos, r6, c0; // ADDFOG
m4x4 r9, r6, c0;
add r10.x, r9.w, c29.x;
mul oFog, r10.x, c29.y;
//mov oFog, c13.y;
mov oPos, r9;
// Now onto texture coordinate generation.
//
// First is the usual texture transform
// oT0 = (v7 . c14, v7 . c15, 1, 1)
mov r11.zw, c13.zzzz;
dp4 r11.x, v7, c14;
dp4 r11.y, v7, c15;
mov oT0, r11;
// Calculate our basis vectors as input into our tex3x3vspec
// First we get our basis set off our surface.
// Okay, here we go:
// W == sum(k w Dir.x^2 A sin()) x
// V == sum(k w Dir.x Dir.y A sin()) x
// U == sum(k w Dir.y^2 A sin()) x
//
// T == sum(A sin())
//
// S == sum(k Dir.x A cos())
// R == sum(k Dir.y A cos())
//
// Q == sum(k w A cos()) x
//
// M == sum(A cos())
//
// P == sum(w Dir.x A cos()) x
// N == sum(w Dir.y A cos()) x
//
// Then:
// Pos = (in.x + S, in.y + R, waterheight + T) // Already done above.
//
// Bin = (1 - W, -V, P)
// Tan = (-V, 1 - U, N)
// Nor = (-P, -N, 1 - Q)
//
// The matrix
// |Bx, Tx, Nx|
// |By, Ty, Ny|
// |Bz, Tz, Nz|
// is surface2world, but we still need to fold in
// texture2surface. We'll go with the generalized
// (not assuming a flat surface) partials of dPos/dU and dPos/dV
// as coming in as uv coords v8 and v9.
// Then, if r5 = v8 X v9, then texture to surface is
// |v8.x, v9.x, r5.x|
// |v8.y, v9.y, r5.y|
// |v8.z, v9.z, r5.z|
//
// So, let's say we calc 3 vectors,
// r7 = (Bx, Tx, Nx)
// r8 = (By, Ty, Ny)
// r9 = (Bz, Tz, Nz)
//
// Then surface2world * texture2surface =
// |r7 dot v8, r7 dot v9, r7 dot r5|
// |r8 dot v8, r8 dot v9, r8 dot r5|
// |r9 dot v8, r9 dot v9, r9 dot r5|
//
// We will need r5 as v8 X v9
// v8 is copied into r7 first so each cross-product instruction names only one
// v# register (presumably because of the vs.1.x limit on vertex input reads
// per instruction - confirm). r7 is overwritten with the real basis row below.
mov r7, v8;
mul r5.xyz, r7.yzx, v9.zxy;
mad r5.xyz, r7.zxy, -v9.yzx, r5.xyz;
// Okay, r1 currently has the vector of cosines, and r2 has vector of sines.
// Everything will want that times amplitude, so go ahead and fold that in.
mul r1, r1, c6; // r1 = A cos() = M
// Sines already have amplitude folded in, so r2 = A sin() = T.
// Now just compute r7-9 one element at a time.
dp4 r7.x, r2, -c35; // r7.x = -W
dp4 r7.y, r2, -c36; // r7.y = -V
dp4 r7.z, r1, -c32; // r7.z = -P
add r7.x, r7.x, c13.z; // r7.x = 1 - W;
dp4 r8.x, r2, -c36; // r8.x = -V
dp4 r8.y, r2, -c37; // r8.y = -U
dp4 r8.z, r1, -c33; // r8.z = -N
add r8.y, r8.y, c13.z; // r8.y = 1 - U
// NOTE(review): the table above lists Q as a cosine sum, but the sine vector
// (r2) is what is dotted with c34 here - confirm which is intended.
dp4 r9.z, r2, -c34; // r9.z = -Q
mov r9.x, -r7.z; // r9.x = P = -r7.z
mov r9.y, -r8.z; // r9.y = N = -r8.z
add r9.z, r9.z, c13.z; // r9.z = 1 - Q
// Okay, got everything we need, construct r1-3 as surface2world*texture2surface.
// (r1 and r2 are overwritten here; the sine/cosine vectors are not needed again.)
dp3 r1.x, r7, v8;
dp3 r1.y, r7, v9;
dp3 r1.z, r7, r5;
dp3 r2.x, r8, v8;
dp3 r2.y, r8, v9;
dp3 r2.z, r8, r5;
dp3 r3.x, r9, v8;
dp3 r3.y, r9, v9;
dp3 r3.z, r9, r5;
// Following section is debug only to skip the per-vert tangent space axes.
//add r1, c13.zxxx, r7.zzxw;
//add r2, c13.xzxx, r7.zzyw;
//
//mov r3.x, -r7.x;
//mov r3.y, -r7.y;
//mov r3.zw, c13.zz;
// See vs_WaveFixedFin6.inl for derivation of the following
// In terms of the registers used here:
//   r0     = normalize(pos - c27)
//   r10.x  = r0 dot c28.xyz
//   r10.y  = r10.x^2 - c28.w
//   r10.z  = r10.x + sqrt(r10.y)     (sqrt formed as r10.y * rsq(r10.y))
//   r0.xyz = r0 * r10.z - c28.xyz
sub r0, r6, c27; // c27 is camera position.
dp3 r10.x, r0, r0;
rsq r10.x, r10.x;
mul r0, r0, r10.xxxx;
dp3 r10.x, r0, c28; // c28 is kEnvAdjust
mad r10.y, r10.x, r10.x, -c28.w;
rsq r9.x, r10.y;
mad r10.z, r10.y, r9.x, r10.x;
mad r0.xyz, r0, r10.zzz, -c28.xyz;
// ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump.
dp3 r10.x, r0, r0;
rsq r9.x, r10.x;
mul r0.xyz, r0.xyz, r9.xxx;
// Store the negated eye vector in the .w of the three basis rows.
mov r1.w, -r0.x;
mov r2.w, -r0.y;
mov r3.w, -r0.z;
// Now r1-r3 are texture2world, with the eye-ray vector in .w. We just
// need to normalize them and bung them into output UV's 1-3.
// Note we're accounting for our environment map being flipped from
// D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2.
// r10.w is set to 1 so the .xxxw multiplies below scale xyz by the reciprocal
// length while passing the eye-vector component in .w through unchanged.
mov r10.w, c13.z;
dp3 r10.x, r1, r1;
rsq r10.x, r10.x;
mul oT1, r1, r10.xxxw;
dp3 r10.x, r3, r3;
rsq r10.x, r10.x;
mul oT2, r3, r10.xxxw;
//mul oT3, r3, r10.xxxw; // YZHACK
dp3 r10.x, r2, r2;
rsq r10.x, r10.x;
mul oT3, r2, r10.xxxw;
//mul oT2, r2, r10.xxxw;
// Output color is vertex green
// Output alpha is vertex red (vtx alpha is used for wave filtering)
// Whole thing modulated by material color/opacity.
mul oD0, v5.yyyx, c26;

View File

@ -1,449 +1,449 @@
vs.1.1
dcl_position v0
dcl_color v5
// Store our input position in world space in r6
m4x3 r6, v0, c21; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c16.zzzz;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = reflection strength (transparency)
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c25 = waterlevel + offset
// c26 = (maxAtten - minAtten) / depthFalloff
// c27 = minAtten.
// And in particular:
// c25.w = waterlevel
// c26.w = 1.f;
// c27.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
rcp r4, c15.wwww;
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2;
mad r1, r4, c14.wwww, r1;
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
sub r4, c25, r6.zzzz;
mul r4, r4, c26;
add r4, r4, c27;
// Clamp .xyz to range [0..1]
min r4.xyz, r4, c16.zzzz;
max r4.xyz, r4, c16.xxxx;
// Calc our filter (see above).
mul r11, v5.wwww, c24;
max r11, r11, c16.xxxx;
min r11, r11, c16.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c7;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c16.zzzz;
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c25.w;
max r6.z, r6.z, r8.z; // CLAMP
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c5;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c7;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c16.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
dp4 r7.x, r1, -c8
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c9
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
mov r11, c16.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// // Scrunch in based on computed (normalized) normal
// temp = mul( accumNorm, kNegScrunchScale ); // kNegScrunchScale = (-scrunchScale, -scrunchScale, 0, 0);
// accumPos += temp;
//dp3 r10.x, r11, c18.zxw; // winddir.x, winddir.y, 0, 0 // NUKE
// r10.x tells us whether our normal is opposed to the wind.
// If opposed, r10.x = 0, else r10.x = 1.f;
// We'll use this to kill the Scrunch on the back sides of waves.
// We use it for position right here, and then again for the
// normal just down a bit further.
//slt r10.x, r10.x, c16.x; // NUKE
//mov r10.x, c16.z; // HACKAGE NUKE
//mul r9, r10.xxxx, r11; // NUKE
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
mul r10.x, c12.y, r4.z;
//mov r10.x, c12.y; // NUKETEST TAKEOUT
mad r6.xy, r11.xy, r10.xx, r6.xy;
// mul r6.z, r6.z, r10.xxxx; DEBUG
// mad r6, r11, c12.yyzz, r6;
// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
// accumCos *= (scrunchScale, scrunchScale, 0, 0);
mul r2.x, r6.z, c12.x;
//mad r2.x, r2.x, r10.x, c16.z; NUKE
add r2.x, r2.x, c16.z;
mul r2.x, r2.x, r4.z; // HACKAGE // NUKETEST BACKIN
// mul r7, r7, c12.xxzz;
mul r7.xy, r7.xy, r2.xx;
// This is actually wrong, but useful right now for visualizing the generated coords.
// See below for correct version.
sub r3, c16.xxzz, r7.xyzz;
//mov oD0, r3; // SEENORM
dp3 r8.x, r3, c18.zxww; // WAVEFACE
mul r8.x, r8.x, c12.w; // WAVEFACE
max r8.x, r8.x, c16.x; // WAVEFACE
min r8.x, r8.x, c16.z; // WAVEFACE
//mov r9.x, c12.z;
//add r9.x, r9.x, -c16.z;
//mad r8.x, r9.x, r8.x, c16.z; // WAVEFACE
mul r8.x, r8.x, -c16.z;
add r8.x, r8.x, c16.z;
// Normalize?
// We can either calculate an orthonormal basis from the
// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
// or compute our basis directly from the partial derivatives, with
// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
//
// These work out to identically the same result, so we'll compute directly
// from the partials because it takes 2 fewer instructions.
//
// Note that our basis is NOT orthonormal. The Normal is equal to
// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
// are both correct tangents to the surface, and their projections on the XY plane
// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
// Not really. I'm actually not really sure which is more "proper" for bump mapping.
//
// Note also that we add when we should subtract and subtract when we should
// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
// explanation.
//
// Binormal = Y % Normal
// Cross product3 is:
// mul res.xyz, a.yzx, b.zxy
// mad res.xyz, -a.zxy, b.yzx, res.xyz
// mul r1.xyz, c16.zxx, r3.zxy;
// mad r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
// Tangent = Normal % X
// mul r2.xyz, r3.yzx, c16.xzx;
// mad r2.xyz, -r3.zxy, c16.xxz, r2;
add r1, c16.zxxx, r7.zzxz;
add r2, c16.xzxx, r7.zzyz;
// Note that we're swapping z and y to match our environment map tools in max.
// We do this through our normal map transform (oT1, oT2, oT3), making it
// a concatenation of:
//
// rotate about Z (blue) to turn our map into the wind
// windRot = | dirY -dirX 0 |
// | dirX dirY 0 |
// | 0 0 1 |
//
// swap our Y and Z axes to match our environment map
// swapYZ = | 1 0 0 |
// | 0 0 1 |
// | 0 1 0 |
//
// rotate the normal into the surface's tangent space basis
// basis = | Bx Tx Nx |
// | By Ty Ny |
// | Bz Tz Nz |
//
// Note that we've constructed the basis by taking advantage of the
// matrix being a pure rotation, as noted below, so r1, r2 and r3
// are actually constructed as:
// basis = | Bx -By -Bz |
// | -Tx Ty -Tz |
// | -Nx -Ny -Nz |
//
// Then the final normal map transform is:
//
// basis * swapYZ * windRot [ * normal ]
// sub r1.w, c17.x, r6.x;
// sub r2.w, c17.z, r6.z;
// sub r3.w, c17.y, r6.y;
// Big note here. All this math can blow up if the camera position
// is outside the environment sphere. It's assumed that's dealt
// with in the app setting up the constants. For that reason, the
// camera position used here might not be the real local camera position,
// which is needed for the angular attenuation, so we burn another constant
// with our pseudo-camera position. To restrain the pseudo-camera from
// leaving the sphere, we make:
// pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
// where dist = |realPos - envCenter|
// So, our "finitized" eyeray is:
// camPos + D * t - envCenter = D * t - (envCenter - camPos)
// with
// D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
// and
// t = D dot F + sqrt( (D dot F)^2 - G )
// with
// F = (envCenter - camPos) => c19.xyz
// G = F^2 - R^2 => c19.w
// R = environment radius. => unused
//
// This all derives from the positive root of equation
// (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
// In other words, where on a sphere of radius R centered about envCenter
// does the ray from the real camera position through this point hit.
//
// Note that F, G, and R are all constants (one point, two scalars).
//
// So first we calculate D into r0,
// then D dot F into r10.x,
// then (D dot F)^2 - G into r10.y
// then rsq( (D dot F)^2 - G ) into r9.x;
// then t = r10.z = r10.x + r10.y * r9.x;
// and
// r0 = D * t - (envCenter - camPos)
// = r0 * r10.zzzz - F;
//
sub r0, r6, c17;
dp3 r10.x, r0, r0;
rsq r10.x, r10.x;
mul r0, r0, r10.xxxx; // r0 = D
dp3 r10.x, r0, c19; // r10.x = D dot F
mad r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G
rsq r9.x, r10.y; // r9.x = 1/SQRT((D dot F)^2 - G)
mad r10.z, r10.y, r9.x, r10.x; // r10.z = D dot F + SQRT((D dot F)^2 - G)
mad r0.xyz, r0, r10.zzz, -c19.xyz; // r0.xyz = D * t - (envCenter - camPos)
mov r1.w, -r0.x;
mov r2.w, -r0.y;
mov r3.w, -r0.z;
// Now rotate our basis vectors into the wind
// This should be redone, and put our wind direction into
// the water texture.
dp3 r0.x, r1, c18.xyww;
dp3 r0.y, r1, c18.zxww;
mov r1.xy, r0;
dp3 r0.x, r2, c18.xyww;
dp3 r0.y, r2, c18.zxww;
mov r2.xy, r0;
dp3 r0.x, r3, c18.xyww;
dp3 r0.y, r3, c18.zxww;
mov r3.xy, r0;
mov r0.zw, c16.zzxz;
dp3 r0.x, r1, r1;
rsq r0.x, r0.x;
mul oT1, r1.xyzw, r0.xxxw;
// mul r8, r1.xyzw, r0.xxxw; // VISUAL
dp3 r0.x, r2, r2;
rsq r0.x, r0.x;
mul oT3, r2.xyzw, r0.xxxw;
// mul r9, r2.xyzw, r0.xxxw; // VISUAL
dp3 r0.x, r3, r3;
rsq r0.x, r0.x;
mul oT2, r3.xyzw, r0.xxxw;
// mul r9, r3.xyzw, r0.xxxw; // VISUAL
// mul r3, r3.xzyw, r0.xxxw;
// mul r3.xy, r3, -c16.zzzz;
/*
// Want:
// oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
// oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
// ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
// with BIN, TAN, and NORM normalized.
// Unnormalized, we have
// BIN = (1, 0, -r7.x) where r7 == accumCos
// TAN = (0, 1, -r7.y)
// NORM= (r7.x, r7.y, 1)
// So, unnormalized, we have
// oT1 = (1, 0, r7.x, view2pos.x)
// oT2 = (0, 1, r7.y, view2pos.y)
// oT3 = (-r7.x, -r7.y, 1, view2pos.z)
// which is just reversing the signs on the accumCos
// terms above. So the normalized version is just
// reversing the signs on the normalized version above.
*/
//mov oT3, r4;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c21; // HACKAGE
//mov r6.w, c16.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
m4x4 r9, r6, c0;
add r10.x, r9.w, c28.x;
mul oFog, r10.x, c28.y;
//mov oFog, c16.y; // TESTFOGHACK
mov oPos, r9;
mov oD0, c4; // SEENORM
// Transform our uvw
dp4 r0.x, v0, c10;
dp4 r0.y, v0, c11;
//mov r0.zw, c16.xxxz;
mov oT0, r0
// Questionble attenuation follows
// Find vector from this point to camera and normalize
sub r0, c17, r6;
dp3 r1.x, r0, r0;
rsq r1.x, r1.x;
mul r0, r0, r1.xxxx;
// Dot that with the computed normal
dp3 r1.x, r0, r11;
mul r1.x, r1.x, v5.z;
// dp3 r1.x, r0, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
// Map dot=1 => 0, dot=0 => 1
sub r1.xyzw, c16.zzzz, r1.xxxx;
add r1.w, r1.wwww, c16.zzzz;
mul r1.w, r1.wwww, c16.yyyy;
// No need to clamp, since the destination register (in the pixel shader)
// will saturate [0..1] anyway.
//%%% mul r1.w, r1.w, r4.x;
//%%% mul r1.xyz, r1.xyz, r4.yyy;
mul r1, r1, r4.yyyx; // HACKTESTCOLOR
mul r1.xyz, r1, r8.xxx; // WAVEFACE
mul r1.w, r1.wwww, v5.xxxx;
mul oD1, r1, c20;
// mov oD1, r4.yyyy;
//mov oD1, c16.zzzz; // HACKAGE
// mov oD1, r9;
// mov oD1, r8.xzyw;
vs.1.1
// Water vertex shader. Displaces the vertex by a sum of four sine waves
// (one wave per register lane), builds the normal map transform rows that
// go out in oT1-oT3, and writes position, fog, uv and vertex colors.
// Inputs: v0 = position, v5 = per-vertex scale/filter color (see below).
// c16.x is used as 0 and c16.z as 1 throughout this shader.
dcl_position v0
dcl_color v5
// Store our input position in world space in r6
m4x3 r6, v0, c21; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c16.zzzz; // r6.w = 1, so r6 can go through the m4x4 with c0 later
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = reflection strength (transparency)
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c25 = waterlevel + offset
// c26 = (maxAtten - minAtten) / depthFalloff
// c27 = minAtten.
// And in particular:
// c25.w = waterlevel
// c26.w = 1.f;
// c27.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// Each lane of r0 is one wave: r0 = c8 * pos.x + c9 * pos.y.
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// c15.z is added here before the divide and subtracted again after the
// frac/multiply below, so the wrapped phase is centred on zero.
// r4 is only a scratch for 1/c15.w here; it is recycled further down.
rcp r4, c15.wwww;
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp leaves the fractional part of its source in .y, so each lane is
// routed through r1.y and copied out to its own lane. The y lane is done
// last so that its own result is what remains in r1.y.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// Polynomial evaluation of sin and cos for all four waves at once.
// Coefficients are read from c13 (sine) and c14 (cosine); c14.x is the
// constant term of the cosine series.
// r4 (1/kTwoPi above) and r5 are used as scratch for the higher powers.
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2; // r2 += r0^7 * vSin.w  => sinDist
mad r1, r4, c14.wwww, r1; // r1 += r0^6 * vCos.w  => cosDist
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c25 - pos.z) * c26 + c27, per channel.
sub r4, c25, r6.zzzz;
mul r4, r4, c26;
add r4, r4, c27;
// Clamp .xyz to range [0..1]
// (.w is deliberately left unclamped by the write masks.)
min r4.xyz, r4, c16.zzzz;
max r4.xyz, r4, c16.xxxx;
// Calc our filter (see above).
// r11 = clamp(v5.w * c24, 0, 1), one filter value per wave.
// c24 is presumably the per-wave length (waveLen in the header) - confirm
// against the app's constant setup.
// NOTE(review): the formula quoted in the header comment includes a "- 1"
// term; no subtraction is performed here - confirm whether that is intended.
mul r11, v5.wwww, c24;
max r11, r11, c16.xxxx;
min r11, r11, c16.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c7;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c16.zzzz; // sum the four per-wave heights
mul r8.y, r8.x, r4.z; // scale by the depth-based wave attenuation (r4.z)
add r8.z, r8.y, c25.w; // offset by c25.w (water level, per the header)
max r6.z, r6.z, r8.z; // CLAMP
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c5;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c7;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
// Only r7.x and r7.y are written below; r7.z and r7.w stay 0, which the
// later swizzled reads of r7 (r7.zzxz, r7.zzyz, r7.xyzz) rely on.
mov r7, c16.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
dp4 r7.x, r1, -c8
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c9
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
// r11 (the per-wave filter) is no longer needed and is reused for the
// normal: r11 = normalize(accumCos.x, accumCos.y, 1).
mov r11, c16.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// // Scrunch in based on computed (normalized) normal
// temp = mul( accumNorm, kNegScrunchScale ); // kNegScrunchScale = (-scrunchScale, -scrunchScale, 0, 0);
// accumPos += temp;
//dp3 r10.x, r11, c18.zxw; // winddir.x, winddir.y, 0, 0 // NUKE
// r10.x tells us whether our normal is opposed to the wind.
// If opposed, r10.x = 0, else r10.x = 1.f;
// We'll use this to kill the Scrunch on the back sides of waves.
// We use it for position right here, and then again for the
// normal just down a bit further.
//slt r10.x, r10.x, c16.x; // NUKE
//mov r10.x, c16.z; // HACKAGE NUKE
//mul r9, r10.xxxx, r11; // NUKE
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
mul r10.x, c12.y, r4.z;
//mov r10.x, c12.y; // NUKETEST TAKEOUT
mad r6.xy, r11.xy, r10.xx, r6.xy; // pos.xy += normal.xy * scrunch amount
// mul r6.z, r6.z, r10.xxxx; DEBUG
// mad r6, r11, c12.yyzz, r6;
// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
// accumCos *= (scrunchScale, scrunchScale, 0, 0);
// r2.x = (pos.z * c12.x + 1) * r4.z, then used to scale accumCos.xy.
mul r2.x, r6.z, c12.x;
//mad r2.x, r2.x, r10.x, c16.z; NUKE
add r2.x, r2.x, c16.z;
mul r2.x, r2.x, r4.z; // HACKAGE // NUKETEST BACKIN
// mul r7, r7, c12.xxzz;
mul r7.xy, r7.xy, r2.xx;
// This is actually wrong, but useful right now for visualizing the generated coords.
// See below for correct version.
// r3 = (-accumCos.x, -accumCos.y, 1, 1), since r7.z is still 0.
sub r3, c16.xxzz, r7.xyzz;
//mov oD0, r3; // SEENORM
// WAVEFACE: r8.x = 1 - clamp(dot(r3, c18.zxw) * c12.w, 0, 1).
// This overwrites the wave height previously kept in r8.x.
dp3 r8.x, r3, c18.zxww; // WAVEFACE
mul r8.x, r8.x, c12.w; // WAVEFACE
max r8.x, r8.x, c16.x; // WAVEFACE
min r8.x, r8.x, c16.z; // WAVEFACE
//mov r9.x, c12.z;
//add r9.x, r9.x, -c16.z;
//mad r8.x, r9.x, r8.x, c16.z; // WAVEFACE
mul r8.x, r8.x, -c16.z;
add r8.x, r8.x, c16.z;
// Normalize?
// We can either calculate an orthonormal basis from the
// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
// or compute our basis directly from the partial derivatives, with
// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
//
// These work out to identically the same result, so we'll compute directly
// from the partials because it takes 2 fewer instructions.
//
// Note that our basis is NOT orthonormal. The Normal is equal to
// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
// are both correct tangents to the surface, and their projections on the XY plane
// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
// Not really. I'm actually not really sure which is more "proper" for bump mapping.
//
// Note also that we add when we should subtract and subtract when we should
// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
// explanation.
//
// Binormal = Y % Normal
// Cross product3 is:
// mul res.xyz, a.yzx, b.zxy
// mad res.xyz, -a.zxy, b.yzx, res.xyz
// mul r1.xyz, c16.zxx, r3.zxy;
// mad r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
// Tangent = Normal % X
// mul r2.xyz, r3.yzx, c16.xzx;
// mad r2.xyz, -r3.zxy, c16.xxz, r2;
// r7.z is 0 here, so these build the first two transform rows directly:
add r1, c16.zxxx, r7.zzxz; // r1 = (1, 0, accumCos.x, 0)
add r2, c16.xzxx, r7.zzyz; // r2 = (0, 1, accumCos.y, 0)
// Note that we're swapping z and y to match our environment map tools in max.
// We do this through our normal map transform (oT1, oT2, oT3), making it
// a concatenation of:
//
// rotate about Z (blue) to turn our map into the wind
// windRot = | dirY -dirX 0 |
// | dirX dirY 0 |
// | 0 0 1 |
//
// swap our Y and Z axes to match our environment map
// swapYZ = | 1 0 0 |
// | 0 0 1 |
// | 0 1 0 |
//
// rotate the normal into the surface's tangent space basis
// basis = | Bx Tx Nx |
// | By Ty Ny |
// | Bz Tz Nz |
//
// Note that we've constructed the basis by taking advantage of the
// matrix being a pure rotation, as noted below, so r1, r2 and r3
// are actually constructed as:
// basis = | Bx -By -Bz |
// | -Tx Ty -Tz |
// | -Nx -Ny -Nz |
//
// Then the final normal map transform is:
//
// basis * swapYZ * windRot [ * normal ]
// sub r1.w, c17.x, r6.x;
// sub r2.w, c17.z, r6.z;
// sub r3.w, c17.y, r6.y;
// Big note here. All this math can blow up if the camera position
// is outside the environment sphere. It's assumed that's dealt
// with in the app setting up the constants. For that reason, the
// camera position used here might not be the real local camera position,
// which is needed for the angular attenuation, so we burn another constant
// with our pseudo-camera position. To restrain the pseudo-camera from
// leaving the sphere, we make:
// pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
// where dist = |realPos - envCenter|
// So, our "finitized" eyeray is:
// camPos + D * t - envCenter = D * t - (envCenter - camPos)
// with
// D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
// and
// t = D dot F + sqrt( (D dot F)^2 - G )
// with
// F = (envCenter - camPos) => c19.xyz
// G = F^2 - R^2 => c19.w
// R = environment radius. => unused
//
// This all derives from the positive root of equation
// (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
// In other words, where on a sphere of radius R centered about envCenter
// does the ray from the real camera position through this point hit.
//
// Note that F, G, and R are all constants (one point, two scalars).
//
// So first we calculate D into r0,
// then D dot F into r10.x,
// then (D dot F)^2 - G into r10.y
// then rsq( (D dot F)^2 - G ) into r9.x;
// then t = r10.z = r10.x + r10.y * r9.x;
// and
// r0 = D * t - (envCenter - camPos)
// = r0 * r10.zzzz - F;
//
// Intersect the eye ray with the environment sphere (derivation above)
// and store the negated result in the .w lanes of the three transform rows.
sub r0, r6, c17; // r0 = pos - camPos (unnormalized)
dp3 r10.x, r0, r0;
rsq r10.x, r10.x;
mul r0, r0, r10.xxxx; // r0 = D
dp3 r10.x, r0, c19; // r10.x = D dot F
mad r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G
rsq r9.x, r10.y; // r9.x = 1/SQRT((D dot F)^2 - G)
// value * rsq(value) == sqrt(value), hence the r10.y * r9.x product below.
mad r10.z, r10.y, r9.x, r10.x; // r10.z = D dot F + SQRT((D dot F)^2 - G)
mad r0.xyz, r0, r10.zzz, -c19.xyz; // r0.xyz = D * t - (envCenter - camPos)
mov r1.w, -r0.x;
mov r2.w, -r0.y;
mov r3.w, -r0.z;
// Now rotate our basis vectors into the wind
// This should be redone, and put our wind direction into
// the water texture.
// For each row: new.x = dot(row.xyz, c18.xyw), new.y = dot(row.xyz, c18.zxw).
// Only .xy of the row is replaced; .z and .w are left as they were.
// c18.w is presumably 0 so that row.z does not contribute - confirm
// against the app's constant setup.
dp3 r0.x, r1, c18.xyww;
dp3 r0.y, r1, c18.zxww;
mov r1.xy, r0;
dp3 r0.x, r2, c18.xyww;
dp3 r0.y, r2, c18.zxww;
mov r2.xy, r0;
dp3 r0.x, r3, c18.xyww;
dp3 r0.y, r3, c18.zxww;
mov r3.xy, r0;
// r0.z = 0, r0.w = 1. The w = 1 lets each row's .w (the eye ray component)
// pass through the normalizing multiplies below unscaled.
mov r0.zw, c16.zzxz;
// Normalize the xyz of each row and write it out. Note r2 goes to oT3 and
// r3 goes to oT2.
dp3 r0.x, r1, r1;
rsq r0.x, r0.x;
mul oT1, r1.xyzw, r0.xxxw;
// mul r8, r1.xyzw, r0.xxxw; // VISUAL
dp3 r0.x, r2, r2;
rsq r0.x, r0.x;
mul oT3, r2.xyzw, r0.xxxw;
// mul r9, r2.xyzw, r0.xxxw; // VISUAL
dp3 r0.x, r3, r3;
rsq r0.x, r0.x;
mul oT2, r3.xyzw, r0.xxxw;
// mul r9, r3.xyzw, r0.xxxw; // VISUAL
// mul r3, r3.xzyw, r0.xxxw;
// mul r3.xy, r3, -c16.zzzz;
/*
// Want:
// oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
// oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
// ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
// with BIN, TAN, and NORM normalized.
// Unnormalized, we have
// BIN = (1, 0, -r7.x) where r7 == accumCos
// TAN = (0, 1, -r7.y)
// NORM= (r7.x, r7.y, 1)
// So, unnormalized, we have
// oT1 = (1, 0, r7.x, view2pos.x)
// oT2 = (0, 1, r7.y, view2pos.y)
// oT3 = (-r7.x, -r7.y, 1, view2pos.z)
// which is just reversing the signs on the accumCos
// terms above. So the normalized version is just
// reversing the signs on the normalized version above.
*/
//mov oT3, r4;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c21; // HACKAGE
//mov r6.w, c16.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// Project the displaced world position with the matrix in c0-c3.
m4x4 r9, r6, c0;
// Fog = (projected w + c28.x) * c28.y.
add r10.x, r9.w, c28.x;
mul oFog, r10.x, c28.y;
//mov oFog, c16.y; // TESTFOGHACK
mov oPos, r9;
mov oD0, c4; // SEENORM
// Transform our uvw
// Only r0.xy are computed here; r0.zw still hold the (0, 1) written by the
// "mov r0.zw, c16.zzxz" earlier in the shader.
dp4 r0.x, v0, c10;
dp4 r0.y, v0, c11;
//mov r0.zw, c16.xxxz;
mov oT0, r0
// Questionable attenuation follows
// Find vector from this point to camera and normalize
sub r0, c17, r6;
dp3 r1.x, r0, r0;
rsq r1.x, r1.x;
mul r0, r0, r1.xxxx;
// Dot that with the computed normal
// (r11 is the normalized (accumCos.x, accumCos.y, 1) normal built earlier.)
dp3 r1.x, r0, r11;
mul r1.x, r1.x, v5.z;
// dp3 r1.x, r0, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
// Map dot=1 => 0, dot=0 => 1
sub r1.xyzw, c16.zzzz, r1.xxxx; // all four lanes = 1 - dot
add r1.w, r1.wwww, c16.zzzz; // alpha lane only: (1 - dot) + 1
mul r1.w, r1.wwww, c16.yyyy; // alpha *= c16.y (presumably 0.5 - confirm)
// No need to clamp, since the destination register (in the pixel shader)
// will saturate [0..1] anyway.
//%%% mul r1.w, r1.w, r4.x;
//%%% mul r1.xyz, r1.xyz, r4.yyy;
// Depth attenuation from r4: rgb lanes scaled by r4.y, alpha lane by r4.x.
mul r1, r1, r4.yyyx; // HACKTESTCOLOR
mul r1.xyz, r1, r8.xxx; // WAVEFACE
mul r1.w, r1.wwww, v5.xxxx;
mul oD1, r1, c20;
// mov oD1, r4.yyyy;
//mov oD1, c16.zzzz; // HACKAGE
// mov oD1, r9;
// mov oD1, r8.xzyw;

View File

@ -1,437 +1,437 @@
vs.1.1
// Water vertex shader. Displaces the vertex by a sum of four waves (one
// wave per register lane) in x, y and z, builds the normal map transform
// rows that go out in oT1-oT3 from per-wave constant sums, and writes
// position, fog, uv and vertex colors.
// Inputs: v0 = position, v5 = per-vertex scale/filter color (see below).
// c16.x is used as 0 and c16.z as 1 throughout this shader.
dcl_position v0
dcl_color v5
// Store our input position in world space in r6
m4x3 r6, v0, c21; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c16.zzzz; // r6.w = 1, so r6 can go through the m4x4 with c0 later
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = reflection strength (transparency)
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c25 = waterlevel + offset
// c26 = (maxAtten - minAtten) / depthFalloff
// c27 = minAtten.
// And in particular:
// c25.w = waterlevel
// c26.w = 1.f;
// c27.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// Each lane of r0 is one wave: r0 = c8 * pos.x + c9 * pos.y.
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// c15.z is added here before the divide and subtracted again after the
// frac/multiply below, so the wrapped phase is centred on zero.
// r4 is only a scratch for 1/c15.w here; it is recycled further down.
rcp r4, c15.wwww;
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp leaves the fractional part of its source in .y, so each lane is
// routed through r1.y and copied out to its own lane. The y lane is done
// last so that its own result is what remains in r1.y.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// Polynomial evaluation of sin and cos for all four waves at once.
// Coefficients are read from c13 (sine) and c14 (cosine); c14.x is the
// constant term of the cosine series.
// r4 (1/kTwoPi above) and r5 are used as scratch for the higher powers.
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2; // r2 += r0^7 * vSin.w  => sinDist
mad r1, r4, c14.wwww, r1; // r1 += r0^6 * vCos.w  => cosDist
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c25 - pos.z) * c26 + c27, per channel.
sub r4, c25, r6.zzzz;
mul r4, r4, c26;
add r4, r4, c27;
// Clamp .xyz to range [0..1]
// (.w is deliberately left unclamped by the write masks.)
min r4.xyz, r4, c16.zzzz;
max r4.xyz, r4, c16.xxxx;
// Calc our filter (see above).
// r11 = clamp(v5.w * c24, 0, 1), one filter value per wave.
// c24 is presumably the per-wave length (waveLen in the header) - confirm
// against the app's constant setup.
// NOTE(review): the formula quoted in the header comment includes a "- 1"
// term; no subtraction is performed here - confirm whether that is intended.
mul r11, v5.wwww, c24;
max r11, r11, c16.xxxx;
min r11, r11, c16.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r5, r2, c7;
// r5 is now T = sum(Ai * sin())
// (r5 holds the four per-wave terms; the sum itself is taken by the dp4
// against all-ones just below, and by the constant dot products later.)
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r5, c16.zzzz; // sum the four per-wave heights
mul r8.y, r8.x, r4.z; // scale by the depth-based wave attenuation (r4.z)
add r8.z, r8.y, c25.w; // offset by c25.w (water level, per the header)
max r6.z, r6.z, r8.z; // CLAMP
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kAmplitude.xyzw; // Combine?
mul r7, r1, c7;
// cosDist *= filter;
mul r7, r7, r11;
// r7 is now M = sum(Ai * cos())
// (as with r5, r7 holds the four per-wave terms, summed by later dp4s.)
// Okay, here we go:
// W == sum(k w Dir.x^2 A sin())
// V == sum(k w Dir.x Dir.y A sin())
// U == sum(k w Dir.y^2 A sin())
//
// T == sum(A sin())
//
// S == sum(k Dir.x A cos())
// R == sum(k Dir.y A cos())
//
// Q == sum(k w A cos())
//
// M == sum(A cos())
//
// P == sum(w Dir.x A cos())
// N == sum(w Dir.y A cos())
//
// Then:
// Pos = (in.x + S, in.y + R, waterheight + T)
//
// Bin = (1 - W, -V, P)
// Tan = (-V, 1 - U, N)
// Nor = (-P, -N, 1 - Q)
//
// But we want the transpose of that to go into r1-r3
// r5 = per-wave A*sin terms, r7 = per-wave A*cos terms. Matching the code
// against the list above, c29/c30 supply the S/R weights, c31/c32 the P/N
// weights and c34/c35/c36 the W/V/U weights - presumably set up that way
// by the app; confirm against the constant upload code.
// Horizontal displacement: pos.x += S, pos.y += R.
dp4 r10.x, r7, c29;
add r6.x, r6.x, r10.x;
dp4 r10.x, r7, c30;
add r6.y, r6.y, r10.x;
// First column of each row (Bin): r1.x = 1 - W, r2.x = -V, r3.x = P.
dp4 r1.x, r5, -c34;
dp4 r2.x, r5, -c35;
dp4 r3.x, r7, c31;
add r1.x, r1.xxxx, c16.zzzz;
// Second column (Tan): r1.y = -V, r2.y = 1 - U, r3.y = N.
dp4 r1.y, r5, -c35;
dp4 r2.y, r5, -c36;
dp4 r3.y, r7, c32;
add r2.y, r2.yyyy, c16.zzzz;
// Third column (Nor): r1.z = -P, r2.z = -N, r3.z = 1 - Q.
// NOTE(review): the list above defines Q with cos(), but r3.z is built from
// the sin terms in r5 (with c33) - confirm which one the comment should say.
dp4 r1.z, r7, -c31;
dp4 r2.z, r7, -c32;
dp4 r3.z, r5, -c33;
add r3.z, r3.zzzz, c16.zzzz;
// Calculate our normalized vector from camera to vtx.
// We'll use that a couple of times coming up.
// r5 (the per-wave sin terms) is no longer needed and is reused here.
sub r5, r6, c17;
dp3 r10.x, r5, r5;
rsq r10.x, r10.x;
mul r5, r5, r10.xxxx; // r5 = D
rcp r5.w, r10.x; // r5.w = 1 / (1/length) = distance from camera to vtx
// Calculate our specular attenuation from and into r5.w.
// r5.w starts off the distance from vtx to camera.
// Once we've turned it into an attenuation factor, we
// scale the x and y of our normal map (through the transform bases)
// so that in the distance, the normal map is flat. Note that the
// geometry in the distance isn't necessarily flat. We want to apply
// this scale to the normal read from the normal map before it is
// transformed into surface space.
// r5.w = clamp((dist + c11.x) * c11.y, 0, 1)^2 * c11.z
add r5.w, r5.w, c11.x;
mul r5.w, r5.w, c11.y;
min r5.w, r5.w, c16.z;
max r5.w, r5.w, c16.x;
mul r5.w, r5.w, r5.w; // Square it to account for perspective
mul r5.w, r5.w, c11.z;
// Normalize?
// We can either calculate an orthonormal basis from the
// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
// or compute our basis directly from the partial derivatives, with
// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
//
// These work out to identically the same result, so we'll compute directly
// from the partials because it takes 2 fewer instructions.
//
// Note that our basis is NOT orthonormal. The Normal is equal to
// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
// are both correct tangents to the surface, and their projections on the XY plane
// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
// Not really. I'm actually not really sure which is more "proper" for bump mapping.
//
// Note also that we add when we should subtract and subtract when we should
// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
// explanation.
//
// Binormal = Y % Normal
// Cross product3 is:
// mul res.xyz, a.yzx, b.zxy
// mad res.xyz, -a.zxy, b.yzx, res.xyz
// mul r1.xyz, c16.zxx, r3.zxy;
// mad r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
// Tangent = Normal % X
// mul r2.xyz, r3.yzx, c16.xzx;
// mad r2.xyz, -r3.zxy, c16.xxz, r2;
//mad r1, r5.wwww, c16.zxxx, r7.zzxz;
//mad r2, r5.wwww, c16.xzxx, r7.zzyz;
//mul r3.xy, r3.xy, r5.wwww;
// Note that we're swapping z and y to match our environment map tools in max.
// We do this through our normal map transform (oT1, oT2, oT3), making it
// a concatenation of:
//
// rotate about Z (blue) to turn our map into the wind
// windRot = | dirY -dirX 0 |
// | dirX dirY 0 |
// | 0 0 1 |
//
// swap our Y and Z axes to match our environment map
// swapYZ = | 1 0 0 |
// | 0 0 1 |
// | 0 1 0 |
//
// rotate the normal into the surface's tangent space basis
// basis = | Bx Tx Nx |
// | By Ty Ny |
// | Bz Tz Nz |
//
// Note that we've constructed the basis by taking advantage of the
// matrix being a pure rotation, as noted below, so r1, r2 and r3
// are actually constructed as:
// basis = | Bx -By -Bz |
// | -Tx Ty -Tz |
// | -Nx -Ny -Nz |
//
// Then the final normal map transform is:
//
// basis * swapYZ * windRot [ * normal ]
// sub r1.w, c17.x, r6.x;
// sub r2.w, c17.z, r6.z;
// sub r3.w, c17.y, r6.y;
// Big note here. All this math can blow up if the camera position
// is outside the environment sphere. It's assumed that's dealt
// with in the app setting up the constants. For that reason, the
// camera position used here might not be the real local camera position,
// which is needed for the angular attenuation, so we burn another constant
// with our pseudo-camera position. To restrain the pseudo-camera from
// leaving the sphere, we make:
// pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
// where dist = |realPos - envCenter|
// So, our "finitized" eyeray is:
// camPos + D * t - envCenter = D * t - (envCenter - camPos)
// with
// D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
// and
// t = D dot F + sqrt( (D dot F)^2 - G )
// with
// F = (envCenter - camPos) => c19.xyz
// G = F^2 - R^2 => c19.w
// R = environment radius. => unused
//
// This all derives from the positive root of equation
// (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
// In other words, where on a sphere of radius R centered about envCenter
// does the ray from the real camera position through this point hit.
//
// Note that F, G, and R are all constants (one point, two scalars).
//
// So first we calculate D into r0,
// then D dot F into r10.x,
// then (D dot F)^2 - G into r10.y
// then rsq( (D dot F)^2 - G ) into r9.x;
// then t = r10.z = r10.x + r10.y * r9.x;
// and
// r0 = D * t - (envCenter - camPos)
// = r0 * r10.zzzz - F;
//
// Intersect the eye ray with the environment sphere (derivation above),
// renormalize, and store the negated result in the .w lanes of the three
// transform rows. D was already computed into r5 earlier.
mov r0, r5; // r0 = D
dp3 r10.x, r0, c19; // r10.x = D dot F
mad r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G
rsq r9.x, r10.y; // r9.x = 1/SQRT((D dot F)^2 - G)
// value * rsq(value) == sqrt(value), hence the r10.y * r9.x product below.
mad r10.z, r10.y, r9.x, r10.x; // r10.z = D dot F + SQRT((D dot F)^2 - G)
mad r0.xyz, r0, r10.zzz, -c19.xyz; // r0.xyz = D * t - (envCenter - camPos)
// ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump.
dp3 r10.x, r0, r0;
rsq r9.x, r10.x;
mul r0.xyz, r0.xyz, r9.xxx;
mov r1.w, -r0.x;
mov r2.w, -r0.y;
mov r3.w, -r0.z;
// r0.z = 0, r0.w = 1. The w = 1 lets each row's .w (the eye ray component)
// pass through the scaling multiplies below unchanged.
mov r0.zw, c16.zzxz;
// For each row: r0.y = 1/|row.xyz| and r0.x = r5.w/|row.xyz|, so the
// row's x and y get the extra r5.w attenuation while z is only normalized.
// The normalized z of each row is also collected into r11 for the
// attenuation dot product later. Note r2 goes to oT3 and r3 goes to oT2.
dp3 r0.x, r1, r1;
rsq r0.xy, r0.x;
mul r0.x, r0.x, r5.w;
mul oT1, r1.xyzw, r0.xxyw;
// mul r8, r1.xyzw, r0.xxxw; // VISUAL
mul r11.x, r1.z, r0.y;
dp3 r0.x, r2, r2;
rsq r0.xy, r0.x;
mul r0.x, r0.x, r5.w;
mul oT3, r2.xyzw, r0.xxyw;
// mul r9, r2.xyzw, r0.xxxw; // VISUAL
mul r11.y, r2.z, r0.y;
dp3 r0.x, r3, r3;
rsq r0.xy, r0.x;
mul r0.x, r0.x, r5.w;
mul oT2, r3.xyzw, r0.xxyw;
// mul r9, r3.xyzw, r0.xxxw; // VISUAL
mul r11.z, r3.z, r0.y;
/*
// Want:
// oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
// oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
// ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
// with BIN, TAN, and NORM normalized.
// Unnormalized, we have
// BIN = (1, 0, -r7.x) where r7 == accumCos
// TAN = (0, 1, -r7.y)
// NORM= (r7.x, r7.y, 1)
// So, unnormalized, we have
// oT1 = (1, 0, r7.x, view2pos.x)
// oT2 = (0, 1, r7.y, view2pos.y)
// oT3 = (-r7.x, -r7.y, 1, view2pos.z)
// which is just reversing the signs on the accumCos
// terms above. So the normalized version is just
// reversing the signs on the normalized version above.
*/
//mov oT3, r4;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c21; // HACKAGE
//mov r6.w, c16.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// Project the displaced world position with the matrix in c0-c3.
m4x4 r9, r6, c0;
// Fog = (projected w + c28.x) * c28.y.
add r10.x, r9.w, c28.x;
mul oFog, r10.x, c28.y;
//mov oFog, c16.zzzz; // TESTFOGHACK
mov oPos, r9;
// Transform our uvw
// uv = v0.xy * c10.x (a uniform scale). r0.zw still hold the (0, 1)
// written by the "mov r0.zw, c16.zzxz" earlier in the shader.
mul r0.x, v0.xxxx, c10.xxxx;
mul r0.y, v0.yyyy, c10.xxxx;
//mov r0.zw, c16.xxxz;
mov oT0, r0
// Questionable attenuation follows
// vector from this point to camera and normalize stashed in r5
// Dot that with the computed normal
// r5 points from camera to vertex, hence the negation. r11 holds the
// normalized z components of the three transform rows gathered earlier.
dp3 r1.x, -r5, r11;
mul r1.x, r1.x, v5.z;
// dp3 r1.x, r5, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
// Map dot=1 => 0, dot=0 => 1
sub r1.xyzw, c16.zzzz, r1.xxxx; // all four lanes = 1 - dot
add r1.w, r1.wwww, c16.zzzz; // alpha lane only: (1 - dot) + 1
mul r1.w, r1.wwww, c16.yyyy; // alpha *= c16.y (presumably 0.5 - confirm)
// No need to clamp, since the destination register (in the pixel shader)
// will saturate [0..1] anyway.
//%%% mul r1.w, r1.w, r4.x;
//%%% mul r1.xyz, r1.xyz, r4.yyy;
// Depth attenuation from r4: rgb lanes scaled by r4.y, alpha lane by r4.x.
mul r1, r1, r4.yyyx; // HACKTESTCOLOR
//mul r1.xyz, r1, r8.xxx; // WAVEFACE
mul r1.w, r1.wwww, v5.xxxx;
mul r1.w, r1.wwww, c4.wwww;
mul oD0, r1, c20;
mov oD1, c4; // SEENORM
//mov oD1, c16.xxxx;
// mov oD1, r4.yyyy;
//mov oD1, c16.zzzz; // HACKAGE
// mov oD1, r9;
// mov oD1, r8.xzyw;
vs.1.1
dcl_position v0
dcl_color v5
// Store our input position in world space in r6
m4x3 r6, v0, c21; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c16.zzzz;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = reflection strength (transparency)
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c25 = waterlevel + offset
// c26 = (maxAtten - minAtten) / depthFalloff
// c27 = minAtten.
// And in particular:
// c25.w = waterlevel
// c26.w = 1.f;
// c27.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
rcp r4, c15.wwww;
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2;
mad r1, r4, c14.wwww, r1;
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
sub r4, c25, r6.zzzz;
mul r4, r4, c26;
add r4, r4, c27;
// Clamp .xyz to range [0..1]
min r4.xyz, r4, c16.zzzz;
max r4.xyz, r4, c16.xxxx;
// Calc our filter (see above).
mul r11, v5.wwww, c24;
max r11, r11, c16.xxxx;
min r11, r11, c16.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r5, r2, c7;
// r5 is now T = sum(Ai * sin())
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r5, c16.zzzz;
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c25.w;
max r6.z, r6.z, r8.z; // CLAMP
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kAmplitude.xyzw; // Combine?
mul r7, r1, c7;
// cosDist *= filter;
mul r7, r7, r11;
// r7 is now M = sum(Ai * cos())
// Okay, here we go:
// W == sum(k w Dir.x^2 A sin())
// V == sum(k w Dir.x Dir.y A sin())
// U == sum(k w Dir.y^2 A sin())
//
// T == sum(A sin())
//
// S == sum(k Dir.x A cos())
// R == sum(k Dir.y A cos())
//
// Q == sum(k w A cos())
//
// M == sum(A cos())
//
// P == sum(w Dir.x A cos())
// N == sum(w Dir.y A cos())
//
// Then:
// Pos = (in.x + S, in.y + R, waterheight + T)
//
// Bin = (1 - W, -V, P)
// Tan = (-V, 1 - U, N)
// Nor = (-P, -N, 1 - Q)
//
// But we want the transpose of that to go into r1-r3
dp4 r10.x, r7, c29;
add r6.x, r6.x, r10.x;
dp4 r10.x, r7, c30;
add r6.y, r6.y, r10.x;
dp4 r1.x, r5, -c34;
dp4 r2.x, r5, -c35;
dp4 r3.x, r7, c31;
add r1.x, r1.xxxx, c16.zzzz;
dp4 r1.y, r5, -c35;
dp4 r2.y, r5, -c36;
dp4 r3.y, r7, c32;
add r2.y, r2.yyyy, c16.zzzz;
dp4 r1.z, r7, -c31;
dp4 r2.z, r7, -c32;
dp4 r3.z, r5, -c33;
add r3.z, r3.zzzz, c16.zzzz;
// Calculate our normalized vector from camera to vtx.
// We'll use that a couple of times coming up.
sub r5, r6, c17;
dp3 r10.x, r5, r5;
rsq r10.x, r10.x;
mul r5, r5, r10.xxxx; // r0 = D
rcp r5.w, r10.x;
// Calculate our specular attenuation from and into r5.w.
// r5.w starts off the distance from vtx to camera.
// Once we've turned it into an attenuation factor, we
// scale the x and y of our normal map (through the transform bases)
// so that in the distance, the normal map is flat. Note that the
// geometry in the distance isn't necessarily flat. We want to apply
// this scale to the normal read from the normal map before it is
// transformed into surface space.
add r5.w, r5.w, c11.x;
mul r5.w, r5.w, c11.y;
min r5.w, r5.w, c16.z;
max r5.w, r5.w, c16.x;
mul r5.w, r5.w, r5.w; // Square it to account for perspective
mul r5.w, r5.w, c11.z;
// Normalize?
// We can either calculate an orthonormal basis from the
// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
// or compute our basis directly from the partial derivatives, with
// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
//
// These work out to identically the same result, so we'll compute directly
// from the partials because it takes 2 fewer instructions.
//
// Note that our basis is NOT orthonormal. The Normal is equal to
// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
// are both correct tangents to the surface, and their projections on the XY plane
// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
// Not really. I'm actually not really sure which is more "proper" for bump mapping.
//
// Note also that we add when we should subtract and subtract when we should
// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
// explanation.
//
// Binormal = Y % Normal
// Cross product3 is:
// mul res.xyz, a.yzx, b.zxy
// mad res.xyz, -a.zxy, b.yzx, res.xyz
// mul r1.xyz, c16.zxx, r3.zxy;
// mad r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
// Tangent = Normal % X
// mul r2.xyz, r3.yzx, c16.xzx;
// mad r2.xyz, -r3.zxy, c16.xxz, r2;
//mad r1, r5.wwww, c16.zxxx, r7.zzxz;
//mad r2, r5.wwww, c16.xzxx, r7.zzyz;
//mul r3.xy, r3.xy, r5.wwww;
// Note that we're swapping z and y to match our environment map tools in max.
// We do this through our normal map transform (oT1, oT2, oT3), making it
// a concatenation of:
//
// rotate about Z (blue) to turn our map into the wind
// windRot = | dirY -dirX 0 |
// | dirX dirY 0 |
// | 0 0 1 |
//
// swap our Y and Z axes to match our environment map
// swapYZ = | 1 0 0 |
// | 0 0 1 |
// | 0 1 0 |
//
// rotate the normal into the surface's tangent space basis
// basis = | Bx Tx Nx |
// | By Ty Ny |
// | Bz Tz Nz |
//
// Note that we've constucted the basis by taking advantage of the
// matrix being a pure rotation, as noted below, so r1, r2 and r3
// are actually constructed as:
// basis = | Bx -By -Bz |
// | -Tx Ty -Tz |
// | -Nx -Ny -Nz |
//
// Then the final normal map transform is:
//
// basis * swapYZ * windRot [ * normal ]
// sub r1.w, c17.x, r6.x;
// sub r2.w, c17.z, r6.z;
// sub r3.w, c17.y, r6.y;
// Big note here. All this math can blow up if the camera position
// is outside the environment sphere. It's assumed that's dealt
// with in the app setting up the constants. For that reason, the
// camera position used here might not be the real local camera position,
// which is needed for the angular attenuation, so we burn another constant
// with our pseudo-camera position. To restrain the pseudo-camera from
// leaving the sphere, we make:
// pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
// where dist = |realPos - envCenter|
// So, our "finitized" eyeray is:
// camPos + D * t - envCenter = D * t - (envCenter - camPos)
// with
// D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
// and
// t = D dot F + sqrt( (D dot F)^2 - G )
// with
// F = (envCenter - camPos) => c19.xyz
// G = F^2 - R^2 => c19.w
// R = environment radius. => unused
//
// This all derives from the positive root of equation
// (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
// In other words, where on a sphere of radius R centered about envCenter
// does the ray from the real camera position through this point hit.
//
// Note that F, G, and R are all constants (one point, two scalars).
//
// So first we calculate D into r0,
// then D dot F into r10.x,
// then (D dot F)^2 - G into r10.y
// then rsq( (D dot F)^2 - G ) into r9.x;
// then t = r10.z = r10.x + r10.y * r9.x;
// and
// r0 = D * t - (envCenter - camPos)
// = r0 * r10.zzzz - F;
//
mov r0, r5; // r0 = D
dp3 r10.x, r0, c19; // r10.x = D dot F
mad r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G
rsq r9.x, r10.y; // r9.x = 1/SQRT((D dot F)^2 - G)
mad r10.z, r10.y, r9.x, r10.x; // r10.z = D dot F + SQRT((D dot F)^2 - G)
mad r0.xyz, r0, r10.zzz, -c19.xyz; // r0.xyz = D * t - (envCenter - camPos)
// ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump.
dp3 r10.x, r0, r0;
rsq r9.x, r10.x;
mul r0.xyz, r0.xyz, r9.xxx;
mov r1.w, -r0.x;
mov r2.w, -r0.y;
mov r3.w, -r0.z;
mov r0.zw, c16.zzxz;
dp3 r0.x, r1, r1;
rsq r0.xy, r0.x;
mul r0.x, r0.x, r5.w;
mul oT1, r1.xyzw, r0.xxyw;
// mul r8, r1.xyzw, r0.xxxw; // VISUAL
mul r11.x, r1.z, r0.y;
dp3 r0.x, r2, r2;
rsq r0.xy, r0.x;
mul r0.x, r0.x, r5.w;
mul oT3, r2.xyzw, r0.xxyw;
// mul r9, r2.xyzw, r0.xxxw; // VISUAL
mul r11.y, r2.z, r0.y;
dp3 r0.x, r3, r3;
rsq r0.xy, r0.x;
mul r0.x, r0.x, r5.w;
mul oT2, r3.xyzw, r0.xxyw;
// mul r9, r3.xyzw, r0.xxxw; // VISUAL
mul r11.z, r3.z, r0.y;
/*
// Want:
// oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
// oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
// ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
// with BIN, TAN, and NORM normalized.
// Unnormalized, we have
// BIN = (1, 0, -r7.x) where r7 == accumCos
// TAN = (0, 1, -r7.y)
// NORM= (r7.x, r7.y, 1)
// So, unnormalized, we have
// oT1 = (1, 0, r7.x, view2pos.x)
// oT2 = (0, 1, r7.y, view2pos.y)
// oT3 = (-r7.x, -r7.y, 1, view2pos.z)
// which is just reversing the signs on the accumCos
// terms above. So the normalized version is just
// reversing the signs on the normalized version above.
*/
//mov oT3, r4;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c21; // HACKAGE
//mov r6.w, c16.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
m4x4 r9, r6, c0;
add r10.x, r9.w, c28.x;
mul oFog, r10.x, c28.y;
//mov oFog, c16.zzzz; // TESTFOGHACK
mov oPos, r9;
// Transform our uvw
mul r0.x, v0.xxxx, c10.xxxx;
mul r0.y, v0.yyyy, c10.xxxx;
//mov r0.zw, c16.xxxz;
mov oT0, r0
// Questionble attenuation follows
// vector from this point to camera and normalize stashed in r5
// Dot that with the computed normal
dp3 r1.x, -r5, r11;
mul r1.x, r1.x, v5.z;
// dp3 r1.x, r5, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
// Map dot=1 => 0, dot=0 => 1
sub r1.xyzw, c16.zzzz, r1.xxxx;
add r1.w, r1.wwww, c16.zzzz;
mul r1.w, r1.wwww, c16.yyyy;
// No need to clamp, since the destination register (in the pixel shader)
// will saturate [0..1] anyway.
//%%% mul r1.w, r1.w, r4.x;
//%%% mul r1.xyz, r1.xyz, r4.yyy;
mul r1, r1, r4.yyyx; // HACKTESTCOLOR
//mul r1.xyz, r1, r8.xxx; // WAVEFACE
mul r1.w, r1.wwww, v5.xxxx;
mul r1.w, r1.wwww, c4.wwww;
mul oD0, r1, c20;
mov oD1, c4; // SEENORM
//mov oD1, c16.xxxx;
// mov oD1, r4.yyyy;
//mov oD1, c16.zzzz; // HACKAGE
// mov oD1, r9;
// mov oD1, r8.xzyw;

View File

@ -1,166 +1,166 @@
vs.1.1
// Layered wave-front vertex shader.
// Passes the incoming position straight through to oPos (w forced to c0.z),
// sums four cosine waves of v0.x into a V offset, and writes texture
// coordinates to oT0/oT2 (identical) and oT1 (rescaled second layer),
// plus the constant color c7 to oD0.
dcl_position v0
dcl_normal v3
// c0 = (0,0.5,1.0,2.0) (aka NumericConsts)
// c1 = frequencies
// c2 = phases
// c3 = amplitudes
// c4 = PiConsts = (1/(2PI), PI/2, PI, 2*PI) // NOTE THIS IS DIFFERENT
// because we don't need oonsqpi here but do want 1/2Pi.
// c5 = cosConsts = (1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
// c6 = ((cMax - cMin), cMin, 2ndLayerVOffset, 2ndLayerScale);
// NOTE(review): the RESCALE1 code further down describes c6.x * c6.z as
// (sinage * vScale), which does not obviously match the layout listed on the
// line above -- confirm against the code that uploads c6.
// c7 = overall color, including current opacity. Will
// probably only use the opacity, which we could stuff into
// the free slot of c6, but we're a wuss.
// First, "move" the position to oPos
mov r0, v0;
//mov r0.y, -r0.yyyy;
mov r0.w, c0.zzzz;
mov oPos, r0;
// Now the tricky part.
// The base layer defines the shape of the incoming wave
// The next layer has bubbles (noise) and moves in when the
// wave is moving in, moves out when wave is moving out.
// So calculate uvw for first layer, second uvw shares u val
// and v val is const
// The .x component of the normal
// tells us how much to shift this vert based on the
// cumulative cosine wave.
// Figure c = Sigma((cosine(v0.x * freq + phase) + 1) * amp);
// Note that range c must be [0..1]
// Also, c(-1) must equal c(1) so it will wrap.
// That implies freq = k * 2 * PI, where k is an integer.
// To keep c >= 0, we can add 1 to each term in the sigma BEFORE
// modulating by the amplitude.
// That puts our range at [0..2*sigma(amp)], so as long as
// sigma(amp) <= 0.5, we're fine.
// Get our input to cosine value (v0.x * freq + phase).
// (The code actually evaluates (v0.x + 1) * freq + phase, all four waves at once.)
add r0, v0.xxxx, c0.zzzz;
mul r0, r0, c1;
add r0, r0, c2;
// Get it into range [-Pi..Pi]
// First divide out the 2PI
// add r0, r0, c4.zzzz; HACKOUT
mul r0, r0, c4.xxxx;
// Do an integer mod
// (expp leaves the fractional part of its source in .y. r1.y doubles as the
// scratch lane for the x, z and w results, so the y lane is computed last.)
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
//mov oD1, r1; // HACKTEST
//mov oD1.w, c0.zzzz; // HACKTEST
// Move back into PI space, w/ *= 2PI, -= PI
mul r0, r1, c4.wwww;
sub r0, r0, c4.zzzz;
// Okay, compute cosine here.
// cos = 1 + r0^2 * kCos.y + r0^4 * kCos.Z + r0^6 * kCos.w
// Note: could pare off an instr by putting 1/kCos.w in kCos.x,
// then doing a mad to get r3=(1/kCos.w + r0^6), then mad that
// into the accum by kCos.w to get (1 + r0^6*kCos.x). But who cares.
mul r1, r0, r0; // r0^2
mul r2, r1, r1; // r0^4
mul r3, r1, r2; // r0^6
mov r4, c5.xxxx; // r4 = 1
mad r4, r1, c5.yyyy, r4; // r4 += r0^2 * kCos.y
mad r4, r2, c5.zzzz, r4; // r4 += r0^4 * kCos.z
mad r4, r3, c5.wwww, r4; // r4 += r0^6 * kCos.w
add r4, r4, c0.zzzz; // shift from [-1..1] to [0..2]
//mov r4, c0.xxxx; // HACKLAST
mul r4, r4, c3; // times amplitude
dp4 r5.y, r4, c0.zzzz; // r5.y = sigma((cos() + 1) * amp);
// V calculation, goes something like:
// For layers 0 and 2:
// V = { 1 + c6.z <= r5.y = 0 } * norm.x // norm.x == v3.x
// { 1 + 0 <= r5.y = 1 }
// For layer 1:
// V = (norm.x + c6.z) * c6.w // Scaled like U
//
// Another way to formulate that is
// baseV = cMin + sinAge * (cMax-cMin) where
// cMin = 2
// cMax = 1
// sinAge = color.a = c7.w
// delV = sigma(cos) = r5.y
// Then
// V0 = V2 = (baseV + delV) * v3.x
// V1 = (norm.x + baseV + delV) * c6.w
//
// If we're sure we want cMin = 2 and cMax = 1, then it simplifies to:
// baseV = 2 - sinAge = c0.w - c7.w
// delV = r5.y
// (baseV + delV) = c0.w - c7.w + r5.y
//
// If we want to stay general, then
// baseV = c6.x * c7.w + c6.y
// delV = -r5.y
// (baseV + delV) = constant + r5.y
//
// make r5.y = (baseV + delV)
// (as written, this is simply c6.x + the cosine sum)
add r5.y, c6.xxxx, r5.yyyy;
//mov oD1, r5.yyyy; // HACKLAST
//mov oD1.w, c0.zzzz; // HACKLAST
// U is input U (or v0.x * 0.5f + 0.5f)
mul r5.x, v0.x, c0.y;
add r5.x, r5.x, c0.y;
// Fill out wq.
// NOTE(review): short swizzle .xz under a .zw write mask -- if the assembler
// pads it to .xzzz, both z and w receive c0.z (1.0). Confirm that is intended
// rather than z = 0, w = 1 (which would be spelled .xxxz).
mov r5.zw, c0.xz;
// oT0 and oT2 get the same coordinates: U unchanged (times v3.w), V times v3.x.
mul oT0, r5, v3.wxww;
// mov oD1, r5.yyyw; // HACKTEST
mul oT2, r5, v3.wxww;
// Second uv shares u, but v is norm.x + c6.x;
// Then we scale it.
// If we want the bubble texture to move with the
// wave front, we want the second UV calc (RESCALE1).
// But it looks better to have the bubbles moving
// slightly faster than the wave front. RESCALE0
// happens to do that, because we're scaling the
// texture by a factor of 2, but we should probably
// supply an independent scale of the motion vs. the
// scale of the texture.
// Let's move c6 to r6 for ease of use.
mov r6, c6;
// add r5.x, r5.x, c6.y;
// add r5.y, c6.xxxx, v3.xxxx; // RESCALE0
// mul r5.xy, r5, c6.wwww; // RESCALE0
add r5.x, r5.x, r6.y; // RESCALE1 // offset U
mov r5.y, v3.xx; // RESCALE1 // Init V to value stashed in normal.x
mul r5.xy, r5, r6.wwww; // RESCALE1 // scale them by single scale value
mad r5.y, r6.xx, r6.zz, r5.yy; // RESCALE1 // add in our scaled V offset (sinage * vScale)
// Net effect of the four RESCALE1 instructions (the cosine sum is not used here):
// oT1.x = (U + c6.y) * c6.w
// oT1.y = v3.x * c6.w + c6.x * c6.z
// oT1.zw carry over unchanged from the r5.zw written above.
mov oT1, r5;
//mov oT0, v7; // HACKTEST
//mov oT1, v7; // HACKTEST
//mov oT2, v7; // HACKTEST
// Just slam in the constant color (includes our current opacity).
mov oD0, c7;
//mov oD0, c0.zzzz; // HACKTEST
vs.1.1
// Layered wave-front vertex shader.
// Passes the incoming position straight through to oPos (w forced to c0.z),
// sums four cosine waves of v0.x into a V offset, and writes texture
// coordinates to oT0/oT2 (identical) and oT1 (rescaled second layer),
// plus the constant color c7 to oD0.
dcl_position v0
dcl_normal v3
// c0 = (0,0.5,1.0,2.0) (aka NumericConsts)
// c1 = frequencies
// c2 = phases
// c3 = amplitudes
// c4 = PiConsts = (1/(2PI), PI/2, PI, 2*PI) // NOTE THIS IS DIFFERENT
// because we don't need oonsqpi here but do want 1/2Pi.
// c5 = cosConsts = (1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
// c6 = ((cMax - cMin), cMin, 2ndLayerVOffset, 2ndLayerScale);
// NOTE(review): the RESCALE1 code further down describes c6.x * c6.z as
// (sinage * vScale), which does not obviously match the layout listed on the
// line above -- confirm against the code that uploads c6.
// c7 = overall color, including current opacity. Will
// probably only use the opacity, which we could stuff into
// the free slot of c6, but we're a wuss.
// First, "move" the position to oPos
mov r0, v0;
//mov r0.y, -r0.yyyy;
mov r0.w, c0.zzzz;
mov oPos, r0;
// Now the tricky part.
// The base layer defines the shape of the incoming wave
// The next layer has bubbles (noise) and moves in when the
// wave is moving in, moves out when wave is moving out.
// So calculate uvw for first layer, second uvw shares u val
// and v val is const
// The .x component of the normal
// tells us how much to shift this vert based on the
// cumulative cosine wave.
// Figure c = Sigma((cosine(v0.x * freq + phase) + 1) * amp);
// Note that range c must be [0..1]
// Also, c(-1) must equal c(1) so it will wrap.
// That implies freq = k * 2 * PI, where k is an integer.
// To keep c >= 0, we can add 1 to each term in the sigma BEFORE
// modulating by the amplitude.
// That puts our range at [0..2*sigma(amp)], so as long as
// sigma(amp) <= 0.5, we're fine.
// Get our input to cosine value (v0.x * freq + phase).
// (The code actually evaluates (v0.x + 1) * freq + phase, all four waves at once.)
add r0, v0.xxxx, c0.zzzz;
mul r0, r0, c1;
add r0, r0, c2;
// Get it into range [-Pi..Pi]
// First divide out the 2PI
// add r0, r0, c4.zzzz; HACKOUT
mul r0, r0, c4.xxxx;
// Do an integer mod
// (expp leaves the fractional part of its source in .y. r1.y doubles as the
// scratch lane for the x, z and w results, so the y lane is computed last.)
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
//mov oD1, r1; // HACKTEST
//mov oD1.w, c0.zzzz; // HACKTEST
// Move back into PI space, w/ *= 2PI, -= PI
mul r0, r1, c4.wwww;
sub r0, r0, c4.zzzz;
// Okay, compute cosine here.
// cos = 1 + r0^2 * kCos.y + r0^4 * kCos.Z + r0^6 * kCos.w
// Note: could pare off an instr by putting 1/kCos.w in kCos.x,
// then doing a mad to get r3=(1/kCos.w + r0^6), then mad that
// into the accum by kCos.w to get (1 + r0^6*kCos.x). But who cares.
mul r1, r0, r0; // r0^2
mul r2, r1, r1; // r0^4
mul r3, r1, r2; // r0^6
mov r4, c5.xxxx; // r4 = 1
mad r4, r1, c5.yyyy, r4; // r4 += r0^2 * kCos.y
mad r4, r2, c5.zzzz, r4; // r4 += r0^4 * kCos.z
mad r4, r3, c5.wwww, r4; // r4 += r0^6 * kCos.w
add r4, r4, c0.zzzz; // shift from [-1..1] to [0..2]
//mov r4, c0.xxxx; // HACKLAST
mul r4, r4, c3; // times amplitude
dp4 r5.y, r4, c0.zzzz; // r5.y = sigma((cos() + 1) * amp);
// V calculation, goes something like:
// For layers 0 and 2:
// V = { 1 + c6.z <= r5.y = 0 } * norm.x // norm.x == v3.x
// { 1 + 0 <= r5.y = 1 }
// For layer 1:
// V = (norm.x + c6.z) * c6.w // Scaled like U
//
// Another way to formulate that is
// baseV = cMin + sinAge * (cMax-cMin) where
// cMin = 2
// cMax = 1
// sinAge = color.a = c7.w
// delV = sigma(cos) = r5.y
// Then
// V0 = V2 = (baseV + delV) * v3.x
// V1 = (norm.x + baseV + delV) * c6.w
//
// If we're sure we want cMin = 2 and cMax = 1, then it simplifies to:
// baseV = 2 - sinAge = c0.w - c7.w
// delV = r5.y
// (baseV + delV) = c0.w - c7.w + r5.y
//
// If we want to stay general, then
// baseV = c6.x * c7.w + c6.y
// delV = -r5.y
// (baseV + delV) = constant + r5.y
//
// make r5.y = (baseV + delV)
// (as written, this is simply c6.x + the cosine sum)
add r5.y, c6.xxxx, r5.yyyy;
//mov oD1, r5.yyyy; // HACKLAST
//mov oD1.w, c0.zzzz; // HACKLAST
// U is input U (or v0.x * 0.5f + 0.5f)
mul r5.x, v0.x, c0.y;
add r5.x, r5.x, c0.y;
// Fill out wq.
// NOTE(review): short swizzle .xz under a .zw write mask -- if the assembler
// pads it to .xzzz, both z and w receive c0.z (1.0). Confirm that is intended
// rather than z = 0, w = 1 (which would be spelled .xxxz).
mov r5.zw, c0.xz;
// oT0 and oT2 get the same coordinates: U unchanged (times v3.w), V times v3.x.
mul oT0, r5, v3.wxww;
// mov oD1, r5.yyyw; // HACKTEST
mul oT2, r5, v3.wxww;
// Second uv shares u, but v is norm.x + c6.x;
// Then we scale it.
// If we want the bubble texture to move with the
// wave front, we want the second UV calc (RESCALE1).
// But it looks better to have the bubbles moving
// slightly faster than the wave front. RESCALE0
// happens to do that, because we're scaling the
// texture by a factor of 2, but we should probably
// supply an independent scale of the motion vs. the
// scale of the texture.
// Let's move c6 to r6 for ease of use.
mov r6, c6;
// add r5.x, r5.x, c6.y;
// add r5.y, c6.xxxx, v3.xxxx; // RESCALE0
// mul r5.xy, r5, c6.wwww; // RESCALE0
add r5.x, r5.x, r6.y; // RESCALE1 // offset U
mov r5.y, v3.xx; // RESCALE1 // Init V to value stashed in normal.x
mul r5.xy, r5, r6.wwww; // RESCALE1 // scale them by single scale value
mad r5.y, r6.xx, r6.zz, r5.yy; // RESCALE1 // add in our scaled V offset (sinage * vScale)
// Net effect of the four RESCALE1 instructions (the cosine sum is not used here):
// oT1.x = (U + c6.y) * c6.w
// oT1.y = v3.x * c6.w + c6.x * c6.z
// oT1.zw carry over unchanged from the r5.zw written above.
mov oT1, r5;
//mov oT0, v7; // HACKTEST
//mov oT1, v7; // HACKTEST
//mov oT2, v7; // HACKTEST
// Just slam in the constant color (includes our current opacity).
mov oD0, c7;
//mov oD0, c0.zzzz; // HACKTEST

View File

@ -1,471 +1,471 @@
vs.1.1
// Wave-grid vertex shader with environment-mapped bump basis.
// Displaces the input vertex by the sum of four filtered sine waves, "scrunches"
// it along the computed normal, builds the rows of a tangent-space transform
// (rotated into the wind) together with a sphere-intersected eye ray in
// oT1/oT2/oT3, projects the displaced position to oPos, and writes UVs to oT0,
// the constant color c4 to oD0 and a view-angle attenuation to oD1.
dcl_position v0
//m4x4 oPos, v0, c0
/*
In fact, I was trying to understand how it was possible to expand FRC into 4
instructions...
Actually, I can do it in 7 instructions :)
EXPP r0.y, r1.xxxx
MOV r0.x, r0.y
EXPP r0.y, r1.zzzz
MOV r0.z, r0.y
EXPP r0.y, r1.wwww
MOV r0.w, r0.y
EXPP r0.y, r1.yyyy
*/
/*
// Constants for sin and cos. 3 term approximation seems plenty
// (it's what i used for software sim, and had no visibly different
// results than the math library functions).
// When doing sin/cos together, some speedup might be obtained
// with good pairing of ops doing them simultaneously. Also save
// an instruction calculating r0^3.
D3DXVECTOR4 vSin( 1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f );
D3DXVECTOR4 vCos( 1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f );
*/
/*
Cos():
r1 = mul(r0, r0); // r0^2
r2 = mul(r1, r1); // r0^4
//cos
r3 = mad( r1, vCos.yyyy, vCos.xxxx );
r3 = mad( r2, vCos.zzzz, r3 );
*/
/*
Sin();
r1 = mul(r0, r0); // r0^2
r1 = mul(r0, r1); // r0^3
r2 = mul(r1, r1); // r0^6
r3 = mad( r1, vSin.yyyy, r0 );
r3 = mad( r2, vSin.zzzz, r3 );
*/
/*
SinCos():
r1 = mul(r0, r0); // r0^2
r2 = mul(r1, r0); // r0^3 // probably stall
r3 = mul(r1, r1); // r0^4
r4 = mul(r2, r2); // r0^6
r5 = mad( r1, vCos.yyyy, vCos.xxxx );
r6 = mad( r2, vSin.yyyy, r0 );
r5 = mad( r3, vCos.zzzz, r5 );
r6 = mad( r4, vSin.zzzz, r6 );
*/
// (The sketches above are illustrative only: the live sincos further down
// pairs vSin.z with r0^5, not r0^6, and carries both series one term further.)
/*
consts
kOneOverEightNsqPi = 1.f / ( 8.f * Pi * 4.f * 4.f );
kPiOverTwo = Pi / 2.f;
kTwoPi = Pi * 2.f;
kPi = Pi;
*/
/*
CONSTANT REGISTERS
VOLATILE CONSTS - change per invocation
C0-C3 local2proj matrix
C4 color
C5 freq vector
C6 phase vector
C7 amplitude vector
C8 center0
C9 center1
C10 center2
C11 center3
C12 scrunch = (scrunch, -scrunch, 0, 1);
CONSTANT CONSTS - forever more
C13 SinConsts = (1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f);
C14 CosConsts = (1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
C15 PiConsts = (1.f / 8*Pi*N^2, Pi/2, Pi, 2*Pi);
C16 numberConsts = (0.f, 0.5f, 1.f, 2.f);
//=====================================
TEMP REGISTERS
r6 accumPos
r7 accumCos
r8 toCenter_Y
r9 toCenter_X
r11 filter
r10 tempFloat
*/
// NOTE(review): the register table above looks stale relative to the code below,
// which uses the following (confirm against the code that uploads the constants):
// c8 / c9 multiplied with pos.x / pos.y per wave and dotted with the cosine
// terms -- these appear to be the per-wave direction x / y components
// c10 / c11 dp4'd with v0 to produce the oT0 u and v
// c17 camera position (the vertex-to-camera vectors are built from it)
// c18 wind direction terms, read as .zxw, .xyww and .zxww
// c19 xyz = F (envCenter - camPos), w = G (F^2 - R^2)
// c20 scale applied to the attenuation written to oD1
// Also, r8 is never written by live code and r9 is only used as scratch.
// const float4 kCosConsts = float4(1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
// const float4 kSinConsts = float4(1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f);
// const float4 kPiConsts = float4(1.f / (8.f * 3.1415f * 16f), 3.1415f*0.5f, 3.1415f, 3.1415f*2.f);
// const float4 k0512 = float4(0.f, 0.5f, 1.f, 2.f);
// accumPos = inPos;
mov r6, v0;
//
// For each wave
// {
// // First, we want to filter out waves based on distance from the local origin
// dist = dp3(inPos, inPos);
dp3 r0, r6, r6;
// dist *= kFreqSq.xyzw;
mul r0, r0, c5;
mul r0, r0, c5;
// dist *= kOneOverEightNsqPi; // combine this into kFreqSq?
mul r0, r0, c15.xxxx;
// dist = min(dist, kPiOverTwo);
min r0, r0, c15.yyyy;
// filter = cos(dist);
// (three-term series: 1 + dist^2 * vCos.y + dist^4 * vCos.z)
mul r1, r0, r0; // r0^2
mul r2, r1, r1; // r0^4 (r1^2)
mul r1, r1, c14.yyyy;
add r11, r1, c14.xxxx;
mad r11, r2, c14.zzzz, r11;
// filter *= kAmplitude.xyzw;
// mul r11, r11, c7;
// // Notice that if dist is a 4vec, all this can be simultaneously done for 4 waves at a time.
//
// Find the x/y distances and stuff them into r9(x) and r8(y) respectively
// toCenter_X.x = dir0.x * pos.x;
// toCenter_Y.x = dir0.y * pos.y;
// (The live code folds both products straight into r0:
// r0 = c8 * pos.x + c9 * pos.y, one lane per wave.)
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// (Pi is added first so that frac() followed by the "-= Pi" below
// recenters the wrapped range on zero.)
rcp r4, c15.wwww;
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// (expp leaves the fractional part in .y; r1.y is the scratch lane,
// so the y component is computed last.)
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2;
mad r1, r4, c14.wwww, r1;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c7;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
// (The dp4 overwrites r6.z rather than adding to it.)
dp4 r6.z, r2, c16.zzzz;
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c5;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c7;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c16.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
dp4 r7.x, r1, -c8
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c9
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
// r11 is reused from here on: the filter is no longer needed, and r11 now
// holds the normalized (accumCos.x, accumCos.y, 1) normal.
mov r11, c16.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// // Scrunch in based on computed (normalized) normal
// temp = mul( accumNorm, kNegScrunchScale ); // kNegScrunchScale = (-scrunchScale, -scrunchScale, 0, 0);
// accumPos += temp;
dp3 r10.x, r11, c18.zxw; // winddir.x, winddir.y, 0, 0
// r10.x tells us whether our normal is opposed to the wind.
// If opposed, r10.x = 0, else r10.x = 1.f;
// We'll use this to kill the Scrunch on the back sides of waves.
// We use it for position right here, and then again for the
// normal just down a bit further.
// NOTE(review): slt yields 1.0 when the dot product above is negative and 0.0
// otherwise; confirm that matches the "opposed" sense described above.
slt r10.x, r10.x, c16.x;
mul r9, r10.xxxx, r11;
mad r6, r9, c12.yyzz, r6;
// mul r6.z, r6.z, r10.xxxx; DEBUG
// mad r6, r11, c12.yyzz, r6;
// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
// accumCos *= (scrunchScale, scrunchScale, 0, 0);
// (As written: r2.x = 1 + height * c12.x * r10.x, and that factor scales accumCos.xy.)
mul r2.x, r6.z, c12.x;
mul r2.x, r2.x, r10.x; // ???
add r2.x, r2.x, c16.z;
// mul r7, r7, c12.xxzz;
mul r7.xy, r7.xy, r2.xx;
// This is actually wrong, but useful right now for visualizing the generated coords.
// See below for correct version.
// (r3 = (-accumCos.x, -accumCos.y, 1, 0); r7.z is still zero here.)
sub r3, c16.xxzx, r7.xyzz;
// Normalize?
// We can either calculate an orthonormal basis from the
// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
// or compute our basis directly from the partial derivatives, with
// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
//
// These work out to identically the same result, so we'll compute directly
// from the partials because it takes 2 fewer instructions.
//
// Note that our basis is NOT orthonormal. The Normal is equal to
// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
// are both correct tangents to the surface, and their projections on the XY plane
// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
// Not really. I'm actually not really sure which is more "proper" for bump mapping.
//
// Note also that we add when we should subtract and subtract when we should
// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
// explanation.
//
// Binormal = Y % Normal
// Cross product3 is:
// mul res.xyz, a.yzx, b.zxy
// mad res.xyz, -a.zxy, b.yzx, res.xyz
// mul r1.xyz, c16.zxx, r3.zxy;
// mad r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
// Tangent = Normal % X
// mul r2.xyz, r3.yzx, c16.xzx;
// mad r2.xyz, -r3.zxy, c16.xxz, r2;
// (r1 = (1, 0, accumCos.x, 0), r2 = (0, 1, accumCos.y, 0))
add r1, c16.zxxx, r7.zzxz;
add r2, c16.xzxx, r7.zzyz;
// Note that we're swapping z and y to match our environment map tools in max.
// We do this through our normal map transform (oT1, oT2, oT3), making it
// a concatenation of:
//
// rotate about Z (blue) to turn our map into the wind
// windRot = | dirY -dirX 0 |
// | dirX dirY 0 |
// | 0 0 1 |
//
// swap our Y and Z axes to match our environment map
// swapYZ = | 1 0 0 |
// | 0 0 1 |
// | 0 1 0 |
//
// rotate the normal into the surface's tangent space basis
// basis = | Bx Tx Nx |
// | By Ty Ny |
// | Bz Tz Nz |
//
// Note that we've constructed the basis by taking advantage of the
// matrix being a pure rotation, as noted below, so r1, r2 and r3
// are actually constructed as:
// basis = | Bx -By -Bz |
// | -Tx Ty -Tz |
// | -Nx -Ny -Nz |
//
// Then the final normal map transform is:
//
// basis * swapYZ * windRot [ * normal ]
// sub r1.w, c17.x, r6.x;
// sub r2.w, c17.z, r6.z;
// sub r3.w, c17.y, r6.y;
// Big note here. All this math can blow up if the camera position
// is outside the environment sphere. It's assumed that's dealt
// with in the app setting up the constants. For that reason, the
// camera position used here might not be the real local camera position,
// which is needed for the angular attenuation, so we burn another constant
// with our pseudo-camera position. To restrain the pseudo-camera from
// leaving the sphere, we make:
// pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
// where dist = |realPos - envCenter|
// So, our "finitized" eyeray is:
// camPos + D * t - envCenter = D * t - (envCenter - camPos)
// with
// D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
// and
// t = D dot F + sqrt( (D dot F)^2 - G )
// with
// F = (envCenter - camPos) => c19.xyz
// G = F^2 - R^2 => c19.w
// R = environment radius. => unused
//
// This all derives from the positive root of equation
// (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
// In other words, where on a sphere of radius R centered about envCenter
// does the ray from the real camera position through this point hit.
//
// Note that F, G, and R are all constants (one point, two scalars).
//
// So first we calculate D into r0,
// then D dot F into r10.x,
// then (D dot F)^2 - G into r10.y
// then rsq( (D dot F)^2 - G ) into r9.x;
// then t = r10.z = r10.x + r10.y * r9.x;
// and
// r0 = D * t - (envCenter - camPos)
// = r0 * r10.zzzz - F;
//
sub r0, r6, c17; // r0 = pos - camPos
dp3 r10.x, r0, r0;
rsq r10.x, r10.x;
mul r0, r0, r10.xxxx; // r0 = D
dp3 r10.x, r0, c19; // r10.x = D dot F
mad r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G
rsq r9.x, r10.y; // r9.x = 1/SQRT((D dot F)^2 - G)
mad r10.z, r10.y, r9.x, r10.x; // r10.z = D dot F + SQRT((D dot F)^2 - G)
mad r0.xyz, r0, r10.zzz, -c19.xyz; // r0.xyz = D * t - (envCenter - camPos)
// Stash the negated eye ray in the w lanes of the three basis rows.
mov r1.w, -r0.x;
mov r2.w, -r0.y;
mov r3.w, -r0.z;
// Now rotate our basis vectors into the wind
dp3 r0.x, r1, c18.xyww;
dp3 r0.y, r1, c18.zxww;
mov r1.xy, r0;
dp3 r0.x, r2, c18.xyww;
dp3 r0.y, r2, c18.zxww;
mov r2.xy, r0;
dp3 r0.x, r3, c18.xyww;
dp3 r0.y, r3, c18.zxww;
mov r3.xy, r0;
// Normalize the xyz of each row; r0.w = c16.z so the w lane (eye ray
// component) passes through unscaled. Note r2 goes to oT3 and r3 to oT2
// (the Y/Z swap described above).
mov r0.w, c16.zzzz;
dp3 r0.x, r1, r1;
rsq r0.x, r0.x;
mul oT1, r1.xyzw, r0.xxxw;
// mul r8, r1.xyzw, r0.xxxw; // VISUAL
dp3 r0.x, r2, r2;
rsq r0.x, r0.x;
mul oT3, r2.xyzw, r0.xxxw;
// mul r9, r2.xyzw, r0.xxxw; // VISUAL
dp3 r0.x, r3, r3;
rsq r0.x, r0.x;
mul oT2, r3.xyzw, r0.xxxw;
// mul r9, r3.xyzw, r0.xxxw; // VISUAL
// mul r3, r3.xzyw, r0.xxxw;
// mul r3.xy, r3, -c16.zzzz;
/*
// Want:
// oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
// oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
// ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
// with BIN, TAN, and NORM normalized.
// Unnormalized, we have
// BIN = (1, 0, -r7.x) where r7 == accumCos
// TAN = (0, 1, -r7.y)
// NORM= (r7.x, r7.y, 1)
// So, unnormalized, we have
// oT1 = (1, 0, r7.x, view2pos.x)
// oT2 = (0, 1, r7.y, view2pos.y)
// oT3 = (-r7.x, -r7.y, 1, view2pos.z)
// which is just reversing the signs on the accumCos
// terms above. So the normalized version is just
// reversing the signs on the normalized version above.
*/
//mov oT3, r4;
//
// // Transform position to screen
//
//
m4x4 oPos, r6, c0;
// Still need to attenuate based on position
mov oD0, c4;
// This should be in local space after xforming v0
dp4 r0.x, v0, c10;
dp4 r0.y, v0, c11;
mov r0.zw, c16.xxxz;
mov oT0, r0
// mov oT0, v7;
// Questionable attenuation follows
// Find vector from this point to camera and normalize
sub r0, c17, r6;
dp3 r1.x, r0, r0;
rsq r1.x, r1.x;
mul r0, r0, r1.xxxx;
// Dot that with the computed normal
dp3 r1.x, r0, r11;
// dp3 r1.x, r0, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
// Map dot=1 => 0, dot=0 => 1
// (all four lanes get 1 - dot; the w lane is then remapped to (2 - dot) * 0.5)
sub r1.xyzw, c16.zzzz, r1.xxxx;
add r1.w, r1.wwww, c16.zzzz;
mul r1.w, r1.wwww, c16.yyyy;
// No need to clamp, since the destination register (in the pixel shader)
// will saturate [0..1] anyway.
mul oD1, r1, c20;
// mov oD1, r9;
// mov oD1, r8.xzyw;
vs.1.1
// Vertex shader 1.1. Reads only the position stream (v0), displaces it by a
// sum of four sine waves (one wave per vector component), and writes:
//   oPos      - displaced position transformed by c0..c3
//   oT0       - UV built from v0 dotted with c10/c11
//   oT1..oT3  - rows of the per-vertex bump basis, with a ray in .w
//   oD0       - constant color c4
//   oD1       - view-angle attenuation scaled by c20
dcl_position v0
//m4x4 oPos, v0, c0
/*
In fact, I was trying to understand how it was possible to expand FRC into 4
instructions...
Actually, I can do it in 7 instructions :)
EXPP r0.y, r1.xxxx
MOV r0.x, r0.y
EXPP r0.y, r1.zzzz
MOV r0.z, r0.y
EXPP r0.y, r1.wwww
MOV r0.w, r0.y
EXPP r0.y, r1.yyyy
*/
/*
// Constants for sin and cos. 3 term approximation seems plenty
// (it's what i used for software sim, and had no visibly different
// results than the math library functions).
// When doing sin/cos together, some speedup might be obtained
// with good pairing of ops doing them simultaneously. Also save
// an instruction calculating r0^3.
D3DXVECTOR4 vSin( 1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f );
D3DXVECTOR4 vCos( 1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f );
*/
/*
Cos():
r1 = mul(r0, r0); // r0^2
r2 = mul(r1, r1); // r0^4
//cos
r3 = mad( r1, vCos.yyyy, vCos.xxxx );
r3 = mad( r2, vCos.zzzz, r3 );
*/
/*
Sin();
r1 = mul(r0, r0); // r0^3
r1 = mul(r0, r1);
r2 = mul(r1, r1); // r0^6
r3 = mad( r1, vSin.yyyy, r0 );
r3 = mad( r2, vSin.zzzz, r3 );
*/
/*
SinCos():
r1 = mul(r0, r0); // r0^2
r2 = mul(r1, r0); // r0^3 // probably stall
r3 = mul(r1, r1); // r0^4
r4 = mul(r2, r2); // r0^6
r5 = mad( r1, vCos.yyyy, vCos.xxxx );
r6 = mad( r2, vSin.yyyy, r0 );
r5 = mad( r3, vCos.zzzz, r5 );
r6 = mad( r4, vSin.zzzz, r6 );
*/
/*
consts
kOneOverEightNsqPi = 1.f / ( 8.f * Pi * 4.f * 4.f );
kPiOverTwo = Pi / 2.f;
kTwoPi = Pi * 2.f;
kPi = Pi;
*/
/*
CONSTANT REGISTERS
VOLATILE CONSTS - change per invocation
C0-C3 local2proj matrix
C4 color
C5 freq vector
C6 phase vector
C7 amplitude vector
C8 center0
C9 center1
C10 center2
C11 center3
C12 scrunch = (scrunch, -scrunch, 0, 1);
CONSTANT CONSTS - forever more
C13 SinConsts = (1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f);
C14 CosConsts = (1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
C15 PiConsts = (1.f / 8*Pi*N^2, Pi/2, Pi, 2*Pi);
C16 numberConsts = (0.f, 0.5f, 1.f, 2.f);
//=====================================
TEMP REGISTERS
r6 accumPos
r7 accumCos
r8 toCenter_Y
r9 toCenter_X
r11 filter
r10 tempFloat
*/
// const float4 kCosConsts = float4(1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
// const float4 kSinConsts = float4(1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f);
// const float4 kPiConsts = float4(1.f / (8.f * 3.1415f * 16.f), 3.1415f*0.5f, 3.1415f, 3.1415f*2.f);
// const float4 k0512 = float4(0.f, 0.5f, 1.f, 2.f);
// accumPos = inPos;
mov r6, v0;
//
// For each wave
// {
// // First, we want to filter out waves based on distance from the local origin
// dist = dp3(inPos, inPos);
// Note: dp3 yields the squared length; it is broadcast to all four components.
dp3 r0, r6, r6;
// dist *= kFreqSq.xyzw;
// (done as two multiplies by the frequency vector c5)
mul r0, r0, c5;
mul r0, r0, c5;
// dist *= kOneOverEightNsqPi; // combine this into kFreqSq?
mul r0, r0, c15.xxxx;
// dist = min(dist, kPiOverTwo);
min r0, r0, c15.yyyy;
// filter = cos(dist);
// Polynomial: r11 = c14.x + r0^2 * c14.y + r0^4 * c14.z
mul r1, r0, r0; // r0^2
mul r2, r1, r1; // r1^2
mul r1, r1, c14.yyyy;
add r11, r1, c14.xxxx;
mad r11, r2, c14.zzzz, r11;
// r11 now holds the per-wave filter; it is consumed when sin/cos are scaled below.
// filter *= kAmplitude.xyzw;
// mul r11, r11, c7;
// // Notice that if dist is a 4vec, all this can be simultaneously done for 4 waves at a time.
//
// Project the position onto each wave's direction. The result goes into r0,
// one wave per component: r0 = c8 * pos.x + c9 * pos.y.
// toCenter_X.x = dir0.x * pos.x;
// toCenter_Y.x = dir0.y * pos.y;
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// Implemented as frac((dist + c15.z) / c15.w) * c15.w - c15.z.
rcp r4, c15.wwww;
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// vs.1.1 has no native frc here; expp leaves the fractional part of its
// source in the destination's .y. Each result is copied out of r1.y before
// the next expp overwrites it, and the .y component itself is done last so
// it stays in place.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (a fourth term of each series is added at the end of this section)
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2;
mad r1, r4, c14.wwww, r1;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c7;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
// The dp4 against (1,1,1,1) sums the four waves and overwrites r6.z, so any
// z that came in with v0 is discarded.
dp4 r6.z, r2, c16.zzzz;
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c5;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c7;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c16.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
// The direction vectors are negated here (-c8 / -c9).
dp4 r7.x, r1, -c8
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c9
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
// r11 is reused from here on: it stops being the filter and becomes the
// normalized normal (r7.x, r7.y, 1) / length.
mov r11, c16.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// // Scrunch in based on computed (normalized) normal
// temp = mul( accumNorm, kNegScrunchScale ); // kNegScrunchScale = (-scrunchScale, -scrunchScale, 0, 0);
// accumPos += temp;
dp3 r10.x, r11, c18.zxw; // winddir.x, winddir.y, 0, 0
// r10.x tells us whether our normal is opposed to the wind.
// If opposed, r10.x = 0, else r10.x = 1.f;
// We'll use this to kill the Scrunch on the back sides of waves.
// We use it for position right here, and then again for the
// normal just down a bit further.
// NOTE(review): slt writes 1.0 when the dot product is less than c16.x and
// 0.0 otherwise, so the scrunch is kept where the dot is below c16.x. Whether
// that matches the "opposed => 0" wording above depends on the sign
// convention of c18 - confirm against the app-side constant setup.
slt r10.x, r10.x, c16.x;
mul r9, r10.xxxx, r11;
// Offset the position by the masked normal times c12.yyzz.
mad r6, r9, c12.yyzz, r6;
// mul r6.z, r6.z, r10.xxxx; DEBUG
// mad r6, r11, c12.yyzz, r6;
// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
// accumCos *= (scrunchScale, scrunchScale, 0, 0);
// Scale factor applied to accumCos.xy: r2.x = height * c12.x * mask + c16.z
mul r2.x, r6.z, c12.x;
mul r2.x, r2.x, r10.x; // ???
add r2.x, r2.x, c16.z;
// mul r7, r7, c12.xxzz;
mul r7.xy, r7.xy, r2.xx;
// This is actually wrong, but useful right now for visualizing the generated coords.
// See below for correct version.
// r7.z is still zero here, so r3 = (-r7.x, -r7.y, c16.z, c16.x).
sub r3, c16.xxzx, r7.xyzz;
// Normalize?
// We can either calculate an orthonormal basis from the
// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
// or compute our basis directly from the partial derivatives, with
// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
//
// These work out to identically the same result, so we'll compute directly
// from the partials because it takes 2 fewer instructions.
//
// Note that our basis is NOT orthonormal. The Normal is equal to
// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
// are both correct tangents to the surface, and their projections on the XY plane
// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
// Not really. I'm actually not really sure which is more "proper" for bump mapping.
//
// Note also that we add when we should subtract and subtract when we should
// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
// explanation.
//
// Binormal = Y % Normal
// Cross product3 is:
// mul res.xyz, a.yzx, b.zxy
// mad res.xyz, -a.zxy, b.yzx, res.xyz
// mul r1.xyz, c16.zxx, r3.zxy;
// mad r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
// Tangent = Normal % X
// mul r2.xyz, r3.yzx, c16.xzx;
// mad r2.xyz, -r3.zxy, c16.xxz, r2;
// Build the first two rows of the basis directly from the accumulated
// cosine terms. r7.z is zero at this point (r7 was cleared and only .xy
// written), so with c16.x = 0 and c16.z = 1:
//   r1 = (1, 0, r7.x, 0)
//   r2 = (0, 1, r7.y, 0)
add r1, c16.zxxx, r7.zzxz;
add r2, c16.xzxx, r7.zzyz;
// Note that we're swapping z and y to match our environment map tools in max.
// We do this through our normal map transform (oT1, oT2, oT3), making it
// a concatenation of:
//
// rotate about Z (blue) to turn our map into the wind
// windRot = | dirY -dirX 0 |
// | dirX dirY 0 |
// | 0 0 1 |
//
// swap our Y and Z axes to match our environment map
// swapYZ = | 1 0 0 |
// | 0 0 1 |
// | 0 1 0 |
//
// rotate the normal into the surface's tangent space basis
// basis = | Bx Tx Nx |
// | By Ty Ny |
// | Bz Tz Nz |
//
// Note that we've constructed the basis by taking advantage of the
// matrix being a pure rotation, as noted below, so r1, r2 and r3
// are actually constructed as:
// basis = | Bx -By -Bz |
// | -Tx Ty -Tz |
// | -Nx -Ny -Nz |
//
// Then the final normal map transform is:
//
// basis * swapYZ * windRot [ * normal ]
// sub r1.w, c17.x, r6.x;
// sub r2.w, c17.z, r6.z;
// sub r3.w, c17.y, r6.y;
// Big note here. All this math can blow up if the camera position
// is outside the environment sphere. It's assumed that's dealt
// with in the app setting up the constants. For that reason, the
// camera position used here might not be the real local camera position,
// which is needed for the angular attenuation, so we burn another constant
// with our pseudo-camera position. To restrain the pseudo-camera from
// leaving the sphere, we make:
// pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
// where dist = |realPos - envCenter|
// So, our "finitized" eyeray is:
// camPos + D * t - envCenter = D * t - (envCenter - camPos)
// with
// D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
// and
// t = D dot F + sqrt( (D dot F)^2 - G )
// with
// F = (envCenter - camPos) => c19.xyz
// G = F^2 - R^2 => c19.w
// R = environment radius. => unused
//
// This all derives from the positive root of equation
// (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
// In other words, where on a sphere of radius R centered about envCenter
// does the ray from the real camera position through this point hit.
//
// Note that F, G, and R are all constants (one point, two scalars).
//
// So first we calculate D into r0,
// then D dot F into r10.x,
// then (D dot F)^2 - G into r10.y
// then rsq( (D dot F)^2 - G ) into r9.x;
// then t = r10.z = r10.x + r10.y * r9.x;
// and
// r0 = D * t - (envCenter - camPos)
// = r0 * r10.zzzz - F;
//
// D = normalize(pos - c17), kept in r0.
sub r0, r6, c17;
dp3 r10.x, r0, r0;
rsq r10.x, r10.x;
mul r0, r0, r10.xxxx;
// r10.x = D dot F (F = c19.xyz)
dp3 r10.x, r0, c19;
// r10.y = (D dot F)^2 - G (G = c19.w)
mad r10.y, r10.x, r10.x, -c19.w;
// sqrt(y) is formed as y * rsq(y); t = r10.z = (D dot F) + sqrt(r10.y)
rsq r9.x, r10.y;
mad r10.z, r10.y, r9.x, r10.x;
// r0.xyz = D * t - F
mad r0.xyz, r0, r10.zzz, -c19.xyz;
// The negated ray goes into the .w of the three basis rows, one component each.
mov r1.w, -r0.x;
mov r2.w, -r0.y;
mov r3.w, -r0.z;
// Now rotate our basis vectors into the wind
// Each row's .xy is replaced by its dot products with c18.xyww and c18.zxww.
// Results are staged in r0.xy because both dot products read the row's
// original .xy. .z and .w of each row are untouched by the rotation.
dp3 r0.x, r1, c18.xyww;
dp3 r0.y, r1, c18.zxww;
mov r1.xy, r0;
dp3 r0.x, r2, c18.xyww;
dp3 r0.y, r2, c18.zxww;
mov r2.xy, r0;
dp3 r0.x, r3, c18.xyww;
dp3 r0.y, r3, c18.zxww;
mov r3.xy, r0;
// r0.w = c16.z so the .w (ray) component passes through the normalizing
// multiplies below with only that scale applied.
mov r0.w, c16.zzzz;
// Normalize xyz of each row and write it out. Note r2 goes to oT3 and r3
// goes to oT2.
dp3 r0.x, r1, r1;
rsq r0.x, r0.x;
mul oT1, r1.xyzw, r0.xxxw;
// mul r8, r1.xyzw, r0.xxxw; // VISUAL
dp3 r0.x, r2, r2;
rsq r0.x, r0.x;
mul oT3, r2.xyzw, r0.xxxw;
// mul r9, r2.xyzw, r0.xxxw; // VISUAL
dp3 r0.x, r3, r3;
rsq r0.x, r0.x;
mul oT2, r3.xyzw, r0.xxxw;
// mul r9, r3.xyzw, r0.xxxw; // VISUAL
// mul r3, r3.xzyw, r0.xxxw;
// mul r3.xy, r3, -c16.zzzz;
/*
// Want:
// oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
// oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
// ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
// with BIN, TAN, and NORM normalized.
// Unnormalized, we have
// BIN = (1, 0, -r7.x) where r7 == accumCos
// TAN = (0, 1, -r7.y)
// NORM= (r7.x, r7.y, 1)
// So, unnormalized, we have
// oT1 = (1, 0, r7.x, view2pos.x)
// oT2 = (0, 1, r7.y, view2pos.y)
// oT3 = (-r7.x, -r7.y, 1, view2pos.z)
// which is just reversing the signs on the accumCos
// terms above. So the normalized version is just
// reversing the signs on the normalized version above.
*/
//mov oT3, r4;
//
// // Transform position to screen
//
//
// Project the displaced position with the matrix in c0..c3.
m4x4 oPos, r6, c0;
// Still need to attenuate based on position
mov oD0, c4;
// This should be in local space after xforming v0
// UV comes from the original (undisplaced) v0, not from r6.
dp4 r0.x, v0, c10;
dp4 r0.y, v0, c11;
// oT0.zw = (c16.x, c16.z)
mov r0.zw, c16.xxxz;
mov oT0, r0
// mov oT0, v7;
// Questionable attenuation follows
// Find vector from this point to camera and normalize
sub r0, c17, r6;
dp3 r1.x, r0, r0;
rsq r1.x, r1.x;
mul r0, r0, r1.xxxx;
// Dot that with the computed normal
dp3 r1.x, r0, r11;
// dp3 r1.x, r0, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
// Map dot=1 => 0, dot=0 => 1
// That mapping holds for r1.xyz (c16.z - dot). r1.w is further remapped to
// (c16.z - dot + c16.z) * c16.y.
// NOTE(review): with numberConsts = (0, 0.5, 1, 2) that gives w = 0.5 at
// dot=1 and w = 1 at dot=0 - confirm this is intended.
sub r1.xyzw, c16.zzzz, r1.xxxx;
add r1.w, r1.wwww, c16.zzzz;
mul r1.w, r1.wwww, c16.yyyy;
// No need to clamp, since the destination register (in the pixel shader)
// will saturate [0..1] anyway.
mul oD1, r1, c20;
// mov oD1, r9;
// mov oD1, r8.xzyw;

View File

@ -1,243 +1,243 @@
vs.1.1
// Vertex shader 1.1. Inputs: position (v0), diffuse color (v5), texcoord (v7).
// Moves the vertex with a four-wave sine sum in world space, then writes
// oPos, oFog, an age/depth attenuated color (oD0) and an age-scaled UV (oT0).
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c25; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c16.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = reflection strength (transparency)
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c30 = waterlevel + offset
// c31 = (maxAtten - minAtten) / depthFalloff
// c32 = minAtten.
// And in particular:
// c30.w = waterlevel
// c31.w = 1.f;
// c32.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// One wave per component: r0 = c8 * pos.x + c9 * pos.y.
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// Implemented as frac((dist + c15.z) / c15.w) * c15.w - c15.z.
rcp r4, c15.wwww;
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp leaves the fractional part of its source in the destination's .y.
// Each result is copied out of r1.y before the next expp overwrites it;
// the .y component is processed last so it stays in place.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (a fourth term of each series is added at the end of this section)
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2;
mad r1, r4, c14.wwww, r1;
// Result: r2 = sin(dist), r1 = cos(dist), one wave per component.
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c30 - worldZ) * c31 + c32, per channel. r4 was a sincos scratch
// register until now.
sub r4, c30, r6.zzzz;
mul r4, r4, c31;
add r4, r4, c32;
// Clamp .xyz to range [0..1]
// (.w is left unclamped)
min r4.xyz, r4, c16.zzzz;
max r4.xyz, r4, c16.xxxx;
//mov r4.xyz, c16.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c29, 0, 1), one value per wave.
mul r11, v5.wwww, c29;
max r11, r11, c16.xxxx;
min r11, r11, c16.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c7;
// height = dp4(sinDist, kOne);
// The summed height goes to r8.x, is scaled by the depth attenuation r4.z,
// offset by c30.w, and r6.z takes the larger of that and its current value.
dp4 r8.x, r2, c16.zzzz;
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c30.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c5;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c7;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c16.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
// The direction vectors are negated here (-c8 / -c9).
dp4 r7.x, r1, -c8
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c9
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
// r11 is reused: it stops being the filter and becomes the normalized
// normal (r7.x, r7.y, 1) / length.
mov r11, c16.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
// pos.xy += normal.xy * (c12.y * r4.z)
mul r10.x, c12.y, r4.z;
mad r6.xy, r11.xy, r10.xx, r6.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c35.z, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c35.z;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c25; // HACKAGE
//mov r6.w, c16.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// Project into r9 first so the projected w can feed the fog term before
// the position is written out.
m4x4 r9, r6, c0;
// oFog = (projected w + c4.x) * c4.y
add r10.x, r9.w, c4.x;
mul oFog, r10.x, c4.y;
mov oPos, r9;
// Dyna Stuff
// Constants
// c33 = fC1U, fC2U, fC1V, fC2V
// c34 = fInitAtten, t, life, 1.f / (life-decay)
// c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE
//
// Vertex Info
// v7.z = fBirth (because we don't use it for anything else).
//
// Initialize r1.zw to 0,1
mov r1, c16.xxxz;
// Calc r1.x = age, r1.y = atten
// age = t - birth.
sub r1.x, c34.y, v7.z;
// atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay));
// first clamp0_1(age/ramp)
mul r1.y, r1.x, c35.y;
min r1.y, r1.y, c16.z; // Clamp to one (can't go negative).
// now clamp0_1((life-age) / (life-decay));
// r1.z is used as scratch for this term, so it no longer holds the 0 it
// was initialized with.
sub r1.z, c34.z, r1.x;
mul r1.z, r1.z, c34.w;
min r1.z, r1.z, c16.z; // Clamp to one
max r1.z, r1.z, c16.x; // Clamp to zero
mul r1.y, r1.y, r1.z; // atten is the product of the two terms.
// color is (atten, atten, atten, 1.f)
// Need to calculate opacity we would have had from vs_WaveFixedFin6.inl
// Right now that's just modulating by r4.y.
// oD0 = (r4.y * fInitAtten) * (atten, atten, atten, 1)
mul r0.y, r4.y, c34.x;
mul oD0, r0.yyyy, r1.yyyw;
//mov oD0, c16.zzzz; // HACKTEST
// UVW = (inUVW - 0.5) * scale + 0.5
// where:
// scale = (fC1U / (age * fC2U + 1.f)), fC1V / (age * fC2V + 1.f), 1.f, 1.f
mov r2, c16.xxxz;
mul r2.xy, r1.xx, c33.yw;
add r2.xy, r2.xy, c16.zz;
rcp r2.x, r2.x;
rcp r2.y, r2.y;
mul r2.xy, r2.xy, c33.xz;
// r1.xy (age / atten) are no longer needed and are overwritten with the UV.
sub r1.xy, v7.xy, c16.yy;
mul r1.xy, r1.xy, r2.xy;
add r1.xy, r1.xy, c16.yy;
// NOTE(review): r1.z still holds the clamped life term from above, so
// oT0.z is that value rather than 0 - confirm nothing downstream reads it.
mov oT0, r1;
vs.1.1
// Vertex shader 1.1. Inputs: position (v0), diffuse color (v5), texcoord (v7).
// Moves the vertex with a four-wave sine sum in world space, then writes
// oPos, oFog, an age/depth attenuated color (oD0) and an age-scaled UV (oT0).
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c25; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c16.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = reflection strength (transparency)
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c30 = waterlevel + offset
// c31 = (maxAtten - minAtten) / depthFalloff
// c32 = minAtten.
// And in particular:
// c30.w = waterlevel
// c31.w = 1.f;
// c32.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// One wave per component: r0 = c8 * pos.x + c9 * pos.y.
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// Implemented as frac((dist + c15.z) / c15.w) * c15.w - c15.z.
rcp r4, c15.wwww;
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp leaves the fractional part of its source in the destination's .y.
// Each result is copied out of r1.y before the next expp overwrites it;
// the .y component is processed last so it stays in place.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (a fourth term of each series is added at the end of this section)
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2;
mad r1, r4, c14.wwww, r1;
// Result: r2 = sin(dist), r1 = cos(dist), one wave per component.
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c30 - worldZ) * c31 + c32, per channel. r4 was a sincos scratch
// register until now.
sub r4, c30, r6.zzzz;
mul r4, r4, c31;
add r4, r4, c32;
// Clamp .xyz to range [0..1]
// (.w is left unclamped)
min r4.xyz, r4, c16.zzzz;
max r4.xyz, r4, c16.xxxx;
//mov r4.xyz, c16.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c29, 0, 1), one value per wave.
mul r11, v5.wwww, c29;
max r11, r11, c16.xxxx;
min r11, r11, c16.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c7;
// height = dp4(sinDist, kOne);
// The summed height goes to r8.x, is scaled by the depth attenuation r4.z,
// offset by c30.w, and r6.z takes the larger of that and its current value.
dp4 r8.x, r2, c16.zzzz;
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c30.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= kFreq.xyzw;
mul r1, r1, c5;
// cosDist *= kAmplitude.xyzw; // Combine?
mul r1, r1, c7;
// cosDist *= filter;
mul r1, r1, r11;
//
// accumCos = (0, 0, 0, 0);
mov r7, c16.xxxx;
// temp = dp4( cosDist, toCenter_X );
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
// The direction vectors are negated here (-c8 / -c9).
dp4 r7.x, r1, -c8
//
// temp = dp4( cosDist, toCenter_Y );
// accumCos.y += temp.xxxx;
dp4 r7.y, r1, -c9
//
// }
//
// accumBin = (1, 0, -accumCos.x);
// accumTan = (0, 1, -accumCos.y);
// accumNorm = (accumCos.x, accumCos.y, 1);
// r11 is reused: it stops being the filter and becomes the normalized
// normal (r7.x, r7.y, 1) / length.
mov r11, c16.xxzx;
add r11, r11, r7;
dp3 r10.x, r11, r11;
rsq r10.x, r10.x;
mul r11, r11, r10.xxxx;
//
// Add in our scrunch (offset in X/Y plane).
// Scale down our scrunch amount by the wave scaling
// pos.xy += normal.xy * (c12.y * r4.z)
mul r10.x, c12.y, r4.z;
mad r6.xy, r11.xy, r10.xx, r6.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c35.z, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c35.z;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c25; // HACKAGE
//mov r6.w, c16.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// Project into r9 first so the projected w can feed the fog term before
// the position is written out.
m4x4 r9, r6, c0;
// oFog = (projected w + c4.x) * c4.y
add r10.x, r9.w, c4.x;
mul oFog, r10.x, c4.y;
mov oPos, r9;
// Dyna Stuff
// Constants
// c33 = fC1U, fC2U, fC1V, fC2V
// c34 = fInitAtten, t, life, 1.f / (life-decay)
// c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE
//
// Vertex Info
// v7.z = fBirth (because we don't use it for anything else).
//
// Initialize r1.zw to 0,1
mov r1, c16.xxxz;
// Calc r1.x = age, r1.y = atten
// age = t - birth.
sub r1.x, c34.y, v7.z;
// atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay));
// first clamp0_1(age/ramp)
mul r1.y, r1.x, c35.y;
min r1.y, r1.y, c16.z; // Clamp to one (can't go negative).
// now clamp0_1((life-age) / (life-decay));
// r1.z is used as scratch for this term, so it no longer holds the 0 it
// was initialized with.
sub r1.z, c34.z, r1.x;
mul r1.z, r1.z, c34.w;
min r1.z, r1.z, c16.z; // Clamp to one
max r1.z, r1.z, c16.x; // Clamp to zero
mul r1.y, r1.y, r1.z; // atten is the product of the two terms.
// color is (atten, atten, atten, 1.f)
// Need to calculate opacity we would have had from vs_WaveFixedFin6.inl
// Right now that's just modulating by r4.y.
// oD0 = (r4.y * fInitAtten) * (atten, atten, atten, 1)
mul r0.y, r4.y, c34.x;
mul oD0, r0.yyyy, r1.yyyw;
//mov oD0, c16.zzzz; // HACKTEST
// UVW = (inUVW - 0.5) * scale + 0.5
// where:
// scale = (fC1U / (age * fC2U + 1.f)), fC1V / (age * fC2V + 1.f), 1.f, 1.f
mov r2, c16.xxxz;
mul r2.xy, r1.xx, c33.yw;
add r2.xy, r2.xy, c16.zz;
rcp r2.x, r2.x;
rcp r2.y, r2.y;
mul r2.xy, r2.xy, c33.xz;
// r1.xy (age / atten) are no longer needed and are overwritten with the UV.
sub r1.xy, v7.xy, c16.yy;
mul r1.xy, r1.xy, r2.xy;
add r1.xy, r1.xy, c16.yy;
// NOTE(review): r1.z still holds the clamped life term from above, so
// oT0.z is that value rather than 0 - confirm nothing downstream reads it.
mov oT0, r1;

View File

@ -1,226 +1,226 @@
vs.1.1
// Vertex shader 1.1. Inputs: position (v0), diffuse color (v5), texcoord (v7).
// Raises the vertex with a four-wave sine sum and shifts it horizontally
// with the matching cosine sums, then writes oPos, oFog, an age/depth
// attenuated color (oD0) and an age-scaled UV (oT0).
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c25; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c16.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = reflection strength (transparency)
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c30 = waterlevel + offset
// c31 = (maxAtten - minAtten) / depthFalloff
// c32 = minAtten.
// And in particular:
// c30.w = waterlevel
// c31.w = 1.f;
// c32.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// One wave per component: r0 = c8 * pos.x + c9 * pos.y.
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// Implemented as frac((dist + c15.z) / c15.w) * c15.w - c15.z.
rcp r4, c15.wwww;
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp leaves the fractional part of its source in the destination's .y.
// Each result is copied out of r1.y before the next expp overwrites it;
// the .y component is processed last so it stays in place.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (a fourth term of each series is added at the end of this section)
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2;
mad r1, r4, c14.wwww, r1;
// Result: r2 = sin(dist), r1 = cos(dist), one wave per component.
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c30 - worldZ) * c31 + c32, per channel. r4 was a sincos scratch
// register until now.
sub r4, c30, r6.zzzz;
mul r4, r4, c31;
add r4, r4, c32;
// Clamp .xyz to range [0..1]
// (.w is left unclamped)
min r4.xyz, r4, c16.zzzz;
max r4.xyz, r4, c16.xxxx;
//mov r4.xyz, c16.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c29, 0, 1), one value per wave.
mul r11, v5.wwww, c29;
max r11, r11, c16.xxxx;
min r11, r11, c16.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c7;
// height = dp4(sinDist, kOne);
// The summed height goes to r8.x, is scaled by the depth attenuation r4.z,
// offset by c30.w, and r6.z takes the larger of that and its current value.
dp4 r8.x, r2, c16.zzzz;
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c30.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= filter;
mul r1, r1, r11;
// Pos = (in.x + S, in.y + R, r6.z)
// S = sum(k Dir.x A cos())
// R = sum(k Dir.y A cos())
// c10 = k Dir.x A
// c11 = k Dir.y A
// The horizontal offset is not scaled by the depth attenuation r4; only
// the per-wave filter r11 is applied to it.
// S = sum(cosDist * c10);
dp4 r7.x, r1, c10;
// R = sum(cosDist * c11);
dp4 r7.y, r1, c11;
add r6.xy, r6.xy, r7.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c35.z, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c35.z;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c25; // HACKAGE
//mov r6.w, c16.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
m4x4 r9, r6, c0;
add r10.x, r9.w, c4.x;
mul oFog, r10.x, c4.y;
mov oPos, r9;
// Dyna Stuff
// Constants
// c33 = fC1U, fC2U, fC1V, fC2V
// c34 = fInitAtten, t, life, 1.f / (life-decay)
// c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE
//
// Vertex Info
// v7.z = fBirth (because we don't use it for anything else).
//
// Initialize r1.zw to 0,1
mov r1, c16.xxxz;
// Calc r1.x = age, r1.y = atten
// age = t - birth.
sub r1.x, c34.y, v7.z;
// atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay));
// first clamp0_1(age/ramp)
mul r1.y, r1.x, c35.y;
min r1.y, r1.y, c16.z; // Clamp to one (can't go negative).
// now clamp0_1((life-age) / (life-decay));
sub r1.z, c34.z, r1.x;
mul r1.z, r1.z, c34.w;
min r1.z, r1.z, c16.z; // Clamp to one
max r1.z, r1.z, c16.x; // Clamp to zero
mul r1.y, r1.y, r1.z; // atten is the product of the two terms.
// color is (atten, atten, atten, 1.f)
// Need to calculate opacity we would have had from vs_WaveFixedFin7.inl
// Right now that's just modulating by r4.y.
mul r0.y, r4.y, c34.x;
mul oD0, r0.yyyy, r1.yyyw;
//mov oD0, c16.zzzz; // HACKTEST
// UVW = (inUVW - 0.5) * scale + 0.5
// where:
// scale = (fC1U / (age * fC2U + 1.f)), fC1V / (age * fC2U + 1.f), 1.f, 1.f
mov r2, c16.xxxz;
mul r2.xy, r1.xx, c33.yw;
add r2.xy, r2.xy, c16.zz;
rcp r2.x, r2.x;
rcp r2.y, r2.y;
mul r2.xy, r2.xy, c33.xz;
sub r1.xy, v7.xy, c16.yy;
mul r1.xy, r1.xy, r2.xy;
add r1.xy, r1.xy, c16.yy;
mov oT0, r1;
vs.1.1
//
// Summary
// Displaces a water vertex by a sum of four waves (one wave per xyzw
// lane of the working registers), then computes an age-based color
// attenuation and an age-scaled texture coordinate ("Dyna Stuff" below).
//
// Inputs:
// v0 = position (transformed by c25 into world space)
// v5 = vertex color (only v5.w is read by this shader)
// v7 = texcoord0 (xy = UV, z = fBirth)
//
// Outputs:
// oPos = displaced position transformed by c0
// oFog = (oPos.w + c4.x) * c4.y
// oD0 = r4.y * c34.x * (atten, atten, atten, 1)
// oT0 = age-scaled UV in xy (see NOTE(review) on oT0.z near the end)
//
// Scalar constants as used below:
// c15.z = kPi, c15.w = kTwoPi
// c16.x = 0, c16.y = 0.5, c16.z = 1
//
dcl_position v0
dcl_color v5
dcl_texcoord0 v7
// Store our input position in world space in r6
m4x3 r6, v0, c25; // v0 * l2w
// Fill out our w (m4x3 doesn't touch w).
mov r6.w, c16.z;
//
// Input diffuse v5 color is:
// v5.r = overall transparency
// v5.g = reflection strength (transparency)
// v5.b = overall wave scaling
//
// v5.a is:
// v5.w = 1/(2.f * edge length)
// So per wave filtering is:
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
// and is completely filtered at 2 times sampling frequency.
// We'd like to make this autocalculated based on the depth of the water.
// The frequency filtering (v5.w) still needs to be calculated offline, because
// it's dependent on edge length, but the first 3 filterings can be calculated
// based on this vertex.
// Basically, we want the transparency, reflection strength, and wave scaling
// to go to zero as the water depth goes to zero. Linear falloffs are as good
// a place to start as any.
//
// depth = waterlevel - r6.z => depth in feet (may be negative)
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
// atten = minAtten + depthNorm * (maxAtten - minAtten);
// These are all vector ops.
// This provides separate ramp ups for each of the channels (they reach full unfiltered
// values at different depths), but doesn't provide separate controls for where they
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
// in feet (depth) is probably the most intuitive. So that changes the first calculation
// of depth to:
// depth = waterlevel - r6.z + offset
// = (waterlevel + offset) - r6.z
// And since we only need offsets for 3 channels, we can make the waterlevel constant
// waterlevel[chan] = watertableheight + offset[chan],
// with waterlevel.w = watertableheight.
//
// So:
// c30 = waterlevel + offset
// c31 = (maxAtten - minAtten) / depthFalloff
// c32 = minAtten.
// And in particular:
// c30.w = waterlevel
// c31.w = 1.f;
// c32.w = 0;
// So r4.w is the depth of this vertex in feet.
// Dot our position with our direction vectors.
// c8 holds the x components and c9 the y components, one wave per lane,
// so each lane of r0 becomes pos.x * c8[i] + pos.y * c9[i].
mul r0, c8, r6.xxxx;
mad r0, c9, r6.yyyy, r0;
//
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
mul r0, r0, c5;
add r0, r0, c6;
//
// // Now we need dist mod'd into range [-Pi..Pi]
// dist *= rcp(kTwoPi);
// kPi is added before the divide so that frac() followed by
// "* kTwoPi - kPi" below wraps dist into [-Pi..Pi] around the same phase.
rcp r4, c15.wwww;
add r0, r0, c15.zzzz;
mul r0, r0, r4;
// dist = frac(dist);
// expp leaves the fractional part of its scalar source in .y, so each lane
// is computed into r1.y and copied to its own lane. The y lane is done last
// so that r1.y ends up holding its own result.
expp r1.y, r0.xxxx
mov r1.x, r1.yyyy
expp r1.y, r0.zzzz
mov r1.z, r1.yyyy
expp r1.y, r0.wwww
mov r1.w, r1.yyyy
expp r1.y, r0.yyyy
// dist *= kTwoPi;
mul r0, r1, c15.wwww;
// dist += -kPi;
sub r0, r0, c15.zzzz;
//
// sincos(dist, sinDist, cosDist);
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
// (vSin = c13, vCos = c14; the r0^6 and r0^7 terms are added a few lines down.)
mul r1, r0, r0; // r0^2
mul r2, r1, r0; // r0^3 - probably stall
mul r3, r1, r1; // r0^4
mul r4, r1, r2; // r0^5
mul r5, r2, r3; // r0^7
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// r0^7 & r0^6 terms
mul r4, r4, r0; // r0^6
mad r2, r5, c13.wwww, r2; // r2 += r0^7 * vSin.w
mad r1, r4, c14.wwww, r1; // r1 += r0^6 * vCos.w
// Calc our depth based filtering here into r4 (because we don't use it again
// after here, and we need our filtering shortly).
// r4 = (c30 - worldZ) * c31 + c32, i.e. the depth attenuation described above.
sub r4, c30, r6.zzzz;
mul r4, r4, c31;
add r4, r4, c32;
// Clamp .xyz to range [0..1]
// (.w is left unclamped; per the notes above it is the raw depth.)
min r4.xyz, r4, c16.zzzz;
max r4.xyz, r4, c16.xxxx;
//mov r4.xyz, c16.xxx; // HACKTEST
// Calc our filter (see above).
// r11 = clamp(v5.w * c29, 0, 1), one filter value per wave lane.
// NOTE(review): the formula in the header comment subtracts 1 before
// clamping; no such bias is applied here - confirm whether it is folded
// into c29 / v5.w or whether the comment is stale.
mul r11, v5.wwww, c29;
max r11, r11, c16.xxxx;
min r11, r11, c16.zzzz;
//mov r2, r1;
// r2 == sinDist
// r1 == cosDist
// sinDist *= filter;
mul r2, r2, r11;
// sinDist *= kAmplitude.xyzw
mul r2, r2, c7;
// height = dp4(sinDist, kOne);
// accumPos.z += height; (but accumPos.z is currently 0).
dp4 r8.x, r2, c16.zzzz;
mul r8.y, r8.x, r4.z;
add r8.z, r8.y, c30.w;
max r6.z, r6.z, r8.z;
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level
//
// cosDist *= filter;
mul r1, r1, r11;
// Pos = (in.x + S, in.y + R, r6.z)
// S = sum(k Dir.x A cos())
// R = sum(k Dir.y A cos())
// c10 = k Dir.x A
// c11 = k Dir.y A
// S = sum(cosDist * c10);
dp4 r7.x, r1, c10;
// R = sum(cosDist * c11);
dp4 r7.y, r1, c11;
add r6.xy, r6.xy, r7.xy;
// Bias our vert up a bit to compensate for precision errors.
// In particular, our filter coefficients are coming in as
// interpolated bytes, so there's bound to be a lot of slop
// from that. We've got a free slot in c35.z, so we'll use that.
// A better implementation would be to bias and scale our screen
// vert, effectively pushing the vert toward the camera without
// actually moving it, but this is easier and might work just
// as well.
add r6.z, r6.z, c35.z;
//
// // Transform position to screen
//
//
//m4x3 r6, v0, c25; // HACKAGE
//mov r6.w, c16.z; // HACKAGE
//m4x4 oPos, r6, c0; // ADDFOG
// The transformed position goes through r9 (rather than straight to oPos)
// so that its w can be read back for the fog term.
m4x4 r9, r6, c0;
// oFog = (w + c4.x) * c4.y
add r10.x, r9.w, c4.x;
mul oFog, r10.x, c4.y;
mov oPos, r9;
// Dyna Stuff
// Constants
// c33 = fC1U, fC2U, fC1V, fC2V
// c34 = fInitAtten, t, life, 1.f / (life-decay)
// c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE
//
// Vertex Info
// v7.z = fBirth (because we don't use it for anything else).
//
// Initialize r1.zw to 0,1
mov r1, c16.xxxz;
// Calc r1.x = age, r1.y = atten
// age = t - birth.
sub r1.x, c34.y, v7.z;
// atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay));
// first clamp0_1(age/ramp)
mul r1.y, r1.x, c35.y;
min r1.y, r1.y, c16.z; // Clamp to one (can't go negative).
// now clamp0_1((life-age) / (life-decay));
// r1.z is used as scratch for this term.
// NOTE(review): r1.z is not reset to 0 afterwards, so the final
// "mov oT0, r1" exports this clamped decay term in oT0.z rather than the 0
// set by the initialization above - confirm nothing downstream reads oT0.z.
sub r1.z, c34.z, r1.x;
mul r1.z, r1.z, c34.w;
min r1.z, r1.z, c16.z; // Clamp to one
max r1.z, r1.z, c16.x; // Clamp to zero
mul r1.y, r1.y, r1.z; // atten is the product of the two terms.
// color is (atten, atten, atten, 1.f)
// Need to calculate opacity we would have had from vs_WaveFixedFin7.inl
// Right now that's just modulating by r4.y.
// r0.y = depth-filter channel y * fInitAtten; oD0 = r0.y * (atten, atten, atten, 1).
mul r0.y, r4.y, c34.x;
mul oD0, r0.yyyy, r1.yyyw;
//mov oD0, c16.zzzz; // HACKTEST
// UVW = (inUVW - 0.5) * scale + 0.5
// where:
// scale = (fC1U / (age * fC2U + 1.f), fC1V / (age * fC2V + 1.f), 1.f, 1.f)
// Only the xy lanes of the scale are actually computed and applied below.
mov r2, c16.xxxz;
mul r2.xy, r1.xx, c33.yw; // (age * fC2U, age * fC2V)
add r2.xy, r2.xy, c16.zz; // + 1
rcp r2.x, r2.x;
rcp r2.y, r2.y;
mul r2.xy, r2.xy, c33.xz; // * (fC1U, fC1V)
// From here r1.xy (age, atten) are overwritten with the output UV.
sub r1.xy, v7.xy, c16.yy;
mul r1.xy, r1.xy, r2.xy;
add r1.xy, r1.xy, c16.yy;
mov oT0, r1;