mirror of
https://foundry.openuru.org/gitblit/r/CWE-ou-minkata.git
synced 2025-07-14 02:27:40 -04:00
Fix line endings and tabs
This commit is contained in:
Binary file not shown.
@ -1,17 +1,17 @@
|
||||
|
||||
|
||||
// Grab noise texture,
|
||||
// modulate biased version by vtx color 0,
|
||||
// add to vtx color 1
|
||||
|
||||
ps.1.1
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
add r0.rgb, t0_bias, t1_bias;
|
||||
+add r0.a, t0, t1;
|
||||
//mov r0, t1_bias;
|
||||
mad r0.rgb, r0, v0, v1;
|
||||
//mov r0, v1;
|
||||
|
||||
|
||||
|
||||
// Grab noise texture,
|
||||
// modulate biased version by vtx color 0,
|
||||
// add to vtx color 1
|
||||
|
||||
ps.1.1
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
add r0.rgb, t0_bias, t1_bias;
|
||||
+add r0.a, t0, t1;
|
||||
//mov r0, t1_bias;
|
||||
mad r0.rgb, r0, v0, v1;
|
||||
//mov r0, v1;
|
||||
|
||||
|
@ -1,14 +1,14 @@
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Add blend color, output sum of alpha
|
||||
|
||||
// Color is t0 + t1
|
||||
// Alpha is t0.a + t1.a
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
add r0.rgb, t0, t1;
|
||||
+add r0.a, t0, t1;
|
||||
mul r0, r0, v0;
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Add blend color, output sum of alpha
|
||||
|
||||
// Color is t0 + t1
|
||||
// Alpha is t0.a + t1.a
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
add r0.rgb, t0, t1;
|
||||
+add r0.a, t0, t1;
|
||||
mul r0, r0, v0;
|
||||
|
@ -1,14 +1,14 @@
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Add blend color, output base alpha
|
||||
|
||||
// Color is t0 + t1
|
||||
// Alpha is t0.a
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
add r0.rgb, t0, t1;
|
||||
+mov r0.a, t0;
|
||||
mul r0, r0, v0;
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Add blend color, output base alpha
|
||||
|
||||
// Color is t0 + t1
|
||||
// Alpha is t0.a
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
add r0.rgb, t0, t1;
|
||||
+mov r0.a, t0;
|
||||
mul r0, r0, v0;
|
||||
|
@ -1,14 +1,14 @@
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Add blend color, output product of alpha
|
||||
|
||||
// Color is t0 + t1
|
||||
// Alpha is t0.a * t1.a
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
add r0.rgb, t0, t1;
|
||||
+mul r0.a, t0, t1;
|
||||
mul r0, r0, v0;
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Add blend color, output product of alpha
|
||||
|
||||
// Color is t0 + t1
|
||||
// Alpha is t0.a * t1.a
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
add r0.rgb, t0, t1;
|
||||
+mul r0.a, t0, t1;
|
||||
mul r0, r0, v0;
|
||||
|
@ -1,14 +1,14 @@
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Alpha blend color, output sum of alphas
|
||||
|
||||
// Color is t0 * (1 - t1.a) + t1 * t1.a
|
||||
// Alpha is t0.a + t1.a
|
||||
|
||||
tex t0
|
||||
tex t1
|
||||
|
||||
lrp r0.rgb, t1.a, t1, t0
|
||||
add r0.a, t0, t1;
|
||||
mul r0, r0, v0;
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Alpha blend color, output sum of alphas
|
||||
|
||||
// Color is t0 * (1 - t1.a) + t1 * t1.a
|
||||
// Alpha is t0.a + t1.a
|
||||
|
||||
tex t0
|
||||
tex t1
|
||||
|
||||
lrp r0.rgb, t1.a, t1, t0
|
||||
add r0.a, t0, t1;
|
||||
mul r0, r0, v0;
|
||||
|
@ -1,14 +1,14 @@
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Alpha blend layers, output base alpha
|
||||
//
|
||||
// Color is t0 * (1 - t1.a) + t1 * t1.a
|
||||
// Alpha is t0.a
|
||||
|
||||
tex t0
|
||||
tex t1
|
||||
|
||||
lrp r0.rgb, t1.a, t1, t0
|
||||
mov r0.a, t0;
|
||||
mul r0, r0, v0;
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Alpha blend layers, output base alpha
|
||||
//
|
||||
// Color is t0 * (1 - t1.a) + t1 * t1.a
|
||||
// Alpha is t0.a
|
||||
|
||||
tex t0
|
||||
tex t1
|
||||
|
||||
lrp r0.rgb, t1.a, t1, t0
|
||||
mov r0.a, t0;
|
||||
mul r0, r0, v0;
|
||||
|
@ -1,14 +1,14 @@
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Alpha blend color, output product of alphas
|
||||
|
||||
// Color is t0 * (1 - t1.a) + t1 * t1.a
|
||||
// Alpha is t0.a * t1.a
|
||||
|
||||
tex t0
|
||||
tex t1
|
||||
|
||||
lrp r0.rgb, t1.a, t1, t0
|
||||
mul r0.a, t0, t1;
|
||||
mul r0, r0, v0;
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Alpha blend color, output product of alphas
|
||||
|
||||
// Color is t0 * (1 - t1.a) + t1 * t1.a
|
||||
// Alpha is t0.a * t1.a
|
||||
|
||||
tex t0
|
||||
tex t1
|
||||
|
||||
lrp r0.rgb, t1.a, t1, t0
|
||||
mul r0.a, t0, t1;
|
||||
mul r0, r0, v0;
|
||||
|
@ -1,9 +1,9 @@
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Single layer, just modulate by vertex color and emit
|
||||
//
|
||||
|
||||
tex t0
|
||||
|
||||
mul r0, t0, v0;
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Single layer, just modulate by vertex color and emit
|
||||
//
|
||||
|
||||
tex t0
|
||||
|
||||
mul r0, t0, v0;
|
||||
|
@ -1,14 +1,14 @@
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Multiply blend color, output sum of alpha
|
||||
|
||||
// Color is t0 * t1
|
||||
// Alpha is t0.a + t1.a
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
mul r0.rgb, t0, t1;
|
||||
+add r0.a, t0, t1;
|
||||
mul r0, r0, v0;
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Multiply blend color, output sum of alpha
|
||||
|
||||
// Color is t0 * t1
|
||||
// Alpha is t0.a + t1.a
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
mul r0.rgb, t0, t1;
|
||||
+add r0.a, t0, t1;
|
||||
mul r0, r0, v0;
|
||||
|
@ -1,14 +1,14 @@
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Multiply blend color, output base alpha
|
||||
|
||||
// Color is t0 * t1
|
||||
// Alpha is t0.a
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
mul r0.rgb, t0, t1;
|
||||
+mov r0.a, t0;
|
||||
mul r0, r0, v0;
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Multiply blend color, output base alpha
|
||||
|
||||
// Color is t0 * t1
|
||||
// Alpha is t0.a
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
mul r0.rgb, t0, t1;
|
||||
+mov r0.a, t0;
|
||||
mul r0, r0, v0;
|
||||
|
@ -1,14 +1,14 @@
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Multiply blend color, output product of alpha
|
||||
|
||||
// Color is t0 * t1
|
||||
// Alpha is t0.a * t1.a
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
mul r0.rgb, t0, t1;
|
||||
+mul r0.a, t0, t1;
|
||||
mul r0, r0, v0;
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Multiply blend color, output product of alpha
|
||||
|
||||
// Color is t0 * t1
|
||||
// Alpha is t0.a * t1.a
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
|
||||
mul r0.rgb, t0, t1;
|
||||
+mul r0.a, t0, t1;
|
||||
mul r0, r0, v0;
|
||||
|
@ -1,31 +1,31 @@
|
||||
|
||||
// Composite the cosines together.
|
||||
// Input map is cosine(pix) for each of
|
||||
// the 4 waves.
|
||||
//
|
||||
// The constants are set up so:
|
||||
// Nx = -freq * amp * dirX * cos(pix);
|
||||
// Ny = -freq * amp * dirY * cos(pix);
|
||||
// So c[i].x = -freq[i] * amp[i] * dirX[i]
|
||||
// etc.
|
||||
// All textures are:
|
||||
// (r,g,b,a) = (cos(), cos(), 1, 1)
|
||||
//
|
||||
// So c[0].z = 1, but all other c[i].z = 0
|
||||
// Note also the c4 used for biasing back at the end.
|
||||
|
||||
ps.1.1
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
tex t2;
|
||||
tex t3;
|
||||
|
||||
mul r0, t0_bx2, c0;
|
||||
mad r0, t1_bx2, c1, r0;
|
||||
mad r0, t2_bx2, c2, r0;
|
||||
mad r0, t3_bx2, c3, r0;
|
||||
// Now bias it back into range [0..1] for output.
|
||||
mul r0, r0, c4; // c4 = (0.5, 0.5, 0.5, 1)
|
||||
add r0, r0, c4;
|
||||
//mov r0, c4;
|
||||
|
||||
// Composite the cosines together.
|
||||
// Input map is cosine(pix) for each of
|
||||
// the 4 waves.
|
||||
//
|
||||
// The constants are set up so:
|
||||
// Nx = -freq * amp * dirX * cos(pix);
|
||||
// Ny = -freq * amp * dirY * cos(pix);
|
||||
// So c[i].x = -freq[i] * amp[i] * dirX[i]
|
||||
// etc.
|
||||
// All textures are:
|
||||
// (r,g,b,a) = (cos(), cos(), 1, 1)
|
||||
//
|
||||
// So c[0].z = 1, but all other c[i].z = 0
|
||||
// Note also the c4 used for biasing back at the end.
|
||||
|
||||
ps.1.1
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
tex t2;
|
||||
tex t3;
|
||||
|
||||
mul r0, t0_bx2, c0;
|
||||
mad r0, t1_bx2, c1, r0;
|
||||
mad r0, t2_bx2, c2, r0;
|
||||
mad r0, t3_bx2, c3, r0;
|
||||
// Now bias it back into range [0..1] for output.
|
||||
mul r0, r0, c4; // c4 = (0.5, 0.5, 0.5, 1)
|
||||
add r0, r0, c4;
|
||||
//mov r0, c4;
|
||||
|
@ -1,6 +1,6 @@
|
||||
ps.1.1
|
||||
|
||||
// Grass shader. Just does a simple tex mult
|
||||
|
||||
tex t0
|
||||
mul r0, t0, v0
|
||||
ps.1.1
|
||||
|
||||
// Grass shader. Just does a simple tex mult
|
||||
|
||||
tex t0
|
||||
mul r0, t0, v0
|
||||
|
@ -1,35 +1,35 @@
|
||||
|
||||
|
||||
// Composite the cosines together.
|
||||
// Input map is cosine(pix) for each of
|
||||
// the 4 waves.
|
||||
//
|
||||
// The constants are set up so:
|
||||
// Nx = -freq * amp * dirX * cos(pix);
|
||||
// Ny = -freq * amp * dirY * cos(pix);
|
||||
// So c[i].x = -freq[i] * amp[i] * dirX[i]
|
||||
// etc.
|
||||
// All textures are:
|
||||
// (r,g,b,a) = (cos(), cos(), 1, 1)
|
||||
//
|
||||
// Here all c[i].z = 0, because we're accumulating on top
|
||||
// of layers that have been primed with z = 1.
|
||||
// Note also c4 (scale) and c5 (offset) used for biasing back at the end.
|
||||
|
||||
ps.1.1
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
tex t2;
|
||||
tex t3;
|
||||
|
||||
mul r0, t0_bx2, c0;
|
||||
mad r0, t1_bx2, c1, r0;
|
||||
mad r0, t2_bx2, c2, r0;
|
||||
mad r0, t3_bx2, c3, r0;
|
||||
|
||||
// Now bias it back into range [0..1] for output.
|
||||
mul r0.rgb, r0, c4;
|
||||
+mov r0.a, c4;
|
||||
add r0.rgb, r0, c5;
|
||||
//mov r0, c4;
|
||||
|
||||
|
||||
// Composite the cosines together.
|
||||
// Input map is cosine(pix) for each of
|
||||
// the 4 waves.
|
||||
//
|
||||
// The constants are set up so:
|
||||
// Nx = -freq * amp * dirX * cos(pix);
|
||||
// Ny = -freq * amp * dirY * cos(pix);
|
||||
// So c[i].x = -freq[i] * amp[i] * dirX[i]
|
||||
// etc.
|
||||
// All textures are:
|
||||
// (r,g,b,a) = (cos(), cos(), 1, 1)
|
||||
//
|
||||
// Here all c[i].z = 0, because we're accumulating on top
|
||||
// of layers that have been primed with z = 1.
|
||||
// Note also c4 (scale) and c5 (offset) used for biasing back at the end.
|
||||
|
||||
ps.1.1
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
tex t2;
|
||||
tex t3;
|
||||
|
||||
mul r0, t0_bx2, c0;
|
||||
mad r0, t1_bx2, c1, r0;
|
||||
mad r0, t2_bx2, c2, r0;
|
||||
mad r0, t3_bx2, c3, r0;
|
||||
|
||||
// Now bias it back into range [0..1] for output.
|
||||
mul r0.rgb, r0, c4;
|
||||
+mov r0.a, c4;
|
||||
add r0.rgb, r0, c5;
|
||||
//mov r0, c4;
|
||||
|
@ -1,21 +1,21 @@
|
||||
|
||||
ps.1.1
|
||||
|
||||
def c0, 1.0, 1.0, 1.0, 1.0 // Temp Hack
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
tex t2;
|
||||
|
||||
mov r1.a, t1;
|
||||
lrp r0.rgb, r1.a, t1, t0;
|
||||
+mul r0.a, 1-t1, 1-t0;
|
||||
lrp r0.rgb, t2.a, t2, r0;
|
||||
+mul r0.a, 1-t2, r0;
|
||||
mul r0.rgb, r0, v0;
|
||||
+mul r0.a, 1-r0, v0;
|
||||
|
||||
//mov r0.a, c1;
|
||||
|
||||
//mov r0.rgb, t2;
|
||||
//+mov r0.a, 1-t2;
|
||||
|
||||
ps.1.1
|
||||
|
||||
def c0, 1.0, 1.0, 1.0, 1.0 // Temp Hack
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
tex t2;
|
||||
|
||||
mov r1.a, t1;
|
||||
lrp r0.rgb, r1.a, t1, t0;
|
||||
+mul r0.a, 1-t1, 1-t0;
|
||||
lrp r0.rgb, t2.a, t2, r0;
|
||||
+mul r0.a, 1-t2, r0;
|
||||
mul r0.rgb, r0, v0;
|
||||
+mul r0.a, 1-r0, v0;
|
||||
|
||||
//mov r0.a, c1;
|
||||
|
||||
//mov r0.rgb, t2;
|
||||
//+mov r0.a, 1-t2;
|
||||
|
@ -1,35 +1,35 @@
|
||||
|
||||
// Very similar to ps_WaveFixed.inl. Only the final coloring is different.
|
||||
// Even though so far they are identical.
|
||||
|
||||
ps.1.1
|
||||
|
||||
//def c0, 1.0, 0.0, 0.0, 1.0 // Temp Hack
|
||||
|
||||
|
||||
tex t0 // Bind texture in stage 0 to register t0.
|
||||
texm3x3pad t1, t0_bx2 // First row of matrix multiply.
|
||||
texm3x3pad t2, t0_bx2 // Second row of matrix multiply.
|
||||
texm3x3vspec t3, t0_bx2 // Third row of matrix multiply to get a 3-vector.
|
||||
// Reflect 3-vector by the eye-ray vector.
|
||||
// Use reflected vector to do a texture lookup
|
||||
// at stage 3.
|
||||
|
||||
// t3 now has our reflected environment map value
|
||||
// We've (presumably) attenuated the effect on a vertex basis
|
||||
// and have our color w/ attenuated alpha in v0. So all we need
|
||||
// is to multiply t3 by v0 into r0 and we're done.
|
||||
mul r0.rgb, t3, v0;
|
||||
+mul r0.a, t0, v0;
|
||||
|
||||
// mov r0, t0;
|
||||
|
||||
/*
|
||||
tex t0;
|
||||
texcoord t1;
|
||||
texcoord t2;
|
||||
texcoord t3;
|
||||
|
||||
mov r0.rgb, t3;
|
||||
+mov r0.a, c0;
|
||||
*/
|
||||
|
||||
// Very similar to ps_WaveFixed.inl. Only the final coloring is different.
|
||||
// Even though so far they are identical.
|
||||
|
||||
ps.1.1
|
||||
|
||||
//def c0, 1.0, 0.0, 0.0, 1.0 // Temp Hack
|
||||
|
||||
|
||||
tex t0 // Bind texture in stage 0 to register t0.
|
||||
texm3x3pad t1, t0_bx2 // First row of matrix multiply.
|
||||
texm3x3pad t2, t0_bx2 // Second row of matrix multiply.
|
||||
texm3x3vspec t3, t0_bx2 // Third row of matrix multiply to get a 3-vector.
|
||||
// Reflect 3-vector by the eye-ray vector.
|
||||
// Use reflected vector to do a texture lookup
|
||||
// at stage 3.
|
||||
|
||||
// t3 now has our reflected environment map value
|
||||
// We've (presumably) attenuated the effect on a vertex basis
|
||||
// and have our color w/ attenuated alpha in v0. So all we need
|
||||
// is to multiply t3 by v0 into r0 and we're done.
|
||||
mul r0.rgb, t3, v0;
|
||||
+mul r0.a, t0, v0;
|
||||
|
||||
// mov r0, t0;
|
||||
|
||||
/*
|
||||
tex t0;
|
||||
texcoord t1;
|
||||
texcoord t2;
|
||||
texcoord t3;
|
||||
|
||||
mov r0.rgb, t3;
|
||||
+mov r0.a, c0;
|
||||
*/
|
||||
|
@ -1,77 +1,77 @@
|
||||
//ps.1.1
|
||||
|
||||
// def c0, 1.0, 0.0, 0.0, 1.0
|
||||
|
||||
// mov r0, c0
|
||||
|
||||
// Short pixel shader. Use the texm3x3vspec to do a per-pixel
|
||||
// reflected lookup into our environment map.
|
||||
// Input:
|
||||
// t0 - Normal map in tangent space. Apply _bx2 modifier to shift
|
||||
// [0..255] -> [-1..1]
|
||||
// t1 - UVW = tangent + eye2pos.x, map ignored.
|
||||
// t2 - UVW = binormal + eye2pos.y, map ignored
|
||||
// t3 - UVW = normal + eye2pos.z, map = environment cube map
|
||||
// v0 - attenuating color/alpha.
|
||||
// See docs on texm3x3vspec for explanation of the eye2pos wackiness.
|
||||
// Output:
|
||||
// r0 = reflected lookup from environment map X input v0.
|
||||
// Since environment map has alpha = 255, the output of this
|
||||
// shader can be used for either alpha or additive blending,
|
||||
// as long as v0 is fed in appropriately.
|
||||
|
||||
ps.1.1
|
||||
|
||||
def c0, 1.0, 0.0, 0.0, 1.0 // Temp Hack
|
||||
/*
|
||||
def c1, 0.0, 1.0, 0.0, 1.0
|
||||
def c2, 0.0, 0.0, 1.0, 1.0
|
||||
*/
|
||||
|
||||
|
||||
tex t0 // Bind texture in stage 0 to register t0.
|
||||
texm3x3pad t1, t0_bx2 // First row of matrix multiply.
|
||||
texm3x3pad t2, t0_bx2 // Second row of matrix multiply.
|
||||
texm3x3vspec t3, t0_bx2 // Third row of matrix multiply to get a 3-vector.
|
||||
// Reflect 3-vector by the eye-ray vector.
|
||||
// Use reflected vector to do a texture lookup
|
||||
// at stage 3.
|
||||
|
||||
// t3 now has our reflected environment map value
|
||||
// We've (presumably) attenuated the effect on a vertex basis
|
||||
// and have our color w/ attenuated alpha in v0. So all we need
|
||||
// is to multiply t3 by v0 into r0, add our base color from v1 and we're done.
|
||||
mad r0.rgb, t3, v0, v1;
|
||||
/* HACKAGE
|
||||
//+mul r0.a, v1, v0;
|
||||
HACKAGE */
|
||||
mov r0.a, v0; //HACKAGE
|
||||
/*
|
||||
mov r0.rgb, v0;
|
||||
mov r0.a, v0;
|
||||
*/
|
||||
|
||||
/*
|
||||
tex t0;
|
||||
texcoord t1;
|
||||
texcoord t2;
|
||||
texcoord t3;
|
||||
|
||||
mov r0.rgb, t3;
|
||||
|
||||
+mov r0.a, c0;
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
tex t0;
|
||||
texcoord t1;
|
||||
texcoord t2;
|
||||
texcoord t3;
|
||||
|
||||
mul r0.rgb, t0_bx2, c1;
|
||||
+mov r0.a, c2;
|
||||
*/
|
||||
//ps.1.1
|
||||
|
||||
// def c0, 1.0, 0.0, 0.0, 1.0
|
||||
|
||||
// mov r0, c0
|
||||
|
||||
// Short pixel shader. Use the texm3x3vspec to do a per-pixel
|
||||
// reflected lookup into our environment map.
|
||||
// Input:
|
||||
// t0 - Normal map in tangent space. Apply _bx2 modifier to shift
|
||||
// [0..255] -> [-1..1]
|
||||
// t1 - UVW = tangent + eye2pos.x, map ignored.
|
||||
// t2 - UVW = binormal + eye2pos.y, map ignored
|
||||
// t3 - UVW = normal + eye2pos.z, map = environment cube map
|
||||
// v0 - attenuating color/alpha.
|
||||
// See docs on texm3x3vspec for explanation of the eye2pos wackiness.
|
||||
// Output:
|
||||
// r0 = reflected lookup from environment map X input v0.
|
||||
// Since environment map has alpha = 255, the output of this
|
||||
// shader can be used for either alpha or additive blending,
|
||||
// as long as v0 is fed in appropriately.
|
||||
|
||||
ps.1.1
|
||||
|
||||
def c0, 1.0, 0.0, 0.0, 1.0 // Temp Hack
|
||||
/*
|
||||
def c1, 0.0, 1.0, 0.0, 1.0
|
||||
def c2, 0.0, 0.0, 1.0, 1.0
|
||||
*/
|
||||
|
||||
|
||||
tex t0 // Bind texture in stage 0 to register t0.
|
||||
texm3x3pad t1, t0_bx2 // First row of matrix multiply.
|
||||
texm3x3pad t2, t0_bx2 // Second row of matrix multiply.
|
||||
texm3x3vspec t3, t0_bx2 // Third row of matrix multiply to get a 3-vector.
|
||||
// Reflect 3-vector by the eye-ray vector.
|
||||
// Use reflected vector to do a texture lookup
|
||||
// at stage 3.
|
||||
|
||||
// t3 now has our reflected environment map value
|
||||
// We've (presumably) attenuated the effect on a vertex basis
|
||||
// and have our color w/ attenuated alpha in v0. So all we need
|
||||
// is to multiply t3 by v0 into r0, add our base color from v1 and we're done.
|
||||
mad r0.rgb, t3, v0, v1;
|
||||
/* HACKAGE
|
||||
//+mul r0.a, v1, v0;
|
||||
HACKAGE */
|
||||
mov r0.a, v0; //HACKAGE
|
||||
/*
|
||||
mov r0.rgb, v0;
|
||||
mov r0.a, v0;
|
||||
*/
|
||||
|
||||
/*
|
||||
tex t0;
|
||||
texcoord t1;
|
||||
texcoord t2;
|
||||
texcoord t3;
|
||||
|
||||
mov r0.rgb, t3;
|
||||
|
||||
+mov r0.a, c0;
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
tex t0;
|
||||
texcoord t1;
|
||||
texcoord t2;
|
||||
texcoord t3;
|
||||
|
||||
mul r0.rgb, t0_bx2, c1;
|
||||
+mov r0.a, c2;
|
||||
*/
|
||||
|
@ -1,30 +1,30 @@
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Have a couple extra textures to burn here. Only thing
|
||||
// I've thought of is to have an additional texture to
|
||||
// make the front of the wave solid. So its UVW would be
|
||||
// the same as the base texture, but the texture itself would
|
||||
// be just a thin horizontal band of alpha. Then just add that
|
||||
// alpha to the output alpha.
|
||||
//
|
||||
// Let's get the first cut running first.
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
tex t2;
|
||||
|
||||
//mul r0, v0, t0;
|
||||
//mul r0, r0, t1;
|
||||
//add r0.a, r0, t2;
|
||||
|
||||
// 1.0 mov r0, t0;
|
||||
// 1.0 mul r0, r0, t1;
|
||||
mul r0, t0, t1;
|
||||
// TEST add r0.a, r0, t2; // TEST
|
||||
add r0, r0, t2; // TEST
|
||||
mul r0, r0, v0;
|
||||
|
||||
//mul r0.rgb, r0, r0.a; // TEST
|
||||
|
||||
//mov r0, t1;
|
||||
|
||||
ps.1.1
|
||||
|
||||
// Have a couple extra textures to burn here. Only thing
|
||||
// I've thought of is to have an additional texture to
|
||||
// make the front of the wave solid. So its UVW would be
|
||||
// the same as the base texture, but the texture itself would
|
||||
// be just a thin horizontal band of alpha. Then just add that
|
||||
// alpha to the output alpha.
|
||||
//
|
||||
// Let's get the first cut running first.
|
||||
|
||||
tex t0;
|
||||
tex t1;
|
||||
tex t2;
|
||||
|
||||
//mul r0, v0, t0;
|
||||
//mul r0, r0, t1;
|
||||
//add r0.a, r0, t2;
|
||||
|
||||
// 1.0 mov r0, t0;
|
||||
// 1.0 mul r0, r0, t1;
|
||||
mul r0, t0, t1;
|
||||
// TEST add r0.a, r0, t2; // TEST
|
||||
add r0, r0, t2; // TEST
|
||||
mul r0, r0, v0;
|
||||
|
||||
//mul r0.rgb, r0, r0.a; // TEST
|
||||
|
||||
//mov r0, t1;
|
||||
|
@ -1,63 +1,63 @@
|
||||
//ps.1.1
|
||||
|
||||
// def c0, 1.0, 0.0, 0.0, 1.0
|
||||
|
||||
// mov r0, c0
|
||||
|
||||
// Short pixel shader. Use the texm3x3vspec to do a per-pixel
|
||||
// reflected lookup into our environment map.
|
||||
// Input:
|
||||
// t0 - Normal map in tangent space. Apply _bx2 modifier to shift
|
||||
// [0..255] -> [-1..1]
|
||||
// t1 - UVW = tangent + eye2pos.x, map ignored.
|
||||
// t2 - UVW = binormal + eye2pos.y, map ignored
|
||||
// t3 - UVW = normal + eye2pos.z, map = environment cube map
|
||||
// v0 - attenuating color/alpha.
|
||||
// See docs on texm3x3vspec for explanation of the eye2pos wackiness.
|
||||
// Output:
|
||||
// r0 = reflected lookup from environment map X input v0.
|
||||
// Since environment map has alpha = 255, the output of this
|
||||
// shader can be used for either alpha or additive blending,
|
||||
// as long as v0 is fed in appropriately.
|
||||
|
||||
ps.1.1
|
||||
|
||||
//def c0, 1.0, 1.0, 1.0, 1.0 // Temp Hack
|
||||
//def c1, 2.0, 2.0, 2.0, 1.0
|
||||
|
||||
//texcoord t0;
|
||||
//texcoord t1;
|
||||
//texcoord t2;
|
||||
//texcoord t3;
|
||||
|
||||
tex t0 // Bind texture in stage 0 to register t0.
|
||||
texm3x3pad t1, t0_bx2 // First row of matrix multiply.
|
||||
texm3x3pad t2, t0_bx2 // Second row of matrix multiply.
|
||||
texm3x3vspec t3, t0_bx2 // Third row of matrix multiply to get a 3-vector.
|
||||
// Reflect 3-vector by the eye-ray vector.
|
||||
// Use reflected vector to do a texture lookup
|
||||
// at stage 3.
|
||||
|
||||
// t3 now has our reflected environment map value
|
||||
// We've (presumably) attenuated the effect on a vertex basis
|
||||
// and have our color w/ attenuated alpha in v0. So all we need
|
||||
// is to multiply t3 by v1 and add v0 into r0 and we're done.
|
||||
mad r0.rgb, t3, v1, v0;
|
||||
//add r0.rgb, t3, v0;
|
||||
+mov r0.a, v1;
|
||||
|
||||
//mov r0.rgb, v1.a; // HACKAGE
|
||||
//mov r0.a, v1.a; // HACKAGE
|
||||
//mov r0, v1; // HACKAGE
|
||||
|
||||
//mov r0, c0
|
||||
|
||||
//mul r0, r0, t0;
|
||||
|
||||
//mov r0, v1;
|
||||
//mov r0, t3;
|
||||
|
||||
//mov r0.rgb, t3;
|
||||
//+mov r0.a, c0;
|
||||
|
||||
|
||||
//ps.1.1
|
||||
|
||||
// def c0, 1.0, 0.0, 0.0, 1.0
|
||||
|
||||
// mov r0, c0
|
||||
|
||||
// Short pixel shader. Use the texm3x3vspec to do a per-pixel
|
||||
// reflected lookup into our environment map.
|
||||
// Input:
|
||||
// t0 - Normal map in tangent space. Apply _bx2 modifier to shift
|
||||
// [0..255] -> [-1..1]
|
||||
// t1 - UVW = tangent + eye2pos.x, map ignored.
|
||||
// t2 - UVW = binormal + eye2pos.y, map ignored
|
||||
// t3 - UVW = normal + eye2pos.z, map = environment cube map
|
||||
// v0 - attenuating color/alpha.
|
||||
// See docs on texm3x3vspec for explanation of the eye2pos wackiness.
|
||||
// Output:
|
||||
// r0 = reflected lookup from environment map X input v0.
|
||||
// Since environment map has alpha = 255, the output of this
|
||||
// shader can be used for either alpha or additive blending,
|
||||
// as long as v0 is fed in appropriately.
|
||||
|
||||
ps.1.1
|
||||
|
||||
//def c0, 1.0, 1.0, 1.0, 1.0 // Temp Hack
|
||||
//def c1, 2.0, 2.0, 2.0, 1.0
|
||||
|
||||
//texcoord t0;
|
||||
//texcoord t1;
|
||||
//texcoord t2;
|
||||
//texcoord t3;
|
||||
|
||||
tex t0 // Bind texture in stage 0 to register t0.
|
||||
texm3x3pad t1, t0_bx2 // First row of matrix multiply.
|
||||
texm3x3pad t2, t0_bx2 // Second row of matrix multiply.
|
||||
texm3x3vspec t3, t0_bx2 // Third row of matrix multiply to get a 3-vector.
|
||||
// Reflect 3-vector by the eye-ray vector.
|
||||
// Use reflected vector to do a texture lookup
|
||||
// at stage 3.
|
||||
|
||||
// t3 now has our reflected environment map value
|
||||
// We've (presumably) attenuated the effect on a vertex basis
|
||||
// and have our color w/ attenuated alpha in v0. So all we need
|
||||
// is to multiply t3 by v1 and add v0 into r0 and we're done.
|
||||
mad r0.rgb, t3, v1, v0;
|
||||
//add r0.rgb, t3, v0;
|
||||
+mov r0.a, v1;
|
||||
|
||||
//mov r0.rgb, v1.a; // HACKAGE
|
||||
//mov r0.a, v1.a; // HACKAGE
|
||||
//mov r0, v1; // HACKAGE
|
||||
|
||||
//mov r0, c0
|
||||
|
||||
//mul r0, r0, t0;
|
||||
|
||||
//mov r0, v1;
|
||||
//mov r0, t3;
|
||||
|
||||
//mov r0.rgb, t3;
|
||||
//+mov r0.a, c0;
|
||||
|
||||
|
||||
|
@ -1,21 +1,21 @@
|
||||
|
||||
ps.1.1
|
||||
|
||||
//def c0, 1.0, 0.0, 0.0, 1.0 // Temp Hack
|
||||
|
||||
// Want
|
||||
// Color: vert.rgb * t0.rgb
|
||||
// Alpha: vert.a * t0.a * t1.a
|
||||
|
||||
tex t0;
|
||||
//tex t1;
|
||||
|
||||
//mul r0.rgb, v0, t0;
|
||||
//+mul r0.a, v0.a, t0.a;
|
||||
//mul r0.a, r0.a, t1.a;
|
||||
|
||||
//mul r0, t0, t1;
|
||||
|
||||
mul r0, t0, v0;
|
||||
|
||||
//mov r0, t0;
|
||||
|
||||
ps.1.1
|
||||
|
||||
//def c0, 1.0, 0.0, 0.0, 1.0 // Temp Hack
|
||||
|
||||
// Want
|
||||
// Color: vert.rgb * t0.rgb
|
||||
// Alpha: vert.a * t0.a * t1.a
|
||||
|
||||
tex t0;
|
||||
//tex t1;
|
||||
|
||||
//mul r0.rgb, v0, t0;
|
||||
//+mul r0.a, v0.a, t0.a;
|
||||
//mul r0.a, r0.a, t1.a;
|
||||
|
||||
//mul r0, t0, t1;
|
||||
|
||||
mul r0, t0, v0;
|
||||
|
||||
//mov r0, t0;
|
||||
|
@ -1,34 +1,34 @@
|
||||
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_texcoord0 v7
|
||||
|
||||
// Take in a screen space position,
|
||||
// transform the UVW,
|
||||
// and spit it out.
|
||||
// c0 = uvXform0[0]
|
||||
// c1 = uvXform0[1]
|
||||
// c2 = uvXform1[0]
|
||||
// c3 = uvXform1[1]
|
||||
// c4 = (0,0.5,1.0,2.0)
|
||||
// c5 = (noiseScale, bias, 0, 1)
|
||||
|
||||
mov oPos, v0;
|
||||
|
||||
mov r0.zw, c4.xxxz; // zw will stay constant (0,1); x and y are rewritten per texture stage below.
|
||||
|
||||
dp4 r0.x, v7, c0;
|
||||
dp4 r0.y, v7, c1;
|
||||
|
||||
mov oT0, r0;
|
||||
|
||||
dp4 r0.x, v7, c2;
|
||||
dp4 r0.y, v7, c3;
|
||||
|
||||
mov oT1, r0;
|
||||
|
||||
mov oD0, c5.xxzz;
|
||||
mov oD1, c5.yyzz;
|
||||
|
||||
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_texcoord0 v7
|
||||
|
||||
// Take in a screen space position,
|
||||
// transform the UVW,
|
||||
// and spit it out.
|
||||
// c0 = uvXform0[0]
|
||||
// c1 = uvXform0[1]
|
||||
// c2 = uvXform1[0]
|
||||
// c3 = uvXform1[1]
|
||||
// c4 = (0,0.5,1.0,2.0)
|
||||
// c5 = (noiseScale, bias, 0, 1)
|
||||
|
||||
mov oPos, v0;
|
||||
|
||||
mov r0.zw, c4.xxxz; // zw will stay constant (0,1); x and y are rewritten per texture stage below.
|
||||
|
||||
dp4 r0.x, v7, c0;
|
||||
dp4 r0.y, v7, c1;
|
||||
|
||||
mov oT0, r0;
|
||||
|
||||
dp4 r0.x, v7, c2;
|
||||
dp4 r0.y, v7, c3;
|
||||
|
||||
mov oT1, r0;
|
||||
|
||||
mov oD0, c5.xxzz;
|
||||
mov oD1, c5.yyzz;
|
||||
|
||||
|
@ -1,31 +1,31 @@
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_texcoord0 v7
|
||||
|
||||
// Take in a screen space position,
|
||||
// transform the UVW,
|
||||
// and spit it out.
|
||||
// c4 = (0,0.5,1.0,2.0)
|
||||
|
||||
//mov r0, v0;
|
||||
//mov r0.w, c4.zzzz;
|
||||
//mov oPos, r0;
|
||||
mov oPos, v0;
|
||||
|
||||
dp4 r0.x, v7, c0;
|
||||
mov r0.yzw, c4.xxxz; // yzw will stay constant (0,0,1);
|
||||
|
||||
mov oT0, r0;
|
||||
|
||||
dp4 r0.x, v7, c1;
|
||||
|
||||
mov oT1, r0;
|
||||
|
||||
dp4 r0.x, v7, c2;
|
||||
|
||||
mov oT2, r0;
|
||||
|
||||
dp4 r0.x, v7, c3;
|
||||
|
||||
mov oT3, r0;
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_texcoord0 v7
|
||||
|
||||
// Take in a screen space position,
|
||||
// transform the UVW,
|
||||
// and spit it out.
|
||||
// c4 = (0,0.5,1.0,2.0)
|
||||
|
||||
//mov r0, v0;
|
||||
//mov r0.w, c4.zzzz;
|
||||
//mov oPos, r0;
|
||||
mov oPos, v0;
|
||||
|
||||
dp4 r0.x, v7, c0;
|
||||
mov r0.yzw, c4.xxxz; // yzw will stay constant (0,0,1);
|
||||
|
||||
mov oT0, r0;
|
||||
|
||||
dp4 r0.x, v7, c1;
|
||||
|
||||
mov oT1, r0;
|
||||
|
||||
dp4 r0.x, v7, c2;
|
||||
|
||||
mov oT2, r0;
|
||||
|
||||
dp4 r0.x, v7, c3;
|
||||
|
||||
mov oT3, r0;
|
||||
|
@ -1,60 +1,60 @@
|
||||
vs.1.1
|
||||
|
||||
// Grass shader. Moves verts according to sine waves seeded by position
|
||||
// Based on the article "Animated Grass with Pixel and Vertex Shaders"
|
||||
// by John Isidoro and Drew Card, in the book
|
||||
// "Direct3D ShaderX Vertex and Pixel Shader Tips and Tricks"
|
||||
|
||||
// c0 = Local2NDC
|
||||
// c4 = (0.0, 0.5, 1.0, 2.0)
|
||||
// c5 = (time, X, X, X)
|
||||
// c6 = Pi constants
|
||||
// c7 = Sin constants (-1/3!, 1/5!, -1/7!, 1/9!)
|
||||
// c8 = waveDistortX
|
||||
// c9 = waveDistortY
|
||||
// c10 = waveDistortZ
|
||||
// c11 = waveDirX (0.25, 0.0, -0.7, -0.8)
|
||||
// c12 = waveDirY (0.0, 0.15, -0.7, 0.1)
|
||||
// c13 = waveSpeed (0.2, 0.15, 0.4, 0.4)
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
mul r0, c11, v0.x // pos X,Y input to waves
|
||||
mad r0, c12, v0.y, r0
|
||||
|
||||
mov r1, c5.x // time
|
||||
mad r0, r1, c13, r0 // scale by speed and add to X,Y input
|
||||
frc r0.xy, r0
|
||||
frc r1.xy, r0.zwzw
|
||||
mov r0.zw, r1.xyxy
|
||||
|
||||
sub r0, r0, c4.y // - 0.5
|
||||
mul r1, r0, c6.w // *= 2 pi
|
||||
|
||||
mul r2, r1, r1 // ^2
|
||||
mul r3, r2, r1 // ^3
|
||||
mul r5, r3, r2 // ^5
|
||||
mul r7, r5, r2 // ^7
|
||||
mul r9, r7, r2 // ^9
|
||||
|
||||
mad r0, r3, c7.x, r1 // - r1^3 / 3!
|
||||
mad r0, r5, c7.y, r0 // + r1^5 / 5!
|
||||
mad r0, r7, c7.z, r0 // - r1^7 / 7!
|
||||
mad r0, r9, c7.w, r0 // + r1^9 / 9!
|
||||
|
||||
dp4 r3.x, r0, c8
|
||||
dp4 r3.y, r0, c9
|
||||
dp4 r3.zw, r0, c10
|
||||
|
||||
sub r4, c4.z, v7.y
|
||||
mul r3, r3, r4 // mult by Y tex coord. So the waves only affect the top verts
|
||||
mov r2.w, v0 //
|
||||
add r2.xyz, r3, v0 // add offset to position
|
||||
|
||||
m4x4 oPos, r2, c0 // trans to NDC
|
||||
|
||||
mov oFog, c4.z // no fog
|
||||
mov oD0, v5
|
||||
mov oT0, v7
|
||||
vs.1.1
|
||||
|
||||
// Grass shader. Moves verts according to sine waves seeded by position
|
||||
// Based on the article "Animated Grass with Pixel and Vertex Shaders"
|
||||
// by John Isidoro and Drew Card, in the book
|
||||
// "Direct3D ShaderX Vertex and Pixel Shader Tips and Tricks"
|
||||
|
||||
// c0 = Local2NDC
|
||||
// c4 = (0.0, 0.5, 1.0, 2.0)
|
||||
// c5 = (time, X, X, X)
|
||||
// c6 = Pi constants
|
||||
// c7 = Sin constants (-1/3!, 1/5!, -1/7!, 1/9!)
|
||||
// c8 = waveDistortX
|
||||
// c9 = waveDistortY
|
||||
// c10 = waveDistortZ
|
||||
// c11 = waveDirX (0.25, 0.0, -0.7, -0.8)
|
||||
// c12 = waveDirY (0.0, 0.15, -0.7, 0.1)
|
||||
// c13 = waveSpeed (0.2, 0.15, 0.4, 0.4)
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
mul r0, c11, v0.x // pos X,Y input to waves
|
||||
mad r0, c12, v0.y, r0
|
||||
|
||||
mov r1, c5.x // time
|
||||
mad r0, r1, c13, r0 // scale by speed and add to X,Y input
|
||||
frc r0.xy, r0
|
||||
frc r1.xy, r0.zwzw
|
||||
mov r0.zw, r1.xyxy
|
||||
|
||||
sub r0, r0, c4.y // - 0.5
|
||||
mul r1, r0, c6.w // *= 2 pi
|
||||
|
||||
mul r2, r1, r1 // ^2
|
||||
mul r3, r2, r1 // ^3
|
||||
mul r5, r3, r2 // ^5
|
||||
mul r7, r5, r2 // ^7
|
||||
mul r9, r7, r2 // ^9
|
||||
|
||||
mad r0, r3, c7.x, r1 // - r1^3 / 3!
|
||||
mad r0, r5, c7.y, r0 // + r1^5 / 5!
|
||||
mad r0, r7, c7.z, r0 // - r1^7 / 7!
|
||||
mad r0, r9, c7.w, r0 // + r1^9 / 9!
|
||||
|
||||
dp4 r3.x, r0, c8
|
||||
dp4 r3.y, r0, c9
|
||||
dp4 r3.zw, r0, c10
|
||||
|
||||
sub r4, c4.z, v7.y
|
||||
mul r3, r3, r4 // mult by Y tex coord. So the waves only affect the top verts
|
||||
mov r2.w, v0 //
|
||||
add r2.xyz, r3, v0 // add offset to position
|
||||
|
||||
m4x4 oPos, r2, c0 // trans to NDC
|
||||
|
||||
mov oFog, c4.z // no fog
|
||||
mov oD0, v5
|
||||
mov oT0, v7
|
||||
|
@ -1,245 +1,245 @@
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c25; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c16.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = reflection strength (transparency)
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c30 = waterlevel + offset
|
||||
// c31 = (maxAtten - minAtten) / depthFalloff
|
||||
// c32 = minAtten.
|
||||
// And in particular:
|
||||
// c30.w = waterlevel
|
||||
// c31.w = 1.f;
|
||||
// c32.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c8, r6.xxxx;
|
||||
mad r0, c9, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c15.wwww;
|
||||
add r0, r0, c15.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist); expp returns the fractional part of its scalar source in .y, so each channel goes through r1.y in turn
|
||||
expp r1.y, r0.xxxx // r1.y = frac(r0.x), r1.y is only a scratch slot here
|
||||
mov r1.x, r1.yyyy // r1.x = frac(r0.x)
|
||||
expp r1.y, r0.zzzz // r1.y = frac(r0.z)
|
||||
mov r1.z, r1.yyyy // r1.z = frac(r0.z)
|
||||
expp r1.y, r0.wwww // r1.y = frac(r0.w)
|
||||
mov r1.w, r1.yyyy // r1.w = frac(r0.w)
|
||||
expp r1.y, r0.yyyy // r1.y = frac(r0.y); done last so the scratch slot ends up holding its own channel
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c13.wwww, r2;
|
||||
mad r1, r4, c14.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c30, r6.zzzz;
|
||||
mul r4, r4, c31;
|
||||
add r4, r4, c32;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c16.zzzz;
|
||||
max r4.xyz, r4, c16.xxxx;
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c29;
|
||||
max r11, r11, c16.xxxx;
|
||||
min r11, r11, c16.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c7;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c16.zzzz;
|
||||
|
||||
// Smooth the approach to the shore.
|
||||
sub r10.x, r6.z, c30.w; // r10.x = height
|
||||
mul r10.x, r10.x, r10.x; // r10.x = h^2
|
||||
mul r10.x, r10.x, c10.x; // r10.x = -h^2 * k1 / k2^2
|
||||
add r10.x, r10.x, c10.y; // r10.x = k1 + -h^2 * k1 / k2^2
|
||||
max r10.x, r10.x, c16.xxxx; // Clamp to >= zero
|
||||
add r8.x, r8.x, r10.x; // r8.x += del
|
||||
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c30.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
add r6.z, r6.z, c12.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c5;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c7;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c16.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0))
|
||||
dp4 r7.x, r1, -c8
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c9
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
|
||||
mov r11, c16.xxzx;
|
||||
add r11, r11, r7;
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx;
|
||||
|
||||
//
|
||||
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
|
||||
mul r10.x, c12.y, r4.z;
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy;
|
||||
|
||||
// mul r6.z, r6.z, r10.xxxx; DEBUG
|
||||
|
||||
// mad r6, r11, c12.yyzz, r6;
|
||||
|
||||
// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
|
||||
// accumCos *= (scrunchScale, scrunchScale, 0, 0);
|
||||
|
||||
//##mul r2.x, r6.z, c12.x;
|
||||
//##add r2.x, r2.x, c16.z;
|
||||
|
||||
//##mul r7.xy, r7.xy, r2.xx;
|
||||
|
||||
// This is actually wrong, but useful right now for visualizing the generated coords.
|
||||
// See below for correct version.
|
||||
|
||||
//##sub r3, c16.xxzx, r7.xyzz;
|
||||
|
||||
// Normalize?
|
||||
|
||||
|
||||
// Now rotate our normal vector into the wind
|
||||
//##dp3 r0.x, r3, c18.xyww;
|
||||
//##dp3 r0.y, r3, c18.zxww;
|
||||
//##mov r3.xy, r0;
|
||||
|
||||
// Initialize r0.w
|
||||
mov r0.w, c16.zzzz;
|
||||
|
||||
//##dp3 r0.x, r3, r3;
|
||||
//##rsq r0.x, r0.x;
|
||||
//##mul r3, r3, r0.xxxw;
|
||||
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c25; // HACKAGE
|
||||
//mov r6.w, c16.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c11.x;
|
||||
mul oFog, r10.x, c11.y;
|
||||
mov oPos, r9;
|
||||
|
||||
|
||||
// Color
|
||||
mul oD0, c4, v5.xxxx;
|
||||
|
||||
// UVW0
|
||||
// This layer just stays put. The motion's in the texture
|
||||
// U = transformed U
|
||||
// V = transformed V
|
||||
dp4 r0.x, v7, c19;
|
||||
dp4 r0.y, v7, c20;
|
||||
//mul r0.y, r0.y, -c16.z;
|
||||
//add r0.y, r0.y, c16.z;
|
||||
//add r0.y, r0.y, c16.z;
|
||||
//add r0.y, r0.y, c16.y;
|
||||
mov oT0, r0.xyww;
|
||||
mov oT1, r0.xyww;
|
||||
mov oT2, r0.xyww;
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c25; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c16.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = reflection strength (transparency)
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c30 = waterlevel + offset
|
||||
// c31 = (maxAtten - minAtten) / depthFalloff
|
||||
// c32 = minAtten.
|
||||
// And in particular:
|
||||
// c30.w = waterlevel
|
||||
// c31.w = 1.f;
|
||||
// c32.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c8, r6.xxxx;
|
||||
mad r0, c9, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c15.wwww;
|
||||
add r0, r0, c15.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist); expp returns the fractional part of its scalar source in .y, so each channel goes through r1.y in turn
|
||||
expp r1.y, r0.xxxx // r1.y = frac(r0.x), r1.y is only a scratch slot here
|
||||
mov r1.x, r1.yyyy // r1.x = frac(r0.x)
|
||||
expp r1.y, r0.zzzz // r1.y = frac(r0.z)
|
||||
mov r1.z, r1.yyyy // r1.z = frac(r0.z)
|
||||
expp r1.y, r0.wwww // r1.y = frac(r0.w)
|
||||
mov r1.w, r1.yyyy // r1.w = frac(r0.w)
|
||||
expp r1.y, r0.yyyy // r1.y = frac(r0.y); done last so the scratch slot ends up holding its own channel
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c13.wwww, r2;
|
||||
mad r1, r4, c14.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c30, r6.zzzz;
|
||||
mul r4, r4, c31;
|
||||
add r4, r4, c32;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c16.zzzz;
|
||||
max r4.xyz, r4, c16.xxxx;
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c29;
|
||||
max r11, r11, c16.xxxx;
|
||||
min r11, r11, c16.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c7;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c16.zzzz;
|
||||
|
||||
// Smooth the approach to the shore.
|
||||
sub r10.x, r6.z, c30.w; // r10.x = height
|
||||
mul r10.x, r10.x, r10.x; // r10.x = h^2
|
||||
mul r10.x, r10.x, c10.x; // r10.x = -h^2 * k1 / k2^2
|
||||
add r10.x, r10.x, c10.y; // r10.x = k1 + -h^2 * k1 / k2^2
|
||||
max r10.x, r10.x, c16.xxxx; // Clamp to >= zero
|
||||
add r8.x, r8.x, r10.x; // r8.x += del
|
||||
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c30.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
add r6.z, r6.z, c12.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c5;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c7;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c16.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0))
|
||||
dp4 r7.x, r1, -c8
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c9
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
|
||||
mov r11, c16.xxzx;
|
||||
add r11, r11, r7;
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx;
|
||||
|
||||
//
|
||||
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
|
||||
mul r10.x, c12.y, r4.z;
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy;
|
||||
|
||||
// mul r6.z, r6.z, r10.xxxx; DEBUG
|
||||
|
||||
// mad r6, r11, c12.yyzz, r6;
|
||||
|
||||
// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
|
||||
// accumCos *= (scrunchScale, scrunchScale, 0, 0);
|
||||
|
||||
//##mul r2.x, r6.z, c12.x;
|
||||
//##add r2.x, r2.x, c16.z;
|
||||
|
||||
//##mul r7.xy, r7.xy, r2.xx;
|
||||
|
||||
// This is actually wrong, but useful right now for visualizing the generated coords.
|
||||
// See below for correct version.
|
||||
|
||||
//##sub r3, c16.xxzx, r7.xyzz;
|
||||
|
||||
// Normalize?
|
||||
|
||||
|
||||
// Now rotate our normal vector into the wind
|
||||
//##dp3 r0.x, r3, c18.xyww;
|
||||
//##dp3 r0.y, r3, c18.zxww;
|
||||
//##mov r3.xy, r0;
|
||||
|
||||
// Initialize r0.w
|
||||
mov r0.w, c16.zzzz;
|
||||
|
||||
//##dp3 r0.x, r3, r3;
|
||||
//##rsq r0.x, r0.x;
|
||||
//##mul r3, r3, r0.xxxw;
|
||||
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c25; // HACKAGE
|
||||
//mov r6.w, c16.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c11.x;
|
||||
mul oFog, r10.x, c11.y;
|
||||
mov oPos, r9;
|
||||
|
||||
|
||||
// Color
|
||||
mul oD0, c4, v5.xxxx;
|
||||
|
||||
// UVW0
|
||||
// This layer just stays put. The motion's in the texture
|
||||
// U = transformed U
|
||||
// V = transformed V
|
||||
dp4 r0.x, v7, c19;
|
||||
dp4 r0.y, v7, c20;
|
||||
//mul r0.y, r0.y, -c16.z;
|
||||
//add r0.y, r0.y, c16.z;
|
||||
//add r0.y, r0.y, c16.z;
|
||||
//add r0.y, r0.y, c16.y;
|
||||
mov oT0, r0.xyww;
|
||||
mov oT1, r0.xyww;
|
||||
mov oT2, r0.xyww;
|
||||
|
||||
|
@ -1,203 +1,203 @@
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c25; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c16.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = reflection strength (transparency)
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c30 = waterlevel + offset
|
||||
// c31 = (maxAtten - minAtten) / depthFalloff
|
||||
// c32 = minAtten.
|
||||
// And in particular:
|
||||
// c30.w = waterlevel
|
||||
// c31.w = 1.f;
|
||||
// c32.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c8, r6.xxxx;
|
||||
mad r0, c9, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c15.wwww;
|
||||
add r0, r0, c15.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist); expp returns the fractional part of its scalar source in .y, so each channel goes through r1.y in turn
|
||||
expp r1.y, r0.xxxx // r1.y = frac(r0.x), r1.y is only a scratch slot here
|
||||
mov r1.x, r1.yyyy // r1.x = frac(r0.x)
|
||||
expp r1.y, r0.zzzz // r1.y = frac(r0.z)
|
||||
mov r1.z, r1.yyyy // r1.z = frac(r0.z)
|
||||
expp r1.y, r0.wwww // r1.y = frac(r0.w)
|
||||
mov r1.w, r1.yyyy // r1.w = frac(r0.w)
|
||||
expp r1.y, r0.yyyy // r1.y = frac(r0.y); done last so the scratch slot ends up holding its own channel
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c13.wwww, r2;
|
||||
mad r1, r4, c14.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c30, r6.zzzz;
|
||||
mul r4, r4, c31;
|
||||
add r4, r4, c32;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c16.zzzz;
|
||||
max r4.xyz, r4, c16.xxxx;
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c29;
|
||||
max r11, r11, c16.xxxx;
|
||||
min r11, r11, c16.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c7;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c16.zzzz;
|
||||
|
||||
// Smooth the approach to the shore.
|
||||
/*
|
||||
sub r10.x, r6.z, c30.w; // r10.x = height
|
||||
mul r10.x, r10.x, r10.x; // r10.x = h^2
|
||||
mul r10.x, r10.x, c10.x; // r10.x = -h^2 * k1 / k2^2
|
||||
add r10.x, r10.x, c10.y; // r10.x = k1 + -h^2 * k1 / k2^2
|
||||
max r10.x, r10.x, c16.xxxx; // Clamp to >= zero
|
||||
add r8.x, r8.x, r10.x; // r8.x += del
|
||||
*/
|
||||
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c30.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
|
||||
// Pos = (in.x + S, in.y + R, r6.z)
|
||||
// S = sum(k Dir.x A cos())
|
||||
// R = sum(k Dir.y A cos())
|
||||
// c17 = k Dir.x A
|
||||
// c18 = k Dir.y A
|
||||
// S = sum(cosDist * c17);
|
||||
dp4 r7.x, r1, c17;
|
||||
dp4 r7.y, r1, c18;
|
||||
|
||||
add r6.xy, r6.xy, r7.xy;
|
||||
|
||||
// Initialize r0.w
|
||||
mov r0.w, c16.zzzz;
|
||||
|
||||
//##dp3 r0.x, r3, r3;
|
||||
//##rsq r0.x, r0.x;
|
||||
//##mul r3, r3, r0.xxxw;
|
||||
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c25; // HACKAGE
|
||||
//mov r6.w, c16.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c11.x;
|
||||
mul oFog, r10.x, c11.y;
|
||||
mov oPos, r9;
|
||||
|
||||
|
||||
// Color
|
||||
mul oD0, c4, v5.xxxx;
|
||||
|
||||
// UVW0
|
||||
// This layer just stays put. The motion's in the texture
|
||||
// U = transformed U
|
||||
// V = transformed V
|
||||
dp4 r0.x, v7, c19;
|
||||
dp4 r0.y, v7, c20;
|
||||
//mul r0.y, r0.y, -c16.z;
|
||||
//add r0.y, r0.y, c16.z;
|
||||
//add r0.y, r0.y, c16.z;
|
||||
//add r0.y, r0.y, c16.y;
|
||||
mov oT0, r0.xyww;
|
||||
mov oT1, r0.xyww;
|
||||
mov oT2, r0.xyww;
|
||||
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c25; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c16.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = reflection strength (transparency)
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c30 = waterlevel + offset
|
||||
// c31 = (maxAtten - minAtten) / depthFalloff
|
||||
// c32 = minAtten.
|
||||
// And in particular:
|
||||
// c30.w = waterlevel
|
||||
// c31.w = 1.f;
|
||||
// c32.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c8, r6.xxxx;
|
||||
mad r0, c9, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c15.wwww;
|
||||
add r0, r0, c15.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist); expp returns the fractional part of its scalar source in .y, so each channel goes through r1.y in turn
|
||||
expp r1.y, r0.xxxx // r1.y = frac(r0.x), r1.y is only a scratch slot here
|
||||
mov r1.x, r1.yyyy // r1.x = frac(r0.x)
|
||||
expp r1.y, r0.zzzz // r1.y = frac(r0.z)
|
||||
mov r1.z, r1.yyyy // r1.z = frac(r0.z)
|
||||
expp r1.y, r0.wwww // r1.y = frac(r0.w)
|
||||
mov r1.w, r1.yyyy // r1.w = frac(r0.w)
|
||||
expp r1.y, r0.yyyy // r1.y = frac(r0.y); done last so the scratch slot ends up holding its own channel
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c13.wwww, r2;
|
||||
mad r1, r4, c14.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c30, r6.zzzz;
|
||||
mul r4, r4, c31;
|
||||
add r4, r4, c32;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c16.zzzz;
|
||||
max r4.xyz, r4, c16.xxxx;
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c29;
|
||||
max r11, r11, c16.xxxx;
|
||||
min r11, r11, c16.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c7;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c16.zzzz;
|
||||
|
||||
// Smooth the approach to the shore.
|
||||
/*
|
||||
sub r10.x, r6.z, c30.w; // r10.x = height
|
||||
mul r10.x, r10.x, r10.x; // r10.x = h^2
|
||||
mul r10.x, r10.x, c10.x; // r10.x = -h^2 * k1 / k2^2
|
||||
add r10.x, r10.x, c10.y; // r10.x = k1 + -h^2 * k1 / k2^2
|
||||
max r10.x, r10.x, c16.xxxx; // Clamp to >= zero
|
||||
add r8.x, r8.x, r10.x; // r8.x += del
|
||||
*/
|
||||
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c30.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
|
||||
// Pos = (in.x + S, in.y + R, r6.z)
|
||||
// S = sum(k Dir.x A cos())
|
||||
// R = sum(k Dir.y A cos())
|
||||
// c17 = k Dir.x A
|
||||
// c18 = k Dir.y A
|
||||
// S = sum(cosDist * c17);
|
||||
dp4 r7.x, r1, c17;
|
||||
dp4 r7.y, r1, c18;
|
||||
|
||||
add r6.xy, r6.xy, r7.xy;
|
||||
|
||||
// Initialize r0.w
|
||||
mov r0.w, c16.zzzz;
|
||||
|
||||
//##dp3 r0.x, r3, r3;
|
||||
//##rsq r0.x, r0.x;
|
||||
//##mul r3, r3, r0.xxxw;
|
||||
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c25; // HACKAGE
|
||||
//mov r6.w, c16.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c11.x;
|
||||
mul oFog, r10.x, c11.y;
|
||||
mov oPos, r9;
|
||||
|
||||
|
||||
// Color
|
||||
mul oD0, c4, v5.xxxx;
|
||||
|
||||
// UVW0
|
||||
// This layer just stays put. The motion's in the texture
|
||||
// U = transformed U
|
||||
// V = transformed V
|
||||
dp4 r0.x, v7, c19;
|
||||
dp4 r0.y, v7, c20;
|
||||
//mul r0.y, r0.y, -c16.z;
|
||||
//add r0.y, r0.y, c16.z;
|
||||
//add r0.y, r0.y, c16.z;
|
||||
//add r0.y, r0.y, c16.y;
|
||||
mov oT0, r0.xyww;
|
||||
mov oT1, r0.xyww;
|
||||
mov oT2, r0.xyww;
|
||||
|
||||
|
@ -1,207 +1,207 @@
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c7, r6.xxxx;
|
||||
mad r0, c8, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4;
|
||||
add r0, r0, c5;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c12.wwww;
|
||||
add r0, r0, c12.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c10.wwww, r2;
|
||||
mad r1, r4, c11.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz;
|
||||
mul r4, r4, c23;
|
||||
add r4, r4, c24;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c21;
|
||||
max r11, r11, c13.xxxx;
|
||||
min r11, r11, c13.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c22.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c4;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c6;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c13.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0))
|
||||
dp4 r7.x, r1, -c7
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c8
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
|
||||
mov r11, c13.xxzx;
|
||||
add r11, r11, r7;
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx;
|
||||
|
||||
//
|
||||
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
|
||||
mul r10.x, c9.y, r4.z;
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy;
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c18; // HACKAGE
|
||||
//mov r6.w, c13.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c29.x;
|
||||
mul oFog, r10.x, c29.y;
|
||||
mov oPos, r9;
|
||||
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26;
|
||||
|
||||
// Usual texture transform
|
||||
mov r11.zw, c13.zzzz;
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0 // v0: input position, transformed by the matrix at c18 below
|
||||
dcl_color v5 // v5: vertex colour, used as control channels (see the "Input diffuse v5 color" notes)
|
||||
dcl_texcoord0 v7 // v7: texture coordinate, fed to the texture transform at the end of the shader
|
||||
|
||||
// Setup: store our input position in world space in r6; r6 is the working position from here on.
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w). c13.z is used as the constant 1 throughout this shader.
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Wave phase: dot our position with our direction vectors. Each component of r0 carries one of four waves; c7/c8 are the per-wave direction X/Y terms.
|
||||
mul r0, c7, r6.xxxx; // r0 = dirX * pos.x
|
||||
mad r0, c8, r6.yyyy, r0; // r0 += dirY * pos.y
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4; // c4 = kFreq; NOTE(review): mul + add instead of one mad, presumably because a vs.1.1 instruction may read only one constant register - confirm
|
||||
add r0, r0, c5; // c5 = kPhase
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist = (dist + kPi) * rcp(kTwoPi); (c12.z = kPi, c12.w = kTwoPi)
|
||||
rcp r4, c12.wwww; // r4 = 1 / kTwoPi
|
||||
add r0, r0, c12.zzzz; // shift by kPi so the wrapped result ends up centred on zero
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist); expp's .y result is the fractional part of its source scalar, so each component goes through r1.y in turn.
|
||||
expp r1.y, r0.xxxx // r1.y = frac(r0.x)
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz // r1.y = frac(r0.z)
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww // r1.y = frac(r0.w)
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy // done last, so r1.y is left holding frac(r0.y)
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi; r0 is now in [-Pi..Pi]
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist); power-series approximation evaluated for all four waves at once (c10 = vSin coefficients, c11 = vCos coefficients).
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
|
||||
// cos = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w (vCos.x is the constant "1" term)
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6 (r4 held r0^5)
|
||||
mad r2, r5, c10.wwww, r2; // r2 = sinDist, now including r0^7 * vSin.w
|
||||
mad r1, r4, c11.wwww, r1; // r1 = cosDist, now including r0^6 * vCos.w
|
||||
|
||||
// Depth filter, frequency filter and wave height. Calc our depth based filtering into r4 (its old contents, the r0^6 temp, are not used again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz; // (waterlevel + offset) - vertex height, per channel; c22.w is the plain waterlevel, so r4.w = depth
|
||||
mul r4, r4, c23; // * (maxAtten - minAtten) / depthFalloff
|
||||
add r4, r4, c24; // + minAtten
|
||||
// Clamp .xyz to range [0..1] (c13.z is used as 1, c13.x as 0); .w is left unclamped.
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our per-wave frequency filter into r11 (see the "per wave filtering" notes in the comment block above).
|
||||
mul r11, v5.wwww, c21; // NOTE(review): c21 is presumably the per-wave waveLen term; the "- 1" from the documented formula is not applied here - confirm
|
||||
max r11, r11, c13.xxxx; // clamp below at 0
|
||||
min r11, r11, c13.zzzz; // clamp above at 1
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw (c6 = kAmplitude)
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz; // sum of the four waves (dot with all ones)
|
||||
mul r8.y, r8.x, r4.z; // scale by the depth-based wave scaling channel
|
||||
add r8.z, r8.y, c22.w; // c22.w = waterlevel
|
||||
max r6.z, r6.z, r8.z; // keep whichever is higher: the original vertex height or the wave surface
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// Surface normal and scrunch: build the normal from the filtered cosine terms, then offset the vertex in X/Y along it. First, cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c4;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c6;
|
||||
// cosDist *= filter; (r11 still holds the frequency filter here)
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c13.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0))
|
||||
dp4 r7.x, r1, -c7 // toCenter_X is the negated c7
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c8 // toCenter_Y is the negated c8
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x); (not computed in this shader)
|
||||
// accumTan = (0, 1, -accumCos.y); (not computed in this shader)
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
|
||||
mov r11, c13.xxzx; // (0, 0, 1, 0); r11 is reused, the filter is no longer needed
|
||||
add r11, r11, r7; // (accumCos.x, accumCos.y, 1, 0)
|
||||
dp3 r10.x, r11, r11; // squared length
|
||||
rsq r10.x, r10.x; // 1 / length
|
||||
mul r11, r11, r10.xxxx; // r11 = normalized accumNorm
|
||||
|
||||
//
|
||||
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
|
||||
mul r10.x, c9.y, r4.z; // c9.y = scrunch amount, r4.z = depth-based wave scaling
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy; // pos.xy += normal.xy * scaled scrunch
|
||||
|
||||
// Final position and outputs. Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x; // c25.x = vertical bias
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c18; // HACKAGE
|
||||
//mov r6.w, c13.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0; // transform into r9 first, because the fog term below reads r9.w
|
||||
add r10.x, r9.w, c29.x; // fog = (r9.w + c29.x) * c29.y
|
||||
mul oFog, r10.x, c29.y;
|
||||
mov oPos, r9;
|
||||
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26; // rgb = v5.g, a = v5.r, times c26
|
||||
|
||||
// Usual texture transform: u = v7 . c14, v = v7 . c15, with z = w = 1
|
||||
mov r11.zw, c13.zzzz;
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
|
@ -1,189 +1,189 @@
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0 // v0: input position, transformed by the matrix at c18 below
|
||||
dcl_color v5 // v5: vertex colour, used as control channels (see the "Input diffuse v5 color" notes)
|
||||
dcl_texcoord0 v7 // v7: texture coordinate, fed to the texture transform at the end of the shader
|
||||
|
||||
// Setup: store our input position in world space in r6; r6 is the working position from here on.
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w). c13.z is used as the constant 1 throughout this shader.
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Wave phase: dot our position with our direction vectors. Each component of r0 carries one of four waves; c7/c8 are the per-wave direction X/Y terms.
|
||||
mul r0, c7, r6.xxxx; // r0 = dirX * pos.x
|
||||
mad r0, c8, r6.yyyy, r0; // r0 += dirY * pos.y
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4; // c4 = kFreq; NOTE(review): mul + add instead of one mad, presumably because a vs.1.1 instruction may read only one constant register - confirm
|
||||
add r0, r0, c5; // c5 = kPhase
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist = (dist + kPi) * rcp(kTwoPi); (c12.z = kPi, c12.w = kTwoPi)
|
||||
rcp r4, c12.wwww; // r4 = 1 / kTwoPi
|
||||
add r0, r0, c12.zzzz; // shift by kPi so the wrapped result ends up centred on zero
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist); expp's .y result is the fractional part of its source scalar, so each component goes through r1.y in turn.
|
||||
expp r1.y, r0.xxxx // r1.y = frac(r0.x)
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz // r1.y = frac(r0.z)
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww // r1.y = frac(r0.w)
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy // done last, so r1.y is left holding frac(r0.y)
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi; r0 is now in [-Pi..Pi]
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist); power-series approximation evaluated for all four waves at once (c10 = vSin coefficients, c11 = vCos coefficients).
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
|
||||
// cos = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w (vCos.x is the constant "1" term)
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6 (r4 held r0^5)
|
||||
mad r2, r5, c10.wwww, r2; // r2 = sinDist, now including r0^7 * vSin.w
|
||||
mad r1, r4, c11.wwww, r1; // r1 = cosDist, now including r0^6 * vCos.w
|
||||
|
||||
// Depth filter, frequency filter and wave height. Calc our depth based filtering into r4 (its old contents, the r0^6 temp, are not used again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz; // (waterlevel + offset) - vertex height, per channel; c22.w is the plain waterlevel, so r4.w = depth
|
||||
mul r4, r4, c23; // * (maxAtten - minAtten) / depthFalloff
|
||||
add r4, r4, c24; // + minAtten
|
||||
// Clamp .xyz to range [0..1] (c13.z is used as 1, c13.x as 0); .w is left unclamped.
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our per-wave frequency filter into r11 (see the "per wave filtering" notes in the comment block above).
|
||||
mul r11, v5.wwww, c21; // NOTE(review): c21 is presumably the per-wave waveLen term; the "- 1" from the documented formula is not applied here - confirm
|
||||
max r11, r11, c13.xxxx; // clamp below at 0
|
||||
min r11, r11, c13.zzzz; // clamp above at 1
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw (c6 = kAmplitude)
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz; // sum of the four waves (dot with all ones)
|
||||
mul r8.y, r8.x, r4.z; // scale by the depth-based wave scaling channel
|
||||
add r8.z, r8.y, c22.w; // c22.w = waterlevel
|
||||
max r6.z, r6.z, r8.z; // keep whichever is higher: the original vertex height or the wave surface
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// Horizontal displacement: offset the vertex in X/Y by the filtered cosine of each wave. First, cosDist *= filter; (r11 holds the frequency filter)
|
||||
mul r1, r1, r11;
|
||||
|
||||
// Pos = (in.x + S, in.y + R, r6.z)
|
||||
// S = sum(k Dir.x A cos())
|
||||
// R = sum(k Dir.y A cos())
|
||||
// c30 = k Dir.x A
|
||||
// c31 = k Dir.y A
|
||||
// S = sum(cosDist * c30);
|
||||
dp4 r7.x, r1, c30;
|
||||
// R = sum(cosDist * c31);
|
||||
dp4 r7.y, r1, c31;
|
||||
|
||||
add r6.xy, r6.xy, r7.xy; // pos.xy += (S, R)
|
||||
|
||||
// Final position and outputs. Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x; // c25.x = vertical bias
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c18; // HACKAGE
|
||||
//mov r6.w, c13.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0; // transform into r9 first, because the fog term below reads r9.w
|
||||
add r10.x, r9.w, c29.x; // fog = (r9.w + c29.x) * c29.y
|
||||
mul oFog, r10.x, c29.y;
|
||||
mov oPos, r9;
|
||||
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26; // rgb = v5.g, a = v5.r, times c26
|
||||
|
||||
// Usual texture transform: u = v7 . c14, v = v7 . c15, with z = w = 1
|
||||
mov r11.zw, c13.zzzz;
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0 // v0: input position, transformed by the matrix at c18 below
|
||||
dcl_color v5 // v5: vertex colour, used as control channels (see the "Input diffuse v5 color" notes)
|
||||
dcl_texcoord0 v7 // v7: texture coordinate, fed to the texture transform at the end of the shader
|
||||
|
||||
// Setup: store our input position in world space in r6; r6 is the working position from here on.
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w). c13.z is used as the constant 1 throughout this shader.
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Wave phase: dot our position with our direction vectors. Each component of r0 carries one of four waves; c7/c8 are the per-wave direction X/Y terms.
|
||||
mul r0, c7, r6.xxxx; // r0 = dirX * pos.x
|
||||
mad r0, c8, r6.yyyy, r0; // r0 += dirY * pos.y
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4; // c4 = kFreq; NOTE(review): mul + add instead of one mad, presumably because a vs.1.1 instruction may read only one constant register - confirm
|
||||
add r0, r0, c5; // c5 = kPhase
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist = (dist + kPi) * rcp(kTwoPi); (c12.z = kPi, c12.w = kTwoPi)
|
||||
rcp r4, c12.wwww; // r4 = 1 / kTwoPi
|
||||
add r0, r0, c12.zzzz; // shift by kPi so the wrapped result ends up centred on zero
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist); expp's .y result is the fractional part of its source scalar, so each component goes through r1.y in turn.
|
||||
expp r1.y, r0.xxxx // r1.y = frac(r0.x)
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz // r1.y = frac(r0.z)
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww // r1.y = frac(r0.w)
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy // done last, so r1.y is left holding frac(r0.y)
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi; r0 is now in [-Pi..Pi]
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist); power-series approximation evaluated for all four waves at once (c10 = vSin coefficients, c11 = vCos coefficients).
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
|
||||
// cos = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w (vCos.x is the constant "1" term)
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6 (r4 held r0^5)
|
||||
mad r2, r5, c10.wwww, r2; // r2 = sinDist, now including r0^7 * vSin.w
|
||||
mad r1, r4, c11.wwww, r1; // r1 = cosDist, now including r0^6 * vCos.w
|
||||
|
||||
// Depth filter, frequency filter and wave height. Calc our depth based filtering into r4 (its old contents, the r0^6 temp, are not used again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz; // (waterlevel + offset) - vertex height, per channel; c22.w is the plain waterlevel, so r4.w = depth
|
||||
mul r4, r4, c23; // * (maxAtten - minAtten) / depthFalloff
|
||||
add r4, r4, c24; // + minAtten
|
||||
// Clamp .xyz to range [0..1] (c13.z is used as 1, c13.x as 0); .w is left unclamped.
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our per-wave frequency filter into r11 (see the "per wave filtering" notes in the comment block above).
|
||||
mul r11, v5.wwww, c21; // NOTE(review): c21 is presumably the per-wave waveLen term; the "- 1" from the documented formula is not applied here - confirm
|
||||
max r11, r11, c13.xxxx; // clamp below at 0
|
||||
min r11, r11, c13.zzzz; // clamp above at 1
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw (c6 = kAmplitude)
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz; // sum of the four waves (dot with all ones)
|
||||
mul r8.y, r8.x, r4.z; // scale by the depth-based wave scaling channel
|
||||
add r8.z, r8.y, c22.w; // c22.w = waterlevel
|
||||
max r6.z, r6.z, r8.z; // keep whichever is higher: the original vertex height or the wave surface
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// Horizontal displacement: offset the vertex in X/Y by the filtered cosine of each wave. First, cosDist *= filter; (r11 holds the frequency filter)
|
||||
mul r1, r1, r11;
|
||||
|
||||
// Pos = (in.x + S, in.y + R, r6.z)
|
||||
// S = sum(k Dir.x A cos())
|
||||
// R = sum(k Dir.y A cos())
|
||||
// c30 = k Dir.x A
|
||||
// c31 = k Dir.y A
|
||||
// S = sum(cosDist * c30);
|
||||
dp4 r7.x, r1, c30;
|
||||
// R = sum(cosDist * c31);
|
||||
dp4 r7.y, r1, c31;
|
||||
|
||||
add r6.xy, r6.xy, r7.xy; // pos.xy += (S, R)
|
||||
|
||||
// Final position and outputs. Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x; // c25.x = vertical bias
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c18; // HACKAGE
|
||||
//mov r6.w, c13.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0; // transform into r9 first, because the fog term below reads r9.w
|
||||
add r10.x, r9.w, c29.x; // fog = (r9.w + c29.x) * c29.y
|
||||
mul oFog, r10.x, c29.y;
|
||||
mov oPos, r9;
|
||||
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26; // rgb = v5.g, a = v5.r, times c26
|
||||
|
||||
// Usual texture transform: u = v7 . c14, v = v7 . c15, with z = w = 1
|
||||
mov r11.zw, c13.zzzz;
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
|
@ -1,209 +1,209 @@
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0 // v0: input position, transformed by the matrix at c18 below
|
||||
dcl_color v5 // v5: vertex colour, used as control channels (see the "Input diffuse v5 color" notes)
|
||||
dcl_texcoord0 v7 // v7: texture coordinate, fed to the texture transforms at the end of the shader
|
||||
|
||||
// Setup: store our input position in world space in r6; r6 is the working position from here on.
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w). c13.z is used as the constant 1 throughout this shader.
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Wave phase: dot our position with our direction vectors. Each component of r0 carries one of four waves; c7/c8 are the per-wave direction X/Y terms.
|
||||
mul r0, c7, r6.xxxx; // r0 = dirX * pos.x
|
||||
mad r0, c8, r6.yyyy, r0; // r0 += dirY * pos.y
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4; // c4 = kFreq; NOTE(review): mul + add instead of one mad, presumably because a vs.1.1 instruction may read only one constant register - confirm
|
||||
add r0, r0, c5; // c5 = kPhase
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist = (dist + kPi) * rcp(kTwoPi); (c12.z = kPi, c12.w = kTwoPi)
|
||||
rcp r4, c12.wwww; // r4 = 1 / kTwoPi
|
||||
add r0, r0, c12.zzzz; // shift by kPi so the wrapped result ends up centred on zero
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist); expp's .y result is the fractional part of its source scalar, so each component goes through r1.y in turn.
|
||||
expp r1.y, r0.xxxx // r1.y = frac(r0.x)
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz // r1.y = frac(r0.z)
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww // r1.y = frac(r0.w)
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy // done last, so r1.y is left holding frac(r0.y)
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi; r0 is now in [-Pi..Pi]
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist); power-series approximation evaluated for all four waves at once (c10 = vSin coefficients, c11 = vCos coefficients).
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
|
||||
// cos = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w (vCos.x is the constant "1" term)
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6 (r4 held r0^5)
|
||||
mad r2, r5, c10.wwww, r2; // r2 = sinDist, now including r0^7 * vSin.w
|
||||
mad r1, r4, c11.wwww, r1; // r1 = cosDist, now including r0^6 * vCos.w
|
||||
|
||||
// Depth filter, frequency filter and wave height. Calc our depth based filtering into r4 (its old contents, the r0^6 temp, are not used again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz; // (waterlevel + offset) - vertex height, per channel; c22.w is the plain waterlevel, so r4.w = depth
|
||||
mul r4, r4, c23; // * (maxAtten - minAtten) / depthFalloff
|
||||
add r4, r4, c24; // + minAtten
|
||||
// Clamp .xyz to range [0..1] (c13.z is used as 1, c13.x as 0); .w is left unclamped.
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our per-wave frequency filter into r11 (see the "per wave filtering" notes in the comment block above).
|
||||
mul r11, v5.wwww, c21; // NOTE(review): c21 is presumably the per-wave waveLen term; the "- 1" from the documented formula is not applied here - confirm
|
||||
max r11, r11, c13.xxxx; // clamp below at 0
|
||||
min r11, r11, c13.zzzz; // clamp above at 1
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw (c6 = kAmplitude)
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz; // sum of the four waves (dot with all ones)
|
||||
mul r8.y, r8.x, r4.z; // scale by the depth-based wave scaling channel
|
||||
add r8.z, r8.y, c22.w; // c22.w = waterlevel
|
||||
max r6.z, r6.z, r8.z; // keep whichever is higher: the original vertex height or the wave surface
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// Surface normal and scrunch: build the normal from the filtered cosine terms, then offset the vertex in X/Y along it. First, cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c4;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c6;
|
||||
// cosDist *= filter; (r11 still holds the frequency filter here)
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c13.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0))
|
||||
dp4 r7.x, r1, -c7 // toCenter_X is the negated c7
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c8 // toCenter_Y is the negated c8
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x); (not computed in this shader)
|
||||
// accumTan = (0, 1, -accumCos.y); (not computed in this shader)
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
|
||||
mov r11, c13.xxzx; // (0, 0, 1, 0); r11 is reused, the filter is no longer needed
|
||||
add r11, r11, r7; // (accumCos.x, accumCos.y, 1, 0)
|
||||
dp3 r10.x, r11, r11; // squared length
|
||||
rsq r10.x, r10.x; // 1 / length
|
||||
mul r11, r11, r10.xxxx; // r11 = normalized accumNorm
|
||||
|
||||
//
|
||||
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
|
||||
mul r10.x, c9.y, r4.z; // c9.y = scrunch amount, r4.z = depth-based wave scaling
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy; // pos.xy += normal.xy * scaled scrunch
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c18; // HACKAGE
|
||||
//mov r6.w, c13.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c29.x;
|
||||
mul oFog, r10.x, c29.y;
|
||||
mov oPos, r9;
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26;
|
||||
|
||||
// Usual texture transform
|
||||
mov r11.zw, c13.zzzz;
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
dp4 r11.x, v7, c16;
|
||||
dp4 r11.y, v7, c17;
|
||||
mov oT1, r11;
|
||||
vs.1.1
// Water-surface vertex shader. Displaces each vertex vertically by a sum of
// four filtered sine waves, nudges it in X/Y along the normalized wave normal
// ("scrunch"), then writes screen position, fog, color and two UV sets.
//
// Inputs: v0 = position, v5 = vertex color, v7 = texcoord0 (feeds oT0 and oT1).
//
// Constant registers, as referenced by the code and comments below:
//   c0-c3    world -> screen transform (m4x4 source)
//   c4       kFreq, one per wave
//   c5       kPhase, one per wave
//   c6       kAmplitude, one per wave
//   c7, c8   wave direction X / Y components, one per wave
//   c9.y     scrunch scale
//   c10      sine polynomial coefficients (vSin)
//   c11      cosine polynomial coefficients (vCos)
//   c12      .z = kPi, .w = kTwoPi
//   c13      .x = 0, .z = 1
//   c14-c15  UV transform rows for oT0
//   c16-c17  UV transform rows for oT1
//   c18-c20  local -> world transform (m4x3 source)
//   c21      per-wave scale applied to v5.w to build the wave filter
//   c22-c24  depth attenuation (see the long comment below)
//   c25.x    vertical bias
//   c26      material color/opacity
//   c29      fog: .x = offset, .y = scale
//
// Temporaries: r6 = world position (modified in place), r0 = per-wave phase,
// r1 = cosDist, r2 = sinDist, r4 = depth attenuation, r11 = wave filter (later
// reused for the normal and for UV output), r9 = screen position.
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the filter code further down only multiplies v5.w by c21 and
// clamps to [0..1]; no "- 1" is applied in this shader. Confirm where (or
// whether) that bias is applied.
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c7, r6.xxxx;
|
||||
mad r0, c8, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4;
|
||||
add r0, r0, c5;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c12.wwww;
|
||||
add r0, r0, c12.zzzz; // bias by +kPi first; it is subtracted again after the frac
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
// expp reads its source .w (a replicated swizzle here) and leaves the
// fractional part in dest.y. x, z and w are each run through r1.y and copied
// out; y is done last so its result simply stays in r1.y.
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (the r0^7 * vSin.w and r0^6 * vCos.w terms are added a few lines further down)
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c10.wwww, r2; // r2 += r0^7 * vSin.w
|
||||
mad r1, r4, c11.wwww, r1; // r1 += r0^6 * vCos.w
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz;
|
||||
mul r4, r4, c23;
|
||||
add r4, r4, c24;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c21;
|
||||
max r11, r11, c13.xxxx;
|
||||
min r11, r11, c13.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c22.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c4;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c6;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c13.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
|
||||
dp4 r7.x, r1, -c7
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c8
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
// Only accumNorm is built here; r11 (the wave filter until now) is reused for it.
|
||||
mov r11, c13.xxzx; // r11 = (0, 0, 1, 0)
|
||||
add r11, r11, r7; // r11 = (accumCos.x, accumCos.y, 1, 0)
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx; // r11 = normalized accumNorm
|
||||
|
||||
//
|
||||
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
|
||||
mul r10.x, c9.y, r4.z;
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy;
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c18; // HACKAGE
|
||||
//mov r6.w, c13.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
// The screen position goes through r9 so its w can feed the fog term:
// oFog = (r9.w + c29.x) * c29.y
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c29.x;
|
||||
mul oFog, r10.x, c29.y;
|
||||
mov oPos, r9;
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26;
|
||||
|
||||
// Usual texture transform
|
||||
mov r11.zw, c13.zzzz; // z = w = 1 for both output UV sets
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
dp4 r11.x, v7, c16;
|
||||
dp4 r11.y, v7, c17;
|
||||
mov oT1, r11;
|
||||
|
@ -1,191 +1,191 @@
|
||||
|
||||
vs.1.1
// Water-surface vertex shader. Displaces each vertex vertically by a sum of
// four filtered sine waves, offsets it in X/Y by the filtered cosines weighted
// with c30/c31, then writes screen position, fog, color and two UV sets.
//
// Inputs: v0 = position, v5 = vertex color, v7 = texcoord0 (feeds oT0 and oT1).
//
// Constant registers, as referenced by the code and comments below:
//   c0-c3    world -> screen transform (m4x4 source)
//   c4       kFreq, one per wave
//   c5       kPhase, one per wave
//   c6       kAmplitude, one per wave
//   c7, c8   wave direction X / Y components, one per wave
//   c10      sine polynomial coefficients (vSin)
//   c11      cosine polynomial coefficients (vCos)
//   c12      .z = kPi, .w = kTwoPi
//   c13      .x = 0, .z = 1
//   c14-c15  UV transform rows for oT0
//   c16-c17  UV transform rows for oT1
//   c18-c20  local -> world transform (m4x3 source)
//   c21      per-wave scale applied to v5.w to build the wave filter
//   c22-c24  depth attenuation (see the long comment below)
//   c25.x    vertical bias
//   c26      material color/opacity
//   c29      fog: .x = offset, .y = scale
//   c30, c31 per-wave X / Y offset weights (k Dir.x A, k Dir.y A)
//
// Temporaries: r6 = world position (modified in place), r0 = per-wave phase,
// r1 = cosDist, r2 = sinDist, r4 = depth attenuation, r11 = wave filter (later
// reused for UV output), r7 = X/Y offset sums, r9 = screen position.
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the filter code further down only multiplies v5.w by c21 and
// clamps to [0..1]; no "- 1" is applied in this shader. Confirm where (or
// whether) that bias is applied.
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c7, r6.xxxx;
|
||||
mad r0, c8, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4;
|
||||
add r0, r0, c5;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c12.wwww;
|
||||
add r0, r0, c12.zzzz; // bias by +kPi first; it is subtracted again after the frac
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
// expp reads its source .w (a replicated swizzle here) and leaves the
// fractional part in dest.y. x, z and w are each run through r1.y and copied
// out; y is done last so its result simply stays in r1.y.
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (the r0^7 * vSin.w and r0^6 * vCos.w terms are added a few lines further down)
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c10.wwww, r2; // r2 += r0^7 * vSin.w
|
||||
mad r1, r4, c11.wwww, r1; // r1 += r0^6 * vCos.w
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz;
|
||||
mul r4, r4, c23;
|
||||
add r4, r4, c24;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c21;
|
||||
max r11, r11, c13.xxxx;
|
||||
min r11, r11, c13.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c22.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
|
||||
// Pos = (in.x + S, in.y + R, r6.z)
|
||||
// S = sum(k Dir.x A cos())
|
||||
// R = sum(k Dir.y A cos())
|
||||
// c30 = k Dir.x A
|
||||
// c31 = k Dir.y A
|
||||
// S = sum(cosDist * c30);
|
||||
dp4 r7.x, r1, c30;
|
||||
// R = sum(cosDist * c31);
|
||||
dp4 r7.y, r1, c31;
|
||||
|
||||
add r6.xy, r6.xy, r7.xy; // world X/Y += (S, R)
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c18; // HACKAGE
|
||||
//mov r6.w, c13.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
// The screen position goes through r9 so its w can feed the fog term:
// oFog = (r9.w + c29.x) * c29.y
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c29.x;
|
||||
mul oFog, r10.x, c29.y;
|
||||
mov oPos, r9;
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26;
|
||||
|
||||
// Usual texture transform
|
||||
mov r11.zw, c13.zzzz; // z = w = 1 for both output UV sets
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
dp4 r11.x, v7, c16;
|
||||
dp4 r11.y, v7, c17;
|
||||
mov oT1, r11;
|
||||
|
||||
vs.1.1
// Water-surface vertex shader. Displaces each vertex vertically by a sum of
// four filtered sine waves, offsets it in X/Y by the filtered cosines weighted
// with c30/c31, then writes screen position, fog, color and two UV sets.
//
// Inputs: v0 = position, v5 = vertex color, v7 = texcoord0 (feeds oT0 and oT1).
//
// Constant registers, as referenced by the code and comments below:
//   c0-c3    world -> screen transform (m4x4 source)
//   c4       kFreq, one per wave
//   c5       kPhase, one per wave
//   c6       kAmplitude, one per wave
//   c7, c8   wave direction X / Y components, one per wave
//   c10      sine polynomial coefficients (vSin)
//   c11      cosine polynomial coefficients (vCos)
//   c12      .z = kPi, .w = kTwoPi
//   c13      .x = 0, .z = 1
//   c14-c15  UV transform rows for oT0
//   c16-c17  UV transform rows for oT1
//   c18-c20  local -> world transform (m4x3 source)
//   c21      per-wave scale applied to v5.w to build the wave filter
//   c22-c24  depth attenuation (see the long comment below)
//   c25.x    vertical bias
//   c26      material color/opacity
//   c29      fog: .x = offset, .y = scale
//   c30, c31 per-wave X / Y offset weights (k Dir.x A, k Dir.y A)
//
// Temporaries: r6 = world position (modified in place), r0 = per-wave phase,
// r1 = cosDist, r2 = sinDist, r4 = depth attenuation, r11 = wave filter (later
// reused for UV output), r7 = X/Y offset sums, r9 = screen position.
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the filter code further down only multiplies v5.w by c21 and
// clamps to [0..1]; no "- 1" is applied in this shader. Confirm where (or
// whether) that bias is applied.
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c7, r6.xxxx;
|
||||
mad r0, c8, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4;
|
||||
add r0, r0, c5;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c12.wwww;
|
||||
add r0, r0, c12.zzzz; // bias by +kPi first; it is subtracted again after the frac
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
// expp reads its source .w (a replicated swizzle here) and leaves the
// fractional part in dest.y. x, z and w are each run through r1.y and copied
// out; y is done last so its result simply stays in r1.y.
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (the r0^7 * vSin.w and r0^6 * vCos.w terms are added a few lines further down)
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c10.wwww, r2; // r2 += r0^7 * vSin.w
|
||||
mad r1, r4, c11.wwww, r1; // r1 += r0^6 * vCos.w
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz;
|
||||
mul r4, r4, c23;
|
||||
add r4, r4, c24;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c21;
|
||||
max r11, r11, c13.xxxx;
|
||||
min r11, r11, c13.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c22.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
|
||||
// Pos = (in.x + S, in.y + R, r6.z)
|
||||
// S = sum(k Dir.x A cos())
|
||||
// R = sum(k Dir.y A cos())
|
||||
// c30 = k Dir.x A
|
||||
// c31 = k Dir.y A
|
||||
// S = sum(cosDist * c30);
|
||||
dp4 r7.x, r1, c30;
|
||||
// R = sum(cosDist * c31);
|
||||
dp4 r7.y, r1, c31;
|
||||
|
||||
add r6.xy, r6.xy, r7.xy; // world X/Y += (S, R)
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c18; // HACKAGE
|
||||
//mov r6.w, c13.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
// The screen position goes through r9 so its w can feed the fog term:
// oFog = (r9.w + c29.x) * c29.y
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c29.x;
|
||||
mul oFog, r10.x, c29.y;
|
||||
mov oPos, r9;
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26;
|
||||
|
||||
// Usual texture transform
|
||||
mov r11.zw, c13.zzzz; // z = w = 1 for both output UV sets
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
dp4 r11.x, v7, c16;
|
||||
dp4 r11.y, v7, c17;
|
||||
mov oT1, r11;
|
||||
|
@ -1,210 +1,210 @@
|
||||
vs.1.1
// Water-surface vertex shader. Displaces each vertex vertically by a sum of
// four filtered sine waves, nudges it in X/Y along the normalized wave normal
// ("scrunch"), then writes screen position, fog, color and two UV sets.
//
// Inputs: v0 = position, v5 = vertex color, v7 = texcoord0 (feeds oT0),
// v8 = texcoord1 (feeds oT1).
//
// Constant registers, as referenced by the code and comments below:
//   c0-c3    world -> screen transform (m4x4 source)
//   c4       kFreq, one per wave
//   c5       kPhase, one per wave
//   c6       kAmplitude, one per wave
//   c7, c8   wave direction X / Y components, one per wave
//   c9.y     scrunch scale
//   c10      sine polynomial coefficients (vSin)
//   c11      cosine polynomial coefficients (vCos)
//   c12      .z = kPi, .w = kTwoPi
//   c13      .x = 0, .z = 1
//   c14-c15  UV transform rows for oT0
//   c16-c17  UV transform rows for oT1
//   c18-c20  local -> world transform (m4x3 source)
//   c21      per-wave scale applied to v5.w to build the wave filter
//   c22-c24  depth attenuation (see the long comment below)
//   c25.x    vertical bias
//   c26      material color/opacity
//   c29      fog: .x = offset, .y = scale
//
// Temporaries: r6 = world position (modified in place), r0 = per-wave phase,
// r1 = cosDist, r2 = sinDist, r4 = depth attenuation, r11 = wave filter (later
// reused for the normal and for UV output), r9 = screen position.
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
dcl_texcoord1 v8
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the filter code further down only multiplies v5.w by c21 and
// clamps to [0..1]; no "- 1" is applied in this shader. Confirm where (or
// whether) that bias is applied.
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c7, r6.xxxx;
|
||||
mad r0, c8, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4;
|
||||
add r0, r0, c5;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c12.wwww;
|
||||
add r0, r0, c12.zzzz; // bias by +kPi first; it is subtracted again after the frac
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
// expp reads its source .w (a replicated swizzle here) and leaves the
// fractional part in dest.y. x, z and w are each run through r1.y and copied
// out; y is done last so its result simply stays in r1.y.
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
// (the r0^7 * vSin.w and r0^6 * vCos.w terms are added a few lines further down)
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c10.wwww, r2; // r2 += r0^7 * vSin.w
|
||||
mad r1, r4, c11.wwww, r1; // r1 += r0^6 * vCos.w
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz;
|
||||
mul r4, r4, c23;
|
||||
add r4, r4, c24;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c21;
|
||||
max r11, r11, c13.xxxx;
|
||||
min r11, r11, c13.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c22.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c4;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c6;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c13.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
|
||||
dp4 r7.x, r1, -c7
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c8
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
// Only accumNorm is built here; r11 (the wave filter until now) is reused for it.
|
||||
mov r11, c13.xxzx; // r11 = (0, 0, 1, 0)
|
||||
add r11, r11, r7; // r11 = (accumCos.x, accumCos.y, 1, 0)
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx; // r11 = normalized accumNorm
|
||||
|
||||
//
|
||||
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
|
||||
mul r10.x, c9.y, r4.z;
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy;
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c18; // HACKAGE
|
||||
//mov r6.w, c13.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
// The screen position goes through r9 so its w can feed the fog term:
// oFog = (r9.w + c29.x) * c29.y
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c29.x;
|
||||
mul oFog, r10.x, c29.y;
|
||||
mov oPos, r9;
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26;
|
||||
|
||||
// Usual texture transform
|
||||
mov r11.zw, c13.zzzz; // z = w = 1 for both output UV sets
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
dp4 r11.x, v8, c16;
|
||||
dp4 r11.y, v8, c17;
|
||||
mov oT1, r11;
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
dcl_texcoord1 v8
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c7, r6.xxxx;
|
||||
mad r0, c8, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4;
|
||||
add r0, r0, c5;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c12.wwww;
|
||||
add r0, r0, c12.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c10.wwww, r2;
|
||||
mad r1, r4, c11.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz;
|
||||
mul r4, r4, c23;
|
||||
add r4, r4, c24;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c21;
|
||||
max r11, r11, c13.xxxx;
|
||||
min r11, r11, c13.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c22.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c4;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c6;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c13.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
|
||||
dp4 r7.x, r1, -c7
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c8
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
|
||||
mov r11, c13.xxzx;
|
||||
add r11, r11, r7;
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx;
|
||||
|
||||
//
|
||||
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
|
||||
mul r10.x, c9.y, r4.z;
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy;
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c18; // HACKAGE
|
||||
//mov r6.w, c13.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
// Project the displaced world-space vertex r6 through the matrix starting at c0.
// The result goes to temp r9 rather than straight to oPos because its w is
// needed for the fog term below.
m4x4 r9, r6, c0;
|
||||
// Fog is linear in projected w: oFog = (r9.w + c29.x) * c29.y.
add r10.x, r9.w, c29.x;
|
||||
mul oFog, r10.x, c29.y;
|
||||
// Emit the projected position once the fog term has been derived from it.
mov oPos, r9;
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26;
|
||||
|
||||
// Usual texture transform
|
||||
mov r11.zw, c13.zzzz;
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
dp4 r11.x, v8, c16;
|
||||
dp4 r11.y, v8, c17;
|
||||
mov oT1, r11;
|
||||
|
@ -1,192 +1,192 @@
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
dcl_texcoord1 v8
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c7, r6.xxxx;
|
||||
mad r0, c8, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4;
|
||||
add r0, r0, c5;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c12.wwww;
|
||||
add r0, r0, c12.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c10.wwww, r2;
|
||||
mad r1, r4, c11.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz;
|
||||
mul r4, r4, c23;
|
||||
add r4, r4, c24;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c21;
|
||||
max r11, r11, c13.xxxx;
|
||||
min r11, r11, c13.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c22.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
|
||||
// Pos = (in.x + S, in.y + R, r6.z)
|
||||
// S = sum(k Dir.x A cos())
|
||||
// R = sum(k Dir.y A cos())
|
||||
// c30 = k Dir.x A
|
||||
// c31 = k Dir.y A
|
||||
// S = sum(cosDist * c30);
|
||||
dp4 r7.x, r1, c30;
|
||||
// R = sum(cosDist * c31);
|
||||
dp4 r7.y, r1, c31;
|
||||
|
||||
add r6.xy, r6.xy, r7.xy;
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c18; // HACKAGE
|
||||
//mov r6.w, c13.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c29.x;
|
||||
mul oFog, r10.x, c29.y;
|
||||
mov oPos, r9;
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26;
|
||||
|
||||
// Usual texture transform
|
||||
mov r11.zw, c13.zzzz;
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
dp4 r11.x, v8, c16;
|
||||
dp4 r11.y, v8, c17;
|
||||
mov oT1, r11;
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
dcl_texcoord1 v8
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c7, r6.xxxx;
|
||||
mad r0, c8, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4;
|
||||
add r0, r0, c5;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c12.wwww;
|
||||
add r0, r0, c12.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c10.wwww, r2;
|
||||
mad r1, r4, c11.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz;
|
||||
mul r4, r4, c23;
|
||||
add r4, r4, c24;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c21;
|
||||
max r11, r11, c13.xxxx;
|
||||
min r11, r11, c13.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c22.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
|
||||
// Pos = (in.x + S, in.y + R, r6.z)
|
||||
// S = sum(k Dir.x A cos())
|
||||
// R = sum(k Dir.y A cos())
|
||||
// c30 = k Dir.x A
|
||||
// c31 = k Dir.y A
|
||||
// S = sum(cosDist * c30);
|
||||
dp4 r7.x, r1, c30;
|
||||
// R = sum(cosDist * c31);
|
||||
dp4 r7.y, r1, c31;
|
||||
|
||||
add r6.xy, r6.xy, r7.xy;
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c18; // HACKAGE
|
||||
//mov r6.w, c13.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
// Project the displaced world-space vertex r6 through the matrix starting at c0.
// The result goes to temp r9 rather than straight to oPos because its w is
// needed for the fog term below.
m4x4 r9, r6, c0;
|
||||
// Fog is linear in projected w: oFog = (r9.w + c29.x) * c29.y.
add r10.x, r9.w, c29.x;
|
||||
mul oFog, r10.x, c29.y;
|
||||
// Emit the projected position once the fog term has been derived from it.
mov oPos, r9;
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26;
|
||||
|
||||
// Usual texture transform
|
||||
mov r11.zw, c13.zzzz;
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
dp4 r11.x, v8, c16;
|
||||
dp4 r11.y, v8, c17;
|
||||
mov oT1, r11;
|
||||
|
@ -1,298 +1,298 @@
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
dcl_texcoord1 v8
|
||||
dcl_texcoord2 v9
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c7, r6.xxxx;
|
||||
mad r0, c8, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4;
|
||||
add r0, r0, c5;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c12.wwww;
|
||||
add r0, r0, c12.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
|
||||
// Take the fractional part of each of the four wave phases in r0, one channel
// at a time. Only the .y result of each expp is kept, so r1.y serves as a
// scratch slot and each result is copied out to its final channel.
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
// The y channel is processed last so its result can stay in r1.y with no copy.
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c10.wwww, r2;
|
||||
mad r1, r4, c11.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz;
|
||||
mul r4, r4, c23;
|
||||
add r4, r4, c24;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c21;
|
||||
max r11, r11, c13.xxxx;
|
||||
min r11, r11, c13.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c22.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c4;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c6;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 1); (w is preset to 1 for the basis-vector math below)
|
||||
mov r7, c13.xxxz;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos.x is currently 0)
|
||||
dp4 r7.x, r1, -c7
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c8
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
|
||||
mov r11, c13.xxzx;
|
||||
add r11, r11, r7.xyzz;
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx;
|
||||
|
||||
//
|
||||
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
|
||||
mul r10.x, c9.y, r4.z;
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy;
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c29.x;
|
||||
mul oFog, r10.x, c29.y;
|
||||
//mov oFog.x, c13.y;
|
||||
mov oPos, r9;
|
||||
|
||||
// Calculate our normal scrunch and apply to our cosines.
|
||||
// scale = (r6.z * c9.x + 1) * r4.z
// r6.z is the final displaced height (already including the c25.x bias),
// c13.z is used as 1.0 elsewhere in this shader, and r4.z is the clamped
// depth-based wave scaling.
mul r2.x, r6.z, c9.x;
|
||||
add r2.x, r2.x, c13.z;
|
||||
mul r2.x, r2.x, r4.z;
|
||||
// Apply the scale to the accumulated cosine (slope) terms only; r7.zw are untouched.
mul r7.xy, r7.xy, r2.xx;
|
||||
|
||||
// Now onto texture coordinate generation.
|
||||
//
|
||||
// First is the usual texture transform
|
||||
mov r11.zw, c13.zzzz;
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
// Calculate our basis vectors as input into our tex3x3vspec
|
||||
// This would be like:
|
||||
//add r1, c13.zxxx, r7.zzxz;
|
||||
//add r2, c13.xzxx, r7.zzyz;
|
||||
//sub r3, c13.xxzz, r7.xyzz;
|
||||
// BUT =>
|
||||
// Now r1-r3 are surface2world, but we still need to fold
|
||||
// in texture2surface. That's imbedded in our uv's v8,v9, plus
|
||||
// the normal we just computed into r11.
|
||||
// So the full matrix multiply surface2world * texture2surface would be:
|
||||
// | r1.v8 r1.v9 r1.(0,0,1) |
|
||||
// | r2.v8 r2.v9 r2.(0,0,1) |
|
||||
// | r3.v8 r3.v9 r3.(0,0,1) |
|
||||
// But we notice that
|
||||
// r1 = (1, 0, r7.x)
|
||||
// r2 = (0, 1, r7.y)
|
||||
// r3 = (-r7.x, -r7.y, 1)
|
||||
// and also:
|
||||
// r7.z == v8.z == v9.z == 0
|
||||
// and r7.w == 1.0
|
||||
//
|
||||
// Considering the zeros, and doing the matrix multiply by hand, we get
|
||||
// the final matrix of
|
||||
// | v8.x v9.x r7.x |
|
||||
// | v8.y v9.y r7.y |
|
||||
// | -dp3(r7,v8) -dp3(r7,v9) 1 |
|
||||
// So we wind up not needing r1-r3 at all
|
||||
// Build the texture2world rows directly from the hand-multiplied matrix
// derived in the comment above. This relies on r7.z == v8.z == 0, so each
// add simply drops the wanted component into place. The .w of r1-r3 is
// overwritten with the eye ray further down.
//
// Row 0: r1 = (v8.x, v9.x, r7.x)
add r1, v8.xzzz, r7.zzxw;
|
||||
mov r1.y, v9.x;
|
||||
|
||||
// Row 1: r2 = (v8.y, v9.y, r7.y)
// The z swizzle must pick r7.y here. Selecting r7.x (as row 0 does) would put
// the X slope into both rows, contradicting the derived matrix and the
// debug-only variant "add r2, c13.xzxx, r7.zzyw".
add r2, v8.yzzz, r7.zzyw;
|
||||
mov r2.y, v9.y;
|
||||
|
||||
// Row 2: r3 = (-dp3(r7,v8), -dp3(r7,v9), 1); z and w both take r7.w (== 1).
dp3 r3.x, -r7, v8;
|
||||
dp3 r3.y, -r7, v9;
|
||||
mov r3.zw, r7.ww;
|
||||
|
||||
// Following section is debug only to skip the per-vert tangent space axes.
|
||||
//add r1, c13.zxxx, r7.zzxw;
|
||||
//add r2, c13.xzxx, r7.zzyw;
|
||||
//
|
||||
//mov r3.x, -r7.x;
|
||||
//mov r3.y, -r7.y;
|
||||
//mov r3.zw, c13.zz;
|
||||
|
||||
// See vs_WaveFixedFin6.inl for derivation of the following
|
||||
// r0 = vector from the camera to the displaced world-space vertex; only xyz
// is meaningful in what follows.
sub r0, r6, c27; // c27 is camera position.
|
||||
// Normalize r0.xyz: r10.x = 1 / length(r0.xyz).
dp3 r10.x, r0, r0;
|
||||
rsq r10.x, r10.x;
|
||||
mul r0, r0, r10.xxxx;
|
||||
|
||||
// r10.x = dot(r0.xyz, c28.xyz)
dp3 r10.x, r0, c28; // c28 is kEnvAdjust
|
||||
// r10.y = r10.x^2 - c28.w
mad r10.y, r10.x, r10.x, -c28.w;
|
||||
|
||||
// r9.x = 1 / sqrt(r10.y)
// NOTE(review): a non-positive r10.y would make this non-finite; presumably
// the kEnvAdjust constants keep it positive -- confirm against the code that sets c28.
rsq r9.x, r10.y;
|
||||
|
||||
// r10.z = r10.y * (1 / sqrt(r10.y)) + r10.x = sqrt(r10.y) + r10.x
mad r10.z, r10.y, r9.x, r10.x;
|
||||
|
||||
// r0.xyz = r0.xyz * r10.z - c28.xyz
// NOTE(review): this has the shape of a ray/sphere intersection used to adjust
// the environment-map lookup ray -- confirm against the referenced derivation.
mad r0.xyz, r0, r10.zzz, -c28.xyz;
|
||||
|
||||
// Store the negated adjusted ray, one component per row, in the .w of r1-r3.
mov r1.w, -r0.x;
|
||||
mov r2.w, -r0.y;
|
||||
mov r3.w, -r0.z;
|
||||
|
||||
// Now r1-r3 are texture2world, with the eye-ray vector in .w. We just
|
||||
// need to normalize them and bung them into output UV's 1-3.
|
||||
// Note we're accounting for our environment map being flipped from
|
||||
// D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2.
|
||||
// r10.w = c13.z (used as 1.0 elsewhere in this shader), so the r10.xxxw
// multiplies below scale xyz by the reciprocal length while passing the .w
// (eye-ray) component through unscaled.
mov r10.w, c13.z;
|
||||
// oT1 = (normalize(r1.xyz), r1.w)
dp3 r10.x, r1, r1;
|
||||
rsq r10.x, r10.x;
|
||||
mul oT1, r1, r10.xxxw;
|
||||
|
||||
// oT2 = (normalize(r3.xyz), r3.w) -- r3 goes to UV2, not UV3.
dp3 r10.x, r3, r3;
|
||||
rsq r10.x, r10.x;
|
||||
mul oT2, r3, r10.xxxw;
|
||||
//mul oT3, r3, r10.xxxw; // YZHACK
|
||||
|
||||
// oT3 = (normalize(r2.xyz), r2.w) -- r2 goes to UV3, completing the row swap.
dp3 r10.x, r2, r2;
|
||||
rsq r10.x, r10.x;
|
||||
mul oT3, r2, r10.xxxw;
|
||||
//mul oT2, r2, r10.xxxw;
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26;
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
dcl_texcoord1 v8
|
||||
dcl_texcoord2 v9
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c18; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c13.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max( (waveLen * v5.wwww) - 1), 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c7, r6.xxxx;
|
||||
mad r0, c8, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4;
|
||||
add r0, r0, c5;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c12.wwww;
|
||||
add r0, r0, c12.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c12.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c10.wwww, r2;
|
||||
mad r1, r4, c11.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz;
|
||||
mul r4, r4, c23;
|
||||
add r4, r4, c24;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c13.zzzz;
|
||||
max r4.xyz, r4, c13.xxxx;
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c21;
|
||||
max r11, r11, c13.xxxx;
|
||||
min r11, r11, c13.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c6;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c22.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c4;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c6;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 1); (xyz start at zero; w is preset from c13.z and is relied on later as r7.w == 1.0)
|
||||
mov r7, c13.xxxz;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos.xyz is currently 0)
|
||||
dp4 r7.x, r1, -c7
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c8
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
|
||||
mov r11, c13.xxzx;
|
||||
add r11, r11, r7.xyzz;
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx;
|
||||
|
||||
//
|
||||
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
|
||||
mul r10.x, c9.y, r4.z;
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy;
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c29.x;
|
||||
mul oFog, r10.x, c29.y;
|
||||
//mov oFog.x, c13.y;
|
||||
mov oPos, r9;
|
||||
|
||||
// Calculate our normal scrunch and apply to our cosines.
|
||||
mul r2.x, r6.z, c9.x;
|
||||
add r2.x, r2.x, c13.z;
|
||||
mul r2.x, r2.x, r4.z;
|
||||
mul r7.xy, r7.xy, r2.xx;
|
||||
|
||||
// Now onto texture coordinate generation.
|
||||
//
|
||||
// First is the usual texture transform
|
||||
mov r11.zw, c13.zzzz;
|
||||
dp4 r11.x, v7, c14;
|
||||
dp4 r11.y, v7, c15;
|
||||
mov oT0, r11;
|
||||
|
||||
// Calculate our basis vectors as input into our tex3x3vspec
|
||||
// This would be like:
|
||||
//add r1, c13.zxxx, r7.zzxz;
|
||||
//add r2, c13.xzxx, r7.zzyz;
|
||||
//sub r3, c13.xxzz, r7.xyzz;
|
||||
// BUT =>
|
||||
// Now r1-r3 are surface2world, but we still need to fold
|
||||
// in texture2surface. That's imbedded in our uv's v8,v9, plus
|
||||
// the normal we just computed into r11.
|
||||
// So the full matrix multiply surface2world * texture2surface would be:
|
||||
// | r1.v8 r1.v9 r1.(0,0,1) |
|
||||
// | r2.v8 r2.v9 r2.(0,0,1) |
|
||||
// | r3.v8 r3.v9 r3.(0,0,1) |
|
||||
// But we notice that
|
||||
// r1 = (1, 0, r7.x)
|
||||
// r2 = (0, 1, r7.y)
|
||||
// r3 = (-r7.x, -r7.y, 1)
|
||||
// and also:
|
||||
// r7.z == v8.z == v9.z == 0
|
||||
// and r7.w == 1.0
|
||||
//
|
||||
// Considering the zeros, and doing the matrix multiply by hand, we get
|
||||
// the final matrix of
|
||||
// | v8.x v9.x r7.x |
|
||||
// | v8.y v9.y r7.y |
|
||||
// | -dp3(r7,v8) -dp3(r7,v9) 1 |
|
||||
// So we wind up not needing r1-r3 at all
|
||||
add r1, v8.xzzz, r7.zzxw;
|
||||
mov r1.y, v9.x;
|
||||
|
||||
add r2, v8.yzzz, r7.zzxw;
|
||||
mov r2.y, v9.y;
|
||||
|
||||
dp3 r3.x, -r7, v8;
|
||||
dp3 r3.y, -r7, v9;
|
||||
mov r3.zw, r7.ww;
|
||||
|
||||
// Following section is debug only to skip the per-vert tangent space axes.
|
||||
//add r1, c13.zxxx, r7.zzxw;
|
||||
//add r2, c13.xzxx, r7.zzyw;
|
||||
//
|
||||
//mov r3.x, -r7.x;
|
||||
//mov r3.y, -r7.y;
|
||||
//mov r3.zw, c13.zz;
|
||||
|
||||
// See vs_WaveFixedFin6.inl for derivation of the following
|
||||
sub r0, r6, c27; // c27 is camera position.
|
||||
dp3 r10.x, r0, r0;
|
||||
rsq r10.x, r10.x;
|
||||
mul r0, r0, r10.xxxx;
|
||||
|
||||
dp3 r10.x, r0, c28; // c28 is kEnvAdjust
|
||||
mad r10.y, r10.x, r10.x, -c28.w;
|
||||
|
||||
rsq r9.x, r10.y;
|
||||
|
||||
mad r10.z, r10.y, r9.x, r10.x;
|
||||
|
||||
mad r0.xyz, r0, r10.zzz, -c28.xyz;
|
||||
|
||||
mov r1.w, -r0.x;
|
||||
mov r2.w, -r0.y;
|
||||
mov r3.w, -r0.z;
|
||||
|
||||
// Now r1-r3 are texture2world, with the eye-ray vector in .w. We just
|
||||
// need to normalize them and bung them into output UV's 1-3.
|
||||
// Note we're accounting for our environment map being flipped from
|
||||
// D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2.
|
||||
mov r10.w, c13.z;
|
||||
dp3 r10.x, r1, r1;
|
||||
rsq r10.x, r10.x;
|
||||
mul oT1, r1, r10.xxxw;
|
||||
|
||||
dp3 r10.x, r3, r3;
|
||||
rsq r10.x, r10.x;
|
||||
mul oT2, r3, r10.xxxw;
|
||||
//mul oT3, r3, r10.xxxw; // YZHACK
|
||||
|
||||
dp3 r10.x, r2, r2;
|
||||
rsq r10.x, r10.x;
|
||||
mul oT3, r2, r10.xxxw;
|
||||
//mul oT2, r2, r10.xxxw;
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26;
|
||||
|
||||
|
@ -1,331 +1,331 @@
|
||||
|
||||
|
||||
vs.1.0
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
dcl_texcoord1 v8
|
||||
dcl_texcoord2 v9
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c18; // v0 * l2w; r6.xyz is the working world-space position that later gets its z replaced by the wave height and its xy displaced
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c13.z; // c13.z is presumably 1.0 (it is also the upper bound of the [0..1] clamps further down) - TODO confirm
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c7, r6.xxxx; // r0 = c7 * world x (presumably one wave per component, c7 = direction x terms)
|
||||
mad r0, c8, r6.yyyy, r0; // r0 += c8 * world y => per-component dot(direction, position.xy)
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4; // c4 = kFreq
|
||||
add r0, r0, c5; // c5 = kPhase
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c12.wwww; // r4 = 1 / kTwoPi (c12.w = kTwoPi)
|
||||
add r0, r0, c12.zzzz; // pre-shift by +kPi (c12.z = kPi); undone by the final sub so the wrapped result is centred on zero
|
||||
mul r0, r0, r4; // r0 = (dist + kPi) / kTwoPi; the expp/mov pairs below take its fractional part (expp .y) one component at a time, y last because r1.y is the scratch slot
|
||||
// dist = frac(dist);
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww; // fraction in [0..1) scaled back up to [0..kTwoPi)
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c12.zzzz; // r0 now lies in [-kPi..kPi), the range the polynomial below is written for
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2 (start of the power table used by both polynomials)
|
||||
mul r2, r1, r0; // r0^3 - probably stall (reads r1 written by the previous instruction)
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y (c11 = vCos)
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y (c10 = vSin)
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y (so c11.x supplies the leading 1)
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6 (r4 held r0^5)
|
||||
mad r2, r5, c10.wwww, r2; // r2 += r0^7 * vSin.w => r2 = sinDist
|
||||
mad r1, r4, c11.wwww, r1; // r1 += r0^6 * vCos.w => r1 = cosDist
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz; // r4 = (waterlevel + offset) - vertex z; r4.w = c22.w - r6.z, the raw depth
|
||||
mul r4, r4, c23; // * (maxAtten - minAtten) / depthFalloff; c23.w = 1 per the notes above, so .w is unchanged
|
||||
add r4, r4, c24; // + minAtten; c24.w = 0 per the notes above, so .w is still the depth
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c13.zzzz; // upper bound (c13.z)
|
||||
max r4.xyz, r4, c13.xxxx; // lower bound (c13.x); only r4.z is read by the instructions that follow in this shader
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c21; // r11 = v5.w * c21; c21 presumably carries the per-wave waveLen terms - TODO confirm. NOTE(review): the formula in the header comment also subtracts 1; no subtraction is done here
|
||||
max r11, r11, c13.xxxx; // clamp below at c13.x
|
||||
min r11, r11, c13.zzzz; // clamp above at c13.z => r11 = per-wave filter
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11; // apply the per-wave filter held in r11
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c6; // c6 = kAmplitude; r2 stays live and is reused for the basis terms later
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz; // sum of the four components of r2 (dot with c13.z replicated)
|
||||
mul r8.y, r8.x, r4.z; // scale by r4.z, the clamped depth attenuation for wave scaling
|
||||
add r8.z, r8.y, c22.w; // + c22.w (waterlevel)
|
||||
max r6.z, r6.z, r8.z; // keep the higher of the input vertex z and the wave surface
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11; // same per-wave filter as applied to the sines
|
||||
// Pos = (in.x + S, in.y + R, r6.z)
|
||||
// S = sum(k Dir.x A cos())
|
||||
// R = sum(k Dir.y A cos())
|
||||
// c30 = k Dir.x A
|
||||
// c31 = k Dir.y A
|
||||
// S = sum(cosDist * c30);
|
||||
dp4 r7.x, r1, c30; // r7.x = S
|
||||
// R = sum(cosDist * c31);
|
||||
dp4 r7.y, r1, c31; // r7.y = R
|
||||
|
||||
add r6.xy, r6.xy, r7.xy; // horizontal displacement by (S, R); note this term is not multiplied by r4.z, unlike the height
|
||||
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x; // c25.x = the upward bias described in the comment above
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0; // transform into a temp instead of oPos so r9.w can feed the fog calculation
|
||||
add r10.x, r9.w, c29.x; // r10.x = r9.w + c29.x
|
||||
mul oFog, r10.x, c29.y; // fog = (r9.w + c29.x) * c29.y; the c29 values are set outside this file
|
||||
//mov oFog, c13.y;
|
||||
mov oPos, r9; // emit the transformed position
|
||||
|
||||
// Now onto texture coordinate generation.
|
||||
//
|
||||
// First is the usual texture transform
|
||||
mov r11.zw, c13.zzzz; // z and w of the output uv are both set to c13.z
|
||||
dp4 r11.x, v7, c14; // u = dot(v7, c14)
|
||||
dp4 r11.y, v7, c15; // v = dot(v7, c15)
|
||||
mov oT0, r11; // uv set 0 out
|
||||
|
||||
// Calculate our basis vectors as input into our tex3x3vspec
|
||||
// First we get our basis set off our surface.
|
||||
// Okay, here we go:
|
||||
// W == sum(k w Dir.x^2 A sin()) x
|
||||
// V == sum(k w Dir.x Dir.y A sin()) x
|
||||
// U == sum(k w Dir.y^2 A sin()) x
|
||||
//
|
||||
// T == sum(A sin())
|
||||
//
|
||||
// S == sum(k Dir.x A cos())
|
||||
// R == sum(k Dir.y A cos())
|
||||
//
|
||||
// Q == sum(k w A sin()) x (the code below dots c34 with the sine vector r2, not the cosines)
|
||||
//
|
||||
// M == sum(A cos())
|
||||
//
|
||||
// P == sum(w Dir.x A cos()) x
|
||||
// N == sum(w Dir.y A cos()) x
|
||||
//
|
||||
// Then:
|
||||
// Pos = (in.x + S, in.y + R, waterheight + T) // Already done above.
|
||||
//
|
||||
// Bin = (1 - W, -V, P)
|
||||
// Tan = (-V, 1 - U, N)
|
||||
// Nor = (-P, -N, 1 - Q)
|
||||
//
|
||||
// The matrix
|
||||
// |Bx, Tx, Nx|
|
||||
// |By, Ty, Ny|
|
||||
// |Bz, Tz, Nz|
|
||||
// is surface2world, but we still need to fold in
|
||||
// texture2surface. We'll go with the generalized
|
||||
// (not assuming a flat surface) partials of dPos/dU and dPos/dV
|
||||
// as coming in as uv coords v8 and v9.
|
||||
// Then, if r5 = v8 X v9, then texture to surface is
|
||||
// |v8.x, v9.x, r5.x|
|
||||
// |v8.y, v9.y, r5.y|
|
||||
// |v8.z, v9.z, r5.z|
|
||||
//
|
||||
// So, let's say we calc 3 vectors,
|
||||
// r7 = (Bx, Tx, Nx)
|
||||
// r8 = (By, Ty, Ny)
|
||||
// r9 = (Bz, Tz, Nz)
|
||||
//
|
||||
// Then surface2world * texture2surface =
|
||||
// |r7 dot v8, r7 dot v9, r7 dot r5|
|
||||
// |r8 dot v8, r8 dot v9, r8 dot r5|
|
||||
// |r9 dot v8, r9 dot v9, r9 dot r5|
|
||||
//
|
||||
// We will need r5 as v8 X v9
|
||||
mov r7, v8; // copy v8 into a temp; presumably because one instruction cannot read two different v# registers - TODO confirm
|
||||
mul r5.xyz, r7.yzx, v9.zxy; // r5 = v8.yzx * v9.zxy
|
||||
mad r5.xyz, r7.zxy, -v9.yzx, r5.xyz; // r5 -= v8.zxy * v9.yzx => r5 = v8 X v9
|
||||
|
||||
// Okay, r1 currently has the vector of cosines, and r2 has vector of sines.
|
||||
// Everything will want that times amplitude, so go ahead and fold that in.
|
||||
mul r1, r1, c6; // r1 = A cos() = M (r1 already carries the per-wave filter)
|
||||
// Sines already have amplitude folded in, so r2 = A sin() = T.
|
||||
// Now just compute r7-9 one element at a time.
|
||||
dp4 r7.x, r2, -c35; // r7.x = -W
|
||||
dp4 r7.y, r2, -c36; // r7.y = -V
|
||||
dp4 r7.z, r1, -c32; // r7.z = -P
|
||||
add r7.x, r7.x, c13.z; // r7.x = 1 - W; r7 = (Bx, Tx, Nx)
|
||||
|
||||
dp4 r8.x, r2, -c36; // r8.x = -V (same value as r7.y)
|
||||
dp4 r8.y, r2, -c37; // r8.y = -U
|
||||
dp4 r8.z, r1, -c33; // r8.z = -N
|
||||
add r8.y, r8.y, c13.z; // r8.y = 1 - U; r8 = (By, Ty, Ny)
|
||||
|
||||
dp4 r9.z, r2, -c34; // r9.z = -Q. NOTE(review): computed from the sine vector r2; confirm against the definition of Q in the header comment
|
||||
mov r9.x, -r7.z; // r9.x = P = -r7.z
|
||||
mov r9.y, -r8.z; // r9.y = N = -r8.z
|
||||
add r9.z, r9.z, c13.z; // r9.z = 1 - Q; r9 = (Bz, Tz, Nz)
|
||||
|
||||
// Okay, got everything we need, construct r1-3 as surface2world*texture2surface.
|
||||
dp3 r1.x, r7, v8; // row 1 = (r7 dot v8, r7 dot v9, r7 dot r5); overwrites the cosine vector, which is no longer needed
|
||||
dp3 r1.y, r7, v9;
|
||||
dp3 r1.z, r7, r5;
|
||||
|
||||
dp3 r2.x, r8, v8; // row 2 = (r8 dot v8, r8 dot v9, r8 dot r5); overwrites the sine vector
|
||||
dp3 r2.y, r8, v9;
|
||||
dp3 r2.z, r8, r5;
|
||||
|
||||
dp3 r3.x, r9, v8; // row 3 = (r9 dot v8, r9 dot v9, r9 dot r5)
|
||||
dp3 r3.y, r9, v9;
|
||||
dp3 r3.z, r9, r5;
|
||||
|
||||
// Following section is debug only to skip the per-vert tangent space axes.
|
||||
//add r1, c13.zxxx, r7.zzxw;
|
||||
//add r2, c13.xzxx, r7.zzyw;
|
||||
//
|
||||
//mov r3.x, -r7.x;
|
||||
//mov r3.y, -r7.y;
|
||||
//mov r3.zw, c13.zz;
|
||||
|
||||
// See vs_WaveFixedFin6.inl for derivation of the following
|
||||
sub r0, r6, c27; // c27 is camera position; r0 = displaced vertex - camera
|
||||
dp3 r10.x, r0, r0; // squared length of r0.xyz
|
||||
rsq r10.x, r10.x; // 1 / length
|
||||
mul r0, r0, r10.xxxx; // r0.xyz = unit direction from camera to vertex
|
||||
|
||||
dp3 r10.x, r0, c28; // c28 is kEnvAdjust; r10.x = dot(direction, c28.xyz)
|
||||
mad r10.y, r10.x, r10.x, -c28.w; // r10.y = r10.x^2 - c28.w
|
||||
|
||||
rsq r9.x, r10.y; // 1 / sqrt(r10.y). NOTE(review): relies on r10.y being positive - confirm c28.w guarantees that
|
||||
|
||||
mad r10.z, r10.y, r9.x, r10.x; // r10.z = r10.y / sqrt(r10.y) + r10.x = sqrt(r10.y) + r10.x
|
||||
|
||||
mad r0.xyz, r0, r10.zzz, -c28.xyz; // r0 = direction * r10.z - c28.xyz
|
||||
|
||||
// ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump.
|
||||
dp3 r10.x, r0, r0; // squared length of the adjusted vector
|
||||
rsq r9.x, r10.x; // 1 / length
|
||||
mul r0.xyz, r0.xyz, r9.xxx; // r0.xyz normalized
|
||||
|
||||
mov r1.w, -r0.x; // store -r0 component-wise in the .w of the three matrix rows r1..r3
|
||||
mov r2.w, -r0.y;
|
||||
mov r3.w, -r0.z;
|
||||
|
||||
|
||||
// Now r1-r3 are texture2world, with the eye-ray vector in .w. We just
|
||||
// need to normalize them and bung them into output UV's 1-3.
|
||||
// Note we're accounting for our environment map being flipped from
|
||||
// D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2.
|
||||
mov r10.w, c13.z; // r10.w = c13.z (presumably 1.0 - TODO confirm) so the xxxw swizzles below leave each row's .w unscaled
|
||||
dp3 r10.x, r1, r1; // squared length of row r1 (xyz only)
|
||||
rsq r10.x, r10.x; // 1 / length
|
||||
mul oT1, r1, r10.xxxw; // oT1 = normalized r1.xyz, with r1.w multiplied by r10.w
|
||||
|
||||
dp3 r10.x, r3, r3; // squared length of row r3
|
||||
rsq r10.x, r10.x; // 1 / length
|
||||
mul oT2, r3, r10.xxxw; // row r3 goes to oT2 (the r2/r3 swap described in the comment above)
|
||||
//mul oT3, r3, r10.xxxw; // YZHACK
|
||||
|
||||
dp3 r10.x, r2, r2; // squared length of row r2
|
||||
rsq r10.x, r10.x; // 1 / length
|
||||
mul oT3, r2, r10.xxxw; // row r2 goes to oT3
|
||||
//mul oT2, r2, r10.xxxw;
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26; // rgb = v5.g, a = v5.r, each multiplied by the matching component of c26
|
||||
|
||||
|
||||
|
||||
vs.1.0
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
dcl_texcoord1 v8
|
||||
dcl_texcoord2 v9
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c18; // v0 * l2w; r6.xyz is the working world-space position that later gets its z replaced by the wave height and its xy displaced
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c13.z; // c13.z is presumably 1.0 (it is also the upper bound of the [0..1] clamps further down) - TODO confirm
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = illumination
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c22 = waterlevel + offset
|
||||
// c23 = (maxAtten - minAtten) / depthFalloff
|
||||
// c24 = minAtten.
|
||||
// And in particular:
|
||||
// c22.w = waterlevel
|
||||
// c23.w = 1.f;
|
||||
// c24.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c7, r6.xxxx; // r0 = c7 * world x (presumably one wave per component, c7 = direction x terms)
|
||||
mad r0, c8, r6.yyyy, r0; // r0 += c8 * world y => per-component dot(direction, position.xy)
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c4; // c4 = kFreq
|
||||
add r0, r0, c5; // c5 = kPhase
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c12.wwww; // r4 = 1 / kTwoPi (c12.w = kTwoPi)
|
||||
add r0, r0, c12.zzzz; // pre-shift by +kPi (c12.z = kPi); undone by the final sub so the wrapped result is centred on zero
|
||||
mul r0, r0, r4; // r0 = (dist + kPi) / kTwoPi; the expp/mov pairs below take its fractional part (expp .y) one component at a time, y last because r1.y is the scratch slot
|
||||
// dist = frac(dist);
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c12.wwww; // fraction in [0..1) scaled back up to [0..kTwoPi)
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c12.zzzz; // r0 now lies in [-kPi..kPi), the range the polynomial below is written for
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2 (start of the power table used by both polynomials)
|
||||
mul r2, r1, r0; // r0^3 - probably stall (reads r1 written by the previous instruction)
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c11.yyyy; // r1 = r0^2 * vCos.y (c11 = vCos)
|
||||
mad r2, r2, c10.yyyy, r0; // r2 = r0 + r0^3 * vSin.y (c10 = vSin)
|
||||
add r1, r1, c11.xxxx; // r1 = 1 + r0^2 * vCos.y (so c11.x supplies the leading 1)
|
||||
mad r2, r4, c10.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c11.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6 (r4 held r0^5)
|
||||
mad r2, r5, c10.wwww, r2; // r2 += r0^7 * vSin.w => r2 = sinDist
|
||||
mad r1, r4, c11.wwww, r1; // r1 += r0^6 * vCos.w => r1 = cosDist
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c22, r6.zzzz; // r4 = (waterlevel + offset) - vertex z; r4.w = c22.w - r6.z, the raw depth
|
||||
mul r4, r4, c23; // * (maxAtten - minAtten) / depthFalloff; c23.w = 1 per the notes above, so .w is unchanged
|
||||
add r4, r4, c24; // + minAtten; c24.w = 0 per the notes above, so .w is still the depth
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c13.zzzz; // upper bound (c13.z)
|
||||
max r4.xyz, r4, c13.xxxx; // lower bound (c13.x); only r4.z is read by the instructions that follow in this shader
|
||||
//mov r4.xyz, c13.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c21; // r11 = v5.w * c21; c21 presumably carries the per-wave waveLen terms - TODO confirm. NOTE(review): the formula in the header comment also subtracts 1; no subtraction is done here
|
||||
max r11, r11, c13.xxxx; // clamp below at c13.x
|
||||
min r11, r11, c13.zzzz; // clamp above at c13.z => r11 = per-wave filter
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11; // apply the per-wave filter held in r11
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c6; // c6 = kAmplitude; r2 stays live and is reused for the basis terms later
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c13.zzzz; // sum of the four components of r2 (dot with c13.z replicated)
|
||||
mul r8.y, r8.x, r4.z; // scale by r4.z, the clamped depth attenuation for wave scaling
|
||||
add r8.z, r8.y, c22.w; // + c22.w (waterlevel)
|
||||
max r6.z, r6.z, r8.z; // keep the higher of the input vertex z and the wave surface
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11; // same per-wave filter as applied to the sines
|
||||
// Pos = (in.x + S, in.y + R, r6.z)
|
||||
// S = sum(k Dir.x A cos())
|
||||
// R = sum(k Dir.y A cos())
|
||||
// c30 = k Dir.x A
|
||||
// c31 = k Dir.y A
|
||||
// S = sum(cosDist * c30);
|
||||
dp4 r7.x, r1, c30; // r7.x = S
|
||||
// R = sum(cosDist * c31);
|
||||
dp4 r7.y, r1, c31; // r7.y = R
|
||||
|
||||
add r6.xy, r6.xy, r7.xy; // horizontal displacement by (S, R); note this term is not multiplied by r4.z, unlike the height
|
||||
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c25.x, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c25.x; // c25.x = the upward bias described in the comment above
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0; // transform into a temp instead of oPos so r9.w can feed the fog calculation
|
||||
add r10.x, r9.w, c29.x; // r10.x = r9.w + c29.x
|
||||
mul oFog, r10.x, c29.y; // fog = (r9.w + c29.x) * c29.y; the c29 values are set outside this file
|
||||
//mov oFog, c13.y;
|
||||
mov oPos, r9; // emit the transformed position
|
||||
|
||||
// Now onto texture coordinate generation.
|
||||
//
|
||||
// First is the usual texture transform
|
||||
mov r11.zw, c13.zzzz; // z and w of the output uv are both set to c13.z
|
||||
dp4 r11.x, v7, c14; // u = dot(v7, c14)
|
||||
dp4 r11.y, v7, c15; // v = dot(v7, c15)
|
||||
mov oT0, r11; // uv set 0 out
|
||||
|
||||
// Calculate our basis vectors as input into our tex3x3vspec
|
||||
// First we get our basis set off our surface.
|
||||
// Okay, here we go:
|
||||
// W == sum(k w Dir.x^2 A sin()) x
|
||||
// V == sum(k w Dir.x Dir.y A sin()) x
|
||||
// U == sum(k w Dir.y^2 A sin()) x
|
||||
//
|
||||
// T == sum(A sin())
|
||||
//
|
||||
// S == sum(k Dir.x A cos())
|
||||
// R == sum(k Dir.y A cos())
|
||||
//
|
||||
// Q == sum(k w A sin()) x (the code below dots c34 with the sine vector r2, not the cosines)
|
||||
//
|
||||
// M == sum(A cos())
|
||||
//
|
||||
// P == sum(w Dir.x A cos()) x
|
||||
// N == sum(w Dir.y A cos()) x
|
||||
//
|
||||
// Then:
|
||||
// Pos = (in.x + S, in.y + R, waterheight + T) // Already done above.
|
||||
//
|
||||
// Bin = (1 - W, -V, P)
|
||||
// Tan = (-V, 1 - U, N)
|
||||
// Nor = (-P, -N, 1 - Q)
|
||||
//
|
||||
// The matrix
|
||||
// |Bx, Tx, Nx|
|
||||
// |By, Ty, Ny|
|
||||
// |Bz, Tz, Nz|
|
||||
// is surface2world, but we still need to fold in
|
||||
// texture2surface. We'll go with the generalized
|
||||
// (not assuming a flat surface) partials of dPos/dU and dPos/dV
|
||||
// as coming in as uv coords v8 and v9.
|
||||
// Then, if r5 = v8 X v9, then texture to surface is
|
||||
// |v8.x, v9.x, r5.x|
|
||||
// |v8.y, v9.y, r5.y|
|
||||
// |v8.z, v9.z, r5.z|
|
||||
//
|
||||
// So, let's say we calc 3 vectors,
|
||||
// r7 = (Bx, Tx, Nx)
|
||||
// r8 = (By, Ty, Ny)
|
||||
// r9 = (Bz, Tz, Nz)
|
||||
//
|
||||
// Then surface2world * texture2surface =
|
||||
// |r7 dot v8, r7 dot v9, r7 dot r5|
|
||||
// |r8 dot v8, r8 dot v9, r8 dot r5|
|
||||
// |r9 dot v8, r9 dot v9, r9 dot r5|
|
||||
//
|
||||
// We will need r5 as v8 X v9
|
||||
mov r7, v8; // copy v8 into a temp; presumably because one instruction cannot read two different v# registers - TODO confirm
|
||||
mul r5.xyz, r7.yzx, v9.zxy; // r5 = v8.yzx * v9.zxy
|
||||
mad r5.xyz, r7.zxy, -v9.yzx, r5.xyz; // r5 -= v8.zxy * v9.yzx => r5 = v8 X v9
|
||||
|
||||
// Okay, r1 currently has the vector of cosines, and r2 has vector of sines.
|
||||
// Everything will want that times amplitude, so go ahead and fold that in.
|
||||
mul r1, r1, c6; // r1 = A cos() = M (r1 already carries the per-wave filter)
|
||||
// Sines already have amplitude folded in, so r2 = A sin() = T.
|
||||
// Now just compute r7-9 one element at a time.
|
||||
dp4 r7.x, r2, -c35; // r7.x = -W
|
||||
dp4 r7.y, r2, -c36; // r7.y = -V
|
||||
dp4 r7.z, r1, -c32; // r7.z = -P
|
||||
add r7.x, r7.x, c13.z; // r7.x = 1 - W; r7 = (Bx, Tx, Nx)
|
||||
|
||||
dp4 r8.x, r2, -c36; // r8.x = -V (same value as r7.y)
|
||||
dp4 r8.y, r2, -c37; // r8.y = -U
|
||||
dp4 r8.z, r1, -c33; // r8.z = -N
|
||||
add r8.y, r8.y, c13.z; // r8.y = 1 - U; r8 = (By, Ty, Ny)
|
||||
|
||||
dp4 r9.z, r2, -c34; // r9.z = -Q. NOTE(review): computed from the sine vector r2; confirm against the definition of Q in the header comment
|
||||
mov r9.x, -r7.z; // r9.x = P = -r7.z
|
||||
mov r9.y, -r8.z; // r9.y = N = -r8.z
|
||||
add r9.z, r9.z, c13.z; // r9.z = 1 - Q; r9 = (Bz, Tz, Nz)
|
||||
|
||||
// Okay, got everything we need, construct r1-3 as surface2world*texture2surface.
|
||||
dp3 r1.x, r7, v8; // row 1 = (r7 dot v8, r7 dot v9, r7 dot r5); overwrites the cosine vector, which is no longer needed
|
||||
dp3 r1.y, r7, v9;
|
||||
dp3 r1.z, r7, r5;
|
||||
|
||||
dp3 r2.x, r8, v8; // row 2 = (r8 dot v8, r8 dot v9, r8 dot r5); overwrites the sine vector
|
||||
dp3 r2.y, r8, v9;
|
||||
dp3 r2.z, r8, r5;
|
||||
|
||||
dp3 r3.x, r9, v8; // row 3 = (r9 dot v8, r9 dot v9, r9 dot r5)
|
||||
dp3 r3.y, r9, v9;
|
||||
dp3 r3.z, r9, r5;
|
||||
|
||||
// Following section is debug only to skip the per-vert tangent space axes.
|
||||
//add r1, c13.zxxx, r7.zzxw;
|
||||
//add r2, c13.xzxx, r7.zzyw;
|
||||
//
|
||||
//mov r3.x, -r7.x;
|
||||
//mov r3.y, -r7.y;
|
||||
//mov r3.zw, c13.zz;
|
||||
|
||||
// See vs_WaveFixedFin6.inl for derivation of the following
|
||||
sub r0, r6, c27; // c27 is camera position; r0 = displaced vertex - camera
|
||||
dp3 r10.x, r0, r0; // squared length of r0.xyz
|
||||
rsq r10.x, r10.x; // 1 / length
|
||||
mul r0, r0, r10.xxxx; // r0.xyz = unit direction from camera to vertex
|
||||
|
||||
dp3 r10.x, r0, c28; // c28 is kEnvAdjust; r10.x = dot(direction, c28.xyz)
|
||||
mad r10.y, r10.x, r10.x, -c28.w; // r10.y = r10.x^2 - c28.w
|
||||
|
||||
rsq r9.x, r10.y; // 1 / sqrt(r10.y). NOTE(review): relies on r10.y being positive - confirm c28.w guarantees that
|
||||
|
||||
mad r10.z, r10.y, r9.x, r10.x; // r10.z = r10.y / sqrt(r10.y) + r10.x = sqrt(r10.y) + r10.x
|
||||
|
||||
mad r0.xyz, r0, r10.zzz, -c28.xyz; // r0 = direction * r10.z - c28.xyz
|
||||
|
||||
// ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump.
|
||||
dp3 r10.x, r0, r0; // squared length of the adjusted vector
|
||||
rsq r9.x, r10.x; // 1 / length
|
||||
mul r0.xyz, r0.xyz, r9.xxx; // r0.xyz normalized
|
||||
|
||||
mov r1.w, -r0.x; // store -r0 component-wise in the .w of the three matrix rows r1..r3
|
||||
mov r2.w, -r0.y;
|
||||
mov r3.w, -r0.z;
|
||||
|
||||
|
||||
// Now r1-r3 are texture2world, with the eye-ray vector in .w. We just
|
||||
// need to normalize them and bung them into output UV's 1-3.
|
||||
// Note we're accounting for our environment map being flipped from
|
||||
// D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2.
|
||||
mov r10.w, c13.z; // r10.w = c13.z (presumably 1.0 - TODO confirm) so the xxxw swizzles below leave each row's .w unscaled
|
||||
dp3 r10.x, r1, r1; // squared length of row r1 (xyz only)
|
||||
rsq r10.x, r10.x; // 1 / length
|
||||
mul oT1, r1, r10.xxxw; // oT1 = normalized r1.xyz, with r1.w multiplied by r10.w
|
||||
|
||||
dp3 r10.x, r3, r3; // squared length of row r3
|
||||
rsq r10.x, r10.x; // 1 / length
|
||||
mul oT2, r3, r10.xxxw; // row r3 goes to oT2 (the r2/r3 swap described in the comment above)
|
||||
//mul oT3, r3, r10.xxxw; // YZHACK
|
||||
|
||||
dp3 r10.x, r2, r2; // squared length of row r2
|
||||
rsq r10.x, r10.x; // 1 / length
|
||||
mul oT3, r2, r10.xxxw; // row r2 goes to oT3
|
||||
//mul oT2, r2, r10.xxxw;
|
||||
|
||||
// Output color is vertex green
|
||||
// Output alpha is vertex red (vtx alpha is used for wave filtering)
|
||||
// Whole thing modulated by material color/opacity.
|
||||
mul oD0, v5.yyyx, c26; // rgb = v5.g, a = v5.r, each multiplied by the matching component of c26
|
||||
|
||||
|
@ -1,449 +1,449 @@
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c21; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c16.zzzz;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = reflection strength (transparency)
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c25 = waterlevel + offset
|
||||
// c26 = (maxAtten - minAtten) / depthFalloff
|
||||
// c27 = minAtten.
|
||||
// And in particular:
|
||||
// c25.w = waterlevel
|
||||
// c26.w = 1.f;
|
||||
// c27.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c8, r6.xxxx;
|
||||
mad r0, c9, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c15.wwww;
|
||||
add r0, r0, c15.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
// Each expp leaves the fractional part of one component of r0 in r1.y, which
// is then copied to its final slot in r1. The y component is processed last
// so its result stays in r1.y and is not overwritten by the other three.
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c13.wwww, r2;
|
||||
mad r1, r4, c14.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c25, r6.zzzz;
|
||||
mul r4, r4, c26;
|
||||
add r4, r4, c27;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c16.zzzz;
|
||||
max r4.xyz, r4, c16.xxxx;
|
||||
|
||||
// Calc our filter (see above).
// r11 = clamp(v5.w * c24, c16.x, c16.z), computed for all four components at
// once (c16.x and c16.z serve as 0 and 1 elsewhere in this shader).
// NOTE(review): the header comment describes (waveLen * v5.w) - 1, but no
// subtraction is performed here -- confirm which of the two is current.
|
||||
mul r11, v5.wwww, c24;
|
||||
max r11, r11, c16.xxxx;
|
||||
min r11, r11, c16.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c7;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c16.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c25.w;
|
||||
max r6.z, r6.z, r8.z; // CLAMP
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c5;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c7;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c16.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0))
|
||||
dp4 r7.x, r1, -c8
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c9
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
|
||||
mov r11, c16.xxzx;
|
||||
add r11, r11, r7;
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx;
|
||||
|
||||
//
|
||||
// // Scrunch in based on computed (normalized) normal
|
||||
// temp = mul( accumNorm, kNegScrunchScale ); // kNegScrunchScale = (-scrunchScale, -scrunchScale, 0, 0);
|
||||
// accumPos += temp;
|
||||
//dp3 r10.x, r11, c18.zxw; // winddir.x, winddir.y, 0, 0 // NUKE
|
||||
// r10.x tells us whether our normal is opposed to the wind.
|
||||
// If opposed, r10.x = 0, else r10.x = 1.f;
|
||||
// We'll use this to kill the Scrunch on the back sides of waves.
|
||||
// We use it for position right here, and then again for the
|
||||
// normal just down a bit further.
|
||||
//slt r10.x, r10.x, c16.x; // NUKE
|
||||
//mov r10.x, c16.z; // HACKAGE NUKE
|
||||
//mul r9, r10.xxxx, r11; // NUKE
|
||||
|
||||
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
|
||||
mul r10.x, c12.y, r4.z;
|
||||
//mov r10.x, c12.y; // NUKETEST TAKEOUT
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy;
|
||||
|
||||
// mul r6.z, r6.z, r10.xxxx; DEBUG
|
||||
|
||||
// mad r6, r11, c12.yyzz, r6;
|
||||
|
||||
// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
|
||||
// accumCos *= (scrunchScale, scrunchScale, 0, 0);
|
||||
|
||||
mul r2.x, r6.z, c12.x;
|
||||
//mad r2.x, r2.x, r10.x, c16.z; NUKE
|
||||
add r2.x, r2.x, c16.z;
|
||||
mul r2.x, r2.x, r4.z; // HACKAGE // NUKETEST BACKIN
|
||||
|
||||
// mul r7, r7, c12.xxzz;
|
||||
mul r7.xy, r7.xy, r2.xx;
|
||||
|
||||
// This is actually wrong, but useful right now for visualizing the generated coords.
|
||||
// See below for correct version.
|
||||
|
||||
sub r3, c16.xxzz, r7.xyzz;
|
||||
|
||||
//mov oD0, r3; // SEENORM
|
||||
|
||||
// WAVEFACE factor: r8.x = 1 - clamp(dot(r3.xyz, c18.zxw) * c12.w, 0, 1)
// (c16.x and c16.z serve as 0 and 1 elsewhere in this shader).
// c18.zxw is presumably the wind direction with a zero z (see the NUKE'd dp3
// above) -- confirm. r8.x later scales the rgb of oD1.
dp3 r8.x, r3, c18.zxww; // WAVEFACE
|
||||
mul r8.x, r8.x, c12.w; // WAVEFACE
|
||||
max r8.x, r8.x, c16.x; // WAVEFACE
|
||||
min r8.x, r8.x, c16.z; // WAVEFACE
|
||||
//mov r9.x, c12.z;
|
||||
//add r9.x, r9.x, -c16.z;
|
||||
//mad r8.x, r9.x, r8.x, c16.z; // WAVEFACE
|
||||
// Invert: r8.x = 1 - r8.x
mul r8.x, r8.x, -c16.z;
|
||||
add r8.x, r8.x, c16.z;
|
||||
|
||||
// Normalize?
|
||||
|
||||
// We can either calculate an orthonormal basis from the
|
||||
// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
|
||||
// or compute our basis directly from the partial derivatives, with
|
||||
// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
|
||||
//
|
||||
// These work out to identically the same result, so we'll compute directly
|
||||
// from the partials because it takes 2 fewer instructions.
|
||||
//
|
||||
// Note that our basis is NOT orthonormal. The Normal is equal to
|
||||
// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
|
||||
// are both correct tangents to the surface, and their projections on the XY plane
|
||||
// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
|
||||
// Not really. I'm actually not really sure which is more "proper" for bump mapping.
|
||||
//
|
||||
// Note also that we add when we should subtract and subtract when we should
|
||||
// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
|
||||
// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
|
||||
// explanation.
|
||||
//
|
||||
// Binormal = Y % Normal
|
||||
// Cross product3 is:
|
||||
// mul res.xyz, a.yzx, b.zxy
|
||||
// mad res.xyz, -a.zxy, b.yzx, res.xyz
|
||||
// mul r1.xyz, c16.zxx, r3.zxy;
|
||||
// mad r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
|
||||
|
||||
// Tangent = Normal % X
|
||||
// mul r2.xyz, r3.yzx, c16.xzx;
|
||||
// mad r2.xyz, -r3.zxy, c16.xxz, r2;
|
||||
|
||||
add r1, c16.zxxx, r7.zzxz;
|
||||
add r2, c16.xzxx, r7.zzyz;
|
||||
|
||||
|
||||
// Note that we're swapping z and y to match our environment map tools in max.
|
||||
// We do this through our normal map transform (oT1, oT2, oT3), making it
|
||||
// a concatenation of:
|
||||
//
|
||||
// rotate about Z (blue) to turn our map into the wind
|
||||
// windRot = | dirY -dirX 0 |
|
||||
// | dirX dirY 0 |
|
||||
// | 0 0 1 |
|
||||
//
|
||||
// swap our Y and Z axes to match our environment map
|
||||
// swapYZ = | 1 0 0 |
|
||||
// | 0 0 1 |
|
||||
// | 0 1 0 |
|
||||
//
|
||||
// rotate the normal into the surface's tangent space basis
|
||||
// basis = | Bx Tx Nx |
|
||||
// | By Ty Ny |
|
||||
// | Bz Tz Nz |
|
||||
//
|
||||
// Note that we've constructed the basis by taking advantage of the
|
||||
// matrix being a pure rotation, as noted below, so r1, r2 and r3
|
||||
// are actually constructed as:
|
||||
// basis = | Bx -By -Bz |
|
||||
// | -Tx Ty -Tz |
|
||||
// | -Nx -Ny -Nz |
|
||||
//
|
||||
// Then the final normal map transform is:
|
||||
//
|
||||
// basis * swapYZ * windRot [ * normal ]
|
||||
|
||||
|
||||
// sub r1.w, c17.x, r6.x;
|
||||
// sub r2.w, c17.z, r6.z;
|
||||
// sub r3.w, c17.y, r6.y;
|
||||
|
||||
// Big note here. All this math can blow up if the camera position
|
||||
// is outside the environment sphere. It's assumed that's dealt
|
||||
// with in the app setting up the constants. For that reason, the
|
||||
// camera position used here might not be the real local camera position,
|
||||
// which is needed for the angular attenuation, so we burn another constant
|
||||
// with our pseudo-camera position. To restrain the pseudo-camera from
|
||||
// leaving the sphere, we make:
|
||||
// pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
|
||||
// where dist = |realPos - envCenter|
|
||||
|
||||
// So, our "finitized" eyeray is:
|
||||
// camPos + D * t - envCenter = D * t - (envCenter - camPos)
|
||||
// with
|
||||
// D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
|
||||
// and
|
||||
// t = D dot F + sqrt( (D dot F)^2 - G )
|
||||
// with
|
||||
// F = (envCenter - camPos) => c19.xyz
|
||||
// G = F^2 - R^2 => c19.w
|
||||
// R = environment radius. => unused
|
||||
//
|
||||
// This all derives from the positive root of equation
|
||||
// (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
|
||||
// In other words, where on a sphere of radius R centered about envCenter
|
||||
// does the ray from the real camera position through this point hit.
|
||||
//
|
||||
// Note that F, G, and R are all constants (one point, two scalars).
|
||||
//
|
||||
// So first we calculate D into r0,
|
||||
// then D dot F into r10.x,
|
||||
// then (D dot F)^2 - G into r10.y
|
||||
// then rsq( (D dot F)^2 - G ) into r9.x;
|
||||
// then t = r10.z = r10.x + r10.y * r9.x;
|
||||
// and
|
||||
// r0 = D * t - (envCenter - camPos)
|
||||
// = r0 * r10.zzzz - F;
|
||||
//
|
||||
sub r0, r6, c17;
|
||||
dp3 r10.x, r0, r0;
|
||||
rsq r10.x, r10.x;
|
||||
mul r0, r0, r10.xxxx; // r0 = D
|
||||
|
||||
dp3 r10.x, r0, c19; // r10.x = D dot F
|
||||
mad r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G
|
||||
|
||||
rsq r9.x, r10.y; // r9.x = 1/SQRT((D dot F)^2 - G)
|
||||
|
||||
mad r10.z, r10.y, r9.x, r10.x; // r10.z = D dot F + SQRT((D dot F)^2 - G)
|
||||
|
||||
mad r0.xyz, r0, r10.zzz, -c19.xyz; // r0.xyz = D * t - (envCenter - camPos)
|
||||
|
||||
mov r1.w, -r0.x;
|
||||
mov r2.w, -r0.y;
|
||||
mov r3.w, -r0.z;
|
||||
|
||||
// Now rotate our basis vectors into the wind
|
||||
// This should be redone, and put our wind direction into
|
||||
// the water texture.
|
||||
dp3 r0.x, r1, c18.xyww;
|
||||
dp3 r0.y, r1, c18.zxww;
|
||||
mov r1.xy, r0;
|
||||
|
||||
dp3 r0.x, r2, c18.xyww;
|
||||
dp3 r0.y, r2, c18.zxww;
|
||||
mov r2.xy, r0;
|
||||
|
||||
dp3 r0.x, r3, c18.xyww;
|
||||
dp3 r0.y, r3, c18.zxww;
|
||||
mov r3.xy, r0;
|
||||
|
||||
mov r0.zw, c16.zzxz;
|
||||
|
||||
dp3 r0.x, r1, r1;
|
||||
rsq r0.x, r0.x;
|
||||
mul oT1, r1.xyzw, r0.xxxw;
|
||||
// mul r8, r1.xyzw, r0.xxxw; // VISUAL
|
||||
|
||||
dp3 r0.x, r2, r2;
|
||||
rsq r0.x, r0.x;
|
||||
mul oT3, r2.xyzw, r0.xxxw;
|
||||
// mul r9, r2.xyzw, r0.xxxw; // VISUAL
|
||||
|
||||
dp3 r0.x, r3, r3;
|
||||
rsq r0.x, r0.x;
|
||||
mul oT2, r3.xyzw, r0.xxxw;
|
||||
// mul r9, r3.xyzw, r0.xxxw; // VISUAL
|
||||
|
||||
// mul r3, r3.xzyw, r0.xxxw;
|
||||
// mul r3.xy, r3, -c16.zzzz;
|
||||
|
||||
|
||||
/*
|
||||
// Want:
|
||||
// oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
|
||||
// oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
|
||||
// oT3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
|
||||
// with BIN, TAN, and NORM normalized.
|
||||
// Unnormalized, we have
|
||||
// BIN = (1, 0, -r7.x) where r7 == accumCos
|
||||
// TAN = (0, 1, -r7.y)
|
||||
// NORM= (r7.x, r7.y, 1)
|
||||
// So, unnormalized, we have
|
||||
// oT1 = (1, 0, r7.x, view2pos.x)
|
||||
// oT2 = (0, 1, r7.y, view2pos.y)
|
||||
// oT3 = (-r7.x, -r7.y, 1, view2pos.z)
|
||||
// which is just reversing the signs on the accumCos
|
||||
// terms above. So the normalized version is just
|
||||
// reversing the signs on the normalized version above.
|
||||
*/
|
||||
//mov oT3, r4;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c21; // HACKAGE
|
||||
//mov r6.w, c16.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
// Transform the (wave-displaced) position in r6 by c0 into the temp r9 rather
// than straight into oPos; r9.w is read for the fog term below.
m4x4 r9, r6, c0;
|
||||
// oFog = (r9.w + c28.x) * c28.y
add r10.x, r9.w, c28.x;
|
||||
mul oFog, r10.x, c28.y;
|
||||
//mov oFog, c16.y; // TESTFOGHACK
|
||||
mov oPos, r9;
|
||||
|
||||
mov oD0, c4; // SEENORM
|
||||
|
||||
// Transform our uvw
|
||||
dp4 r0.x, v0, c10;
|
||||
dp4 r0.y, v0, c11;
|
||||
|
||||
//mov r0.zw, c16.xxxz;
|
||||
mov oT0, r0
|
||||
|
||||
// Questionable attenuation follows
|
||||
// Find vector from this point to camera and normalize
|
||||
sub r0, c17, r6;
|
||||
dp3 r1.x, r0, r0;
|
||||
rsq r1.x, r1.x;
|
||||
mul r0, r0, r1.xxxx;
|
||||
// Dot that with the computed normal
|
||||
dp3 r1.x, r0, r11;
|
||||
mul r1.x, r1.x, v5.z;
|
||||
// dp3 r1.x, r0, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
|
||||
// Map dot=1 => 0, dot=0 => 1
|
||||
sub r1.xyzw, c16.zzzz, r1.xxxx;
|
||||
add r1.w, r1.wwww, c16.zzzz;
|
||||
mul r1.w, r1.wwww, c16.yyyy;
|
||||
// No need to clamp, since the destination register (in the pixel shader)
|
||||
// will saturate [0..1] anyway.
|
||||
//%%% mul r1.w, r1.w, r4.x;
|
||||
//%%% mul r1.xyz, r1.xyz, r4.yyy;
|
||||
mul r1, r1, r4.yyyx; // HACKTESTCOLOR
|
||||
mul r1.xyz, r1, r8.xxx; // WAVEFACE
|
||||
mul r1.w, r1.wwww, v5.xxxx;
|
||||
mul oD1, r1, c20;
|
||||
|
||||
// mov oD1, r4.yyyy;
|
||||
|
||||
//mov oD1, c16.zzzz; // HACKAGE
|
||||
// mov oD1, r9;
|
||||
// mov oD1, r8.xzyw;
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c21; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c16.zzzz;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = reflection strength (transparency)
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c25 = waterlevel + offset
|
||||
// c26 = (maxAtten - minAtten) / depthFalloff
|
||||
// c27 = minAtten.
|
||||
// And in particular:
|
||||
// c25.w = waterlevel
|
||||
// c26.w = 1.f;
|
||||
// c27.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c8, r6.xxxx;
|
||||
mad r0, c9, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c15.wwww;
|
||||
add r0, r0, c15.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
// Each expp leaves the fractional part of one component of r0 in r1.y, which
// is then copied to its final slot in r1. The y component is processed last
// so its result stays in r1.y and is not overwritten by the other three.
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c13.wwww, r2;
|
||||
mad r1, r4, c14.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c25, r6.zzzz;
|
||||
mul r4, r4, c26;
|
||||
add r4, r4, c27;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c16.zzzz;
|
||||
max r4.xyz, r4, c16.xxxx;
|
||||
|
||||
// Calc our filter (see above).
// r11 = clamp(v5.w * c24, c16.x, c16.z), computed for all four components at
// once (c16.x and c16.z serve as 0 and 1 elsewhere in this shader).
// NOTE(review): the header comment describes (waveLen * v5.w) - 1, but no
// subtraction is performed here -- confirm which of the two is current.
|
||||
mul r11, v5.wwww, c24;
|
||||
max r11, r11, c16.xxxx;
|
||||
min r11, r11, c16.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c7;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c16.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c25.w;
|
||||
max r6.z, r6.z, r8.z; // CLAMP
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c5;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c7;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c16.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0))
|
||||
dp4 r7.x, r1, -c8
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c9
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
|
||||
mov r11, c16.xxzx;
|
||||
add r11, r11, r7;
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx;
|
||||
|
||||
//
|
||||
// // Scrunch in based on computed (normalized) normal
|
||||
// temp = mul( accumNorm, kNegScrunchScale ); // kNegScrunchScale = (-scrunchScale, -scrunchScale, 0, 0);
|
||||
// accumPos += temp;
|
||||
//dp3 r10.x, r11, c18.zxw; // winddir.x, winddir.y, 0, 0 // NUKE
|
||||
// r10.x tells us whether our normal is opposed to the wind.
|
||||
// If opposed, r10.x = 0, else r10.x = 1.f;
|
||||
// We'll use this to kill the Scrunch on the back sides of waves.
|
||||
// We use it for position right here, and then again for the
|
||||
// normal just down a bit further.
|
||||
//slt r10.x, r10.x, c16.x; // NUKE
|
||||
//mov r10.x, c16.z; // HACKAGE NUKE
|
||||
//mul r9, r10.xxxx, r11; // NUKE
|
||||
|
||||
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
|
||||
mul r10.x, c12.y, r4.z;
|
||||
//mov r10.x, c12.y; // NUKETEST TAKEOUT
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy;
|
||||
|
||||
// mul r6.z, r6.z, r10.xxxx; DEBUG
|
||||
|
||||
// mad r6, r11, c12.yyzz, r6;
|
||||
|
||||
// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
|
||||
// accumCos *= (scrunchScale, scrunchScale, 0, 0);
|
||||
|
||||
mul r2.x, r6.z, c12.x;
|
||||
//mad r2.x, r2.x, r10.x, c16.z; NUKE
|
||||
add r2.x, r2.x, c16.z;
|
||||
mul r2.x, r2.x, r4.z; // HACKAGE // NUKETEST BACKIN
|
||||
|
||||
// mul r7, r7, c12.xxzz;
|
||||
mul r7.xy, r7.xy, r2.xx;
|
||||
|
||||
// This is actually wrong, but useful right now for visualizing the generated coords.
|
||||
// See below for correct version.
|
||||
|
||||
sub r3, c16.xxzz, r7.xyzz;
|
||||
|
||||
//mov oD0, r3; // SEENORM
|
||||
|
||||
// WAVEFACE factor: r8.x = 1 - clamp(dot(r3.xyz, c18.zxw) * c12.w, 0, 1)
// (c16.x and c16.z serve as 0 and 1 elsewhere in this shader).
// c18.zxw is presumably the wind direction with a zero z (see the NUKE'd dp3
// above) -- confirm. r8.x later scales the rgb of oD1.
dp3 r8.x, r3, c18.zxww; // WAVEFACE
|
||||
mul r8.x, r8.x, c12.w; // WAVEFACE
|
||||
max r8.x, r8.x, c16.x; // WAVEFACE
|
||||
min r8.x, r8.x, c16.z; // WAVEFACE
|
||||
//mov r9.x, c12.z;
|
||||
//add r9.x, r9.x, -c16.z;
|
||||
//mad r8.x, r9.x, r8.x, c16.z; // WAVEFACE
|
||||
// Invert: r8.x = 1 - r8.x
mul r8.x, r8.x, -c16.z;
|
||||
add r8.x, r8.x, c16.z;
|
||||
|
||||
// Normalize?
|
||||
|
||||
// We can either calculate an orthonormal basis from the
|
||||
// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
|
||||
// or compute our basis directly from the partial derivatives, with
|
||||
// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
|
||||
//
|
||||
// These work out to identically the same result, so we'll compute directly
|
||||
// from the partials because it takes 2 fewer instructions.
|
||||
//
|
||||
// Note that our basis is NOT orthonormal. The Normal is equal to
|
||||
// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
|
||||
// are both correct tangents to the surface, and their projections on the XY plane
|
||||
// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
|
||||
// Not really. I'm actually not really sure which is more "proper" for bump mapping.
|
||||
//
|
||||
// Note also that we add when we should subtract and subtract when we should
|
||||
// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
|
||||
// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
|
||||
// explanation.
|
||||
//
|
||||
// Binormal = Y % Normal
|
||||
// Cross product3 is:
|
||||
// mul res.xyz, a.yzx, b.zxy
|
||||
// mad res.xyz, -a.zxy, b.yzx, res.xyz
|
||||
// mul r1.xyz, c16.zxx, r3.zxy;
|
||||
// mad r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
|
||||
|
||||
// Tangent = Normal % X
|
||||
// mul r2.xyz, r3.yzx, c16.xzx;
|
||||
// mad r2.xyz, -r3.zxy, c16.xxz, r2;
|
||||
|
||||
add r1, c16.zxxx, r7.zzxz;
|
||||
add r2, c16.xzxx, r7.zzyz;
|
||||
|
||||
|
||||
// Note that we're swapping z and y to match our environment map tools in max.
|
||||
// We do this through our normal map transform (oT1, oT2, oT3), making it
|
||||
// a concatenation of:
|
||||
//
|
||||
// rotate about Z (blue) to turn our map into the wind
|
||||
// windRot = | dirY -dirX 0 |
|
||||
// | dirX dirY 0 |
|
||||
// | 0 0 1 |
|
||||
//
|
||||
// swap our Y and Z axes to match our environment map
|
||||
// swapYZ = | 1 0 0 |
|
||||
// | 0 0 1 |
|
||||
// | 0 1 0 |
|
||||
//
|
||||
// rotate the normal into the surface's tangent space basis
|
||||
// basis = | Bx Tx Nx |
|
||||
// | By Ty Ny |
|
||||
// | Bz Tz Nz |
|
||||
//
|
||||
// Note that we've constructed the basis by taking advantage of the
|
||||
// matrix being a pure rotation, as noted below, so r1, r2 and r3
|
||||
// are actually constructed as:
|
||||
// basis = | Bx -By -Bz |
|
||||
// | -Tx Ty -Tz |
|
||||
// | -Nx -Ny -Nz |
|
||||
//
|
||||
// Then the final normal map transform is:
|
||||
//
|
||||
// basis * swapYZ * windRot [ * normal ]
|
||||
|
||||
|
||||
// sub r1.w, c17.x, r6.x;
|
||||
// sub r2.w, c17.z, r6.z;
|
||||
// sub r3.w, c17.y, r6.y;
|
||||
|
||||
// Big note here. All this math can blow up if the camera position
|
||||
// is outside the environment sphere. It's assumed that's dealt
|
||||
// with in the app setting up the constants. For that reason, the
|
||||
// camera position used here might not be the real local camera position,
|
||||
// which is needed for the angular attenuation, so we burn another constant
|
||||
// with our pseudo-camera position. To restrain the pseudo-camera from
|
||||
// leaving the sphere, we make:
|
||||
// pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
|
||||
// where dist = |realPos - envCenter|
|
||||
|
||||
// So, our "finitized" eyeray is:
|
||||
// camPos + D * t - envCenter = D * t - (envCenter - camPos)
|
||||
// with
|
||||
// D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
|
||||
// and
|
||||
// t = D dot F + sqrt( (D dot F)^2 - G )
|
||||
// with
|
||||
// F = (envCenter - camPos) => c19.xyz
|
||||
// G = F^2 - R^2 => c19.w
|
||||
// R = environment radius. => unused
|
||||
//
|
||||
// This all derives from the positive root of equation
|
||||
// (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
|
||||
// In other words, where on a sphere of radius R centered about envCenter
|
||||
// does the ray from the real camera position through this point hit.
|
||||
//
|
||||
// Note that F, G, and R are all constants (one point, two scalars).
|
||||
//
|
||||
// So first we calculate D into r0,
|
||||
// then D dot F into r10.x,
|
||||
// then (D dot F)^2 - G into r10.y
|
||||
// then rsq( (D dot F)^2 - G ) into r9.x;
|
||||
// then t = r10.z = r10.x + r10.y * r9.x;
|
||||
// and
|
||||
// r0 = D * t - (envCenter - camPos)
|
||||
// = r0 * r10.zzzz - F;
|
||||
//
|
||||
sub r0, r6, c17;
|
||||
dp3 r10.x, r0, r0;
|
||||
rsq r10.x, r10.x;
|
||||
mul r0, r0, r10.xxxx; // r0 = D
|
||||
|
||||
dp3 r10.x, r0, c19; // r10.x = D dot F
|
||||
mad r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G
|
||||
|
||||
rsq r9.x, r10.y; // r9.x = 1/SQRT((D dot F)^2 - G)
|
||||
|
||||
mad r10.z, r10.y, r9.x, r10.x; // r10.z = D dot F + SQRT((D dot F)^2 - G)
|
||||
|
||||
mad r0.xyz, r0, r10.zzz, -c19.xyz; // r0.xyz = D * t - (envCenter - camPos)
|
||||
|
||||
mov r1.w, -r0.x;
|
||||
mov r2.w, -r0.y;
|
||||
mov r3.w, -r0.z;
|
||||
|
||||
// Now rotate our basis vectors into the wind
|
||||
// This should be redone, and put our wind direction into
|
||||
// the water texture.
|
||||
dp3 r0.x, r1, c18.xyww;
|
||||
dp3 r0.y, r1, c18.zxww;
|
||||
mov r1.xy, r0;
|
||||
|
||||
dp3 r0.x, r2, c18.xyww;
|
||||
dp3 r0.y, r2, c18.zxww;
|
||||
mov r2.xy, r0;
|
||||
|
||||
dp3 r0.x, r3, c18.xyww;
|
||||
dp3 r0.y, r3, c18.zxww;
|
||||
mov r3.xy, r0;
|
||||
|
||||
mov r0.zw, c16.zzxz;
|
||||
|
||||
dp3 r0.x, r1, r1;
|
||||
rsq r0.x, r0.x;
|
||||
mul oT1, r1.xyzw, r0.xxxw;
|
||||
// mul r8, r1.xyzw, r0.xxxw; // VISUAL
|
||||
|
||||
dp3 r0.x, r2, r2;
|
||||
rsq r0.x, r0.x;
|
||||
mul oT3, r2.xyzw, r0.xxxw;
|
||||
// mul r9, r2.xyzw, r0.xxxw; // VISUAL
|
||||
|
||||
dp3 r0.x, r3, r3;
|
||||
rsq r0.x, r0.x;
|
||||
mul oT2, r3.xyzw, r0.xxxw;
|
||||
// mul r9, r3.xyzw, r0.xxxw; // VISUAL
|
||||
|
||||
// mul r3, r3.xzyw, r0.xxxw;
|
||||
// mul r3.xy, r3, -c16.zzzz;
|
||||
|
||||
|
||||
/*
|
||||
// Want:
|
||||
// oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
|
||||
// oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
|
||||
// oT3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
|
||||
// with BIN, TAN, and NORM normalized.
|
||||
// Unnormalized, we have
|
||||
// BIN = (1, 0, -r7.x) where r7 == accumCos
|
||||
// TAN = (0, 1, -r7.y)
|
||||
// NORM= (r7.x, r7.y, 1)
|
||||
// So, unnormalized, we have
|
||||
// oT1 = (1, 0, r7.x, view2pos.x)
|
||||
// oT2 = (0, 1, r7.y, view2pos.y)
|
||||
// oT3 = (-r7.x, -r7.y, 1, view2pos.z)
|
||||
// which is just reversing the signs on the accumCos
|
||||
// terms above. So the normalized version is just
|
||||
// reversing the signs on the normalized version above.
|
||||
*/
|
||||
//mov oT3, r4;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c21; // HACKAGE
|
||||
//mov r6.w, c16.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
// Transform the (wave-displaced) position in r6 by c0 into the temp r9 rather
// than straight into oPos; r9.w is read for the fog term below.
m4x4 r9, r6, c0;
|
||||
// oFog = (r9.w + c28.x) * c28.y
add r10.x, r9.w, c28.x;
|
||||
mul oFog, r10.x, c28.y;
|
||||
//mov oFog, c16.y; // TESTFOGHACK
|
||||
mov oPos, r9;
|
||||
|
||||
mov oD0, c4; // SEENORM
|
||||
|
||||
// Transform our uvw
|
||||
dp4 r0.x, v0, c10;
|
||||
dp4 r0.y, v0, c11;
|
||||
|
||||
//mov r0.zw, c16.xxxz;
|
||||
mov oT0, r0
|
||||
|
||||
// Questionable attenuation follows
|
||||
// Find vector from this point to camera and normalize
|
||||
sub r0, c17, r6;
|
||||
dp3 r1.x, r0, r0;
|
||||
rsq r1.x, r1.x;
|
||||
mul r0, r0, r1.xxxx;
|
||||
// Dot that with the computed normal
|
||||
dp3 r1.x, r0, r11;
|
||||
mul r1.x, r1.x, v5.z;
|
||||
// dp3 r1.x, r0, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
|
||||
// Map dot=1 => 0, dot=0 => 1
|
||||
sub r1.xyzw, c16.zzzz, r1.xxxx;
|
||||
add r1.w, r1.wwww, c16.zzzz;
|
||||
mul r1.w, r1.wwww, c16.yyyy;
|
||||
// No need to clamp, since the destination register (in the pixel shader)
|
||||
// will saturate [0..1] anyway.
|
||||
//%%% mul r1.w, r1.w, r4.x;
|
||||
//%%% mul r1.xyz, r1.xyz, r4.yyy;
|
||||
mul r1, r1, r4.yyyx; // HACKTESTCOLOR
|
||||
mul r1.xyz, r1, r8.xxx; // WAVEFACE
|
||||
mul r1.w, r1.wwww, v5.xxxx;
|
||||
mul oD1, r1, c20;
|
||||
|
||||
// mov oD1, r4.yyyy;
|
||||
|
||||
//mov oD1, c16.zzzz; // HACKAGE
|
||||
// mov oD1, r9;
|
||||
// mov oD1, r8.xzyw;
|
||||
|
@ -1,437 +1,437 @@
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c21; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c16.zzzz;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = reflection strength (transparency)
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c25 = waterlevel + offset
|
||||
// c26 = (maxAtten - minAtten) / depthFalloff
|
||||
// c27 = minAtten.
|
||||
// And in particular:
|
||||
// c25.w = waterlevel
|
||||
// c26.w = 1.f;
|
||||
// c27.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c8, r6.xxxx;
|
||||
mad r0, c9, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c15.wwww;
|
||||
add r0, r0, c15.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist); (vs.1.1 expp returns the fractional part of its scalar source in .y)
|
||||
expp r1.y, r0.xxxx // r1.y = frac(r0.x); r1.y doubles as scratch
|
||||
mov r1.x, r1.yyyy // r1.x = frac(r0.x)
|
||||
expp r1.y, r0.zzzz // r1.y = frac(r0.z)
|
||||
mov r1.z, r1.yyyy // r1.z = frac(r0.z)
|
||||
expp r1.y, r0.wwww // r1.y = frac(r0.w)
|
||||
mov r1.w, r1.yyyy // r1.w = frac(r0.w)
|
||||
expp r1.y, r0.yyyy // y goes last so its result stays in r1.y
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c13.wwww, r2;
|
||||
mad r1, r4, c14.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c25, r6.zzzz;
|
||||
mul r4, r4, c26;
|
||||
add r4, r4, c27;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c16.zzzz;
|
||||
max r4.xyz, r4, c16.xxxx;
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c24;
|
||||
max r11, r11, c16.xxxx;
|
||||
min r11, r11, c16.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r5, r2, c7;
|
||||
// r5 is now T = sum(Ai * sin())
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r5, c16.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c25.w;
|
||||
max r6.z, r6.z, r8.z; // CLAMP
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r7, r1, c7;
|
||||
// cosDist *= filter;
|
||||
mul r7, r7, r11;
|
||||
// r7 is now M = sum(Ai * cos())
|
||||
|
||||
// Okay, here we go:
|
||||
// W == sum(k w Dir.x^2 A sin())
|
||||
// V == sum(k w Dir.x Dir.y A sin())
|
||||
// U == sum(k w Dir.y^2 A sin())
|
||||
//
|
||||
// T == sum(A sin())
|
||||
//
|
||||
// S == sum(k Dir.x A cos())
|
||||
// R == sum(k Dir.y A cos())
|
||||
//
|
||||
// Q == sum(k w A cos())
|
||||
//
|
||||
// M == sum(A cos())
|
||||
//
|
||||
// P == sum(w Dir.x A cos())
|
||||
// N == sum(w Dir.y A cos())
|
||||
//
|
||||
// Then:
|
||||
// Pos = (in.x + S, in.y + R, waterheight + T)
|
||||
//
|
||||
// Bin = (1 - W, -V, P)
|
||||
// Tan = (-V, 1 - U, N)
|
||||
// Nor = (-P, -N, 1 - Q)
|
||||
//
|
||||
// But we want the transpose of that to go into r1-r3
|
||||
|
||||
dp4 r10.x, r7, c29;
|
||||
add r6.x, r6.x, r10.x;
|
||||
dp4 r10.x, r7, c30;
|
||||
add r6.y, r6.y, r10.x;
|
||||
|
||||
dp4 r1.x, r5, -c34;
|
||||
dp4 r2.x, r5, -c35;
|
||||
dp4 r3.x, r7, c31;
|
||||
add r1.x, r1.xxxx, c16.zzzz;
|
||||
|
||||
dp4 r1.y, r5, -c35;
|
||||
dp4 r2.y, r5, -c36;
|
||||
dp4 r3.y, r7, c32;
|
||||
add r2.y, r2.yyyy, c16.zzzz;
|
||||
|
||||
dp4 r1.z, r7, -c31;
|
||||
dp4 r2.z, r7, -c32;
|
||||
dp4 r3.z, r5, -c33;
|
||||
add r3.z, r3.zzzz, c16.zzzz;
|
||||
|
||||
|
||||
// Calculate our normalized vector from camera to vtx.
|
||||
// We'll use that a couple of times coming up.
|
||||
sub r5, r6, c17; // r5 = displaced world-space vtx (r6) - c17 (camera position per the comments here)
|
||||
dp3 r10.x, r5, r5; // r10.x = squared length of r5.xyz
|
||||
rsq r10.x, r10.x; // r10.x = 1 / length
|
||||
mul r5, r5, r10.xxxx; // r5 = D (copied into r0 further down)
|
||||
rcp r5.w, r10.x; // r5.w = length, i.e. vtx-to-camera distance
|
||||
|
||||
// Calculate our specular attenuation from and into r5.w.
|
||||
// r5.w starts off the distance from vtx to camera.
|
||||
// Once we've turned it into an attenuation factor, we
|
||||
// scale the x and y of our normal map (through the transform bases)
|
||||
// so that in the distance, the normal map is flat. Note that the
|
||||
// geometry in the distance isn't necessarily flat. We want to apply
|
||||
// this scale to the normal read from the normal map before it is
|
||||
// transformed into surface space.
|
||||
add r5.w, r5.w, c11.x;
|
||||
mul r5.w, r5.w, c11.y;
|
||||
min r5.w, r5.w, c16.z;
|
||||
max r5.w, r5.w, c16.x;
|
||||
mul r5.w, r5.w, r5.w; // Square it to account for perspective
|
||||
mul r5.w, r5.w, c11.z;
|
||||
|
||||
|
||||
// Normalize?
|
||||
|
||||
// We can either calculate an orthonormal basis from the
|
||||
// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
|
||||
// or compute our basis directly from the partial derivatives, with
|
||||
// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
|
||||
//
|
||||
// These work out to identically the same result, so we'll compute directly
|
||||
// from the partials because it takes 2 fewer instructions.
|
||||
//
|
||||
// Note that our basis is NOT orthonormal. The Normal is equal to
|
||||
// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
|
||||
// are both correct tangents to the surface, and their projections on the XY plane
|
||||
// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
|
||||
// Not really. I'm actually not really sure which is more "proper" for bump mapping.
|
||||
//
|
||||
// Note also that we add when we should subtract and subtract when we should
|
||||
// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
|
||||
// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
|
||||
// explanation.
|
||||
//
|
||||
// Binormal = Y % Normal
|
||||
// Cross product3 is:
|
||||
// mul res.xyz, a.yzx, b.zxy
|
||||
// mad res.xyz, -a.zxy, b.yzx, res.xyz
|
||||
// mul r1.xyz, c16.zxx, r3.zxy;
|
||||
// mad r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
|
||||
|
||||
// Tangent = Normal % X
|
||||
// mul r2.xyz, r3.yzx, c16.xzx;
|
||||
// mad r2.xyz, -r3.zxy, c16.xxz, r2;
|
||||
|
||||
//mad r1, r5.wwww, c16.zxxx, r7.zzxz;
|
||||
//mad r2, r5.wwww, c16.xzxx, r7.zzyz;
|
||||
//mul r3.xy, r3.xy, r5.wwww;
|
||||
|
||||
|
||||
// Note that we're swapping z and y to match our environment map tools in max.
|
||||
// We do this through our normal map transform (oT1, oT2, oT3), making it
|
||||
// a concatenation of:
|
||||
//
|
||||
// rotate about Z (blue) to turn our map into the wind
|
||||
// windRot = | dirY -dirX 0 |
|
||||
// | dirX dirY 0 |
|
||||
// | 0 0 1 |
|
||||
//
|
||||
// swap our Y and Z axes to match our environment map
|
||||
// swapYZ = | 1 0 0 |
|
||||
// | 0 0 1 |
|
||||
// | 0 1 0 |
|
||||
//
|
||||
// rotate the normal into the surface's tangent space basis
|
||||
// basis = | Bx Tx Nx |
|
||||
// | By Ty Ny |
|
||||
// | Bz Tz Nz |
|
||||
//
|
||||
// Note that we've constructed the basis by taking advantage of the
|
||||
// matrix being a pure rotation, as noted below, so r1, r2 and r3
|
||||
// are actually constructed as:
|
||||
// basis = | Bx -By -Bz |
|
||||
// | -Tx Ty -Tz |
|
||||
// | -Nx -Ny -Nz |
|
||||
//
|
||||
// Then the final normal map transform is:
|
||||
//
|
||||
// basis * swapYZ * windRot [ * normal ]
|
||||
|
||||
|
||||
// sub r1.w, c17.x, r6.x;
|
||||
// sub r2.w, c17.z, r6.z;
|
||||
// sub r3.w, c17.y, r6.y;
|
||||
|
||||
// Big note here. All this math can blow up if the camera position
|
||||
// is outside the environment sphere. It's assumed that's dealt
|
||||
// with in the app setting up the constants. For that reason, the
|
||||
// camera position used here might not be the real local camera position,
|
||||
// which is needed for the angular attenuation, so we burn another constant
|
||||
// with our pseudo-camera position. To restrain the pseudo-camera from
|
||||
// leaving the sphere, we make:
|
||||
// pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
|
||||
// where dist = |realPos - envCenter|
|
||||
|
||||
// So, our "finitized" eyeray is:
|
||||
// camPos + D * t - envCenter = D * t - (envCenter - camPos)
|
||||
// with
|
||||
// D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
|
||||
// and
|
||||
// t = D dot F + sqrt( (D dot F)^2 - G )
|
||||
// with
|
||||
// F = (envCenter - camPos) => c19.xyz
|
||||
// G = F^2 - R^2 => c19.w
|
||||
// R = environment radius. => unused
|
||||
//
|
||||
// This all derives from the positive root of equation
|
||||
// (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
|
||||
// In other words, where on a sphere of radius R centered about envCenter
|
||||
// does the ray from the real camera position through this point hit.
|
||||
//
|
||||
// Note that F, G, and R are all constants (one point, two scalars).
|
||||
//
|
||||
// So first we calculate D into r0,
|
||||
// then D dot F into r10.x,
|
||||
// then (D dot F)^2 - G into r10.y
|
||||
// then rsq( (D dot F)^2 - G ) into r9.x;
|
||||
// then t = r10.z = r10.x + r10.y * r9.x;
|
||||
// and
|
||||
// r0 = D * t - (envCenter - camPos)
|
||||
// = r0 * r10.zzzz - F;
|
||||
//
|
||||
mov r0, r5; // r0 = D
|
||||
|
||||
dp3 r10.x, r0, c19; // r10.x = D dot F
|
||||
mad r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G
|
||||
|
||||
rsq r9.x, r10.y; // r9.x = 1/SQRT((D dot F)^2 - G)
|
||||
|
||||
mad r10.z, r10.y, r9.x, r10.x; // r10.z = D dot F + SQRT((D dot F)^2 - G)
|
||||
|
||||
mad r0.xyz, r0, r10.zzz, -c19.xyz; // r0.xyz = D * t - (envCenter - camPos)
|
||||
|
||||
// ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump.
|
||||
dp3 r10.x, r0, r0;
|
||||
rsq r9.x, r10.x;
|
||||
mul r0.xyz, r0.xyz, r9.xxx;
|
||||
|
||||
mov r1.w, -r0.x;
|
||||
mov r2.w, -r0.y;
|
||||
mov r3.w, -r0.z;
|
||||
|
||||
mov r0.zw, c16.zzxz;
|
||||
|
||||
dp3 r0.x, r1, r1;
|
||||
rsq r0.xy, r0.x;
|
||||
mul r0.x, r0.x, r5.w;
|
||||
mul oT1, r1.xyzw, r0.xxyw;
|
||||
// mul r8, r1.xyzw, r0.xxxw; // VISUAL
|
||||
mul r11.x, r1.z, r0.y;
|
||||
|
||||
|
||||
dp3 r0.x, r2, r2;
|
||||
rsq r0.xy, r0.x;
|
||||
mul r0.x, r0.x, r5.w;
|
||||
mul oT3, r2.xyzw, r0.xxyw;
|
||||
// mul r9, r2.xyzw, r0.xxxw; // VISUAL
|
||||
mul r11.y, r2.z, r0.y;
|
||||
|
||||
dp3 r0.x, r3, r3;
|
||||
rsq r0.xy, r0.x;
|
||||
mul r0.x, r0.x, r5.w;
|
||||
mul oT2, r3.xyzw, r0.xxyw;
|
||||
// mul r9, r3.xyzw, r0.xxxw; // VISUAL
|
||||
mul r11.z, r3.z, r0.y;
|
||||
|
||||
|
||||
/*
|
||||
// Want:
|
||||
// oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
|
||||
// oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
|
||||
// oT3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
|
||||
// with BIN, TAN, and NORM normalized.
|
||||
// Unnormalized, we have
|
||||
// BIN = (1, 0, -r7.x) where r7 == accumCos
|
||||
// TAN = (0, 1, -r7.y)
|
||||
// NORM= (r7.x, r7.y, 1)
|
||||
// So, unnormalized, we have
|
||||
// oT1 = (1, 0, r7.x, view2pos.x)
|
||||
// oT2 = (0, 1, r7.y, view2pos.y)
|
||||
// oT3 = (-r7.x, -r7.y, 1, view2pos.z)
|
||||
// which is just reversing the signs on the accumCos
|
||||
// terms above. So the normalized version is just
|
||||
// reversing the signs on the normalized version above.
|
||||
*/
|
||||
//mov oT3, r4;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c21; // HACKAGE
|
||||
//mov r6.w, c16.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c28.x;
|
||||
mul oFog, r10.x, c28.y;
|
||||
//mov oFog, c16.zzzz; // TESTFOGHACK
|
||||
mov oPos, r9;
|
||||
|
||||
// Transform our uvw
|
||||
mul r0.x, v0.xxxx, c10.xxxx;
|
||||
mul r0.y, v0.yyyy, c10.xxxx;
|
||||
|
||||
//mov r0.zw, c16.xxxz;
|
||||
mov oT0, r0
|
||||
|
||||
// Questionable attenuation follows
|
||||
// vector from this point to camera and normalize stashed in r5
|
||||
// Dot that with the computed normal
|
||||
dp3 r1.x, -r5, r11;
|
||||
mul r1.x, r1.x, v5.z;
|
||||
// dp3 r1.x, r5, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
|
||||
// Map dot=1 => 0, dot=0 => 1
|
||||
sub r1.xyzw, c16.zzzz, r1.xxxx;
|
||||
add r1.w, r1.wwww, c16.zzzz;
|
||||
mul r1.w, r1.wwww, c16.yyyy;
|
||||
// No need to clamp, since the destination register (in the pixel shader)
|
||||
// will saturate [0..1] anyway.
|
||||
//%%% mul r1.w, r1.w, r4.x;
|
||||
//%%% mul r1.xyz, r1.xyz, r4.yyy;
|
||||
mul r1, r1, r4.yyyx; // HACKTESTCOLOR
|
||||
//mul r1.xyz, r1, r8.xxx; // WAVEFACE
|
||||
mul r1.w, r1.wwww, v5.xxxx;
|
||||
mul r1.w, r1.wwww, c4.wwww;
|
||||
mul oD0, r1, c20;
|
||||
|
||||
mov oD1, c4; // SEENORM
|
||||
//mov oD1, c16.xxxx;
|
||||
// mov oD1, r4.yyyy;
|
||||
|
||||
//mov oD1, c16.zzzz; // HACKAGE
|
||||
// mov oD1, r9;
|
||||
// mov oD1, r8.xzyw;
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c21; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c16.zzzz;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = reflection strength (transparency)
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c25 = waterlevel + offset
|
||||
// c26 = (maxAtten - minAtten) / depthFalloff
|
||||
// c27 = minAtten.
|
||||
// And in particular:
|
||||
// c25.w = waterlevel
|
||||
// c26.w = 1.f;
|
||||
// c27.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c8, r6.xxxx;
|
||||
mad r0, c9, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c15.wwww;
|
||||
add r0, r0, c15.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist); (vs.1.1 expp returns the fractional part of its scalar source in .y)
|
||||
expp r1.y, r0.xxxx // r1.y = frac(r0.x); r1.y doubles as scratch
|
||||
mov r1.x, r1.yyyy // r1.x = frac(r0.x)
|
||||
expp r1.y, r0.zzzz // r1.y = frac(r0.z)
|
||||
mov r1.z, r1.yyyy // r1.z = frac(r0.z)
|
||||
expp r1.y, r0.wwww // r1.y = frac(r0.w)
|
||||
mov r1.w, r1.yyyy // r1.w = frac(r0.w)
|
||||
expp r1.y, r0.yyyy // y goes last so its result stays in r1.y
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c13.wwww, r2;
|
||||
mad r1, r4, c14.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c25, r6.zzzz;
|
||||
mul r4, r4, c26;
|
||||
add r4, r4, c27;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c16.zzzz;
|
||||
max r4.xyz, r4, c16.xxxx;
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c24;
|
||||
max r11, r11, c16.xxxx;
|
||||
min r11, r11, c16.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r5, r2, c7;
|
||||
// r5 is now T = sum(Ai * sin())
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r5, c16.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c25.w;
|
||||
max r6.z, r6.z, r8.z; // CLAMP
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r7, r1, c7;
|
||||
// cosDist *= filter;
|
||||
mul r7, r7, r11;
|
||||
// r7 is now M = sum(Ai * cos())
|
||||
|
||||
// Okay, here we go:
|
||||
// W == sum(k w Dir.x^2 A sin())
|
||||
// V == sum(k w Dir.x Dir.y A sin())
|
||||
// U == sum(k w Dir.y^2 A sin())
|
||||
//
|
||||
// T == sum(A sin())
|
||||
//
|
||||
// S == sum(k Dir.x A cos())
|
||||
// R == sum(k Dir.y A cos())
|
||||
//
|
||||
// Q == sum(k w A cos())
|
||||
//
|
||||
// M == sum(A cos())
|
||||
//
|
||||
// P == sum(w Dir.x A cos())
|
||||
// N == sum(w Dir.y A cos())
|
||||
//
|
||||
// Then:
|
||||
// Pos = (in.x + S, in.y + R, waterheight + T)
|
||||
//
|
||||
// Bin = (1 - W, -V, P)
|
||||
// Tan = (-V, 1 - U, N)
|
||||
// Nor = (-P, -N, 1 - Q)
|
||||
//
|
||||
// But we want the transpose of that to go into r1-r3
|
||||
|
||||
dp4 r10.x, r7, c29;
|
||||
add r6.x, r6.x, r10.x;
|
||||
dp4 r10.x, r7, c30;
|
||||
add r6.y, r6.y, r10.x;
|
||||
|
||||
dp4 r1.x, r5, -c34;
|
||||
dp4 r2.x, r5, -c35;
|
||||
dp4 r3.x, r7, c31;
|
||||
add r1.x, r1.xxxx, c16.zzzz;
|
||||
|
||||
dp4 r1.y, r5, -c35;
|
||||
dp4 r2.y, r5, -c36;
|
||||
dp4 r3.y, r7, c32;
|
||||
add r2.y, r2.yyyy, c16.zzzz;
|
||||
|
||||
dp4 r1.z, r7, -c31;
|
||||
dp4 r2.z, r7, -c32;
|
||||
dp4 r3.z, r5, -c33;
|
||||
add r3.z, r3.zzzz, c16.zzzz;
|
||||
|
||||
|
||||
// Calculate our normalized vector from camera to vtx.
|
||||
// We'll use that a couple of times coming up.
|
||||
sub r5, r6, c17; // r5 = displaced world-space vtx (r6) - c17 (camera position per the comments here)
|
||||
dp3 r10.x, r5, r5; // r10.x = squared length of r5.xyz
|
||||
rsq r10.x, r10.x; // r10.x = 1 / length
|
||||
mul r5, r5, r10.xxxx; // r5 = D (copied into r0 further down)
|
||||
rcp r5.w, r10.x; // r5.w = length, i.e. vtx-to-camera distance
|
||||
|
||||
// Calculate our specular attenuation from and into r5.w.
|
||||
// r5.w starts off the distance from vtx to camera.
|
||||
// Once we've turned it into an attenuation factor, we
|
||||
// scale the x and y of our normal map (through the transform bases)
|
||||
// so that in the distance, the normal map is flat. Note that the
|
||||
// geometry in the distance isn't necessarily flat. We want to apply
|
||||
// this scale to the normal read from the normal map before it is
|
||||
// transformed into surface space.
|
||||
add r5.w, r5.w, c11.x;
|
||||
mul r5.w, r5.w, c11.y;
|
||||
min r5.w, r5.w, c16.z;
|
||||
max r5.w, r5.w, c16.x;
|
||||
mul r5.w, r5.w, r5.w; // Square it to account for perspective
|
||||
mul r5.w, r5.w, c11.z;
|
||||
|
||||
|
||||
// Normalize?
|
||||
|
||||
// We can either calculate an orthonormal basis from the
|
||||
// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
|
||||
// or compute our basis directly from the partial derivatives, with
|
||||
// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
|
||||
//
|
||||
// These work out to identically the same result, so we'll compute directly
|
||||
// from the partials because it takes 2 fewer instructions.
|
||||
//
|
||||
// Note that our basis is NOT orthonormal. The Normal is equal to
|
||||
// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
|
||||
// are both correct tangents to the surface, and their projections on the XY plane
|
||||
// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
|
||||
// Not really. I'm actually not really sure which is more "proper" for bump mapping.
|
||||
//
|
||||
// Note also that we add when we should subtract and subtract when we should
|
||||
// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
|
||||
// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
|
||||
// explanation.
|
||||
//
|
||||
// Binormal = Y % Normal
|
||||
// Cross product3 is:
|
||||
// mul res.xyz, a.yzx, b.zxy
|
||||
// mad res.xyz, -a.zxy, b.yzx, res.xyz
|
||||
// mul r1.xyz, c16.zxx, r3.zxy;
|
||||
// mad r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
|
||||
|
||||
// Tangent = Normal % X
|
||||
// mul r2.xyz, r3.yzx, c16.xzx;
|
||||
// mad r2.xyz, -r3.zxy, c16.xxz, r2;
|
||||
|
||||
//mad r1, r5.wwww, c16.zxxx, r7.zzxz;
|
||||
//mad r2, r5.wwww, c16.xzxx, r7.zzyz;
|
||||
//mul r3.xy, r3.xy, r5.wwww;
|
||||
|
||||
|
||||
// Note that we're swapping z and y to match our environment map tools in max.
|
||||
// We do this through our normal map transform (oT1, oT2, oT3), making it
|
||||
// a concatenation of:
|
||||
//
|
||||
// rotate about Z (blue) to turn our map into the wind
|
||||
// windRot = | dirY -dirX 0 |
|
||||
// | dirX dirY 0 |
|
||||
// | 0 0 1 |
|
||||
//
|
||||
// swap our Y and Z axes to match our environment map
|
||||
// swapYZ = | 1 0 0 |
|
||||
// | 0 0 1 |
|
||||
// | 0 1 0 |
|
||||
//
|
||||
// rotate the normal into the surface's tangent space basis
|
||||
// basis = | Bx Tx Nx |
|
||||
// | By Ty Ny |
|
||||
// | Bz Tz Nz |
|
||||
//
|
||||
// Note that we've constructed the basis by taking advantage of the
|
||||
// matrix being a pure rotation, as noted below, so r1, r2 and r3
|
||||
// are actually constructed as:
|
||||
// basis = | Bx -By -Bz |
|
||||
// | -Tx Ty -Tz |
|
||||
// | -Nx -Ny -Nz |
|
||||
//
|
||||
// Then the final normal map transform is:
|
||||
//
|
||||
// basis * swapYZ * windRot [ * normal ]
|
||||
|
||||
|
||||
// sub r1.w, c17.x, r6.x;
|
||||
// sub r2.w, c17.z, r6.z;
|
||||
// sub r3.w, c17.y, r6.y;
|
||||
|
||||
// Big note here. All this math can blow up if the camera position
|
||||
// is outside the environment sphere. It's assumed that's dealt
|
||||
// with in the app setting up the constants. For that reason, the
|
||||
// camera position used here might not be the real local camera position,
|
||||
// which is needed for the angular attenuation, so we burn another constant
|
||||
// with our pseudo-camera position. To restrain the pseudo-camera from
|
||||
// leaving the sphere, we make:
|
||||
// pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
|
||||
// where dist = |realPos - envCenter|
|
||||
|
||||
// So, our "finitized" eyeray is:
|
||||
// camPos + D * t - envCenter = D * t - (envCenter - camPos)
|
||||
// with
|
||||
// D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
|
||||
// and
|
||||
// t = D dot F + sqrt( (D dot F)^2 - G )
|
||||
// with
|
||||
// F = (envCenter - camPos) => c19.xyz
|
||||
// G = F^2 - R^2 => c19.w
|
||||
// R = environment radius. => unused
|
||||
//
|
||||
// This all derives from the positive root of equation
|
||||
// (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
|
||||
// In other words, where on a sphere of radius R centered about envCenter
|
||||
// does the ray from the real camera position through this point hit.
|
||||
//
|
||||
// Note that F, G, and R are all constants (one point, two scalars).
|
||||
//
|
||||
// So first we calculate D into r0,
|
||||
// then D dot F into r10.x,
|
||||
// then (D dot F)^2 - G into r10.y
|
||||
// then rsq( (D dot F)^2 - G ) into r9.x;
|
||||
// then t = r10.z = r10.x + r10.y * r9.x;
|
||||
// and
|
||||
// r0 = D * t - (envCenter - camPos)
|
||||
// = r0 * r10.zzzz - F;
|
||||
//
|
||||
mov r0, r5; // r0 = D
|
||||
|
||||
dp3 r10.x, r0, c19; // r10.x = D dot F
|
||||
mad r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G
|
||||
|
||||
rsq r9.x, r10.y; // r9.x = 1/SQRT((D dot F)^2 - G)
|
||||
|
||||
mad r10.z, r10.y, r9.x, r10.x; // r10.z = D dot F + SQRT((D dot F)^2 - G)
|
||||
|
||||
mad r0.xyz, r0, r10.zzz, -c19.xyz; // r0.xyz = D * t - (envCenter - camPos)
|
||||
|
||||
// ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump.
|
||||
dp3 r10.x, r0, r0;
|
||||
rsq r9.x, r10.x;
|
||||
mul r0.xyz, r0.xyz, r9.xxx;
|
||||
|
||||
mov r1.w, -r0.x;
|
||||
mov r2.w, -r0.y;
|
||||
mov r3.w, -r0.z;
|
||||
|
||||
mov r0.zw, c16.zzxz;
|
||||
|
||||
dp3 r0.x, r1, r1;
|
||||
rsq r0.xy, r0.x;
|
||||
mul r0.x, r0.x, r5.w;
|
||||
mul oT1, r1.xyzw, r0.xxyw;
|
||||
// mul r8, r1.xyzw, r0.xxxw; // VISUAL
|
||||
mul r11.x, r1.z, r0.y;
|
||||
|
||||
|
||||
dp3 r0.x, r2, r2;
|
||||
rsq r0.xy, r0.x;
|
||||
mul r0.x, r0.x, r5.w;
|
||||
mul oT3, r2.xyzw, r0.xxyw;
|
||||
// mul r9, r2.xyzw, r0.xxxw; // VISUAL
|
||||
mul r11.y, r2.z, r0.y;
|
||||
|
||||
dp3 r0.x, r3, r3;
|
||||
rsq r0.xy, r0.x;
|
||||
mul r0.x, r0.x, r5.w;
|
||||
mul oT2, r3.xyzw, r0.xxyw;
|
||||
// mul r9, r3.xyzw, r0.xxxw; // VISUAL
|
||||
mul r11.z, r3.z, r0.y;
|
||||
|
||||
|
||||
/*
|
||||
// Want:
|
||||
// oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
|
||||
// oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
|
||||
// oT3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
|
||||
// with BIN, TAN, and NORM normalized.
|
||||
// Unnormalized, we have
|
||||
// BIN = (1, 0, -r7.x) where r7 == accumCos
|
||||
// TAN = (0, 1, -r7.y)
|
||||
// NORM= (r7.x, r7.y, 1)
|
||||
// So, unnormalized, we have
|
||||
// oT1 = (1, 0, r7.x, view2pos.x)
|
||||
// oT2 = (0, 1, r7.y, view2pos.y)
|
||||
// oT3 = (-r7.x, -r7.y, 1, view2pos.z)
|
||||
// which is just reversing the signs on the accumCos
|
||||
// terms above. So the normalized version is just
|
||||
// reversing the signs on the normalized version above.
|
||||
*/
|
||||
//mov oT3, r4;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c21; // HACKAGE
|
||||
//mov r6.w, c16.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c28.x;
|
||||
mul oFog, r10.x, c28.y;
|
||||
//mov oFog, c16.zzzz; // TESTFOGHACK
|
||||
mov oPos, r9;
|
||||
|
||||
// Transform our uvw
|
||||
mul r0.x, v0.xxxx, c10.xxxx;
|
||||
mul r0.y, v0.yyyy, c10.xxxx;
|
||||
|
||||
//mov r0.zw, c16.xxxz;
|
||||
mov oT0, r0
|
||||
|
||||
// Questionable attenuation follows
|
||||
// vector from this point to camera and normalize stashed in r5
|
||||
// Dot that with the computed normal
|
||||
dp3 r1.x, -r5, r11;
|
||||
mul r1.x, r1.x, v5.z;
|
||||
// dp3 r1.x, r5, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
|
||||
// Map dot=1 => 0, dot=0 => 1
|
||||
sub r1.xyzw, c16.zzzz, r1.xxxx;
|
||||
add r1.w, r1.wwww, c16.zzzz;
|
||||
mul r1.w, r1.wwww, c16.yyyy;
|
||||
// No need to clamp, since the destination register (in the pixel shader)
|
||||
// will saturate [0..1] anyway.
|
||||
//%%% mul r1.w, r1.w, r4.x;
|
||||
//%%% mul r1.xyz, r1.xyz, r4.yyy;
|
||||
mul r1, r1, r4.yyyx; // HACKTESTCOLOR
|
||||
//mul r1.xyz, r1, r8.xxx; // WAVEFACE
|
||||
mul r1.w, r1.wwww, v5.xxxx;
|
||||
mul r1.w, r1.wwww, c4.wwww;
|
||||
mul oD0, r1, c20;
|
||||
|
||||
mov oD1, c4; // SEENORM
|
||||
//mov oD1, c16.xxxx;
|
||||
// mov oD1, r4.yyyy;
|
||||
|
||||
//mov oD1, c16.zzzz; // HACKAGE
|
||||
// mov oD1, r9;
|
||||
// mov oD1, r8.xzyw;
|
||||
|
@ -1,166 +1,166 @@
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_normal v3
|
||||
|
||||
// c0 = (0,0.5,1.0,2.0) (aka NumericConsts)
|
||||
// c1 = frequencies
|
||||
// c2 = phases
|
||||
// c3 = amplitudes
|
||||
|
||||
// c4 = PiConsts = (1/(2PI), PI/2, PI, 2*PI) // NOTE THIS IS DIFFERENT
|
||||
// because we don't need oonsqpi here but do want 1/2Pi.
|
||||
// c5 = cosConsts = (1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
|
||||
|
||||
// c6 = ((cMax - cMin), cMin, 2ndLayerVOffset, 2ndLayerScale);
|
||||
// c7 = overall color, including current opacity. Will
|
||||
// probably only use the opacity, which we could stuff into
|
||||
// the free slot of c6, but we're a wuss.
|
||||
|
||||
// First, "move" the position to oPos
|
||||
mov r0, v0;
|
||||
//mov r0.y, -r0.yyyy;
|
||||
mov r0.w, c0.zzzz;
|
||||
mov oPos, r0;
|
||||
|
||||
// Now the tricky part.
|
||||
|
||||
// The base layer defines the shape of the incoming wave
|
||||
// The next layer has bubbles (noise) and moves in when the
|
||||
// wave is moving in, moves out when wave is moving out.
|
||||
// So calculate uvw for first layer, second uvw shares u val
|
||||
// and v val is const
|
||||
|
||||
// The .x component of the normal
|
||||
// tells us how much to shift this vert based on the
|
||||
// cumulative cosine wave.
|
||||
|
||||
// Figure c = Sigma((cosine(v0.x * freq + phase) + 1) * amp);
|
||||
// Note that range c must be [0..1]
|
||||
// Also, c(-1) must equal c(1) so it will wrap.
|
||||
// That implies freq = k * 2 * PI, where k is an integer.
|
||||
// To keep c >= 0, we can add 1 to each term in the sigma BEFORE
|
||||
// modulating by the amplitude.
|
||||
// That puts our range at [0..2*sigma(amp)], so as long as
|
||||
// sigma(amp) <= 0.5, we're fine.
|
||||
|
||||
// Get our input to cosine value ((v0.x + 1) * freq + phase), one wave per lane.
|
||||
add r0, v0.xxxx, c0.zzzz; // r0 = v0.x + 1 in every lane (c0.z == 1.0 per the constant table above)
|
||||
mul r0, r0, c1; // times per-wave frequency
|
||||
add r0, r0, c2; // plus per-wave phase
|
||||
|
||||
// Get it into range [-Pi..Pi]
|
||||
// First divide out the 2PI
|
||||
// add r0, r0, c4.zzzz; HACKOUT -- without this +PI pre-bias the reduced angle below is (input - PI) wrapped to [-PI..PI)
|
||||
mul r0, r0, c4.xxxx; // c4.x == 1/(2PI): r0 is now measured in whole periods
|
||||
|
||||
// Do an integer mod: keep only the fractional part of each lane.
|
||||
expp r1.y, r0.xxxx // expp leaves the fractional part of its source in .y (see the D3D vs_1_1 expp reference)
|
||||
mov r1.x, r1.yyyy // park frac(r0.x) in its final lane
|
||||
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy // frac(r0.z)
|
||||
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy // frac(r0.w)
|
||||
|
||||
expp r1.y, r0.yyyy // .y goes last because r1.y was the scratch slot for the other three lanes
|
||||
|
||||
|
||||
//mov oD1, r1; // HACKTEST
|
||||
//mov oD1.w, c0.zzzz; // HACKTEST
|
||||
|
||||
|
||||
// Move back into PI space, w/ *= 2PI, -= PI
|
||||
mul r0, r1, c4.wwww; // c4.w == 2*PI: r0 in [0..2PI)
|
||||
sub r0, r0, c4.zzzz; // c4.z == PI: r0 in [-PI..PI)
|
||||
|
||||
// Okay, compute cosine here: all four lanes at once, Taylor series through r0^6 (kCos == c5).
|
||||
// cos = 1 + r0^2 * kCos.y + r0^4 * kCos.z + r0^6 * kCos.w
|
||||
// Note: could pare off an instr by putting 1/kCos.w in kCos.x,
|
||||
// then doing a mad to get r3=(1/kCos.w + r0^6), then mad that
|
||||
// into the accum by kCos.w to get (1 + r0^6*kCos.w). But who cares.
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r1; // r0^4
|
||||
mul r3, r1, r2; // r0^6
|
||||
|
||||
|
||||
mov r4, c5.xxxx; // r4 = 1
|
||||
mad r4, r1, c5.yyyy, r4; // r4 += r0^2 * kCos.y
|
||||
mad r4, r2, c5.zzzz, r4; // r4 += r0^4 * kCos.z
|
||||
mad r4, r3, c5.wwww, r4; // r4 += r0^6 * kCos.w
|
||||
|
||||
|
||||
add r4, r4, c0.zzzz; // shift from [-1..1] to [0..2]
|
||||
//mov r4, c0.xxxx; // HACKLAST
|
||||
mul r4, r4, c3; // times amplitude
|
||||
|
||||
|
||||
dp4 r5.y, r4, c0.zzzz; // r5.y = sigma((cos() + 1) * amp); dot with (1,1,1,1) sums the four lanes
|
||||
|
||||
// V calculation, goes something like:
|
||||
// For layers 0 and 2:
|
||||
// V = { 1 + c6.z <= r5.y = 0 } * norm.x // norm.x == v3.x
|
||||
// { 1 + 0 <= r5.y = 1 }
|
||||
// For layer 1:
|
||||
// V = (norm.x + c6.z) * c6.w // Scaled like U
|
||||
//
|
||||
// Another way to formulate that is
|
||||
// baseV = cMin + sinAge * (cMax-cMin) where
|
||||
// cMin = 2
|
||||
// cMax = 1
|
||||
// sinAge = color.a = c7.w
|
||||
// delV = sigma(cos) = r5.y
|
||||
// Then
|
||||
// V0 = V2 = (baseV + delV) * v3.x
|
||||
// V1 = (norm.x + baseV + delV) * c6.w
|
||||
//
|
||||
// If we're sure we want cMin = 2 and cMax = 1, then it simplifies to:
|
||||
// baseV = 2 - sinAge = c0.w - c7.w
|
||||
// delV = r5.y
|
||||
// (baseV + delV) = c0.w - c7.w + r5.y
|
||||
//
|
||||
// If we want to stay general, then
|
||||
// baseV = c6.x * c7.w + c6.y
|
||||
// delV = -r5.y
|
||||
// (baseV + delV) = constant + r5.y
|
||||
//
|
||||
|
||||
// make r5.y = (baseV + delV)
|
||||
add r5.y, c6.xxxx, r5.yyyy;
|
||||
|
||||
//mov oD1, r5.yyyy; // HACKLAST
|
||||
//mov oD1.w, c0.zzzz; // HACKLAST
|
||||
|
||||
// Layer 0 / layer 2 texture coordinates.
|
||||
// U is input U (or v0.x * 0.5f + 0.5f)
|
||||
mul r5.x, v0.x, c0.y; // c0.y == 0.5
|
||||
add r5.x, r5.x, c0.y;
|
||||
|
||||
// Fill out wq: w = 0, q = 1.
|
||||
mov r5.zw, c0.xxxz; // was c0.xz, which expands to .xzzz and put c0.z (1.0) in BOTH z and w; .xxxz gives z = c0.x = 0, w = c0.z = 1
|
||||
|
||||
|
||||
mul oT0, r5, v3.wxww; // oT0 = (u * v3.w, v * v3.x, r5.z * v3.w, r5.w * v3.w); presumably v3.w == 1 for a 3-float normal -- TODO confirm
|
||||
// mov oD1, r5.yyyw; // HACKTEST
|
||||
mul oT2, r5, v3.wxww; // layer 2 shares layer 0's coordinates
|
||||
|
||||
// Second uv shares u, but v is norm.x + c6.x;
|
||||
// Then we scale it.
|
||||
// If we want the bubble texture to move with the
|
||||
// wave front, we want the second UV calc (RESCALE1).
|
||||
// But it looks better to have the bubbles moving
|
||||
// slightly faster than the wave front. RESCALE0
|
||||
// happens to do that, because we're scaling the
|
||||
// texture by a factor of 2, but we should probably
|
||||
// supply an independent scale of the motion vs. the
|
||||
// scale of the texture.
|
||||
// Let's move c6 to r6 for ease of use.
|
||||
mov r6, c6;
|
||||
// add r5.x, r5.x, c6.y;
|
||||
// add r5.y, c6.xxxx, v3.xxxx; // RESCALE0
|
||||
// mul r5.xy, r5, c6.wwww; // RESCALE0
|
||||
add r5.x, r5.x, r6.y; // RESCALE1 // offset U
|
||||
mov r5.y, v3.xx; // RESCALE1 // Init V to value stashed in normal.x
|
||||
mul r5.xy, r5, r6.wwww; // RESCALE1 // scale them by single scale value
|
||||
mad r5.y, r6.xx, r6.zz, r5.yy; // RESCALE1 // add in our scaled V offset (sinage * vScale)
|
||||
mov oT1, r5;
|
||||
|
||||
//mov oT0, v7; // HACKTEST
|
||||
//mov oT1, v7; // HACKTEST
|
||||
//mov oT2, v7; // HACKTEST
|
||||
|
||||
// Just slam in the constant color (includes our current opacity).
|
||||
mov oD0, c7;
|
||||
//mov oD0, c0.zzzz; // HACKTEST
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_normal v3
|
||||
|
||||
// c0 = (0,0.5,1.0,2.0) (aka NumericConsts)
|
||||
// c1 = frequencies
|
||||
// c2 = phases
|
||||
// c3 = amplitudes
|
||||
|
||||
// c4 = PiConsts = (1/(2PI), PI/2, PI, 2*PI) // NOTE THIS IS DIFFERENT
|
||||
// because we don't need oonsqpi here but do want 1/2Pi.
|
||||
// c5 = cosConsts = (1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
|
||||
|
||||
// c6 = ((cMax - cMin), cMin, 2ndLayerVOffset, 2ndLayerScale);
|
||||
// c7 = overall color, including current opacity. Will
|
||||
// probably only use the opacity, which we could stuff into
|
||||
// the free slot of c6, but we're a wuss.
|
||||
|
||||
// First, "move" the position to oPos
|
||||
mov r0, v0;
|
||||
//mov r0.y, -r0.yyyy;
|
||||
mov r0.w, c0.zzzz;
|
||||
mov oPos, r0;
|
||||
|
||||
// Now the tricky part.
|
||||
|
||||
// The base layer defines the shape of the incoming wave
|
||||
// The next layer has bubbles (noise) and moves in when the
|
||||
// wave is moving in, moves out when wave is moving out.
|
||||
// So calculate uvw for first layer, second uvw shares u val
|
||||
// and v val is const
|
||||
|
||||
// The .x component of the normal
|
||||
// tells us how much to shift this vert based on the
|
||||
// cumulative cosine wave.
|
||||
|
||||
// Figure c = Sigma((cosine(v0.x * freq + phase) + 1) * amp);
|
||||
// Note that range c must be [0..1]
|
||||
// Also, c(-1) must equal c(1) so it will wrap.
|
||||
// That implies freq = k * 2 * PI, where k is an integer.
|
||||
// To keep c >= 0, we can add 1 to each term in the sigma BEFORE
|
||||
// modulating by the amplitude.
|
||||
// That puts our range at [0..2*sigma(amp)], so as long as
|
||||
// sigma(amp) <= 0.5, we're fine.
|
||||
|
||||
// Get our input to cosine value ((v0.x + 1) * freq + phase), one wave per lane.
|
||||
add r0, v0.xxxx, c0.zzzz; // r0 = v0.x + 1 in every lane (c0.z == 1.0 per the constant table above)
|
||||
mul r0, r0, c1; // times per-wave frequency
|
||||
add r0, r0, c2; // plus per-wave phase
|
||||
|
||||
// Get it into range [-Pi..Pi]
|
||||
// First divide out the 2PI
|
||||
// add r0, r0, c4.zzzz; HACKOUT -- without this +PI pre-bias the reduced angle below is (input - PI) wrapped to [-PI..PI)
|
||||
mul r0, r0, c4.xxxx; // c4.x == 1/(2PI): r0 is now measured in whole periods
|
||||
|
||||
// Do an integer mod: keep only the fractional part of each lane.
|
||||
expp r1.y, r0.xxxx // expp leaves the fractional part of its source in .y (see the D3D vs_1_1 expp reference)
|
||||
mov r1.x, r1.yyyy // park frac(r0.x) in its final lane
|
||||
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy // frac(r0.z)
|
||||
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy // frac(r0.w)
|
||||
|
||||
expp r1.y, r0.yyyy // .y goes last because r1.y was the scratch slot for the other three lanes
|
||||
|
||||
|
||||
//mov oD1, r1; // HACKTEST
|
||||
//mov oD1.w, c0.zzzz; // HACKTEST
|
||||
|
||||
|
||||
// Move back into PI space, w/ *= 2PI, -= PI
|
||||
mul r0, r1, c4.wwww; // c4.w == 2*PI: r0 in [0..2PI)
|
||||
sub r0, r0, c4.zzzz; // c4.z == PI: r0 in [-PI..PI)
|
||||
|
||||
// Okay, compute cosine here: all four lanes at once, Taylor series through r0^6 (kCos == c5).
|
||||
// cos = 1 + r0^2 * kCos.y + r0^4 * kCos.z + r0^6 * kCos.w
|
||||
// Note: could pare off an instr by putting 1/kCos.w in kCos.x,
|
||||
// then doing a mad to get r3=(1/kCos.w + r0^6), then mad that
|
||||
// into the accum by kCos.w to get (1 + r0^6*kCos.w). But who cares.
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r1; // r0^4
|
||||
mul r3, r1, r2; // r0^6
|
||||
|
||||
|
||||
mov r4, c5.xxxx; // r4 = 1
|
||||
mad r4, r1, c5.yyyy, r4; // r4 += r0^2 * kCos.y
|
||||
mad r4, r2, c5.zzzz, r4; // r4 += r0^4 * kCos.z
|
||||
mad r4, r3, c5.wwww, r4; // r4 += r0^6 * kCos.w
|
||||
|
||||
|
||||
add r4, r4, c0.zzzz; // shift from [-1..1] to [0..2]
|
||||
//mov r4, c0.xxxx; // HACKLAST
|
||||
mul r4, r4, c3; // times amplitude
|
||||
|
||||
|
||||
dp4 r5.y, r4, c0.zzzz; // r5.y = sigma((cos() + 1) * amp); dot with (1,1,1,1) sums the four lanes
|
||||
|
||||
// V calculation, goes something like:
|
||||
// For layers 0 and 2:
|
||||
// V = { 1 + c6.z <= r5.y = 0 } * norm.x // norm.x == v3.x
|
||||
// { 1 + 0 <= r5.y = 1 }
|
||||
// For layer 1:
|
||||
// V = (norm.x + c6.z) * c6.w // Scaled like U
|
||||
//
|
||||
// Another way to formulate that is
|
||||
// baseV = cMin + sinAge * (cMax-cMin) where
|
||||
// cMin = 2
|
||||
// cMax = 1
|
||||
// sinAge = color.a = c7.w
|
||||
// delV = sigma(cos) = r5.y
|
||||
// Then
|
||||
// V0 = V2 = (baseV + delV) * v3.x
|
||||
// V1 = (norm.x + baseV + delV) * c6.w
|
||||
//
|
||||
// If we're sure we want cMin = 2 and cMax = 1, then it simplifies to:
|
||||
// baseV = 2 - sinAge = c0.w - c7.w
|
||||
// delV = r5.y
|
||||
// (baseV + delV) = c0.w - c7.w + r5.y
|
||||
//
|
||||
// If we want to stay general, then
|
||||
// baseV = c6.x * c7.w + c6.y
|
||||
// delV = -r5.y
|
||||
// (baseV + delV) = constant + r5.y
|
||||
//
|
||||
|
||||
// make r5.y = (baseV + delV)
|
||||
add r5.y, c6.xxxx, r5.yyyy;
|
||||
|
||||
//mov oD1, r5.yyyy; // HACKLAST
|
||||
//mov oD1.w, c0.zzzz; // HACKLAST
|
||||
|
||||
// Layer 0 / layer 2 texture coordinates.
|
||||
// U is input U (or v0.x * 0.5f + 0.5f)
|
||||
mul r5.x, v0.x, c0.y; // c0.y == 0.5
|
||||
add r5.x, r5.x, c0.y;
|
||||
|
||||
// Fill out wq: w = 0, q = 1.
|
||||
mov r5.zw, c0.xxxz; // was c0.xz, which expands to .xzzz and put c0.z (1.0) in BOTH z and w; .xxxz gives z = c0.x = 0, w = c0.z = 1
|
||||
|
||||
|
||||
mul oT0, r5, v3.wxww; // oT0 = (u * v3.w, v * v3.x, r5.z * v3.w, r5.w * v3.w); presumably v3.w == 1 for a 3-float normal -- TODO confirm
|
||||
// mov oD1, r5.yyyw; // HACKTEST
|
||||
mul oT2, r5, v3.wxww; // layer 2 shares layer 0's coordinates
|
||||
|
||||
// Second uv shares u, but v is norm.x + c6.x;
|
||||
// Then we scale it.
|
||||
// If we want the bubble texture to move with the
|
||||
// wave front, we want the second UV calc (RESCALE1).
|
||||
// But it looks better to have the bubbles moving
|
||||
// slightly faster than the wave front. RESCALE0
|
||||
// happens to do that, because we're scaling the
|
||||
// texture by a factor of 2, but we should probably
|
||||
// supply an independent scale of the motion vs. the
|
||||
// scale of the texture.
|
||||
// Let's move c6 to r6 for ease of use.
|
||||
mov r6, c6;
|
||||
// add r5.x, r5.x, c6.y;
|
||||
// add r5.y, c6.xxxx, v3.xxxx; // RESCALE0
|
||||
// mul r5.xy, r5, c6.wwww; // RESCALE0
|
||||
add r5.x, r5.x, r6.y; // RESCALE1 // offset U
|
||||
mov r5.y, v3.xx; // RESCALE1 // Init V to value stashed in normal.x
|
||||
mul r5.xy, r5, r6.wwww; // RESCALE1 // scale them by single scale value
|
||||
mad r5.y, r6.xx, r6.zz, r5.yy; // RESCALE1 // add in our scaled V offset (sinage * vScale)
|
||||
mov oT1, r5;
|
||||
|
||||
//mov oT0, v7; // HACKTEST
|
||||
//mov oT1, v7; // HACKTEST
|
||||
//mov oT2, v7; // HACKTEST
|
||||
|
||||
// Just slam in the constant color (includes our current opacity).
|
||||
mov oD0, c7;
|
||||
//mov oD0, c0.zzzz; // HACKTEST
|
||||
|
@ -1,471 +1,471 @@
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
|
||||
//m4x4 oPos, v0, c0
|
||||
|
||||
|
||||
/*
|
||||
In fact, I was trying to understand how it was possible to expand FRC into 4
|
||||
instructions...
|
||||
Actually, I can do it in 7 instructions :)
|
||||
|
||||
EXPP r0.y, r1.xxxx
|
||||
MOV r0.x, r0.y
|
||||
EXPP r0.y, r1.zzzz
|
||||
MOV r0.z, r0.y
|
||||
EXPP r0.y, r1.wwww
|
||||
MOV r0.w, r0.y
|
||||
EXPP r0.y, r1.yyyy
|
||||
*/
|
||||
|
||||
/*
|
||||
// Constants for sin and cos. 3 term approximation seems plenty
|
||||
// (it's what i used for software sim, and had no visibly different
|
||||
// results than the math library functions).
|
||||
// When doing sin/cos together, some speedup might be obtained
|
||||
// with good pairing of ops doing them simultaneously. Also save
|
||||
// an instruction calculating r0^3.
|
||||
D3DXVECTOR4 vSin( 1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f );
|
||||
D3DXVECTOR4 vCos( 1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f );
|
||||
*/
|
||||
|
||||
/*
|
||||
Cos():
|
||||
|
||||
|
||||
r1 = mul(r0, r0); // r0^2
|
||||
r2 = mul(r1, r1); // r0^4
|
||||
|
||||
//cos
|
||||
r3 = mad( r1, vCos.yyyy, vCos.xxxx );
|
||||
r3 = mad( r2, vCos.zzzz, r3 );
|
||||
*/
|
||||
|
||||
/*
|
||||
Sin();
|
||||
r1 = mul(r0, r0); // r0^3
|
||||
r1 = mul(r0, r1);
|
||||
r2 = mul(r1, r1); // r0^6
|
||||
|
||||
r3 = mad( r1, vSin.yyyy, r0 );
|
||||
r3 = mad( r2, vSin.zzzz, r3 );
|
||||
*/
|
||||
|
||||
/*
|
||||
SinCos():
|
||||
|
||||
r1 = mul(r0, r0); // r0^2
|
||||
r2 = mul(r1, r0); // r0^3 // probably stall
|
||||
r3 = mul(r1, r1); // r0^4
|
||||
r4 = mul(r2, r2); // r0^6
|
||||
|
||||
r5 = mad( r1, vCos.yyyy, vCos.xxxx );
|
||||
r6 = mad( r2, vSin.yyyy, r0 );
|
||||
r5 = mad( r3, vCos.zzzz, r5 );
|
||||
r6 = mad( r4, vSin.zzzz, r6 );
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
consts
|
||||
kOneOverEightNsqPi = 1.f / ( 8.f * Pi * 4.f * 4.f );
|
||||
kPiOverTwo = Pi / 2.f;
|
||||
kTwoPi = Pi * 2.f;
|
||||
kPi = Pi;
|
||||
*/
|
||||
/*
|
||||
CONSTANT REGISTERS
|
||||
VOLATILE CONSTS - change per invocation
|
||||
C0-C3 local2proj matrix
|
||||
C4 color
|
||||
C5 freq vector
|
||||
C6 phase vector
|
||||
C7 amplitude vector
|
||||
C8 center0
|
||||
C9 center1
|
||||
C10 center2
|
||||
C11 center3
|
||||
C12 scrunch = (scrunch, -scrunch, 0, 1);
|
||||
CONSTANT CONSTS - forever more
|
||||
C13 SinConsts = (1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f);
|
||||
C14 CosConsts = (1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
|
||||
C15 PiConsts = (1.f / (8*Pi*N^2), Pi/2, Pi, 2*Pi);
|
||||
C16 numberConsts = (0.f, 0.5f, 1.f, 2.f);
|
||||
//=====================================
|
||||
TEMP REGISTERS
|
||||
r6 accumPos
|
||||
r7 accumCos
|
||||
r8 toCenter_Y
|
||||
r9 toCenter_X
|
||||
r11 filter
|
||||
r10 tempFloat
|
||||
*/
|
||||
// const float4 kCosConsts = float4(1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
|
||||
// const float4 kSinConsts = float4(1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f);
|
||||
|
||||
// const float4 kPiConsts = float4(1.f / (8.f * 3.1415f * 16.f), 3.1415f*0.5f, 3.1415f, 3.1415f*2.f);
|
||||
// const float4 k0512 = float4(0.f, 0.5f, 1.f, 2.f);
|
||||
|
||||
// accumPos = inPos;
|
||||
mov r6, v0;
|
||||
//
|
||||
// For each wave
|
||||
// {
|
||||
// // First, we want to filter out waves based on distance from the local origin
|
||||
// dist = dp3(inPos, inPos);
|
||||
dp3 r0, r6, r6;
|
||||
// dist *= kFreqSq.xyzw;
|
||||
mul r0, r0, c5;
|
||||
mul r0, r0, c5;
|
||||
// dist *= kOneOverEightNsqPi; // combine this into kFreqSq?
|
||||
mul r0, r0, c15.xxxx;
|
||||
// dist = min(dist, kPiOverTwo);
|
||||
min r0, r0, c15.yyyy;
|
||||
// filter = cos(dist);
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r1; // r1^2
|
||||
mul r1, r1, c14.yyyy;
|
||||
add r11, r1, c14.xxxx;
|
||||
mad r11, r2, c14.zzzz, r11;
|
||||
|
||||
|
||||
// filter *= kAmplitude.xyzw;
|
||||
// mul r11, r11, c7;
|
||||
// // Notice that if dist is a 4vec, all this can be simultaneously done for 4 waves at a time.
|
||||
//
|
||||
// Find the x/y distances and stuff them into r9(x) and r8(y) respectively
|
||||
// toCenter_X.x = dir0.x * pos.x;
|
||||
// toCenter_Y.x = dir0.y * pos.y;
|
||||
mul r0, c8, r6.xxxx;
|
||||
mad r0, c9, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c15.wwww;
|
||||
add r0, r0, c15.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c13.wwww, r2;
|
||||
mad r1, r4, c14.wwww, r1;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c7;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r6.z, r2, c16.zzzz;
|
||||
//
|
||||
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c5;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c7;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c16.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
|
||||
dp4 r7.x, r1, -c8
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c9
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
|
||||
mov r11, c16.xxzx;
|
||||
add r11, r11, r7;
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx;
|
||||
|
||||
//
|
||||
// // Scrunch in based on computed (normalized) normal
|
||||
// temp = mul( accumNorm, kNegScrunchScale ); // kNegScrunchScale = (-scrunchScale, -scrunchScale, 0, 0);
|
||||
// accumPos += temp;
|
||||
dp3 r10.x, r11, c18.zxw; // winddir.x, winddir.y, 0, 0
|
||||
// r10.x tells us whether our normal is opposed to the wind.
|
||||
// If opposed, r10.x = 0, else r10.x = 1.f;
|
||||
// We'll use this to kill the Scrunch on the back sides of waves.
|
||||
// We use it for position right here, and then again for the
|
||||
// normal just down a bit further.
|
||||
slt r10.x, r10.x, c16.x;
|
||||
mul r9, r10.xxxx, r11;
|
||||
|
||||
mad r6, r9, c12.yyzz, r6;
|
||||
|
||||
// mul r6.z, r6.z, r10.xxxx; DEBUG
|
||||
|
||||
// mad r6, r11, c12.yyzz, r6;
|
||||
|
||||
// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
|
||||
// accumCos *= (scrunchScale, scrunchScale, 0, 0);
|
||||
|
||||
mul r2.x, r6.z, c12.x;
|
||||
mul r2.x, r2.x, r10.x; // ???
|
||||
add r2.x, r2.x, c16.z;
|
||||
|
||||
// mul r7, r7, c12.xxzz;
|
||||
mul r7.xy, r7.xy, r2.xx;
|
||||
|
||||
// This is actually wrong, but useful right now for visualizing the generated coords.
|
||||
// See below for correct version.
|
||||
|
||||
sub r3, c16.xxzx, r7.xyzz;
|
||||
|
||||
// Normalize?
|
||||
|
||||
// We can either calculate an orthonormal basis from the
|
||||
// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
|
||||
// or compute our basis directly from the partial derivatives, with
|
||||
// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
|
||||
//
|
||||
// These work out to identically the same result, so we'll compute directly
|
||||
// from the partials because it takes 2 fewer instructions.
|
||||
//
|
||||
// Note that our basis is NOT orthonormal. The Normal is equal to
|
||||
// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
|
||||
// are both correct tangents to the surface, and their projections on the XY plane
|
||||
// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
|
||||
// Not really. I'm actually not really sure which is more "proper" for bump mapping.
|
||||
//
|
||||
// Note also that we add when we should subtract and subtract when we should
|
||||
// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
|
||||
// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
|
||||
// explanation.
|
||||
//
|
||||
// Binormal = Y % Normal
|
||||
// Cross product3 is:
|
||||
// mul res.xyz, a.yzx, b.zxy
|
||||
// mad res.xyz, -a.zxy, b.yzx, res.xyz
|
||||
// mul r1.xyz, c16.zxx, r3.zxy;
|
||||
// mad r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
|
||||
|
||||
// Tangent = Normal % X
|
||||
// mul r2.xyz, r3.yzx, c16.xzx;
|
||||
// mad r2.xyz, -r3.zxy, c16.xxz, r2;
|
||||
|
||||
add r1, c16.zxxx, r7.zzxz;
|
||||
add r2, c16.xzxx, r7.zzyz;
|
||||
|
||||
// Note that we're swapping z and y to match our environment map tools in max.
|
||||
// We do this through our normal map transform (oT1, oT2, oT3), making it
|
||||
// a concatenation of:
|
||||
//
|
||||
// rotate about Z (blue) to turn our map into the wind
|
||||
// windRot = | dirY -dirX 0 |
|
||||
// | dirX dirY 0 |
|
||||
// | 0 0 1 |
|
||||
//
|
||||
// swap our Y and Z axes to match our environment map
|
||||
// swapYZ = | 1 0 0 |
|
||||
// | 0 0 1 |
|
||||
// | 0 1 0 |
|
||||
//
|
||||
// rotate the normal into the surface's tangent space basis
|
||||
// basis = | Bx Tx Nx |
|
||||
// | By Ty Ny |
|
||||
// | Bz Tz Nz |
|
||||
//
|
||||
// Note that we've constructed the basis by taking advantage of the
|
||||
// matrix being a pure rotation, as noted below, so r1, r2 and r3
|
||||
// are actually constructed as:
|
||||
// basis = | Bx -By -Bz |
|
||||
// | -Tx Ty -Tz |
|
||||
// | -Nx -Ny -Nz |
|
||||
//
|
||||
// Then the final normal map transform is:
|
||||
//
|
||||
// basis * swapYZ * windRot [ * normal ]
|
||||
|
||||
|
||||
// sub r1.w, c17.x, r6.x;
|
||||
// sub r2.w, c17.z, r6.z;
|
||||
// sub r3.w, c17.y, r6.y;
|
||||
|
||||
// Big note here. All this math can blow up if the camera position
|
||||
// is outside the environment sphere. It's assumed that's dealt
|
||||
// with in the app setting up the constants. For that reason, the
|
||||
// camera position used here might not be the real local camera position,
|
||||
// which is needed for the angular attenuation, so we burn another constant
|
||||
// with our pseudo-camera position. To restrain the pseudo-camera from
|
||||
// leaving the sphere, we make:
|
||||
// pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
|
||||
// where dist = |realPos - envCenter|
|
||||
|
||||
// So, our "finitized" eyeray is:
|
||||
// camPos + D * t - envCenter = D * t - (envCenter - camPos)
|
||||
// with
|
||||
// D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
|
||||
// and
|
||||
// t = D dot F + sqrt( (D dot F)^2 - G )
|
||||
// with
|
||||
// F = (envCenter - camPos) => c19.xyz
|
||||
// G = F^2 - R^2 => c19.w
|
||||
// R = environment radius. => unused
|
||||
//
|
||||
// This all derives from the positive root of equation
|
||||
// (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
|
||||
// In other words, where on a sphere of radius R centered about envCenter
|
||||
// does the ray from the real camera position through this point hit.
|
||||
//
|
||||
// Note that F, G, and R are all constants (one point, two scalars).
|
||||
//
|
||||
// So first we calculate D into r0,
|
||||
// then D dot F into r10.x,
|
||||
// then (D dot F)^2 - G into r10.y
|
||||
// then rsq( (D dot F)^2 - G ) into r9.x;
|
||||
// then t = r10.z = r10.x + r10.y * r9.x;
|
||||
// and
|
||||
// r0 = D * t - (envCenter - camPos)
|
||||
// = r0 * r10.zzzz - F;
|
||||
//
|
||||
sub r0, r6, c17; // r0 = pos - camPos (c17 is the pseudo-camera position per the derivation above)
|
||||
dp3 r10.x, r0, r0; // |pos - camPos|^2
|
||||
rsq r10.x, r10.x; // 1 / |pos - camPos|
|
||||
mul r0, r0, r10.xxxx; // r0 = D, the normalized eyeray
|
||||
|
||||
|
||||
dp3 r10.x, r0, c19; // r10.x = D dot F (F = c19.xyz)
|
||||
mad r10.y, r10.x, r10.x, -c19.w; // r10.y = (D dot F)^2 - G (G = c19.w)
|
||||
|
||||
|
||||
rsq r9.x, r10.y; // r9.x = 1/sqrt(r10.y); NOTE(review): needs r10.y > 0, which relies on the app keeping the pseudo-camera inside the sphere -- confirm
|
||||
|
||||
|
||||
mad r10.z, r10.y, r9.x, r10.x; // t = r10.y * rsq(r10.y) + D.F = sqrt((D.F)^2 - G) + D.F
|
||||
|
||||
|
||||
mad r0.xyz, r0, r10.zzz, -c19.xyz; // r0 = D * t - F, the finitized eyeray
|
||||
|
||||
|
||||
mov r1.w, -r0.x; // stash the negated ray, one component per basis row,
|
||||
mov r2.w, -r0.y; // in the .w slot of r1/r2/r3 (later written out
|
||||
mov r3.w, -r0.z; // through oT1/oT3/oT2)
|
||||
|
||||
// Now rotate our basis vectors into the wind
|
||||
dp3 r0.x, r1, c18.xyww;
|
||||
dp3 r0.y, r1, c18.zxww;
|
||||
mov r1.xy, r0;
|
||||
|
||||
dp3 r0.x, r2, c18.xyww;
|
||||
dp3 r0.y, r2, c18.zxww;
|
||||
mov r2.xy, r0;
|
||||
|
||||
dp3 r0.x, r3, c18.xyww;
|
||||
dp3 r0.y, r3, c18.zxww;
|
||||
mov r3.xy, r0;
|
||||
|
||||
mov r0.w, c16.zzzz;
|
||||
|
||||
dp3 r0.x, r1, r1;
|
||||
rsq r0.x, r0.x;
|
||||
mul oT1, r1.xyzw, r0.xxxw;
|
||||
// mul r8, r1.xyzw, r0.xxxw; // VISUAL
|
||||
|
||||
dp3 r0.x, r2, r2;
|
||||
rsq r0.x, r0.x;
|
||||
mul oT3, r2.xyzw, r0.xxxw;
|
||||
// mul r9, r2.xyzw, r0.xxxw; // VISUAL
|
||||
|
||||
dp3 r0.x, r3, r3;
|
||||
rsq r0.x, r0.x;
|
||||
mul oT2, r3.xyzw, r0.xxxw;
|
||||
// mul r9, r3.xyzw, r0.xxxw; // VISUAL
|
||||
|
||||
// mul r3, r3.xzyw, r0.xxxw;
|
||||
// mul r3.xy, r3, -c16.zzzz;
|
||||
|
||||
/*
|
||||
// Want:
|
||||
// oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
|
||||
// oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
|
||||
// ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
|
||||
// with BIN, TAN, and NORM normalized.
|
||||
// Unnormalized, we have
|
||||
// BIN = (1, 0, -r7.x) where r7 == accumCos
|
||||
// TAN = (0, 1, -r7.y)
|
||||
// NORM= (r7.x, r7.y, 1)
|
||||
// So, unnormalized, we have
|
||||
// oT1 = (1, 0, r7.x, view2pos.x)
|
||||
// oT2 = (0, 1, r7.y, view2pos.y)
|
||||
// oT3 = (-r7.x, -r7.y, 1, view2pos.z)
|
||||
// which is just reversing the signs on the accumCos
|
||||
// terms above. So the normalized version is just
|
||||
// reversing the signs on the normalized version above.
|
||||
*/
|
||||
//mov oT3, r4;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
m4x4 oPos, r6, c0;
|
||||
|
||||
// Still need to attenuate based on position
|
||||
mov oD0, c4;
|
||||
|
||||
// This should be in local space after xforming v0
|
||||
dp4 r0.x, v0, c10;
|
||||
dp4 r0.y, v0, c11;
|
||||
mov r0.zw, c16.xxxz;
|
||||
mov oT0, r0
|
||||
// mov oT0, v7;
|
||||
|
||||
// Questionable attenuation follows
|
||||
// Find vector from this point to camera and normalize
|
||||
sub r0, c17, r6;
|
||||
dp3 r1.x, r0, r0;
|
||||
rsq r1.x, r1.x;
|
||||
mul r0, r0, r1.xxxx;
|
||||
// Dot that with the computed normal
|
||||
dp3 r1.x, r0, r11;
|
||||
// dp3 r1.x, r0, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
|
||||
// Map dot=1 => 0, dot=0 => 1
|
||||
sub r1.xyzw, c16.zzzz, r1.xxxx;
|
||||
add r1.w, r1.wwww, c16.zzzz;
|
||||
mul r1.w, r1.wwww, c16.yyyy;
|
||||
// No need to clamp, since the destination register (in the pixel shader)
|
||||
// will saturate [0..1] anyway.
|
||||
mul oD1, r1, c20;
|
||||
// mov oD1, r9;
|
||||
// mov oD1, r8.xzyw;
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
|
||||
//m4x4 oPos, v0, c0
|
||||
|
||||
|
||||
/*
|
||||
In fact, I was trying to understand how it was possible to expand FRC into 4
|
||||
instructions...
|
||||
Actually, I can do it in 7 instructions :)
|
||||
|
||||
EXPP r0.y, r1.xxxx
|
||||
MOV r0.x, r0.y
|
||||
EXPP r0.y, r1.zzzz
|
||||
MOV r0.z, r0.y
|
||||
EXPP r0.y, r1.wwww
|
||||
MOV r0.w, r0.y
|
||||
EXPP r0.y, r1.yyyy
|
||||
*/
|
||||
|
||||
/*
|
||||
// Constants for sin and cos. 3 term approximation seems plenty
|
||||
// (it's what i used for software sim, and had no visibly different
|
||||
// results than the math library functions).
|
||||
// When doing sin/cos together, some speedup might be obtained
|
||||
// with good pairing of ops doing them simultaneously. Also save
|
||||
// an instruction calculating r0^3.
|
||||
D3DXVECTOR4 vSin( 1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f );
|
||||
D3DXVECTOR4 vCos( 1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f );
|
||||
*/
|
||||
|
||||
/*
|
||||
Cos():
|
||||
|
||||
|
||||
r1 = mul(r0, r0); // r0^2
|
||||
r2 = mul(r1, r1); // r0^4
|
||||
|
||||
//cos
|
||||
r3 = mad( r1, vCos.yyyy, vCos.xxxx );
|
||||
r3 = mad( r2, vCos.zzzz, r3 );
|
||||
*/
|
||||
|
||||
/*
|
||||
Sin();
|
||||
r1 = mul(r0, r0); // r0^3
|
||||
r1 = mul(r0, r1);
|
||||
r2 = mul(r1, r1); // r0^6
|
||||
|
||||
r3 = mad( r1, vSin.yyyy, r0 );
|
||||
r3 = mad( r2, vSin.zzzz, r3 );
|
||||
*/
|
||||
|
||||
/*
|
||||
SinCos():
|
||||
|
||||
r1 = mul(r0, r0); // r0^2
|
||||
r2 = mul(r1, r0); // r0^3 // probably stall
|
||||
r3 = mul(r1, r1); // r0^4
|
||||
r4 = mul(r2, r2); // r0^6
|
||||
|
||||
r5 = mad( r1, vCos.yyyy, vCos.xxxx );
|
||||
r6 = mad( r2, vSin.yyyy, r0 );
|
||||
r5 = mad( r3, vCos.zzzz, r5 );
|
||||
r6 = mad( r4, vSin.zzzz, r6 );
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
consts
|
||||
kOneOverEightNsqPi = 1.f / ( 8.f * Pi * 4.f * 4.f );
|
||||
kPiOverTwo = Pi / 2.f;
|
||||
kTwoPi = Pi * 2.f;
|
||||
kPi = Pi;
|
||||
*/
|
||||
/*
|
||||
CONSTANT REGISTERS
|
||||
VOLATILE CONSTS - change per invocation
|
||||
C0-C3 local2proj matrix
|
||||
C4 color
|
||||
C5 freq vector
|
||||
C6 phase vector
|
||||
C7 amplitude vector
|
||||
C8 center0
|
||||
C9 center1
|
||||
C10 center2
|
||||
C11 center3
|
||||
C12 scrunch = (scrunch, -scrunch, 0, 1);
|
||||
CONSTANT CONSTS - forever more
|
||||
C13 SinConsts = (1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f);
|
||||
C14 CosConsts = (1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
|
||||
C15 PiConsts = (1.f / (8*Pi*N^2), Pi/2, Pi, 2*Pi);
|
||||
C16 numberConsts = (0.f, 0.5f, 1.f, 2.f);
|
||||
//=====================================
|
||||
TEMP REGISTERS
|
||||
r6 accumPos
|
||||
r7 accumCos
|
||||
r8 toCenter_Y
|
||||
r9 toCenter_X
|
||||
r11 filter
|
||||
r10 tempFloat
|
||||
*/
|
||||
// const float4 kCosConsts = float4(1.0f, -1.0f/2.0f, 1.0f/ 24.0f, -1.0f/ 720.0f);
|
||||
// const float4 kSinConsts = float4(1.0f, -1.0f/6.0f, 1.0f/120.0f, -1.0f/5040.0f);
|
||||
|
||||
// const float4 kPiConsts = float4(1.f / (8.f * 3.1415f * 16.f), 3.1415f*0.5f, 3.1415f, 3.1415f*2.f);
|
||||
// const float4 k0512 = float4(0.f, 0.5f, 1.f, 2.f);
|
||||
|
||||
// accumPos = inPos;
|
||||
mov r6, v0;
|
||||
//
|
||||
// For each wave
|
||||
// {
|
||||
// // First, we want to filter out waves based on distance from the local origin
|
||||
// dist = dp3(inPos, inPos);
|
||||
dp3 r0, r6, r6;
|
||||
// dist *= kFreqSq.xyzw;
|
||||
mul r0, r0, c5;
|
||||
mul r0, r0, c5;
|
||||
// dist *= kOneOverEightNsqPi; // combine this into kFreqSq?
|
||||
mul r0, r0, c15.xxxx;
|
||||
// dist = min(dist, kPiOverTwo);
|
||||
min r0, r0, c15.yyyy;
|
||||
// filter = cos(dist);
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r1; // r1^2
|
||||
mul r1, r1, c14.yyyy;
|
||||
add r11, r1, c14.xxxx;
|
||||
mad r11, r2, c14.zzzz, r11;
|
||||
|
||||
|
||||
// filter *= kAmplitude.xyzw;
|
||||
// mul r11, r11, c7;
|
||||
// // Notice that if dist is a 4vec, all this can be simultaneously done for 4 waves at a time.
|
||||
//
|
||||
// Find the x/y distances and stuff them into r9(x) and r8(y) respectively
|
||||
// toCenter_X.x = dir0.x * pos.x;
|
||||
// toCenter_Y.x = dir0.y * pos.y;
|
||||
mul r0, c8, r6.xxxx;
|
||||
mad r0, c9, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c15.wwww;
|
||||
add r0, r0, c15.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c13.wwww, r2;
|
||||
mad r1, r4, c14.wwww, r1;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c7;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r6.z, r2, c16.zzzz;
|
||||
//
|
||||
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c5;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c7;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c16.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
|
||||
dp4 r7.x, r1, -c8
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
|
||||
dp4 r7.y, r1, -c9
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
|
||||
mov r11, c16.xxzx;
|
||||
add r11, r11, r7;
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx;
|
||||
|
||||
//
|
||||
// // Scrunch in based on computed (normalized) normal
|
||||
// temp = mul( accumNorm, kNegScrunchScale ); // kNegScrunchScale = (-scrunchScale, -scrunchScale, 0, 0);
|
||||
// accumPos += temp;
|
||||
dp3 r10.x, r11, c18.zxw; // winddir.x, winddir.y, 0, 0
|
||||
// r10.x tells us whether our normal is opposed to the wind.
|
||||
// If opposed, r10.x = 0, else r10.x = 1.f;
|
||||
// We'll use this to kill the Scrunch on the back sides of waves.
|
||||
// We use it for position right here, and then again for the
|
||||
// normal just down a bit further.
|
||||
slt r10.x, r10.x, c16.x;
|
||||
mul r9, r10.xxxx, r11;
|
||||
|
||||
mad r6, r9, c12.yyzz, r6;
|
||||
|
||||
// mul r6.z, r6.z, r10.xxxx; DEBUG
|
||||
|
||||
// mad r6, r11, c12.yyzz, r6;
|
||||
|
||||
// accumNorm = mul (accumNorm, kScrunchScale ); // kScrunchScale = (scrunchScale, scrunchScale, 1, 1);
|
||||
// accumCos *= (scrunchScale, scrunchScale, 0, 0);
|
||||
|
||||
mul r2.x, r6.z, c12.x;
|
||||
mul r2.x, r2.x, r10.x; // ???
|
||||
add r2.x, r2.x, c16.z;
|
||||
|
||||
// mul r7, r7, c12.xxzz;
|
||||
mul r7.xy, r7.xy, r2.xx;
|
||||
|
||||
// This is actually wrong, but useful right now for visualizing the generated coords.
|
||||
// See below for correct version.
|
||||
|
||||
sub r3, c16.xxzx, r7.xyzz;
|
||||
|
||||
// Normalize?
|
||||
|
||||
// We can either calculate an orthonormal basis from the
|
||||
// computed normal, with Binormal = (0,1,0) X Normal, Tangent = Normal X (1,0,0),
|
||||
// or compute our basis directly from the partial derivatives, with
|
||||
// Binormal = (1, 0, -cosX), Tangent = (0, 1, -cosY), Normal = (cosX, cosY, 1)
|
||||
//
|
||||
// These work out to identically the same result, so we'll compute directly
|
||||
// from the partials because it takes 2 fewer instructions.
|
||||
//
|
||||
// Note that our basis is NOT orthonormal. The Normal is equal to
|
||||
// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and Tangents
|
||||
// are both correct tangents to the surface, and their projections on the XY plane
|
||||
// are 90 degrees apart, but in 3-space, they are not orthogonal. Practical implications?
|
||||
// Not really. I'm actually not really sure which is more "proper" for bump mapping.
|
||||
//
|
||||
// Note also that we add when we should subtract and subtract when we should
|
||||
// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows
|
||||
// of our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
|
||||
// explanation.
|
||||
//
|
||||
// Binormal = Y % Normal
|
||||
// Cross product3 is:
|
||||
// mul res.xyz, a.yzx, b.zxy
|
||||
// mad res.xyz, -a.zxy, b.yzx, res.xyz
|
||||
// mul r1.xyz, c16.zxx, r3.zxy;
|
||||
// mad r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
|
||||
|
||||
// Tangent = Normal % X
|
||||
// mul r2.xyz, r3.yzx, c16.xzx;
|
||||
// mad r2.xyz, -r3.zxy, c16.xxz, r2;
|
||||
|
||||
add r1, c16.zxxx, r7.zzxz;
|
||||
add r2, c16.xzxx, r7.zzyz;
|
||||
|
||||
// Note that we're swapping z and y to match our environment map tools in max.
|
||||
// We do this through our normal map transform (oT1, oT2, oT3), making it
|
||||
// a concatenation of:
|
||||
//
|
||||
// rotate about Z (blue) to turn our map into the wind
|
||||
// windRot = | dirY -dirX 0 |
|
||||
// | dirX dirY 0 |
|
||||
// | 0 0 1 |
|
||||
//
|
||||
// swap our Y and Z axes to match our environment map
|
||||
// swapYZ = | 1 0 0 |
|
||||
// | 0 0 1 |
|
||||
// | 0 1 0 |
|
||||
//
|
||||
// rotate the normal into the surface's tangent space basis
|
||||
// basis = | Bx Tx Nx |
|
||||
// | By Ty Ny |
|
||||
// | Bz Tz Nz |
|
||||
//
|
||||
// Note that we've constructed the basis by taking advantage of the
|
||||
// matrix being a pure rotation, as noted below, so r1, r2 and r3
|
||||
// are actually constructed as:
|
||||
// basis = | Bx -By -Bz |
|
||||
// | -Tx Ty -Tz |
|
||||
// | -Nx -Ny -Nz |
|
||||
//
|
||||
// Then the final normal map transform is:
|
||||
//
|
||||
// basis * swapYZ * windRot [ * normal ]
|
||||
|
||||
|
||||
// sub r1.w, c17.x, r6.x;
|
||||
// sub r2.w, c17.z, r6.z;
|
||||
// sub r3.w, c17.y, r6.y;
|
||||
|
||||
// Big note here. All this math can blow up if the camera position
|
||||
// is outside the environment sphere. It's assumed that's dealt
|
||||
// with in the app setting up the constants. For that reason, the
|
||||
// camera position used here might not be the real local camera position,
|
||||
// which is needed for the angular attenuation, so we burn another constant
|
||||
// with our pseudo-camera position. To restrain the pseudo-camera from
|
||||
// leaving the sphere, we make:
|
||||
// pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
|
||||
// where dist = |realPos - envCenter|
|
||||
|
||||
// So, our "finitized" eyeray is:
|
||||
// camPos + D * t - envCenter = D * t - (envCenter - camPos)
|
||||
// with
|
||||
// D = (pos - camPos) / |pos - camPos| // normalized usual eyeray
|
||||
// and
|
||||
// t = D dot F + sqrt( (D dot F)^2 - G )
|
||||
// with
|
||||
// F = (envCenter - camPos) => c19.xyz
|
||||
// G = F^2 - R^2 => c19.w
|
||||
// R = environment radius. => unused
|
||||
//
|
||||
// This all derives from the positive root of equation
|
||||
// (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
|
||||
// In other words, where on a sphere of radius R centered about envCenter
|
||||
// does the ray from the real camera position through this point hit.
|
||||
//
|
||||
// Note that F, G, and R are all constants (one point, two scalars).
|
||||
//
|
||||
// So first we calculate D into r0,
|
||||
// then D dot F into r10.x,
|
||||
// then (D dot F)^2 - G into r10.y
|
||||
// then rsq( (D dot F)^2 - G ) into r9.x;
|
||||
// then t = r10.z = r10.x + r10.y * r9.x;
|
||||
// and
|
||||
// r0 = D * t - (envCenter - camPos)
|
||||
// = r0 * r10.zzzz - F;
|
||||
//
|
||||
sub r0, r6, c17;
|
||||
dp3 r10.x, r0, r0;
|
||||
rsq r10.x, r10.x;
|
||||
mul r0, r0, r10.xxxx;
|
||||
|
||||
dp3 r10.x, r0, c19;
|
||||
mad r10.y, r10.x, r10.x, -c19.w;
|
||||
|
||||
rsq r9.x, r10.y;
|
||||
|
||||
mad r10.z, r10.y, r9.x, r10.x;
|
||||
|
||||
mad r0.xyz, r0, r10.zzz, -c19.xyz;
|
||||
|
||||
mov r1.w, -r0.x;
|
||||
mov r2.w, -r0.y;
|
||||
mov r3.w, -r0.z;
|
||||
|
||||
// Now rotate our basis vectors into the wind
|
||||
dp3 r0.x, r1, c18.xyww;
|
||||
dp3 r0.y, r1, c18.zxww;
|
||||
mov r1.xy, r0;
|
||||
|
||||
dp3 r0.x, r2, c18.xyww;
|
||||
dp3 r0.y, r2, c18.zxww;
|
||||
mov r2.xy, r0;
|
||||
|
||||
dp3 r0.x, r3, c18.xyww;
|
||||
dp3 r0.y, r3, c18.zxww;
|
||||
mov r3.xy, r0;
|
||||
|
||||
mov r0.w, c16.zzzz;
|
||||
|
||||
dp3 r0.x, r1, r1;
|
||||
rsq r0.x, r0.x;
|
||||
mul oT1, r1.xyzw, r0.xxxw;
|
||||
// mul r8, r1.xyzw, r0.xxxw; // VISUAL
|
||||
|
||||
dp3 r0.x, r2, r2;
|
||||
rsq r0.x, r0.x;
|
||||
mul oT3, r2.xyzw, r0.xxxw;
|
||||
// mul r9, r2.xyzw, r0.xxxw; // VISUAL
|
||||
|
||||
dp3 r0.x, r3, r3;
|
||||
rsq r0.x, r0.x;
|
||||
mul oT2, r3.xyzw, r0.xxxw;
|
||||
// mul r9, r3.xyzw, r0.xxxw; // VISUAL
|
||||
|
||||
// mul r3, r3.xzyw, r0.xxxw;
|
||||
// mul r3.xy, r3, -c16.zzzz;
|
||||
|
||||
/*
|
||||
// Want:
|
||||
// oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
|
||||
// oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
|
||||
// ot3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
|
||||
// with BIN, TAN, and NORM normalized.
|
||||
// Unnormalized, we have
|
||||
// BIN = (1, 0, -r7.x) where r7 == accumCos
|
||||
// TAN = (0, 1, -r7.y)
|
||||
// NORM= (r7.x, r7.y, 1)
|
||||
// So, unnormalized, we have
|
||||
// oT1 = (1, 0, r7.x, view2pos.x)
|
||||
// oT2 = (0, 1, r7.y, view2pos.y)
|
||||
// oT3 = (-r7.x, -r7.y, 1, view2pos.z)
|
||||
// which is just reversing the signs on the accumCos
|
||||
// terms above. So the normalized version is just
|
||||
// reversing the signs on the normalized version above.
|
||||
*/
|
||||
//mov oT3, r4;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
m4x4 oPos, r6, c0;
|
||||
|
||||
// Still need to attenuate based on position
|
||||
mov oD0, c4;
|
||||
|
||||
// This should be in local space after xforming v0
|
||||
dp4 r0.x, v0, c10;
|
||||
dp4 r0.y, v0, c11;
|
||||
mov r0.zw, c16.xxxz;
|
||||
mov oT0, r0
|
||||
// mov oT0, v7;
|
||||
|
||||
// Questionable attenuation follows
|
||||
// Find vector from this point to camera and normalize
|
||||
sub r0, c17, r6;
|
||||
dp3 r1.x, r0, r0;
|
||||
rsq r1.x, r1.x;
|
||||
mul r0, r0, r1.xxxx;
|
||||
// Dot that with the computed normal
|
||||
dp3 r1.x, r0, r11;
|
||||
// dp3 r1.x, r0, r3; // if you want the adjusted normal, you'll need to normalize/swizzle r3
|
||||
// Map dot=1 => 0, dot=0 => 1
|
||||
sub r1.xyzw, c16.zzzz, r1.xxxx;
|
||||
add r1.w, r1.wwww, c16.zzzz;
|
||||
mul r1.w, r1.wwww, c16.yyyy;
|
||||
// No need to clamp, since the destination register (in the pixel shader)
|
||||
// will saturate [0..1] anyway.
|
||||
mul oD1, r1, c20;
|
||||
// mov oD1, r9;
|
||||
// mov oD1, r8.xzyw;
|
||||
|
@ -1,243 +1,243 @@
|
||||
// Vertex shader (vs.1.1) for a wave-displaced water surface.
// Inputs declared below: v0 = position, v5 = diffuse color, v7 = texcoord set 0.
vs.1.1
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
|
||||
// Store our input position in world space in r6
// (m4x3 reads three consecutive constant rows starting at c25 and writes r6.xyz only).
|
||||
m4x3 r6, v0, c25; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
// c16.z is the constant this shader uses as 1.0 (see the "Clamp to one" uses below).
|
||||
mov r6.w, c16.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = reflection strength (transparency)
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c30 = waterlevel + offset
|
||||
// c31 = (maxAtten - minAtten) / depthFalloff
|
||||
// c32 = minAtten.
|
||||
// And in particular:
|
||||
// c30.w = waterlevel
|
||||
// c31.w = 1.f;
|
||||
// c32.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Build the per-lane wave argument and wrap it into [-Pi..Pi].
// All ops are 4-wide; presumably each lane is one wave (the lanes are summed with dp4 later).
// Dot our position with our direction vectors.
// r0 = c8 * pos.x + c9 * pos.y (c8 scales world X, c9 scales world Y).
|
||||
mul r0, c8, r6.xxxx;
|
||||
mad r0, c9, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
// (c5 = kFreq, c6 = kPhase; done as a separate mul and add here.)
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
// What the code actually computes is (dist + kPi) * (1 / kTwoPi),
// with c15.z = kPi and c15.w = kTwoPi; r4 holds the reciprocal.
|
||||
rcp r4, c15.wwww;
|
||||
add r0, r0, c15.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
// expp with a .y write mask is used to get the fractional part of one lane at a time.
// r1.y is the scratch destination for every lane, so the y lane itself is done last
// (after x, z and w have been copied out) to avoid clobbering it.
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
// r0 now holds the wrapped argument fed to the sin/cos polynomial.
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// Polynomial sin/cos of r0, evaluated for all four lanes at once.
// In: r0 = argument. Out: r2 = sin, r1 = cos. Scratch: r3, r4, r5.
// c13 = sine coefficients (vSin), c14 = cosine coefficients (vCos).
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
|
||||
// cos = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6 (r4 was r0^5)
|
||||
mad r2, r5, c13.wwww, r2; // r2 += r0^7 * vSin.w -> final sin
|
||||
mad r1, r4, c14.wwww, r1; // r1 += r0^6 * vCos.w -> final cos
|
||||
|
||||
// Two attenuation factors are computed here:
//   r4  = depth-based filter, r4 = (c30 - worldZ) * c31 + c32, with .xyz clamped to [0..1]
//   r11 = frequency filter, r11 = clamp(v5.w * c29, 0, 1)
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
// ("it" = r4's previous contents, the r0^6 power term, which is dead by now.)
|
||||
sub r4, c30, r6.zzzz;
|
||||
mul r4, r4, c31;
|
||||
add r4, r4, c32;
|
||||
// Clamp .xyz to range [0..1]
// (c16.z is used as 1 and c16.x as 0; r4.w is left unclamped.)
|
||||
min r4.xyz, r4, c16.zzzz;
|
||||
max r4.xyz, r4, c16.xxxx;
|
||||
//mov r4.xyz, c16.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
// NOTE(review): the header comment describes (waveLen * v5.w) - 1, but no subtraction
// happens here; only a multiply by c29 and a clamp. Confirm how c29 is set up by the app.
|
||||
mul r11, v5.wwww, c29;
|
||||
max r11, r11, c16.xxxx;
|
||||
min r11, r11, c16.zzzz;
|
||||
|
||||
// Sum the filtered, amplitude-scaled sines into a wave height and raise the vertex to it.
// In: r2 = sin, r11 = frequency filter, r4.z = depth-based wave scaling, c7 = kAmplitude.
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c7;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
// In this shader the sum is not added to r6.z directly: it goes to r8.x, is scaled by
// r4.z, offset by c30.w, and r6.z becomes the max of its old value and that result.
|
||||
dp4 r8.x, r2, c16.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c30.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// Turn the cosines into surface slopes and build a unit normal in r11.
// In: r1 = cos, r11 = frequency filter (r11 is overwritten with the normal below).
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c5;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c7;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c16.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
// Written straight into r7.x; note the source is the negated c8.
|
||||
dp4 r7.x, r1, -c8
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
// Written straight into r7.y; note the source is the negated c9.
|
||||
dp4 r7.y, r1, -c9
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
// Only the normal is built here: r11 = (0,0,1,0) + r7 = (r7.x, r7.y, 1, 0),
// then scaled by 1/length (dp3 + rsq) to unit length.
|
||||
mov r11, c16.xxzx;
|
||||
add r11, r11, r7;
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx;
|
||||
|
||||
//
|
||||
// Offset the vertex in X/Y along the normal's X/Y components, then nudge it in Z.
// In: r11 = unit normal, r4.z = depth-based wave scaling. Out: r6.xy, r6.z updated.
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
// r10.x = c12.y * r4.z; r6.xy += r11.xy * r10.x
|
||||
mul r10.x, c12.y, r4.z;
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy;
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c35.z, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c35.z;
|
||||
|
||||
//
|
||||
// // Transform position to screen
// The transformed position is kept in r9 (instead of writing oPos directly) so that
// its w can also feed the fog output: oFog = (r9.w + c4.x) * c4.y.
// NOTE(review): c4.x / c4.y look like a fog offset and scale - confirm against the app-side constants.
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c25; // HACKAGE
|
||||
//mov r6.w, c16.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c4.x;
|
||||
mul oFog, r10.x, c4.y;
|
||||
mov oPos, r9;
|
||||
|
||||
|
||||
// Dyna Stuff
// Age-based attenuation of the output color. Writes oD0; leaves age in r1.x.
|
||||
// Constants
|
||||
// c33 = fC1U, fC2U, fC1V, fC2V
|
||||
// c34 = fInitAtten, t, life, 1.f / (life-decay)
|
||||
// c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE
|
||||
//
|
||||
// Vertex Info
|
||||
// v7.z = fBirth (because we don't use it for anything else).
|
||||
//
|
||||
// Initialize r1.zw to 0,1
// (r1 = (0, 0, 0, 1); r1.z is reused as scratch below, so only r1.w keeps its initial value.)
|
||||
mov r1, c16.xxxz;
|
||||
// Calc r1.x = age, r1.y = atten
|
||||
// age = t - birth.
|
||||
sub r1.x, c34.y, v7.z;
|
||||
// atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay));
|
||||
// first clamp0_1(age/ramp)
|
||||
mul r1.y, r1.x, c35.y;
|
||||
min r1.y, r1.y, c16.z; // Clamp to one (can't go negative).
|
||||
// now clamp0_1((life-age) / (life-decay));
|
||||
sub r1.z, c34.z, r1.x;
|
||||
mul r1.z, r1.z, c34.w;
|
||||
min r1.z, r1.z, c16.z; // Clamp to one
|
||||
max r1.z, r1.z, c16.x; // Clamp to zero
|
||||
mul r1.y, r1.y, r1.z; // atten is the product of the two terms.
|
||||
|
||||
// color is (atten, atten, atten, 1.f)
// ... scaled by r0.y below, so oD0 = r0.y * (atten, atten, atten, 1.f).
|
||||
// Need to calculate opacity we would have had from vs_WaveFixedFin6.inl
|
||||
// Right now that's just modulating by r4.y.
// r0.y = r4.y * fInitAtten (r4.y is the y channel of the clamped depth filter).
|
||||
mul r0.y, r4.y, c34.x;
|
||||
mul oD0, r0.yyyy, r1.yyyw;
|
||||
//mov oD0, c16.zzzz; // HACKTEST
|
||||
|
||||
// Scale the input UV about (0.5, 0.5) by an age-dependent factor and write oT0.
// In: r1.x = age, v7.xy = input UV. c16.y is used as 0.5 and c16.z as 1.
// UVW = (inUVW - 0.5) * scale + 0.5
|
||||
// where:
|
||||
// scale = (fC1U / (age * fC2U + 1.f), fC1V / (age * fC2V + 1.f), 1.f, 1.f)
|
||||
mov r2, c16.xxxz;
|
||||
mul r2.xy, r1.xx, c33.yw; // age * (fC2U, fC2V)
|
||||
add r2.xy, r2.xy, c16.zz; // + 1
|
||||
rcp r2.x, r2.x;
|
||||
rcp r2.y, r2.y;
|
||||
mul r2.xy, r2.xy, c33.xz; // * (fC1U, fC1V) -> scale.xy
|
||||
sub r1.xy, v7.xy, c16.yy;
|
||||
mul r1.xy, r1.xy, r2.xy;
|
||||
add r1.xy, r1.xy, c16.yy;
// NOTE(review): only r1.xy is written in this section. r1.z still holds the clamped
// (life-age)/(life-decay) term from the attenuation code, not the 0 it was initialized
// to, so oT0.z carries that value - confirm nothing downstream reads oT0.z.
|
||||
mov oT0, r1;
|
||||
|
||||
|
||||
// Vertex shader (vs.1.1) for a wave-displaced water surface.
// Inputs declared below: v0 = position, v5 = diffuse color, v7 = texcoord set 0.
vs.1.1
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
|
||||
// Store our input position in world space in r6
// (m4x3 reads three consecutive constant rows starting at c25 and writes r6.xyz only).
|
||||
m4x3 r6, v0, c25; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
// c16.z is the constant this shader uses as 1.0 (see the "Clamp to one" uses below).
|
||||
mov r6.w, c16.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = reflection strength (transparency)
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c30 = waterlevel + offset
|
||||
// c31 = (maxAtten - minAtten) / depthFalloff
|
||||
// c32 = minAtten.
|
||||
// And in particular:
|
||||
// c30.w = waterlevel
|
||||
// c31.w = 1.f;
|
||||
// c32.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Build the per-lane wave argument and wrap it into [-Pi..Pi].
// All ops are 4-wide; presumably each lane is one wave (the lanes are summed with dp4 later).
// Dot our position with our direction vectors.
// r0 = c8 * pos.x + c9 * pos.y (c8 scales world X, c9 scales world Y).
|
||||
mul r0, c8, r6.xxxx;
|
||||
mad r0, c9, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
// (c5 = kFreq, c6 = kPhase; done as a separate mul and add here.)
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
// What the code actually computes is (dist + kPi) * (1 / kTwoPi),
// with c15.z = kPi and c15.w = kTwoPi; r4 holds the reciprocal.
|
||||
rcp r4, c15.wwww;
|
||||
add r0, r0, c15.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
// expp with a .y write mask is used to get the fractional part of one lane at a time.
// r1.y is the scratch destination for every lane, so the y lane itself is done last
// (after x, z and w have been copied out) to avoid clobbering it.
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
// r0 now holds the wrapped argument fed to the sin/cos polynomial.
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// Polynomial sin/cos of r0, evaluated for all four lanes at once.
// In: r0 = argument. Out: r2 = sin, r1 = cos. Scratch: r3, r4, r5.
// c13 = sine coefficients (vSin), c14 = cosine coefficients (vCos).
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
|
||||
// cos = vCos.x + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6 (r4 was r0^5)
|
||||
mad r2, r5, c13.wwww, r2; // r2 += r0^7 * vSin.w -> final sin
|
||||
mad r1, r4, c14.wwww, r1; // r1 += r0^6 * vCos.w -> final cos
|
||||
|
||||
// Two attenuation factors are computed here:
//   r4  = depth-based filter, r4 = (c30 - worldZ) * c31 + c32, with .xyz clamped to [0..1]
//   r11 = frequency filter, r11 = clamp(v5.w * c29, 0, 1)
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
// ("it" = r4's previous contents, the r0^6 power term, which is dead by now.)
|
||||
sub r4, c30, r6.zzzz;
|
||||
mul r4, r4, c31;
|
||||
add r4, r4, c32;
|
||||
// Clamp .xyz to range [0..1]
// (c16.z is used as 1 and c16.x as 0; r4.w is left unclamped.)
|
||||
min r4.xyz, r4, c16.zzzz;
|
||||
max r4.xyz, r4, c16.xxxx;
|
||||
//mov r4.xyz, c16.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
// NOTE(review): the header comment describes (waveLen * v5.w) - 1, but no subtraction
// happens here; only a multiply by c29 and a clamp. Confirm how c29 is set up by the app.
|
||||
mul r11, v5.wwww, c29;
|
||||
max r11, r11, c16.xxxx;
|
||||
min r11, r11, c16.zzzz;
|
||||
|
||||
// Sum the filtered, amplitude-scaled sines into a wave height and raise the vertex to it.
// In: r2 = sin, r11 = frequency filter, r4.z = depth-based wave scaling, c7 = kAmplitude.
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c7;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
// In this shader the sum is not added to r6.z directly: it goes to r8.x, is scaled by
// r4.z, offset by c30.w, and r6.z becomes the max of its old value and that result.
|
||||
dp4 r8.x, r2, c16.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c30.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// Turn the cosines into surface slopes and build a unit normal in r11.
// In: r1 = cos, r11 = frequency filter (r11 is overwritten with the normal below).
// cosDist *= kFreq.xyzw;
|
||||
mul r1, r1, c5;
|
||||
// cosDist *= kAmplitude.xyzw; // Combine?
|
||||
mul r1, r1, c7;
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
//
|
||||
// accumCos = (0, 0, 0, 0);
|
||||
mov r7, c16.xxxx;
|
||||
// temp = dp4( cosDist, toCenter_X );
|
||||
// accumCos.x += temp.xxxx; (but accumCos = (0,0,0,0)
// Written straight into r7.x; note the source is the negated c8.
|
||||
dp4 r7.x, r1, -c8
|
||||
//
|
||||
// temp = dp4( cosDist, toCenter_Y );
|
||||
// accumCos.y += temp.xxxx;
// Written straight into r7.y; note the source is the negated c9.
|
||||
dp4 r7.y, r1, -c9
|
||||
//
|
||||
// }
|
||||
//
|
||||
// accumBin = (1, 0, -accumCos.x);
|
||||
// accumTan = (0, 1, -accumCos.y);
|
||||
// accumNorm = (accumCos.x, accumCos.y, 1);
// Only the normal is built here: r11 = (0,0,1,0) + r7 = (r7.x, r7.y, 1, 0),
// then scaled by 1/length (dp3 + rsq) to unit length.
|
||||
mov r11, c16.xxzx;
|
||||
add r11, r11, r7;
|
||||
dp3 r10.x, r11, r11;
|
||||
rsq r10.x, r10.x;
|
||||
mul r11, r11, r10.xxxx;
|
||||
|
||||
//
|
||||
// Offset the vertex in X/Y along the normal's X/Y components, then nudge it in Z.
// In: r11 = unit normal, r4.z = depth-based wave scaling. Out: r6.xy, r6.z updated.
// Add in our scrunch (offset in X/Y plane).
|
||||
// Scale down our scrunch amount by the wave scaling
// r10.x = c12.y * r4.z; r6.xy += r11.xy * r10.x
|
||||
mul r10.x, c12.y, r4.z;
|
||||
mad r6.xy, r11.xy, r10.xx, r6.xy;
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c35.z, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c35.z;
|
||||
|
||||
//
|
||||
// // Transform position to screen
// The transformed position is kept in r9 (instead of writing oPos directly) so that
// its w can also feed the fog output: oFog = (r9.w + c4.x) * c4.y.
// NOTE(review): c4.x / c4.y look like a fog offset and scale - confirm against the app-side constants.
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c25; // HACKAGE
|
||||
//mov r6.w, c16.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c4.x;
|
||||
mul oFog, r10.x, c4.y;
|
||||
mov oPos, r9;
|
||||
|
||||
|
||||
// Dyna Stuff
// Age-based attenuation of the output color. Writes oD0; leaves age in r1.x.
|
||||
// Constants
|
||||
// c33 = fC1U, fC2U, fC1V, fC2V
|
||||
// c34 = fInitAtten, t, life, 1.f / (life-decay)
|
||||
// c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE
|
||||
//
|
||||
// Vertex Info
|
||||
// v7.z = fBirth (because we don't use it for anything else).
|
||||
//
|
||||
// Initialize r1.zw to 0,1
// (r1 = (0, 0, 0, 1); r1.z is reused as scratch below, so only r1.w keeps its initial value.)
|
||||
mov r1, c16.xxxz;
|
||||
// Calc r1.x = age, r1.y = atten
|
||||
// age = t - birth.
|
||||
sub r1.x, c34.y, v7.z;
|
||||
// atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay));
|
||||
// first clamp0_1(age/ramp)
|
||||
mul r1.y, r1.x, c35.y;
|
||||
min r1.y, r1.y, c16.z; // Clamp to one (can't go negative).
|
||||
// now clamp0_1((life-age) / (life-decay));
|
||||
sub r1.z, c34.z, r1.x;
|
||||
mul r1.z, r1.z, c34.w;
|
||||
min r1.z, r1.z, c16.z; // Clamp to one
|
||||
max r1.z, r1.z, c16.x; // Clamp to zero
|
||||
mul r1.y, r1.y, r1.z; // atten is the product of the two terms.
|
||||
|
||||
// color is (atten, atten, atten, 1.f)
// ... scaled by r0.y below, so oD0 = r0.y * (atten, atten, atten, 1.f).
|
||||
// Need to calculate opacity we would have had from vs_WaveFixedFin6.inl
|
||||
// Right now that's just modulating by r4.y.
// r0.y = r4.y * fInitAtten (r4.y is the y channel of the clamped depth filter).
|
||||
mul r0.y, r4.y, c34.x;
|
||||
mul oD0, r0.yyyy, r1.yyyw;
|
||||
//mov oD0, c16.zzzz; // HACKTEST
|
||||
|
||||
// Scale the input UV about (0.5, 0.5) by an age-dependent factor and write oT0.
// In: r1.x = age, v7.xy = input UV. c16.y is used as 0.5 and c16.z as 1.
// UVW = (inUVW - 0.5) * scale + 0.5
|
||||
// where:
|
||||
// scale = (fC1U / (age * fC2U + 1.f), fC1V / (age * fC2V + 1.f), 1.f, 1.f)
|
||||
mov r2, c16.xxxz;
|
||||
mul r2.xy, r1.xx, c33.yw; // age * (fC2U, fC2V)
|
||||
add r2.xy, r2.xy, c16.zz; // + 1
|
||||
rcp r2.x, r2.x;
|
||||
rcp r2.y, r2.y;
|
||||
mul r2.xy, r2.xy, c33.xz; // * (fC1U, fC1V) -> scale.xy
|
||||
sub r1.xy, v7.xy, c16.yy;
|
||||
mul r1.xy, r1.xy, r2.xy;
|
||||
add r1.xy, r1.xy, c16.yy;
// NOTE(review): only r1.xy is written in this section. r1.z still holds the clamped
// (life-age)/(life-decay) term from the attenuation code, not the 0 it was initialized
// to, so oT0.z carries that value - confirm nothing downstream reads oT0.z.
|
||||
mov oT0, r1;
|
||||
|
||||
|
||||
|
@ -1,226 +1,226 @@
|
||||
|
||||
vs.1.1
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c25; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c16.z;
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = reflection strength (transparency)
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c30 = waterlevel + offset
|
||||
// c31 = (maxAtten - minAtten) / depthFalloff
|
||||
// c32 = minAtten.
|
||||
// And in particular:
|
||||
// c30.w = waterlevel
|
||||
// c31.w = 1.f;
|
||||
// c32.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet.
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c8, r6.xxxx;
|
||||
mad r0, c9, r6.yyyy, r0;
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c15.wwww;
|
||||
add r0, r0, c15.zzzz;
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c13.wwww, r2;
|
||||
mad r1, r4, c14.wwww, r1;
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c30, r6.zzzz;
|
||||
mul r4, r4, c31;
|
||||
add r4, r4, c32;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c16.zzzz;
|
||||
max r4.xyz, r4, c16.xxxx;
|
||||
//mov r4.xyz, c16.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c29;
|
||||
max r11, r11, c16.xxxx;
|
||||
min r11, r11, c16.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c7;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c16.zzzz;
|
||||
mul r8.y, r8.x, r4.z;
|
||||
add r8.z, r8.y, c30.w;
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
|
||||
// Pos = (in.x + S, in.y + R, r6.z)
|
||||
// S = sum(k Dir.x A cos())
|
||||
// R = sum(k Dir.y A cos())
|
||||
// c10 = k Dir.x A
|
||||
// c11 = k Dir.y A
|
||||
// S = sum(cosDist * c10);
|
||||
dp4 r7.x, r1, c10;
|
||||
// R = sum(cosDist * c11);
|
||||
dp4 r7.y, r1, c11;
|
||||
|
||||
add r6.xy, r6.xy, r7.xy;
|
||||
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c35.z, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c35.z;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c25; // HACKAGE
|
||||
//mov r6.w, c16.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c4.x;
|
||||
mul oFog, r10.x, c4.y;
|
||||
mov oPos, r9;
|
||||
|
||||
|
||||
// Dyna Stuff
|
||||
// Constants
|
||||
// c33 = fC1U, fC2U, fC1V, fC2V
|
||||
// c34 = fInitAtten, t, life, 1.f / (life-decay)
|
||||
// c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE
|
||||
//
|
||||
// Vertex Info
|
||||
// v7.z = fBirth (because we don't use it for anything else).
|
||||
//
|
||||
// Initialize r1.zw to 0,1
|
||||
mov r1, c16.xxxz;
|
||||
// Calc r1.x = age, r1.y = atten
|
||||
// age = t - birth.
|
||||
sub r1.x, c34.y, v7.z;
|
||||
// atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay));
|
||||
// first clamp0_1(age/ramp)
|
||||
mul r1.y, r1.x, c35.y;
|
||||
min r1.y, r1.y, c16.z; // Clamp to one (can't go negative).
|
||||
// now clamp0_1((life-age) / (life-decay));
|
||||
sub r1.z, c34.z, r1.x;
|
||||
mul r1.z, r1.z, c34.w;
|
||||
min r1.z, r1.z, c16.z; // Clamp to one
|
||||
max r1.z, r1.z, c16.x; // Clamp to zero
|
||||
mul r1.y, r1.y, r1.z; // atten is the product of the two terms.
|
||||
|
||||
// color is (atten, atten, atten, 1.f)
|
||||
// Need to calculate opacity we would have had from vs_WaveFixedFin7.inl
|
||||
// Right now that's just modulating by r4.y.
|
||||
mul r0.y, r4.y, c34.x;
|
||||
mul oD0, r0.yyyy, r1.yyyw;
|
||||
//mov oD0, c16.zzzz; // HACKTEST
|
||||
|
||||
// UVW = (inUVW - 0.5) * scale + 0.5
|
||||
// where:
|
||||
// scale = (fC1U / (age * fC2U + 1.f), fC1V / (age * fC2V + 1.f), 1.f, 1.f)
|
||||
mov r2, c16.xxxz;
|
||||
mul r2.xy, r1.xx, c33.yw;
|
||||
add r2.xy, r2.xy, c16.zz;
|
||||
rcp r2.x, r2.x;
|
||||
rcp r2.y, r2.y;
|
||||
mul r2.xy, r2.xy, c33.xz;
|
||||
sub r1.xy, v7.xy, c16.yy;
|
||||
mul r1.xy, r1.xy, r2.xy;
|
||||
add r1.xy, r1.xy, c16.yy;
|
||||
mov oT0, r1;
|
||||
|
||||
|
||||
|
||||
vs.1.1
// Vertex shader: displaces a world-space vertex with a sum of four sine waves
// (one wave per xyzw channel of c5..c11), damps the result by water depth and
// by a per-vertex frequency filter, then writes oPos, oFog, oD0 and oT0.
// Inputs: v0 = position, v5 = color (only v5.w is read by this shader),
//         v7 = texcoord0 (v7.xy = UV, v7.z = birth time).
// c16 supplies scalar constants: .x = 0, .y = 0.5, .z = 1 (as used by the
// clamps and the UV re-centering below).
|
||||
|
||||
dcl_position v0
|
||||
dcl_color v5
|
||||
dcl_texcoord0 v7
|
||||
|
||||
// Store our input position in world space in r6
|
||||
m4x3 r6, v0, c25; // v0 * l2w
|
||||
// Fill out our w (m4x3 doesn't touch w).
|
||||
mov r6.w, c16.z; // r6.w = 1
|
||||
|
||||
//
|
||||
|
||||
// Input diffuse v5 color is:
|
||||
// v5.r = overall transparency
|
||||
// v5.g = reflection strength (transparency)
|
||||
// v5.b = overall wave scaling
|
||||
//
|
||||
// v5.a is:
|
||||
// v5.w = 1/(2.f * edge length)
|
||||
// So per wave filtering is:
|
||||
// min(max((waveLen * v5.wwww) - 1, 0), 1.f);
// NOTE(review): the filter code below clamps v5.w * c29 directly and applies no
// "- 1" -- confirm whether that bias is folded into the inputs.
|
||||
// So a wave effect starts dying out when the wave is 4 times the sampling frequency,
|
||||
// and is completely filtered at 2 times sampling frequency.
|
||||
|
||||
// We'd like to make this autocalculated based on the depth of the water.
|
||||
// The frequency filtering (v5.w) still needs to be calculated offline, because
|
||||
// it's dependent on edge length, but the first 3 filterings can be calculated
|
||||
// based on this vertex.
|
||||
// Basically, we want the transparency, reflection strength, and wave scaling
|
||||
// to go to zero as the water depth goes to zero. Linear falloffs are as good
|
||||
// a place to start as any.
|
||||
//
|
||||
// depth = waterlevel - r6.z => depth in feet (may be negative)
|
||||
// depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath
|
||||
// atten = minAtten + depthNorm * (maxAtten - minAtten);
|
||||
// These are all vector ops.
|
||||
// This provides separate ramp ups for each of the channels (they reach full unfiltered
|
||||
// values at different depths), but doesn't provide separate controls for where they
|
||||
// go to zero (they all go to zero at zero depth). For that we need an offset. An offset
|
||||
// in feet (depth) is probably the most intuitive. So that changes the first calculation
|
||||
// of depth to:
|
||||
// depth = waterlevel - r6.z + offset
|
||||
// = (waterlevel + offset) - r6.z
|
||||
// And since we only need offsets for 3 channels, we can make the waterlevel constant
|
||||
// waterlevel[chan] = watertableheight + offset[chan],
|
||||
// with waterlevel.w = watertableheight.
|
||||
//
|
||||
// So:
|
||||
// c30 = waterlevel + offset
|
||||
// c31 = (maxAtten - minAtten) / depthFalloff
|
||||
// c32 = minAtten.
|
||||
// And in particular:
|
||||
// c30.w = waterlevel
|
||||
// c31.w = 1.f;
|
||||
// c32.w = 0;
|
||||
// So r4.w is the depth of this vertex in feet (once the depth filter below has run).
|
||||
|
||||
// Dot our position with our direction vectors.
|
||||
mul r0, c8, r6.xxxx; // r0 = c8 * pos.x (one lane per wave)
|
||||
mad r0, c9, r6.yyyy, r0; // r0 += c9 * pos.y
|
||||
|
||||
//
|
||||
// dist = mad( dist, kFreq.xyzw, kPhase.xyzw);
|
||||
mul r0, r0, c5;
|
||||
add r0, r0, c6;
|
||||
//
|
||||
// // Now we need dist mod'd into range [-Pi..Pi]
|
||||
// dist *= rcp(kTwoPi);
|
||||
rcp r4, c15.wwww; // r4 = 1 / kTwoPi
|
||||
add r0, r0, c15.zzzz; // shift by +kPi; undone by the "-kPi" after frac()
|
||||
mul r0, r0, r4;
|
||||
// dist = frac(dist);
// expp returns the fractional part of its scalar source in .y, so each lane is
// routed through r1.y in turn; the y lane goes last so it is left in place.
|
||||
expp r1.y, r0.xxxx
|
||||
mov r1.x, r1.yyyy
|
||||
expp r1.y, r0.zzzz
|
||||
mov r1.z, r1.yyyy
|
||||
expp r1.y, r0.wwww
|
||||
mov r1.w, r1.yyyy
|
||||
expp r1.y, r0.yyyy
|
||||
// dist *= kTwoPi;
|
||||
mul r0, r1, c15.wwww;
|
||||
// dist += -kPi;
|
||||
sub r0, r0, c15.zzzz;
|
||||
|
||||
//
|
||||
// sincos(dist, sinDist, cosDist);
|
||||
// sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z (+ r0^7 * vSin.w, added further down)
|
||||
// cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z (+ r0^6 * vCos.w, added further down)
|
||||
mul r1, r0, r0; // r0^2
|
||||
mul r2, r1, r0; // r0^3 - probably stall
|
||||
mul r3, r1, r1; // r0^4
|
||||
mul r4, r1, r2; // r0^5
|
||||
mul r5, r2, r3; // r0^7
|
||||
|
||||
mul r1, r1, c14.yyyy; // r1 = r0^2 * vCos.y
|
||||
mad r2, r2, c13.yyyy, r0; // r2 = r0 + r0^3 * vSin.y
|
||||
add r1, r1, c14.xxxx; // r1 = 1 + r0^2 * vCos.y
|
||||
mad r2, r4, c13.zzzz, r2; // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
|
||||
mad r1, r3, c14.zzzz, r1; // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z
|
||||
|
||||
// r0^7 & r0^6 terms
|
||||
mul r4, r4, r0; // r0^6
|
||||
mad r2, r5, c13.wwww, r2; // r2 += r0^7 * vSin.w
|
||||
mad r1, r4, c14.wwww, r1; // r1 += r0^6 * vCos.w
|
||||
|
||||
// Calc our depth based filtering here into r4 (because we don't use it again
|
||||
// after here, and we need our filtering shortly).
|
||||
sub r4, c30, r6.zzzz; // (waterlevel + offset) - z
|
||||
mul r4, r4, c31;
|
||||
add r4, r4, c32;
|
||||
// Clamp .xyz to range [0..1]
|
||||
min r4.xyz, r4, c16.zzzz;
|
||||
max r4.xyz, r4, c16.xxxx;
|
||||
//mov r4.xyz, c16.xxx; // HACKTEST
|
||||
|
||||
// Calc our filter (see above).
|
||||
mul r11, v5.wwww, c29;
|
||||
max r11, r11, c16.xxxx;
|
||||
min r11, r11, c16.zzzz;
|
||||
|
||||
//mov r2, r1;
|
||||
// r2 == sinDist
|
||||
// r1 == cosDist
|
||||
// sinDist *= filter;
|
||||
mul r2, r2, r11;
|
||||
// sinDist *= kAmplitude.xyzw
|
||||
mul r2, r2, c7;
|
||||
// height = dp4(sinDist, kOne);
|
||||
// accumPos.z += height; (but accumPos.z is currently 0).
|
||||
dp4 r8.x, r2, c16.zzzz;
|
||||
mul r8.y, r8.x, r4.z; // damp by the depth filter's z channel
|
||||
add r8.z, r8.y, c30.w; // + waterlevel
|
||||
max r6.z, r6.z, r8.z;
|
||||
// r8.x == wave height relative to 0
|
||||
// r8.y == dampened wave relative to 0
|
||||
// r8.z == dampened wave height in world space
|
||||
// r6.z == wave height clamped to never go beneath ground level
|
||||
//
|
||||
// cosDist *= filter;
|
||||
mul r1, r1, r11;
|
||||
|
||||
// Pos = (in.x + S, in.y + R, r6.z)
|
||||
// S = sum(k Dir.x A cos())
|
||||
// R = sum(k Dir.y A cos())
|
||||
// c10 = k Dir.x A
|
||||
// c11 = k Dir.y A
|
||||
// S = sum(cosDist * c10);
|
||||
dp4 r7.x, r1, c10;
|
||||
// R = sum(cosDist * c11);
|
||||
dp4 r7.y, r1, c11;
|
||||
|
||||
add r6.xy, r6.xy, r7.xy;
|
||||
|
||||
|
||||
// Bias our vert up a bit to compensate for precision errors.
|
||||
// In particular, our filter coefficients are coming in as
|
||||
// interpolated bytes, so there's bound to be a lot of slop
|
||||
// from that. We've got a free slot in c35.z, so we'll use that.
|
||||
// A better implementation would be to bias and scale our screen
|
||||
// vert, effectively pushing the vert toward the camera without
|
||||
// actually moving it, but this is easier and might work just
|
||||
// as well.
|
||||
add r6.z, r6.z, c35.z;
|
||||
|
||||
//
|
||||
// // Transform position to screen
|
||||
//
|
||||
//
|
||||
//m4x3 r6, v0, c25; // HACKAGE
|
||||
//mov r6.w, c16.z; // HACKAGE
|
||||
//m4x4 oPos, r6, c0; // ADDFOG
|
||||
m4x4 r9, r6, c0;
|
||||
add r10.x, r9.w, c4.x;
|
||||
mul oFog, r10.x, c4.y; // oFog = (transformed w + c4.x) * c4.y
|
||||
mov oPos, r9;
|
||||
|
||||
|
||||
// Dyna Stuff
|
||||
// Constants
|
||||
// c33 = fC1U, fC2U, fC1V, fC2V
|
||||
// c34 = fInitAtten, t, life, 1.f / (life-decay)
|
||||
// c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE
|
||||
//
|
||||
// Vertex Info
|
||||
// v7.z = fBirth (because we don't use it for anything else).
|
||||
//
|
||||
// Initialize r1 to (0,0,0,1); .x, .y and .z are overwritten below, .w stays 1.
|
||||
mov r1, c16.xxxz;
|
||||
// Calc r1.x = age, r1.y = atten
|
||||
// age = t - birth.
|
||||
sub r1.x, c34.y, v7.z;
|
||||
// atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay));
|
||||
// first clamp0_1(age/ramp)
|
||||
mul r1.y, r1.x, c35.y;
|
||||
min r1.y, r1.y, c16.z; // Clamp to one (can't go negative).
|
||||
// now clamp0_1((life-age) / (life-decay));
|
||||
sub r1.z, c34.z, r1.x;
|
||||
mul r1.z, r1.z, c34.w;
|
||||
min r1.z, r1.z, c16.z; // Clamp to one
|
||||
max r1.z, r1.z, c16.x; // Clamp to zero
|
||||
mul r1.y, r1.y, r1.z; // atten is the product of the two terms.
|
||||
|
||||
// color is (atten, atten, atten, 1.f)
|
||||
// Need to calculate opacity we would have had from vs_WaveFixedFin7.inl
|
||||
// Right now that's just modulating by r4.y.
|
||||
mul r0.y, r4.y, c34.x; // depth filter y channel * fInitAtten
|
||||
mul oD0, r0.yyyy, r1.yyyw;
|
||||
//mov oD0, c16.zzzz; // HACKTEST
|
||||
|
||||
// UVW = (inUVW - 0.5) * scale + 0.5
|
||||
// where:
|
||||
// scale = (fC1U / (age * fC2U + 1.f), fC1V / (age * fC2V + 1.f), 1.f, 1.f)
// Only the .xy of the scale is applied below.
|
||||
mov r2, c16.xxxz;
|
||||
mul r2.xy, r1.xx, c33.yw; // (age * fC2U, age * fC2V)
|
||||
add r2.xy, r2.xy, c16.zz;
|
||||
rcp r2.x, r2.x;
|
||||
rcp r2.y, r2.y;
|
||||
mul r2.xy, r2.xy, c33.xz; // * (fC1U, fC1V)
|
||||
sub r1.xy, v7.xy, c16.yy; // age and atten in r1.xy are no longer needed
|
||||
mul r1.xy, r1.xy, r2.xy;
|
||||
add r1.xy, r1.xy, c16.yy;
|
||||
// NOTE(review): r1.z still holds the clamped (life-age)/(life-decay) term from
// above rather than 0, and r1.w is 1 -- confirm nothing consumes oT0.z.
mov oT0, r1;
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user