vs.1.1

dcl_position    v0
dcl_color       v5

// ===========================================================================
// World-space position
// ===========================================================================
// r6.xyz = v0 * l2w (matrix starts at c21). m4x3 doesn't touch w, so r6.w is
// filled in separately from c16.z.
m4x3    r6, v0, c21
mov     r6.w, c16.zzzz

// ===========================================================================
// Input diffuse colour (v5) layout
// ===========================================================================
//   v5.r = overall transparency
//   v5.g = reflection strength (transparency)
//   v5.b = overall wave scaling
//   v5.a = 1 / (2.f * edge length)
//
// Per-wave frequency filtering is documented as:
//   filter = min(max((waveLen * v5.w) - 1, 0), 1.f)
// so a wave effect starts dying out when the wave is 4 times the sampling
// frequency, and is completely filtered at 2 times the sampling frequency.
//
// ===========================================================================
// Depth-based attenuation
// ===========================================================================
// We'd like transparency, reflection strength and wave scaling to be
// auto-calculated from the depth of the water. The frequency filtering (v5.w)
// still has to be calculated offline because it depends on edge length, but
// the first three filterings can be computed from this vertex alone.
//
// Basically, all three should go to zero as the water depth goes to zero.
// Linear falloffs are as good a place to start as any:
//
//   depth     = waterlevel - r6.z         => depth in feet (may be negative)
//   depthNorm = depth / depthFalloff      => 0 at the water table,
//                                            1 at depthFalloff beneath it
//   atten     = minAtten + depthNorm * (maxAtten - minAtten)
//
// These are all vector ops. That gives each channel its own ramp-up (they
// reach full unfiltered values at different depths) but no separate control
// over where each one reaches zero (they all hit zero at zero depth). For
// that we need an offset; an offset in feet (depth) is probably the most
// intuitive, which changes the first calculation to:
//
//   depth = waterlevel - r6.z + offset
//         = (waterlevel + offset) - r6.z
//
// Since only three channels need offsets, the water-level constant becomes
//   waterlevel[chan] = watertableheight + offset[chan]
// with waterlevel.w = watertableheight.
//
// So:
//   c25 = waterlevel + offset
//   c26 = (maxAtten - minAtten) / depthFalloff
//   c27 = minAtten
// Depth-filter constants, .w lanes in particular:
//   c25.w = waterlevel
//   c26.w = 1.f
//   c27.w = 0
// so r4.w (computed further down) is the depth of this vertex in feet.

// ---------------------------------------------------------------------------
// Phase of each wave at this vertex (one wave per lane of r0)
// ---------------------------------------------------------------------------
// Dot our position with our direction vectors (c8 = x parts, c9 = y parts).
mul     r0, c8, r6.xxxx
mad     r0, c9, r6.yyyy, r0

// dist = mad(dist, kFreq.xyzw, kPhase.xyzw)
mul     r0, r0, c5
add     r0, r0, c6

// ---------------------------------------------------------------------------
// Wrap dist into [-Pi..Pi]   (c15.z = kPi, c15.w = kTwoPi)
// ---------------------------------------------------------------------------
// dist = (dist + kPi) * rcp(kTwoPi)
rcp     r4, c15.wwww
add     r0, r0, c15.zzzz
mul     r0, r0, r4

// dist = frac(dist)
// expp is used with a .y destination for the fractional part, one lane at a
// time: x, z and w are copied out of r1.y, and the y lane is done last so
// its result can stay where expp wrote it.
expp    r1.y, r0.xxxx
mov     r1.x, r1.yyyy
expp    r1.y, r0.zzzz
mov     r1.z, r1.yyyy
expp    r1.y, r0.wwww
mov     r1.w, r1.yyyy
expp    r1.y, r0.yyyy

// dist *= kTwoPi
mul     r0, r1, c15.wwww
// dist += -kPi
sub     r0, r0, c15.zzzz

// ---------------------------------------------------------------------------
// sincos(dist, sinDist, cosDist) by polynomial   (c13 = vSin, c14 = vCos)
// ---------------------------------------------------------------------------
//   sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z   (+ r0^7 * vSin.w)
//   cos = 1  + r0^2 * vCos.y + r0^4 * vCos.z   (+ r0^6 * vCos.w)
mul     r1, r0, r0                      // r0^2
mul     r2, r1, r0                      // r0^3 - probably stall
mul     r3, r1, r1                      // r0^4
mul     r4, r1, r2                      // r0^5
mul     r5, r2, r3                      // r0^7

mul     r1, r1, c14.yyyy                // r1 = r0^2 * vCos.y
mad     r2, r2, c13.yyyy, r0            // r2 = r0 + r0^3 * vSin.y
add     r1, r1, c14.xxxx                // r1 = 1 + r0^2 * vCos.y
mad     r2, r4, c13.zzzz, r2            // r2 = r0 + r0^3 * vSin.y + r0^5 * vSin.z
mad     r1, r3, c14.zzzz, r1            // r1 = 1 + r0^2 * vCos.y + r0^4 * vCos.z

// r0^7 & r0^6 terms
mul     r4, r4, r0                      // r0^6
mad     r2, r5, c13.wwww, r2            // sin += r0^7 * vSin.w
mad     r1, r4, c14.wwww, r1            // cos += r0^6 * vCos.w

// ---------------------------------------------------------------------------
// Depth-based filtering into r4
// ---------------------------------------------------------------------------
// r4 is free again from here on, and the filtering is needed shortly.
//   r4 = (c25 - r6.z) * c26 + c27
sub     r4, c25, r6.zzzz
mul     r4, r4, c26
add     r4, r4, c27
// Clamp .xyz to range [0..1]; .w is left unclamped.
min     r4.xyz, r4, c16.zzzz
max     r4.xyz, r4, c16.xxxx

// ---------------------------------------------------------------------------
// Frequency filter into r11
// ---------------------------------------------------------------------------
//   r11 = clamp(v5.w * c24, 0, 1)
// NOTE(review): the filter formula documented for v5.a at the top of the
// file subtracts 1 before clamping; no subtraction happens here. Confirm
// which of the two is intended.
mul     r11, v5.wwww, c24
max     r11, r11, c16.xxxx
min     r11, r11, c16.zzzz

//mov   r2, r1;
// r2 == sinDist
// r1 == cosDist

// sinDist *= filter
mul     r2, r2, r11
// sinDist *= kAmplitude.xyzw
mul     r5, r2, c7                      // r5 is now T = sum(Ai * sin())

// Next step:
//   height = dp4(sinDist, kOne);
//   accumPos.z += height;   (but accumPos.z is currently 0).
dp4     r8.x, r5, c16.zzzz              // sum the four sine lanes (c16.z is the 1.0 constant)
mul     r8.y, r8.x, r4.z                // dampen by the depth filter's z channel
add     r8.z, r8.y, c25.w               // move into world space (c25.w = waterlevel)
max     r6.z, r6.z, r8.z                // CLAMP
// r8.x == wave height relative to 0
// r8.y == dampened wave relative to 0
// r8.z == dampened wave height in world space
// r6.z == wave height clamped to never go beneath ground level

// cosDist *= kAmplitude.xyzw           // Combine?
mul     r7, r1, c7
// cosDist *= filter
mul     r7, r7, r11                     // r7 is now M = sum(Ai * cos())

// ---------------------------------------------------------------------------
// Okay, here we go:
// ---------------------------------------------------------------------------
//   W == sum(k w Dir.x^2 A sin())
//   V == sum(k w Dir.x Dir.y A sin())
//   U == sum(k w Dir.y^2 A sin())
//
//   T == sum(A sin())
//
//   S == sum(k Dir.x A cos())
//   R == sum(k Dir.y A cos())
//
//   Q == sum(k w A cos())
//
//   M == sum(A cos())
//
//   P == sum(w Dir.x A cos())
//   N == sum(w Dir.y A cos())
//
// Then:
//   Pos = (in.x + S, in.y + R, waterheight + T)
//
//   Bin = (1 - W, -V, P)
//   Tan = (-V, 1 - U, N)
//   Nor = (-P, -N, 1 - Q)
//
// But we want the transpose of that to go into r1-r3.
//
// Matching the code below against the table: c29 -> S, c30 -> R, c34 -> W,
// c35 -> V, c36 -> U, c31 -> P, c32 -> N, c33 -> Q.
// NOTE(review): the table lists Q with cos(), but r3.z is built from r5 (the
// sine sums), not r7. Confirm which one is right.

// Horizontal displacement: Pos.x += S, Pos.y += R
dp4     r10.x, r7, c29
add     r6.x, r6.x, r10.x

dp4     r10.x, r7, c30
add     r6.y, r6.y, r10.x

// x lanes of the three rows, then the "1 -" on the diagonal of r1
dp4     r1.x, r5, -c34
dp4     r2.x, r5, -c35
dp4     r3.x, r7, c31
add     r1.x, r1.xxxx, c16.zzzz

// y lanes, then the diagonal of r2
dp4     r1.y, r5, -c35
dp4     r2.y, r5, -c36
dp4     r3.y, r7, c32
add     r2.y, r2.yyyy, c16.zzzz

// z lanes, then the diagonal of r3
dp4     r1.z, r7, -c31
dp4     r2.z, r7, -c32
dp4     r3.z, r5, -c33
add     r3.z, r3.zzzz, c16.zzzz

// ---------------------------------------------------------------------------
// Normalized vector from camera (c17) to vertex, kept in r5.xyz.
// We'll use that a couple of times coming up.
// ---------------------------------------------------------------------------
sub     r5, r6, c17
dp3     r10.x, r5, r5
rsq     r10.x, r10.x
mul     r5, r5, r10.xxxx                // r5.xyz = D
rcp     r5.w, r10.x                     // r5.w = distance from camera to vertex

// Calculate our specular attenuation from and into r5.w.
// r5.w starts off as the distance from vtx to camera. Once we've turned it
// into an attenuation factor, we scale the x and y of our normal map (through
// the transform bases) so that in the distance, the normal map is flat. Note
// that the geometry in the distance isn't necessarily flat. We want to apply
// this scale to the normal read from the normal map before it is transformed
// into surface space.
// r5.w: camera distance -> specular attenuation
//   r5.w = clamp((dist + c11.x) * c11.y, 0, 1)^2 * c11.z
add     r5.w, r5.w, c11.x
mul     r5.w, r5.w, c11.y
min     r5.w, r5.w, c16.z
max     r5.w, r5.w, c16.x
mul     r5.w, r5.w, r5.w                // Square it to account for perspective
mul     r5.w, r5.w, c11.z

// ---------------------------------------------------------------------------
// Normalize?
// ---------------------------------------------------------------------------
// We can either calculate an orthonormal basis from the computed normal, with
//   Binormal = (0,1,0) X Normal,  Tangent = Normal X (1,0,0),
// or compute our basis directly from the partial derivatives, with
//   Binormal = (1, 0, -cosX),  Tangent = (0, 1, -cosY),  Normal = (cosX, cosY, 1)
//
// These work out to identically the same result, so we'll compute directly
// from the partials because it takes 2 fewer instructions.
//
// Note that our basis is NOT orthonormal. The Normal is equal to
// Binormal X Tangent, but Dot(Binormal, Tangent) != 0. The Binormal and
// Tangent are both correct tangents to the surface, and their projections on
// the XY plane are 90 degrees apart, but in 3-space they are not orthogonal.
// Practical implications? Not really. I'm actually not really sure which is
// more "proper" for bump mapping.
//
// Note also that we add when we should subtract and subtract when we should
// add, so that r1, r2, r3 aren't Binormal, Tangent, Normal, but the rows of
// our transform, (Bx, Tx, Nx), (By, Ty, Ny), (Bz, Tz, Nz). See below for
// explanation.
//
// Disabled alternative (cross-product construction), kept for reference:
//
// Binormal = Y % Normal
// Cross product3 is:
//   mul res.xyz, a.yzx, b.zxy
//   mad res.xyz, -a.zxy, b.yzx, res.xyz
// mul  r1.xyz, c16.zxx, r3.zxy;
// mad  r1.xyz, -c16.xxz, r3.yzx, r1.xyz;
//
// Tangent = Normal % X
// mul  r2.xyz, r3.yzx, c16.xzx;
// mad  r2.xyz, -r3.zxy, c16.xxz, r2;
//
//mad   r1, r5.wwww, c16.zxxx, r7.zzxz;
//mad   r2, r5.wwww, c16.xzxx, r7.zzyz;
//mul   r3.xy, r3.xy, r5.wwww;

// ---------------------------------------------------------------------------
// Normal map transform (oT1, oT2, oT3)
// ---------------------------------------------------------------------------
// Note that we're swapping z and y to match our environment map tools in max.
// We do this through our normal map transform (oT1, oT2, oT3), making it a
// concatenation of:
//
// rotate about Z (blue) to turn our map into the wind
//   windRot = |  dirY  -dirX   0  |
//             |  dirX   dirY   0  |
//             |   0      0     1  |
//
// swap our Y and Z axes to match our environment map
//   swapYZ  = |   1      0     0  |
//             |   0      0     1  |
//             |   0      1     0  |
//
// rotate the normal into the surface's tangent space basis
//   basis   = |  Bx     Tx     Nx |
//             |  By     Ty     Ny |
//             |  Bz     Tz     Nz |
//
// Note that we've constructed the basis by taking advantage of the matrix
// being a pure rotation, as noted below, so r1, r2 and r3 are actually
// constructed as:
//   basis   = |  Bx    -By    -Bz |
//             | -Tx     Ty    -Tz |
//             | -Nx    -Ny    -Nz |
//
// Then the final normal map transform is:
//
//   basis * swapYZ * windRot [ * normal ]

// sub  r1.w, c17.x, r6.x;
// sub  r2.w, c17.z, r6.z;
// sub  r3.w, c17.y, r6.y;

// ---------------------------------------------------------------------------
// Environment sphere / "finitized" eye ray
// ---------------------------------------------------------------------------
// Big note here. All this math can blow up if the camera position is outside
// the environment sphere. It's assumed that's dealt with in the app setting
// up the constants. For that reason, the camera position used here might not
// be the real local camera position, which is needed for the angular
// attenuation, so we burn another constant with our pseudo-camera position.
// To restrain the pseudo-camera from leaving the sphere, we make:
//   pseudoPos = envCenter + (realPos - envCenter) * dist * R / (dist + R)
// where dist = |realPos - envCenter|
//
// So, our "finitized" eyeray is:
//   camPos + D * t - envCenter = D * t - (envCenter - camPos)
// with
//   D = (pos - camPos) / |pos - camPos|     // normalized usual eyeray
// and
//   t = D dot F + sqrt( (D dot F)^2 - G )
// with
//   F = (envCenter - camPos)    => c19.xyz
//   G = F^2 - R^2               => c19.w
//   R = environment radius.     => unused
//
// This all derives from the positive root of the equation
//   (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
// in other words, where on a sphere of radius R centered about envCenter
// does the ray from the real camera position through this point hit.
//
// Note that F, G, and R are all constants (one point, two scalars).
//
// So first we calculate D into r0,
// then D dot F into r10.x,
// then (D dot F)^2 - G into r10.y,
// then rsq( (D dot F)^2 - G ) into r9.x,
// then t = r10.z = r10.x + r10.y * r9.x;
// and
//   r0 = D * t - (envCenter - camPos)
//      = r0 * r10.zzzz - F;
//
mov     r0, r5                          // r0 = D
dp3     r10.x, r0, c19                  // r10.x = D dot F
mad     r10.y, r10.x, r10.x, -c19.w     // r10.y = (D dot F)^2 - G
rsq     r9.x, r10.y                     // r9.x = 1/SQRT((D dot F)^2 - G)
mad     r10.z, r10.y, r9.x, r10.x       // r10.z = D dot F + SQRT((D dot F)^2 - G)
mad     r0.xyz, r0, r10.zzz, -c19.xyz   // r0.xyz = D * t - (envCenter - camPos)

// ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to
// get it over the hump.
dp3     r10.x, r0, r0
rsq     r9.x, r10.x
mul     r0.xyz, r0.xyz, r9.xxx

// Negated eye vector goes into the w lane of each basis row.
mov     r1.w, -r0.x
mov     r2.w, -r0.y
mov     r3.w, -r0.z

// ---------------------------------------------------------------------------
// Normalize each basis row and write the normal map transform
// ---------------------------------------------------------------------------
// r0.z = c16.x and r0.w = c16.z (the constants used as 0 and 1 in the clamps
// elsewhere in this shader). r0.w is the pass-through scale for the w lane in
// the muls below; both lanes are still in place when r0 is written to oT0.
mov     r0.zw, c16.zzxz

// Per row: r0.x = r0.y = 1/|row.xyz|, then r0.x is additionally scaled by
// the specular attenuation r5.w. The output gets xy * r0.x, z * r0.y, w * 1.
// r11 collects the normalized z lane of each row; it is dotted with the eye
// vector for the colour attenuation further down.
dp3     r0.x, r1, r1
rsq     r0.xy, r0.x
mul     r0.x, r0.x, r5.w
mul     oT1, r1.xyzw, r0.xxyw
// mul  r8, r1.xyzw, r0.xxxw;           // VISUAL
mul     r11.x, r1.z, r0.y

// Row r2 is written to oT3 and row r3 to oT2 (presumably the Y/Z swap
// described in the normal map transform notes - confirm).
dp3     r0.x, r2, r2
rsq     r0.xy, r0.x
mul     r0.x, r0.x, r5.w
mul     oT3, r2.xyzw, r0.xxyw
// mul  r9, r2.xyzw, r0.xxxw;           // VISUAL
mul     r11.y, r2.z, r0.y

dp3     r0.x, r3, r3
rsq     r0.xy, r0.x
mul     r0.x, r0.x, r5.w
mul     oT2, r3.xyzw, r0.xxyw
// mul  r9, r3.xyzw, r0.xxxw;           // VISUAL
mul     r11.z, r3.z, r0.y

/*
// Want:
//   oT1 = (BIN.x, TAN.x, NORM.x, view2pos.x)
//   oT2 = (BIN.y, TAN.y, NORM.y, view2pos.y)
//   oT3 = (BIN.z, TAN.z, NORM.z, view2pos.z)
// with BIN, TAN, and NORM normalized.
// Unnormalized, we have
//   BIN  = (1, 0, -r7.x)   where r7 == accumCos
//   TAN  = (0, 1, -r7.y)
//   NORM = (r7.x, r7.y, 1)
// So, unnormalized, we have
//   oT1 = (1, 0, r7.x, view2pos.x)
//   oT2 = (0, 1, r7.y, view2pos.y)
//   oT3 = (-r7.x, -r7.y, 1, view2pos.z)
// which is just reversing the signs on the accumCos
// terms above. So the normalized version is just
// reversing the signs on the normalized version above.
*/

//mov   oT3, r4;

// ---------------------------------------------------------------------------
// Transform position to screen
// ---------------------------------------------------------------------------
//m4x3  r6, v0, c21;                    // HACKAGE
//mov   r6.w, c16.z;                    // HACKAGE
//m4x4  oPos, r6, c0;                   // ADDFOG
m4x4    r9, r6, c0
// fog = (clip-space w + c28.x) * c28.y
add     r10.x, r9.w, c28.x
mul     oFog, r10.x, c28.y
//mov   oFog, c16.zzzz;                 // TESTFOGHACK
mov     oPos, r9

// ---------------------------------------------------------------------------
// Transform our uvw
// ---------------------------------------------------------------------------
// Only x and y are written here, both scaled by c10.x; r0.zw keep the values
// set by the "mov r0.zw" above.
mul     r0.x, v0.xxxx, c10.xxxx
mul     r0.y, v0.yyyy, c10.xxxx
//mov   r0.zw, c16.xxxz;
mov     oT0, r0

// ---------------------------------------------------------------------------
// Questionable attenuation follows
// ---------------------------------------------------------------------------
// The normalized camera-to-vertex vector is stashed in r5; negate it to get
// vertex-to-camera and dot that with the computed normal (r11).
dp3     r1.x, -r5, r11
mul     r1.x, r1.x, v5.z
// dp3  r1.x, r5, r3;   // if you want the adjusted normal, you'll need to normalize/swizzle r3

// Map dot=1 => 0, dot=0 => 1
sub     r1.xyzw, c16.zzzz, r1.xxxx
// Alpha only: (r1.w + c16.z) * c16.y
add     r1.w, r1.wwww, c16.zzzz
mul     r1.w, r1.wwww, c16.yyyy
// No need to clamp, since the destination register (in the pixel shader)
// will saturate [0..1] anyway.

// Apply the depth filter: rgb by r4.y, alpha by r4.x.
//%%% mul r1.w, r1.w, r4.x;
//%%% mul r1.xyz, r1.xyz, r4.yyy;
mul     r1, r1, r4.yyyx                 // HACKTESTCOLOR
//mul   r1.xyz, r1, r8.xxx;             // WAVEFACE

// Alpha is further scaled by v5.x and by c4.w.
mul     r1.w, r1.wwww, v5.xxxx
mul     r1.w, r1.wwww, c4.wwww
mul     oD0, r1, c20

mov     oD1, c4                         // SEENORM
//mov   oD1, c16.xxxx;
// mov  oD1, r4.yyyy;
//mov   oD1, c16.zzzz;                  // HACKAGE
// mov  oD1, r9;
// mov  oD1, r8.xzyw;