HDKSKSM
HDKSKSM
HDKSKSM
// Binding/location helper macros.
// In this configuration the *_LOCATION macros expand to nothing, so declarations
// wrapped in them carry no explicit location qualifier.
#define ATTRIBUTE_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)
// The *_BINDING macros expand to a layout() qualifier carrying the binding index x
// (plus the packing or image format where one is passed in).
#define UBO_BINDING(packing, x) layout(packing, binding = x)
#define SAMPLER_BINDING(x) layout(binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(binding = x)
// Storage buffers always use std430 packing here.
#define SSBO_BINDING(x) layout(std430, binding = x)
#define IMAGE_BINDING(format, x) layout(format, binding = x)
#define VARYING_LOCATION(x)
// One light source; VSBlock holds eight of these in clights[].
// NOTE: this struct lives inside a std140 uniform block, so member order and
// types determine the buffer layout - do not reorder.
struct Light {
// Integer light colour. The visible lighting code converts it to float4,
// scales it by the attenuation factor and rounds it back to int4.
int4 color;
// Presumably the angle (cosine) attenuation coefficients - not referenced in
// the visible code; confirm against the full lighting function.
float4 cosatt;
// Presumably the distance attenuation coefficients - confirm as above.
float4 distatt;
// Light position; pos.xyz minus the vertex position is normalised to obtain
// the light direction.
float4 pos;
// Presumably the light's direction vector - not referenced in the visible code.
float4 dir;
};
// Vertex-stage uniform block, std140 packing, binding point 2.
// Member order and types define the std140 layout; presumably this has to match
// the structure filled in by the host application, so do not reorder or retype.
UBO_BINDING(std140, 2) uniform VSBlock {
// Bitmask of vertex components. The visible code tests 1024u (VB_HAS_NORMAL)
// and 8192u << chan (VB_HAS_COL0 / next colour).
uint components;
// When non-zero, texgen results are additionally run through a post matrix
// taken from cpostmtx.
uint xfmem_dualTexInfo;
// Number of colour channels handed on: colors_0 is zeroed when this is 0 and
// colors_1 is zeroed when this is <= 1.
uint xfmem_numColorChans;
// NOTE(review): presumably the fallback colour for vertices without a colour
// attribute, as packed integer and as float4 - not referenced in the visible code.
uint missing_color_hex;
float4 missing_color_value;
// NOTE(review): presumably position/normal matrix rows and projection rows -
// not referenced in the visible code; confirm.
float4 cpnmtx[6];
float4 cproj[4];
// Material registers: cmtrl[chan + 2u] is the starting material value for a
// channel, cmtrl[chan] is loaded into the light accumulator when the
// per-channel source bit is clear.
int4 cmtrl[4];
// The eight light sources, indexed by light number.
Light clights[8];
// NOTE(review): presumably texture / transform / normal matrix rows - not
// referenced in the visible code; confirm.
float4 ctexmtx[24];
float4 ctrmtx[64];
float4 cnmtx[32];
// Post-transform matrix rows. Three consecutive rows are read starting at a
// 6-bit base index; each index is masked with 0x3f so it stays inside the array.
float4 cpostmtx[64];
// Used for the final clip-space adjustment: .zw remap depth, .xy offset (and
// provide the sign flip for) the x/y position.
float4 cpixelcenter;
// Viewport size factor; .xy * 0.5 converts between clip space and pixels when
// vertices with w == 1 are snapped to whole pixels.
float2 cviewport;
// Packed per-index XF registers; see the accessor macros below for the
// meaning of each component.
uint4 xfmem_pack1[8];
// NOTE(review): presumably fallback tangent/binormal vectors - not referenced
// in the visible code; confirm.
float4 ctangent;
float4 cbinormal;
// Accessors for xfmem_pack1: x = texMtxInfo, y = postMtxInfo, z = colour
// channel control, w = alpha channel control.
#define xfmem_texMtxInfo(i) (xfmem_pack1[(i)].x)
#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)
#define xfmem_color(i) (xfmem_pack1[(i)].z)
#define xfmem_alpha(i) (xfmem_pack1[(i)].w)
};
// Values produced by the vertex shader; at the end of main each member is
// copied to the corresponding stage output.
struct VS_OUTPUT {
// Clip-space position; after depth/pixel-centre adjustment it is written to
// gl_Position.
float4 pos;
// Lit colour for channels 0 and 1 (zeroed when the channel is disabled by
// xfmem_numColorChans).
float4 colors_0;
float4 colors_1;
// Generated texture coordinates for texgens 0..2.
float3 tex0;
float3 tex1;
float3 tex2;
// Copy of pos taken before the depth and pixel-centre adjustments are applied.
float4 clipPos;
};
switch (attnfunc) {
case 0x0u /* No attenuation */:
case 0x2u /* Directional light attenuation */:
ldir = normalize(clights[index].pos.xyz - pos.xyz);
attn = 1.0;
if (length(ldir) == 0.0)
ldir = normal;
break;
default:
attn = 1.0;
ldir = normal;
break;
}
switch (diffusefunc) {
case 0x0u /* None */:
return int4(round(attn * float4(clights[index].color)));
default:
return int4(0, 0, 0, 0);
}
}
// Normal matrix
float3 N0;
float3 N1;
float3 N2;
// The scale of the transform matrix is used to control the size of the emboss map
// effect by changing the scale of the transformed binormals (which only get used by
// emboss map texgens). By normalising the first transformed normal (which is used
// by lighting calculations and needs to be unit length), the same transform matrix
// can do double duty, scaling for emboss mapping, and not scaling for lighting.
float3 _normal = float3(0.0, 0.0, 0.0);
if ((components & 1024u) != 0u) // VB_HAS_NORMAL
_normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2,
rawnormal)));
// Lighting
for (uint chan = 0u; chan < 2u; chan++) {
uint colorreg = xfmem_color(chan);
uint alphareg = xfmem_alpha(chan);
int4 mat = cmtrl[chan + 2u];
int4 lacc = int4(255, 255, 255, 255);
if (bitfieldExtract(uint(colorreg), 0, 1) != 0u)
mat.xyz = int3(round(((chan == 0u) ? vertex_color_0.xyz : vertex_color_1.xyz) *
255.0));
if (bitfieldExtract(uint(alphareg), 0, 1) != 0u)
mat.w = int(round(((chan == 0u) ? vertex_color_0.w : vertex_color_1.w) *
255.0));
else
mat.w = cmtrl [chan + 2u].w;
if (bitfieldExtract(uint(colorreg), 1, 1) != 0u) {
if (bitfieldExtract(uint(colorreg), 6, 1) != 0u)
lacc.xyz = int3(round(((chan == 0u) ? vertex_color_0.xyz :
vertex_color_1.xyz) * 255.0));
else
lacc.xyz = cmtrl [chan].xyz;
if (bitfieldExtract(uint(alphareg), 1, 1) != 0u) {
if (bitfieldExtract(uint(alphareg), 6, 1) != 0u) {
if ((components & (8192u << chan)) != 0u) // VB_HAS_COL0
lacc.w = int(round(((chan == 0u) ? vertex_color_0.w : vertex_color_1.w) *
255.0));
else if ((components & 8192u) != 0u) // VB_HAS_COLO0
lacc.w = int(round(vertex_color_0.w * 255.0));
else
lacc.w = 255;
} else {
lacc.w = cmtrl [chan].w;
}
// Hopefully GPUs that can support dynamic indexing will optimize this.
float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;
switch (chan) {
case 0u: o.colors_0 = lit_color; break;
case 1u: o.colors_1 = lit_color; break;
}
}
// Convert NaN to 1
if (dolphin_isnan(coord.x)) coord.x = 1.0;
if (dolphin_isnan(coord.y)) coord.y = 1.0;
if (dolphin_isnan(coord.z)) coord.z = 1.0;
// first transformation
uint texgentype = bitfieldExtract(uint(texMtxInfo), 4, 3);
float3 output_tex;
switch (texgentype)
{
case 0x1u /* Emboss map (used when bump mapping) */:
{
uint light = bitfieldExtract(uint(texMtxInfo), 15, 3);
uint source = bitfieldExtract(uint(texMtxInfo), 12, 3);
switch (source) {
case 0u: output_tex.xyz = o.tex0; break;
case 1u: output_tex.xyz = o.tex1; break;
case 2u: output_tex.xyz = o.tex2; break;
default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;
}
float3 ldir = normalize(clights[light].pos.xyz - pos.xyz);
output_tex.xyz += float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);
}
break;
if (xfmem_dualTexInfo != 0u) {
uint postMtxInfo = xfmem_postMtxInfo(texgen); uint base_index =
bitfieldExtract(uint(postMtxInfo), 0, 6);
float4 P0 = cpostmtx[base_index & 0x3fu];
float4 P1 = cpostmtx[(base_index + 1u) & 0x3fu];
float4 P2 = cpostmtx[(base_index + 2u) & 0x3fu];
if (bitfieldExtract(uint(postMtxInfo), 8, 1) != 0u)
output_tex.xyz = normalize(output_tex.xyz);
// multiply by postmatrix
output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,
dot(P1.xyz, output_tex.xyz) + P1.w,
dot(P2.xyz, output_tex.xyz) + P2.w);
}
// Hopefully GPUs that can support dynamic indexing will optimize this.
switch (texgen) {
case 0u: o.tex0 = output_tex; break;
case 1u: o.tex1 = output_tex; break;
case 2u: o.tex2 = output_tex; break;
}
}
// The number of colors available to TEV is determined by numColorChans.
// We have to provide the fields to match the interface, so set to zero
// if it's not enabled.
if (xfmem_numColorChans == 0u)
o.colors_0 = float4(0.0, 0.0, 0.0, 0.0);
if (xfmem_numColorChans <= 1u)
o.colors_1 = float4(0.0, 0.0, 0.0, 0.0);
o.clipPos = o.pos;
o.pos.z = o.pos.w * cpixelcenter.w - o.pos.z * cpixelcenter.z;
o.pos.z = o.pos.z * 2.0 - o.pos.w;
o.pos.xy *= sign(cpixelcenter.xy * float2(1.0, -1.0));
o.pos.xy = o.pos.xy - o.pos.w * cpixelcenter.xy;
if (o.pos.w == 1.0f)
{
float ss_pixel_x = ((o.pos.x + 1.0f) * (cviewport.x * 0.5f));
float ss_pixel_y = ((o.pos.y + 1.0f) * (cviewport.y * 0.5f));
ss_pixel_x = round(ss_pixel_x);
ss_pixel_y = round(ss_pixel_y);
o.pos.x = ((ss_pixel_x / (cviewport.x * 0.5f)) - 1.0f);
o.pos.y = ((ss_pixel_y / (cviewport.y * 0.5f)) - 1.0f);
}
tex0.xyz = o.tex0;
tex1.xyz = o.tex1;
tex2.xyz = o.tex2;
clipPos = o.clipPos;
colors_0 = o.colors_0;
colors_1 = o.colors_1;
gl_Position = o.pos;
}
#version 320 es
// Binding/location helper macros for this shader.
// The *_LOCATION macros expand to nothing in this configuration, so wrapped
// declarations carry no explicit location qualifier.
#define ATTRIBUTE_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)
// The *_BINDING macros expand to a layout() qualifier carrying the binding index x
// (plus the packing or image format where one is passed in).
#define UBO_BINDING(packing, x) layout(packing, binding = x)
#define SAMPLER_BINDING(x) layout(binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(binding = x)
// Storage buffers always use std430 packing here.
#define SSBO_BINDING(x) layout(std430, binding = x)
#define IMAGE_BINDING(format, x) layout(format, binding = x)
#define VARYING_LOCATION(x)
int4 ret;
ret.r = color[bitfieldExtract(uint(bpmem_tevksel(s * 2u)), 0, 2)];
ret.g = color[bitfieldExtract(uint(bpmem_tevksel(s * 2u)), 2, 2)];
ret.b = color[bitfieldExtract(uint(bpmem_tevksel(s * 2u + 1u)), 0, 2)];
ret.a = color[bitfieldExtract(uint(bpmem_tevksel(s * 2u + 1u)), 2, 2)];
return ret;
}
// Add bias to D
if (bias == 1u) D += 128;
else if (bias == 2u) D -= 128;
// TODO: Is this rounding bias still added when the scale is divide by 2?
// Currently we do not apply it.
if (scale != 3u)
lerp = lerp + (op ? 127 : 128);
// Add/Subtract D
if (op) // Subtract
result = D - result;
else // Add
result = D + result;
// Most of the Scale was moved inside the lerp for improved precision
// But we still do the divide by 2 here
if (scale == 3u)
result = result >> 1;
return result;
}
// Add bias to D
if (bias == 1u) D += 128;
else if (bias == 2u) D -= 128;
// TODO: Is this rounding bias still added when the scale is divide by 2?
// Currently we do not apply it.
if (scale != 3u)
lerp = lerp + (op ? 127 : 128);
// Add/Subtract D
if (op) // Subtract
result = D - result;
else // Add
result = D + result;
// Most of the Scale was moved inside the lerp for improved precision
// But we still do the divide by 2 here
if (scale == 3u)
result = result >> 1;
return result;
}
// Mutable TEV state carried through the stages of the pixel shader's main().
struct State {
// The four TEV registers: [0] = prev, [1] = c0, [2] = c1, [3] = c2.
// Initialised from color[0..3]; each stage writes its result to the register
// selected by its destination field.
int4 Reg[4];
// Texture colour state; starts at zero and is later combined with czbias[0]
// to compute the depth-texture value.
int4 TexColor;
// Starts at zero. NOTE(review): presumably the indirect-texture bump alpha -
// its update is not in the visible code; confirm.
int AlphaBump;
};
// Per-stage values passed to the TEV input selectors alongside State.
struct StageState {
// NOTE(review): presumably the index of the stage being evaluated - confirm.
uint stage;
// NOTE(review): presumably the stage's packed order register - not referenced
// in the visible code; confirm.
uint order;
// NOTE(review): presumably the packed colour-combiner register for the stage -
// its decoding is not in the visible code; confirm.
uint cc;
// Packed alpha-combiner register: inputs a/b/c/d, bias, op, clamp, scale and
// destination are extracted from it with bitfieldExtract.
uint ac;
};
FORCE_EARLY_Z;
void main()
{
float4 rawpos = gl_FragCoord;
#ifdef FB_FETCH_VALUE
float4 initial_ocol0 = FB_FETCH_VALUE;
#else
float4 initial_ocol0 = real_ocol0;
#endif
float4 ocol0;
float4 ocol1;
int layer = 0;
int3 tevcoord = int3(0, 0, 0);
State s;
s.TexColor = int4(0, 0, 0, 0);
s.AlphaBump = 0;
s.Reg[0] = color[0];
s.Reg[1] = color[1];
s.Reg[2] = color[2];
s.Reg[3] = color[3];
uint num_stages = bitfieldExtract(uint(bpmem_genmode), 10, 4);
// Indirect textures
uint tevind = bpmem_tevind(stage);
if (tevind != 0u)
{
uint bs = bitfieldExtract(uint(tevind), 7, 2);
uint fmt = bitfieldExtract(uint(tevind), 2, 2);
uint bias = bitfieldExtract(uint(tevind), 4, 3);
uint bt = bitfieldExtract(uint(tevind), 0, 2);
uint matrix_index = bitfieldExtract(uint(tevind), 9, 2);
uint matrix_id = bitfieldExtract(uint(tevind), 11, 2);
int2 indtevtrans = int2(0, 0);
if (bpmem_iref(bt) != 0u) {
int3 indcoord;
{
uint iref = bpmem_iref(bt);
uint texcoord = bitfieldExtract(iref, 0, 3);
uint texmap = bitfieldExtract(iref, 8, 3);
int2 fixedPoint_uv = getTexCoord(texcoord);
// Matrix multiply
if (matrix_index != 0u)
{
uint mtxidx = 2u * (matrix_index - 1u);
int shift = cindmtx[mtxidx].w;
switch (matrix_id)
{
case 0u: // 3x2 S0.10 matrix
indtevtrans = int2(idot(cindmtx[mtxidx].xyz, indcoord),
idot(cindmtx[mtxidx + 1u].xyz, indcoord)) >> 3;
break;
case 1u: // S matrix, S17.7 format
indtevtrans = (fixedPoint_uv * indcoord.xx) >> 8;
break;
case 2u: // T matrix, S17.7 format
indtevtrans = (fixedPoint_uv * indcoord.yy) >> 8;
break;
}
if (shift >= 0)
indtevtrans = indtevtrans >> shift;
else
indtevtrans = indtevtrans << ((-shift) & 31);
}
}
// Wrapping
uint sw = bitfieldExtract(uint(tevind), 13, 3);
uint tw = bitfieldExtract(uint(tevind), 16, 3);
int2 wrapped_coord = int2(Wrap(fixedPoint_uv.x, sw), Wrap(fixedPoint_uv.y,
tw));
int3 color;
if (color_bias != 3u) { // Normal mode
color = tevLerp3(color_A, color_B, color_C, color_D, color_bias, color_op,
color_scale);
} else { // Compare mode
// op 6 and 7 do a select per color channel
if (color_compare_op == 6u) {
// TevCompareMode::RGB8, TevComparison::GT
color.r = (color_A.r > color_B.r) ? color_C.r : 0;
color.g = (color_A.g > color_B.g) ? color_C.g : 0;
color.b = (color_A.b > color_B.b) ? color_C.b : 0;
} else if (color_compare_op == 7u) {
// TevCompareMode::RGB8, TevComparison::EQ
color.r = (color_A.r == color_B.r) ? color_C.r : 0;
color.g = (color_A.g == color_B.g) ? color_C.g : 0;
color.b = (color_A.b == color_B.b) ? color_C.b : 0;
} else {
// The remaining ops do one compare which selects all 3 channels
color = tevCompare(color_compare_op, color_A, color_B) ? color_C :
int3(0, 0, 0);
}
color = color_D + color;
}
// Clamp result
if (color_clamp)
color = clamp(color, 0, 255);
else
color = clamp(color, -1024, 1023);
// Write result to the correct input register of the next stage
if (color_dest < 2u) {
if (color_dest < 1u) {
s.Reg[0].rgb = color; // prev (0)
} else {
s.Reg[1].rgb = color; // c0 (1)
}
} else {
if (color_dest < 3u) {
s.Reg[2].rgb = color; // c1 (2)
} else {
s.Reg[3].rgb = color; // c2 (3)
}
}
// Alpha Combiner
uint alpha_a = bitfieldExtract(uint(ss.ac), 13, 3);
uint alpha_b = bitfieldExtract(uint(ss.ac), 10, 3);
uint alpha_c = bitfieldExtract(uint(ss.ac), 7, 3);
uint alpha_d = bitfieldExtract(uint(ss.ac), 4, 3);
uint alpha_bias = bitfieldExtract(uint(ss.ac), 16, 2);
bool alpha_op = bool(bitfieldExtract(uint(ss.ac), 18, 1));
bool alpha_clamp = bool(bitfieldExtract(uint(ss.ac), 19, 1));
uint alpha_scale = bitfieldExtract(uint(ss.ac), 20, 2);
uint alpha_dest = bitfieldExtract(uint(ss.ac), 22, 2);
uint alpha_compare_op = alpha_scale << 1 | uint(alpha_op);
int alpha_A = 0;
int alpha_B = 0;
if (alpha_bias != 3u || alpha_compare_op > 5u) {
// Small optimisation here: alpha_A and alpha_B are unused by compare ops 0-5
alpha_A = selectAlphaInput(s, ss, colors_0, colors_1, alpha_a) & 255;
alpha_B = selectAlphaInput(s, ss, colors_0, colors_1, alpha_b) & 255;
};
int alpha_C = selectAlphaInput(s, ss, colors_0, colors_1, alpha_c) & 255;
int alpha_D = selectAlphaInput(s, ss, colors_0, colors_1, alpha_d); // 10
// bits + sign
int alpha;
if (alpha_bias != 3u) { // Normal mode
alpha = tevLerp(alpha_A, alpha_B, alpha_C, alpha_D, alpha_bias, alpha_op,
alpha_scale);
} else { // Compare mode
if (alpha_compare_op == 6u) {
// TevCompareMode::A8, TevComparison::GT
alpha = (alpha_A > alpha_B) ? alpha_C : 0;
} else if (alpha_compare_op == 7u) {
// TevCompareMode::A8, TevComparison::EQ
alpha = (alpha_A == alpha_B) ? alpha_C : 0;
} else {
// All remaining alpha compare ops actually compare the color channels
alpha = tevCompare(alpha_compare_op, color_A, color_B) ? alpha_C : 0;
}
alpha = alpha_D + alpha;
}
// Clamp result
if (alpha_clamp)
alpha = clamp(alpha, 0, 255);
else
alpha = clamp(alpha, -1024, 1023);
int4 TevResult;
TevResult.xyz = getTevReg(s, bitfieldExtract(uint(bpmem_combiners(num_stages).x),
22, 2)).xyz;
TevResult.w = getTevReg(s, bitfieldExtract(uint(bpmem_combiners(num_stages).y),
22, 2)).w;
TevResult &= 255;
// Whatever texture was in our last stage, it's now our depth texture
ztex += idot(s.TexColor.xyzw, czbias[0].xyzw);
ztex += (bpmem_ztex_op == 1u) ? zCoord : 0;
zCoord = ztex & 0xFFFFFF;
}
// Alpha Test
#define discard_fragment discard
if (bpmem_alphaTest != 0u) {
bool comp0 = alphaCompare(TevResult.a, alphaRef.r,
bitfieldExtract(uint(bpmem_alphaTest), 16, 3));
bool comp1 = alphaCompare(TevResult.a, alphaRef.g,
bitfieldExtract(uint(bpmem_alphaTest), 19, 3));
// These if statements are written weirdly to work around Intel and Qualcomm
// bugs with handling booleans.
switch (bitfieldExtract(uint(bpmem_alphaTest), 22, 2)) {
case 0u: // AND
if (comp0 && comp1) break; else discard_fragment; break;
case 1u: // OR
if (comp0 || comp1) break; else discard_fragment; break;
case 2u: // XOR
if (comp0 != comp1) break; else discard_fragment; break;
case 3u: // XNOR
if (comp0 == comp1) break; else discard_fragment; break;
}
}
// Fog
uint fog_function = bitfieldExtract(uint(bpmem_fogParam3), 21, 3);
if (fog_function != 0x0u /* Off (no fog) */) {
// TODO: This all needs to be converted from float to fixed point
float ze;
if (bitfieldExtract(uint(bpmem_fogParam3), 20, 1) == 0u) {
// perspective
// ze = A/(B - (Zs >> B_SHF))
ze = (cfogf.x * 16777216.0) / float(cfogi.y - (zCoord >> cfogi.w));
} else {
// orthographic
// ze = a*Zs (here, no B_SHF)
ze = cfogf.x * float(zCoord) / 16777216.0;
}
// Logic Ops
if (logic_op_enable) {
int4 fb_value = iround(initial_ocol0 * 255.0); switch (logic_op_mode) {
case 0u: TevResult = int4(0, 0, 0, 0); break;
case 1u: TevResult = TevResult & fb_value; break;
case 2u: TevResult = TevResult & ~fb_value; break;
case 3u: TevResult = TevResult; break;
case 4u: TevResult = ~TevResult & fb_value; break;
case 5u: TevResult = fb_value; break;
case 6u: TevResult = TevResult ^ fb_value; break;
case 7u: TevResult = TevResult | fb_value; break;
case 8u: TevResult = ~(TevResult | fb_value); break;
case 9u: TevResult = ~(TevResult ^ fb_value); break;
case 10u: TevResult = ~fb_value; break;
case 11u: TevResult = TevResult | ~fb_value; break;
case 12u: TevResult = ~TevResult; break;
case 13u: TevResult = ~TevResult | fb_value; break;
case 14u: TevResult = ~(TevResult & fb_value); break;
case 15u: TevResult = int4(255, 255, 255, 255); break;
}
TevResult &= 0xff;
}
if (bpmem_rgba6_format)
ocol0.rgb = float3(TevResult.rgb >> 2) / 63.0;
else
ocol0.rgb = float3(TevResult.rgb) / 255.0;
if (bpmem_dstalpha != 0u)
ocol0.a = float(bitfieldExtract(uint(bpmem_dstalpha), 0, 8) >> 2) / 63.0;
else
ocol0.a = float(TevResult.a >> 2) / 63.0;