// HDKSKSM
//
// Download as txt, pdf, or txt
// Download as txt, pdf, or txt
// You are on page 1 of 27

#version 320 es

// Expands to a declaration requesting early fragment tests (used by the pixel shader).
#define FORCE_EARLY_Z layout(early_fragment_tests) in

// Location qualifiers expand to nothing in this configuration, so attribute,
// varying and fragment-output locations are not fixed by the shader text.
// Binding qualifiers expand to explicit layout(binding = x) declarations.
#define ATTRIBUTE_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)
#define UBO_BINDING(packing, x) layout(packing, binding = x)
#define SAMPLER_BINDING(x) layout(binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(binding = x)
#define SSBO_BINDING(x) layout(std430, binding = x)
#define IMAGE_BINDING(format, x) layout(format, binding = x)

#define VARYING_LOCATION(x)

// Optional extensions; "enable" means the shader still compiles if one is absent.
#extension GL_ANDROID_extension_pack_es31a : enable

#extension GL_EXT_blend_func_extended : enable

#extension GL_EXT_shader_framebuffer_fetch: enable


// Qualifier used for the fragment colour output so it can also be read back.
#define FRAGMENT_INOUT inout

// Default precisions: everything below is computed at highp.
precision highp float;


precision highp int;
precision highp sampler2DArray;
precision highp usamplerBuffer;
precision highp sampler2DMS;
precision highp image2DArray;
#define API_OPENGL 1
// HLSL-style type and function aliases used throughout the shader body.
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
// NOTE: because of this alias, any identifier spelled "lerp" becomes "mix".
#define lerp mix
// Vertex UberShader for 3 texgens

// Parameters of one hardware light, as consumed by CalculateLighting().
// Field order is part of the std140 uniform layout; do not reorder.
struct Light {
int4 color;     // integer colour; converted to float and scaled by the attenuation factor
float4 cosatt;  // .xyz: coefficients applied to (1, a, a*a) for the angular attenuation term
float4 distatt; // .xyz: coefficients of the attenuation divisor polynomial
float4 pos;     // .xyz: light position (subtracted from the transformed vertex position)
float4 dir;     // .xyz: light direction used by the point/spot attenuation modes
};
// Vertex-stage uniform block (std140, binding 2). Member order defines the
// buffer layout expected by the host; do not reorder.
UBO_BINDING(std140, 2) uniform VSBlock {
uint components;             // bitmask of vertex components present (tested with VB_HAS_* bits in main)
uint xfmem_dualTexInfo;      // non-zero: texgen results are additionally multiplied by a post matrix
uint xfmem_numColorChans;    // colour channels passed on; main zeroes colors_0/colors_1 when disabled
uint missing_color_hex;      // not referenced in the visible shader code
float4 missing_color_value;  // colour used when the vertex format carries no colour
float4 cpnmtx[6];            // shared matrix: rows 0-2 position, rows 3-5 normal
float4 cproj[4];             // projection matrix rows
int4 cmtrl[4];               // [chan]: initial light accumulator, [chan + 2]: material colour
Light clights[8];            // the eight lights
float4 ctexmtx[24];          // texgen matrices, indexed at 3 * texgen
float4 ctrmtx[64];           // transform matrices (per-vertex position and texture matrices)
float4 cnmtx[32];            // normal matrices for the per-vertex matrix path
float4 cpostmtx[64];         // post-transform matrices for texgens
float4 cpixelcenter;         // used by main for the final depth / xy adjustment
float2 cviewport;            // viewport size used when snapping w == 1 vertices to pixels
uint4 xfmem_pack1[8];        // packed per-index registers, read through the accessors below
float4 ctangent;             // fallback tangent when the vertex has none
float4 cbinormal;            // fallback binormal when the vertex has none
// Accessors for the four registers packed into each xfmem_pack1 element.
#define xfmem_texMtxInfo(i) (xfmem_pack1[(i)].x)
#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)
#define xfmem_color(i) (xfmem_pack1[(i)].z)
#define xfmem_alpha(i) (xfmem_pack1[(i)].w)
};
// Local staging struct for everything main() computes before copying the
// values into the stage outputs and gl_Position.
struct VS_OUTPUT {
float4 pos;      // projected position; adjusted at the end of main and written to gl_Position
float4 colors_0; // lit colour for channel 0
float4 colors_1; // lit colour for channel 1
float3 tex0;     // generated texture coordinate 0
float3 tex1;     // generated texture coordinate 1
float3 tex2;     // generated texture coordinate 2
float4 clipPos;  // copy of pos taken before the final depth / pixel-centre adjustments
};

// NaN test used by the texgen code; maps directly onto the built-in isnan().
#define dolphin_isnan(f) isnan(f)


// Returns the integer colour contribution of light `index` for one vertex.
//   index       - element of clights[] to evaluate
//   attnfunc    - attenuation mode (0/2: none or directional, 1: point, 3: spot)
//   diffusefunc - diffuse mode (0: none, 1: signed N.L, 2: clamped N.L)
//   pos         - transformed vertex position
//   normal      - transformed vertex normal
// Any other diffusefunc value yields int4(0).
int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos,
                       float3 normal) {
  float3 toLight;
  float atten;

  if (attnfunc == 0x0u || attnfunc == 0x2u) {
    // No attenuation / directional light attenuation
    toLight = normalize(clights[index].pos.xyz - pos.xyz);
    atten = 1.0;
    if (length(toLight) == 0.0)
      toLight = normal;
  } else if (attnfunc == 0x1u) {
    // Point light attenuation
    toLight = normalize(clights[index].pos.xyz - pos.xyz);
    atten = (dot(normal, toLight) >= 0.0) ? max(0.0, dot(normal, clights[index].dir.xyz)) : 0.0;
    float3 angleCoeffs = clights[index].cosatt.xyz;
    float3 divisorCoeffs;
    if (diffusefunc == 0x0u /* None */)
      divisorCoeffs = clights[index].distatt.xyz;
    else
      divisorCoeffs = normalize(clights[index].distatt.xyz);
    // Both polynomials are evaluated on the same (1, a, a*a) vector.
    atten = max(0.0, dot(angleCoeffs, float3(1.0, atten, atten*atten))) /
            dot(divisorCoeffs, float3(1.0, atten, atten*atten));
  } else if (attnfunc == 0x3u) {
    // Spot light attenuation
    toLight = clights[index].pos.xyz - pos.xyz;
    float distSq = dot(toLight, toLight);
    float distance = sqrt(distSq);
    toLight = toLight / distance;
    atten = max(0.0, dot(toLight, clights[index].dir.xyz));
    atten = max(0.0, clights[index].cosatt.x + clights[index].cosatt.y * atten +
                     clights[index].cosatt.z * atten * atten) /
            dot(clights[index].distatt.xyz, float3(1.0, distance, distSq));
  } else {
    atten = 1.0;
    toLight = normal;
  }

  if (diffusefunc == 0x0u) {
    // None
    return int4(round(atten * float4(clights[index].color)));
  }
  if (diffusefunc == 0x1u) {
    // Sign
    return int4(round(atten * dot(toLight, normal) * float4(clights[index].color)));
  }
  if (diffusefunc == 0x2u) {
    // Clamp
    return int4(round(atten * max(0.0, dot(toLight, normal)) *
                      float4(clights[index].color)));
  }
  return int4(0, 0, 0, 0);
}

// Vertex inputs. Which of them carry valid data is described by the
// `components` bitmask in VSBlock.
ATTRIBUTE_LOCATION(0) in float4 rawpos;


ATTRIBUTE_LOCATION(1) in uint4 posmtx; // .r: index of the per-vertex position matrix in ctrmtx
ATTRIBUTE_LOCATION(2) in float3 rawnormal;
ATTRIBUTE_LOCATION(3) in float3 rawtangent;
ATTRIBUTE_LOCATION(4) in float3 rawbinormal;
ATTRIBUTE_LOCATION(5) in float4 rawcolor0;
ATTRIBUTE_LOCATION(6) in float4 rawcolor1;
// Texture coordinates; main reads .z of rawtex0..2 as a texture matrix index.
ATTRIBUTE_LOCATION(8) in float3 rawtex0;
ATTRIBUTE_LOCATION(9) in float3 rawtex1;
ATTRIBUTE_LOCATION(10) in float3 rawtex2;
ATTRIBUTE_LOCATION(11) in float3 rawtex3;
ATTRIBUTE_LOCATION(12) in float3 rawtex4;
ATTRIBUTE_LOCATION(13) in float3 rawtex5;
ATTRIBUTE_LOCATION(14) in float3 rawtex6;
ATTRIBUTE_LOCATION(15) in float3 rawtex7;
// Vertex stage outputs, written at the end of main from the VS_OUTPUT staging struct.
VARYING_LOCATION(0) out float4 colors_0;
VARYING_LOCATION(1) out float4 colors_1;
VARYING_LOCATION(2) out float3 tex0;
VARYING_LOCATION(3) out float3 tex1;
VARYING_LOCATION(4) out float3 tex2;
VARYING_LOCATION(5) out float4 clipPos;
// Vertex shader entry point. In order:
//   1. selects the position/normal matrices (per-vertex or shared),
//   2. transforms and projects the position,
//   3. transforms normal, tangent and binormal,
//   4. resolves the two vertex colours and evaluates lighting per channel,
//   5. generates three texture coordinates (texgen),
//   6. applies the final depth / pixel-centre adjustments and writes outputs.
// Review fixes relative to the scraped text: line-wrapped comment tails are
// back inside their comments, and the closing braces of the source-row switch
// and the texgen-type switch are restored.
void main()
{
  VS_OUTPUT o;
  // Position matrix
  float4 P0;
  float4 P1;
  float4 P2;

  // Normal matrix
  float3 N0;
  float3 N1;
  float3 N2;

  if ((components & 2u) != 0u) { // VB_HAS_POSMTXIDX
    // Vertex format has a per-vertex matrix
    int posidx = int(posmtx.r);
    P0 = ctrmtx[posidx];
    P1 = ctrmtx[posidx+1];
    P2 = ctrmtx[posidx+2];

    int normidx = posidx >= 32 ? (posidx - 32) : posidx;
    N0 = cnmtx[normidx].xyz;
    N1 = cnmtx[normidx+1].xyz;
    N2 = cnmtx[normidx+2].xyz;
  } else {
    // One shared matrix
    P0 = cpnmtx[0];
    P1 = cpnmtx[1];
    P2 = cpnmtx[2];
    N0 = cpnmtx[3].xyz;
    N1 = cpnmtx[4].xyz;
    N2 = cpnmtx[5].xyz;
  }

  // Multiply the position vector by the position matrix
  float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);
  o.pos = float4(dot(cproj[0], pos), dot(cproj[1], pos), dot(cproj[2], pos),
                 dot(cproj[3], pos));

  // The scale of the transform matrix is used to control the size of the emboss map
  // effect by changing the scale of the transformed binormals (which only get used by
  // emboss map texgens). By normalising the first transformed normal (which is used
  // by lighting calculations and needs to be unit length), the same transform matrix
  // can do double duty, scaling for emboss mapping, and not scaling for lighting.
  float3 _normal = float3(0.0, 0.0, 0.0);
  if ((components & 1024u) != 0u) // VB_HAS_NORMAL
    _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, rawnormal)));

  float3 _tangent = float3(0.0, 0.0, 0.0);
  if ((components & 2048u) != 0u) // VB_HAS_TANGENT
    _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, rawtangent));
  else
    _tangent = float3(dot(N0, ctangent.xyz), dot(N1, ctangent.xyz), dot(N2, ctangent.xyz));

  float3 _binormal = float3(0.0, 0.0, 0.0);
  if ((components & 4096u) != 0u) // VB_HAS_BINORMAL
    _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, rawbinormal));
  else
    _binormal = float3(dot(N0, cbinormal.xyz), dot(N1, cbinormal.xyz), dot(N2, cbinormal.xyz));

  // xfmem.numColorChans controls the number of color channels available to TEV,
  // but we still need to generate all channels here, as it can be used in texgen.
  // Cel-damage is an example of this.
  float4 vertex_color_0, vertex_color_1;

  // To use color 1, the vertex descriptor must have color 0 and 1.
  // If color 1 is present but not color 0, it is used for lighting channel 0.
  bool use_color_1 = ((components & 24576u) == 24576u); // VB_HAS_COL0 | VB_HAS_COL1
  for (uint color = 0u; color < 2u; color++) {
    if ((color == 0u || use_color_1) && (components & (8192u << color)) != 0u) {
      // Use color0 for channel 0, and color1 for channel 1 if both colors 0 and 1 are present.
      if (color == 0u)
        vertex_color_0 = rawcolor0;
      else
        vertex_color_1 = rawcolor1;
    } else if (color == 0u && (components & 16384u) != 0u) {
      // Use color1 for channel 0 if color0 is not present.
      vertex_color_0 = rawcolor1;
    } else {
      if (color == 0u)
        vertex_color_0 = missing_color_value;
      else
        vertex_color_1 = missing_color_value;
    }
  }

  // Lighting
  for (uint chan = 0u; chan < 2u; chan++) {
    uint colorreg = xfmem_color(chan);
    uint alphareg = xfmem_alpha(chan);
    int4 mat = cmtrl[chan + 2u];
    int4 lacc = int4(255, 255, 255, 255);

    // Bit 0 selects the vertex colour (instead of the material register) as material source.
    if (bitfieldExtract(uint(colorreg), 0, 1) != 0u)
      mat.xyz = int3(round(((chan == 0u) ? vertex_color_0.xyz : vertex_color_1.xyz) * 255.0));
    if (bitfieldExtract(uint(alphareg), 0, 1) != 0u)
      mat.w = int(round(((chan == 0u) ? vertex_color_0.w : vertex_color_1.w) * 255.0));
    else
      mat.w = cmtrl [chan + 2u].w;

    // Bit 1 enables lighting for the colour part of this channel.
    if (bitfieldExtract(uint(colorreg), 1, 1) != 0u) {
      if (bitfieldExtract(uint(colorreg), 6, 1) != 0u)
        lacc.xyz = int3(round(((chan == 0u) ? vertex_color_0.xyz : vertex_color_1.xyz) * 255.0));
      else
        lacc.xyz = cmtrl [chan].xyz;

      // The 8-bit light mask is stored in two 4-bit fields.
      uint light_mask = bitfieldExtract(uint(colorreg), 2, 4) |
                        (bitfieldExtract(uint(colorreg), 11, 4) << 4u);
      uint attnfunc = bitfieldExtract(uint(colorreg), 9, 2);
      uint diffusefunc = bitfieldExtract(uint(colorreg), 7, 2);
      for (uint light_index = 0u; light_index < 8u; light_index++) {
        if ((light_mask & (1u << light_index)) != 0u)
          lacc.xyz += CalculateLighting(light_index, attnfunc, diffusefunc, pos.xyz,
                                        _normal).xyz;
      }
    }

    // Same again for the alpha part of this channel.
    if (bitfieldExtract(uint(alphareg), 1, 1) != 0u) {
      if (bitfieldExtract(uint(alphareg), 6, 1) != 0u) {
        if ((components & (8192u << chan)) != 0u) // VB_HAS_COL0 << chan
          lacc.w = int(round(((chan == 0u) ? vertex_color_0.w : vertex_color_1.w) * 255.0));
        else if ((components & 8192u) != 0u) // VB_HAS_COL0
          lacc.w = int(round(vertex_color_0.w * 255.0));
        else
          lacc.w = 255;
      } else {
        lacc.w = cmtrl [chan].w;
      }

      uint light_mask = bitfieldExtract(uint(alphareg), 2, 4) |
                        (bitfieldExtract(uint(alphareg), 11, 4) << 4u);
      uint attnfunc = bitfieldExtract(uint(alphareg), 9, 2);
      uint diffusefunc = bitfieldExtract(uint(alphareg), 7, 2);
      for (uint light_index = 0u; light_index < 8u; light_index++) {
        if ((light_mask & (1u << light_index)) != 0u)
          lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, pos.xyz,
                                      _normal).w;
      }
    }

    lacc = clamp(lacc, 0, 255);

    // Hopefully GPUs that can support dynamic indexing will optimize this.
    float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;
    switch (chan) {
    case 0u: o.colors_0 = lit_color; break;
    case 1u: o.colors_1 = lit_color; break;
    }
  }

  o.tex0 = float3(0.0, 0.0, 0.0);
  o.tex1 = float3(0.0, 0.0, 0.0);
  o.tex2 = float3(0.0, 0.0, 0.0);
  // Texture coordinate generation
  for (uint texgen = 0u; texgen < 3u; texgen++) {
    // Texcoord transforms
    float4 coord = float4(0.0, 0.0, 1.0, 1.0);
    uint texMtxInfo = xfmem_texMtxInfo(texgen);
    switch (bitfieldExtract(uint(texMtxInfo), 7, 5)) {
    case 0x0u /* Geometry (input is ABC1) */:
      coord.xyz = rawpos.xyz;
      break;

    case 0x1u /* Normal (input is ABC1) */:
      coord.xyz = ((components & 1024u /* VB_HAS_NORMAL */) != 0u) ? rawnormal.xyz : coord.xyz;
      break;

    case 0x3u /* Binormal T (input is ABC1) */:
      coord.xyz = ((components & 2048u /* VB_HAS_TANGENT */) != 0u) ? rawtangent.xyz : coord.xyz;
      break;

    case 0x4u /* Binormal B (input is ABC1) */:
      coord.xyz = ((components & 4096u /* VB_HAS_BINORMAL */) != 0u) ? rawbinormal.xyz : coord.xyz;
      break;

    case 0x5u /* Tex 0 */:
      coord = ((components & 32768u /* VB_HAS_UV0 */) != 0u) ? float4(rawtex0.x, rawtex0.y, 1.0, 1.0) : coord;
      break;

    case 0x6u /* Tex 1 */:
      coord = ((components & 65536u /* VB_HAS_UV1 */) != 0u) ? float4(rawtex1.x, rawtex1.y, 1.0, 1.0) : coord;
      break;

    case 0x7u /* Tex 2 */:
      coord = ((components & 131072u /* VB_HAS_UV2 */) != 0u) ? float4(rawtex2.x, rawtex2.y, 1.0, 1.0) : coord;
      break;

    case 0x8u /* Tex 3 */:
      coord = ((components & 262144u /* VB_HAS_UV3 */) != 0u) ? float4(rawtex3.x, rawtex3.y, 1.0, 1.0) : coord;
      break;

    case 0x9u /* Tex 4 */:
      coord = ((components & 524288u /* VB_HAS_UV4 */) != 0u) ? float4(rawtex4.x, rawtex4.y, 1.0, 1.0) : coord;
      break;

    case 0xau /* Tex 5 */:
      coord = ((components & 1048576u /* VB_HAS_UV5 */) != 0u) ? float4(rawtex5.x, rawtex5.y, 1.0, 1.0) : coord;
      break;

    case 0xbu /* Tex 6 */:
      coord = ((components & 2097152u /* VB_HAS_UV6 */) != 0u) ? float4(rawtex6.x, rawtex6.y, 1.0, 1.0) : coord;
      break;

    case 0xcu /* Tex 7 */:
      coord = ((components & 4194304u /* VB_HAS_UV7 */) != 0u) ? float4(rawtex7.x, rawtex7.y, 1.0, 1.0) : coord;
      break;
    }

    // Input form of AB11 sets z element to 1.0
    if (bitfieldExtract(uint(texMtxInfo), 2, 1) == 0x0u /* AB11 */) // inputform == AB11
      coord.z = 1.0f;

    // Convert NaN to 1
    if (dolphin_isnan(coord.x)) coord.x = 1.0;
    if (dolphin_isnan(coord.y)) coord.y = 1.0;
    if (dolphin_isnan(coord.z)) coord.z = 1.0;

    // first transformation
    uint texgentype = bitfieldExtract(uint(texMtxInfo), 4, 3);
    float3 output_tex;
    switch (texgentype)
    {
    case 0x1u /* Emboss map (used when bump mapping) */:
      {
        uint light = bitfieldExtract(uint(texMtxInfo), 15, 3);
        uint source = bitfieldExtract(uint(texMtxInfo), 12, 3);
        switch (source) {
        case 0u: output_tex.xyz = o.tex0; break;
        case 1u: output_tex.xyz = o.tex1; break;
        case 2u: output_tex.xyz = o.tex2; break;
        default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;
        }
        float3 ldir = normalize(clights[light].pos.xyz - pos.xyz);
        output_tex.xyz += float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);
      }
      break;

    case 0x2u /* Color channel 0 */:
      output_tex.xyz = float3(o.colors_0.x, o.colors_0.y, 1.0);
      break;

    case 0x3u /* Color channel 1 */:
      output_tex.xyz = float3(o.colors_1.x, o.colors_1.y, 1.0);
      break;

    case 0x0u /* Regular */:
    default:
      {
        if ((components & (4u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {
          // This is messy, due to dynamic indexing of the input texture coordinates.
          // Hopefully the compiler will unroll this whole loop anyway and the switch.
          int tmp = 0;
          switch (texgen) {
          case 0u: tmp = int(rawtex0.z); break;
          case 1u: tmp = int(rawtex1.z); break;
          case 2u: tmp = int(rawtex2.z); break;
          }

          if (bitfieldExtract(uint(texMtxInfo), 1, 1) == 0x1u /* STQ (3x4 matrix) */)
          {
            output_tex.xyz = float3(dot(coord, ctrmtx[tmp]),
                                    dot(coord, ctrmtx[tmp + 1]),
                                    dot(coord, ctrmtx[tmp + 2]));
          } else {
            output_tex.xyz = float3(dot(coord, ctrmtx[tmp]),
                                    dot(coord, ctrmtx[tmp + 1]),
                                    1.0);
          }
        } else {
          if (bitfieldExtract(uint(texMtxInfo), 1, 1) == 0x1u /* STQ (3x4 matrix) */)
          {
            output_tex.xyz = float3(dot(coord, ctexmtx[3u * texgen]),
                                    dot(coord, ctexmtx[3u * texgen + 1u]),
                                    dot(coord, ctexmtx[3u * texgen + 2u]));
          } else {
            output_tex.xyz = float3(dot(coord, ctexmtx[3u * texgen]),
                                    dot(coord, ctexmtx[3u * texgen + 1u]),
                                    1.0);
          }
        }
      }
      break;
    }

    if (xfmem_dualTexInfo != 0u) {
      uint postMtxInfo = xfmem_postMtxInfo(texgen);
      uint base_index = bitfieldExtract(uint(postMtxInfo), 0, 6);
      // Row indices wrap within the 64-entry post matrix array.
      float4 P0 = cpostmtx[base_index & 0x3fu];
      float4 P1 = cpostmtx[(base_index + 1u) & 0x3fu];
      float4 P2 = cpostmtx[(base_index + 2u) & 0x3fu];

      if (bitfieldExtract(uint(postMtxInfo), 8, 1) != 0u)
        output_tex.xyz = normalize(output_tex.xyz);

      // multiply by postmatrix
      output_tex.xyz = float3(dot(P0.xyz, output_tex.xyz) + P0.w,
                              dot(P1.xyz, output_tex.xyz) + P1.w,
                              dot(P2.xyz, output_tex.xyz) + P2.w);
    }

    if (texgentype == 0x0u /* Regular */ && output_tex.z == 0.0)
      output_tex.xy = clamp(output_tex.xy / 2.0f, float2(-1.0f,-1.0f), float2(1.0f,1.0f));

    // Hopefully GPUs that can support dynamic indexing will optimize this.
    switch (texgen) {
    case 0u: o.tex0 = output_tex; break;
    case 1u: o.tex1 = output_tex; break;
    case 2u: o.tex2 = output_tex; break;
    }
  }

  // The number of colors available to TEV is determined by numColorChans.
  // We have to provide the fields to match the interface, so set to zero
  // if it's not enabled.
  if (xfmem_numColorChans == 0u)
    o.colors_0 = float4(0.0, 0.0, 0.0, 0.0);
  if (xfmem_numColorChans <= 1u)
    o.colors_1 = float4(0.0, 0.0, 0.0, 0.0);

  o.clipPos = o.pos;
  o.pos.z = o.pos.w * cpixelcenter.w - o.pos.z * cpixelcenter.z;
  o.pos.z = o.pos.z * 2.0 - o.pos.w;
  o.pos.xy *= sign(cpixelcenter.xy * float2(1.0, -1.0));
  o.pos.xy = o.pos.xy - o.pos.w * cpixelcenter.xy;
  if (o.pos.w == 1.0f)
  {
    // Snap vertices with w == 1 to whole pixels of the viewport.
    float ss_pixel_x = ((o.pos.x + 1.0f) * (cviewport.x * 0.5f));
    float ss_pixel_y = ((o.pos.y + 1.0f) * (cviewport.y * 0.5f));
    ss_pixel_x = round(ss_pixel_x);
    ss_pixel_y = round(ss_pixel_y);
    o.pos.x = ((ss_pixel_x / (cviewport.x * 0.5f)) - 1.0f);
    o.pos.y = ((ss_pixel_y / (cviewport.y * 0.5f)) - 1.0f);
  }

  tex0.xyz = o.tex0;
  tex1.xyz = o.tex1;
  tex2.xyz = o.tex2;
  clipPos = o.clipPos;
  colors_0 = o.colors_0;
  colors_1 = o.colors_1;
  gl_Position = o.pos;
}

// NOTE(review): a second #version directive appears here, so this looks like
// the start of a separate shader (the pixel shader) concatenated into the same
// file - confirm the two halves are compiled independently.
#version 320 es

// Expands to a declaration requesting early fragment tests; instantiated before main.
#define FORCE_EARLY_Z layout(early_fragment_tests) in

// Location qualifiers expand to nothing in this configuration; binding
// qualifiers expand to explicit layout(binding = x) declarations.
#define ATTRIBUTE_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)
#define UBO_BINDING(packing, x) layout(packing, binding = x)
#define SAMPLER_BINDING(x) layout(binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(binding = x)
#define SSBO_BINDING(x) layout(std430, binding = x)
#define IMAGE_BINDING(format, x) layout(format, binding = x)

#define VARYING_LOCATION(x)

// Optional extensions; "enable" means the shader still compiles if one is absent.
#extension GL_ANDROID_extension_pack_es31a : enable

#extension GL_EXT_blend_func_extended : enable

#extension GL_EXT_shader_framebuffer_fetch: enable


// The colour output is declared inout so its current value can be read in main.
#define FRAGMENT_INOUT inout

// Default precisions: everything below is computed at highp.
precision highp float;


precision highp int;
precision highp sampler2DArray;
precision highp usamplerBuffer;
precision highp sampler2DMS;
precision highp image2DArray;
#define API_OPENGL 1
// HLSL-style type and function aliases used throughout the shader body.
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
// NOTE: because of this alias, any identifier spelled "lerp" becomes "mix".
#define lerp mix
// Pixel UberShader for 3 texgens, early-depth, no dual-source blending
// Integer dot product of two 3-component vectors.
int idot(int3 x, int3 y)
{
  return x.x * y.x + x.y * y.y + x.z * y.z;
}
// Integer dot product of two 4-component vectors.
int idot(int4 x, int4 y)
{
  return x.x * y.x + x.y * y.y + x.z * y.z + x.w * y.w;
}

// Rounds a float with round() and converts the result to int.
int iround(float x)
{
  float nearest = round(x);
  return int(nearest);
}


// Component-wise variants of iround() for 2-, 3- and 4-component vectors.
int2 iround(float2 x)
{
  float2 nearest = round(x);
  return int2(nearest);
}
int3 iround(float3 x)
{
  float3 nearest = round(x);
  return int3(nearest);
}
int4 iround(float4 x)
{
  float4 nearest = round(x);
  return int4(nearest);
}

// The eight texture units, each bound as a 2D array texture.
SAMPLER_BINDING(0) uniform sampler2DArray samp[8];

// Pixel-stage uniform block (std140, binding 1). Member order defines the
// buffer layout expected by the host; do not reorder. Members without a
// comment are not referenced in the visible part of this shader.
UBO_BINDING(std140, 1) uniform PSBlock {


int4 color[4];         // initial values of the four TEV registers (copied into State.Reg in main)
int4 k[4];
int4 alphaRef;
int4 texdim[8];        // per texture: .xy * 128 = size used by sampleTexture, .zw * 128 = texcoord scale
int4 czbias[2];
int4 cindscale[2];     // right-shift amounts applied to indirect texture coordinates
int4 cindmtx[6];       // indirect matrices, two rows each; .w of the first row is a shift amount
int4 cfogcolor;
int4 cfogi;
float4 cfogf;
float4 cfogrange[3];
float4 czslope;
float2 cefbscale;
uint bpmem_genmode;    // bits 10..13: last TEV stage index (main loops stage <= this value)
uint bpmem_alphaTest;
uint bpmem_fogParam3;
uint bpmem_fogRangeBase;
uint bpmem_dstalpha;
uint bpmem_ztex_op;
bool bpmem_late_ztest;
bool bpmem_rgba6_format;
bool bpmem_dither;
bool bpmem_bounding_box;
uint4 bpmem_pack1[16]; // packed per-stage registers, read through the bpmem_* accessor macros
uint4 bpmem_pack2[8];  // packed per-index registers, read through the bpmem_* / samp_* accessor macros
int4 konstLookup[32];
bool blend_enable;
uint blend_src_factor;
uint blend_src_factor_alpha;
uint blend_dst_factor;
uint blend_dst_factor_alpha;
bool blend_subtract;
bool blend_subtract_alpha;
bool logic_op_enable;
uint logic_op_mode;
};

// Accessors for the registers packed into bpmem_pack1 (indexed by TEV stage
// in main; bpmem_iref is indexed by indirect stage).
#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)


#define bpmem_tevind(i) (bpmem_pack1[(i)].z)
#define bpmem_iref(i) (bpmem_pack1[(i)].w)
// Accessors for the registers packed into bpmem_pack2.
#define bpmem_tevorder(i) (bpmem_pack2[(i)].x)
#define bpmem_tevksel(i) (bpmem_pack2[(i)].y)
#define samp_texmode0(i) (bpmem_pack2[(i)].z)
#define samp_texmode1(i) (bpmem_pack2[(i)].w)

// Samples `tex` at fixed-point coordinate `uv` on array layer `layer` and
// returns the texel as integers (texture value * 255, rounded).
//   texmap - index used to look up texdim[] and the texmode0 register
//   uv     - coordinate in units of 1/128 texel (divided by texdim.xy * 128)
// The LOD bias is the signed 16-bit field at bit 8 of texmode0, divided by 256.
int4 sampleTexture(uint texmap, in sampler2DArray tex, int2 uv, int layer) {
  float width_fixed = float(texdim[texmap].x * 128);
  float height_fixed = float(texdim[texmap].y * 128);
  float norm_s = float(uv.x) / width_fixed;
  float norm_t = float(uv.y) / height_fixed;
  float3 coords = float3(norm_s, norm_t, layer);

  uint mode_bits = samp_texmode0(texmap);
  int raw_bias = bitfieldExtract(int(mode_bits), 8, 16);
  float lod_bias = float(raw_bias) / 256.0f;

  return iround(255.0 * texture(tex, coords, lod_bias));
}
// Colour output, declared inout (FRAGMENT_INOUT) so main can read its current value.
FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 real_ocol0;
// Interpolated inputs; names and types match the vertex shader outputs.
VARYING_LOCATION(0) in float4 colors_0;
VARYING_LOCATION(1) in float4 colors_1;
VARYING_LOCATION(2) in float3 tex0;
VARYING_LOCATION(3) in float3 tex1;
VARYING_LOCATION(4) in float3 tex2;
VARYING_LOCATION(5) in float4 clipPos;
// Picks one of the three fixed-point texture coordinates by index.
// Indices 3 and above fall back to coordinate 0.
int2 selectTexCoord(uint index, int2 fixpoint_uv0, int2 fixpoint_uv1, int2 fixpoint_uv2) {
  switch (index) {
  case 1u:
    return fixpoint_uv1;
  case 2u:
    return fixpoint_uv2;
  default: // 0, and every out-of-range index
    return fixpoint_uv0;
  }
}

// Samples texture unit `texmap` by forwarding to sampleTexture() with the
// matching element of the samp[] array.
// NOTE(review): samp[] is indexed with a runtime value; presumably texmap is
// dynamically uniform at every call site - confirm.
int4 sampleTextureWrapper(uint texmap, int2 uv, int layer) {


return sampleTexture(texmap, samp[texmap], uv, layer);
}

// AKA: Color Channel Swapping.
// Rearranges the components of `color` using swap table `s`. The table is
// stored in two tevksel registers: register 2*s holds the source selectors
// for red (bits 0-1) and green (bits 2-3), register 2*s+1 those for blue and
// alpha. The parameter `color` shadows the PSBlock member of the same name.
int4 Swizzle(uint s, int4 color) {
  uint sel_rg = bpmem_tevksel(s * 2u);
  uint sel_ba = bpmem_tevksel(s * 2u + 1u);

  int red   = color[bitfieldExtract(uint(sel_rg), 0, 2)];
  int green = color[bitfieldExtract(uint(sel_rg), 2, 2)];
  int blue  = color[bitfieldExtract(uint(sel_ba), 0, 2)];
  int alpha = color[bitfieldExtract(uint(sel_ba), 2, 2)];

  return int4(red, green, blue, alpha);
}

// Applies an indirect-texture wrap mode to one coordinate.
//   mode 0    (ITW_OFF)            - coordinate is returned unchanged
//   mode 1..5 (ITW_256 .. ITW_16)  - coordinate is masked with 0xfffe >> mode
//   mode >= 6 (ITW_0)              - result is always 0
int Wrap(int coord, uint mode) {
  if (mode >= 6u) // ITW_0
    return 0;
  if (mode == 0u) // ITW_OFF
    return coord;
  // ITW_256 to ITW_16
  int mask = 0xfffe >> mode;
  return coord & mask;
}

// TEV's Linear Interpolate, plus bias, add/subtract and scale (scalar version).
//   A, B, C - interpolation inputs in 0..255; the result blends A towards B by C
//   D       - value the interpolated term is added to or subtracted from
//   bias    - 1: add 128 to D, 2: subtract 128 from D, otherwise no bias
//   op      - false: D + blend, true: D - blend
//   scale   - 0..2: left shift applied to both blend and D; 3: divide result by 2
// Review fixes: the wrapped TODO comment is back inside a comment (its second
// line was a syntax error), and the local formerly named `lerp` is renamed
// because `#define lerp mix` rewrote it.
int tevLerp(int A, int B, int C, int D, uint bias, bool op, uint scale) {
  // Scale C from 0..255 to 0..256
  C += C >> 7;

  // Add bias to D
  if (bias == 1u) D += 128;
  else if (bias == 2u) D -= 128;

  // 8.8 fixed-point blend of A and B
  int blend = (A << 8) + (B - A)*C;

  if (scale != 3u) {
    blend = blend << scale;
    D = D << scale;
    // TODO: Is this rounding bias still added when the scale is divide by 2?
    // Currently we do not apply it.
    blend = blend + (op ? 127 : 128);
  }

  int result = blend >> 8;

  // Add/Subtract D
  if (op) // Subtract
    result = D - result;
  else // Add
    result = D + result;

  // Most of the Scale was moved inside the lerp for improved precision
  // But we still do the divide by 2 here
  if (scale == 3u)
    result = result >> 1;
  return result;
}

// TEV's Linear Interpolate, plus bias, add/subtract and scale (per-component
// version for three colour channels).
//   A, B, C - interpolation inputs in 0..255; the result blends A towards B by C
//   D       - value the interpolated term is added to or subtracted from
//   bias    - 1: add 128 to D, 2: subtract 128 from D, otherwise no bias
//   op      - false: D + blend, true: D - blend
//   scale   - 0..2: left shift applied to both blend and D; 3: divide result by 2
// Review fixes: the wrapped TODO comment is back inside a comment (its second
// line was a syntax error), and the local formerly named `lerp` is renamed
// because `#define lerp mix` rewrote it.
int3 tevLerp3(int3 A, int3 B, int3 C, int3 D, uint bias, bool op, uint scale) {
  // Scale C from 0..255 to 0..256
  C += C >> 7;

  // Add bias to D
  if (bias == 1u) D += 128;
  else if (bias == 2u) D -= 128;

  // 8.8 fixed-point blend of A and B
  int3 blend = (A << 8) + (B - A)*C;

  if (scale != 3u) {
    blend = blend << scale;
    D = D << scale;
    // TODO: Is this rounding bias still added when the scale is divide by 2?
    // Currently we do not apply it.
    blend = blend + (op ? 127 : 128);
  }

  int3 result = blend >> 8;

  // Add/Subtract D
  if (op) // Subtract
    result = D - result;
  else // Add
    result = D + result;

  // Most of the Scale was moved inside the lerp for improved precision
  // But we still do the divide by 2 here
  if (scale == 3u)
    result = result >> 1;
  return result;
}

// Implements operations 0-5 of TEV's compare mode, which are common to both
// color and alpha channels. Even ops test "greater than", odd ops test
// equality; the op pair selects how many channels take part (R, GR or BGR).
// Any other op returns false.
bool tevCompare(uint op, int3 color_A, int3 color_B) {
  if (op == 0u) {
    // TevCompareMode::R8, TevComparison::GT
    return (color_A.r > color_B.r);
  }
  if (op == 1u) {
    // TevCompareMode::R8, TevComparison::EQ
    return (color_A.r == color_B.r);
  }
  if (op == 2u) {
    // TevCompareMode::GR16, TevComparison::GT
    int lhs16 = (color_A.r | (color_A.g << 8));
    int rhs16 = (color_B.r | (color_B.g << 8));
    return lhs16 > rhs16;
  }
  if (op == 3u) {
    // TevCompareMode::GR16, TevComparison::EQ
    return (color_A.r == color_B.r && color_A.g == color_B.g);
  }
  if (op == 4u) {
    // TevCompareMode::BGR24, TevComparison::GT
    int lhs24 = (color_A.r | (color_A.g << 8) | (color_A.b << 16));
    int rhs24 = (color_B.r | (color_B.g << 8) | (color_B.b << 16));
    return lhs24 > rhs24;
  }
  if (op == 5u) {
    // TevCompareMode::BGR24, TevComparison::EQ
    return (color_A.r == color_B.r && color_A.g == color_B.g && color_A.b == color_B.b);
  }
  return false;
}

// Mutable TEV state carried from one stage to the next inside main().
struct State {
int4 Reg[4];   // TEV registers: [0] prev, [1] c0, [2] c1, [3] c2
int4 TexColor; // texture sample for the current stage (255s when texturing is off)
int AlphaBump; // value taken from the indirect texture sample in the indirect stage code
};
// Per-stage register values, filled at the top of each TEV loop iteration.
struct StageState {
uint stage; // index of the stage being evaluated
uint order; // tevorder register for this stage (shifted right by 12 for odd stages)
uint cc;    // colour combiner register
uint ac;    // alpha combiner register
};

// Forward declarations: these are called by the input selectors below, and
// their definitions are not in the visible part of the file.
int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1);


int4 getKonstColor(State s, StageState ss);

// Helper function for Alpha Test.
// Evaluates comparison number `compare` on (a, b). Values of 7 and above all
// behave as "Always".
bool alphaCompare(int a, int b, uint compare) {
  switch (compare) {
  case 0u:
    return false;  // Never (0)
  case 1u:
    return a < b;  // Less (1)
  case 2u:
    return a == b; // Equal (2)
  case 3u:
    return a <= b; // LEqual (3)
  case 4u:
    return a > b;  // Greater (4)
  case 5u:
    return a != b; // NEqual (5)
  case 6u:
    return a >= b; // GEqual (6)
  default:
    return true;   // Always (7)
  }
}

// Returns the RGB value of colour-combiner input `index` (0..15). Odd
// register/texture/raster indices replicate the alpha component into all
// three channels. Indices of 15 and above yield zero.
int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1,
                      uint index) {
  switch (index) {
  case 0u:
    return s.Reg[0].rgb; // prev.rgb (0)
  case 1u:
    return s.Reg[0].aaa; // prev.aaa (1)
  case 2u:
    return s.Reg[1].rgb; // c0.rgb (2)
  case 3u:
    return s.Reg[1].aaa; // c0.aaa (3)
  case 4u:
    return s.Reg[2].rgb; // c1.rgb (4)
  case 5u:
    return s.Reg[2].aaa; // c1.aaa (5)
  case 6u:
    return s.Reg[3].rgb; // c2.rgb (6)
  case 7u:
    return s.Reg[3].aaa; // c2.aaa (7)
  case 8u:
    return s.TexColor.rgb; // tex.rgb (8)
  case 9u:
    return s.TexColor.aaa; // tex.aaa (9)
  case 10u:
    return getRasColor(s, ss, colors_0, colors_1).rgb; // ras.rgb (10)
  case 11u:
    return getRasColor(s, ss, colors_0, colors_1).aaa; // ras.aaa (11)
  case 12u:
    return int3(255, 255, 255); // ONE (12)
  case 13u:
    return int3(128, 128, 128); // HALF (13)
  case 14u:
    return getKonstColor(s, ss).rgb; // konst.rgb (14)
  default:
    return int3(0, 0, 0); // ZERO (15)
  }
}

// Returns the value of alpha-combiner input `index` (0..7). Indices of 7 and
// above yield zero.
int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1,
                     uint index) {
  switch (index) {
  case 0u:
    return s.Reg[0].a; // prev (0)
  case 1u:
    return s.Reg[1].a; // c0 (1)
  case 2u:
    return s.Reg[2].a; // c1 (2)
  case 3u:
    return s.Reg[3].a; // c2 (3)
  case 4u:
    return s.TexColor.a; // tex (4)
  case 5u:
    return getRasColor(s, ss, colors_0, colors_1).a; // ras (5)
  case 6u:
    return getKonstColor(s, ss).a; // konst (6)
  default:
    return 0; // ZERO (7)
  }
}

// Returns TEV register `index` from the state using explicit branches rather
// than a dynamic array index. Indices of 3 and above all select c2.
int4 getTevReg(in State s, uint index) {
  switch (index) {
  case 0u:
    return s.Reg[0]; // prev (0)
  case 1u:
    return s.Reg[1]; // c0 (1)
  case 2u:
    return s.Reg[2]; // c1 (2)
  default:
    return s.Reg[3]; // c2 (3)
  }
}

// Shorthand for selectTexCoord() that captures the fixpoint_uv0..2 locals of
// the calling scope. The whole definition must stay on one logical line: the
// scraped text had it split without a continuation, which truncated the macro
// and left "fixpoint_uv2)" as stray code.
#define getTexCoord(index) selectTexCoord((index), fixpoint_uv0, fixpoint_uv1, fixpoint_uv2)

FORCE_EARLY_Z;
void main()
{
float4 rawpos = gl_FragCoord;
#ifdef FB_FETCH_VALUE
float4 initial_ocol0 = FB_FETCH_VALUE;
#else
float4 initial_ocol0 = real_ocol0;
#endif
float4 ocol0;
float4 ocol1;
int layer = 0;
int3 tevcoord = int3(0, 0, 0);
State s;
s.TexColor = int4(0, 0, 0, 0);
s.AlphaBump = 0;

s.Reg[0] = color[0];
s.Reg[1] = color[1];
s.Reg[2] = color[2];
s.Reg[3] = color[3];
uint num_stages = bitfieldExtract(uint(bpmem_genmode), 10, 4);

// Main tev loop


for(uint stage = 0u; stage <= num_stages; stage++)
{
StageState ss;
ss.stage = stage;
ss.cc = bpmem_combiners(stage).x;
ss.ac = bpmem_combiners(stage).y;
ss.order = bpmem_tevorder(stage>>1);
if ((stage & 1u) == 1u)
ss.order = ss.order >> 12;

int2 fixpoint_uv0 = int2((tex0.z == 0.0 ? tex0.xy : tex0.xy / tex0.z) *


float2(texdim[0].zw * 128));
int2 fixpoint_uv1 = int2((tex1.z == 0.0 ? tex1.xy : tex1.xy / tex1.z) *
float2(texdim[1].zw * 128));
int2 fixpoint_uv2 = int2((tex2.z == 0.0 ? tex2.xy : tex2.xy / tex2.z) *
float2(texdim[2].zw * 128));

uint tex_coord = bitfieldExtract(uint(ss.order), 3, 3);


int2 fixedPoint_uv = getTexCoord(tex_coord);

bool texture_enabled = (ss.order & 64u) != 0u;

// Indirect textures
uint tevind = bpmem_tevind(stage);
if (tevind != 0u)
{
uint bs = bitfieldExtract(uint(tevind), 7, 2);
uint fmt = bitfieldExtract(uint(tevind), 2, 2);
uint bias = bitfieldExtract(uint(tevind), 4, 3);
uint bt = bitfieldExtract(uint(tevind), 0, 2);
uint matrix_index = bitfieldExtract(uint(tevind), 9, 2);
uint matrix_id = bitfieldExtract(uint(tevind), 11, 2);
int2 indtevtrans = int2(0, 0);

if (bpmem_iref(bt) != 0u) {
int3 indcoord;
{
uint iref = bpmem_iref(bt);
uint texcoord = bitfieldExtract(iref, 0, 3);
uint texmap = bitfieldExtract(iref, 8, 3);
int2 fixedPoint_uv = getTexCoord(texcoord);

if ((bt & 1u) == 0u)


fixedPoint_uv = fixedPoint_uv >> cindscale[bt >> 1].xy;
else
fixedPoint_uv = fixedPoint_uv >> cindscale[bt >> 1].zw;

indcoord = sampleTextureWrapper(texmap, fixedPoint_uv, layer).abg;


}
if (bs != 0u)
s.AlphaBump = indcoord[bs - 1u];
switch(fmt)
{
case 0x0u /* ITF_8 */:
indcoord.x = indcoord.x + ((bias & 1u) != 0u ? -128 : 0);
indcoord.y = indcoord.y + ((bias & 2u) != 0u ? -128 : 0);
indcoord.z = indcoord.z + ((bias & 4u) != 0u ? -128 : 0);
s.AlphaBump = s.AlphaBump & 0xf8;
break;
case 0x1u /* ITF_5 */:
indcoord.x = (indcoord.x >> 3) + ((bias & 1u) != 0u ? 1 : 0);
indcoord.y = (indcoord.y >> 3) + ((bias & 2u) != 0u ? 1 : 0);
indcoord.z = (indcoord.z >> 3) + ((bias & 4u) != 0u ? 1 : 0);
s.AlphaBump = s.AlphaBump << 5;
break;
case 0x2u /* ITF_4 */:
indcoord.x = (indcoord.x >> 4) + ((bias & 1u) != 0u ? 1 : 0);
indcoord.y = (indcoord.y >> 4) + ((bias & 2u) != 0u ? 1 : 0);
indcoord.z = (indcoord.z >> 4) + ((bias & 4u) != 0u ? 1 : 0);
s.AlphaBump = s.AlphaBump << 4;
break;
case 0x3u /* ITF_3 */:
indcoord.x = (indcoord.x >> 5) + ((bias & 1u) != 0u ? 1 : 0);
indcoord.y = (indcoord.y >> 5) + ((bias & 2u) != 0u ? 1 : 0);
indcoord.z = (indcoord.z >> 5) + ((bias & 4u) != 0u ? 1 : 0);
s.AlphaBump = s.AlphaBump << 3;
break;
}

// Matrix multiply
if (matrix_index != 0u)
{
uint mtxidx = 2u * (matrix_index - 1u);
int shift = cindmtx[mtxidx].w;

switch (matrix_id)
{
case 0u: // 3x2 S0.10 matrix
indtevtrans = int2(idot(cindmtx[mtxidx].xyz, indcoord),
idot(cindmtx[mtxidx + 1u].xyz, indcoord)) >> 3;
break;
case 1u: // S matrix, S17.7 format
indtevtrans = (fixedPoint_uv * indcoord.xx) >> 8;
break;
case 2u: // T matrix, S17.7 format
indtevtrans = (fixedPoint_uv * indcoord.yy) >> 8;
break;
}

if (shift >= 0)
indtevtrans = indtevtrans >> shift;
else
indtevtrans = indtevtrans << ((-shift) & 31);
}
}

// Wrapping
uint sw = bitfieldExtract(uint(tevind), 13, 3);
uint tw = bitfieldExtract(uint(tevind), 16, 3);
int2 wrapped_coord = int2(Wrap(fixedPoint_uv.x, sw), Wrap(fixedPoint_uv.y,
tw));

if ((tevind & 1048576u) != 0u) // add previous tevcoord


tevcoord.xy += wrapped_coord + indtevtrans;
else
tevcoord.xy = wrapped_coord + indtevtrans;

// Emulate s24 overflows


tevcoord.xy = (tevcoord.xy << 8) >> 8;
}
else
{
tevcoord.xy = fixedPoint_uv;
}

// Sample texture for stage


if (texture_enabled) {
uint sampler_num = bitfieldExtract(uint(ss.order), 0, 3);

int4 color = sampleTextureWrapper(sampler_num, tevcoord.xy, layer);


uint swap = bitfieldExtract(uint(ss.ac), 2, 2);
s.TexColor = Swizzle(swap, color);
} else {
// Texture is disabled
s.TexColor = int4(255, 255, 255, 255);
}

// This is the Meat of TEV


{
// Color Combiner
uint color_a = bitfieldExtract(uint(ss.cc), 12, 4);
uint color_b = bitfieldExtract(uint(ss.cc), 8, 4);
uint color_c = bitfieldExtract(uint(ss.cc), 4, 4);
uint color_d = bitfieldExtract(uint(ss.cc), 0, 4);
uint color_bias = bitfieldExtract(uint(ss.cc), 16, 2);
bool color_op = bool(bitfieldExtract(uint(ss.cc), 18, 1));
bool color_clamp = bool(bitfieldExtract(uint(ss.cc), 19, 1));
uint color_scale = bitfieldExtract(uint(ss.cc), 20, 2);
uint color_dest = bitfieldExtract(uint(ss.cc), 22, 2);
uint color_compare_op = color_scale << 1 | uint(color_op);

int3 color_A = selectColorInput(s, ss, colors_0, colors_1, color_a) &


int3(255, 255, 255);
int3 color_B = selectColorInput(s, ss, colors_0, colors_1, color_b) &
int3(255, 255, 255);
int3 color_C = selectColorInput(s, ss, colors_0, colors_1, color_c) &
int3(255, 255, 255);
int3 color_D = selectColorInput(s, ss, colors_0, colors_1, color_d); // 10
bits + sign

int3 color;
if (color_bias != 3u) { // Normal mode
color = tevLerp3(color_A, color_B, color_C, color_D, color_bias, color_op,
color_scale);
} else { // Compare mode
// op 6 and 7 do a select per color channel
if (color_compare_op == 6u) {
// TevCompareMode::RGB8, TevComparison::GT
color.r = (color_A.r > color_B.r) ? color_C.r : 0;
color.g = (color_A.g > color_B.g) ? color_C.g : 0;
color.b = (color_A.b > color_B.b) ? color_C.b : 0;
} else if (color_compare_op == 7u) {
// TevCompareMode::RGB8, TevComparison::EQ
color.r = (color_A.r == color_B.r) ? color_C.r : 0;
color.g = (color_A.g == color_B.g) ? color_C.g : 0;
color.b = (color_A.b == color_B.b) ? color_C.b : 0;
} else {
// The remaining ops do one compare which selects all 3 channels
color = tevCompare(color_compare_op, color_A, color_B) ? color_C :
int3(0, 0, 0);
}
color = color_D + color;
}

// Clamp result
if (color_clamp)
color = clamp(color, 0, 255);
else
color = clamp(color, -1024, 1023);
// Write result to the correct input register of the next stage
if (color_dest < 2u) {
if (color_dest < 1u) {
s.Reg[0].rgb = color; // prev (0)
} else {
s.Reg[1].rgb = color; // c0 (1)
}
} else {
if (color_dest < 3u) {
s.Reg[2].rgb = color; // c1 (2)
} else {
s.Reg[3].rgb = color; // c2 (3)
}
}

// Alpha Combiner
uint alpha_a = bitfieldExtract(uint(ss.ac), 13, 3);
uint alpha_b = bitfieldExtract(uint(ss.ac), 10, 3);
uint alpha_c = bitfieldExtract(uint(ss.ac), 7, 3);
uint alpha_d = bitfieldExtract(uint(ss.ac), 4, 3);
uint alpha_bias = bitfieldExtract(uint(ss.ac), 16, 2);
bool alpha_op = bool(bitfieldExtract(uint(ss.ac), 18, 1));
bool alpha_clamp = bool(bitfieldExtract(uint(ss.ac), 19, 1));
uint alpha_scale = bitfieldExtract(uint(ss.ac), 20, 2);
uint alpha_dest = bitfieldExtract(uint(ss.ac), 22, 2);
uint alpha_compare_op = alpha_scale << 1 | uint(alpha_op);

int alpha_A = 0;
int alpha_B = 0;
if (alpha_bias != 3u || alpha_compare_op > 5u) {
// Small optimisation here: alpha_A and alpha_B are unused by compare ops 0-5
alpha_A = selectAlphaInput(s, ss, colors_0, colors_1, alpha_a) & 255;
alpha_B = selectAlphaInput(s, ss, colors_0, colors_1, alpha_b) & 255;
};
int alpha_C = selectAlphaInput(s, ss, colors_0, colors_1, alpha_c) & 255;
int alpha_D = selectAlphaInput(s, ss, colors_0, colors_1, alpha_d); // 10 bits + sign

int alpha;
if (alpha_bias != 3u) { // Normal mode
alpha = tevLerp(alpha_A, alpha_B, alpha_C, alpha_D, alpha_bias, alpha_op,
alpha_scale);
} else { // Compare mode
if (alpha_compare_op == 6u) {
// TevCompareMode::A8, TevComparison::GT
alpha = (alpha_A > alpha_B) ? alpha_C : 0;
} else if (alpha_compare_op == 7u) {
// TevCompareMode::A8, TevComparison::EQ
alpha = (alpha_A == alpha_B) ? alpha_C : 0;
} else {
// All remaining alpha compare ops actually compare the color channels
alpha = tevCompare(alpha_compare_op, color_A, color_B) ? alpha_C : 0;
}
alpha = alpha_D + alpha;
}

// Clamp result
if (alpha_clamp)
alpha = clamp(alpha, 0, 255);
else
alpha = clamp(alpha, -1024, 1023);

// Write result to the correct input register of the next stage


if (alpha_dest < 2u) {
if (alpha_dest < 1u) {
s.Reg[0].a = alpha; // prev (0)
} else {
s.Reg[1].a = alpha; // c0 (1)
}
} else {
if (alpha_dest < 3u) {
s.Reg[2].a = alpha; // c1 (2)
} else {
s.Reg[3].a = alpha; // c2 (3)
}
}
}
} // Main TEV loop

int4 TevResult;
TevResult.xyz = getTevReg(s, bitfieldExtract(uint(bpmem_combiners(num_stages).x),
22, 2)).xyz;
TevResult.w = getTevReg(s, bitfieldExtract(uint(bpmem_combiners(num_stages).y),
22, 2)).w;
TevResult &= 255;

int zCoord = czbias[1].x + int((clipPos.z / clipPos.w) * float(czbias[1].y));


// Depth Texture
int early_zCoord = zCoord;
if (bpmem_ztex_op != 0u) {
int ztex = int(czbias[1].w); // fixed bias

// Whatever texture was in our last stage, it's now our depth texture
ztex += idot(s.TexColor.xyzw, czbias[0].xyzw);
ztex += (bpmem_ztex_op == 1u) ? zCoord : 0;
zCoord = ztex & 0xFFFFFF;
}

// Alpha Test
#define discard_fragment discard
if (bpmem_alphaTest != 0u) {
bool comp0 = alphaCompare(TevResult.a, alphaRef.r,
bitfieldExtract(uint(bpmem_alphaTest), 16, 3));
bool comp1 = alphaCompare(TevResult.a, alphaRef.g,
bitfieldExtract(uint(bpmem_alphaTest), 19, 3));

// These if statements are written weirdly to work around Intel and Qualcomm bugs with handling booleans.
switch (bitfieldExtract(uint(bpmem_alphaTest), 22, 2)) {
case 0u: // AND
if (comp0 && comp1) break; else discard_fragment; break;
case 1u: // OR
if (comp0 || comp1) break; else discard_fragment; break;
case 2u: // XOR
if (comp0 != comp1) break; else discard_fragment; break;
case 3u: // XNOR
if (comp0 == comp1) break; else discard_fragment; break;
}
}

// Hardware testing indicates that an alpha of 1 can pass an alpha test,


// but doesn't do anything in blending
if (TevResult.a == 1) TevResult.a = 0;
if (bpmem_dither) {
// Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering
// Here the matrix is encoded into the two factor constants
int2 dither = int2(rawpos.xy) & 1;
TevResult.rgb = (TevResult.rgb - (TevResult.rgb >> 6)) + abs(dither.y * 3 -
dither.x * 2);
}

// Fog
uint fog_function = bitfieldExtract(uint(bpmem_fogParam3), 21, 3);
if (fog_function != 0x0u /* Off (no fog) */) {
// TODO: This all needs to be converted from float to fixed point
float ze;
if (bitfieldExtract(uint(bpmem_fogParam3), 20, 1) == 0u) {
// perspective
// ze = A/(B - (Zs >> B_SHF)
ze = (cfogf.x * 16777216.0) / float(cfogi.y - (zCoord >> cfogi.w));
} else {
// orthographic
// ze = a*Zs (here, no B_SHF)
ze = cfogf.x * float(zCoord) / 16777216.0;
}

if (bool(bitfieldExtract(uint(bpmem_fogRangeBase), 10, 1))) {


// x_adjust = sqrt((x-center)^2 + k^2)/k
// ze *= x_adjust
float offset = (2.0 * (rawpos.x / cfogf.w)) - 1.0 - cfogf.z;
float floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);
uint indexlower = uint(floatindex);
uint indexupper = indexlower + 1u;
float klower = cfogrange[indexlower >> 2u][indexlower & 3u];
float kupper = cfogrange[indexupper >> 2u][indexupper & 3u];
float k = lerp(klower, kupper, frac(floatindex));
float x_adjust = sqrt(offset * offset + k * k) / k;
ze *= x_adjust;
}

float fog = clamp(ze - cfogf.y, 0.0, 1.0);

if (fog_function >= 0x4u /* Exponential fog */) {


switch (fog_function) {
case 0x4u /* Exponential fog */:
fog = 1.0 - exp2(-8.0 * fog);
break;
case 0x5u /* Exponential-squared fog */:
fog = 1.0 - exp2(-8.0 * fog * fog);
break;
case 0x6u /* Backwards exponential fog */:
fog = exp2(-8.0 * (1.0 - fog));
break;
case 0x7u /* Backwards exponential-squared fog */:
fog = 1.0 - fog;
fog = exp2(-8.0 * fog * fog);
break;
}
}

int ifog = iround(fog * 256.0);


TevResult.rgb = (TevResult.rgb * (256 - ifog) + cfogcolor.rgb * ifog) >> 8;
}

// Logic Ops
if (logic_op_enable) {
int4 fb_value = iround(initial_ocol0 * 255.0); switch (logic_op_mode) {
case 0u: TevResult = int4(0, 0, 0, 0); break;
case 1u: TevResult = TevResult & fb_value; break;
case 2u: TevResult = TevResult & ~fb_value; break;
case 3u: TevResult = TevResult; break;
case 4u: TevResult = ~TevResult & fb_value; break;
case 5u: TevResult = fb_value; break;
case 6u: TevResult = TevResult ^ fb_value; break;
case 7u: TevResult = TevResult | fb_value; break;
case 8u: TevResult = ~(TevResult | fb_value); break;
case 9u: TevResult = ~(TevResult ^ fb_value); break;
case 10u: TevResult = ~fb_value; break;
case 11u: TevResult = TevResult | ~fb_value; break;
case 12u: TevResult = ~TevResult; break;
case 13u: TevResult = ~TevResult | fb_value; break;
case 14u: TevResult = ~(TevResult & fb_value); break;
case 15u: TevResult = int4(255, 255, 255, 255); break;
}
TevResult &= 0xff;
}
if (bpmem_rgba6_format)
ocol0.rgb = float3(TevResult.rgb >> 2) / 63.0;
else
ocol0.rgb = float3(TevResult.rgb) / 255.0;

if (bpmem_dstalpha != 0u)
ocol0.a = float(bitfieldExtract(uint(bpmem_dstalpha), 0, 8) >> 2) / 63.0;
else
ocol0.a = float(TevResult.a >> 2) / 63.0;

// Dest alpha override (dual source blending)


// Colors will be blended against the alpha from ocol1 and
// the alpha from ocol0 will be written to the framebuffer.
ocol1 = float4(0.0, 0.0, 0.0, float(TevResult.a) / 255.0);
if (blend_enable) {
float4 src_color;
if (bpmem_dstalpha != 0u) {
src_color = ocol1;
} else {
src_color = ocol0;
} float4 blend_src;
if (blend_src_factor < 4u) {
if (blend_src_factor < 2u) {
if (blend_src_factor < 1u) {
blend_src.rgb = float3(0,0,0); // 0 (0)
} else {
blend_src.rgb = float3(1,1,1); // 1 (1)
}
} else {
if (blend_src_factor < 3u) {
blend_src.rgb = initial_ocol0.rgb; // dst_color (2)
} else {
blend_src.rgb = float3(1,1,1) - initial_ocol0.rgb; // 1-dst_color (3)
}
}
} else {
if (blend_src_factor < 6u) {
if (blend_src_factor < 5u) {
blend_src.rgb = src_color.aaa; // src_alpha (4)
} else {
blend_src.rgb = float3(1,1,1) - src_color.aaa; // 1-src_alpha (5)
}
} else {
if (blend_src_factor < 7u) {
blend_src.rgb = initial_ocol0.aaa; // dst_alpha (6)
} else {
blend_src.rgb = float3(1,1,1) - initial_ocol0.aaa; // 1-dst_alpha (7)
}
}
}
if (blend_src_factor_alpha < 4u) {
if (blend_src_factor_alpha < 2u) {
if (blend_src_factor_alpha < 1u) {
blend_src.a = 0.0; // 0 (0)
} else {
blend_src.a = 1.0; // 1 (1)
}
} else {
if (blend_src_factor_alpha < 3u) {
blend_src.a = initial_ocol0.a; // dst_color (2)
} else {
blend_src.a = 1.0 - initial_ocol0.a; // 1-dst_color (3)
}
}
} else {
if (blend_src_factor_alpha < 6u) {
if (blend_src_factor_alpha < 5u) {
blend_src.a = src_color.a; // src_alpha (4)
} else {
blend_src.a = 1.0 - src_color.a; // 1-src_alpha (5)
}
} else {
if (blend_src_factor_alpha < 7u) {
blend_src.a = initial_ocol0.a; // dst_alpha (6)
} else {
blend_src.a = 1.0 - initial_ocol0.a; // 1-dst_alpha (7)
}
}
}
float4 blend_dst;
if (blend_dst_factor < 4u) {
if (blend_dst_factor < 2u) {
if (blend_dst_factor < 1u) {
blend_dst.rgb = float3(0,0,0); // 0 (0)
} else {
blend_dst.rgb = float3(1,1,1); // 1 (1)
}
} else {
if (blend_dst_factor < 3u) {
blend_dst.rgb = ocol0.rgb; // src_color (2)
} else {
blend_dst.rgb = float3(1,1,1) - ocol0.rgb; // 1-src_color (3)
}
}
} else {
if (blend_dst_factor < 6u) {
if (blend_dst_factor < 5u) {
blend_dst.rgb = src_color.aaa; // src_alpha (4)
} else {
blend_dst.rgb = float3(1,1,1) - src_color.aaa; // 1-src_alpha (5)
}
} else {
if (blend_dst_factor < 7u) {
blend_dst.rgb = initial_ocol0.aaa; // dst_alpha (6)
} else {
blend_dst.rgb = float3(1,1,1) - initial_ocol0.aaa; // 1-dst_alpha (7)
}
}
}
if (blend_dst_factor_alpha < 4u) {
if (blend_dst_factor_alpha < 2u) {
if (blend_dst_factor_alpha < 1u) {
blend_dst.a = 0.0; // 0 (0)
} else {
blend_dst.a = 1.0; // 1 (1)
}
} else {
if (blend_dst_factor_alpha < 3u) {
blend_dst.a = ocol0.a; // src_color (2)
} else {
blend_dst.a = 1.0 - ocol0.a; // 1-src_color (3)
}
}
} else {
if (blend_dst_factor_alpha < 6u) {
if (blend_dst_factor_alpha < 5u) {
blend_dst.a = src_color.a; // src_alpha (4)
} else {
blend_dst.a = 1.0 - src_color.a; // 1-src_alpha (5)
}
} else {
if (blend_dst_factor_alpha < 7u) {
blend_dst.a = initial_ocol0.a; // dst_alpha (6)
} else {
blend_dst.a = 1.0 - initial_ocol0.a; // 1-dst_alpha (7)
}
}
}
float4 blend_result;
if (blend_subtract)
blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb - ocol0.rgb *
blend_src.rgb;
else
blend_result.rgb = initial_ocol0.rgb * blend_dst.rgb + ocol0.rgb *
blend_src.rgb;
if (blend_subtract_alpha)
blend_result.a = initial_ocol0.a * blend_dst.a - ocol0.a * blend_src.a;
else
blend_result.a = initial_ocol0.a * blend_dst.a + ocol0.a * blend_src.a;
real_ocol0 = blend_result;
} else {
real_ocol0 = ocol0;
}
}

// Returns the rasterizer colour input for one TEV stage.
//
// The source is chosen by the 3-bit field at bit 7 of ss.order:
//   0, 1  -> colors_0 / colors_1 scaled by 255, rounded with iround() and
//            swizzled by the 2-bit swap selector at bit 0 of ss.ac
//   5     -> s.AlphaBump replicated into all four components
//   6     -> (s.AlphaBump | (s.AlphaBump >> 5)) replicated into all four components
//   other -> all zeros
int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1) {
  uint ras_sel = bitfieldExtract(uint(ss.order), 7, 3);

  // Lighting channel 0 or 1
  if (ras_sel == 0u || ras_sel == 1u) {
    float4 channel = colors_1;
    if (ras_sel == 0u)
      channel = colors_0;

    int4 channel_int = iround(channel * 255.0);
    uint swap_sel = bitfieldExtract(uint(ss.ac), 0, 2);
    return Swizzle(swap_sel, channel_int);
  }

  // Alpha bump
  if (ras_sel == 5u) {
    int bump = s.AlphaBump;
    return int4(bump, bump, bump, bump);
  }

  // Normalized alpha bump: OR the value with itself shifted right by 5 bits
  if (ras_sel == 6u) {
    int bump_norm = s.AlphaBump | (s.AlphaBump >> 5);
    return int4(bump_norm, bump_norm, bump_norm, bump_norm);
  }

  // Any other selector yields black with zero alpha
  return int4(0, 0, 0, 0);
}

// Returns the konst colour for the TEV stage identified by ss.stage.
//
// One tevksel word (fetched with bpmem_tevksel(ss.stage >> 1)) carries the
// selectors for a pair of stages. Each selector is a 5-bit index into
// konstLookup:
//   even stage: RGB index at bits 4..8,   alpha index at bits 9..13
//   odd stage:  RGB index at bits 14..18, alpha index at bits 19..23
// The RGB and alpha parts are looked up independently and combined.
// The parameter s is not read by this function.
int4 getKonstColor(State s, StageState ss) {
  // Select Konst for stage
  // TODO: a switch case might be better here than a dynamically indexed uniform lookup
  uint tevksel = bpmem_tevksel(ss.stage >> 1);
  if ((ss.stage & 1u) == 0u)
    return int4(konstLookup[bitfieldExtract(uint(tevksel), 4, 5)].rgb,
                konstLookup[bitfieldExtract(uint(tevksel), 9, 5)].a);
  else
    return int4(konstLookup[bitfieldExtract(uint(tevksel), 14, 5)].rgb,
                konstLookup[bitfieldExtract(uint(tevksel), 19, 5)].a);
}

Dolphin Version: Dolphin 2.0-15108-121


Video Backend: OpenGL ES

You might also like