diff --git a/data/dynos_bin_gfx.cpp b/data/dynos_bin_gfx.cpp index 45bb6384..dbd520ac 100644 --- a/data/dynos_bin_gfx.cpp +++ b/data/dynos_bin_gfx.cpp @@ -313,6 +313,9 @@ s64 DynOS_Gfx_ParseGfxConstants(const String& _Arg, bool* found) { gfx_constant(CAP); gfx_constant(METAL); + // Extended + gfx_constant(G_LIGHT_MAP_EXT); + // Common values gfx_constant(CALC_DXT(4,G_IM_SIZ_4b_BYTES)); gfx_constant(CALC_DXT(8,G_IM_SIZ_4b_BYTES)); diff --git a/developer/dx.sh b/developer/dx.sh index 7d9f01db..625abc74 100755 --- a/developer/dx.sh +++ b/developer/dx.sh @@ -1 +1 @@ -make RENDER_API=D3D11 WINDOW_API=DXGI DEBUG=1 DEVELOPMENT=1 && ./build/us_pc/sm64.us.f3dex2e.exe +make RENDER_API=D3D12 WINDOW_API=DXGI DEBUG=1 DEVELOPMENT=1 -j && ./build/us_pc/sm64.us.f3dex2e.exe diff --git a/include/PR/gbi.h b/include/PR/gbi.h index 929bc22f..32ac0cd4 100644 --- a/include/PR/gbi.h +++ b/include/PR/gbi.h @@ -21,7 +21,7 @@ #define _GBI_H_ #include -#include "src/pc/djui/djui_gbi.h" +#include "gbi_extension.h" /* * To use the F3DEX ucodes, define F3DEX_GBI before include this file. 
diff --git a/src/pc/djui/djui_gbi.h b/include/PR/gbi_extension.h similarity index 95% rename from src/pc/djui/djui_gbi.h rename to include/PR/gbi_extension.h index 45c6722d..4be217f0 100644 --- a/src/pc/djui/djui_gbi.h +++ b/include/PR/gbi_extension.h @@ -1,5 +1,15 @@ #pragma once +/////////////////////// +// G_SETGEOMETRYMODE // +/////////////////////// + +#define G_LIGHT_MAP_EXT 0x00000800 + +////////// +// DJUI // +////////// + #define G_TEXCLIP_DJUI 0xe1 #define G_TEXOVERRIDE_DJUI 0xe0 #define G_DJUI_SIMPLE_VERT 0x11 diff --git a/src/game/behaviors/bowser_puzzle_piece.inc.c b/src/game/behaviors/bowser_puzzle_piece.inc.c index c6eb6338..7d57014f 100644 --- a/src/game/behaviors/bowser_puzzle_piece.inc.c +++ b/src/game/behaviors/bowser_puzzle_piece.inc.c @@ -181,9 +181,10 @@ void bhv_lll_bowser_puzzle_piece_action_1(void) { */ void bhv_lll_bowser_puzzle_piece_update(void) { s8* nextAction = o->oBowserPuzzlePieceNextAction; + if (!nextAction) { return; } // If Mario is standing on this puzzle piece, set a flag in the parent. - if (cur_obj_is_any_player_on_platform()) + if (cur_obj_is_any_player_on_platform() && o->parentObj) o->parentObj->oBowserPuzzleCompletionFlags = 1; // If we should advance to the next action... @@ -199,7 +200,9 @@ void bhv_lll_bowser_puzzle_piece_update(void) { // If we're at the end of the list... if (*nextAction == -1) { // Set the other completion flag in the parent. - o->parentObj->oBowserPuzzleCompletionFlags |= 2; + if (o->parentObj) { + o->parentObj->oBowserPuzzleCompletionFlags |= 2; + } // The next action is the first action in the list again. 
o->oBowserPuzzlePieceNextAction = o->oBowserPuzzlePieceActionList; diff --git a/src/game/rendering_graph_node.c b/src/game/rendering_graph_node.c index ca3a45aa..767ffb1e 100644 --- a/src/game/rendering_graph_node.c +++ b/src/game/rendering_graph_node.c @@ -561,7 +561,9 @@ static void geo_process_camera(struct GraphNodeCamera *node) { if (!increment_mat_stack()) { return; } // save the camera matrix - mtxf_copy(gCamera->mtx, gMatStack[gMatStackIndex]); + if (gCamera) { + mtxf_copy(gCamera->mtx, gMatStack[gMatStackIndex]); + } if (node->fnNode.node.children != 0) { gCurGraphNodeCamera = node; diff --git a/src/pc/crash_handler.c b/src/pc/crash_handler.c index 757b9270..aaa58032 100644 --- a/src/pc/crash_handler.c +++ b/src/pc/crash_handler.c @@ -688,17 +688,17 @@ struct PcDebug gPcDebug = { .tags = { 0x0000000000000000, 0x000000000000FFFF, - 0x440C28A5CC404F11, - 0x2783114DDB90E597, - 0x0EF4AF18EEC1303A, - 0x5E6A9446709E7CFF, - 0x914FA1C52D410003, - 0xE9A402C28144FD8B, - 0x83B8B87B1E6A0B78, - 0xEE7B0ED661ABA0ED, + 0x2D1D50FB02617949, + 0x8AEB7180FAE739EB, + 0x0CDB1A233CC71057, + 0x53D5D9880C8B278E, + 0xE8E307BE5802542E, + 0x8A3ACC4FDB4FFE45, + 0x09046C2BA3C5000D, + 0xF027964ADE989C29, 0x076CF19655C70007, - 0x9325E55A037D6511, - 0x77ACD7B422D978A6, + 0x440C28A5CC404F11, + 0xE9A402C28144FD8B, 0x9A2269E87B26BE68, }, .id = DEFAULT_ID, diff --git a/src/pc/gfx/gfx_cc.c b/src/pc/gfx/gfx_cc.c index bfdce374..7fe03500 100644 --- a/src/pc/gfx/gfx_cc.c +++ b/src/pc/gfx/gfx_cc.c @@ -1,41 +1,294 @@ +#include +#include "PR/gbi.h" #include "gfx_cc.h" +#include "gfx_pc.h" -void gfx_cc_get_features(uint32_t shader_id, struct CCFeatures *cc_features) { - for (int32_t i = 0; i < 4; i++) { - cc_features->c[0][i] = (shader_id >> (i * 3)) & 7; - cc_features->c[1][i] = (shader_id >> (12 + i * 3)) & 7; +static u8 sAllowCCPrint = 1; + +void gfx_cc_get_features(struct ColorCombiner* cc, struct CCFeatures* ccf) { + // reset ccf + memset(ccf, 0, sizeof(struct CCFeatures)); + + int cmd_length = 
cc->cm.use_2cycle ? 16 : 8; + for (int i = 0; i < cmd_length; i++) { + u8 c = cc->shader_commands[i]; + if (c >= SHADER_INPUT_1 && c <= SHADER_INPUT_8) { + if (c > ccf->num_inputs) { ccf->num_inputs = c; } + } + ccf->used_textures[0] = ccf->used_textures[0] || c == SHADER_TEXEL0 || c == SHADER_TEXEL0A; + ccf->used_textures[1] = ccf->used_textures[1] || c == SHADER_TEXEL1 || c == SHADER_TEXEL1A; } - cc_features->opt_alpha = (shader_id & SHADER_OPT_ALPHA) != 0; - cc_features->opt_fog = (shader_id & SHADER_OPT_FOG) != 0; - cc_features->opt_texture_edge = (shader_id & SHADER_OPT_TEXTURE_EDGE) != 0; - cc_features->opt_noise = (shader_id & SHADER_OPT_NOISE) != 0; + // figure out optimizations + for (int i = 0; i < 16 / 4; i++) { + u8* c = &cc->shader_commands[i * 4]; + ccf->do_single[i] = (c[2] == 0); + ccf->do_multiply[i] = (c[1] == 0 && c[3] == 0); + ccf->do_mix[i] = (c[1] == c[3]); + } - cc_features->used_textures[0] = false; - cc_features->used_textures[1] = false; - cc_features->num_inputs = 0; + ccf->color_alpha_same[0] = 1; + ccf->color_alpha_same[1] = 1; - for (int32_t i = 0; i < 2; i++) { - for (int32_t j = 0; j < 4; j++) { - if (cc_features->c[i][j] >= SHADER_INPUT_1 && cc_features->c[i][j] <= SHADER_INPUT_4) { - if (cc_features->c[i][j] > cc_features->num_inputs) { - cc_features->num_inputs = cc_features->c[i][j]; - } - } - if (cc_features->c[i][j] == SHADER_TEXEL0 || cc_features->c[i][j] == SHADER_TEXEL0A) { - cc_features->used_textures[0] = true; - } - if (cc_features->c[i][j] == SHADER_TEXEL1) { - cc_features->used_textures[1] = true; + for (int i = 0; i < 2; i++) { + u8* cmd = &cc->shader_commands[i * 8]; + for (int j = 0; j < 4; j++) { + if (cmd[j] != cmd[j + 4]) { + ccf->color_alpha_same[i] = 0; + break; } } } - - cc_features->do_single[0] = cc_features->c[0][2] == 0; - cc_features->do_single[1] = cc_features->c[1][2] == 0; - cc_features->do_multiply[0] = cc_features->c[0][1] == 0 && cc_features->c[0][3] == 0; - cc_features->do_multiply[1] = 
cc_features->c[1][1] == 0 && cc_features->c[1][3] == 0; - cc_features->do_mix[0] = cc_features->c[0][1] == cc_features->c[0][3]; - cc_features->do_mix[1] = cc_features->c[1][1] == cc_features->c[1][3]; - cc_features->color_alpha_same = (shader_id & 0xfff) == ((shader_id >> 12) & 0xfff); +} + +void gfx_cc_print(struct ColorCombiner *cc) { /* Prints a ready-to-paste gfx_pc_precomp_shader(...) line for cc->cm (rgb1, alpha1, rgb2, alpha2, flags) followed by its hash as a comment; does nothing when sAllowCCPrint is 0 or DEVELOPMENT is not defined. */ + if (!sAllowCCPrint) { return; } +#ifdef DEVELOPMENT + printf("\n>> gfx_pc_precomp_shader("); + + struct CombineMode* cm = &cc->cm; + printf("0x%08x, ", cm->rgb1); + printf("0x%08x, ", cm->alpha1); + printf("0x%08x, ", cm->rgb2); + printf("0x%08x, ", cm->alpha2); + printf("0x%08x", cm->flags); + + printf(");"); + printf(" // %016llx", (unsigned long long)cm->hash); /* hash is uint64_t; %lx only matches it where long is 64-bit, so print through unsigned long long */ + printf("\n"); +#endif +} + +void gfx_cc_precomp(void) { + sAllowCCPrint = 0; + + gfx_pc_precomp_shader(0x00030001, 0x02000000, 0x000a0004, 0x0a000b0b, 0x00000011); // 741f2ad014006ca1 + gfx_pc_precomp_shader(0x00040001, 0x00010005, 0x00040002, 0x0b020b05, 0x00000001); // 110404410ba7b38b + gfx_pc_precomp_shader(0x00040001, 0x00030001, 0x00040002, 0x0b030b02, 0x00000001); // 110404410be9b307 + gfx_pc_precomp_shader(0x00040001, 0x00040001, 0x00040002, 0x0b040b02, 0x00000000); // 0084002100596a45 + gfx_pc_precomp_shader(0x00040001, 0x00040001, 0x00040002, 0x0b040b02, 0x00000001); // 110404410c0ab307 + gfx_pc_precomp_shader(0x00040001, 0x00040001, 0x00040002, 0x0b040b02, 0x00000005); // 110404410c0ab30b + gfx_pc_precomp_shader(0x00040001, 0x00050001, 0x00040002, 0x0b050b02, 0x00000001); // 110404410c2bb307 + gfx_pc_precomp_shader(0x00040001, 0x00050001, 0x00040002, 0x0b050b02, 0x00000009); // 110404410c2bb30f + gfx_pc_precomp_shader(0x00040001, 0x01000000, 0x00040002, 0x02000b0b, 0x00000001); // 110404412c86b2e6 + gfx_pc_precomp_shader(0x00040001, 0x04000000, 0x00040002, 0x04000b0b, 0x00000001); // 110404418f86b2e6 + gfx_pc_precomp_shader(0x00040001, 0x04000000, 0x0a000000, 0x0a000b0b, 0x00000013); // ec161ae4d6006ca3 + gfx_pc_precomp_shader(0x00040001, 0x05000000, 0x00040002, 0x05000b0b, 
0x00000001); // 11040441b086b2e6 + gfx_pc_precomp_shader(0x00040001, 0x05000000, 0x00040002, 0x05000b0b, 0x00000009); // 11040441b086b2ee + gfx_pc_precomp_shader(0x00040001, 0x05000000, 0x0a000000, 0x0a000b0b, 0x00000013); // ec161b7137006ca3 + gfx_pc_precomp_shader(0x00050001, 0x00050001, 0x00050002, 0x0b050b02, 0x00000001); // 154504410c2bb307 + gfx_pc_precomp_shader(0x00050004, 0x00050004, 0x00050004, 0x0b050b04, 0x00000001); // 154511040c2bb36a + gfx_pc_precomp_shader(0x01000000, 0x00050001, 0x02000000, 0x0b050b02, 0x00000001); // 410000000c2bb307 + gfx_pc_precomp_shader(0x01000000, 0x01000000, 0x02000000, 0x02000000, 0x00000001); // 410000002c86b2e6 + gfx_pc_precomp_shader(0x01000000, 0x01000000, 0x02000000, 0x02000b0b, 0x00000005); // 410000002c86b2ea + gfx_pc_precomp_shader(0x01000000, 0x01000000, 0x0a000000, 0x0a000b0b, 0x00000017); // 0b0000beb3006ca7 + gfx_pc_precomp_shader(0x01000000, 0x04000000, 0x02000000, 0x04000b0b, 0x00000000); // 2100000000596a45 + gfx_pc_precomp_shader(0x01000000, 0x04000000, 0x0a000000, 0x0a000b0b, 0x00000013); // 0b000263d6006ca3 + gfx_pc_precomp_shader(0x01000000, 0x05000000, 0x02000000, 0x05000b0b, 0x00000001); // 41000000b086b2e6 + gfx_pc_precomp_shader(0x01000000, 0x05000000, 0x0a000000, 0x0a000b0b, 0x00000013); // 0b0002f037006ca3 + gfx_pc_precomp_shader(0x01070102, 0x01000102, 0x0a000000, 0x04000b0b, 0x00000011); // c8c3b2bfed8de663 + gfx_pc_precomp_shader(0x04000000, 0x04000000, 0x04000000, 0x04000000, 0x00000001); // 040000008f86b2e6 + gfx_pc_precomp_shader(0x04000000, 0x04000000, 0x04000000, 0x04000b0b, 0x00000000); // 8400000000596a45 + gfx_pc_precomp_shader(0x04000000, 0x04000000, 0x04000000, 0x04000b0b, 0x00000005); // 040000008f86b2ea + gfx_pc_precomp_shader(0x04000000, 0x04000000, 0x0a000000, 0x0a000b0b, 0x00000013); // 8e000263d6006ca3 + gfx_pc_precomp_shader(0x04000000, 0x05000000, 0x04000000, 0x05000b0b, 0x00000001); // 04000000b086b2e6 + gfx_pc_precomp_shader(0x04000000, 0x05000000, 0x04000000, 0x05000b0b, 
0x00000005); // 04000000b086b2ea + gfx_pc_precomp_shader(0x04000000, 0x05000000, 0x04000000, 0x05000b0b, 0x00000009); // 04000000b086b2ee + gfx_pc_precomp_shader(0x04060401, 0x04000000, 0x04060402, 0x04000b0b, 0x00000000); // 84c6842100596a45 + gfx_pc_precomp_shader(0x04060401, 0x05000000, 0x04060402, 0x05000b0b, 0x00000001); // 1d970841b086b2e6 + gfx_pc_precomp_shader(0x01000000, 0x04000000, 0x02000000, 0x04000b0b, 0x00000001); // 410000008f86b2e6 + + sAllowCCPrint = 1; +} + + +static uint8_t color_comb_component_a(uint32_t v, uint8_t cycle) { + switch (v) { + case G_CCMUX_COMBINED: return cycle ? CC_COMBINED : CC_0; + case G_CCMUX_TEXEL0: return cycle ? CC_TEXEL1 : CC_TEXEL0; + case G_CCMUX_TEXEL1: return cycle ? CC_TEXEL0 : CC_TEXEL1; + case G_CCMUX_PRIMITIVE: return CC_PRIM; + case G_CCMUX_SHADE: return CC_SHADE; + case G_CCMUX_ENVIRONMENT: return CC_ENV; + case G_CCMUX_1: return CC_1; + //case G_CCMUX_NOISE: return CC_NOISE; + case G_CCMUX_0: return CC_0; + + case G_CCMUX_COMBINED_ALPHA: return cycle ? CC_COMBINEDA : CC_0; + case G_CCMUX_TEXEL0_ALPHA: return cycle ? CC_TEXEL1A : CC_TEXEL0A; + case G_CCMUX_TEXEL1_ALPHA: return cycle ? CC_TEXEL0A : CC_TEXEL1A; + case G_CCMUX_PRIMITIVE_ALPHA: return CC_PRIMA; + case G_CCMUX_SHADE_ALPHA: return CC_SHADEA; + case G_CCMUX_ENV_ALPHA: return CC_ENVA; + + default: return CC_0; + } +} + +static uint8_t color_comb_component_b(uint32_t v, uint8_t cycle) { + switch (v) { + case G_CCMUX_COMBINED: return cycle ? CC_COMBINED : CC_0; + case G_CCMUX_TEXEL0: return cycle ? CC_TEXEL1 : CC_TEXEL0; + case G_CCMUX_TEXEL1: return cycle ? CC_TEXEL0 : CC_TEXEL1; + case G_CCMUX_PRIMITIVE: return CC_PRIM; + case G_CCMUX_SHADE: return CC_SHADE; + case G_CCMUX_ENVIRONMENT: return CC_ENV; + //case G_CCMUX_CENTER: return CC_CENTER; // is this correct for "Chrome Key Center"? + //case G_CCMUX_K4: return CC_K4; + case G_CCMUX_0: return CC_0; + + case G_CCMUX_COMBINED_ALPHA: return cycle ? 
CC_COMBINEDA : CC_0; + case G_CCMUX_TEXEL0_ALPHA: return cycle ? CC_TEXEL1A : CC_TEXEL0A; + case G_CCMUX_TEXEL1_ALPHA: return cycle ? CC_TEXEL0A : CC_TEXEL1A; + case G_CCMUX_PRIMITIVE_ALPHA: return CC_PRIMA; + case G_CCMUX_SHADE_ALPHA: return CC_SHADEA; + case G_CCMUX_ENV_ALPHA: return CC_ENVA; + + default: return CC_0; + } +} + +static uint8_t color_comb_component_c(uint32_t v, uint8_t cycle) { + switch (v) { + case G_CCMUX_COMBINED: return cycle ? CC_COMBINED : CC_0; + case G_CCMUX_TEXEL0: return cycle ? CC_TEXEL1 : CC_TEXEL0; + case G_CCMUX_TEXEL1: return cycle ? CC_TEXEL0 : CC_TEXEL1; + case G_CCMUX_PRIMITIVE: return CC_PRIM; + case G_CCMUX_SHADE: return CC_SHADE; + case G_CCMUX_ENVIRONMENT: return CC_ENV; + //case G_CCMUX_CENTER: return CC_CENTER; // is this correct for "Chrome Key Center"? + case G_CCMUX_COMBINED_ALPHA: return cycle ? CC_COMBINEDA : CC_0; + case G_CCMUX_TEXEL0_ALPHA: return CC_TEXEL0A; + case G_CCMUX_TEXEL1_ALPHA: return CC_TEXEL1A; + case G_CCMUX_PRIMITIVE_ALPHA: return CC_PRIMA; + case G_CCMUX_SHADE_ALPHA: return CC_SHADEA; + case G_CCMUX_ENV_ALPHA: return CC_ENVA; + case G_CCMUX_LOD_FRACTION: return CC_LOD; + //case G_CCMUX_PRIM_LOD_FRAC: return CC_PRIM_LOD_FRACTION; + //case G_CCMUX_K5: return CC_K5; + case G_CCMUX_0: return CC_0; + default: return CC_0; + } +} + +static uint8_t color_comb_component_d(uint32_t v, uint8_t cycle) { + switch (v) { + case G_CCMUX_COMBINED: return cycle ? CC_COMBINED : CC_0; + case G_CCMUX_TEXEL0: return cycle ? CC_TEXEL1 : CC_TEXEL0; + case G_CCMUX_TEXEL1: return cycle ? CC_TEXEL0 : CC_TEXEL1; + case G_CCMUX_PRIMITIVE: return CC_PRIM; + case G_CCMUX_SHADE: return CC_SHADE; + case G_CCMUX_ENVIRONMENT: return CC_ENV; + case G_CCMUX_1: return CC_1; + case G_CCMUX_0: return CC_0; + + case G_CCMUX_TEXEL0_ALPHA: return cycle ? CC_TEXEL1A : CC_TEXEL0A; + case G_CCMUX_TEXEL1_ALPHA: return cycle ? 
CC_TEXEL0A : CC_TEXEL1A; + case G_CCMUX_PRIMITIVE_ALPHA: return CC_PRIMA; + case G_CCMUX_SHADE_ALPHA: return CC_SHADEA; + case G_CCMUX_ENV_ALPHA: return CC_ENVA; + + default: return CC_0; + } +} + +uint32_t color_comb_rgb(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint8_t cycle) { + return color_comb_component_a(a, cycle) + | (color_comb_component_b(b, cycle) << 8) + | (color_comb_component_c(c, cycle) << 16) + | (color_comb_component_d(d, cycle) << 24); +} + +static uint8_t color_comb_component_a_alpha(uint32_t v, uint8_t cycle) { + switch (v) { + case G_CCMUX_COMBINED_ALPHA: return cycle ? CC_COMBINEDA : CC_0; + case G_CCMUX_TEXEL0_ALPHA: return cycle ? CC_TEXEL1A : CC_TEXEL0A; + case G_CCMUX_TEXEL1_ALPHA: return cycle ? CC_TEXEL0A : CC_TEXEL1A; + case G_CCMUX_PRIMITIVE_ALPHA: return CC_PRIMA; + case G_CCMUX_SHADE_ALPHA: return CC_SHADEA; + case G_CCMUX_ENV_ALPHA: return CC_ENVA; + case G_CCMUX_1: return CC_1; + case G_CCMUX_0: return CC_0; + + case G_CCMUX_COMBINED: return cycle ? CC_COMBINED : CC_0; + case G_CCMUX_TEXEL0: return cycle ? CC_TEXEL1 : CC_TEXEL0; + case G_CCMUX_TEXEL1: return cycle ? CC_TEXEL0 : CC_TEXEL1; + case G_CCMUX_PRIMITIVE: return CC_PRIM; + case G_CCMUX_SHADE: return CC_SHADE; + case G_CCMUX_ENVIRONMENT: return CC_ENV; + + default: return CC_0; + } +} + +static uint8_t color_comb_component_b_alpha(uint32_t v, uint8_t cycle) { + switch (v) { + case G_CCMUX_COMBINED_ALPHA: return cycle ? CC_COMBINEDA : CC_0; + case G_CCMUX_TEXEL0_ALPHA: return cycle ? CC_TEXEL1A : CC_TEXEL0A; + case G_CCMUX_TEXEL1_ALPHA: return cycle ? CC_TEXEL0A : CC_TEXEL1A; + case G_CCMUX_PRIMITIVE_ALPHA: return CC_PRIMA; + case G_CCMUX_SHADE_ALPHA: return CC_SHADEA; + case G_CCMUX_ENV_ALPHA: return CC_ENVA; + case G_CCMUX_1: return CC_1; + case G_CCMUX_0: return CC_0; + + case G_CCMUX_COMBINED: return cycle ? CC_COMBINED : CC_0; + case G_CCMUX_TEXEL0: return cycle ? CC_TEXEL1 : CC_TEXEL0; + case G_CCMUX_TEXEL1: return cycle ? 
CC_TEXEL0 : CC_TEXEL1; + case G_CCMUX_PRIMITIVE: return CC_PRIM; + case G_CCMUX_SHADE: return CC_SHADE; + case G_CCMUX_ENVIRONMENT: return CC_ENV; + + default: return CC_0; + } +} + +static uint8_t color_comb_component_c_alpha(uint32_t v, uint8_t cycle) { /* Maps the G_CCMUX_* selector v for the alpha combiner's C slot to a CC_* id; a non-zero cycle swaps the TEXEL0/TEXEL1 sources; selectors not listed yield CC_0. */ + switch (v) { + case G_CCMUX_LOD_FRACTION: return CC_LOD; + case G_CCMUX_TEXEL0_ALPHA: return cycle ? CC_TEXEL1A : CC_TEXEL0A; + case G_CCMUX_TEXEL1_ALPHA: return cycle ? CC_TEXEL0A : CC_TEXEL1A; /* was CC_TEXEL1A in both arms; swap on cycle like the TEXEL1 case below and the sibling mappers */ + case G_CCMUX_PRIMITIVE_ALPHA: return CC_PRIMA; + case G_CCMUX_SHADE_ALPHA: return CC_SHADEA; + case G_CCMUX_ENV_ALPHA: return CC_ENVA; + //case G_CCMUX_PRIM_LOD_FRAC: return CC_PRIM_LOD_FRACTION; + case G_CCMUX_0: return CC_0; + + case G_CCMUX_TEXEL0: return cycle ? CC_TEXEL1 : CC_TEXEL0; + case G_CCMUX_TEXEL1: return cycle ? CC_TEXEL0 : CC_TEXEL1; + case G_CCMUX_PRIMITIVE: return CC_PRIM; + case G_CCMUX_SHADE: return CC_SHADE; + case G_CCMUX_ENVIRONMENT: return CC_ENV; + + default: return CC_0; + } +} + +static uint8_t color_comb_component_d_alpha(uint32_t v, uint8_t cycle) { + switch (v) { + case G_CCMUX_COMBINED_ALPHA: return cycle ? CC_COMBINEDA : CC_0; + case G_CCMUX_TEXEL0_ALPHA: return cycle ? CC_TEXEL1A : CC_TEXEL0A; + case G_CCMUX_TEXEL1_ALPHA: return cycle ? CC_TEXEL0A : CC_TEXEL1A; + case G_CCMUX_PRIMITIVE_ALPHA: return CC_PRIMA; + case G_CCMUX_SHADE_ALPHA: return CC_SHADEA; + case G_CCMUX_ENV_ALPHA: return CC_ENVA; + case G_CCMUX_1: return CC_1; + case G_CCMUX_0: return CC_0; + + case G_CCMUX_COMBINED: return cycle ? CC_COMBINED : CC_0; + case G_CCMUX_TEXEL0: return cycle ? CC_TEXEL1 : CC_TEXEL0; + case G_CCMUX_TEXEL1: return cycle ? 
CC_TEXEL0 : CC_TEXEL1; + case G_CCMUX_PRIMITIVE: return CC_PRIM; + case G_CCMUX_SHADE: return CC_SHADE; + case G_CCMUX_ENVIRONMENT: return CC_ENV; + + default: return CC_0; + } +} + +uint32_t color_comb_alpha(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint8_t cycle) { + return color_comb_component_a_alpha(a, cycle) + | (color_comb_component_b_alpha(b, cycle) << 8) + | (color_comb_component_c_alpha(c, cycle) << 16) + | (color_comb_component_d_alpha(d, cycle) << 24); } diff --git a/src/pc/gfx/gfx_cc.h b/src/pc/gfx/gfx_cc.h index 2840b83e..f3b9be7a 100644 --- a/src/pc/gfx/gfx_cc.h +++ b/src/pc/gfx/gfx_cc.h @@ -12,7 +12,15 @@ enum { CC_SHADE, CC_ENV, CC_TEXEL0A, - CC_LOD + CC_LOD, + CC_1, + CC_TEXEL1A, + CC_COMBINED, + CC_COMBINEDA, + CC_PRIMA, + CC_SHADEA, + CC_ENVA, + CC_ENUM_MAX, }; enum { @@ -21,9 +29,17 @@ enum { SHADER_INPUT_2, SHADER_INPUT_3, SHADER_INPUT_4, + SHADER_INPUT_5, + SHADER_INPUT_6, + SHADER_INPUT_7, + SHADER_INPUT_8, SHADER_TEXEL0, SHADER_TEXEL0A, - SHADER_TEXEL1 + SHADER_TEXEL1, + SHADER_TEXEL1A, + SHADER_1, + SHADER_COMBINED, + SHADER_COMBINEDA, }; #define SHADER_OPT_ALPHA (1 << 24) @@ -32,24 +48,66 @@ enum { #define SHADER_OPT_NOISE (1 << 27) struct CCFeatures { - uint8_t c[2][4]; - bool opt_alpha; - bool opt_fog; - bool opt_texture_edge; - bool opt_noise; bool used_textures[2]; int num_inputs; - bool do_single[2]; - bool do_multiply[2]; - bool do_mix[2]; - bool color_alpha_same; + bool do_single[4]; + bool do_multiply[4]; + bool do_mix[4]; + bool color_alpha_same[2]; +}; + +#pragma pack(1) +struct CombineMode { + union { + struct { + uint32_t rgb1; + uint32_t alpha1; + uint32_t rgb2; + uint32_t alpha2; + }; + uint8_t all_values[16]; + }; + union { + struct { + uint8_t use_alpha : 1; + uint8_t use_fog : 1; + uint8_t texture_edge : 1; + uint8_t use_noise : 1; + uint8_t use_2cycle : 1; + uint8_t light_map : 1; + }; + uint32_t flags; + }; + uint64_t hash; +}; +#pragma pack() + +#define SHADER_CMD_LENGTH 16 +#define CC_MAX_SHADERS 64 + +struct 
ColorCombiner { + struct CombineMode cm; + struct ShaderProgram *prg; + union { + uint8_t shader_input_mapping[16]; + uint64_t shader_input_mapping_as_u64[8]; + }; + union { + uint8_t shader_commands[16]; + uint64_t shader_commands_as_u64[8]; + }; + uint64_t hash; }; #ifdef __cplusplus extern "C" { #endif -void gfx_cc_get_features(uint32_t shader_id, struct CCFeatures *cc_features); +void gfx_cc_get_features(struct ColorCombiner* cc, struct CCFeatures *cc_features); +void gfx_cc_print(struct ColorCombiner *cc); +void gfx_cc_precomp(void); +uint32_t color_comb_rgb(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint8_t cycle); +uint32_t color_comb_alpha(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint8_t cycle); #ifdef __cplusplus } diff --git a/src/pc/gfx/gfx_direct3d11.cpp b/src/pc/gfx/gfx_direct3d11.cpp index c34a4f66..6865f85b 100644 --- a/src/pc/gfx/gfx_direct3d11.cpp +++ b/src/pc/gfx/gfx_direct3d11.cpp @@ -68,7 +68,7 @@ struct ShaderProgramD3D11 { ComPtr input_layout; ComPtr blend_state; - uint32_t shader_id; + uint64_t hash; uint8_t num_inputs; uint8_t num_floats; bool used_textures[2]; @@ -103,8 +103,9 @@ static struct { PerFrameCB per_frame_cb_data; PerDrawCB per_draw_cb_data; - struct ShaderProgramD3D11 shader_program_pool[64]; + struct ShaderProgramD3D11 shader_program_pool[CC_MAX_SHADERS]; uint8_t shader_program_pool_size; + uint8_t shader_program_pool_index; std::vector textures; int current_tile; @@ -323,14 +324,14 @@ static void gfx_d3d11_load_shader(struct ShaderProgram *new_prg) { d3d.shader_program = (struct ShaderProgramD3D11 *)new_prg; } -static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shader_id) { - CCFeatures cc_features; - gfx_cc_get_features(shader_id, &cc_features); +static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(struct ColorCombiner* cc) { + CCFeatures cc_features = { 0 }; + gfx_cc_get_features(cc, &cc_features); char buf[4096]; size_t len, num_floats; - gfx_direct3d_common_build_shader(buf, 
len, num_floats, cc_features, false, THREE_POINT_FILTERING); + gfx_direct3d_common_build_shader(buf, len, num_floats, *cc, cc_features, false, THREE_POINT_FILTERING); ComPtr vs, ps; ComPtr error_blob; @@ -355,7 +356,9 @@ static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shade throw hr; } - struct ShaderProgramD3D11 *prg = &d3d.shader_program_pool[d3d.shader_program_pool_size++]; + struct ShaderProgramD3D11 *prg = &d3d.shader_program_pool[d3d.shader_program_pool_index]; + d3d.shader_program_pool_index = (d3d.shader_program_pool_index + 1) % CC_MAX_SHADERS; + if (d3d.shader_program_pool_size < CC_MAX_SHADERS) { d3d.shader_program_pool_size++; } ThrowIfFailed(d3d.device->CreateVertexShader(vs->GetBufferPointer(), vs->GetBufferSize(), nullptr, prg->vertex_shader.GetAddressOf())); ThrowIfFailed(d3d.device->CreatePixelShader(ps->GetBufferPointer(), ps->GetBufferSize(), nullptr, prg->pixel_shader.GetAddressOf())); @@ -368,11 +371,14 @@ static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shade if (cc_features.used_textures[0] || cc_features.used_textures[1]) { ied[ied_index++] = { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }; } - if (cc_features.opt_fog) { + if (cc->cm.use_fog) { ied[ied_index++] = { "FOG", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }; } + if (cc->cm.light_map) { + ied[ied_index++] = { "LIGHTMAP", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }; + } for (uint32_t i = 0; i < cc_features.num_inputs; i++) { - DXGI_FORMAT format = cc_features.opt_alpha ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R32G32B32_FLOAT; + DXGI_FORMAT format = cc->cm.use_alpha ? 
DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R32G32B32_FLOAT; ied[ied_index++] = { "INPUT", i, format, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }; } @@ -383,7 +389,7 @@ static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shade D3D11_BLEND_DESC blend_desc; ZeroMemory(&blend_desc, sizeof(D3D11_BLEND_DESC)); - if (cc_features.opt_alpha) { + if (cc->cm.use_alpha) { blend_desc.RenderTarget[0].BlendEnable = true; blend_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA; blend_desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; @@ -401,7 +407,7 @@ static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shade // Save some values - prg->shader_id = shader_id; + prg->hash = cc->hash; prg->num_inputs = cc_features.num_inputs; prg->num_floats = num_floats; prg->used_textures[0] = cc_features.used_textures[0]; @@ -410,9 +416,9 @@ static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shade return (struct ShaderProgram *)(d3d.shader_program = prg); } -static struct ShaderProgram *gfx_d3d11_lookup_shader(uint32_t shader_id) { +static struct ShaderProgram *gfx_d3d11_lookup_shader(struct ColorCombiner* cc) { for (size_t i = 0; i < d3d.shader_program_pool_size; i++) { - if (d3d.shader_program_pool[i].shader_id == shader_id) { + if (d3d.shader_program_pool[i].hash == cc->hash) { return (struct ShaderProgram *)&d3d.shader_program_pool[i]; } } diff --git a/src/pc/gfx/gfx_direct3d12.cpp b/src/pc/gfx/gfx_direct3d12.cpp index 1e291197..72ad859d 100644 --- a/src/pc/gfx/gfx_direct3d12.cpp +++ b/src/pc/gfx/gfx_direct3d12.cpp @@ -50,28 +50,29 @@ using namespace Microsoft::WRL; // For ComPtr namespace { struct ShaderProgramD3D12 { - uint32_t shader_id; + struct ColorCombiner cc; + uint64_t hash; uint8_t num_inputs; bool used_textures[2]; uint8_t num_floats; uint8_t num_attribs; - + ComPtr vertex_shader; ComPtr pixel_shader; ComPtr root_signature; }; struct PipelineDesc { - uint32_t shader_id; + 
uint64_t hash; bool depth_test; bool depth_mask; bool zmode_decal; bool _padding; - + bool operator==(const PipelineDesc& o) const { return memcmp(this, &o, sizeof(*this)) == 0; } - + bool operator<(const PipelineDesc& o) const { return memcmp(this, &o, sizeof(*this)) < 0; } @@ -86,7 +87,7 @@ struct TextureData { ComPtr resource; struct TextureHeap *heap; uint8_t heap_offset; - + uint64_t last_frame_counter; uint32_t descriptor_index; int sampler_parameters; @@ -103,15 +104,16 @@ static struct { HMODULE d3d12_module; PFN_D3D12_CREATE_DEVICE D3D12CreateDevice; PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface; - + HMODULE d3dcompiler_module; pD3DCompile D3DCompile; - - struct ShaderProgramD3D12 shader_program_pool[64]; + + struct ShaderProgramD3D12 shader_program_pool[CC_MAX_SHADERS]; uint8_t shader_program_pool_size; - + uint8_t shader_program_pool_index; + uint32_t current_width, current_height; - + ComPtr device; ComPtr command_queue; ComPtr copy_command_queue; @@ -129,14 +131,14 @@ static struct { UINT srv_descriptor_size; ComPtr sampler_heap; UINT sampler_descriptor_size; - + std::map, std::list> texture_heaps; - + std::map>> upload_heaps; std::vector>> upload_heaps_in_flight; ComPtr copy_fence; uint64_t copy_fence_value; - + std::vector textures; int current_tile; uint32_t current_texture_ids[2]; @@ -145,30 +147,30 @@ static struct { int frame_index; ComPtr fence; HANDLE fence_event; - + uint64_t frame_counter; - + ComPtr noise_cb; void *mapped_noise_cb_address; struct NoiseCB noise_cb_data; - + ComPtr vertex_buffer; void *mapped_vbuf_address; int vbuf_pos; - + std::vector> resources_to_clean_at_end_of_frame; std::vector> texture_heap_allocations_to_reclaim_at_end_of_frame; - + std::map> pipeline_states; bool must_reload_pipeline; - + // Current state: ID3D12PipelineState *pipeline_state; struct ShaderProgramD3D12 *shader_program; bool depth_test; bool depth_mask; bool zmode_decal; - + CD3DX12_VIEWPORT viewport; CD3DX12_RECT scissor; } d3d; @@ -234,45 
+236,38 @@ static void gfx_direct3d12_load_shader(struct ShaderProgram *new_prg) { d3d.must_reload_pipeline = true; } -static struct ShaderProgram *gfx_direct3d12_create_and_load_new_shader(uint32_t shader_id) { - /*static FILE *fp; - if (!fp) { - fp = fopen("shaders.txt", "w"); - } - fprintf(fp, "0x%08x\n", shader_id); - fflush(fp);*/ - - struct ShaderProgramD3D12 *prg = &d3d.shader_program_pool[d3d.shader_program_pool_size++]; - - CCFeatures cc_features; - gfx_cc_get_features(shader_id, &cc_features); - +static struct ShaderProgram *gfx_direct3d12_create_and_load_new_shader(struct ColorCombiner* cc) { + struct ShaderProgramD3D12 *prg = &d3d.shader_program_pool[d3d.shader_program_pool_index]; + d3d.shader_program_pool_index = (d3d.shader_program_pool_index + 1) % CC_MAX_SHADERS; + if (d3d.shader_program_pool_size < CC_MAX_SHADERS) { d3d.shader_program_pool_size++; } + + CCFeatures cc_features = { 0 }; + gfx_cc_get_features(cc, &cc_features); + char buf[2048]; size_t len, num_floats; - - gfx_direct3d_common_build_shader(buf, len, num_floats, cc_features, true, false); - - //fwrite(buf, 1, len, stdout); - + + gfx_direct3d_common_build_shader(buf, len, num_floats, *cc, cc_features, true, false); + ThrowIfFailed(d3d.D3DCompile(buf, len, nullptr, nullptr, nullptr, "VSMain", "vs_5_1", D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &prg->vertex_shader, nullptr)); ThrowIfFailed(d3d.D3DCompile(buf, len, nullptr, nullptr, nullptr, "PSMain", "ps_5_1", D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &prg->pixel_shader, nullptr)); - + ThrowIfFailed(d3d.device->CreateRootSignature(0, prg->pixel_shader->GetBufferPointer(), prg->pixel_shader->GetBufferSize(), IID_PPV_ARGS(&prg->root_signature))); - - prg->shader_id = shader_id; + + prg->hash = cc->hash; + prg->cc = *cc; prg->num_inputs = cc_features.num_inputs; prg->used_textures[0] = cc_features.used_textures[0]; prg->used_textures[1] = cc_features.used_textures[1]; prg->num_floats = num_floats; - //prg->num_attribs = cnt; - + d3d.must_reload_pipeline = 
true; return (struct ShaderProgram *)(d3d.shader_program = prg); } -static struct ShaderProgram *gfx_direct3d12_lookup_shader(uint32_t shader_id) { +static struct ShaderProgram *gfx_direct3d12_lookup_shader(struct ColorCombiner* cc) { for (size_t i = 0; i < d3d.shader_program_pool_size; i++) { - if (d3d.shader_program_pool[i].shader_id == shader_id) { + if (d3d.shader_program_pool[i].hash == cc->hash) { return (struct ShaderProgram *)&d3d.shader_program_pool[i]; } } @@ -281,7 +276,7 @@ static struct ShaderProgram *gfx_direct3d12_lookup_shader(uint32_t shader_id) { static void gfx_direct3d12_shader_get_info(struct ShaderProgram *prg, uint8_t *num_inputs, bool used_textures[2]) { struct ShaderProgramD3D12 *p = (struct ShaderProgramD3D12 *)prg; - + *num_inputs = p->num_inputs; used_textures[0] = p->used_textures[0]; used_textures[1] = p->used_textures[1]; @@ -299,9 +294,9 @@ static void gfx_direct3d12_select_texture(int tile, uint32_t texture_id) { static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width, int height) { texture_uploads++; - + ComPtr texture_resource; - + // Describe and create a Texture2D. D3D12_RESOURCE_DESC texture_desc = {}; texture_desc.MipLevels = 1; @@ -314,11 +309,11 @@ static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width, texture_desc.SampleDesc.Quality = 0; texture_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; texture_desc.Alignment = ((width + 31) / 32) * ((height + 31) / 32) > 16 ? 
0 : D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT; - + D3D12_RESOURCE_ALLOCATION_INFO alloc_info = get_resource_allocation_info(&texture_desc); - + std::list& heaps = d3d.texture_heaps[std::pair(alloc_info.SizeInBytes, alloc_info.Alignment)]; - + struct TextureHeap *found_heap = nullptr; for (struct TextureHeap& heap : heaps) { if (!heap.free_list.empty()) { @@ -328,7 +323,7 @@ static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width, if (found_heap == nullptr) { heaps.resize(heaps.size() + 1); found_heap = &heaps.back(); - + // In case of HD textures, make sure too much memory isn't wasted int textures_per_heap = 524288 / alloc_info.SizeInBytes; if (textures_per_heap < 1) { @@ -336,7 +331,7 @@ static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width, } else if (textures_per_heap > 64) { textures_per_heap = 64; } - + D3D12_HEAP_DESC heap_desc = {}; heap_desc.SizeInBytes = alloc_info.SizeInBytes * textures_per_heap; if (alloc_info.Alignment == D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT) { @@ -353,17 +348,17 @@ static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width, found_heap->free_list.push_back(i); } } - + uint8_t heap_offset = found_heap->free_list.back(); found_heap->free_list.pop_back(); ThrowIfFailed(d3d.device->CreatePlacedResource(found_heap->heap.Get(), heap_offset * alloc_info.SizeInBytes, &texture_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&texture_resource))); - + D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout; UINT num_rows; UINT64 row_size_in_bytes; UINT64 upload_buffer_size; d3d.device->GetCopyableFootprints(&texture_desc, 0, 1, 0, &layout, &num_rows, &row_size_in_bytes, &upload_buffer_size); - + std::vector>& upload_heaps = d3d.upload_heaps[upload_buffer_size]; ComPtr upload_heap; if (upload_heaps.empty()) { @@ -380,13 +375,13 @@ static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width, upload_heap = upload_heaps.back(); upload_heaps.pop_back(); } - + { 
D3D12_SUBRESOURCE_DATA texture_data = {}; texture_data.pData = rgba32_buf; texture_data.RowPitch = width * 4; // RGBA texture_data.SlicePitch = texture_data.RowPitch * height; - + void *data; upload_heap->Map(0, nullptr, &data); D3D12_MEMCPY_DEST dest_data = { (uint8_t *)data + layout.Offset, layout.Footprint.RowPitch, SIZE_T(layout.Footprint.RowPitch) * SIZE_T(num_rows) }; @@ -397,12 +392,12 @@ static void gfx_direct3d12_upload_texture(const uint8_t *rgba32_buf, int width, CD3DX12_TEXTURE_COPY_LOCATION src(upload_heap.Get(), layout); d3d.copy_command_list->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr); } - + CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(texture_resource.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); d3d.command_list->ResourceBarrier(1, &barrier); - + d3d.upload_heaps_in_flight.push_back(std::make_pair((size_t)upload_buffer_size, std::move(upload_heap))); - + struct TextureData& td = d3d.textures[d3d.current_texture_ids[d3d.current_tile]]; if (td.resource.Get() != nullptr) { d3d.resources_to_clean_at_end_of_frame.push_back(std::move(td.resource)); @@ -454,10 +449,10 @@ static void gfx_direct3d12_set_use_alpha(bool use_alpha) { static void gfx_direct3d12_draw_triangles(float buf_vbo[], size_t buf_vbo_len, size_t buf_vbo_num_tris) { struct ShaderProgramD3D12 *prg = d3d.shader_program; - + if (d3d.must_reload_pipeline) { ComPtr& pipeline_state = d3d.pipeline_states[PipelineDesc{ - prg->shader_id, + prg->hash, d3d.depth_test, d3d.depth_mask, d3d.zmode_decal, @@ -471,14 +466,17 @@ static void gfx_direct3d12_draw_triangles(float buf_vbo[], size_t buf_vbo_len, s if (prg->used_textures[0] || prg->used_textures[1]) { ied[ied_pos++] = D3D12_INPUT_ELEMENT_DESC{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}; } - if (prg->shader_id & SHADER_OPT_FOG) { + if (prg->cc.cm.use_fog) { ied[ied_pos++] = 
D3D12_INPUT_ELEMENT_DESC{"FOG", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}; } + if (prg->cc.cm.light_map) { + ied[ied_pos++] = D3D12_INPUT_ELEMENT_DESC{"LIGHTMAP", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}; + } for (int32_t i = 0; i < prg->num_inputs; i++) { - DXGI_FORMAT format = (prg->shader_id & SHADER_OPT_ALPHA) ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R32G32B32_FLOAT; + DXGI_FORMAT format = (prg->cc.cm.use_alpha) ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R32G32B32_FLOAT; ied[ied_pos++] = D3D12_INPUT_ELEMENT_DESC{"INPUT", (UINT)i, format, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}; } - + D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {}; desc.InputLayout = { ied, ied_pos }; desc.pRootSignature = prg->root_signature.Get(); @@ -489,7 +487,7 @@ static void gfx_direct3d12_draw_triangles(float buf_vbo[], size_t buf_vbo_len, s desc.RasterizerState.SlopeScaledDepthBias = -2.0f; } desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; - if (prg->shader_id & SHADER_OPT_ALPHA) { + if (prg->cc.cm.use_alpha) { D3D12_BLEND_DESC bd = {}; bd.AlphaToCoverageEnable = FALSE; bd.IndependentBlendEnable = FALSE; @@ -521,51 +519,51 @@ static void gfx_direct3d12_draw_triangles(float buf_vbo[], size_t buf_vbo_len, s d3d.pipeline_state = pipeline_state.Get(); d3d.must_reload_pipeline = false; } - + d3d.command_list->SetGraphicsRootSignature(prg->root_signature.Get()); d3d.command_list->SetPipelineState(d3d.pipeline_state); - + ID3D12DescriptorHeap *heaps[] = { d3d.srv_heap.Get(), d3d.sampler_heap.Get() }; d3d.command_list->SetDescriptorHeaps(2, heaps); - + int root_param_index = 0; - - if ((prg->shader_id & (SHADER_OPT_ALPHA | SHADER_OPT_NOISE)) == (SHADER_OPT_ALPHA | SHADER_OPT_NOISE)) { + + if (prg->cc.cm.use_alpha && prg->cc.cm.use_noise) { 
d3d.command_list->SetGraphicsRootConstantBufferView(root_param_index++, d3d.noise_cb->GetGPUVirtualAddress()); } - + for (int32_t i = 0; i < 2; i++) { if (prg->used_textures[i]) { struct TextureData& td = d3d.textures[d3d.current_texture_ids[i]]; if (td.last_frame_counter != d3d.frame_counter) { td.descriptor_index = d3d.srv_pos; td.last_frame_counter = d3d.frame_counter; - + D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {}; srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srv_desc.Texture2D.MipLevels = 1; - + CD3DX12_CPU_DESCRIPTOR_HANDLE srv_handle(get_cpu_descriptor_handle(d3d.srv_heap), d3d.srv_pos++, d3d.srv_descriptor_size); d3d.device->CreateShaderResourceView(td.resource.Get(), &srv_desc, srv_handle); } - + CD3DX12_GPU_DESCRIPTOR_HANDLE srv_gpu_handle(get_gpu_descriptor_handle(d3d.srv_heap), td.descriptor_index, d3d.srv_descriptor_size); d3d.command_list->SetGraphicsRootDescriptorTable(root_param_index++, srv_gpu_handle); - + CD3DX12_GPU_DESCRIPTOR_HANDLE sampler_gpu_handle(get_gpu_descriptor_handle(d3d.sampler_heap), td.sampler_parameters, d3d.sampler_descriptor_size); d3d.command_list->SetGraphicsRootDescriptorTable(root_param_index++, sampler_gpu_handle); } } - + CD3DX12_CPU_DESCRIPTOR_HANDLE rtv_handle(get_cpu_descriptor_handle(d3d.rtv_heap), d3d.frame_index, d3d.rtv_descriptor_size); D3D12_CPU_DESCRIPTOR_HANDLE dsv_handle = get_cpu_descriptor_handle(d3d.dsv_heap); d3d.command_list->OMSetRenderTargets(1, &rtv_handle, FALSE, &dsv_handle); - + d3d.command_list->RSSetViewports(1, &d3d.viewport); d3d.command_list->RSSetScissorRects(1, &d3d.scissor); - + int current_pos = d3d.vbuf_pos; memcpy((uint8_t *)d3d.mapped_vbuf_address + current_pos, buf_vbo, buf_vbo_len * sizeof(float)); d3d.vbuf_pos += buf_vbo_len * sizeof(float); @@ -574,12 +572,12 @@ static void gfx_direct3d12_draw_triangles(float buf_vbo[], size_t buf_vbo_len, s maxpos = 
d3d.vbuf_pos; //printf("NEW MAXPOS: %d\n", maxpos); } - + D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view; vertex_buffer_view.BufferLocation = d3d.vertex_buffer->GetGPUVirtualAddress() + current_pos; vertex_buffer_view.StrideInBytes = buf_vbo_len / (3 * buf_vbo_num_tris) * sizeof(float); vertex_buffer_view.SizeInBytes = buf_vbo_len * sizeof(float); - + d3d.command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); d3d.command_list->IASetVertexBuffers(0, 1, &vertex_buffer_view); d3d.command_list->DrawInstanced(3 * buf_vbo_num_tris, 1, 0, 0); @@ -591,22 +589,22 @@ static void gfx_direct3d12_start_frame(void) { texture_uploads = 0; ThrowIfFailed(d3d.command_allocator->Reset()); ThrowIfFailed(d3d.command_list->Reset(d3d.command_allocator.Get(), nullptr)); - + CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition( d3d.render_targets[d3d.frame_index].Get(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET); d3d.command_list->ResourceBarrier(1, &barrier); - + CD3DX12_CPU_DESCRIPTOR_HANDLE rtv_handle(get_cpu_descriptor_handle(d3d.rtv_heap), d3d.frame_index, d3d.rtv_descriptor_size); D3D12_CPU_DESCRIPTOR_HANDLE dsv_handle = get_cpu_descriptor_handle(d3d.dsv_heap); d3d.command_list->OMSetRenderTargets(1, &rtv_handle, FALSE, &dsv_handle); - + static unsigned char c; const float clear_color[] = { 0.0f, 0.0f, 0.0f, 1.0f }; d3d.command_list->ClearRenderTargetView(rtv_handle, clear_color, 0, nullptr); d3d.command_list->ClearDepthStencilView(dsv_handle, D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr); - + d3d.noise_cb_data.noise_frame++; if (d3d.noise_cb_data.noise_frame > 150) { // No high values, as noise starts to look ugly @@ -616,7 +614,7 @@ static void gfx_direct3d12_start_frame(void) { d3d.noise_cb_data.noise_scale_x = 120 * aspect_ratio; // 120 = N64 height resolution (240) / 2 d3d.noise_cb_data.noise_scale_y = 120; memcpy(d3d.mapped_noise_cb_address, &d3d.noise_cb_data, sizeof(struct NoiseCB)); - + d3d.vbuf_pos = 0; } @@ -634,10 
+632,10 @@ static void create_depth_buffer(void) { ThrowIfFailed(d3d.swap_chain->GetDesc1(&desc1)); UINT width = desc1.Width; UINT height = desc1.Height; - + d3d.current_width = width; d3d.current_height = height; - + D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = {}; dsv_desc.Format = DXGI_FORMAT_D32_FLOAT; dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; @@ -667,7 +665,7 @@ static void create_depth_buffer(void) { rd.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; rd.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; ThrowIfFailed(d3d.device->CreateCommittedResource(&hp, D3D12_HEAP_FLAG_NONE, &rd, D3D12_RESOURCE_STATE_DEPTH_WRITE, &depth_optimized_cv, IID_PPV_ARGS(&d3d.depth_stencil_buffer))); - + d3d.device->CreateDepthStencilView(d3d.depth_stencil_buffer.Get(), &dsv_desc, get_cpu_descriptor_handle(d3d.dsv_heap)); } @@ -699,7 +697,7 @@ static void gfx_direct3d12_init(void ) { ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()), gfx_dxgi_get_h_wnd(), "D3DCompiler_47.dll could not be loaded"); } d3d.D3DCompile = (pD3DCompile)GetProcAddress(d3d.d3dcompiler_module, "D3DCompile"); - + // Create device { UINT debug_flags = 0; @@ -710,14 +708,14 @@ static void gfx_direct3d12_init(void ) { debug_flags |= DXGI_CREATE_FACTORY_DEBUG; } #endif - + gfx_dxgi_create_factory_and_device(DEBUG_D3D, 12, [](IDXGIAdapter1 *adapter, bool test_only) { HRESULT res = d3d.D3D12CreateDevice( adapter, D3D_FEATURE_LEVEL_11_0, IID_ID3D12Device, test_only ? 
nullptr : IID_PPV_ARGS_Helper(&d3d.device)); - + if (test_only) { return SUCCEEDED(res); } else { @@ -726,7 +724,7 @@ static void gfx_direct3d12_init(void ) { } }); } - + // Create command queues { D3D12_COMMAND_QUEUE_DESC queue_desc = {}; @@ -740,14 +738,14 @@ static void gfx_direct3d12_init(void ) { queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COPY; ThrowIfFailed(d3d.device->CreateCommandQueue(&queue_desc, IID_PPV_ARGS(&d3d.copy_command_queue))); } - + // Create swap chain { ComPtr swap_chain1 = gfx_dxgi_create_swap_chain(d3d.command_queue.Get()); ThrowIfFailed(swap_chain1->QueryInterface(__uuidof(IDXGISwapChain3), &d3d.swap_chain)); d3d.frame_index = d3d.swap_chain->GetCurrentBackBufferIndex(); } - + // Create render target views { D3D12_DESCRIPTOR_HEAP_DESC rtv_heap_desc = {}; @@ -759,7 +757,7 @@ static void gfx_direct3d12_init(void ) { create_render_target_views(); } - + // Create Z-buffer { D3D12_DESCRIPTOR_HEAP_DESC dsv_heap_desc = {}; @@ -770,7 +768,7 @@ static void gfx_direct3d12_init(void ) { create_depth_buffer(); } - + // Create SRV heap for texture descriptors { D3D12_DESCRIPTOR_HEAP_DESC srv_heap_desc = {}; @@ -780,7 +778,7 @@ static void gfx_direct3d12_init(void ) { ThrowIfFailed(d3d.device->CreateDescriptorHeap(&srv_heap_desc, IID_PPV_ARGS(&d3d.srv_heap))); d3d.srv_descriptor_size = d3d.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } - + // Create sampler heap and descriptors { D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_desc = {}; @@ -789,13 +787,13 @@ static void gfx_direct3d12_init(void ) { sampler_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; ThrowIfFailed(d3d.device->CreateDescriptorHeap(&sampler_heap_desc, IID_PPV_ARGS(&d3d.sampler_heap))); d3d.sampler_descriptor_size = d3d.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - + static const D3D12_TEXTURE_ADDRESS_MODE address_modes[] = { D3D12_TEXTURE_ADDRESS_MODE_WRAP, D3D12_TEXTURE_ADDRESS_MODE_MIRROR, 
D3D12_TEXTURE_ADDRESS_MODE_CLAMP }; - + D3D12_CPU_DESCRIPTOR_HANDLE sampler_handle = get_cpu_descriptor_handle(d3d.sampler_heap); int pos = 0; for (int linear_filter = 0; linear_filter < 2; linear_filter++) { @@ -816,7 +814,7 @@ static void gfx_direct3d12_init(void ) { } } } - + // Create constant buffer view for noise { /*D3D12_DESCRIPTOR_HEAP_DESC cbv_heap_desc = {}; @@ -824,7 +822,7 @@ static void gfx_direct3d12_init(void ) { cbv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; srv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; ThrowIfFailed(d3d.device->CreateDescriptorHeap*/ - + CD3DX12_HEAP_PROPERTIES hp(D3D12_HEAP_TYPE_UPLOAD); CD3DX12_RESOURCE_DESC rdb = CD3DX12_RESOURCE_DESC::Buffer(256); ThrowIfFailed(d3d.device->CreateCommittedResource( @@ -834,27 +832,27 @@ static void gfx_direct3d12_init(void ) { D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&d3d.noise_cb))); - + CD3DX12_RANGE read_range(0, 0); // Read not possible from CPU ThrowIfFailed(d3d.noise_cb->Map(0, &read_range, &d3d.mapped_noise_cb_address)); } - + ThrowIfFailed(d3d.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&d3d.command_allocator))); ThrowIfFailed(d3d.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_PPV_ARGS(&d3d.copy_command_allocator))); - + ThrowIfFailed(d3d.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, d3d.command_allocator.Get(), nullptr, IID_PPV_ARGS(&d3d.command_list))); ThrowIfFailed(d3d.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COPY, d3d.copy_command_allocator.Get(), nullptr, IID_PPV_ARGS(&d3d.copy_command_list))); - + ThrowIfFailed(d3d.command_list->Close()); - + ThrowIfFailed(d3d.device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&d3d.fence))); d3d.fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr); if (d3d.fence_event == nullptr) { ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError())); } - + ThrowIfFailed(d3d.device->CreateFence(0, D3D12_FENCE_FLAG_NONE, 
IID_PPV_ARGS(&d3d.copy_fence))); - + { // Create a buffer of 1 MB in size. With a 120 star speed run 192 kB seems to be max usage. CD3DX12_HEAP_PROPERTIES hp(D3D12_HEAP_TYPE_UPLOAD); @@ -866,7 +864,7 @@ static void gfx_direct3d12_init(void ) { D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&d3d.vertex_buffer))); - + CD3DX12_RANGE read_range(0, 0); // Read not possible from CPU ThrowIfFailed(d3d.vertex_buffer->Map(0, &read_range, &d3d.mapped_vbuf_address)); } @@ -880,29 +878,29 @@ static void gfx_direct3d12_end_frame(void) { } //printf("Texture uploads: %d %d\n", max_texture_uploads, texture_uploads); texture_uploads = 0; - + ThrowIfFailed(d3d.copy_command_list->Close()); { ID3D12CommandList *lists[] = { d3d.copy_command_list.Get() }; d3d.copy_command_queue->ExecuteCommandLists(1, lists); d3d.copy_command_queue->Signal(d3d.copy_fence.Get(), ++d3d.copy_fence_value); } - + CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition( d3d.render_targets[d3d.frame_index].Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT); d3d.command_list->ResourceBarrier(1, &barrier); - + d3d.command_queue->Wait(d3d.copy_fence.Get(), d3d.copy_fence_value); - + ThrowIfFailed(d3d.command_list->Close()); - + { ID3D12CommandList *lists[] = { d3d.command_list.Get() }; d3d.command_queue->ExecuteCommandLists(1, lists); } - + { LARGE_INTEGER t0; QueryPerformanceCounter(&t0); @@ -913,7 +911,7 @@ static void gfx_direct3d12_end_frame(void) { static void gfx_direct3d12_finish_render(void) { LARGE_INTEGER t0, t1, t2; QueryPerformanceCounter(&t0); - + static UINT64 fence_value; ThrowIfFailed(d3d.command_queue->Signal(d3d.fence.Get(), ++fence_value)); if (d3d.fence->GetCompletedValue() < fence_value) { @@ -921,7 +919,7 @@ static void gfx_direct3d12_finish_render(void) { WaitForSingleObject(d3d.fence_event, INFINITE); } QueryPerformanceCounter(&t1); - + d3d.resources_to_clean_at_end_of_frame.clear(); for (std::pair>& heap : d3d.upload_heaps_in_flight) { 
d3d.upload_heaps[heap.first].push_back(std::move(heap.second)); @@ -931,14 +929,14 @@ static void gfx_direct3d12_finish_render(void) { item.first->free_list.push_back(item.second); } d3d.texture_heap_allocations_to_reclaim_at_end_of_frame.clear(); - + QueryPerformanceCounter(&t2); - + d3d.frame_index = d3d.swap_chain->GetCurrentBackBufferIndex(); - + ThrowIfFailed(d3d.copy_command_allocator->Reset()); ThrowIfFailed(d3d.copy_command_list->Reset(d3d.copy_command_allocator.Get(), nullptr)); - + //printf("done %llu gpu:%d wait:%d freed:%llu frame:%u %u monitor:%u t:%llu\n", (unsigned long long)(t0.QuadPart - d3d.qpc_init), (int)(t1.QuadPart - t0.QuadPart), (int)(t2.QuadPart - t0.QuadPart), (unsigned long long)(t2.QuadPart - d3d.qpc_init), d3d.pending_frame_stats.rbegin()->first, stats.PresentCount, stats.SyncRefreshCount, (unsigned long long)(stats.SyncQPCTime.QuadPart - d3d.qpc_init)); } diff --git a/src/pc/gfx/gfx_direct3d_common.cpp b/src/pc/gfx/gfx_direct3d_common.cpp index c3249a38..5b5eff36 100644 --- a/src/pc/gfx/gfx_direct3d_common.cpp +++ b/src/pc/gfx/gfx_direct3d_common.cpp @@ -5,46 +5,6 @@ #include "gfx_direct3d_common.h" #include "gfx_cc.h" -void get_cc_features(uint32_t shader_id, CCFeatures *cc_features) { - for (int32_t i = 0; i < 4; i++) { - cc_features->c[0][i] = (shader_id >> (i * 3)) & 7; - cc_features->c[1][i] = (shader_id >> (12 + i * 3)) & 7; - } - - cc_features->opt_alpha = (shader_id & SHADER_OPT_ALPHA) != 0; - cc_features->opt_fog = (shader_id & SHADER_OPT_FOG) != 0; - cc_features->opt_texture_edge = (shader_id & SHADER_OPT_TEXTURE_EDGE) != 0; - cc_features->opt_noise = (shader_id & SHADER_OPT_NOISE) != 0; - - cc_features->used_textures[0] = false; - cc_features->used_textures[1] = false; - cc_features->num_inputs = 0; - - for (int32_t i = 0; i < 2; i++) { - for (int32_t j = 0; j < 4; j++) { - if (cc_features->c[i][j] >= SHADER_INPUT_1 && cc_features->c[i][j] <= SHADER_INPUT_4) { - if (cc_features->c[i][j] > cc_features->num_inputs) { - 
cc_features->num_inputs = cc_features->c[i][j]; - } - } - if (cc_features->c[i][j] == SHADER_TEXEL0 || cc_features->c[i][j] == SHADER_TEXEL0A) { - cc_features->used_textures[0] = true; - } - if (cc_features->c[i][j] == SHADER_TEXEL1) { - cc_features->used_textures[1] = true; - } - } - } - - cc_features->do_single[0] = cc_features->c[0][2] == 0; - cc_features->do_single[1] = cc_features->c[1][2] == 0; - cc_features->do_multiply[0] = cc_features->c[0][1] == 0 && cc_features->c[0][3] == 0; - cc_features->do_multiply[1] = cc_features->c[1][1] == 0 && cc_features->c[1][3] == 0; - cc_features->do_mix[0] = cc_features->c[0][1] == cc_features->c[0][3]; - cc_features->do_mix[1] = cc_features->c[1][1] == cc_features->c[1][3]; - cc_features->color_alpha_same = (shader_id & 0xfff) == ((shader_id >> 12) & 0xfff); -} - static void append_str(char *buf, size_t *len, const char *str) { while (*str != '\0') buf[(*len)++] = *str++; } @@ -61,6 +21,8 @@ static const char *shader_item_to_str(int32_t item, bool with_alpha, bool only_a default: case SHADER_0: return with_alpha ? "float4(0.0, 0.0, 0.0, 0.0)" : "float3(0.0, 0.0, 0.0)"; + case SHADER_1: + return with_alpha ? "float4(1.0, 1.0, 1.0, 1.0)" : "float3(1.0, 1.0, 1.0)"; case SHADER_INPUT_1: return with_alpha || !inputs_have_alpha ? "input.input1" : "input.input1.rgb"; case SHADER_INPUT_2: @@ -69,18 +31,34 @@ static const char *shader_item_to_str(int32_t item, bool with_alpha, bool only_a return with_alpha || !inputs_have_alpha ? "input.input3" : "input.input3.rgb"; case SHADER_INPUT_4: return with_alpha || !inputs_have_alpha ? "input.input4" : "input.input4.rgb"; + case SHADER_INPUT_5: + return with_alpha || !inputs_have_alpha ? "input.input5" : "input.input5.rgb"; + case SHADER_INPUT_6: + return with_alpha || !inputs_have_alpha ? "input.input6" : "input.input6.rgb"; + case SHADER_INPUT_7: + return with_alpha || !inputs_have_alpha ? 
"input.input7" : "input.input7.rgb"; + case SHADER_INPUT_8: + return with_alpha || !inputs_have_alpha ? "input.input8" : "input.input8.rgb"; case SHADER_TEXEL0: return with_alpha ? "texVal0" : "texVal0.rgb"; case SHADER_TEXEL0A: return hint_single_element ? "texVal0.a" : (with_alpha ? "float4(texVal0.a, texVal0.a, texVal0.a, texVal0.a)" : "float3(texVal0.a, texVal0.a, texVal0.a)"); case SHADER_TEXEL1: return with_alpha ? "texVal1" : "texVal1.rgb"; + case SHADER_TEXEL1A: + return hint_single_element ? "texVal1.a" : (with_alpha ? "float4(texVal1.a, texVal1.a, texVal1.a, texVal1.a)" : "float3(texVal1.a, texVal1.a, texVal1.a)"); + case SHADER_COMBINED: + return with_alpha ? "texel" : "texel.rgb"; + case SHADER_COMBINEDA: + return hint_single_element ? "texel.a" : (with_alpha ? "float4(texel.a, texel.a, texel.a, texel.a)" : "float3(texel.a, texel.a, texel.a)"); } } else { switch (item) { default: case SHADER_0: return "0.0"; + case SHADER_1: + return "1.0"; case SHADER_INPUT_1: return "input.input1.a"; case SHADER_INPUT_2: @@ -89,44 +67,58 @@ static const char *shader_item_to_str(int32_t item, bool with_alpha, bool only_a return "input.input3.a"; case SHADER_INPUT_4: return "input.input4.a"; + case SHADER_INPUT_5: + return "input.input5.a"; + case SHADER_INPUT_6: + return "input.input6.a"; + case SHADER_INPUT_7: + return "input.input7.a"; + case SHADER_INPUT_8: + return "input.input8.a"; case SHADER_TEXEL0: return "texVal0.a"; case SHADER_TEXEL0A: return "texVal0.a"; case SHADER_TEXEL1: return "texVal1.a"; + case SHADER_TEXEL1A: + return "texVal1.a"; + case SHADER_COMBINED: + return "texel.a"; + case SHADER_COMBINEDA: + return "texel.a"; } } } -static void append_formula(char *buf, size_t *len, const uint8_t c[2][4], bool do_single, bool do_multiply, bool do_mix, bool with_alpha, bool only_alpha, bool opt_alpha) { +static void append_formula(char *buf, size_t *len, const uint8_t* c, bool do_single, bool do_multiply, bool do_mix, bool with_alpha, bool only_alpha, bool 
opt_alpha) { if (do_single) { - append_str(buf, len, shader_item_to_str(c[only_alpha][3], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 3], with_alpha, only_alpha, opt_alpha, false)); } else if (do_multiply) { - append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 0], with_alpha, only_alpha, opt_alpha, false)); append_str(buf, len, " * "); - append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true)); + append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 2], with_alpha, only_alpha, opt_alpha, true)); } else if (do_mix) { append_str(buf, len, "lerp("); - append_str(buf, len, shader_item_to_str(c[only_alpha][1], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 1], with_alpha, only_alpha, opt_alpha, false)); append_str(buf, len, ", "); - append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 0], with_alpha, only_alpha, opt_alpha, false)); append_str(buf, len, ", "); - append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true)); + append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 2], with_alpha, only_alpha, opt_alpha, true)); append_str(buf, len, ")"); } else { append_str(buf, len, "("); - append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 0], with_alpha, only_alpha, opt_alpha, false)); append_str(buf, len, " - "); - append_str(buf, len, shader_item_to_str(c[only_alpha][1], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 1], with_alpha, only_alpha, opt_alpha, false)); 
append_str(buf, len, ") * "); - append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true)); + append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 2], with_alpha, only_alpha, opt_alpha, true)); append_str(buf, len, " + "); - append_str(buf, len, shader_item_to_str(c[only_alpha][3], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(c[only_alpha * 4 + 3], with_alpha, only_alpha, opt_alpha, false)); } } -void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_floats, const CCFeatures& cc_features, bool include_root_signature, bool three_point_filtering) { +void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_floats, struct ColorCombiner& cc, const CCFeatures& ccf, bool include_root_signature, bool three_point_filtering) { len = 0; num_floats = 4; @@ -134,14 +126,14 @@ void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_f if (include_root_signature) { append_str(buf, &len, "#define RS \"RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | DENY_VERTEX_SHADER_ROOT_ACCESS)"); - if (cc_features.opt_alpha && cc_features.opt_noise) { + if (cc.cm.use_alpha && cc.cm.use_noise) { append_str(buf, &len, ",CBV(b0, visibility = SHADER_VISIBILITY_PIXEL)"); } - if (cc_features.used_textures[0]) { + if (ccf.used_textures[0]) { append_str(buf, &len, ",DescriptorTable(SRV(t0), visibility = SHADER_VISIBILITY_PIXEL)"); append_str(buf, &len, ",DescriptorTable(Sampler(s0), visibility = SHADER_VISIBILITY_PIXEL)"); } - if (cc_features.used_textures[1]) { + if (ccf.used_textures[1]) { append_str(buf, &len, ",DescriptorTable(SRV(t1), visibility = SHADER_VISIBILITY_PIXEL)"); append_str(buf, &len, ",DescriptorTable(Sampler(s1), visibility = SHADER_VISIBILITY_PIXEL)"); } @@ -150,37 +142,41 @@ void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_f append_line(buf, &len, "struct PSInput {"); append_line(buf, &len, " float4 
position : SV_POSITION;"); - if (cc_features.used_textures[0] || cc_features.used_textures[1]) { + if (ccf.used_textures[0] || ccf.used_textures[1]) { append_line(buf, &len, " float2 uv : TEXCOORD;"); num_floats += 2; } - if (cc_features.opt_alpha && cc_features.opt_noise) { + if (cc.cm.use_alpha && cc.cm.use_noise) { append_line(buf, &len, " float4 screenPos : TEXCOORD1;"); } - if (cc_features.opt_fog) { + if (cc.cm.use_fog) { append_line(buf, &len, " float4 fog : FOG;"); num_floats += 4; } - for (int32_t i = 0; i < cc_features.num_inputs; i++) { - len += sprintf(buf + len, " float%d input%d : INPUT%d;\r\n", cc_features.opt_alpha ? 4 : 3, i + 1, i); - num_floats += cc_features.opt_alpha ? 4 : 3; + if (cc.cm.light_map) { + append_line(buf, &len, " float2 lightmap : LIGHTMAP;"); + num_floats += 2; + } + for (int32_t i = 0; i < ccf.num_inputs; i++) { + len += sprintf(buf + len, " float%d input%d : INPUT%d;\r\n", cc.cm.use_alpha ? 4 : 3, i + 1, i); + num_floats += cc.cm.use_alpha ? 4 : 3; } append_line(buf, &len, "};"); // Textures and samplers - if (cc_features.used_textures[0]) { + if (ccf.used_textures[0]) { append_line(buf, &len, "Texture2D g_texture0 : register(t0);"); append_line(buf, &len, "SamplerState g_sampler0 : register(s0);"); } - if (cc_features.used_textures[1]) { + if (ccf.used_textures[1]) { append_line(buf, &len, "Texture2D g_texture1 : register(t1);"); append_line(buf, &len, "SamplerState g_sampler1 : register(s1);"); } // Constant buffer and random function - if (cc_features.opt_alpha && cc_features.opt_noise) { + if (cc.cm.use_alpha && cc.cm.use_noise) { append_line(buf, &len, "cbuffer PerFrameCB : register(b0) {"); append_line(buf, &len, " uint noise_frame;"); append_line(buf, &len, " float2 noise_scale;"); @@ -196,7 +192,7 @@ void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_f // Original author: ArthurCarvalho // Based on GLSL implementation by twinaphex, mupen64plus-libretro project. 
- if (three_point_filtering && (cc_features.used_textures[0] || cc_features.used_textures[1])) { + if (three_point_filtering && (ccf.used_textures[0] || ccf.used_textures[1])) { append_line(buf, &len, "cbuffer PerDrawCB : register(b1) {"); append_line(buf, &len, " struct {"); append_line(buf, &len, " uint width;"); @@ -218,28 +214,34 @@ void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_f // Vertex shader append_str(buf, &len, "PSInput VSMain(float4 position : POSITION"); - if (cc_features.used_textures[0] || cc_features.used_textures[1]) { + if (ccf.used_textures[0] || ccf.used_textures[1]) { append_str(buf, &len, ", float2 uv : TEXCOORD"); } - if (cc_features.opt_fog) { + if (cc.cm.use_fog) { append_str(buf, &len, ", float4 fog : FOG"); } - for (int32_t i = 0; i < cc_features.num_inputs; i++) { - len += sprintf(buf + len, ", float%d input%d : INPUT%d", cc_features.opt_alpha ? 4 : 3, i + 1, i); + if (cc.cm.light_map) { + append_str(buf, &len, ", float2 lightmap : LIGHTMAP"); + } + for (int32_t i = 0; i < ccf.num_inputs; i++) { + len += sprintf(buf + len, ", float%d input%d : INPUT%d", cc.cm.use_alpha ? 
4 : 3, i + 1, i); } append_line(buf, &len, ") {"); append_line(buf, &len, " PSInput result;"); append_line(buf, &len, " result.position = position;"); - if (cc_features.opt_alpha && cc_features.opt_noise) { + if (cc.cm.use_alpha && cc.cm.use_noise) { append_line(buf, &len, " result.screenPos = position;"); } - if (cc_features.used_textures[0] || cc_features.used_textures[1]) { + if (ccf.used_textures[0] || ccf.used_textures[1]) { append_line(buf, &len, " result.uv = uv;"); } - if (cc_features.opt_fog) { + if (cc.cm.use_fog) { append_line(buf, &len, " result.fog = fog;"); } - for (int32_t i = 0; i < cc_features.num_inputs; i++) { + if (cc.cm.light_map) { + append_line(buf, &len, " result.lightmap = lightmap;"); + } + for (int32_t i = 0; i < ccf.num_inputs; i++) { len += sprintf(buf + len, " result.input%d = input%d;\r\n", i + 1, i + 1); } append_line(buf, &len, " return result;"); @@ -250,7 +252,7 @@ void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_f append_line(buf, &len, "[RootSignature(RS)]"); } append_line(buf, &len, "float4 PSMain(PSInput input) : SV_TARGET {"); - if (cc_features.used_textures[0]) { + if (ccf.used_textures[0]) { if (three_point_filtering) { append_line(buf, &len, " float4 texVal0;"); append_line(buf, &len, " if (textures[0].linear_filtering)"); @@ -261,48 +263,67 @@ void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_f append_line(buf, &len, " float4 texVal0 = g_texture0.Sample(g_sampler0, input.uv);"); } } - if (cc_features.used_textures[1]) { - if (three_point_filtering) { - append_line(buf, &len, " float4 texVal1;"); - append_line(buf, &len, " if (textures[1].linear_filtering)"); - append_line(buf, &len, " texVal1 = tex2D3PointFilter(g_texture1, g_sampler1, input.uv, float2(textures[1].width, textures[1].height));"); - append_line(buf, &len, " else"); - append_line(buf, &len, " texVal1 = g_texture1.Sample(g_sampler1, input.uv);"); + if (ccf.used_textures[1]) { + if (cc.cm.light_map) { 
+ if (three_point_filtering) { + append_line(buf, &len, " float4 texVal1;"); + append_line(buf, &len, " if (textures[1].linear_filtering)"); + append_line(buf, &len, " texVal1 = tex2D3PointFilter(g_texture1, g_sampler1, input.lightmap, float2(textures[1].width, textures[1].height));"); + append_line(buf, &len, " else"); + append_line(buf, &len, " texVal1 = g_texture1.Sample(g_sampler1, input.lightmap);"); + } else { + append_line(buf, &len, " float4 texVal1 = g_texture1.Sample(g_sampler1, input.lightmap);"); + } } else { - append_line(buf, &len, " float4 texVal1 = g_texture1.Sample(g_sampler1, input.uv);"); + if (three_point_filtering) { + append_line(buf, &len, " float4 texVal1;"); + append_line(buf, &len, " if (textures[1].linear_filtering)"); + append_line(buf, &len, " texVal1 = tex2D3PointFilter(g_texture1, g_sampler1, input.uv, float2(textures[1].width, textures[1].height));"); + append_line(buf, &len, " else"); + append_line(buf, &len, " texVal1 = g_texture1.Sample(g_sampler1, input.uv);"); + } else { + append_line(buf, &len, " float4 texVal1 = g_texture1.Sample(g_sampler1, input.uv);"); + } } } - append_str(buf, &len, cc_features.opt_alpha ? " float4 texel = " : " float3 texel = "); - if (!cc_features.color_alpha_same && cc_features.opt_alpha) { - append_str(buf, &len, "float4("); - append_formula(buf, &len, cc_features.c, cc_features.do_single[0], cc_features.do_multiply[0], cc_features.do_mix[0], false, false, true); - append_str(buf, &len, ", "); - append_formula(buf, &len, cc_features.c, cc_features.do_single[1], cc_features.do_multiply[1], cc_features.do_mix[1], true, true, true); - append_str(buf, &len, ")"); - } else { - append_formula(buf, &len, cc_features.c, cc_features.do_single[0], cc_features.do_multiply[0], cc_features.do_mix[0], cc_features.opt_alpha, false, cc_features.opt_alpha); - } - append_line(buf, &len, ";"); + append_str(buf, &len, cc.cm.use_alpha ? 
" float4 texel = " : " float3 texel = "); + for (int i = 0; i < (cc.cm.use_2cycle + 1); i++) { + uint8_t* cmd = &cc.shader_commands[i * 8]; + if (!ccf.color_alpha_same[i] && cc.cm.use_alpha) { + append_str(buf, &len, "float4("); + append_formula(buf, &len, cmd, ccf.do_single[i*2+0], ccf.do_multiply[i*2+0], ccf.do_mix[i*2+0], false, false, true); + append_str(buf, &len, ", "); + append_formula(buf, &len, cmd, ccf.do_single[i*2+1], ccf.do_multiply[i*2+1], ccf.do_mix[i*2+1], true, true, true); + append_str(buf, &len, ")"); + } else { + append_formula(buf, &len, cmd, ccf.do_single[i*2+0], ccf.do_multiply[i*2+0], ccf.do_mix[i*2+0], cc.cm.use_alpha, false, cc.cm.use_alpha); + } + append_line(buf, &len, ";"); - if (cc_features.opt_texture_edge && cc_features.opt_alpha) { + if (i == 0 && cc.cm.use_2cycle) { + append_str(buf, &len, "texel = "); + } + } + + if (cc.cm.texture_edge && cc.cm.use_alpha) { append_line(buf, &len, " if (texel.a > 0.3) texel.a = 1.0; else discard;"); } // TODO discard if alpha is 0? 
- if (cc_features.opt_fog) { - if (cc_features.opt_alpha) { + if (cc.cm.use_fog) { + if (cc.cm.use_alpha) { append_line(buf, &len, " texel = float4(lerp(texel.rgb, input.fog.rgb, input.fog.a), texel.a);"); } else { append_line(buf, &len, " texel = lerp(texel, input.fog.rgb, input.fog.a);"); } } - if (cc_features.opt_alpha && cc_features.opt_noise) { + if (cc.cm.use_alpha && cc.cm.use_noise) { append_line(buf, &len, " float2 coords = (input.screenPos.xy / input.screenPos.w) * noise_scale;"); append_line(buf, &len, " texel.a *= round(random(float3(floor(coords), noise_frame)));"); } - if (cc_features.opt_alpha) { + if (cc.cm.use_alpha) { append_line(buf, &len, " return texel;"); } else { append_line(buf, &len, " return float4(texel, 1.0);"); diff --git a/src/pc/gfx/gfx_direct3d_common.h b/src/pc/gfx/gfx_direct3d_common.h index 1eb316d7..7c35f1c8 100644 --- a/src/pc/gfx/gfx_direct3d_common.h +++ b/src/pc/gfx/gfx_direct3d_common.h @@ -7,7 +7,7 @@ #include "gfx_cc.h" -void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_floats, const CCFeatures& cc_features, bool include_root_signature, bool three_point_filtering); +void gfx_direct3d_common_build_shader(char buf[4096], size_t& len, size_t& num_floats, struct ColorCombiner& cc, const CCFeatures& cc_features, bool include_root_signature, bool three_point_filtering); #endif diff --git a/src/pc/gfx/gfx_dxgi.cpp b/src/pc/gfx/gfx_dxgi.cpp index c62b9e62..292394a2 100644 --- a/src/pc/gfx/gfx_dxgi.cpp +++ b/src/pc/gfx/gfx_dxgi.cpp @@ -212,13 +212,18 @@ static void update_screen_settings(void) { if (configWindow.fullscreen != dxgi.is_full_screen) toggle_borderless_window_full_screen(configWindow.fullscreen); if (!dxgi.is_full_screen) { + /* + // this code is buggy, and I just simply don't care enough about direct x to fix it + // when this is enabled, the window will be placed in the wrong spot... 
often off screen const int screen_width = GetSystemMetrics(SM_CXSCREEN); const int screen_height = GetSystemMetrics(SM_CYSCREEN); + const int xpos = (configWindow.x == WAPI_WIN_CENTERPOS) ? (screen_width - configWindow.w) * 0.5 : configWindow.x; const int ypos = (configWindow.y == WAPI_WIN_CENTERPOS) ? (screen_height - configWindow.h) * 0.5 : configWindow.y; RECT wr = { xpos, ypos, xpos + (int)configWindow.w, ypos + (int)configWindow.h }; AdjustWindowRect(&wr, WS_OVERLAPPEDWINDOW, FALSE); SetWindowPos(dxgi.h_wnd, NULL, wr.left, wr.top, wr.right - wr.left, wr.bottom - wr.top, SWP_NOACTIVATE | SWP_NOZORDER); + */ } } diff --git a/src/pc/gfx/gfx_opengl.c b/src/pc/gfx/gfx_opengl.c index 7ae82942..fb168d36 100644 --- a/src/pc/gfx/gfx_opengl.c +++ b/src/pc/gfx/gfx_opengl.c @@ -43,7 +43,7 @@ #define TEX_CACHE_STEP 512 struct ShaderProgram { - uint32_t shader_id; + uint64_t hash; GLuint opengl_program_id; uint8_t num_inputs; bool used_textures[2]; @@ -61,8 +61,9 @@ struct GLTexture { bool filter; }; -static struct ShaderProgram shader_program_pool[64]; -static uint8_t shader_program_pool_size; +static struct ShaderProgram shader_program_pool[CC_MAX_SHADERS]; +static uint8_t shader_program_pool_size = 0; +static uint8_t shader_program_pool_index = 0; static GLuint opengl_vbo; static int tex_cache_size = 0; @@ -136,6 +137,8 @@ static const char *shader_item_to_str(uint32_t item, bool with_alpha, bool only_ switch (item) { case SHADER_0: return with_alpha ? "vec4(0.0, 0.0, 0.0, 0.0)" : "vec3(0.0, 0.0, 0.0)"; + case SHADER_1: + return with_alpha ? "vec4(1.0, 1.0, 1.0, 1.0)" : "vec3(1.0, 1.0, 1.0)"; case SHADER_INPUT_1: return with_alpha || !inputs_have_alpha ? "vInput1" : "vInput1.rgb"; case SHADER_INPUT_2: @@ -144,6 +147,14 @@ static const char *shader_item_to_str(uint32_t item, bool with_alpha, bool only_ return with_alpha || !inputs_have_alpha ? "vInput3" : "vInput3.rgb"; case SHADER_INPUT_4: return with_alpha || !inputs_have_alpha ? 
"vInput4" : "vInput4.rgb"; + case SHADER_INPUT_5: + return with_alpha || !inputs_have_alpha ? "vInput5" : "vInput5.rgb"; + case SHADER_INPUT_6: + return with_alpha || !inputs_have_alpha ? "vInput6" : "vInput6.rgb"; + case SHADER_INPUT_7: + return with_alpha || !inputs_have_alpha ? "vInput7" : "vInput7.rgb"; + case SHADER_INPUT_8: + return with_alpha || !inputs_have_alpha ? "vInput8" : "vInput8.rgb"; case SHADER_TEXEL0: return with_alpha ? "texVal0" : "texVal0.rgb"; case SHADER_TEXEL0A: @@ -151,11 +162,21 @@ static const char *shader_item_to_str(uint32_t item, bool with_alpha, bool only_ (with_alpha ? "vec4(texelVal0.a, texelVal0.a, texelVal0.a, texelVal0.a)" : "vec3(texelVal0.a, texelVal0.a, texelVal0.a)"); case SHADER_TEXEL1: return with_alpha ? "texVal1" : "texVal1.rgb"; + case SHADER_TEXEL1A: + return hint_single_element ? "texVal1.a" : + (with_alpha ? "vec4(texelVal1.a, texelVal1.a, texelVal1.a, texelVal1.a)" : "vec3(texelVal1.a, texelVal1.a, texelVal1.a)"); + case SHADER_COMBINED: + return with_alpha ? "texel" : "texel.rgb"; + case SHADER_COMBINEDA: + return hint_single_element ? "texel.a" : + (with_alpha ? 
"vec4(texel.a, texel.a, texel.a, texel.a)" : "vec3(texel.a, texel.a, texel.a)"); } } else { switch (item) { case SHADER_0: return "0.0"; + case SHADER_1: + return "1.0"; case SHADER_INPUT_1: return "vInput1.a"; case SHADER_INPUT_2: @@ -164,81 +185,74 @@ static const char *shader_item_to_str(uint32_t item, bool with_alpha, bool only_ return "vInput3.a"; case SHADER_INPUT_4: return "vInput4.a"; + case SHADER_INPUT_5: + return "vInput5.a"; + case SHADER_INPUT_6: + return "vInput6.a"; + case SHADER_INPUT_7: + return "vInput7.a"; + case SHADER_INPUT_8: + return "vInput8.a"; case SHADER_TEXEL0: return "texVal0.a"; case SHADER_TEXEL0A: return "texVal0.a"; case SHADER_TEXEL1: return "texVal1.a"; + case SHADER_TEXEL1A: + return "texVal1.a"; + case SHADER_COMBINED: + return "texel.a"; + case SHADER_COMBINEDA: + return "texel.a"; } } return "unknown"; } -static void append_formula(char *buf, size_t *len, uint8_t c[2][4], bool do_single, bool do_multiply, bool do_mix, bool with_alpha, bool only_alpha, bool opt_alpha) { +static void append_formula(char *buf, size_t *len, uint8_t* cmd, bool do_single, bool do_multiply, bool do_mix, bool with_alpha, bool only_alpha, bool opt_alpha) { if (do_single) { - append_str(buf, len, shader_item_to_str(c[only_alpha][3], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 3], with_alpha, only_alpha, opt_alpha, false)); } else if (do_multiply) { - append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 0], with_alpha, only_alpha, opt_alpha, false)); append_str(buf, len, " * "); - append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true)); + append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 2], with_alpha, only_alpha, opt_alpha, true)); } else if (do_mix) { append_str(buf, len, "mix("); - append_str(buf, len, 
shader_item_to_str(c[only_alpha][1], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 1], with_alpha, only_alpha, opt_alpha, false)); append_str(buf, len, ", "); - append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 0], with_alpha, only_alpha, opt_alpha, false)); append_str(buf, len, ", "); - append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true)); + append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 2], with_alpha, only_alpha, opt_alpha, true)); append_str(buf, len, ")"); } else { append_str(buf, len, "("); - append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 0], with_alpha, only_alpha, opt_alpha, false)); append_str(buf, len, " - "); - append_str(buf, len, shader_item_to_str(c[only_alpha][1], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 1], with_alpha, only_alpha, opt_alpha, false)); append_str(buf, len, ") * "); - append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true)); + append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 2], with_alpha, only_alpha, opt_alpha, true)); append_str(buf, len, " + "); - append_str(buf, len, shader_item_to_str(c[only_alpha][3], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, shader_item_to_str(cmd[only_alpha * 4 + 3], with_alpha, only_alpha, opt_alpha, false)); } } -static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shader_id) { - uint8_t c[2][4]; - for (int i = 0; i < 4; i++) { - c[0][i] = (shader_id >> (i * 3)) & 7; - c[1][i] = (shader_id >> (12 + i * 3)) & 7; - } - bool opt_alpha = (shader_id & SHADER_OPT_ALPHA) != 0; - bool opt_fog = 
(shader_id & SHADER_OPT_FOG) != 0; - bool opt_texture_edge = (shader_id & SHADER_OPT_TEXTURE_EDGE) != 0; +static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(struct ColorCombiner* cc) { + struct CCFeatures ccf = { 0 }; + gfx_cc_get_features(cc, &ccf); + + bool opt_alpha = cc->cm.use_alpha; + bool opt_fog = cc->cm.use_fog; + bool opt_texture_edge = cc->cm.texture_edge; + bool opt_2cycle = cc->cm.use_2cycle; + bool opt_light_map = cc->cm.light_map; + #ifdef USE_GLES bool opt_noise = false; #else - bool opt_noise = (shader_id & SHADER_OPT_NOISE) != 0; + bool opt_noise = cc->cm.use_noise; #endif - bool used_textures[2] = { 0, 0 }; - int num_inputs = 0; - for (int i = 0; i < 2; i++) { - for (int j = 0; j < 4; j++) { - if (c[i][j] >= SHADER_INPUT_1 && c[i][j] <= SHADER_INPUT_4) { - if (c[i][j] > num_inputs) { - num_inputs = c[i][j]; - } - } - if (c[i][j] == SHADER_TEXEL0 || c[i][j] == SHADER_TEXEL0A) { - used_textures[0] = true; - } - if (c[i][j] == SHADER_TEXEL1) { - used_textures[1] = true; - } - } - } - bool do_single[2] = { c[0][2] == 0, c[1][2] == 0 }; - bool do_multiply[2] = { c[0][1] == 0 && c[0][3] == 0, c[1][1] == 0 && c[1][3] == 0 }; - bool do_mix[2] = { c[0][1] == c[0][3], c[1][1] == c[1][3] }; - bool color_alpha_same = (shader_id & 0xfff) == ((shader_id >> 12) & 0xfff); - char vs_buf[1024]; char fs_buf[2048]; size_t vs_len = 0; @@ -252,7 +266,7 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad append_line(vs_buf, &vs_len, "#version 120"); #endif append_line(vs_buf, &vs_len, "attribute vec4 aVtxPos;"); - if (used_textures[0] || used_textures[1]) { + if (ccf.used_textures[0] || ccf.used_textures[1]) { append_line(vs_buf, &vs_len, "attribute vec2 aTexCoord;"); append_line(vs_buf, &vs_len, "varying vec2 vTexCoord;"); num_floats += 2; @@ -262,19 +276,27 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad append_line(vs_buf, &vs_len, "varying vec4 vFog;"); num_floats += 4; } - for (int 
i = 0; i < num_inputs; i++) { + if (opt_light_map) { + append_line(vs_buf, &vs_len, "attribute vec2 aLightMap;"); + append_line(vs_buf, &vs_len, "varying vec2 vLightMap;"); + num_floats += 2; + } + for (int i = 0; i < ccf.num_inputs; i++) { vs_len += sprintf(vs_buf + vs_len, "attribute vec%d aInput%d;\n", opt_alpha ? 4 : 3, i + 1); vs_len += sprintf(vs_buf + vs_len, "varying vec%d vInput%d;\n", opt_alpha ? 4 : 3, i + 1); num_floats += opt_alpha ? 4 : 3; } append_line(vs_buf, &vs_len, "void main() {"); - if (used_textures[0] || used_textures[1]) { + if (ccf.used_textures[0] || ccf.used_textures[1]) { append_line(vs_buf, &vs_len, "vTexCoord = aTexCoord;"); } if (opt_fog) { append_line(vs_buf, &vs_len, "vFog = aFog;"); } - for (int i = 0; i < num_inputs; i++) { + if (opt_light_map) { + append_line(vs_buf, &vs_len, "vLightMap = aLightMap;"); + } + for (int i = 0; i < ccf.num_inputs; i++) { vs_len += sprintf(vs_buf + vs_len, "vInput%d = aInput%d;\n", i + 1, i + 1); } append_line(vs_buf, &vs_len, "gl_Position = aVtxPos;"); @@ -288,21 +310,24 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad append_line(fs_buf, &fs_len, "#version 120"); #endif - if (used_textures[0] || used_textures[1]) { + if (ccf.used_textures[0] || ccf.used_textures[1]) { append_line(fs_buf, &fs_len, "varying vec2 vTexCoord;"); } if (opt_fog) { append_line(fs_buf, &fs_len, "varying vec4 vFog;"); } - for (int i = 0; i < num_inputs; i++) { + if (opt_light_map) { + append_line(fs_buf, &fs_len, "varying vec2 vLightMap;"); + } + for (int i = 0; i < ccf.num_inputs; i++) { fs_len += sprintf(fs_buf + fs_len, "varying vec%d vInput%d;\n", opt_alpha ? 
4 : 3, i + 1); } - if (used_textures[0]) { + if (ccf.used_textures[0]) { append_line(fs_buf, &fs_len, "uniform sampler2D uTex0;"); append_line(fs_buf, &fs_len, "uniform vec2 uTex0Size;"); append_line(fs_buf, &fs_len, "uniform bool uTex0Filter;"); } - if (used_textures[1]) { + if (ccf.used_textures[1]) { append_line(fs_buf, &fs_len, "uniform sampler2D uTex1;"); append_line(fs_buf, &fs_len, "uniform vec2 uTex1Size;"); append_line(fs_buf, &fs_len, "uniform bool uTex1Filter;"); @@ -312,7 +337,7 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad // Original author: ArthurCarvalho // Slightly modified GLSL implementation by twinaphex, mupen64plus-libretro project. - if (used_textures[0] || used_textures[1]) { + if (ccf.used_textures[0] || ccf.used_textures[1]) { if (configFiltering == 2) { append_line(fs_buf, &fs_len, "#define TEX_OFFSET(off) texture2D(tex, texCoord - (off)/texSize)"); append_line(fs_buf, &fs_len, "vec4 filter3point(in sampler2D tex, in vec2 texCoord, in vec2 texSize) {"); @@ -347,29 +372,42 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad append_line(fs_buf, &fs_len, "void main() {"); - if (used_textures[0]) { + if (ccf.used_textures[0]) { append_line(fs_buf, &fs_len, "vec4 texVal0 = sampleTex(uTex0, vTexCoord, uTex0Size, uTex0Filter);"); } - if (used_textures[1]) { - append_line(fs_buf, &fs_len, "vec4 texVal1 = sampleTex(uTex1, vTexCoord, uTex1Size, uTex1Filter);"); + if (ccf.used_textures[1]) { + if (cc->cm.light_map) { + append_line(fs_buf, &fs_len, "vec4 texVal1 = sampleTex(uTex1, vLightMap, uTex1Size, uTex1Filter);"); + } else { + append_line(fs_buf, &fs_len, "vec4 texVal1 = sampleTex(uTex1, vTexCoord, uTex1Size, uTex1Filter);"); + } } - append_str(fs_buf, &fs_len, opt_alpha ? 
"vec4 texel = " : "vec3 texel = "); - if (!color_alpha_same && opt_alpha) { - append_str(fs_buf, &fs_len, "vec4("); - append_formula(fs_buf, &fs_len, c, do_single[0], do_multiply[0], do_mix[0], false, false, true); - append_str(fs_buf, &fs_len, ", "); - append_formula(fs_buf, &fs_len, c, do_single[1], do_multiply[1], do_mix[1], true, true, true); - append_str(fs_buf, &fs_len, ")"); - } else { - append_formula(fs_buf, &fs_len, c, do_single[0], do_multiply[0], do_mix[0], opt_alpha, false, opt_alpha); + append_str(fs_buf, &fs_len, (opt_alpha) ? "vec4 texel = " : "vec3 texel = "); + for (int i = 0; i < (opt_2cycle + 1); i++) { + u8* cmd = &cc->shader_commands[i * 8]; + if (!ccf.color_alpha_same[i] && opt_alpha) { + append_str(fs_buf, &fs_len, "vec4("); + append_formula(fs_buf, &fs_len, cmd, ccf.do_single[i*2+0], ccf.do_multiply[i*2+0], ccf.do_mix[i*2+0], false, false, true); + append_str(fs_buf, &fs_len, ", "); + append_formula(fs_buf, &fs_len, cmd, ccf.do_single[i*2+1], ccf.do_multiply[i*2+1], ccf.do_mix[i*2+1], true, true, true); + append_str(fs_buf, &fs_len, ")"); + } else { + append_formula(fs_buf, &fs_len, cmd, ccf.do_single[i*2+0], ccf.do_multiply[i*2+0], ccf.do_mix[i*2+0], opt_alpha, false, opt_alpha); + } + append_line(fs_buf, &fs_len, ";"); + + if (i == 0 && opt_2cycle) { + append_str(fs_buf, &fs_len, "texel = "); + } } - append_line(fs_buf, &fs_len, ";"); if (opt_texture_edge && opt_alpha) { append_line(fs_buf, &fs_len, "if (texel.a > 0.3) texel.a = 1.0; else discard;"); } + // TODO discard if alpha is 0? 
+ if (opt_fog) { if (opt_alpha) { append_line(fs_buf, &fs_len, "texel = vec4(mix(texel.rgb, vFog.rgb, vFog.a), texel.a);"); @@ -436,12 +474,15 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad size_t cnt = 0; - struct ShaderProgram *prg = &shader_program_pool[shader_program_pool_size++]; + struct ShaderProgram *prg = &shader_program_pool[shader_program_pool_index]; + shader_program_pool_index = (shader_program_pool_index + 1) % CC_MAX_SHADERS; + if (shader_program_pool_size < CC_MAX_SHADERS) { shader_program_pool_size++; } + prg->attrib_locations[cnt] = glGetAttribLocation(shader_program, "aVtxPos"); prg->attrib_sizes[cnt] = 4; ++cnt; - if (used_textures[0] || used_textures[1]) { + if (ccf.used_textures[0] || ccf.used_textures[1]) { prg->attrib_locations[cnt] = glGetAttribLocation(shader_program, "aTexCoord"); prg->attrib_sizes[cnt] = 2; ++cnt; @@ -453,7 +494,13 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad ++cnt; } - for (int i = 0; i < num_inputs; i++) { + if (opt_light_map) { + prg->attrib_locations[cnt] = glGetAttribLocation(shader_program, "aLightMap"); + prg->attrib_sizes[cnt] = 2; + ++cnt; + } + + for (int i = 0; i < ccf.num_inputs; i++) { char name[16]; sprintf(name, "aInput%d", i + 1); prg->attrib_locations[cnt] = glGetAttribLocation(shader_program, name); @@ -461,23 +508,23 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad ++cnt; } - prg->shader_id = shader_id; + prg->hash = cc->hash; prg->opengl_program_id = shader_program; - prg->num_inputs = num_inputs; - prg->used_textures[0] = used_textures[0]; - prg->used_textures[1] = used_textures[1]; + prg->num_inputs = ccf.num_inputs; + prg->used_textures[0] = ccf.used_textures[0]; + prg->used_textures[1] = ccf.used_textures[1]; prg->num_floats = num_floats; prg->num_attribs = cnt; gfx_opengl_load_shader(prg); - if (used_textures[0]) { + if (ccf.used_textures[0]) { GLint sampler_location = 
glGetUniformLocation(shader_program, "uTex0"); prg->uniform_locations[0] = glGetUniformLocation(shader_program, "uTex0Size"); prg->uniform_locations[1] = glGetUniformLocation(shader_program, "uTex0Filter"); glUniform1i(sampler_location, 0); } - if (used_textures[1]) { + if (ccf.used_textures[1]) { GLint sampler_location = glGetUniformLocation(shader_program, "uTex1"); prg->uniform_locations[2] = glGetUniformLocation(shader_program, "uTex1Size"); prg->uniform_locations[3] = glGetUniformLocation(shader_program, "uTex1Filter"); @@ -494,9 +541,9 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad return prg; } -static struct ShaderProgram *gfx_opengl_lookup_shader(uint32_t shader_id) { +static struct ShaderProgram *gfx_opengl_lookup_shader(struct ColorCombiner* cc) { for (size_t i = 0; i < shader_program_pool_size; i++) { - if (shader_program_pool[i].shader_id == shader_id) { + if (shader_program_pool[i].hash == cc->hash) { return &shader_program_pool[i]; } } diff --git a/src/pc/gfx/gfx_opengl_legacy.c b/src/pc/gfx/gfx_opengl_legacy.c index 2e3f1216..b3153af7 100644 --- a/src/pc/gfx/gfx_opengl_legacy.c +++ b/src/pc/gfx/gfx_opengl_legacy.c @@ -49,8 +49,9 @@ enum MixType { struct ShaderProgram { bool enabled; - uint32_t shader_id; - struct CCFeatures cc; + uint64_t hash; + struct ColorCombiner cc; + struct CCFeatures ccf; enum MixType mix; bool texture_used[2]; int texture_ord[2]; @@ -140,25 +141,12 @@ static inline GLenum texenv_set_texture(UNUSED struct ShaderProgram *prg) { } static inline GLenum texenv_set_texture_color(struct ShaderProgram *prg) { - GLenum mode; - // HACK: lord forgive me for this, but this is easier - - switch (prg->shader_id) { - case 0x0000038D: // mario's eyes - case 0x01045A00: // peach letter - case 0x01200A00: // intro copyright fade in - mode = GL_DECAL; - break; - case 0x00000551: // goddard - mode = GL_BLEND; - break; - default: - mode = GL_MODULATE; - break; + if (prg->cc.cm.rgb1 == 
color_comb_rgb(G_CCMUX_TEXEL0, G_CCMUX_SHADE, G_CCMUX_TEXEL0_ALPHA, G_CCMUX_SHADE, 0)) { + return GL_DECAL; + } else { + return GL_MODULATE; } - - return mode; } static inline GLenum texenv_set_texture_texture(UNUSED struct ShaderProgram *prg) { @@ -183,7 +171,7 @@ static void gfx_opengl_apply_shader(struct ShaderProgram *prg) { glDisable(GL_TEXTURE_2D); } - if (prg->shader_id & SHADER_OPT_FOG) { + if (prg->cc.cm.use_fog) { // blend it on top of normal tris later cur_fog_ofs = ofs; ofs += 4; @@ -195,10 +183,10 @@ static void gfx_opengl_apply_shader(struct ShaderProgram *prg) { // HACK: if there's a texture and two colors, one of them is likely for speculars or some shit (see mario head) // if there's two colors but no texture, the real color is likely the second one // HACKHACK: alpha is 0 in the transition shader (0x01A00045), maybe figure out the flags instead - const int vlen = (prg->cc.opt_alpha && prg->shader_id != 0x01A00045) ? 4 : 3; + const int vlen = (prg->cc.cm.use_alpha /*&& prg->shader_id != 0x01A00045*/) ? 
4 : 3; const int hack = vlen * (prg->num_inputs > 1); - if (prg->texture_used[1] && prg->cc.do_mix[0]) { + if (prg->texture_used[1] && prg->ccf.do_mix[0]) { // HACK: when two textures are mixed by vertex color, store the color // it will be used later when rendering two texture passes c_mix[0] = *(ofs + hack + 0); @@ -224,7 +212,7 @@ static void gfx_opengl_apply_shader(struct ShaderProgram *prg) { // we only need to do this once prg->enabled = true; - if (prg->shader_id & SHADER_OPT_TEXTURE_EDGE) { + if (prg->cc.cm.texture_edge) { // (horrible) alpha discard glEnable(GL_ALPHA_TEST); glAlphaFunc(GL_GREATER, 0.666f); @@ -258,14 +246,15 @@ static void gfx_opengl_load_shader(struct ShaderProgram *new_prg) { cur_shader->enabled = false; } -static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shader_id) { - struct CCFeatures ccf; - gfx_cc_get_features(shader_id, &ccf); - +static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(struct ColorCombiner* cc) { struct ShaderProgram *prg = &shader_program_pool[shader_program_pool_size++]; - prg->shader_id = shader_id; - prg->cc = ccf; + struct CCFeatures ccf = { 0 }; + gfx_cc_get_features(cc, &ccf); + + prg->hash = cc->hash; + prg->cc = *cc; + prg->ccf = ccf; prg->num_inputs = ccf.num_inputs; prg->texture_used[0] = ccf.used_textures[0]; prg->texture_used[1] = ccf.used_textures[1]; @@ -296,9 +285,9 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad return prg; } -static struct ShaderProgram *gfx_opengl_lookup_shader(uint32_t shader_id) { +static struct ShaderProgram *gfx_opengl_lookup_shader(struct ColorCombiner* cc) { for (size_t i = 0; i < shader_program_pool_size; i++) - if (shader_program_pool[i].shader_id == shader_id) + if (shader_program_pool[i].hash == cc->hash) return &shader_program_pool[i]; return NULL; } diff --git a/src/pc/gfx/gfx_pc.c b/src/pc/gfx/gfx_pc.c index 84fad246..da60828a 100644 --- a/src/pc/gfx/gfx_pc.c +++ b/src/pc/gfx/gfx_pc.c @@ -94,14 
+94,9 @@ static struct { uint32_t pool_pos; } gfx_texture_cache; -struct ColorCombiner { - uint32_t cc_id; - struct ShaderProgram *prg; - uint8_t shader_input_mapping[2][4]; -}; - -static struct ColorCombiner color_combiner_pool[64]; -static uint8_t color_combiner_pool_size; +static struct ColorCombiner color_combiner_pool[CC_MAX_SHADERS] = { 0 }; +static uint8_t color_combiner_pool_size = 0; +static uint8_t color_combiner_pool_index = 0; static struct RSP { float modelview_matrix_stack[11][4][4]; @@ -148,7 +143,7 @@ static struct RDP { bool textures_changed[2]; uint32_t other_mode_l, other_mode_h; - uint32_t combine_mode; + struct CombineMode combine_mode; struct RGBA env_color, prim_color, fog_color, fill_color; struct XYWidthHeight viewport, scissor; @@ -234,77 +229,132 @@ static void gfx_flush(void) { } } -static struct ShaderProgram *gfx_lookup_or_create_shader_program(uint32_t shader_id) { - struct ShaderProgram *prg = gfx_rapi->lookup_shader(shader_id); +static void combine_mode_update_hash(struct CombineMode* cm) { + uint64_t hash = 5381; + + cm->hash = 0; + + hash = (hash << 5) + hash + ((u64)cm->rgb1 << 32); + if (cm->use_alpha) { + hash = (hash << 5) + hash + ((u64)cm->alpha1); + } + + if (cm->use_2cycle) { + hash = (hash << 5) + hash + ((u64)cm->rgb2 << 32); + if (cm->use_alpha) { + hash = (hash << 5) + hash + ((u64)cm->alpha2); + } + } + + hash = (hash << 5) + hash + cm->flags; + + cm->hash = hash; +} + +static void color_combiner_update_hash(struct ColorCombiner* cc) { + uint64_t hash = cc->cm.hash; + + for (int i = 0; i < 8; i++) { + hash = (hash << 5) + hash + cc->shader_input_mapping_as_u64[i]; + hash = (hash << 5) + hash + cc->shader_commands_as_u64[i]; + } + + cc->hash = hash; +} + +static struct ShaderProgram *gfx_lookup_or_create_shader_program(struct ColorCombiner* cc) { + struct ShaderProgram *prg = gfx_rapi->lookup_shader(cc); if (prg == NULL) { gfx_rapi->unload_shader(rendering_state.shader_program); - prg = 
gfx_rapi->create_and_load_new_shader(shader_id); + prg = gfx_rapi->create_and_load_new_shader(cc); rendering_state.shader_program = prg; } return prg; } -static void gfx_generate_cc(struct ColorCombiner *comb, uint32_t cc_id) { - uint8_t c[2][4]; - uint32_t shader_id = (cc_id >> 24) << 24; - uint8_t shader_input_mapping[2][4] = {{0}}; - for (int32_t i = 0; i < 4; i++) { - c[0][i] = (cc_id >> (i * 3)) & 7; - c[1][i] = (cc_id >> (12 + i * 3)) & 7; - } - for (int32_t i = 0; i < 2; i++) { - if (c[i][0] == c[i][1] || c[i][2] == CC_0) { - c[i][0] = c[i][1] = c[i][2] = 0; - } - uint8_t input_number[8] = {0}; - int next_input_number = SHADER_INPUT_1; - for (int j = 0; j < 4; j++) { - int val = 0; - switch (c[i][j]) { - case CC_0: - break; - case CC_TEXEL0: - val = SHADER_TEXEL0; - break; - case CC_TEXEL1: - val = SHADER_TEXEL1; - break; - case CC_TEXEL0A: - val = SHADER_TEXEL0A; - break; - case CC_PRIM: - case CC_SHADE: - case CC_ENV: - case CC_LOD: - if (input_number[c[i][j]] == 0) { - shader_input_mapping[i][next_input_number - 1] = c[i][j]; - input_number[c[i][j]] = next_input_number++; - } - val = input_number[c[i][j]]; - break; - } - shader_id |= val << (i * 12 + j * 3); +static void gfx_generate_cc(struct ColorCombiner *cc) { + u8 next_input_number = 0; + u8 input_number[CC_ENUM_MAX] = { 0 }; + + for (int i = 0; i < SHADER_CMD_LENGTH; i++) { + u8 cm_cmd = cc->cm.all_values[i]; + u8 shader_cmd = 0; + switch (cm_cmd) { + case CC_0: + shader_cmd = SHADER_0; + break; + case CC_1: + shader_cmd = SHADER_1; + break; + case CC_TEXEL0: + shader_cmd = SHADER_TEXEL0; + break; + case CC_TEXEL1: + shader_cmd = SHADER_TEXEL1; + break; + case CC_TEXEL0A: + shader_cmd = SHADER_TEXEL0A; + break; + case CC_TEXEL1A: + shader_cmd = SHADER_TEXEL1A; + break; + case CC_COMBINED: + shader_cmd = cc->cm.use_2cycle ? SHADER_COMBINED : SHADER_0; + break; + case CC_COMBINEDA: + shader_cmd = cc->cm.use_2cycle ? 
SHADER_COMBINEDA : SHADER_0; + break; + case CC_PRIM: + case CC_PRIMA: + case CC_SHADE: + case CC_SHADEA: + case CC_ENV: + case CC_ENVA: + case CC_LOD: + if (input_number[cm_cmd] == 0) { + cc->shader_input_mapping[next_input_number] = cm_cmd; + input_number[cm_cmd] = SHADER_INPUT_1 + next_input_number; + next_input_number++; + } + shader_cmd = input_number[cm_cmd]; + break; + default: + shader_cmd = SHADER_0; + break; } + cc->shader_commands[i] = shader_cmd; } - comb->cc_id = cc_id; - comb->prg = gfx_lookup_or_create_shader_program(shader_id); - memcpy(comb->shader_input_mapping, shader_input_mapping, sizeof(shader_input_mapping)); + + color_combiner_update_hash(cc); + cc->prg = gfx_lookup_or_create_shader_program(cc); + gfx_cc_print(cc); } -static struct ColorCombiner *gfx_lookup_or_create_color_combiner(uint32_t cc_id) { +static struct ColorCombiner *gfx_lookup_or_create_color_combiner(struct CombineMode* cm) { + combine_mode_update_hash(cm); + static struct ColorCombiner *prev_combiner; - if (prev_combiner != NULL && prev_combiner->cc_id == cc_id) { + if (prev_combiner != NULL && prev_combiner->cm.hash == cm->hash) { return prev_combiner; } for (size_t i = 0; i < color_combiner_pool_size; i++) { - if (color_combiner_pool[i].cc_id == cc_id) { + if (color_combiner_pool[i].cm.hash == cm->hash) { return prev_combiner = &color_combiner_pool[i]; } } + gfx_flush(); - struct ColorCombiner *comb = &color_combiner_pool[color_combiner_pool_size++]; - gfx_generate_cc(comb, cc_id); + + struct ColorCombiner *comb = &color_combiner_pool[color_combiner_pool_index]; + color_combiner_pool_index = (color_combiner_pool_index + 1) % CC_MAX_SHADERS; + if (color_combiner_pool_size < CC_MAX_SHADERS) { color_combiner_pool_size++; } + + memcpy(&comb->cm, cm, sizeof(struct CombineMode)); + gfx_generate_cc(comb); + + printf(">> added %016lx\n", comb->cm.hash); + return prev_combiner = comb; } @@ -979,27 +1029,22 @@ static void OPTIMIZE_O3 gfx_sp_tri1(uint8_t vtx1_idx, uint8_t vtx2_idx, 
uint8_t rdp.viewport_or_scissor_changed = false; } - uint32_t cc_id = rdp.combine_mode; + struct CombineMode* cm = &rdp.combine_mode; - bool use_alpha = (rdp.other_mode_l & (G_BL_A_MEM << 18)) == 0; - bool use_fog = (rdp.other_mode_l >> 30) == G_BL_CLR_FOG; - bool texture_edge = (rdp.other_mode_l & CVG_X_ALPHA) == CVG_X_ALPHA; - bool use_noise = (rdp.other_mode_l & G_AC_DITHER) == G_AC_DITHER; + cm->use_alpha = (rdp.other_mode_l & (G_BL_A_MEM << 18)) == 0; + cm->texture_edge = (rdp.other_mode_l & CVG_X_ALPHA) == CVG_X_ALPHA; + cm->use_noise = (rdp.other_mode_l & G_AC_DITHER) == G_AC_DITHER; + cm->use_2cycle = (rdp.other_mode_h & (3U << G_MDSFT_CYCLETYPE)) == G_CYC_2CYCLE; + cm->use_fog = (rdp.other_mode_l >> 30) == G_BL_CLR_FOG; + cm->light_map = (rsp.geometry_mode & G_LIGHT_MAP_EXT) == G_LIGHT_MAP_EXT; - if (texture_edge) { - use_alpha = true; + if (cm->texture_edge) { + cm->use_alpha = true; } - if (use_alpha) cc_id |= SHADER_OPT_ALPHA; - if (use_fog) cc_id |= SHADER_OPT_FOG; - if (texture_edge) cc_id |= SHADER_OPT_TEXTURE_EDGE; - if (use_noise) cc_id |= SHADER_OPT_NOISE; + struct ColorCombiner *comb = gfx_lookup_or_create_color_combiner(cm); + cm = &comb->cm; - if (!use_alpha) { - cc_id &= ~0xfff000; - } - - struct ColorCombiner *comb = gfx_lookup_or_create_color_combiner(cc_id); struct ShaderProgram *prg = comb->prg; if (prg != rendering_state.shader_program) { gfx_flush(); @@ -1007,10 +1052,10 @@ static void OPTIMIZE_O3 gfx_sp_tri1(uint8_t vtx1_idx, uint8_t vtx2_idx, uint8_t gfx_rapi->load_shader(prg); rendering_state.shader_program = prg; } - if (use_alpha != rendering_state.alpha_blend) { + if (cm->use_alpha != rendering_state.alpha_blend) { gfx_flush(); - gfx_rapi->set_use_alpha(use_alpha); - rendering_state.alpha_blend = use_alpha; + gfx_rapi->set_use_alpha(cm->use_alpha); + rendering_state.alpha_blend = cm->use_alpha; } uint8_t num_inputs; bool used_textures[2]; @@ -1065,18 +1110,26 @@ static void OPTIMIZE_O3 gfx_sp_tri1(uint8_t vtx1_idx, uint8_t 
vtx2_idx, uint8_t buf_vbo[buf_vbo_len++] = v / tex_height; } - if (use_fog) { + if (cm->use_fog) { buf_vbo[buf_vbo_len++] = rdp.fog_color.r / 255.0f; buf_vbo[buf_vbo_len++] = rdp.fog_color.g / 255.0f; buf_vbo[buf_vbo_len++] = rdp.fog_color.b / 255.0f; buf_vbo[buf_vbo_len++] = v_arr[i]->color.a / 255.0f; // fog factor (not alpha) } + if (cm->light_map) { + struct RGBA* col = &v_arr[i]->color; + buf_vbo[buf_vbo_len++] = ( (((uint16_t)col->g) << 8) | ((uint16_t)col->r) ) / 65535.0f; + buf_vbo[buf_vbo_len++] = 1.0f - (( (((uint16_t)col->a) << 8) | ((uint16_t)col->b) ) / 65535.0f); + } + for (int j = 0; j < num_inputs; j++) { - struct RGBA *color; - struct RGBA tmp; - for (int k = 0; k < 1 + (use_alpha ? 1 : 0); k++) { - switch (comb->shader_input_mapping[k][j]) { + struct RGBA *color = NULL; + struct RGBA tmp = { 0 }; + for (int a = 0; a < (cm->use_alpha ? 2 : 1 ); a++) { + u8 mapping = comb->shader_input_mapping[j]; + + switch (mapping) { case CC_PRIM: color = &rdp.prim_color; break; @@ -1086,6 +1139,18 @@ static void OPTIMIZE_O3 gfx_sp_tri1(uint8_t vtx1_idx, uint8_t vtx2_idx, uint8_t case CC_ENV: color = &rdp.env_color; break; + case CC_PRIMA: + memset(&tmp, rdp.prim_color.a, sizeof(tmp)); + color = &tmp; + break; + case CC_SHADEA: + memset(&tmp, v_arr[i]->color.a, sizeof(tmp)); + color = &tmp; + break; + case CC_ENVA: + memset(&tmp, rdp.env_color.a, sizeof(tmp)); + color = &tmp; + break; case CC_LOD: { float distance_frac = (v1->w - 3000.0f) / 3000.0f; @@ -1100,12 +1165,12 @@ static void OPTIMIZE_O3 gfx_sp_tri1(uint8_t vtx1_idx, uint8_t vtx2_idx, uint8_t color = &tmp; break; } - if (k == 0) { + if (a == 0) { buf_vbo[buf_vbo_len++] = color->r / 255.0f; buf_vbo[buf_vbo_len++] = color->g / 255.0f; buf_vbo[buf_vbo_len++] = color->b / 255.0f; } else { - if (use_fog && color == &v_arr[i]->color) { + if (cm->use_fog && color == &v_arr[i]->color) { // Shade alpha is 100% for fog buf_vbo[buf_vbo_len++] = 1.0f; } else { @@ -1346,36 +1411,17 @@ static void 
gfx_dp_load_tile(uint8_t tile, uint32_t uls, uint32_t ult, uint32_t rdp.texture_tile.lrt = lrt; } -static uint8_t color_comb_component(uint32_t v) { - switch (v) { - case G_CCMUX_TEXEL0: - return CC_TEXEL0; - case G_CCMUX_TEXEL1: - return CC_TEXEL1; - case G_CCMUX_PRIMITIVE: - return CC_PRIM; - case G_CCMUX_SHADE: - return CC_SHADE; - case G_CCMUX_ENVIRONMENT: - return CC_ENV; - case G_CCMUX_TEXEL0_ALPHA: - return CC_TEXEL0A; - case G_CCMUX_LOD_FRACTION: - return CC_LOD; - default: - return CC_0; - } -} +static void gfx_dp_set_combine_mode(uint32_t rgb1, uint32_t alpha1, uint32_t rgb2, uint32_t alpha2) { + //printf(">>> combine: %08x %08x %08x %08x\n", rgb1, alpha1, rgb2, alpha2); + memset(&rdp.combine_mode, 0, sizeof(struct CombineMode)); -static inline uint32_t color_comb(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { - return color_comb_component(a) | - (color_comb_component(b) << 3) | - (color_comb_component(c) << 6) | - (color_comb_component(d) << 9); -} + rdp.combine_mode.rgb1 = rgb1; + rdp.combine_mode.alpha1 = alpha1; -static void gfx_dp_set_combine_mode(uint32_t rgb, uint32_t alpha) { - rdp.combine_mode = rgb | (alpha << 12); + rdp.combine_mode.rgb2 = rgb2; + rdp.combine_mode.alpha2 = alpha2; + + rdp.combine_mode.flags = 0; } static void gfx_dp_set_env_color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { @@ -1480,14 +1526,18 @@ static void gfx_draw_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t lr } static void gfx_dp_texture_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t lry, UNUSED uint8_t tile, int16_t uls, int16_t ult, int16_t dsdx, int16_t dtdy, bool flip) { - uint32_t saved_combine_mode = rdp.combine_mode; + struct CombineMode saved_combine_mode = rdp.combine_mode; if ((rdp.other_mode_h & (3U << G_MDSFT_CYCLETYPE)) == G_CYC_COPY) { // Per RDP Command Summary Set Tile's shift s and this dsdx should be set to 4 texels // Divide by 4 to get 1 instead dsdx >>= 2; // Color combiner is turned off in copy mode - 
gfx_dp_set_combine_mode(color_comb(0, 0, 0, G_CCMUX_TEXEL0), color_comb(0, 0, 0, G_ACMUX_TEXEL0)); + gfx_dp_set_combine_mode( + color_comb_rgb (G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_CCMUX_TEXEL0, 0), + color_comb_alpha(G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_ACMUX_TEXEL0, 0), + color_comb_rgb (G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_CCMUX_TEXEL0, 1), + color_comb_alpha(G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_ACMUX_TEXEL0, 1)); // Per documentation one extra pixel is added in this modes to each edge lrx += 1 << 2; @@ -1528,7 +1578,10 @@ static void gfx_dp_texture_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int3 } gfx_draw_rectangle(ulx, uly, lrx, lry); + + u32 cflags = rdp.combine_mode.flags; rdp.combine_mode = saved_combine_mode; + rdp.combine_mode.flags = cflags; } static void gfx_dp_fill_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t lry) { @@ -1549,10 +1602,17 @@ static void gfx_dp_fill_rectangle(int32_t ulx, int32_t uly, int32_t lrx, int32_t v->color = rdp.fill_color; } - uint32_t saved_combine_mode = rdp.combine_mode; - gfx_dp_set_combine_mode(color_comb(0, 0, 0, G_CCMUX_SHADE), color_comb(0, 0, 0, G_ACMUX_SHADE)); + struct CombineMode saved_combine_mode = rdp.combine_mode; + gfx_dp_set_combine_mode( + color_comb_rgb (G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_CCMUX_SHADE, 0), + color_comb_alpha(G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_ACMUX_SHADE, 0), + color_comb_rgb (G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_CCMUX_SHADE, 1), + color_comb_alpha(G_CCMUX_0, G_CCMUX_0, G_CCMUX_0, G_ACMUX_SHADE, 1)); gfx_draw_rectangle(ulx, uly, lrx, lry); + + u32 cflags = rdp.combine_mode.flags; rdp.combine_mode = saved_combine_mode; + rdp.combine_mode.flags = cflags; } static void gfx_dp_set_z_image(void *z_buf_address) { @@ -1719,10 +1779,10 @@ static void OPTIMIZE_O3 gfx_run_dl(Gfx* cmd) { break; case G_SETCOMBINE: gfx_dp_set_combine_mode( - color_comb(C0(20, 4), C1(28, 4), C0(15, 5), C1(15, 3)), - color_comb(C0(12, 3), C1(12, 3), C0(9, 3), C1(9, 3))); - /*color_comb(C0(5, 4), C1(24, 4), C0(0, 5), 
C1(6, 3)), - color_comb(C1(21, 3), C1(3, 3), C1(18, 3), C1(0, 3)));*/ + color_comb_rgb (C0(20, 4), C1(28, 4), C0(15, 5), C1(15, 3), 0), + color_comb_alpha(C0(12, 3), C1(12, 3), C0(9, 3), C1(9, 3), 0), + color_comb_rgb (C0(5, 4), C1(24, 4), C0(0, 5), C1(6, 3), 1), + color_comb_alpha(C1(21, 3), C1(3, 3), C1(18, 3), C1(0, 3), 1)); break; // G_SETPRIMCOLOR, G_CCMUX_PRIMITIVE, G_ACMUX_PRIMITIVE, is used by Goddard // G_CCMUX_TEXEL1, LOD_FRACTION is used in Bowser room 1 @@ -1808,38 +1868,7 @@ void gfx_init(struct GfxWindowManagerAPI *wapi, struct GfxRenderingAPI *rapi, co gfx_wapi->init(window_title); gfx_rapi->init(); - // Used in the 120 star TAS - static uint32_t precomp_shaders[] = { - 0x01200200, - 0x00000045, - 0x00000200, - 0x01200a00, - 0x00000a00, - 0x01a00045, - 0x00000551, - 0x01045045, - 0x05a00a00, - 0x01200045, - 0x05045045, - 0x01045a00, - 0x01a00a00, - 0x0000038d, - 0x01081081, - 0x0120038d, - 0x03200045, - 0x03200a00, - 0x01a00a6f, - 0x01141045, - 0x07a00a00, - 0x05200200, - 0x03200200, - 0x09200200, - 0x0920038d, - 0x09200045 - }; - - for (size_t i = 0; i < sizeof(precomp_shaders) / sizeof(uint32_t); i++) - gfx_lookup_or_create_shader_program(precomp_shaders[i]); + gfx_cc_precomp(); } #ifdef EXTERNAL_DATA @@ -2133,6 +2162,15 @@ static void OPTIMIZE_O3 djui_gfx_sp_simple_tri1(uint8_t vtx1_idx, uint8_t vtx2_i */ } +void gfx_pc_precomp_shader(uint32_t rgb1, uint32_t alpha1, uint32_t rgb2, uint32_t alpha2, uint32_t flags) { + gfx_dp_set_combine_mode(rgb1, alpha1, rgb2, alpha2); + + struct CombineMode* cm = &rdp.combine_mode; + cm->flags = flags; + + gfx_lookup_or_create_color_combiner(cm); +} + void OPTIMIZE_O3 djui_gfx_run_dl(Gfx* cmd) { uint32_t opcode = cmd->words.w0 >> 24; switch (opcode) { diff --git a/src/pc/gfx/gfx_pc.h b/src/pc/gfx/gfx_pc.h index 76b066d1..b5e7f383 100644 --- a/src/pc/gfx/gfx_pc.h +++ b/src/pc/gfx/gfx_pc.h @@ -26,6 +26,7 @@ void gfx_run(Gfx *commands); void gfx_end_frame(void); void gfx_precache_textures(void); void 
gfx_shutdown(void); +void gfx_pc_precomp_shader(uint32_t rgb1, uint32_t alpha1, uint32_t rgb2, uint32_t alpha2, uint32_t flags); #ifdef __cplusplus } diff --git a/src/pc/gfx/gfx_rendering_api.h b/src/pc/gfx/gfx_rendering_api.h index e76efe74..064d8de5 100644 --- a/src/pc/gfx/gfx_rendering_api.h +++ b/src/pc/gfx/gfx_rendering_api.h @@ -6,13 +6,14 @@ #include struct ShaderProgram; +struct ColorCombiner; struct GfxRenderingAPI { bool (*z_is_from_0_to_1)(void); void (*unload_shader)(struct ShaderProgram *old_prg); void (*load_shader)(struct ShaderProgram *new_prg); - struct ShaderProgram *(*create_and_load_new_shader)(uint32_t shader_id); - struct ShaderProgram *(*lookup_shader)(uint32_t shader_id); + struct ShaderProgram *(*create_and_load_new_shader)(struct ColorCombiner* cc); + struct ShaderProgram *(*lookup_shader)(struct ColorCombiner* cc); void (*shader_get_info)(struct ShaderProgram *prg, uint8_t *num_inputs, bool used_textures[2]); uint32_t (*new_texture)(void); void (*select_texture)(int tile, uint32_t texture_id); diff --git a/src/pc/gfx/gfx_screen_config.h b/src/pc/gfx/gfx_screen_config.h index 5b933f85..e6a5952c 100644 --- a/src/pc/gfx/gfx_screen_config.h +++ b/src/pc/gfx/gfx_screen_config.h @@ -1,7 +1,7 @@ #ifndef GFX_SCREEN_CONFIG_H #define GFX_SCREEN_CONFIG_H -#define DESIRED_SCREEN_WIDTH 640 -#define DESIRED_SCREEN_HEIGHT 480 +#define DESIRED_SCREEN_WIDTH 800 +#define DESIRED_SCREEN_HEIGHT 600 #endif