From a81a8e6ef1d52becbff3d08fb0bf8611084e0cce Mon Sep 17 00:00:00 2001 From: fgsfds Date: Thu, 11 Jun 2020 03:44:08 +0300 Subject: [PATCH] added D3D11 and D3D12 renderers from Emil/n64-fast3d-engine along with options to select backends for windowing, rendering, audio and controls in the Makefile use RENDER_API=D3D11 or D3D12 for the D3D renderers, that will also automatically enable using DXGI for windowing; SDL2 will still be used for input and audio for the time being also adds three-point filtering to the OpenGL backend and an option for it in the menu --- Makefile | 113 ++- include/text_options_strings.h.in | 2 + src/game/options_menu.c | 2 + src/pc/audio/audio_sdl.c | 6 +- src/pc/audio/audio_sdl.h | 2 + src/pc/configfile.c | 2 +- src/pc/controller/controller_sdl.c | 4 + src/pc/gfx/gfx_direct3d11.cpp | 1306 ++++++++++++++++++++++++++ src/pc/gfx/gfx_direct3d11.h | 10 + src/pc/gfx/gfx_direct3d12.cpp | 1317 +++++++++++++++++++++++++++ src/pc/gfx/gfx_direct3d12.h | 10 + src/pc/gfx/gfx_direct3d_common.cpp | 143 +++ src/pc/gfx/gfx_direct3d_common.h | 33 + src/pc/gfx/gfx_opengl.c | 126 ++- src/pc/gfx/gfx_opengl_legacy.c | 4 +- src/pc/gfx/gfx_pc.c | 13 +- src/pc/gfx/gfx_sdl2.c | 73 +- src/pc/gfx/gfx_window_manager_api.h | 5 +- src/pc/pc_main.c | 23 +- src/pc/platform.c | 2 +- 20 files changed, 3115 insertions(+), 81 deletions(-) create mode 100644 src/pc/gfx/gfx_direct3d11.cpp create mode 100644 src/pc/gfx/gfx_direct3d11.h create mode 100644 src/pc/gfx/gfx_direct3d12.cpp create mode 100644 src/pc/gfx/gfx_direct3d12.h create mode 100644 src/pc/gfx/gfx_direct3d_common.cpp create mode 100644 src/pc/gfx/gfx_direct3d_common.h diff --git a/Makefile b/Makefile index 890fb3f9..0b1f81d2 100644 --- a/Makefile +++ b/Makefile @@ -54,9 +54,16 @@ DISCORDRPC ?= 0 NO_BZERO_BCOPY ?= 0 NO_LDIV ?= 0 -# Use OpenGL 1.3 renderer +# Backend selection -LEGACY_GL ?= 0 +# Renderers: GL, GL_LEGACY, D3D11, D3D12 +RENDER_API ?= GL +# Window managers: SDL2, DXGI (forced if D3D11 or D3D12 in 
RENDER_API) +WINDOW_API ?= SDL2 +# Audio backends: SDL2 +AUDIO_API ?= SDL2 +# Controller backends (can have multiple, space separated): SDL2 +CONTROLLER_API ?= SDL2 # Misc settings for EXTERNAL_DATA @@ -207,6 +214,22 @@ ifeq ($(TARGET_WEB),1) VERSION_CFLAGS := $(VERSION_CFLAGS) -DTARGET_WEB endif +# Check backends + +ifneq (,$(filter $(RENDER_API),D3D11 D3D12)) + ifneq ($(WINDOWS_BUILD),1) + $(error DirectX is only supported on Windows) + endif + ifneq ($(WINDOW_API),DXGI) + $(warning DirectX renderers require DXGI, forcing WINDOW_API value) + WINDOW_API := DXGI + endif +else + ifeq ($(WINDOW_API),DXGI) + $(error DXGI can only be used with DirectX renderers) + endif +endif + ################### Universal Dependencies ################### # (This is a bit hacky, but a lot of rules implicitly depend @@ -515,18 +538,68 @@ endif PYTHON := python3 SDLCONFIG := $(CROSS)sdl2-config +# configure backend flags + +BACKEND_CFLAGS := -DRAPI_$(RENDER_API)=1 -DWAPI_$(WINDOW_API)=1 -DAAPI_$(AUDIO_API)=1 +# can have multiple controller APIs +BACKEND_CFLAGS += $(foreach capi,$(CONTROLLER_API),-DCAPI_$(capi)=1) +BACKEND_LDFLAGS := +SDL_USED := 0 + +# for now, it's either SDL+GL or DXGI+DirectX, so choose based on WAPI +ifeq ($(WINDOW_API),DXGI) + DXBITS := `cat $(ENDIAN_BITWIDTH) | tr ' ' '\n' | tail -1` + ifeq ($(RENDER_API),D3D11) + BACKEND_LDFLAGS += -ld3d11 + else ifeq ($(RENDER_API),D3D12) + BACKEND_LDFLAGS += -ld3d12 + endif + BACKEND_LDFLAGS += -ld3dcompiler -ldxgi -ldxguid + BACKEND_LDFLAGS += -lsetupapi -ldinput8 -luser32 -lgdi32 -limm32 -lole32 -loleaut32 -lshell32 -lwinmm -lversion -luuid -static +else ifeq ($(WINDOW_API),SDL2) + ifeq ($(WINDOWS_BUILD),1) + BACKEND_LDFLAGS += -lglew32 -lglu32 -lopengl32 + else ifeq ($(TARGET_RPI),1) + BACKEND_LDFLAGS += -lGLESv2 + else ifeq ($(OSX_BUILD),1) + BACKEND_LDFLAGS += -framework OpenGL `pkg-config --libs glew` + else + BACKEND_LDFLAGS += -lGL + endif + SDL_USED := 2 +endif + +ifeq ($(AUDIO_API),SDL2) + SDL_USED := 2 +endif + 
+ifneq (,$(findstring SDL,$(CONTROLLER_API))) + SDL_USED := 2 +endif + +# SDL can be used by several backends, so all of its flags are consolidated here +ifeq ($(SDL_USED),2) + BACKEND_CFLAGS += -DHAVE_SDL2=1 `$(SDLCONFIG) --cflags` + ifeq ($(WINDOWS_BUILD),1) + BACKEND_LDFLAGS += `$(SDLCONFIG) --static-libs` -lsetupapi -luser32 -limm32 -lOle32 -loleaut32 -lshell32 -lwinmm -lversion + else + BACKEND_LDFLAGS += `$(SDLCONFIG) --libs` + endif +endif + ifeq ($(WINDOWS_BUILD),1) -CC_CHECK := $(CC) -fsyntax-only -fsigned-char $(INCLUDE_CFLAGS) -Wall -Wextra -Wno-format-security $(VERSION_CFLAGS) $(GRUCODE_CFLAGS) `$(SDLCONFIG) --cflags` -DUSE_SDL=2 -CFLAGS := $(OPT_FLAGS) $(INCLUDE_CFLAGS) $(VERSION_CFLAGS) $(GRUCODE_CFLAGS) -fno-strict-aliasing -fwrapv `$(SDLCONFIG) --cflags` -DUSE_SDL=2 + CC_CHECK := $(CC) -fsyntax-only -fsigned-char $(BACKEND_CFLAGS) $(INCLUDE_CFLAGS) -Wall -Wextra -Wno-format-security $(VERSION_CFLAGS) $(GRUCODE_CFLAGS) + CFLAGS := $(OPT_FLAGS) $(INCLUDE_CFLAGS) $(BACKEND_CFLAGS) $(VERSION_CFLAGS) $(GRUCODE_CFLAGS) -fno-strict-aliasing -fwrapv else ifeq ($(TARGET_WEB),1) -CC_CHECK := $(CC) -fsyntax-only -fsigned-char $(INCLUDE_CFLAGS) -Wall -Wextra -Wno-format-security $(VERSION_CFLAGS) $(GRUCODE_CFLAGS) -s USE_SDL=2 -CFLAGS := $(OPT_FLAGS) $(INCLUDE_CFLAGS) $(VERSION_CFLAGS) $(GRUCODE_CFLAGS) -fno-strict-aliasing -fwrapv -s USE_SDL=2 + CC_CHECK := $(CC) -fsyntax-only -fsigned-char $(BACKEND_CFLAGS) $(INCLUDE_CFLAGS) -Wall -Wextra -Wno-format-security $(VERSION_CFLAGS) $(GRUCODE_CFLAGS) -s USE_SDL=2 + CFLAGS := $(OPT_FLAGS) $(INCLUDE_CFLAGS) $(BACKEND_CFLAGS) $(VERSION_CFLAGS) $(GRUCODE_CFLAGS) -fno-strict-aliasing -fwrapv -s USE_SDL=2 # Linux / Other builds below else -CC_CHECK := $(CC) -fsyntax-only -fsigned-char $(INCLUDE_CFLAGS) -Wall -Wextra -Wno-format-security $(VERSION_CFLAGS) $(GRUCODE_CFLAGS) `$(SDLCONFIG) --cflags` -DUSE_SDL=2 -CFLAGS := $(OPT_FLAGS) $(INCLUDE_CFLAGS) $(VERSION_CFLAGS) $(GRUCODE_CFLAGS) -fno-strict-aliasing -fwrapv 
`$(SDLCONFIG) --cflags` -DUSE_SDL=2 + CC_CHECK := $(CC) -fsyntax-only -fsigned-char $(BACKEND_CFLAGS) $(INCLUDE_CFLAGS) -Wall -Wextra -Wno-format-security $(VERSION_CFLAGS) $(GRUCODE_CFLAGS) + CFLAGS := $(OPT_FLAGS) $(INCLUDE_CFLAGS) $(BACKEND_CFLAGS) $(VERSION_CFLAGS) $(GRUCODE_CFLAGS) -fno-strict-aliasing -fwrapv + endif # Check for enhancement options @@ -598,26 +671,28 @@ ASFLAGS := -I include -I $(BUILD_DIR) $(VERSION_ASFLAGS) ifeq ($(TARGET_WEB),1) LDFLAGS := -lm -lGL -lSDL2 -no-pie -s TOTAL_MEMORY=20MB -g4 --source-map-base http://localhost:8080/ -s "EXTRA_EXPORTED_RUNTIME_METHODS=['callMain']" + else ifeq ($(WINDOWS_BUILD),1) - LDFLAGS := $(BITS) -march=$(TARGET_ARCH) -Llib -lpthread -lglew32 `$(SDLCONFIG) --static-libs` -lm -lglu32 -lsetupapi -ldinput8 -luser32 -lgdi32 -limm32 -lole32 -loleaut32 -lshell32 -lwinmm -lversion -luuid -lopengl32 -static + LDFLAGS := $(BITS) -march=$(TARGET_ARCH) -Llib -lpthread $(BACKEND_LDFLAGS) -static ifeq ($(CROSS),) LDFLAGS += -no-pie endif ifeq ($(WINDOWS_CONSOLE),1) LDFLAGS += -mconsole endif + else ifeq ($(TARGET_RPI),1) -# Linux / Other builds below -LDFLAGS := $(OPT_FLAGS) -lm -lGLESv2 `$(SDLCONFIG) --libs` -no-pie + LDFLAGS := $(OPT_FLAGS) -lm $(BACKEND_LDFLAGS) -no-pie + +else ifeq ($(OSX_BUILD),1) + LDFLAGS := -lm $(BACKEND_LDFLAGS) -no-pie -lpthread + else -ifeq ($(OSX_BUILD),1) -LDFLAGS := -lm -framework OpenGL `$(SDLCONFIG) --libs` -no-pie -lpthread `pkg-config --libs libusb-1.0 glfw3 glew` -else -LDFLAGS := $(BITS) -march=$(TARGET_ARCH) -lm -lGL `$(SDLCONFIG) --libs` -no-pie -lpthread -ifeq ($(DISCORDRPC),1) - LDFLAGS += -ldl -Wl,-rpath . -endif -endif + LDFLAGS := $(BITS) -march=$(TARGET_ARCH) -lm $(BACKEND_LDFLAGS) -no-pie -lpthread + ifeq ($(DISCORDRPC),1) + LDFLAGS += -ldl -Wl,-rpath . 
+ endif + endif # End of LDFLAGS # Prevent a crash with -sopt diff --git a/include/text_options_strings.h.in b/include/text_options_strings.h.in index 2941bde1..20b36f5c 100644 --- a/include/text_options_strings.h.in +++ b/include/text_options_strings.h.in @@ -51,6 +51,7 @@ #define TEXT_OPT_DOUBLE _("DOUBLE") #define TEXT_RESET_WINDOW _("RESET WINDOW") #define TEXT_OPT_HUD _("HUD") +#define TEXT_OPT_THREEPOINT _("THREE POINT") #define TEXT_BIND_A _("A BUTTON") #define TEXT_BIND_B _("B BUTTON") @@ -109,6 +110,7 @@ #define TEXT_OPT_DOUBLE _("Double") #define TEXT_RESET_WINDOW _("Reset Window") #define TEXT_OPT_HUD _("HUD") +#define TEXT_OPT_THREEPOINT _("Three-point") #define TEXT_BIND_A _("A Button") #define TEXT_BIND_B _("B Button") diff --git a/src/game/options_menu.c b/src/game/options_menu.c index d37b6afe..6b2c5bf8 100644 --- a/src/game/options_menu.c +++ b/src/game/options_menu.c @@ -80,6 +80,7 @@ static const u8 optsVideoStr[][32] = { { TEXT_OPT_VSYNC }, { TEXT_OPT_DOUBLE }, { TEXT_OPT_HUD }, + { TEXT_OPT_THREEPOINT }, }; static const u8 optsAudioStr[][32] = { @@ -122,6 +123,7 @@ static const u8 bindStr[][32] = { static const u8 *filterChoices[] = { optsVideoStr[2], optsVideoStr[3], + optsVideoStr[8], }; static const u8 *vsyncChoices[] = { diff --git a/src/pc/audio/audio_sdl.c b/src/pc/audio/audio_sdl.c index beb5a1e6..0701b037 100644 --- a/src/pc/audio/audio_sdl.c +++ b/src/pc/audio/audio_sdl.c @@ -1,3 +1,5 @@ +#ifdef AAPI_SDL2 + #include #include "audio_api.h" @@ -57,4 +59,6 @@ struct AudioAPI audio_sdl = { audio_sdl_get_desired_buffered, audio_sdl_play, audio_sdl_shutdown -}; \ No newline at end of file +}; + +#endif diff --git a/src/pc/audio/audio_sdl.h b/src/pc/audio/audio_sdl.h index e553239f..8b4a4e46 100644 --- a/src/pc/audio/audio_sdl.h +++ b/src/pc/audio/audio_sdl.h @@ -1,6 +1,8 @@ #ifndef AUDIO_SDL_H #define AUDIO_SDL_H +#include "audio_api.h" + extern struct AudioAPI audio_sdl; #endif diff --git a/src/pc/configfile.c b/src/pc/configfile.c index 
db420112..1af4bfb5 100644 --- a/src/pc/configfile.c +++ b/src/pc/configfile.c @@ -6,7 +6,7 @@ #include #include -#if USE_SDL == 2 +#ifdef WAPI_SDL2 # include # define WINDOWPOS_CENTERED SDL_WINDOWPOS_CENTERED #else diff --git a/src/pc/controller/controller_sdl.c b/src/pc/controller/controller_sdl.c index 10fe9d71..0a8ad57e 100644 --- a/src/pc/controller/controller_sdl.c +++ b/src/pc/controller/controller_sdl.c @@ -1,3 +1,5 @@ +#ifdef CAPI_SDL2 + #include #include #include @@ -296,3 +298,5 @@ struct ControllerAPI controller_sdl = { controller_sdl_bind, controller_sdl_shutdown }; + +#endif // CAPI_SDL2 diff --git a/src/pc/gfx/gfx_direct3d11.cpp b/src/pc/gfx/gfx_direct3d11.cpp new file mode 100644 index 00000000..b81bbec6 --- /dev/null +++ b/src/pc/gfx/gfx_direct3d11.cpp @@ -0,0 +1,1306 @@ +#ifdef RAPI_D3D11 + +#if defined(_WIN32) || defined(_WIN64) + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +extern "C" { +#include "../configfile.h" +#include "../platform.h" +} + +#ifndef _LANGUAGE_C +# define _LANGUAGE_C +#endif +#include + +#include "gfx_cc.h" +#include "gfx_window_manager_api.h" +#include "gfx_rendering_api.h" +#include "gfx_direct3d_common.h" + +#include "gfx_screen_config.h" + +#define WINCLASS_NAME L"SUPERMARIO64" +#define WINDOW_CLIENT_MIN_WIDTH 320 +#define WINDOW_CLIENT_MIN_HEIGHT 240 +#define DEBUG_D3D 0 + +using namespace Microsoft::WRL; // For ComPtr + +struct PerFrameCB { + uint32_t noise_frame; + float noise_scale_x; + float noise_scale_y; + uint32_t padding; +}; + +struct PerDrawCB { + struct Texture { + uint32_t width; + uint32_t height; + uint32_t linear_filtering; + uint32_t padding; + } textures[2]; +}; + +struct TextureData { + ComPtr resource_view; + ComPtr sampler_state; + uint32_t width; + uint32_t height; + bool linear_filtering; +}; + +struct ShaderProgram { + ComPtr vertex_shader; + ComPtr pixel_shader; + ComPtr input_layout; + ComPtr blend_state; + + uint32_t shader_id; + uint8_t 
num_inputs; + uint8_t num_floats; + bool used_textures[2]; +}; + +static struct { + ComPtr device; + ComPtr context; + ComPtr swap_chain; // For Windows versions older than 8.1 + ComPtr swap_chain2; // For Windows version 8.1 or newer + ComPtr backbuffer_view; + ComPtr depth_stencil_view; + ComPtr rasterizer_state; + ComPtr depth_stencil_state; + ComPtr vertex_buffer; + ComPtr per_frame_cb; + ComPtr per_draw_cb; + +#if DEBUG_D3D + ComPtr debug; +#endif + + HANDLE frame_latency_waitable_object; + + DXGI_SAMPLE_DESC sample_description; + + PerFrameCB per_frame_cb_data; + PerDrawCB per_draw_cb_data; + + struct ShaderProgram shader_program_pool[64]; + uint8_t shader_program_pool_size; + + std::vector textures; + int current_tile; + uint32_t current_texture_ids[2]; + + // Current state + + struct ShaderProgram *shader_program; + + uint32_t current_width, current_height; + + int8_t depth_test; + int8_t depth_mask; + int8_t zmode_decal; + + // Previous states (to prevent setting states needlessly) + + struct ShaderProgram *last_shader_program = nullptr; + uint32_t last_vertex_buffer_stride = 0; + ComPtr last_blend_state = nullptr; + ComPtr last_resource_views[2] = { nullptr, nullptr }; + ComPtr last_sampler_states[2] = { nullptr, nullptr }; + int8_t last_depth_test = -1; + int8_t last_depth_mask = -1; + int8_t last_zmode_decal = -1; + D3D_PRIMITIVE_TOPOLOGY last_primitive_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; + + // Game loop callback + + void (*run_one_game_iter)(void); + bool (*on_key_down)(int scancode); + bool (*on_key_up)(int scancode); + void (*on_all_keys_up)(void); +} d3d; + +static HWND h_wnd; +static bool lower_latency; +static LARGE_INTEGER last_time, accumulated_time, frequency; +static uint8_t sync_interval; +static RECT last_window_rect; +static bool is_full_screen, last_maximized_state; + +static void toggle_borderless_window_full_screen() { + if (is_full_screen) { + RECT r = last_window_rect; + + // Set in window mode with the last saved position 
and size + SetWindowLongPtr(h_wnd, GWL_STYLE, WS_VISIBLE | WS_OVERLAPPEDWINDOW); + + if (last_maximized_state) { + SetWindowPos(h_wnd, NULL, 0, 0, 0, 0, SWP_FRAMECHANGED | SWP_NOMOVE | SWP_NOSIZE); + ShowWindow(h_wnd, SW_MAXIMIZE); + } else { + SetWindowPos(h_wnd, NULL, r.left, r.top, r.right - r.left, r.bottom - r.top, SWP_FRAMECHANGED); + ShowWindow(h_wnd, SW_RESTORE); + } + + is_full_screen = false; + } else { + // Save if window is maximized or not + WINDOWPLACEMENT window_placement; + window_placement.length = sizeof(WINDOWPLACEMENT); + GetWindowPlacement(h_wnd, &window_placement); + last_maximized_state = window_placement.showCmd == SW_SHOWMAXIMIZED; + + // Save window position and size if the window is not maximized + GetWindowRect(h_wnd, &last_window_rect); + + // Get in which monitor the window is + HMONITOR h_monitor = MonitorFromWindow(h_wnd, MONITOR_DEFAULTTONEAREST); + + // Get info from that monitor + MONITORINFOEX monitor_info; + monitor_info.cbSize = sizeof(MONITORINFOEX); + GetMonitorInfo(h_monitor, &monitor_info); + RECT r = monitor_info.rcMonitor; + + // Set borderless full screen to that monitor + SetWindowLongPtr(h_wnd, GWL_STYLE, WS_VISIBLE | WS_POPUP); + SetWindowPos(h_wnd, HWND_TOP, r.left, r.top, r.right - r.left, r.bottom - r.top, SWP_FRAMECHANGED); + + is_full_screen = true; + } +} + +static void create_render_target_views(uint32_t width, uint32_t height) { + if (width == 0 || height == 0) { + return; + } + if (d3d.current_width == width && d3d.current_height == height) { + return; + } + + // Release previous stuff (if any) + + d3d.backbuffer_view.Reset(); + d3d.depth_stencil_view.Reset(); + + // Resize swap chain + + if (lower_latency) { + UINT swap_chain_flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + ThrowIfFailed(d3d.swap_chain2->ResizeBuffers(0, width, height, DXGI_FORMAT_UNKNOWN, swap_chain_flags), + h_wnd, "Failed to resize IDXGISwapChain2 buffers."); + } else { + UINT swap_chain_flags = 0; + 
ThrowIfFailed(d3d.swap_chain->ResizeBuffers(0, width, height, DXGI_FORMAT_UNKNOWN, swap_chain_flags), + h_wnd, "Failed to resize IDXGISwapChain buffers."); + } + + // Create back buffer + + ComPtr backbuffer_texture; + if (lower_latency) { + ThrowIfFailed(d3d.swap_chain2->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID *) backbuffer_texture.GetAddressOf()), + h_wnd, "Failed to get backbuffer from IDXGISwapChain2."); + } else { + ThrowIfFailed(d3d.swap_chain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID *) backbuffer_texture.GetAddressOf()), + h_wnd, "Failed to get backbuffer from IDXGISwapChain."); + } + + ThrowIfFailed(d3d.device->CreateRenderTargetView(backbuffer_texture.Get(), NULL, d3d.backbuffer_view.GetAddressOf()), + h_wnd, "Failed to create render target view."); + + // Create depth buffer + + D3D11_TEXTURE2D_DESC depth_stencil_texture_desc; + ZeroMemory(&depth_stencil_texture_desc, sizeof(D3D11_TEXTURE2D_DESC)); + + depth_stencil_texture_desc.Width = width; + depth_stencil_texture_desc.Height = height; + depth_stencil_texture_desc.MipLevels = 1; + depth_stencil_texture_desc.ArraySize = 1; + depth_stencil_texture_desc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; + depth_stencil_texture_desc.SampleDesc = d3d.sample_description; + depth_stencil_texture_desc.Usage = D3D11_USAGE_DEFAULT; + depth_stencil_texture_desc.BindFlags = D3D11_BIND_DEPTH_STENCIL; + depth_stencil_texture_desc.CPUAccessFlags = 0; + depth_stencil_texture_desc.MiscFlags = 0; + + ComPtr depth_stencil_texture; + ThrowIfFailed(d3d.device->CreateTexture2D(&depth_stencil_texture_desc, NULL, depth_stencil_texture.GetAddressOf())); + ThrowIfFailed(d3d.device->CreateDepthStencilView(depth_stencil_texture.Get(), NULL, d3d.depth_stencil_view.GetAddressOf())); + + // Save resolution + + d3d.current_width = width; + d3d.current_height = height; +} + +static void calculate_sync_interval() { + const POINT ptZero = { 0, 0 }; + HMONITOR h_monitor = MonitorFromPoint(ptZero, MONITOR_DEFAULTTOPRIMARY); + + 
MONITORINFOEX monitor_info; + monitor_info.cbSize = sizeof(MONITORINFOEX); + GetMonitorInfo(h_monitor, &monitor_info); + + DEVMODE dev_mode; + dev_mode.dmSize = sizeof(DEVMODE); + dev_mode.dmDriverExtra = 0; + EnumDisplaySettings(monitor_info.szDevice, ENUM_CURRENT_SETTINGS, &dev_mode); + + if (dev_mode.dmDisplayFrequency >= 29 && dev_mode.dmDisplayFrequency <= 31) { + sync_interval = 1; + } else if (dev_mode.dmDisplayFrequency >= 59 && dev_mode.dmDisplayFrequency <= 61) { + sync_interval = 2; + } else if (dev_mode.dmDisplayFrequency >= 89 && dev_mode.dmDisplayFrequency <= 91) { + sync_interval = 3; + } else if (dev_mode.dmDisplayFrequency >= 119 && dev_mode.dmDisplayFrequency <= 121) { + sync_interval = 4; + } else { + sync_interval = 0; + } +} + +LRESULT CALLBACK gfx_d3d11_dxgi_wnd_proc(HWND h_wnd, UINT message, WPARAM w_param, LPARAM l_param) { + switch (message) { + case WM_SIZE: { + RECT rect; + GetClientRect(h_wnd, &rect); + create_render_target_views(rect.right - rect.left, rect.bottom - rect.top); + break; + } + case WM_EXITSIZEMOVE: { + calculate_sync_interval(); + break; + } + case WM_GETMINMAXINFO: { + RECT wr = { 0, 0, WINDOW_CLIENT_MIN_WIDTH, WINDOW_CLIENT_MIN_HEIGHT }; + AdjustWindowRect(&wr, WS_OVERLAPPEDWINDOW, FALSE); + LPMINMAXINFO lpMMI = (LPMINMAXINFO) l_param; + lpMMI->ptMinTrackSize.x = wr.right - wr.left; + lpMMI->ptMinTrackSize.y = wr.bottom - wr.top; + break; + } + case WM_DISPLAYCHANGE: { + calculate_sync_interval(); + break; + } + case WM_DESTROY: { +#if DEBUG_D3D + d3d.debug->ReportLiveDeviceObjects(D3D11_RLDO_DETAIL); +#endif + exit(0); + break; + } + case WM_ACTIVATEAPP: { + if (d3d.on_all_keys_up != nullptr) { + d3d.on_all_keys_up(); + } + break; + } + case WM_SYSKEYDOWN: { + if ((w_param == VK_RETURN) && ((l_param & 1 << 30) == 0)) { + toggle_borderless_window_full_screen(); + break; + } else { + return DefWindowProcW(h_wnd, message, w_param, l_param); + } + } + case WM_KEYDOWN: { + if (d3d.on_key_down != nullptr) { + 
d3d.on_key_down((l_param >> 16) & 0x1ff); + } + break; + } + case WM_KEYUP: { + if (d3d.on_key_up != nullptr) { + d3d.on_key_up((l_param >> 16) & 0x1ff); + } + break; + } + default: { + return DefWindowProcW(h_wnd, message, w_param, l_param); + } + } + return 0; +} + +static void gfx_d3d11_dxgi_init(const char *window_title) { + // Prepare window title + + wchar_t w_title[512]; + mbstowcs(w_title, window_title, strlen(window_title) + 1); + + // Create window + + WNDCLASSEXW wcex; + ZeroMemory(&wcex, sizeof(WNDCLASSEX)); + + wcex.cbSize = sizeof(WNDCLASSEX); + wcex.style = CS_HREDRAW | CS_VREDRAW; + wcex.lpfnWndProc = gfx_d3d11_dxgi_wnd_proc; + wcex.cbClsExtra = 0; + wcex.cbWndExtra = 0; + wcex.hInstance = nullptr; + wcex.hIcon = nullptr; + wcex.hCursor = LoadCursor(nullptr, IDC_ARROW); + wcex.hbrBackground = (HBRUSH)(COLOR_WINDOW + 1); + wcex.lpszMenuName = nullptr; + wcex.lpszClassName = WINCLASS_NAME; + wcex.hIconSm = nullptr; + + RegisterClassExW(&wcex); + + RECT wr = { 0, 0, DESIRED_SCREEN_WIDTH, DESIRED_SCREEN_HEIGHT }; + AdjustWindowRect(&wr, WS_OVERLAPPEDWINDOW, FALSE); + + h_wnd = CreateWindowW(WINCLASS_NAME, w_title, WS_OVERLAPPEDWINDOW, + CW_USEDEFAULT, 0, wr.right - wr.left, wr.bottom - wr.top, nullptr, nullptr, + nullptr, nullptr); + + is_full_screen = false; + + // Center window + + int screen_width = GetSystemMetrics(SM_CXSCREEN); + int screen_height = GetSystemMetrics(SM_CYSCREEN); + int xPos = (screen_width - wr.right) * 0.5; + int yPos = (screen_height - wr.bottom) * 0.5; + SetWindowPos(h_wnd, 0, xPos, yPos, 0, 0, SWP_NOZORDER | SWP_NOSIZE); + + // Check if a lower latency flip model can be used + + lower_latency = IsWindows8Point1OrGreater(); + + // Create D3D11 device + +#if DEBUG_D3D + UINT device_creation_flags = D3D11_CREATE_DEVICE_DEBUG; +#else + UINT device_creation_flags = 0; +#endif + + D3D_FEATURE_LEVEL FeatureLevels[] = { + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0, + D3D_FEATURE_LEVEL_9_3, + 
D3D_FEATURE_LEVEL_9_2, + D3D_FEATURE_LEVEL_9_1 + }; + + ThrowIfFailed(D3D11CreateDevice( + nullptr, + D3D_DRIVER_TYPE_HARDWARE, + nullptr, + device_creation_flags, + FeatureLevels, + ARRAYSIZE(FeatureLevels), + D3D11_SDK_VERSION, + d3d.device.GetAddressOf(), + NULL, + d3d.context.GetAddressOf()), + h_wnd, "Failed to create D3D11 device."); + + // Sample description to be used in back buffer and depth buffer + + d3d.sample_description.Count = 1; + d3d.sample_description.Quality = 0; + + // Create the swap chain + + if (lower_latency) { + + // Create swap chain description + + DXGI_SWAP_CHAIN_DESC1 swap_chain_desc1; + ZeroMemory(&swap_chain_desc1, sizeof(DXGI_SWAP_CHAIN_DESC1)); + + swap_chain_desc1.Width = DESIRED_SCREEN_WIDTH; + swap_chain_desc1.Height = DESIRED_SCREEN_HEIGHT; + swap_chain_desc1.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + swap_chain_desc1.Stereo = FALSE; + swap_chain_desc1.SampleDesc = d3d.sample_description; + swap_chain_desc1.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swap_chain_desc1.BufferCount = 2; + swap_chain_desc1.Scaling = DXGI_SCALING_STRETCH; + swap_chain_desc1.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + swap_chain_desc1.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED; + swap_chain_desc1.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + + // Create DXGI Factory + + ComPtr dxgi_device2; + ThrowIfFailed(d3d.device.Get()->QueryInterface(__uuidof(IDXGIDevice2), (void **) dxgi_device2.GetAddressOf()), + h_wnd, "Failed to get IDXGIDevice2."); + + ComPtr dxgi_adapter; + ThrowIfFailed(dxgi_device2.Get()->GetAdapter(dxgi_adapter.GetAddressOf()), + h_wnd, "Failed to get IDXGIAdapter."); + + ComPtr dxgi_factory2; + ThrowIfFailed(dxgi_adapter.Get()->GetParent(__uuidof(IDXGIFactory2), (void **) dxgi_factory2.GetAddressOf()), + h_wnd, "Failed to get IDXGIFactory2."); + + // Create Swap Chain + + ComPtr swap_chain1; + ThrowIfFailed(dxgi_factory2.Get()->CreateSwapChainForHwnd(d3d.device.Get(), h_wnd, &swap_chain_desc1, NULL, NULL, 
swap_chain1.GetAddressOf()), + h_wnd, "Failed to create IDXGISwapChain1."); + + ThrowIfFailed(swap_chain1.As(&d3d.swap_chain2), + h_wnd, "Failed to get IDXGISwapChain2 from IDXGISwapChain1."); + + ThrowIfFailed(d3d.swap_chain2.Get()->SetMaximumFrameLatency(1), + h_wnd, "Failed to Set Maximum Frame Latency to 1."); + + d3d.frame_latency_waitable_object = d3d.swap_chain2.Get()->GetFrameLatencyWaitableObject(); + + // Prevent DXGI from intercepting Alt+Enter + + ThrowIfFailed(dxgi_factory2.Get()->MakeWindowAssociation(h_wnd, DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER), + h_wnd, "Failed to call MakeWindowAssociation."); + + } else { + + // Create swap chain description + + DXGI_SWAP_CHAIN_DESC swap_chain_desc; + ZeroMemory(&swap_chain_desc, sizeof(DXGI_SWAP_CHAIN_DESC)); + + swap_chain_desc.BufferDesc.Width = DESIRED_SCREEN_WIDTH; + swap_chain_desc.BufferDesc.Height = DESIRED_SCREEN_HEIGHT; + swap_chain_desc.BufferDesc.RefreshRate.Numerator = 0; + swap_chain_desc.BufferDesc.RefreshRate.Denominator = 1; + swap_chain_desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + swap_chain_desc.BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED; + swap_chain_desc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED; + swap_chain_desc.SampleDesc = d3d.sample_description; + swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swap_chain_desc.BufferCount = 1; + swap_chain_desc.OutputWindow = h_wnd; + swap_chain_desc.Windowed = TRUE; + swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; + swap_chain_desc.Flags = 0; + + // Create DXGI Factory + + ComPtr dxgi_device; + ThrowIfFailed(d3d.device.Get()->QueryInterface(__uuidof(IDXGIDevice), (void **) dxgi_device.GetAddressOf()), + h_wnd, "Failed to get IDXGIDevice."); + + ComPtr dxgi_adapter; + ThrowIfFailed(dxgi_device.Get()->GetAdapter(dxgi_adapter.GetAddressOf()), + h_wnd, "Failed to get IDXGIAdapter."); + + ComPtr dxgi_factory; + ThrowIfFailed(dxgi_adapter.Get()->GetParent(__uuidof(IDXGIFactory), 
(void **) dxgi_factory.GetAddressOf()), + h_wnd, "Failed to get IDXGIFactory."); + + // Create Swap Chain + + ThrowIfFailed(dxgi_factory.Get()->CreateSwapChain(d3d.device.Get(), &swap_chain_desc, d3d.swap_chain.GetAddressOf()), + h_wnd, "Failed to create IDXGISwapChain."); + + // Prevent DXGI from intercepting Alt+Enter + + ThrowIfFailed(dxgi_factory.Get()->MakeWindowAssociation(h_wnd, DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER), + h_wnd, "Failed to call MakeWindowAssociation."); + } + + // Create D3D Debug device if in debug mode + +#if DEBUG_D3D + ThrowIfFailed(d3d.device->QueryInterface(__uuidof(ID3D11Debug), (void **) d3d.debug.GetAddressOf()), + h_wnd, "Failed to get ID3D11Debug device."); +#endif + + // Create views + + create_render_target_views(DESIRED_SCREEN_WIDTH, DESIRED_SCREEN_HEIGHT); + + // Create main vertex buffer + + D3D11_BUFFER_DESC vertex_buffer_desc; + ZeroMemory(&vertex_buffer_desc, sizeof(D3D11_BUFFER_DESC)); + + vertex_buffer_desc.Usage = D3D11_USAGE_DYNAMIC; + vertex_buffer_desc.ByteWidth = 256 * 26 * 3 * sizeof(float); // Same as buf_vbo size in gfx_pc + vertex_buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + vertex_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + vertex_buffer_desc.MiscFlags = 0; + + ThrowIfFailed(d3d.device->CreateBuffer(&vertex_buffer_desc, NULL, d3d.vertex_buffer.GetAddressOf()), + h_wnd, "Failed to create vertex buffer."); + + // Create per-frame constant buffer + + D3D11_BUFFER_DESC constant_buffer_desc; + ZeroMemory(&constant_buffer_desc, sizeof(D3D11_BUFFER_DESC)); + + constant_buffer_desc.Usage = D3D11_USAGE_DYNAMIC; + constant_buffer_desc.ByteWidth = sizeof(PerFrameCB); + constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + constant_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + constant_buffer_desc.MiscFlags = 0; + + ThrowIfFailed(d3d.device->CreateBuffer(&constant_buffer_desc, NULL, d3d.per_frame_cb.GetAddressOf()), + h_wnd, "Failed to create per-frame constant buffer."); + + 
d3d.context->PSSetConstantBuffers(0, 1, d3d.per_frame_cb.GetAddressOf()); + + // Create per-draw constant buffer + + constant_buffer_desc.Usage = D3D11_USAGE_DYNAMIC; + constant_buffer_desc.ByteWidth = sizeof(PerDrawCB); + constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + constant_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + constant_buffer_desc.MiscFlags = 0; + + ThrowIfFailed(d3d.device->CreateBuffer(&constant_buffer_desc, NULL, d3d.per_draw_cb.GetAddressOf()), + h_wnd, "Failed to create per-draw constant buffer."); + + d3d.context->PSSetConstantBuffers(1, 1, d3d.per_draw_cb.GetAddressOf()); + + // Initialize some timer values + + QueryPerformanceFrequency(&frequency); + accumulated_time.QuadPart = 0; + + // Decide vsync interval + + calculate_sync_interval(); + + // Show the window + + ShowWindow(h_wnd, SW_SHOW); +} + +static void gfx_d3d11_dxgi_shutdown(void) { + if (d3d.swap_chain) d3d.swap_chain.Get()->SetFullscreenState(false, nullptr); + if (d3d.swap_chain2) d3d.swap_chain2.Get()->SetFullscreenState(false, nullptr); + + for (unsigned int i = 0; i < sizeof(d3d.shader_program_pool) / sizeof(d3d.shader_program_pool[0]); ++i) { + d3d.shader_program_pool[i].vertex_shader.Reset(); + d3d.shader_program_pool[i].pixel_shader.Reset(); + d3d.shader_program_pool[i].input_layout.Reset(); + d3d.shader_program_pool[i].blend_state.Reset(); + } + + d3d.rasterizer_state.Reset(); + d3d.backbuffer_view.Reset(); + d3d.depth_stencil_view.Reset(); + d3d.depth_stencil_state.Reset(); + d3d.context.Reset(); + d3d.device.Reset(); + d3d.swap_chain.Reset(); + d3d.swap_chain2.Reset(); + + if (h_wnd) { + DestroyWindow(h_wnd); + h_wnd = nullptr; + } +} + +static void gfx_d3d11_dxgi_set_keyboard_callbacks(bool (*on_key_down)(int scancode), bool (*on_key_up)(int scancode), void (*on_all_keys_up)(void)) { + d3d.on_key_down = on_key_down; + d3d.on_key_up = on_key_up; + d3d.on_all_keys_up = on_all_keys_up; +} + +static void gfx_d3d11_dxgi_main_loop(void 
(*run_one_game_iter)(void)) { + MSG msg = { 0 }; + + bool quit = false; + while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) { + TranslateMessage(&msg); + DispatchMessage(&msg); + if (msg.message == WM_QUIT) { + quit = true; + } + } + + if (quit) { + return; + } + + if (IsIconic(h_wnd)) { + Sleep(50); + return; + } + + d3d.run_one_game_iter = run_one_game_iter; + + if (sync_interval == 0) { + LARGE_INTEGER current_time; + QueryPerformanceCounter(&current_time); + + LARGE_INTEGER elapsed_time_microseconds; + elapsed_time_microseconds.QuadPart = current_time.QuadPart - last_time.QuadPart; + elapsed_time_microseconds.QuadPart *= 1000000; + elapsed_time_microseconds.QuadPart /= frequency.QuadPart; + + accumulated_time.QuadPart += elapsed_time_microseconds.QuadPart; + last_time = current_time; + + const uint32_t FRAME_TIME = 1000000 / 30; + + if (accumulated_time.QuadPart >= FRAME_TIME) { + accumulated_time.QuadPart %= FRAME_TIME; + + if (lower_latency) { + WaitForSingleObjectEx(d3d.frame_latency_waitable_object, 1000, true); + } + + if (d3d.run_one_game_iter != nullptr) { + d3d.run_one_game_iter(); + } + + if (lower_latency) { + d3d.swap_chain2->Present(1, 0); + } else { + d3d.swap_chain->Present(1, 0); + } + } else { + Sleep(1); + } + } else { + if (lower_latency) { + WaitForSingleObjectEx(d3d.frame_latency_waitable_object, 1000, true); + } + + if (d3d.run_one_game_iter != nullptr) { + d3d.run_one_game_iter(); + } + + if (lower_latency) { + d3d.swap_chain2->Present(sync_interval, 0); + } else { + d3d.swap_chain->Present(sync_interval, 0); + } + } +} + +static void gfx_d3d11_dxgi_get_dimensions(uint32_t *width, uint32_t *height) { + *width = d3d.current_width; + *height = d3d.current_height; +} + +static void gfx_d3d11_dxgi_handle_events(void) { +} + +static bool gfx_d3d11_dxgi_start_frame(void) { + return true; +} + +static void gfx_d3d11_dxgi_swap_buffers_begin(void) { +} + +static void gfx_d3d11_dxgi_swap_buffers_end(void) { +} + +double gfx_d3d11_dxgi_get_time(void) { + 
return 0.0; +} + +static bool gfx_d3d11_z_is_from_0_to_1(void) { + return true; +} + +static void gfx_d3d11_unload_shader(struct ShaderProgram *old_prg) { +} + +static void gfx_d3d11_load_shader(struct ShaderProgram *new_prg) { + d3d.shader_program = new_prg; +} + +static struct ShaderProgram *gfx_d3d11_create_and_load_new_shader(uint32_t shader_id) { + CCFeatures cc_features; + get_cc_features(shader_id, &cc_features); + + char buf[4096]; + size_t len = 0; + size_t num_floats = 4; + + // Pixel shader input struct + + append_line(buf, &len, "struct PSInput {"); + append_line(buf, &len, " float4 position : SV_POSITION;"); + + if (cc_features.used_textures[0] || cc_features.used_textures[1]) { + append_line(buf, &len, " float2 uv : TEXCOORD;"); + num_floats += 2; + } + + if (cc_features.opt_alpha && cc_features.opt_noise) { + append_line(buf, &len, " float4 screenPos : TEXCOORD1;"); + } + + if (cc_features.opt_fog) { + append_line(buf, &len, " float4 fog : FOG;"); + num_floats += 4; + } + for (uint32_t i = 0; i < cc_features.num_inputs; i++) { + len += sprintf(buf + len, " float%d input%d : INPUT%d;\r\n", cc_features.opt_alpha ? 4 : 3, i + 1, i); + num_floats += cc_features.opt_alpha ? 
4 : 3; + } + append_line(buf, &len, "};"); + + // Textures and samplers + + if (cc_features.used_textures[0]) { + append_line(buf, &len, "Texture2D g_texture0 : register(t0);"); + append_line(buf, &len, "SamplerState g_sampler0 : register(s0);"); + } + if (cc_features.used_textures[1]) { + append_line(buf, &len, "Texture2D g_texture1 : register(t1);"); + append_line(buf, &len, "SamplerState g_sampler1 : register(s1);"); + } + + // Constant buffer and random function + + if (cc_features.opt_alpha && cc_features.opt_noise) { + append_line(buf, &len, "cbuffer PerFrameCB : register(b0) {"); + append_line(buf, &len, " uint noise_frame;"); + append_line(buf, &len, " float2 noise_scale;"); + append_line(buf, &len, "}"); + + append_line(buf, &len, "float random(in float3 value) {"); + append_line(buf, &len, " float random = dot(value, float3(12.9898, 78.233, 37.719));"); + append_line(buf, &len, " return frac(sin(random) * 143758.5453);"); + append_line(buf, &len, "}"); + } + + // 3 point texture filtering + // Original author: ArthurCarvalho + // Based on GLSL implementation by twinaphex, mupen64plus-libretro project. 
+ + if (configFiltering == 2) { + if (cc_features.used_textures[0] || cc_features.used_textures[1]) { + append_line(buf, &len, "cbuffer PerDrawCB : register(b1) {"); + append_line(buf, &len, " struct {"); + append_line(buf, &len, " uint width;"); + append_line(buf, &len, " uint height;"); + append_line(buf, &len, " bool linear_filtering;"); + append_line(buf, &len, " } textures[2];"); + append_line(buf, &len, "}"); + append_line(buf, &len, "#define TEX_OFFSET(tex, tSampler, texCoord, off, texSize) tex.Sample(tSampler, texCoord - off / texSize)"); + append_line(buf, &len, "float4 tex2D3PointFilter(in Texture2D tex, in SamplerState tSampler, in float2 texCoord, in float2 texSize) {"); + append_line(buf, &len, " float2 offset = frac(texCoord * texSize - float2(0.5, 0.5));"); + append_line(buf, &len, " offset -= step(1.0, offset.x + offset.y);"); + append_line(buf, &len, " float4 c0 = TEX_OFFSET(tex, tSampler, texCoord, offset, texSize);"); + append_line(buf, &len, " float4 c1 = TEX_OFFSET(tex, tSampler, texCoord, float2(offset.x - sign(offset.x), offset.y), texSize);"); + append_line(buf, &len, " float4 c2 = TEX_OFFSET(tex, tSampler, texCoord, float2(offset.x, offset.y - sign(offset.y)), texSize);"); + append_line(buf, &len, " return c0 + abs(offset.x)*(c1-c0) + abs(offset.y)*(c2-c0);"); + append_line(buf, &len, "}"); + } + } + + // Vertex shader + + append_str(buf, &len, "PSInput VSMain(float4 position : POSITION"); + if (cc_features.used_textures[0] || cc_features.used_textures[1]) { + append_str(buf, &len, ", float2 uv : TEXCOORD"); + } + if (cc_features.opt_fog) { + append_str(buf, &len, ", float4 fog : FOG"); + } + for (uint32_t i = 0; i < cc_features.num_inputs; i++) { + len += sprintf(buf + len, ", float%d input%d : INPUT%d", cc_features.opt_alpha ? 
4 : 3, i + 1, i); + } + append_line(buf, &len, ") {"); + append_line(buf, &len, " PSInput result;"); + append_line(buf, &len, " result.position = position;"); + if (cc_features.opt_alpha && cc_features.opt_noise) { + append_line(buf, &len, " result.screenPos = position;"); + } + if (cc_features.used_textures[0] || cc_features.used_textures[1]) { + append_line(buf, &len, " result.uv = uv;"); + } + if (cc_features.opt_fog) { + append_line(buf, &len, " result.fog = fog;"); + } + for (uint32_t i = 0; i < cc_features.num_inputs; i++) { + len += sprintf(buf + len, " result.input%d = input%d;\r\n", i + 1, i + 1); + } + append_line(buf, &len, " return result;"); + append_line(buf, &len, "}"); + + // Pixel shader + + append_line(buf, &len, "float4 PSMain(PSInput input) : SV_TARGET {"); + if (cc_features.used_textures[0]) { + if (configFiltering == 2) { + append_line(buf, &len, " float4 texVal0;"); + append_line(buf, &len, " if (textures[0].linear_filtering)"); + append_line(buf, &len, " texVal0 = tex2D3PointFilter(g_texture0, g_sampler0, input.uv, float2(textures[0].width, textures[0].height));"); + append_line(buf, &len, " else"); + append_line(buf, &len, " texVal0 = g_texture0.Sample(g_sampler0, input.uv);"); + } else { + append_line(buf, &len, " float4 texVal0 = g_texture0.Sample(g_sampler0, input.uv);"); + } + } + if (cc_features.used_textures[1]) { + if (configFiltering == 2) { + append_line(buf, &len, " float4 texVal1;"); + append_line(buf, &len, " if (textures[1].linear_filtering)"); + append_line(buf, &len, " texVal1 = tex2D3PointFilter(g_texture1, g_sampler1, input.uv, float2(textures[1].width, textures[1].height));"); + append_line(buf, &len, " else"); + append_line(buf, &len, " texVal1 = g_texture1.Sample(g_sampler1, input.uv);"); + } else { + append_line(buf, &len, " float4 texVal1 = g_texture1.Sample(g_sampler1, input.uv);"); + } + } + + append_str(buf, &len, cc_features.opt_alpha ? 
" float4 texel = " : " float3 texel = "); + if (!cc_features.color_alpha_same && cc_features.opt_alpha) { + append_str(buf, &len, "float4("); + append_formula(buf, &len, cc_features.c, cc_features.do_single[0], cc_features.do_multiply[0], cc_features.do_mix[0], false, false, true); + append_str(buf, &len, ", "); + append_formula(buf, &len, cc_features.c, cc_features.do_single[1], cc_features.do_multiply[1], cc_features.do_mix[1], true, true, true); + append_str(buf, &len, ")"); + } else { + append_formula(buf, &len, cc_features.c, cc_features.do_single[0], cc_features.do_multiply[0], cc_features.do_mix[0], cc_features.opt_alpha, false, cc_features.opt_alpha); + } + append_line(buf, &len, ";"); + + if (cc_features.opt_texture_edge && cc_features.opt_alpha) { + append_line(buf, &len, " if (texel.a > 0.3) texel.a = 1.0; else discard;"); + } + // TODO discard if alpha is 0? + if (cc_features.opt_fog) { + if (cc_features.opt_alpha) { + append_line(buf, &len, " texel = float4(lerp(texel.rgb, input.fog.rgb, input.fog.a), texel.a);"); + } else { + append_line(buf, &len, " texel = lerp(texel, input.fog.rgb, input.fog.a);"); + } + } + + if (cc_features.opt_alpha && cc_features.opt_noise) { + append_line(buf, &len, " float2 coords = (input.screenPos.xy / input.screenPos.w) * noise_scale;"); + append_line(buf, &len, " texel.a *= round(random(float3(floor(coords), noise_frame)));"); + } + + if (cc_features.opt_alpha) { + append_line(buf, &len, " return texel;"); + } else { + append_line(buf, &len, " return float4(texel, 1.0);"); + } + append_line(buf, &len, "}"); + + ComPtr vs, ps; + ComPtr error_blob; + +#if DEBUG_D3D + UINT compile_flags = D3DCOMPILE_DEBUG; +#else + UINT compile_flags = D3DCOMPILE_OPTIMIZATION_LEVEL2; +#endif + + HRESULT hr = D3DCompile(buf, len, nullptr, nullptr, nullptr, "VSMain", "vs_4_0_level_9_1", compile_flags, 0, vs.GetAddressOf(), error_blob.GetAddressOf()); + + if (FAILED(hr)) + sys_fatal("%s", (char *) error_blob->GetBufferPointer()); + + hr = 
D3DCompile(buf, len, nullptr, nullptr, nullptr, "PSMain", "ps_4_0_level_9_1", compile_flags, 0, ps.GetAddressOf(), error_blob.GetAddressOf()); + + if (FAILED(hr)) + sys_fatal("%s", (char *) error_blob->GetBufferPointer()); + + struct ShaderProgram *prg = &d3d.shader_program_pool[d3d.shader_program_pool_size++]; + + ThrowIfFailed(d3d.device->CreateVertexShader(vs->GetBufferPointer(), vs->GetBufferSize(), NULL, prg->vertex_shader.GetAddressOf())); + ThrowIfFailed(d3d.device->CreatePixelShader(ps->GetBufferPointer(), ps->GetBufferSize(), NULL, prg->pixel_shader.GetAddressOf())); + + // Input Layout + + D3D11_INPUT_ELEMENT_DESC ied[7]; + uint8_t ied_index = 0; + ied[ied_index++] = { "POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }; + if (cc_features.used_textures[0] || cc_features.used_textures[1]) { + ied[ied_index++] = { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }; + } + if (cc_features.opt_fog) { + ied[ied_index++] = { "FOG", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }; + } + for (unsigned int i = 0; i < cc_features.num_inputs; i++) { + DXGI_FORMAT format = cc_features.opt_alpha ? 
DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R32G32B32_FLOAT; + ied[ied_index++] = { "INPUT", i, format, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }; + } + + ThrowIfFailed(d3d.device->CreateInputLayout(ied, ied_index, vs->GetBufferPointer(), vs->GetBufferSize(), prg->input_layout.GetAddressOf())); + + // Blend state + + D3D11_BLEND_DESC blend_desc; + ZeroMemory(&blend_desc, sizeof(D3D11_BLEND_DESC)); + + if (cc_features.opt_alpha) { + blend_desc.RenderTarget[0].BlendEnable = true; + blend_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA; + blend_desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; + blend_desc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; + blend_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; + blend_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; + blend_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; + blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + } else { + blend_desc.RenderTarget[0].BlendEnable = false; + blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + } + + ThrowIfFailed(d3d.device->CreateBlendState(&blend_desc, prg->blend_state.GetAddressOf())); + + // Save some values + + prg->shader_id = shader_id; + prg->num_inputs = cc_features.num_inputs; + prg->num_floats = num_floats; + prg->used_textures[0] = cc_features.used_textures[0]; + prg->used_textures[1] = cc_features.used_textures[1]; + + return d3d.shader_program = prg; +} + +static struct ShaderProgram *gfx_d3d11_lookup_shader(uint32_t shader_id) { + for (size_t i = 0; i < d3d.shader_program_pool_size; i++) { + if (d3d.shader_program_pool[i].shader_id == shader_id) { + return &d3d.shader_program_pool[i]; + } + } + return NULL; +} + +static void gfx_d3d11_shader_get_info(struct ShaderProgram *prg, uint8_t *num_inputs, bool used_textures[2]) { + *num_inputs = prg->num_inputs; + used_textures[0] = prg->used_textures[0]; + used_textures[1] = prg->used_textures[1]; +} 
+
+// Appends an empty texture slot and returns its index as the texture id.
+static uint32_t gfx_d3d11_new_texture(void) {
+    d3d.textures.resize(d3d.textures.size() + 1);
+    return (uint32_t)(d3d.textures.size() - 1);
+}
+
+// Makes texture_id the texture of the given tile and the target of the next upload.
+static void gfx_d3d11_select_texture(int tile, uint32_t texture_id) {
+    d3d.current_tile = tile;
+    d3d.current_texture_ids[tile] = texture_id;
+}
+
+// Maps N64 clamp/mirror flags to a D3D11 address mode; clamp wins over mirror.
+static D3D11_TEXTURE_ADDRESS_MODE gfx_cm_to_d3d11(uint32_t val) {
+    if (val & G_TX_CLAMP) {
+        return D3D11_TEXTURE_ADDRESS_CLAMP;
+    }
+    return (val & G_TX_MIRROR) ? D3D11_TEXTURE_ADDRESS_MIRROR : D3D11_TEXTURE_ADDRESS_WRAP;
+}
+
+// Creates an immutable RGBA8 texture from rgba32_buf (width * height pixels, 4 bytes each)
+// and stores a shader resource view for it in the currently selected texture slot.
+static void gfx_d3d11_upload_texture(uint8_t *rgba32_buf, int width, int height) {
+    // Create texture
+
+    D3D11_TEXTURE2D_DESC texture_desc;
+    ZeroMemory(&texture_desc, sizeof(D3D11_TEXTURE2D_DESC));
+
+    texture_desc.Width = width;
+    texture_desc.Height = height;
+    texture_desc.Usage = D3D11_USAGE_IMMUTABLE;
+    texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
+    texture_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+    texture_desc.CPUAccessFlags = 0;
+    texture_desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS ?
+    texture_desc.ArraySize = 1;
+    texture_desc.MipLevels = 1;
+    texture_desc.SampleDesc.Count = 1;
+    texture_desc.SampleDesc.Quality = 0;
+
+    D3D11_SUBRESOURCE_DATA resource_data;
+    resource_data.pSysMem = rgba32_buf;
+    resource_data.SysMemPitch = width * 4;
+    resource_data.SysMemSlicePitch = resource_data.SysMemPitch * height;
+
+    ComPtr<ID3D11Texture2D> texture;
+    ThrowIfFailed(d3d.device->CreateTexture2D(&texture_desc, &resource_data, texture.GetAddressOf()));
+
+    // Create shader resource view from texture
+
+    D3D11_SHADER_RESOURCE_VIEW_DESC resource_view_desc;
+    ZeroMemory(&resource_view_desc, sizeof(D3D11_SHADER_RESOURCE_VIEW_DESC));
+
+    resource_view_desc.Format = texture_desc.Format;
+    resource_view_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
+    resource_view_desc.Texture2D.MostDetailedMip = 0;
+    resource_view_desc.Texture2D.MipLevels = -1;
+
+    TextureData *texture_data = &d3d.textures[d3d.current_texture_ids[d3d.current_tile]];
+    texture_data->width = width;
+    texture_data->height = height;
+
+    // The slot may already hold a view from an earlier upload to the same id. GetAddressOf()
+    // does not release it, so drop it first (same pattern as the sampler state below).
+    texture_data->resource_view.Reset();
+
+    ThrowIfFailed(d3d.device->CreateShaderResourceView(texture.Get(), &resource_view_desc, texture_data->resource_view.GetAddressOf()));
+}
+
+// Rebuilds the sampler state of the tile's texture from the filter and address modes.
+// With configFiltering == 2 the sampler is always point filtered; the linear_filter flag is
+// then only recorded on the texture for the shader-side 3-point filter.
+static void gfx_d3d11_set_sampler_parameters(int tile, bool linear_filter, uint32_t cms, uint32_t cmt) {
+    D3D11_SAMPLER_DESC sampler_desc;
+    ZeroMemory(&sampler_desc, sizeof(D3D11_SAMPLER_DESC));
+
+    if (configFiltering == 2) {
+        sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
+    } else {
+        sampler_desc.Filter = linear_filter ? D3D11_FILTER_MIN_MAG_MIP_LINEAR : D3D11_FILTER_MIN_MAG_MIP_POINT;
+    }
+
+    sampler_desc.AddressU = gfx_cm_to_d3d11(cms);
+    sampler_desc.AddressV = gfx_cm_to_d3d11(cmt);
+    sampler_desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP;
+    sampler_desc.MinLOD = 0;
+    sampler_desc.MaxLOD = D3D11_FLOAT32_MAX;
+
+    TextureData *texture_data = &d3d.textures[d3d.current_texture_ids[tile]];
+    texture_data->linear_filtering = linear_filter;
+
+    // This function is called twice per texture, the first one only to set default values.
+    // Maybe that could be skipped? Anyway, make sure to release the first default sampler
+    // state before setting the actual one.
+    texture_data->sampler_state.Reset();
+
+    ThrowIfFailed(d3d.device->CreateSamplerState(&sampler_desc, texture_data->sampler_state.GetAddressOf()));
+}
+
+// The three setters below only record the requested state; gfx_d3d11_draw_triangles()
+// turns changes into D3D11 state objects.
+static void gfx_d3d11_set_depth_test(bool depth_test) {
+    d3d.depth_test = depth_test;
+}
+
+static void gfx_d3d11_set_depth_mask(bool depth_mask) {
+    d3d.depth_mask = depth_mask;
+}
+
+static void gfx_d3d11_set_zmode_decal(bool zmode_decal) {
+    d3d.zmode_decal = zmode_decal;
+}
+
+// Sets the viewport. The incoming y is measured from the bottom edge, so it is flipped
+// against the current backbuffer height to get a top-left origin.
+static void gfx_d3d11_set_viewport(int x, int y, int width, int height) {
+    D3D11_VIEWPORT viewport;
+    viewport.TopLeftX = x;
+    viewport.TopLeftY = d3d.current_height - y - height;
+    viewport.Width = width;
+    viewport.Height = height;
+    viewport.MinDepth = 0.0f;
+    viewport.MaxDepth = 1.0f;
+
+    d3d.context->RSSetViewports(1, &viewport);
+}
+
+// Sets the scissor rectangle, with the same vertical flip as the viewport.
+static void gfx_d3d11_set_scissor(int x, int y, int width, int height) {
+    D3D11_RECT rect;
+    rect.left = x;
+    rect.top = d3d.current_height - y - height;
+    rect.right = x + width;
+    rect.bottom = d3d.current_height - y;
+
+    d3d.context->RSSetScissorRects(1, &rect);
+}
+
+static void gfx_d3d11_set_use_alpha(bool use_alpha) {
+    // Already part of the pipeline state from shader info
+}
+
+// Draws buf_vbo_num_tris triangles from buf_vbo (buf_vbo_len floats) with the current
+// shader program. Depth, rasterizer, texture, sampler, shader and blend state are only
+// re-applied when they differ from what was bound by the previous draw.
+static void gfx_d3d11_draw_triangles(float buf_vbo[], size_t buf_vbo_len, size_t buf_vbo_num_tris) {
+
+    if (d3d.last_depth_test != d3d.depth_test || d3d.last_depth_mask != d3d.depth_mask) {
+        d3d.last_depth_test = d3d.depth_test;
+        d3d.last_depth_mask = d3d.depth_mask;
+
+        d3d.depth_stencil_state.Reset();
+
+        D3D11_DEPTH_STENCIL_DESC depth_stencil_desc;
+        ZeroMemory(&depth_stencil_desc, sizeof(D3D11_DEPTH_STENCIL_DESC));
+
+        depth_stencil_desc.DepthEnable = d3d.depth_test;
+        depth_stencil_desc.DepthWriteMask = d3d.depth_mask ? D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO;
+        depth_stencil_desc.DepthFunc = D3D11_COMPARISON_LESS_EQUAL;
+        depth_stencil_desc.StencilEnable = false;
+
+        ThrowIfFailed(d3d.device->CreateDepthStencilState(&depth_stencil_desc, d3d.depth_stencil_state.GetAddressOf()));
+        d3d.context->OMSetDepthStencilState(d3d.depth_stencil_state.Get(), 0);
+    }
+
+    if (d3d.last_zmode_decal != d3d.zmode_decal) {
+        d3d.last_zmode_decal = d3d.zmode_decal;
+
+        d3d.rasterizer_state.Reset();
+
+        D3D11_RASTERIZER_DESC rasterizer_desc;
+        ZeroMemory(&rasterizer_desc, sizeof(D3D11_RASTERIZER_DESC));
+
+        rasterizer_desc.FillMode = D3D11_FILL_SOLID;
+        rasterizer_desc.CullMode = D3D11_CULL_NONE;
+        rasterizer_desc.FrontCounterClockwise = true;
+        rasterizer_desc.DepthBias = 0;
+        // Decals get a slope-scaled bias instead of a separate depth mode.
+        rasterizer_desc.SlopeScaledDepthBias = d3d.zmode_decal ? -2.0f : 0.0f;
+        rasterizer_desc.DepthBiasClamp = 0.0f;
+        rasterizer_desc.DepthClipEnable = true;
+        rasterizer_desc.ScissorEnable = true;
+        rasterizer_desc.MultisampleEnable = false;
+        rasterizer_desc.AntialiasedLineEnable = false;
+
+        ThrowIfFailed(d3d.device->CreateRasterizerState(&rasterizer_desc, d3d.rasterizer_state.GetAddressOf()));
+        d3d.context->RSSetState(d3d.rasterizer_state.Get());
+    }
+
+    bool textures_changed = false;
+
+    for (int i = 0; i < 2; i++) {
+        if (!d3d.shader_program->used_textures[i]) {
+            continue;
+        }
+
+        TextureData &tex = d3d.textures[d3d.current_texture_ids[i]];
+        const bool view_changed = d3d.last_resource_views[i].Get() != tex.resource_view.Get();
+
+        if (view_changed) {
+            d3d.last_resource_views[i] = tex.resource_view.Get();
+            d3d.context->PSSetShaderResources(i, 1, tex.resource_view.GetAddressOf());
+        }
+
+        // The filter flag can change without the view changing (set_sampler_parameters on
+        // an already bound texture), so it is compared on its own.
+        if (configFiltering == 2 &&
+            (view_changed || (bool) d3d.per_draw_cb_data.textures[i].linear_filtering != (bool) tex.linear_filtering)) {
+            d3d.per_draw_cb_data.textures[i].width = tex.width;
+            d3d.per_draw_cb_data.textures[i].height = tex.height;
+            d3d.per_draw_cb_data.textures[i].linear_filtering = tex.linear_filtering;
+            textures_changed = true;
+        }
+
+        // Checked independently of the view: set_sampler_parameters() replaces the sampler
+        // state of a texture whose view may already be bound.
+        if (d3d.last_sampler_states[i].Get() != tex.sampler_state.Get()) {
+            d3d.last_sampler_states[i] = tex.sampler_state.Get();
+            d3d.context->PSSetSamplers(i, 1, tex.sampler_state.GetAddressOf());
+        }
+    }
+
+    // Set per-draw constant buffer
+
+    if (textures_changed) {
+        D3D11_MAPPED_SUBRESOURCE ms;
+        ZeroMemory(&ms, sizeof(D3D11_MAPPED_SUBRESOURCE));
+        // A failed Map() leaves pData null; fail loudly rather than copy into it.
+        ThrowIfFailed(d3d.context->Map(d3d.per_draw_cb.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &ms));
+        memcpy(ms.pData, &d3d.per_draw_cb_data, sizeof(PerDrawCB));
+        d3d.context->Unmap(d3d.per_draw_cb.Get(), 0);
+    }
+
+    // Set vertex buffer data
+    // NOTE(review): the vertex buffer capacity is set outside this view; buf_vbo_len is
+    // assumed to fit - confirm against the buffer creation code.
+
+    D3D11_MAPPED_SUBRESOURCE ms;
+    ZeroMemory(&ms, sizeof(D3D11_MAPPED_SUBRESOURCE));
+    ThrowIfFailed(d3d.context->Map(d3d.vertex_buffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &ms));
+    memcpy(ms.pData, buf_vbo, buf_vbo_len * sizeof(float));
+    d3d.context->Unmap(d3d.vertex_buffer.Get(), 0);
+
+    uint32_t stride = d3d.shader_program->num_floats * sizeof(float);
+    uint32_t offset = 0;
+
+    if (d3d.last_vertex_buffer_stride != stride) {
+        d3d.last_vertex_buffer_stride = stride;
+        d3d.context->IASetVertexBuffers(0, 1, d3d.vertex_buffer.GetAddressOf(), &stride, &offset);
+    }
+
+    if (d3d.last_shader_program != d3d.shader_program) {
+        d3d.last_shader_program = d3d.shader_program;
+        d3d.context->IASetInputLayout(d3d.shader_program->input_layout.Get());
+        d3d.context->VSSetShader(d3d.shader_program->vertex_shader.Get(), 0, 0);
+        d3d.context->PSSetShader(d3d.shader_program->pixel_shader.Get(), 0, 0);
+
+        if (d3d.last_blend_state.Get() != d3d.shader_program->blend_state.Get()) {
+            d3d.last_blend_state = d3d.shader_program->blend_state.Get();
+            d3d.context->OMSetBlendState(d3d.shader_program->blend_state.Get(), 0, 0xFFFFFFFF);
+        }
+    }
+
+    if (d3d.last_primitive_topology != D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST) {
+        d3d.last_primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
+        d3d.context->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+    }
+
+    d3d.context->Draw(buf_vbo_num_tris * 3, 0);
+}
+
+// Renderer-level init/shutdown hooks; both are empty in this block.
+static void gfx_d3d11_init(void) { }
+
+static void gfx_d3d11_shutdown(void) { }
+
+// Binds and clears the backbuffer and depth buffer, then refreshes the per-frame constant
+// buffer (noise frame counter and noise scale).
+static void gfx_d3d11_start_frame(void) {
+    // Set render targets
+
+    d3d.context->OMSetRenderTargets(1, d3d.backbuffer_view.GetAddressOf(), d3d.depth_stencil_view.Get());
+
+    // Clear render targets
+
+    const float clearColor[] = { 0.0f, 0.0f, 0.0f, 1.0f };
+    d3d.context->ClearRenderTargetView(d3d.backbuffer_view.Get(), clearColor);
+    d3d.context->ClearDepthStencilView(d3d.depth_stencil_view.Get(), D3D11_CLEAR_DEPTH, 1.0f, 0);
+
+    // Set per-frame constant buffer
+
+    d3d.per_frame_cb_data.noise_frame++;
+    if (d3d.per_frame_cb_data.noise_frame > 150) {
+        // No high values, as noise starts to look ugly
+        d3d.per_frame_cb_data.noise_frame = 0;
+    }
+    float aspect_ratio = (float) d3d.current_width / (float) d3d.current_height;
+    d3d.per_frame_cb_data.noise_scale_x = 120 * aspect_ratio; // 120 = N64 height resolution (240) / 2
+    d3d.per_frame_cb_data.noise_scale_y = 120;
+
+    D3D11_MAPPED_SUBRESOURCE ms;
+    ZeroMemory(&ms, sizeof(D3D11_MAPPED_SUBRESOURCE));
+    ThrowIfFailed(d3d.context->Map(d3d.per_frame_cb.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &ms));
+    memcpy(ms.pData, &d3d.per_frame_cb_data, sizeof(PerFrameCB));
+    d3d.context->Unmap(d3d.per_frame_cb.Get(), 0);
+}
+
+// Rendering API table; entries are positional.
+struct GfxRenderingAPI gfx_d3d11_api = {
+    gfx_d3d11_z_is_from_0_to_1,
+    gfx_d3d11_unload_shader,
+    gfx_d3d11_load_shader,
+    gfx_d3d11_create_and_load_new_shader,
+    gfx_d3d11_lookup_shader,
+    gfx_d3d11_shader_get_info,
+    gfx_d3d11_new_texture,
+    gfx_d3d11_select_texture,
+    gfx_d3d11_upload_texture,
+    gfx_d3d11_set_sampler_parameters,
+    gfx_d3d11_set_depth_test,
+    gfx_d3d11_set_depth_mask,
+    gfx_d3d11_set_zmode_decal,
+    gfx_d3d11_set_viewport,
+    gfx_d3d11_set_scissor,
+    gfx_d3d11_set_use_alpha,
+    gfx_d3d11_draw_triangles,
+    gfx_d3d11_init,
+    gfx_d3d11_start_frame,
+    gfx_d3d11_shutdown,
+};
+
+struct
GfxWindowManagerAPI gfx_dxgi = { + gfx_d3d11_dxgi_init, + gfx_d3d11_dxgi_set_keyboard_callbacks, + gfx_d3d11_dxgi_main_loop, + gfx_d3d11_dxgi_get_dimensions, + gfx_d3d11_dxgi_handle_events, + gfx_d3d11_dxgi_start_frame, + gfx_d3d11_dxgi_swap_buffers_begin, + gfx_d3d11_dxgi_swap_buffers_end, + gfx_d3d11_dxgi_get_time, + gfx_d3d11_dxgi_shutdown, +}; + +#else + +#error "D3D11 is only supported on Windows" + +#endif // _WIN32 + +#endif // RAPI_D3D11 diff --git a/src/pc/gfx/gfx_direct3d11.h b/src/pc/gfx/gfx_direct3d11.h new file mode 100644 index 00000000..60b122b3 --- /dev/null +++ b/src/pc/gfx/gfx_direct3d11.h @@ -0,0 +1,10 @@ +#ifndef GFX_DIRECT3D11_H +#define GFX_DIRECT3D11_H + +#include "gfx_window_manager_api.h" +#include "gfx_rendering_api.h" + +extern struct GfxWindowManagerAPI gfx_dxgi; +extern struct GfxRenderingAPI gfx_d3d11_api; + +#endif diff --git a/src/pc/gfx/gfx_direct3d12.cpp b/src/pc/gfx/gfx_direct3d12.cpp new file mode 100644 index 00000000..90f3cd08 --- /dev/null +++ b/src/pc/gfx/gfx_direct3d12.cpp @@ -0,0 +1,1317 @@ +#ifdef RAPI_D3D12 + +#if defined(_WIN32) || defined(_WIN64) + +#include +#include +#include + +#include +#include +#include + +#include +#include + +// These are needed when compiling with MinGW +#include +#include +#define __in_ecount_opt(size) +#define __in +#define __out +#define __REQUIRED_RPCNDR_H_VERSION__ 475 +#include +#include + +#include "dxsdk/dxgi.h" +#include "dxsdk/dxgi1_4.h" +#include "dxsdk/d3d12.h" +#include "dxsdk/d3dcompiler.h" + +#include "dxsdk/d3dx12.h" + +#ifndef _LANGUAGE_C +#define _LANGUAGE_C +#endif +#include + +extern "C" { +#include "../configfile.h" +#include "../platform.h" +} + +#include "gfx_cc.h" +#include "gfx_window_manager_api.h" +#include "gfx_rendering_api.h" +#include "gfx_direct3d_common.h" + +#include "gfx_screen_config.h" + +#define WINCLASS_NAME L"SUPERMARIO64" +#define GFX_API_NAME "Direct3D 12" +#define DEBUG_D3D 0 + +#ifdef VERSION_EU +#define FRAME_INTERVAL_US_NUMERATOR 40000 +#define 
FRAME_INTERVAL_US_DENOMINATOR 1 +#else +#define FRAME_INTERVAL_US_NUMERATOR 100000 +#define FRAME_INTERVAL_US_DENOMINATOR 3 +#endif + +using namespace Microsoft::WRL; // For ComPtr + +struct ShaderProgram { + uint32_t shader_id; + uint8_t num_inputs; + bool used_textures[2]; + uint8_t num_floats; + uint8_t num_attribs; + + ComPtr vertex_shader; + ComPtr pixel_shader; + ComPtr root_signature; +}; + +struct PipelineDesc { + uint32_t shader_id; + bool depth_test; + bool depth_mask; + bool zmode_decal; + bool _padding; + + bool operator==(const PipelineDesc& o) const { + return memcmp(this, &o, sizeof(*this)) == 0; + } + + bool operator<(const PipelineDesc& o) const { + return memcmp(this, &o, sizeof(*this)) < 0; + } +}; + +struct TextureHeap { + ComPtr heap; + std::vector free_list; +}; + +struct TextureData { + ComPtr resource; + struct TextureHeap *heap; + uint8_t heap_offset; + + uint64_t last_frame_counter; + uint32_t descriptor_index; + int sampler_parameters; +}; + +static struct { + struct ShaderProgram shader_program_pool[64]; + uint8_t shader_program_pool_size; + + uint32_t current_width, current_height; + + ComPtr factory; + ComPtr device; + ComPtr command_queue; + ComPtr copy_command_queue; + ComPtr swap_chain; + ComPtr rtv_heap; + UINT rtv_descriptor_size; + ComPtr render_targets[2]; + ComPtr command_allocator; + ComPtr copy_command_allocator; + ComPtr command_list; + ComPtr copy_command_list; + ComPtr dsv_heap; + ComPtr depth_stencil_buffer; + ComPtr srv_heap; + UINT srv_descriptor_size; + ComPtr sampler_heap; + UINT sampler_descriptor_size; + + std::map, std::list> texture_heaps; + + std::map>> upload_heaps; + std::vector>> upload_heaps_in_flight; + ComPtr copy_fence; + uint64_t copy_fence_value; + + std::vector textures; + int current_tile; + uint32_t current_texture_ids[2]; + uint32_t srv_pos; + + int frame_index; + ComPtr fence; + HANDLE fence_event; + HANDLE waitable_object; + uint64_t qpc_init, qpc_freq; + uint64_t frame_timestamp; // in units of 
1/FRAME_INTERVAL_US_DENOMINATOR microseconds + std::map frame_stats; + std::set> pending_frame_stats; + bool dropped_frame; + bool sync_interval_means_frames_to_wait; + UINT length_in_vsync_frames; + + uint64_t frame_counter; + + ComPtr vertex_buffer; + void *mapped_vbuf_address; + int vbuf_pos; + + std::vector> resources_to_clean_at_end_of_frame; + std::vector> texture_heap_allocations_to_reclaim_at_end_of_frame; + + std::map> pipeline_states; + bool must_reload_pipeline; + + // Current state: + ID3D12PipelineState *pipeline_state; + struct ShaderProgram *shader_program; + bool depth_test; + bool depth_mask; + bool zmode_decal; + + CD3DX12_VIEWPORT viewport; + CD3DX12_RECT scissor; + + void (*run_one_game_iter)(void); + bool (*on_key_down)(int scancode); + bool (*on_key_up)(int scancode); + void (*on_all_keys_up)(void); +} d3d; + +static int texture_uploads = 0; +static int max_texture_uploads; + +static D3D12_CPU_DESCRIPTOR_HANDLE get_cpu_descriptor_handle(ComPtr& heap) { +#if __MINGW32__ + // We would like to do this: + // D3D12_CPU_DESCRIPTOR_HANDLE handle = heap->GetCPUDescriptorHandleForHeapStart(); + // but MinGW64 doesn't follow the calling conventions of VC++ for some reason. + // Per MS documentation "User-defined types can be returned by value from global functions and static member functions"... + // "Otherwise, the caller assumes the responsibility of allocating memory and passing a pointer for the return value as the first argument". + // The method here is a non-static member function, and hence we need to pass the address to the return value as a parameter. + // MinGW32 has the same issue. 
+ auto fn = heap->GetCPUDescriptorHandleForHeapStart; + void (STDMETHODCALLTYPE ID3D12DescriptorHeap::*fun)(D3D12_CPU_DESCRIPTOR_HANDLE *out) = (void (STDMETHODCALLTYPE ID3D12DescriptorHeap::*)(D3D12_CPU_DESCRIPTOR_HANDLE *out))fn; + D3D12_CPU_DESCRIPTOR_HANDLE handle; + (heap.Get()->*fun)(&handle); + return handle; +#else + return heap->GetCPUDescriptorHandleForHeapStart(); +#endif +} + +static D3D12_GPU_DESCRIPTOR_HANDLE get_gpu_descriptor_handle(ComPtr& heap) { +#ifdef __MINGW32__ + // See get_cpu_descriptor_handle + auto fn = heap->GetGPUDescriptorHandleForHeapStart; + void (STDMETHODCALLTYPE ID3D12DescriptorHeap::*fun)(D3D12_GPU_DESCRIPTOR_HANDLE *out) = (void (STDMETHODCALLTYPE ID3D12DescriptorHeap::*)(D3D12_GPU_DESCRIPTOR_HANDLE *out))fn; + D3D12_GPU_DESCRIPTOR_HANDLE handle; + (heap.Get()->*fun)(&handle); + return handle; +#else + return heap->GetGPUDescriptorHandleForHeapStart(); +#endif +} + +static D3D12_RESOURCE_ALLOCATION_INFO get_resource_allocation_info(const D3D12_RESOURCE_DESC *resource_desc) { +#ifdef __MINGW32__ + // See get_cpu_descriptor_handle + auto fn = d3d.device->GetResourceAllocationInfo; + void (STDMETHODCALLTYPE ID3D12Device::*fun)(D3D12_RESOURCE_ALLOCATION_INFO *out, UINT visibleMask, UINT numResourceDescs, const D3D12_RESOURCE_DESC *pResourceDescs) = + (void (STDMETHODCALLTYPE ID3D12Device::*)(D3D12_RESOURCE_ALLOCATION_INFO *out, UINT visibleMask, UINT numResourceDescs, const D3D12_RESOURCE_DESC *pResourceDescs))fn; + D3D12_RESOURCE_ALLOCATION_INFO out; + (d3d.device.Get()->*fun)(&out, 0, 1, resource_desc); + return out; +#else + return d3d.device->GetResourceAllocationInfo(0, 1, resource_desc); +#endif +} + +static bool gfx_d3d12_z_is_from_0_to_1(void) { + return true; +} + +static void gfx_d3d12_unload_shader(struct ShaderProgram *old_prg) { +} + +static void gfx_d3d12_load_shader(struct ShaderProgram *new_prg) { + d3d.shader_program = new_prg; + d3d.must_reload_pipeline = true; +} + +static struct ShaderProgram 
*gfx_d3d12_create_and_load_new_shader(uint32_t shader_id) { + /*static FILE *fp; + if (!fp) { + fp = fopen("shaders.txt", "w"); + } + fprintf(fp, "0x%08x\n", shader_id); + fflush(fp);*/ + + struct ShaderProgram *prg = &d3d.shader_program_pool[d3d.shader_program_pool_size++]; + + CCFeatures cc_features; + get_cc_features(shader_id, &cc_features); + + char buf[2048]; + size_t len = 0; + size_t num_floats = 4; + + append_str(buf, &len, "#define RS \"RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | DENY_VERTEX_SHADER_ROOT_ACCESS)"); + if (cc_features.used_textures[0]) { + append_str(buf, &len, ",DescriptorTable(SRV(t0), visibility = SHADER_VISIBILITY_PIXEL)"); + append_str(buf, &len, ",DescriptorTable(Sampler(s0), visibility = SHADER_VISIBILITY_PIXEL)"); + } + if (cc_features.used_textures[1]) { + append_str(buf, &len, ",DescriptorTable(SRV(t1), visibility = SHADER_VISIBILITY_PIXEL)"); + append_str(buf, &len, ",DescriptorTable(Sampler(s1), visibility = SHADER_VISIBILITY_PIXEL)"); + } + append_line(buf, &len, "\""); + + append_line(buf, &len, "struct PSInput {"); + append_line(buf, &len, "float4 position : SV_POSITION;"); + if (cc_features.used_textures[0] || cc_features.used_textures[1]) { + append_line(buf, &len, "float2 uv : TEXCOORD;"); + num_floats += 2; + } + if (cc_features.opt_fog) { + append_line(buf, &len, "float4 fog : FOG;"); + num_floats += 4; + } + for (int i = 0; i < cc_features.num_inputs; i++) { + len += sprintf(buf + len, "float%d input%d : INPUT%d;\r\n", cc_features.opt_alpha ? 4 : 3, i + 1, i); + num_floats += cc_features.opt_alpha ? 
4 : 3; + } + append_line(buf, &len, "};"); + + if (cc_features.used_textures[0]) { + append_line(buf, &len, "Texture2D g_texture0 : register(t0);"); + append_line(buf, &len, "SamplerState g_sampler0 : register(s0);"); + } + if (cc_features.used_textures[1]) { + append_line(buf, &len, "Texture2D g_texture1 : register(t1);"); + append_line(buf, &len, "SamplerState g_sampler1 : register(s1);"); + } + + // Vertex shader + append_str(buf, &len, "PSInput VSMain(float4 position : POSITION"); + if (cc_features.used_textures[0] || cc_features.used_textures[1]) { + append_str(buf, &len, ", float2 uv : TEXCOORD"); + } + if (cc_features.opt_fog) { + append_str(buf, &len, ", float4 fog : FOG"); + } + for (int i = 0; i < cc_features.num_inputs; i++) { + len += sprintf(buf + len, ", float%d input%d : INPUT%d", cc_features.opt_alpha ? 4 : 3, i + 1, i); + } + append_line(buf, &len, ") {"); + append_line(buf, &len, "PSInput result;"); + append_line(buf, &len, "result.position = position;"); + if (cc_features.used_textures[0] || cc_features.used_textures[1]) { + append_line(buf, &len, "result.uv = uv;"); + } + if (cc_features.opt_fog) { + append_line(buf, &len, "result.fog = fog;"); + } + for (int i = 0; i < cc_features.num_inputs; i++) { + len += sprintf(buf + len, "result.input%d = input%d;\r\n", i + 1, i + 1); + } + append_line(buf, &len, "return result;"); + append_line(buf, &len, "}"); + + // Pixel shader + append_line(buf, &len, "[RootSignature(RS)]"); + append_line(buf, &len, "float4 PSMain(PSInput input) : SV_TARGET {"); + if (cc_features.used_textures[0]) { + append_line(buf, &len, "float4 texVal0 = g_texture0.Sample(g_sampler0, input.uv);"); + } + if (cc_features.used_textures[1]) { + append_line(buf, &len, "float4 texVal1 = g_texture1.Sample(g_sampler1, input.uv);"); + } + + append_str(buf, &len, cc_features.opt_alpha ? 
"float4 texel = " : "float3 texel = "); + if (!cc_features.color_alpha_same && cc_features.opt_alpha) { + append_str(buf, &len, "float4("); + append_formula(buf, &len, cc_features.c, cc_features.do_single[0], cc_features.do_multiply[0], cc_features.do_mix[0], false, false, true); + append_str(buf, &len, ", "); + append_formula(buf, &len, cc_features.c, cc_features.do_single[1], cc_features.do_multiply[1], cc_features.do_mix[1], true, true, true); + append_str(buf, &len, ")"); + } else { + append_formula(buf, &len, cc_features.c, cc_features.do_single[0], cc_features.do_multiply[0], cc_features.do_mix[0], cc_features.opt_alpha, false, cc_features.opt_alpha); + } + append_line(buf, &len, ";"); + + if (cc_features.opt_texture_edge && cc_features.opt_alpha) { + append_line(buf, &len, "if (texel.a > 0.3) texel.a = 1.0; else discard;"); + } + // TODO discard if alpha is 0? + if (cc_features.opt_fog) { + if (cc_features.opt_alpha) { + append_line(buf, &len, "texel = float4(lerp(texel.rgb, input.fog.rgb, input.fog.a), texel.a);"); + } else { + append_line(buf, &len, "texel = lerp(texel, input.fog.rgb, input.fog.a);"); + } + } + + if (cc_features.opt_alpha) { + append_line(buf, &len, "return texel;"); + } else { + append_line(buf, &len, "return float4(texel, 1.0);"); + } + append_line(buf, &len, "}"); + + //fwrite(buf, 1, len, stdout); + + ThrowIfFailed(D3DCompile(buf, len, nullptr, nullptr, nullptr, "VSMain", "vs_5_1", /*D3DCOMPILE_OPTIMIZATION_LEVEL3*/0, 0, &prg->vertex_shader, nullptr)); + ThrowIfFailed(D3DCompile(buf, len, nullptr, nullptr, nullptr, "PSMain", "ps_5_1", /*D3DCOMPILE_OPTIMIZATION_LEVEL3*/0, 0, &prg->pixel_shader, nullptr)); + + ThrowIfFailed(d3d.device->CreateRootSignature(0, prg->pixel_shader->GetBufferPointer(), prg->pixel_shader->GetBufferSize(), IID_ID3D12RootSignature, IID_PPV_ARGS_Helper(&prg->root_signature))); + + prg->shader_id = shader_id; + prg->num_inputs = cc_features.num_inputs; + prg->used_textures[0] = cc_features.used_textures[0]; + 
prg->used_textures[1] = cc_features.used_textures[1];
+    prg->num_floats = num_floats;
+    //prg->num_attribs = cnt;
+
+    d3d.must_reload_pipeline = true;
+    return d3d.shader_program = prg;
+}
+
+// Linear search of the shader pool for a program built for shader_id.
+// Returns nullptr when no such program exists.
+static struct ShaderProgram *gfx_d3d12_lookup_shader(uint32_t shader_id) {
+    for (size_t i = 0; i < d3d.shader_program_pool_size; i++) {
+        if (d3d.shader_program_pool[i].shader_id == shader_id) {
+            return &d3d.shader_program_pool[i];
+        }
+    }
+    return nullptr;
+}
+
+// Copies the input count and the per-tile texture usage flags of prg to the caller.
+static void gfx_d3d12_shader_get_info(struct ShaderProgram *prg, uint8_t *num_inputs, bool used_textures[2]) {
+    *num_inputs = prg->num_inputs;
+    used_textures[0] = prg->used_textures[0];
+    used_textures[1] = prg->used_textures[1];
+}
+
+// Appends an empty texture slot and returns its index, which serves as the texture id.
+static uint32_t gfx_d3d12_new_texture(void) {
+    d3d.textures.resize(d3d.textures.size() + 1);
+    return (uint32_t)(d3d.textures.size() - 1);
+}
+
+// Makes tile the current tile and binds texture_id to it.
+static void gfx_d3d12_select_texture(int tile, uint32_t texture_id) {
+    d3d.current_tile = tile;
+    d3d.current_texture_ids[tile] = texture_id;
+}
+
+// Uploads a width x height RGBA8 image into the texture bound to the current tile.
+// The texture is placed in a pooled heap (64 slots per heap, pooled by allocation
+// size and alignment), the pixels are staged through a recycled upload buffer and
+// the copy is recorded on the copy command list. A resource previously held by the
+// slot is queued for release at the end of the frame.
+static void gfx_d3d12_upload_texture(uint8_t *rgba32_buf, int width, int height) {
+    texture_uploads++;
+
+    ComPtr<ID3D12Resource> texture_resource;
+
+    // Describe and create a Texture2D.
+    D3D12_RESOURCE_DESC texture_desc = {};
+    texture_desc.MipLevels = 1;
+    texture_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+    texture_desc.Width = width;
+    texture_desc.Height = height;
+    texture_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
+    texture_desc.DepthOrArraySize = 1;
+    texture_desc.SampleDesc.Count = 1;
+    texture_desc.SampleDesc.Quality = 0;
+    texture_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
+    texture_desc.Alignment = D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT;
+
+    D3D12_RESOURCE_ALLOCATION_INFO alloc_info = get_resource_allocation_info(&texture_desc);
+
+    std::list<struct TextureHeap>& heaps = d3d.texture_heaps[std::make_pair(alloc_info.SizeInBytes, alloc_info.Alignment)];
+
+    // Reuse the first heap that still has a free slot.
+    struct TextureHeap *found_heap = nullptr;
+    for (struct TextureHeap& heap : heaps) {
+        if (!heap.free_list.empty()) {
+            found_heap = &heap;
+            break;
+        }
+    }
+    if (found_heap == nullptr) {
+        heaps.resize(heaps.size() + 1);
+        found_heap = &heaps.back();
+
+        const int textures_per_heap = 64;
+
+        D3D12_HEAP_DESC heap_desc = {};
+        heap_desc.SizeInBytes = alloc_info.SizeInBytes * textures_per_heap;
+        if (alloc_info.Alignment == D3D12_SMALL_RESOURCE_PLACEMENT_ALIGNMENT) {
+            heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
+        } else {
+            heap_desc.Alignment = alloc_info.Alignment;
+        }
+        heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
+        heap_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
+        heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
+        heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;
+        ThrowIfFailed(d3d.device->CreateHeap(&heap_desc, IID_ID3D12Heap, IID_PPV_ARGS_Helper(&found_heap->heap)));
+        for (int i = 0; i < textures_per_heap; i++) {
+            found_heap->free_list.push_back(i);
+        }
+    }
+
+    uint8_t heap_offset = found_heap->free_list.back();
+    found_heap->free_list.pop_back();
+    ThrowIfFailed(d3d.device->CreatePlacedResource(found_heap->heap.Get(), heap_offset * alloc_info.SizeInBytes, &texture_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_ID3D12Resource, IID_PPV_ARGS_Helper(&texture_resource)));
+
+    D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout;
+    UINT num_rows;
+    UINT64 row_size_in_bytes;
+    UINT64 upload_buffer_size;
+    d3d.device->GetCopyableFootprints(&texture_desc, 0, 1, 0, &layout, &num_rows, &row_size_in_bytes, &upload_buffer_size);
+
+    // Upload buffers are recycled per size; create one only when the pool is empty.
+    std::vector<ComPtr<ID3D12Resource>>& upload_heaps = d3d.upload_heaps[upload_buffer_size];
+    ComPtr<ID3D12Resource> upload_heap;
+    if (upload_heaps.empty()) {
+        CD3DX12_HEAP_PROPERTIES hp(D3D12_HEAP_TYPE_UPLOAD);
+        CD3DX12_RESOURCE_DESC rdb = CD3DX12_RESOURCE_DESC::Buffer(upload_buffer_size);
+        ThrowIfFailed(d3d.device->CreateCommittedResource(
+            &hp,
+            D3D12_HEAP_FLAG_NONE,
+            &rdb,
+            D3D12_RESOURCE_STATE_GENERIC_READ,
+            nullptr,
+            IID_ID3D12Resource, IID_PPV_ARGS_Helper(&upload_heap)));
+    } else {
+        upload_heap = upload_heaps.back();
+        upload_heaps.pop_back();
+    }
+
+    {
+        D3D12_SUBRESOURCE_DATA texture_data = {};
+        texture_data.pData = rgba32_buf;
+        texture_data.RowPitch = width * 4; // RGBA
+        texture_data.SlicePitch = texture_data.RowPitch * height;
+
+        // A failed Map would leave data uninitialized, so it must not be ignored.
+        void *data;
+        ThrowIfFailed(upload_heap->Map(0, nullptr, &data));
+        D3D12_MEMCPY_DEST dest_data = { (uint8_t *)data + layout.Offset, layout.Footprint.RowPitch, SIZE_T(layout.Footprint.RowPitch) * SIZE_T(num_rows) };
+        MemcpySubresource(&dest_data, &texture_data, static_cast<SIZE_T>(row_size_in_bytes), num_rows, layout.Footprint.Depth);
+        upload_heap->Unmap(0, nullptr);
+
+        CD3DX12_TEXTURE_COPY_LOCATION dst(texture_resource.Get(), 0);
+        CD3DX12_TEXTURE_COPY_LOCATION src(upload_heap.Get(), layout);
+        d3d.copy_command_list->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr);
+    }
+
+    CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(texture_resource.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
+    d3d.command_list->ResourceBarrier(1, &barrier);
+
+    d3d.upload_heaps_in_flight.push_back(std::make_pair((size_t)upload_buffer_size, std::move(upload_heap)));
+
+    struct TextureData& td = d3d.textures[d3d.current_texture_ids[d3d.current_tile]];
+    if (td.resource.Get() != nullptr) {
+        // The old resource may still be referenced by recorded commands; defer its release.
+        d3d.resources_to_clean_at_end_of_frame.push_back(std::move(td.resource));
+        d3d.texture_heap_allocations_to_reclaim_at_end_of_frame.push_back(std::make_pair(td.heap, td.heap_offset));
+        td.last_frame_counter = 0;
+    }
+    td.resource = std::move(texture_resource);
+    td.heap = found_heap;
+    td.heap_offset = heap_offset;
+}
+
+// Maps a clamp/mirror flag word to 0 (neither flag), 1 (mirror) or 2 (clamp; takes precedence).
+static int gfx_cm_to_index(uint32_t val) {
+    if (val & G_TX_CLAMP) {
+        return 2;
+    }
+    return (val & G_TX_MIRROR) ? 1 : 0;
+}
+
+// Stores the sampler selection for the texture bound to tile as
+// linear_filter * 9 + index(cms) * 3 + index(cmt); draw_triangles uses the value
+// as a descriptor index into the sampler heap.
+static void gfx_d3d12_set_sampler_parameters(int tile, bool linear_filter, uint32_t cms, uint32_t cmt) {
+    d3d.textures[d3d.current_texture_ids[tile]].sampler_parameters = linear_filter * 9 + gfx_cm_to_index(cms) * 3 + gfx_cm_to_index(cmt);
+}
+
+// The three setters below record state that is part of the pipeline state key,
+// so each one forces a pipeline lookup on the next draw.
+static void gfx_d3d12_set_depth_test(bool depth_test) {
+    d3d.depth_test = depth_test;
+    d3d.must_reload_pipeline = true;
+}
+
+static void gfx_d3d12_set_depth_mask(bool z_upd) {
+    d3d.depth_mask = z_upd;
+    d3d.must_reload_pipeline = true;
+}
+
+static void gfx_d3d12_set_zmode_decal(bool zmode_decal) {
+    d3d.zmode_decal = zmode_decal;
+    d3d.must_reload_pipeline = true;
+}
+
+// Viewport and scissor arrive with a bottom-left origin and are flipped
+// against the current backbuffer height.
+static void gfx_d3d12_set_viewport(int x, int y, int width, int height) {
+    d3d.viewport = CD3DX12_VIEWPORT(x, d3d.current_height - y - height, width, height);
+}
+
+static void gfx_d3d12_set_scissor(int x, int y, int width, int height) {
+    d3d.scissor = CD3DX12_RECT(x, d3d.current_height - y - height, x + width, d3d.current_height - y);
+}
+
+static void gfx_d3d12_set_use_alpha(bool use_alpha) {
+    // Already part of the pipeline state from shader info
+}
+
+// Records one draw of buf_vbo_num_tris triangles. buf_vbo holds buf_vbo_len floats
+// of interleaved vertex data, which is copied into the mapped vertex buffer.
+// The pipeline state is looked up (and created on first use) when any
+// pipeline-affecting state changed since the previous draw.
+static void gfx_d3d12_draw_triangles(float buf_vbo[], size_t buf_vbo_len, size_t buf_vbo_num_tris) {
+    if (buf_vbo_num_tris == 0) {
+        // Nothing to draw; also avoids dividing by zero in the stride computation below.
+        return;
+    }
+
+    struct ShaderProgram *prg = d3d.shader_program;
+
+    if (d3d.must_reload_pipeline) {
+        ComPtr<ID3D12PipelineState>& pipeline_state = d3d.pipeline_states[PipelineDesc{
+            prg->shader_id,
+            d3d.depth_test,
+            d3d.depth_mask,
+            d3d.zmode_decal,
+            0
+        }];
+        if (pipeline_state.Get() == nullptr) {
+            D3D12_INPUT_ELEMENT_DESC ied[7] = {
+                {"POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }
+            };
+            uint32_t ied_pos = 1;
+            if (prg->used_textures[0] || prg->used_textures[1]) {
+                ied[ied_pos++] = D3D12_INPUT_ELEMENT_DESC{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0};
+            }
+            if (prg->shader_id & SHADER_OPT_FOG) {
+                ied[ied_pos++] = D3D12_INPUT_ELEMENT_DESC{"FOG", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0};
+            }
+            for (int i = 0; i < prg->num_inputs; i++) {
+                DXGI_FORMAT format = (prg->shader_id & SHADER_OPT_ALPHA) ? DXGI_FORMAT_R32G32B32A32_FLOAT : DXGI_FORMAT_R32G32B32_FLOAT;
+                ied[ied_pos++] = D3D12_INPUT_ELEMENT_DESC{"INPUT", (UINT)i, format, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0};
+            }
+
+            D3D12_GRAPHICS_PIPELINE_STATE_DESC desc = {};
+            desc.InputLayout = { ied, ied_pos };
+            desc.pRootSignature = prg->root_signature.Get();
+            desc.VS = CD3DX12_SHADER_BYTECODE(prg->vertex_shader.Get());
+            desc.PS = CD3DX12_SHADER_BYTECODE(prg->pixel_shader.Get());
+            desc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT);
+            if (d3d.zmode_decal) {
+                desc.RasterizerState.SlopeScaledDepthBias = -2.0f;
+            }
+            desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
+            if (prg->shader_id & SHADER_OPT_ALPHA) {
+                D3D12_BLEND_DESC bd = {};
+                bd.AlphaToCoverageEnable = FALSE;
+                bd.IndependentBlendEnable = FALSE;
+                static const D3D12_RENDER_TARGET_BLEND_DESC default_rtbd = {
+                    TRUE, FALSE,
+                    D3D12_BLEND_SRC_ALPHA, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD,
+                    D3D12_BLEND_ONE, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD,
+                    D3D12_LOGIC_OP_NOOP,
+                    D3D12_COLOR_WRITE_ENABLE_ALL
+                };
+                for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; i++) {
+                    bd.RenderTarget[i] = default_rtbd;
+                }
+                desc.BlendState = bd;
+            } else {
+                desc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT);
+            }
+            desc.DepthStencilState.DepthEnable = d3d.depth_test;
+            desc.DepthStencilState.DepthWriteMask = d3d.depth_mask ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
+            desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;
+            desc.DSVFormat = d3d.depth_test ? DXGI_FORMAT_D32_FLOAT : DXGI_FORMAT_UNKNOWN;
+            desc.SampleMask = UINT_MAX;
+            desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
+            desc.NumRenderTargets = 1;
+            desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM;
+            desc.SampleDesc.Count = 1;
+            ThrowIfFailed(d3d.device->CreateGraphicsPipelineState(&desc, IID_ID3D12PipelineState, IID_PPV_ARGS_Helper(&pipeline_state)));
+        }
+        d3d.pipeline_state = pipeline_state.Get();
+        d3d.must_reload_pipeline = false;
+    }
+
+    d3d.command_list->SetGraphicsRootSignature(prg->root_signature.Get());
+    d3d.command_list->SetPipelineState(d3d.pipeline_state);
+
+    ID3D12DescriptorHeap *heaps[] = { d3d.srv_heap.Get(), d3d.sampler_heap.Get() };
+    d3d.command_list->SetDescriptorHeaps(2, heaps);
+
+    int texture_pos = 0;
+    for (int i = 0; i < 2; i++) {
+        if (prg->used_textures[i]) {
+            struct TextureData& td = d3d.textures[d3d.current_texture_ids[i]];
+            if (td.last_frame_counter != d3d.frame_counter) {
+                // First use of this texture in the current frame: allocate an SRV slot for it.
+                td.descriptor_index = d3d.srv_pos;
+                td.last_frame_counter = d3d.frame_counter;
+
+                D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = {};
+                srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+                srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+                srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
+                srv_desc.Texture2D.MipLevels = 1;
+
+                CD3DX12_CPU_DESCRIPTOR_HANDLE srv_handle(get_cpu_descriptor_handle(d3d.srv_heap), d3d.srv_pos++, d3d.srv_descriptor_size);
+                d3d.device->CreateShaderResourceView(td.resource.Get(), &srv_desc, srv_handle);
+            }
+
+            CD3DX12_GPU_DESCRIPTOR_HANDLE srv_gpu_handle(get_gpu_descriptor_handle(d3d.srv_heap), td.descriptor_index, d3d.srv_descriptor_size);
+            d3d.command_list->SetGraphicsRootDescriptorTable(2 * texture_pos, srv_gpu_handle);
+
+            CD3DX12_GPU_DESCRIPTOR_HANDLE sampler_gpu_handle(get_gpu_descriptor_handle(d3d.sampler_heap), td.sampler_parameters, d3d.sampler_descriptor_size);
+            d3d.command_list->SetGraphicsRootDescriptorTable(2 * texture_pos + 1, sampler_gpu_handle);
+
+            ++texture_pos;
+        }
+    }
+
+    CD3DX12_CPU_DESCRIPTOR_HANDLE rtv_handle(get_cpu_descriptor_handle(d3d.rtv_heap), d3d.frame_index, d3d.rtv_descriptor_size);
+    D3D12_CPU_DESCRIPTOR_HANDLE dsv_handle = get_cpu_descriptor_handle(d3d.dsv_heap);
+    d3d.command_list->OMSetRenderTargets(1, &rtv_handle, FALSE, &dsv_handle);
+
+    d3d.command_list->RSSetViewports(1, &d3d.viewport);
+    d3d.command_list->RSSetScissorRects(1, &d3d.scissor);
+
+    int current_pos = d3d.vbuf_pos;
+    memcpy((uint8_t *)d3d.mapped_vbuf_address + current_pos, buf_vbo, buf_vbo_len * sizeof(float));
+    d3d.vbuf_pos += buf_vbo_len * sizeof(float);
+    static int maxpos;
+    if (d3d.vbuf_pos > maxpos) {
+        maxpos = d3d.vbuf_pos;
+        //printf("NEW MAXPOS: %d\n", maxpos);
+    }
+
+    D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view;
+    vertex_buffer_view.BufferLocation = d3d.vertex_buffer->GetGPUVirtualAddress() + current_pos;
+    vertex_buffer_view.StrideInBytes = buf_vbo_len / (3 * buf_vbo_num_tris) * sizeof(float);
+    vertex_buffer_view.SizeInBytes = buf_vbo_len * sizeof(float);
+
+    d3d.command_list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+    d3d.command_list->IASetVertexBuffers(0, 1, &vertex_buffer_view);
+    d3d.command_list->DrawInstanced(3 * buf_vbo_num_tris, 1, 0, 0);
+}
+
+static void gfx_d3d12_init(void) { }
+
+static void gfx_d3d12_shutdown(void) { }
+
+// Begins recording a frame: resets the per-frame counters and the direct command
+// list, then moves the current backbuffer from the present state to render target.
+static void gfx_d3d12_start_frame(void) {
+    ++d3d.frame_counter;
+    d3d.srv_pos = 0;
+    texture_uploads = 0;
+    ThrowIfFailed(d3d.command_allocator->Reset());
+    ThrowIfFailed(d3d.command_list->Reset(d3d.command_allocator.Get(), nullptr));
+
+    CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(
+
d3d.render_targets[d3d.frame_index].Get(),
+        D3D12_RESOURCE_STATE_PRESENT,
+        D3D12_RESOURCE_STATE_RENDER_TARGET);
+    d3d.command_list->ResourceBarrier(1, &barrier);
+
+    CD3DX12_CPU_DESCRIPTOR_HANDLE rtv_handle(get_cpu_descriptor_handle(d3d.rtv_heap), d3d.frame_index, d3d.rtv_descriptor_size);
+    D3D12_CPU_DESCRIPTOR_HANDLE dsv_handle = get_cpu_descriptor_handle(d3d.dsv_heap);
+    d3d.command_list->OMSetRenderTargets(1, &rtv_handle, FALSE, &dsv_handle);
+
+    const float clear_color[] = { 0.0f, 0.0f, 0.0f, 1.0f };
+    d3d.command_list->ClearRenderTargetView(rtv_handle, clear_color, 0, nullptr);
+    d3d.command_list->ClearDepthStencilView(dsv_handle, D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr);
+
+    d3d.vbuf_pos = 0;
+}
+
+// Fetches both swap chain buffers and creates a render target view for each
+// in consecutive slots of the RTV heap.
+static void create_render_target_views(void) {
+    D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = get_cpu_descriptor_handle(d3d.rtv_heap);
+    for (UINT i = 0; i < 2; i++) {
+        ThrowIfFailed(d3d.swap_chain->GetBuffer(i, IID_ID3D12Resource, (void **)&d3d.render_targets[i]));
+        d3d.device->CreateRenderTargetView(d3d.render_targets[i].Get(), nullptr, rtv_handle);
+        rtv_handle.ptr += d3d.rtv_descriptor_size;
+    }
+}
+
+// Creates a D32_FLOAT depth buffer matching the swap chain's current size,
+// creates its depth stencil view, and records that size as the current dimensions.
+static void create_depth_buffer(void) {
+    DXGI_SWAP_CHAIN_DESC1 desc1;
+    ThrowIfFailed(d3d.swap_chain->GetDesc1(&desc1));
+    UINT width = desc1.Width;
+    UINT height = desc1.Height;
+
+    d3d.current_width = width;
+    d3d.current_height = height;
+
+    D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc = {};
+    dsv_desc.Format = DXGI_FORMAT_D32_FLOAT;
+    dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
+    dsv_desc.Flags = D3D12_DSV_FLAG_NONE;
+
+    D3D12_CLEAR_VALUE depth_optimized_cv = {};
+    depth_optimized_cv.Format = DXGI_FORMAT_D32_FLOAT;
+    depth_optimized_cv.DepthStencil.Depth = 1.0f;
+
+    D3D12_HEAP_PROPERTIES hp = {};
+    hp.Type = D3D12_HEAP_TYPE_DEFAULT;
+    hp.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
+    hp.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
+    hp.CreationNodeMask = 1;
+    hp.VisibleNodeMask = 1;
+
+    D3D12_RESOURCE_DESC rd = {};
+    rd.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
+    rd.Alignment = 0;
+    rd.Width = width;
+    rd.Height = height;
+    rd.DepthOrArraySize = 1;
+    // Only mip 0 is ever viewed; 0 here would request a full, unused mip chain.
+    rd.MipLevels = 1;
+    rd.Format = DXGI_FORMAT_D32_FLOAT;
+    rd.SampleDesc.Count = 1;
+    rd.SampleDesc.Quality = 0;
+    rd.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
+    rd.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
+    ThrowIfFailed(d3d.device->CreateCommittedResource(&hp, D3D12_HEAP_FLAG_NONE, &rd, D3D12_RESOURCE_STATE_DEPTH_WRITE, &depth_optimized_cv, IID_ID3D12Resource, IID_PPV_ARGS_Helper(&d3d.depth_stencil_buffer)));
+
+    d3d.device->CreateDepthStencilView(d3d.depth_stencil_buffer.Get(), &dsv_desc, get_cpu_descriptor_handle(d3d.dsv_heap));
+}
+
+// WM_SIZE handler: releases the backbuffer references, resizes the swap chain to
+// the new client area and recreates the render target views and depth buffer.
+// Does nothing until the render targets have been created once.
+static void gfx_d3d12_dxgi_on_resize(void) {
+    if (d3d.render_targets[0].Get() != nullptr) {
+        d3d.render_targets[0].Reset();
+        d3d.render_targets[1].Reset();
+        ThrowIfFailed(d3d.swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT));
+        d3d.frame_index = d3d.swap_chain->GetCurrentBackBufferIndex();
+        create_render_target_views();
+        create_depth_buffer();
+    }
+}
+
+// Key handlers: bits 16..24 of l_param (scan code plus the extended-key flag)
+// are forwarded to the registered callback, if any.
+static void onkeydown(WPARAM w_param, LPARAM l_param) {
+    int key = ((l_param >> 16) & 0x1ff);
+    if (d3d.on_key_down != nullptr) {
+        d3d.on_key_down(key);
+    }
+}
+static void onkeyup(WPARAM w_param, LPARAM l_param) {
+    int key = ((l_param >> 16) & 0x1ff);
+    if (d3d.on_key_up != nullptr) {
+        d3d.on_key_up(key);
+    }
+}
+
+// Window procedure. WM_PAINT drives one game iteration; messages not listed
+// here go to DefWindowProcW.
+LRESULT CALLBACK gfx_d3d12_dxgi_wnd_proc(HWND h_wnd, UINT message, WPARAM w_param, LPARAM l_param) {
+    switch (message) {
+        case WM_SIZE:
+            gfx_d3d12_dxgi_on_resize();
+            break;
+        case WM_DESTROY:
+            exit(0);
+        case WM_PAINT:
+            if (d3d.run_one_game_iter != nullptr)
+                d3d.run_one_game_iter();
+            break;
+        case WM_ACTIVATEAPP:
+            if (d3d.on_all_keys_up != nullptr)
+                d3d.on_all_keys_up();
+            break;
+        case WM_KEYDOWN:
+            onkeydown(w_param, l_param);
+            break;
+        case WM_KEYUP:
+            onkeyup(w_param, l_param);
+            break;
+        default:
+            return DefWindowProcW(h_wnd, message, w_param, l_param);
+    }
+    return 0;
+}
+
+// Creates the window and all long-lived D3D12/DXGI objects: device, direct and copy
+// queues, swap chain, descriptor heaps, the 18 sampler descriptors, command
+// allocators and lists, fences and the persistently mapped vertex buffer.
+static void gfx_d3d12_dxgi_init(const char *window_title) {
+    LARGE_INTEGER qpc_init, qpc_freq;
+    QueryPerformanceCounter(&qpc_init);
+    QueryPerformanceFrequency(&qpc_freq);
+    d3d.qpc_init = qpc_init.QuadPart;
+    d3d.qpc_freq = qpc_freq.QuadPart;
+
+    // Prepare window title
+
+    // Convert at most capacity - 1 characters and terminate explicitly: mbstowcs does
+    // not terminate when the limit is reached and returns (size_t)-1 on invalid input.
+    wchar_t w_title[512];
+    const size_t w_title_cap = sizeof(w_title) / sizeof(w_title[0]);
+    size_t w_title_len = mbstowcs(w_title, window_title, w_title_cap - 1);
+    if (w_title_len == (size_t)-1) {
+        w_title_len = 0;
+    }
+    w_title[w_title_len] = L'\0';
+
+    // Create window
+    WNDCLASSEXW wcex;
+
+    wcex.cbSize = sizeof(wcex);
+
+    wcex.style = CS_HREDRAW | CS_VREDRAW;
+    wcex.lpfnWndProc = gfx_d3d12_dxgi_wnd_proc;
+    wcex.cbClsExtra = 0;
+    wcex.cbWndExtra = 0;
+    wcex.hInstance = nullptr;
+    wcex.hIcon = nullptr;
+    wcex.hCursor = LoadCursor(nullptr, IDC_ARROW);
+    wcex.hbrBackground = (HBRUSH)(COLOR_WINDOW+1);
+    wcex.lpszMenuName = nullptr;
+    wcex.lpszClassName = WINCLASS_NAME;
+    wcex.hIconSm = nullptr;
+
+    RegisterClassExW(&wcex);
+
+    RECT wr = {0, 0, DESIRED_SCREEN_WIDTH, DESIRED_SCREEN_HEIGHT};
+    AdjustWindowRect(&wr, WS_OVERLAPPEDWINDOW, FALSE);
+
+    HWND h_wnd = CreateWindowW(WINCLASS_NAME, w_title, WS_OVERLAPPEDWINDOW,
+        CW_USEDEFAULT, 0, wr.right - wr.left, wr.bottom - wr.top, nullptr, nullptr, nullptr, nullptr);
+
+    // Create device
+    {
+        UINT debug_flags = 0;
+#if DEBUG_D3D
+        ComPtr<ID3D12Debug> debug_controller;
+        if (SUCCEEDED(D3D12GetDebugInterface(IID_ID3D12Debug, IID_PPV_ARGS_Helper(&debug_controller)))) {
+            debug_controller->EnableDebugLayer();
+            debug_flags |= DXGI_CREATE_FACTORY_DEBUG;
+        }
+#endif
+
+        ThrowIfFailed(CreateDXGIFactory2(debug_flags, IID_IDXGIFactory4, &d3d.factory));
+        // Pick the first non-software adapter that can create a feature level 11_0 device.
+        ComPtr<IDXGIAdapter1> hw_adapter;
+        for (UINT i = 0; d3d.factory->EnumAdapters1(i, &hw_adapter) != DXGI_ERROR_NOT_FOUND; i++) {
+            DXGI_ADAPTER_DESC1 desc;
+            hw_adapter->GetDesc1(&desc);
+            if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) {
+                continue;
+            }
+            if (SUCCEEDED(D3D12CreateDevice(hw_adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_ID3D12Device, nullptr))) {
+                break;
+            }
+        }
+        ThrowIfFailed(D3D12CreateDevice(hw_adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_ID3D12Device, IID_PPV_ARGS_Helper(&d3d.device)));
+    }
+
+    // Create command queues
+    {
+        D3D12_COMMAND_QUEUE_DESC queue_desc = {};
+        queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
+        queue_desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
+        ThrowIfFailed(d3d.device->CreateCommandQueue(&queue_desc, IID_ID3D12CommandQueue, IID_PPV_ARGS_Helper(&d3d.command_queue)));
+    }
+    {
+        D3D12_COMMAND_QUEUE_DESC queue_desc = {};
+        queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
+        queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COPY;
+        ThrowIfFailed(d3d.device->CreateCommandQueue(&queue_desc, IID_ID3D12CommandQueue, IID_PPV_ARGS_Helper(&d3d.copy_command_queue)));
+    }
+
+    // Create swap chain
+    {
+        DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {};
+        swap_chain_desc.BufferCount = 2;
+        swap_chain_desc.Width = DESIRED_SCREEN_WIDTH;
+        swap_chain_desc.Height = DESIRED_SCREEN_HEIGHT;
+        swap_chain_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+        swap_chain_desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
+        swap_chain_desc.Scaling = DXGI_SCALING_NONE;
+        swap_chain_desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL;
+        swap_chain_desc.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT;
+        swap_chain_desc.SampleDesc.Count = 1;
+
+        ComPtr<IDXGISwapChain1> swap_chain1;
+        ThrowIfFailed(d3d.factory->CreateSwapChainForHwnd(d3d.command_queue.Get(), h_wnd, &swap_chain_desc, nullptr, nullptr, &swap_chain1));
+        //ThrowIfFailed(factory->MakeWindowAssociation(h_wnd, DXGI_MWA_NO_ALT_ENTER));
+        ThrowIfFailed(swap_chain1->QueryInterface(IID_IDXGISwapChain3, &d3d.swap_chain));
+        d3d.frame_index = d3d.swap_chain->GetCurrentBackBufferIndex();
+        ThrowIfFailed(d3d.swap_chain->SetMaximumFrameLatency(1));
+        d3d.waitable_object = d3d.swap_chain->GetFrameLatencyWaitableObject();
+        WaitForSingleObject(d3d.waitable_object, INFINITE);
+    }
+
+    // Create render target views
+    {
+        D3D12_DESCRIPTOR_HEAP_DESC rtv_heap_desc = {};
+        rtv_heap_desc.NumDescriptors = 2;
+        rtv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
+        rtv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
+        ThrowIfFailed(d3d.device->CreateDescriptorHeap(&rtv_heap_desc, IID_ID3D12DescriptorHeap, IID_PPV_ARGS_Helper(&d3d.rtv_heap)));
+        d3d.rtv_descriptor_size = d3d.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
+
+        create_render_target_views();
+    }
+
+    // Create Z-buffer
+    {
+        D3D12_DESCRIPTOR_HEAP_DESC dsv_heap_desc = {};
+        dsv_heap_desc.NumDescriptors = 1;
+        dsv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV;
+        dsv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
+        ThrowIfFailed(d3d.device->CreateDescriptorHeap(&dsv_heap_desc, IID_ID3D12DescriptorHeap, IID_PPV_ARGS_Helper(&d3d.dsv_heap)));
+
+        create_depth_buffer();
+    }
+
+    // Create SRV heap for texture descriptors
+    {
+        D3D12_DESCRIPTOR_HEAP_DESC srv_heap_desc = {};
+        srv_heap_desc.NumDescriptors = 1024; // Max unique textures per frame
+        srv_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+        srv_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
+        ThrowIfFailed(d3d.device->CreateDescriptorHeap(&srv_heap_desc, IID_ID3D12DescriptorHeap, IID_PPV_ARGS_Helper(&d3d.srv_heap)));
+        d3d.srv_descriptor_size = d3d.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+    }
+
+    // Create sampler heap and descriptors
+    {
+        D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_desc = {};
+        sampler_heap_desc.NumDescriptors = 18;
+        sampler_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+        sampler_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
+        ThrowIfFailed(d3d.device->CreateDescriptorHeap(&sampler_heap_desc, IID_ID3D12DescriptorHeap, IID_PPV_ARGS_Helper(&d3d.sampler_heap)));
+        d3d.sampler_descriptor_size = d3d.device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
+
+        static const D3D12_TEXTURE_ADDRESS_MODE address_modes[] = {
+            D3D12_TEXTURE_ADDRESS_MODE_WRAP,
+            D3D12_TEXTURE_ADDRESS_MODE_MIRROR,
+            D3D12_TEXTURE_ADDRESS_MODE_CLAMP
+        };
+
+        // Descriptor order is linear_filter * 9 + cms * 3 + cmt (2 x 3 x 3 = 18 entries).
+        D3D12_CPU_DESCRIPTOR_HANDLE sampler_handle = get_cpu_descriptor_handle(d3d.sampler_heap);
+        int pos = 0;
+        for (int linear_filter = 0; linear_filter < 2; linear_filter++) {
+            for (int cms = 0; cms < 3; cms++) {
+                for (int cmt = 0; cmt < 3; cmt++) {
+                    D3D12_SAMPLER_DESC sampler_desc = {};
+                    sampler_desc.Filter = linear_filter ? D3D12_FILTER_MIN_MAG_MIP_LINEAR : D3D12_FILTER_MIN_MAG_MIP_POINT;
+                    sampler_desc.AddressU = address_modes[cms];
+                    sampler_desc.AddressV = address_modes[cmt];
+                    sampler_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
+                    sampler_desc.MinLOD = 0;
+                    sampler_desc.MaxLOD = D3D12_FLOAT32_MAX;
+                    sampler_desc.MipLODBias = 0.0f;
+                    sampler_desc.MaxAnisotropy = 1;
+                    sampler_desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
+                    d3d.device->CreateSampler(&sampler_desc, CD3DX12_CPU_DESCRIPTOR_HANDLE(sampler_handle, pos++, d3d.sampler_descriptor_size));
+                }
+            }
+        }
+    }
+
+    ThrowIfFailed(d3d.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_ID3D12CommandAllocator, IID_PPV_ARGS_Helper(&d3d.command_allocator)));
+    ThrowIfFailed(d3d.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_ID3D12CommandAllocator, IID_PPV_ARGS_Helper(&d3d.copy_command_allocator)));
+
+    ThrowIfFailed(d3d.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, d3d.command_allocator.Get(), nullptr, IID_ID3D12GraphicsCommandList, IID_PPV_ARGS_Helper(&d3d.command_list)));
+    ThrowIfFailed(d3d.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COPY, d3d.copy_command_allocator.Get(), nullptr, IID_ID3D12GraphicsCommandList, IID_PPV_ARGS_Helper(&d3d.copy_command_list)));
+
+    // The direct list is closed here and reset at the start of each frame;
+    // the copy list stays open so texture uploads can be recorded at any time.
+    ThrowIfFailed(d3d.command_list->Close());
+
+    ThrowIfFailed(d3d.device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_ID3D12Fence, IID_PPV_ARGS_Helper(&d3d.fence)));
+    d3d.fence_event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
+    if (d3d.fence_event == nullptr) {
+        ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError()));
+    }
+
+    ThrowIfFailed(d3d.device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_ID3D12Fence, IID_PPV_ARGS_Helper(&d3d.copy_fence)));
+
+    {
+        // Create a buffer of 1 MB in size. With a 120 star speed run 192 kB seems to be max usage.
+        CD3DX12_HEAP_PROPERTIES hp(D3D12_HEAP_TYPE_UPLOAD);
+        CD3DX12_RESOURCE_DESC rdb = CD3DX12_RESOURCE_DESC::Buffer(256 * 1024 * sizeof(float));
+        ThrowIfFailed(d3d.device->CreateCommittedResource(
+            &hp,
+            D3D12_HEAP_FLAG_NONE,
+            &rdb,
+            D3D12_RESOURCE_STATE_GENERIC_READ,
+            nullptr,
+            IID_ID3D12Resource,
+            IID_PPV_ARGS_Helper(&d3d.vertex_buffer)));
+
+        CD3DX12_RANGE read_range(0, 0); // Read not possible from CPU
+        ThrowIfFailed(d3d.vertex_buffer->Map(0, &read_range, &d3d.mapped_vbuf_address));
+    }
+
+    ShowWindow(h_wnd, SW_SHOW);
+    UpdateWindow(h_wnd);
+}
+
+static void gfx_d3d12_dxgi_shutdown(void) {
+    if (d3d.render_targets[0].Get() != nullptr) {
+        d3d.render_targets[0].Reset();
+        d3d.render_targets[1].Reset();
+    }
+
+    // uhh
+}
+
+// Stores the keyboard callbacks invoked from the window procedure.
+static void gfx_d3d12_dxgi_set_keyboard_callbacks(bool (*on_key_down)(int scancode), bool (*on_key_up)(int scancode), void (*on_all_keys_up)(void)) {
+    d3d.on_key_down = on_key_down;
+    d3d.on_key_up = on_key_up;
+    d3d.on_all_keys_up = on_all_keys_up;
+}
+
+// Runs the Win32 message pump until GetMessage returns 0; game iterations are
+// executed from the WM_PAINT handler.
+static void gfx_d3d12_dxgi_main_loop(void (*run_one_game_iter)(void)) {
+    d3d.run_one_game_iter = run_one_game_iter;
+
+    MSG msg;
+    while (GetMessage(&msg, nullptr, 0, 0)) {
+        TranslateMessage(&msg);
+        DispatchMessage(&msg);
+    }
+}
+
+static void gfx_d3d12_dxgi_get_dimensions(uint32_t *width, uint32_t *height) {
+    *width = d3d.current_width;
+    *height = d3d.current_height;
+}
+
+static void gfx_d3d12_dxgi_handle_events(void) {
+    /*MSG msg;
+    while (PeekMessageW(&msg, nullptr, 0, 0, PM_REMOVE)) {
+        TranslateMessage(&msg);
+        DispatchMessage(&msg);
+    }*/
+}
+
+// Converts performance counter ticks to microseconds. Whole seconds and the
+// remainder are converted separately so the multiplication by 1000000 cannot overflow.
+static uint64_t qpc_to_us(uint64_t qpc) {
+    return qpc / d3d.qpc_freq * 1000000 + qpc % d3d.qpc_freq * 1000000 / d3d.qpc_freq;
+}
+
+// Frame pacing. Uses DXGI frame statistics to estimate the vsync interval and
+// decides how many vsyncs the next Present should span (length_in_vsync_frames).
+// Returns false, with dropped_frame set, when the frame should be skipped.
+static bool gfx_d3d12_dxgi_start_frame(void) {
+    DXGI_FRAME_STATISTICS stats;
+    if (d3d.swap_chain->GetFrameStatistics(&stats) == S_OK && (stats.SyncRefreshCount != 0 || stats.SyncQPCTime.QuadPart != 0ULL)) {
+        {
+            LARGE_INTEGER t0;
+            QueryPerformanceCounter(&t0);
+            //printf("Get frame stats: %llu\n", (unsigned long long)(t0.QuadPart - d3d.qpc_init));
+        }
+        //printf("stats: %u %u %u %u %u %.6f\n", d3d.pending_frame_stats.rbegin()->first, d3d.pending_frame_stats.rbegin()->second, stats.PresentCount, stats.PresentRefreshCount, stats.SyncRefreshCount, (double)(stats.SyncQPCTime.QuadPart - d3d.qpc_init) / d3d.qpc_freq);
+        if (d3d.frame_stats.empty() || d3d.frame_stats.rbegin()->second.PresentCount != stats.PresentCount) {
+            d3d.frame_stats.insert(std::make_pair(stats.PresentCount, stats));
+        }
+        if (d3d.frame_stats.size() > 3) {
+            d3d.frame_stats.erase(d3d.frame_stats.begin());
+        }
+    }
+    if (!d3d.frame_stats.empty()) {
+        while (!d3d.pending_frame_stats.empty() && d3d.pending_frame_stats.begin()->first < d3d.frame_stats.rbegin()->first) {
+            d3d.pending_frame_stats.erase(d3d.pending_frame_stats.begin());
+        }
+    }
+    while (d3d.pending_frame_stats.size() > 15) {
+        // Just make sure the list doesn't grow too large if GetFrameStatistics fails.
+        d3d.pending_frame_stats.erase(d3d.pending_frame_stats.begin());
+    }
+
+    d3d.frame_timestamp += FRAME_INTERVAL_US_NUMERATOR;
+
+    if (d3d.frame_stats.size() >= 2) {
+        DXGI_FRAME_STATISTICS *first = &d3d.frame_stats.begin()->second;
+        DXGI_FRAME_STATISTICS *last = &d3d.frame_stats.rbegin()->second;
+        uint64_t sync_qpc_diff = last->SyncQPCTime.QuadPart - first->SyncQPCTime.QuadPart;
+        UINT sync_vsync_diff = last->SyncRefreshCount - first->SyncRefreshCount;
+
+        if (sync_vsync_diff == 0) {
+            sync_vsync_diff = 1;
+        }
+
+        double estimated_vsync_interval = (double)sync_qpc_diff / (double)sync_vsync_diff;
+        //printf("Estimated vsync_interval: %f\n", estimated_vsync_interval);
+        uint64_t estimated_vsync_interval_us = qpc_to_us(estimated_vsync_interval);
+        if (estimated_vsync_interval_us < 2 || estimated_vsync_interval_us > 1000000) {
+            // Unreasonable, maybe a monitor change
+            estimated_vsync_interval_us = 16666;
+            estimated_vsync_interval = estimated_vsync_interval_us * d3d.qpc_freq / 1000000;
+        }
+
+        UINT queued_vsyncs = 0;
+        bool is_first = true;
+        for (const auto& p : d3d.pending_frame_stats) {
+            if (is_first && d3d.sync_interval_means_frames_to_wait) {
+                is_first = false;
+                continue;
+            }
+            queued_vsyncs += p.second;
+        }
+
+        uint64_t last_frame_present_end_qpc = (last->SyncQPCTime.QuadPart - d3d.qpc_init) + estimated_vsync_interval * queued_vsyncs;
+        uint64_t last_end_us = qpc_to_us(last_frame_present_end_qpc);
+
+        double vsyncs_to_wait = (double)(int64_t)(d3d.frame_timestamp / FRAME_INTERVAL_US_DENOMINATOR - last_end_us) / estimated_vsync_interval_us;
+        //printf("ts: %llu, last_end_us: %llu, Init v: %f\n", d3d.frame_timestamp / 3, last_end_us, vsyncs_to_wait);
+
+        if (vsyncs_to_wait <= 0) {
+            // Too late
+
+            if ((int64_t)(d3d.frame_timestamp / FRAME_INTERVAL_US_DENOMINATOR - last_end_us) < -66666) {
+                // The application must have been paused or similar
+                vsyncs_to_wait = round(((double)FRAME_INTERVAL_US_NUMERATOR / FRAME_INTERVAL_US_DENOMINATOR) / estimated_vsync_interval_us);
+                if (vsyncs_to_wait < 1) {
+                    vsyncs_to_wait = 1;
+                }
+                d3d.frame_timestamp = FRAME_INTERVAL_US_DENOMINATOR * (last_end_us + vsyncs_to_wait * estimated_vsync_interval_us);
+            } else {
+                // Drop frame
+                //printf("Dropping frame\n");
+                d3d.dropped_frame = true;
+                return false;
+            }
+        }
+        if (floor(vsyncs_to_wait) != vsyncs_to_wait) {
+            uint64_t left = last_end_us + floor(vsyncs_to_wait) * estimated_vsync_interval_us;
+            uint64_t right = last_end_us + ceil(vsyncs_to_wait) * estimated_vsync_interval_us;
+            uint64_t adjusted_desired_time = d3d.frame_timestamp / FRAME_INTERVAL_US_DENOMINATOR + (last_end_us + (FRAME_INTERVAL_US_NUMERATOR / FRAME_INTERVAL_US_DENOMINATOR) > d3d.frame_timestamp / FRAME_INTERVAL_US_DENOMINATOR ? 2000 : -2000);
+            int64_t diff_left = adjusted_desired_time - left;
+            int64_t diff_right = right - adjusted_desired_time;
+            if (diff_left < 0) {
+                diff_left = -diff_left;
+            }
+            if (diff_right < 0) {
+                diff_right = -diff_right;
+            }
+            if (diff_left < diff_right) {
+                vsyncs_to_wait = floor(vsyncs_to_wait);
+            } else {
+                vsyncs_to_wait = ceil(vsyncs_to_wait);
+            }
+            if (vsyncs_to_wait == 0) {
+                //printf("vsyncs_to_wait became 0 so dropping frame\n");
+                d3d.dropped_frame = true;
+                return false;
+            }
+        }
+        //printf("v: %d\n", (int)vsyncs_to_wait);
+        if (vsyncs_to_wait > 4) {
+            // Invalid, so change to 4
+            vsyncs_to_wait = 4;
+        }
+        d3d.length_in_vsync_frames = vsyncs_to_wait;
+    } else {
+        d3d.length_in_vsync_frames = 2;
+    }
+
+    return true;
+}
+
+// Submits the frame: executes the pending texture copies, makes the direct queue
+// wait for them, executes the draw commands and presents.
+static void gfx_d3d12_dxgi_swap_buffers_begin(void) {
+    if (max_texture_uploads < texture_uploads && texture_uploads != 38 && texture_uploads != 34 && texture_uploads != 29) {
+        max_texture_uploads = texture_uploads;
+    }
+    //printf("Texture uploads: %d %d\n", max_texture_uploads, texture_uploads);
+    texture_uploads = 0;
+
+
ThrowIfFailed(d3d.copy_command_list->Close()); + { + ID3D12CommandList *lists[] = { d3d.copy_command_list.Get() }; + d3d.copy_command_queue->ExecuteCommandLists(1, lists); + d3d.copy_command_queue->Signal(d3d.copy_fence.Get(), ++d3d.copy_fence_value); + } + + CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition( + d3d.render_targets[d3d.frame_index].Get(), + D3D12_RESOURCE_STATE_RENDER_TARGET, + D3D12_RESOURCE_STATE_PRESENT); + d3d.command_list->ResourceBarrier(1, &barrier); + + d3d.command_queue->Wait(d3d.copy_fence.Get(), d3d.copy_fence_value); + + ThrowIfFailed(d3d.command_list->Close()); + + { + ID3D12CommandList *lists[] = { d3d.command_list.Get() }; + d3d.command_queue->ExecuteCommandLists(1, lists); + } + + { + LARGE_INTEGER t0; + QueryPerformanceCounter(&t0); + //printf("Present: %llu %u\n", (unsigned long long)(t0.QuadPart - d3d.qpc_init), d3d.length_in_vsync_frames); + } + + //d3d.length_in_vsync_frames = 1; + ThrowIfFailed(d3d.swap_chain->Present(d3d.length_in_vsync_frames, 0)); + UINT this_present_id; + if (d3d.swap_chain->GetLastPresentCount(&this_present_id) == S_OK) { + d3d.pending_frame_stats.insert(std::make_pair(this_present_id, d3d.length_in_vsync_frames)); + } + d3d.dropped_frame = false; +} + +static void gfx_d3d12_dxgi_swap_buffers_end(void) { + LARGE_INTEGER t0, t1, t2; + QueryPerformanceCounter(&t0); + + static UINT64 fence_value; + ThrowIfFailed(d3d.command_queue->Signal(d3d.fence.Get(), ++fence_value)); + if (d3d.fence->GetCompletedValue() < fence_value) { + ThrowIfFailed(d3d.fence->SetEventOnCompletion(fence_value, d3d.fence_event)); + WaitForSingleObject(d3d.fence_event, INFINITE); + } + QueryPerformanceCounter(&t1); + + d3d.resources_to_clean_at_end_of_frame.clear(); + for (std::pair>& heap : d3d.upload_heaps_in_flight) { + d3d.upload_heaps[heap.first].push_back(std::move(heap.second)); + } + d3d.upload_heaps_in_flight.clear(); + for (std::pair& item : d3d.texture_heap_allocations_to_reclaim_at_end_of_frame) { + 
item.first->free_list.push_back(item.second); + } + d3d.texture_heap_allocations_to_reclaim_at_end_of_frame.clear(); + + if (!d3d.dropped_frame) { + WaitForSingleObject(d3d.waitable_object, INFINITE); + } + + DXGI_FRAME_STATISTICS stats; + d3d.swap_chain->GetFrameStatistics(&stats); + + QueryPerformanceCounter(&t2); + + d3d.frame_index = d3d.swap_chain->GetCurrentBackBufferIndex(); + + ThrowIfFailed(d3d.copy_command_allocator->Reset()); + ThrowIfFailed(d3d.copy_command_list->Reset(d3d.copy_command_allocator.Get(), nullptr)); + + d3d.sync_interval_means_frames_to_wait = d3d.pending_frame_stats.rbegin()->first == stats.PresentCount; + + //printf("done %llu gpu:%d wait:%d freed:%llu frame:%u %u monitor:%u t:%llu\n", (unsigned long long)(t0.QuadPart - d3d.qpc_init), (int)(t1.QuadPart - t0.QuadPart), (int)(t2.QuadPart - t0.QuadPart), (unsigned long long)(t2.QuadPart - d3d.qpc_init), d3d.pending_frame_stats.rbegin()->first, stats.PresentCount, stats.SyncRefreshCount, (unsigned long long)(stats.SyncQPCTime.QuadPart - d3d.qpc_init)); +} + +double gfx_d3d12_dxgi_get_time(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + return (double)(t.QuadPart - d3d.qpc_init) / d3d.qpc_freq; +} + +struct GfxRenderingAPI gfx_d3d12_api = { + gfx_d3d12_z_is_from_0_to_1, + gfx_d3d12_unload_shader, + gfx_d3d12_load_shader, + gfx_d3d12_create_and_load_new_shader, + gfx_d3d12_lookup_shader, + gfx_d3d12_shader_get_info, + gfx_d3d12_new_texture, + gfx_d3d12_select_texture, + gfx_d3d12_upload_texture, + gfx_d3d12_set_sampler_parameters, + gfx_d3d12_set_depth_test, + gfx_d3d12_set_depth_mask, + gfx_d3d12_set_zmode_decal, + gfx_d3d12_set_viewport, + gfx_d3d12_set_scissor, + gfx_d3d12_set_use_alpha, + gfx_d3d12_draw_triangles, + gfx_d3d12_init, + gfx_d3d12_start_frame, + gfx_d3d12_shutdown, +}; + +struct GfxWindowManagerAPI gfx_dxgi = { + gfx_d3d12_dxgi_init, + gfx_d3d12_dxgi_set_keyboard_callbacks, + gfx_d3d12_dxgi_main_loop, + gfx_d3d12_dxgi_get_dimensions, + gfx_d3d12_dxgi_handle_events, 
+ gfx_d3d12_dxgi_start_frame, + gfx_d3d12_dxgi_swap_buffers_begin, + gfx_d3d12_dxgi_swap_buffers_end, + gfx_d3d12_dxgi_get_time, + gfx_d3d12_dxgi_shutdown, +}; + +#else + +#error "D3D12 is only supported on Windows" + +#endif // _WIN32 + +#endif // RAPI_D3D12 diff --git a/src/pc/gfx/gfx_direct3d12.h b/src/pc/gfx/gfx_direct3d12.h new file mode 100644 index 00000000..7b8f5a66 --- /dev/null +++ b/src/pc/gfx/gfx_direct3d12.h @@ -0,0 +1,10 @@ +#ifndef GFX_DIRECT3D12_H +#define GFX_DIRECT3D12_H + +#include "gfx_window_manager_api.h" +#include "gfx_rendering_api.h" + +extern struct GfxWindowManagerAPI gfx_dxgi; +extern struct GfxRenderingAPI gfx_d3d12_api; + +#endif diff --git a/src/pc/gfx/gfx_direct3d_common.cpp b/src/pc/gfx/gfx_direct3d_common.cpp new file mode 100644 index 00000000..334814bd --- /dev/null +++ b/src/pc/gfx/gfx_direct3d_common.cpp @@ -0,0 +1,143 @@ +#if (defined(RAPI_D3D11) || defined(RAPI_D3D12)) && (defined(_WIN32) || defined(_WIN64)) + +#include + +extern "C" { +#include "../platform.h" +} + +#include "gfx_direct3d_common.h" +#include "gfx_cc.h" + +void ThrowIfFailed(HRESULT res) { + if (FAILED(res)) + sys_fatal("error while initializing D3D:\nerror code 0x%08X", res); +} + +void ThrowIfFailed(HRESULT res, HWND h_wnd, const char *message) { + if (FAILED(res)) + sys_fatal("%s\nerror code 0x%08X", message, res); +} + +void get_cc_features(uint32_t shader_id, CCFeatures *cc_features) { + for (int i = 0; i < 4; i++) { + cc_features->c[0][i] = (shader_id >> (i * 3)) & 7; + cc_features->c[1][i] = (shader_id >> (12 + i * 3)) & 7; + } + + cc_features->opt_alpha = (shader_id & SHADER_OPT_ALPHA) != 0; + cc_features->opt_fog = (shader_id & SHADER_OPT_FOG) != 0; + cc_features->opt_texture_edge = (shader_id & SHADER_OPT_TEXTURE_EDGE) != 0; + cc_features->opt_noise = (shader_id & SHADER_OPT_NOISE) != 0; + + cc_features->used_textures[0] = false; + cc_features->used_textures[1] = false; + cc_features->num_inputs = 0; + + for (int i = 0; i < 2; i++) { + for (int j = 
0; j < 4; j++) { + if (cc_features->c[i][j] >= SHADER_INPUT_1 && cc_features->c[i][j] <= SHADER_INPUT_4) { + if (cc_features->c[i][j] > cc_features->num_inputs) { + cc_features->num_inputs = cc_features->c[i][j]; + } + } + if (cc_features->c[i][j] == SHADER_TEXEL0 || cc_features->c[i][j] == SHADER_TEXEL0A) { + cc_features->used_textures[0] = true; + } + if (cc_features->c[i][j] == SHADER_TEXEL1) { + cc_features->used_textures[1] = true; + } + } + } + + cc_features->do_single[0] = cc_features->c[0][2] == 0; + cc_features->do_single[1] = cc_features->c[1][2] == 0; + cc_features->do_multiply[0] = cc_features->c[0][1] == 0 && cc_features->c[0][3] == 0; + cc_features->do_multiply[1] = cc_features->c[1][1] == 0 && cc_features->c[1][3] == 0; + cc_features->do_mix[0] = cc_features->c[0][1] == cc_features->c[0][3]; + cc_features->do_mix[1] = cc_features->c[1][1] == cc_features->c[1][3]; + cc_features->color_alpha_same = (shader_id & 0xfff) == ((shader_id >> 12) & 0xfff); +} + +void append_str(char *buf, size_t *len, const char *str) { + while (*str != '\0') buf[(*len)++] = *str++; +} + +void append_line(char *buf, size_t *len, const char *str) { + while (*str != '\0') buf[(*len)++] = *str++; + buf[(*len)++] = '\r'; + buf[(*len)++] = '\n'; +} + +const char *shader_item_to_str(uint32_t item, bool with_alpha, bool only_alpha, bool inputs_have_alpha, bool hint_single_element) { + if (!only_alpha) { + switch (item) { + default: + case SHADER_0: + return with_alpha ? "float4(0.0, 0.0, 0.0, 0.0)" : "float3(0.0, 0.0, 0.0)"; + case SHADER_INPUT_1: + return with_alpha || !inputs_have_alpha ? "input.input1" : "input.input1.rgb"; + case SHADER_INPUT_2: + return with_alpha || !inputs_have_alpha ? "input.input2" : "input.input2.rgb"; + case SHADER_INPUT_3: + return with_alpha || !inputs_have_alpha ? "input.input3" : "input.input3.rgb"; + case SHADER_INPUT_4: + return with_alpha || !inputs_have_alpha ? "input.input4" : "input.input4.rgb"; + case SHADER_TEXEL0: + return with_alpha ? 
"texVal0" : "texVal0.rgb"; + case SHADER_TEXEL0A: + return hint_single_element ? "texVal0.a" : (with_alpha ? "float4(texVal0.a, texVal0.a, texVal0.a, texVal0.a)" : "float3(texVal0.a, texVal0.a, texVal0.a)"); + case SHADER_TEXEL1: + return with_alpha ? "texVal1" : "texVal1.rgb"; + } + } else { + switch (item) { + default: + case SHADER_0: + return "0.0"; + case SHADER_INPUT_1: + return "input.input1.a"; + case SHADER_INPUT_2: + return "input.input2.a"; + case SHADER_INPUT_3: + return "input.input3.a"; + case SHADER_INPUT_4: + return "input.input4.a"; + case SHADER_TEXEL0: + return "texVal0.a"; + case SHADER_TEXEL0A: + return "texVal0.a"; + case SHADER_TEXEL1: + return "texVal1.a"; + } + } +} + +void append_formula(char *buf, size_t *len, uint8_t c[2][4], bool do_single, bool do_multiply, bool do_mix, bool with_alpha, bool only_alpha, bool opt_alpha) { + if (do_single) { + append_str(buf, len, shader_item_to_str(c[only_alpha][3], with_alpha, only_alpha, opt_alpha, false)); + } else if (do_multiply) { + append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, " * "); + append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true)); + } else if (do_mix) { + append_str(buf, len, "lerp("); + append_str(buf, len, shader_item_to_str(c[only_alpha][1], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, ", "); + append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, ", "); + append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true)); + append_str(buf, len, ")"); + } else { + append_str(buf, len, "("); + append_str(buf, len, shader_item_to_str(c[only_alpha][0], with_alpha, only_alpha, opt_alpha, false)); + append_str(buf, len, " - "); + append_str(buf, len, shader_item_to_str(c[only_alpha][1], with_alpha, only_alpha, opt_alpha, false)); + 
append_str(buf, len, ") * "); + append_str(buf, len, shader_item_to_str(c[only_alpha][2], with_alpha, only_alpha, opt_alpha, true)); + append_str(buf, len, " + "); + append_str(buf, len, shader_item_to_str(c[only_alpha][3], with_alpha, only_alpha, opt_alpha, false)); + } +} + +#endif diff --git a/src/pc/gfx/gfx_direct3d_common.h b/src/pc/gfx/gfx_direct3d_common.h new file mode 100644 index 00000000..09769a7c --- /dev/null +++ b/src/pc/gfx/gfx_direct3d_common.h @@ -0,0 +1,33 @@ +#if defined(RAPI_D3D11) || defined(RAPI_D3D12) + +#ifndef GFX_DIRECT3D_COMMON_H +#define GFX_DIRECT3D_COMMON_H + +#include +#include + +struct CCFeatures { + uint8_t c[2][4]; + bool opt_alpha; + bool opt_fog; + bool opt_texture_edge; + bool opt_noise; + bool used_textures[2]; + uint32_t num_inputs; + bool do_single[2]; + bool do_multiply[2]; + bool do_mix[2]; + bool color_alpha_same; +}; + +void ThrowIfFailed(HRESULT res); +void ThrowIfFailed(HRESULT res, HWND h_wnd, const char *message); +void get_cc_features(uint32_t shader_id, CCFeatures *shader_features); +void append_str(char *buf, size_t *len, const char *str); +void append_line(char *buf, size_t *len, const char *str); +const char *shader_item_to_str(uint32_t item, bool with_alpha, bool only_alpha, bool inputs_have_alpha, bool hint_single_element); +void append_formula(char *buf, size_t *len, uint8_t c[2][4], bool do_single, bool do_multiply, bool do_mix, bool with_alpha, bool only_alpha, bool opt_alpha); + +#endif + +#endif diff --git a/src/pc/gfx/gfx_opengl.c b/src/pc/gfx/gfx_opengl.c index 5977e13b..9c0a0525 100644 --- a/src/pc/gfx/gfx_opengl.c +++ b/src/pc/gfx/gfx_opengl.c @@ -1,4 +1,4 @@ -#ifndef LEGACY_GL +#ifdef RAPI_GL #include #include @@ -29,9 +29,12 @@ #endif #include "../platform.h" +#include "../configfile.h" #include "gfx_cc.h" #include "gfx_rendering_api.h" +#define TEX_CACHE_STEP 512 + struct ShaderProgram { uint32_t shader_id; GLuint opengl_program_id; @@ -39,17 +42,30 @@ struct ShaderProgram { bool used_textures[2]; 
uint8_t num_floats; GLint attrib_locations[7]; + GLint uniform_locations[6]; uint8_t attrib_sizes[7]; uint8_t num_attribs; bool used_noise; - GLint frame_count_location; - GLint window_height_location; +}; + +struct GLTexture { + GLuint gltex; + GLfloat size[2]; + bool filter; }; static struct ShaderProgram shader_program_pool[64]; static uint8_t shader_program_pool_size; static GLuint opengl_vbo; +static int tex_cache_size = 0; +static int num_textures = 0; +static struct GLTexture *tex_cache = NULL; + +static struct ShaderProgram *opengl_prg = NULL; +static struct GLTexture *opengl_tex[2]; +static int opengl_curtex = 0; + static uint32_t frame_count; static uint32_t current_height; @@ -68,25 +84,38 @@ static void gfx_opengl_vertex_array_set_attribs(struct ShaderProgram *prg) { } } -static void gfx_opengl_set_uniforms(struct ShaderProgram *prg) { +static inline void gfx_opengl_set_shader_uniforms(struct ShaderProgram *prg) { if (prg->used_noise) { - glUniform1i(prg->frame_count_location, frame_count); - glUniform1i(prg->window_height_location, current_height); + glUniform1i(prg->uniform_locations[4], frame_count); + glUniform1i(prg->uniform_locations[5], current_height); + } +} + +static inline void gfx_opengl_set_texture_uniforms(struct ShaderProgram *prg, const int tile) { + if (prg->used_textures[tile] && opengl_tex[tile]) { + glUniform2f(prg->uniform_locations[tile*2 + 0], opengl_tex[tile]->size[0], opengl_tex[tile]->size[1]); + glUniform1i(prg->uniform_locations[tile*2 + 1], opengl_tex[tile]->filter); } } static void gfx_opengl_unload_shader(struct ShaderProgram *old_prg) { if (old_prg != NULL) { - for (int i = 0; i < old_prg->num_attribs; i++) { + for (int i = 0; i < old_prg->num_attribs; i++) glDisableVertexAttribArray(old_prg->attrib_locations[i]); - } + if (old_prg == opengl_prg) + opengl_prg = NULL; + } else { + opengl_prg = NULL; } } static void gfx_opengl_load_shader(struct ShaderProgram *new_prg) { + opengl_prg = new_prg; 
glUseProgram(new_prg->opengl_program_id); gfx_opengl_vertex_array_set_attribs(new_prg); - gfx_opengl_set_uniforms(new_prg); + gfx_opengl_set_shader_uniforms(new_prg); + gfx_opengl_set_texture_uniforms(new_prg, 0); + gfx_opengl_set_texture_uniforms(new_prg, 1); } static void append_str(char *buf, size_t *len, const char *str) { @@ -206,7 +235,7 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad bool color_alpha_same = (shader_id & 0xfff) == ((shader_id >> 12) & 0xfff); char vs_buf[1024]; - char fs_buf[1024]; + char fs_buf[2048]; size_t vs_len = 0; size_t fs_len = 0; size_t num_floats = 4; @@ -265,9 +294,41 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad } if (used_textures[0]) { append_line(fs_buf, &fs_len, "uniform sampler2D uTex0;"); + append_line(fs_buf, &fs_len, "uniform vec2 uTex0Size;"); + append_line(fs_buf, &fs_len, "uniform bool uTex0Filter;"); } if (used_textures[1]) { append_line(fs_buf, &fs_len, "uniform sampler2D uTex1;"); + append_line(fs_buf, &fs_len, "uniform vec2 uTex1Size;"); + append_line(fs_buf, &fs_len, "uniform bool uTex1Filter;"); + } + + // 3 point texture filtering + // Original author: ArthurCarvalho + // Slightly modified GLSL implementation by twinaphex, mupen64plus-libretro project. 
+ + if (used_textures[0] || used_textures[1]) { + if (configFiltering == 2) { + append_line(fs_buf, &fs_len, "#define TEX_OFFSET(off) texture2D(tex, texCoord - (off)/texSize)"); + append_line(fs_buf, &fs_len, "lowp vec4 filter3point(in sampler2D tex, in mediump vec2 texCoord, in mediump vec2 texSize) {"); + append_line(fs_buf, &fs_len, " mediump vec2 offset = fract(texCoord*texSize - vec2(0.5));"); + append_line(fs_buf, &fs_len, " offset -= step(1.0, offset.x + offset.y);"); + append_line(fs_buf, &fs_len, " lowp vec4 c0 = TEX_OFFSET(offset);"); + append_line(fs_buf, &fs_len, " lowp vec4 c1 = TEX_OFFSET(vec2(offset.x - sign(offset.x), offset.y));"); + append_line(fs_buf, &fs_len, " lowp vec4 c2 = TEX_OFFSET(vec2(offset.x, offset.y - sign(offset.y)));"); + append_line(fs_buf, &fs_len, " return c0 + abs(offset.x)*(c1-c0) + abs(offset.y)*(c2-c0);"); + append_line(fs_buf, &fs_len, "}"); + append_line(fs_buf, &fs_len, "lowp vec4 sampleTex(in sampler2D tex, in mediump vec2 uv, in mediump vec2 texSize, in bool filter) {"); + append_line(fs_buf, &fs_len, "if (filter)"); + append_line(fs_buf, &fs_len, "return filter3point(tex, uv, texSize);"); + append_line(fs_buf, &fs_len, "else"); + append_line(fs_buf, &fs_len, "return texture2D(tex, uv);"); + append_line(fs_buf, &fs_len, "}"); + } else { + append_line(fs_buf, &fs_len, "lowp vec4 sampleTex(in sampler2D tex, in mediump vec2 uv, in mediump vec2 texSize, in bool filter) {"); + append_line(fs_buf, &fs_len, "return texture2D(tex, uv);"); + append_line(fs_buf, &fs_len, "}"); + } } if (opt_alpha && opt_noise) { @@ -283,10 +344,10 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad append_line(fs_buf, &fs_len, "void main() {"); if (used_textures[0]) { - append_line(fs_buf, &fs_len, "vec4 texVal0 = texture2D(uTex0, vTexCoord);"); + append_line(fs_buf, &fs_len, "vec4 texVal0 = sampleTex(uTex0, vTexCoord, uTex0Size, uTex0Filter);"); } if (used_textures[1]) { - append_line(fs_buf, &fs_len, "vec4 texVal1 
= texture2D(uTex1, vTexCoord);"); + append_line(fs_buf, &fs_len, "vec4 texVal1 = sampleTex(uTex1, vTexCoord, uTex1Size, uTex1Filter);"); } append_str(fs_buf, &fs_len, opt_alpha ? "vec4 texel = " : "vec3 texel = "); @@ -409,16 +470,20 @@ static struct ShaderProgram *gfx_opengl_create_and_load_new_shader(uint32_t shad if (used_textures[0]) { GLint sampler_location = glGetUniformLocation(shader_program, "uTex0"); + prg->uniform_locations[0] = glGetUniformLocation(shader_program, "uTex0Size"); + prg->uniform_locations[1] = glGetUniformLocation(shader_program, "uTex0Filter"); glUniform1i(sampler_location, 0); } if (used_textures[1]) { GLint sampler_location = glGetUniformLocation(shader_program, "uTex1"); + prg->uniform_locations[2] = glGetUniformLocation(shader_program, "uTex1Size"); + prg->uniform_locations[3] = glGetUniformLocation(shader_program, "uTex1Filter"); glUniform1i(sampler_location, 1); } if (opt_alpha && opt_noise) { - prg->frame_count_location = glGetUniformLocation(shader_program, "frame_count"); - prg->window_height_location = glGetUniformLocation(shader_program, "window_height"); + prg->uniform_locations[4] = glGetUniformLocation(shader_program, "frame_count"); + prg->uniform_locations[5] = glGetUniformLocation(shader_program, "window_height"); prg->used_noise = true; } else { prg->used_noise = false; @@ -443,18 +508,30 @@ static void gfx_opengl_shader_get_info(struct ShaderProgram *prg, uint8_t *num_i } static GLuint gfx_opengl_new_texture(void) { - GLuint ret; - glGenTextures(1, &ret); - return ret; + if (num_textures >= tex_cache_size) { + tex_cache_size += TEX_CACHE_STEP; + tex_cache = realloc(tex_cache, sizeof(struct GLTexture) * tex_cache_size); + if (!tex_cache) sys_fatal("out of memory allocating texture cache"); + // invalidate these because they might be pointing to garbage now + opengl_tex[0] = NULL; + opengl_tex[1] = NULL; + } + glGenTextures(1, &tex_cache[num_textures].gltex); + return num_textures++; } static void 
gfx_opengl_select_texture(int tile, GLuint texture_id) { - glActiveTexture(GL_TEXTURE0 + tile); - glBindTexture(GL_TEXTURE_2D, texture_id); + opengl_tex[tile] = tex_cache + texture_id; + opengl_curtex = tile; + glActiveTexture(GL_TEXTURE0 + tile); + glBindTexture(GL_TEXTURE_2D, opengl_tex[tile]->gltex); + gfx_opengl_set_texture_uniforms(opengl_prg, tile); } static void gfx_opengl_upload_texture(uint8_t *rgba32_buf, int width, int height) { glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, rgba32_buf); + opengl_tex[opengl_curtex]->size[0] = width; + opengl_tex[opengl_curtex]->size[1] = height; } static uint32_t gfx_cm_to_opengl(uint32_t val) { @@ -471,6 +548,11 @@ static void gfx_opengl_set_sampler_parameters(int tile, bool linear_filter, uint glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, gfx_cm_to_opengl(cms)); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, gfx_cm_to_opengl(cmt)); + opengl_curtex = tile; + if (opengl_tex[tile]) { + opengl_tex[tile]->filter = linear_filter; + gfx_opengl_set_texture_uniforms(opengl_prg, tile); + } } static void gfx_opengl_set_depth_test(bool depth_test) { @@ -540,6 +622,10 @@ static void gfx_opengl_init(void) { sys_fatal("could not init GLEW:\n%s", glewGetErrorString(err)); #endif + tex_cache_size = TEX_CACHE_STEP; + tex_cache = calloc(tex_cache_size, sizeof(struct GLTexture)); + if (!tex_cache) sys_fatal("out of memory allocating texture cache"); + // check GL version int vmajor, vminor; bool is_es = false; @@ -591,4 +677,4 @@ struct GfxRenderingAPI gfx_opengl_api = { gfx_opengl_shutdown }; -#endif // !LEGACY_GL +#endif // RAPI_GL diff --git a/src/pc/gfx/gfx_opengl_legacy.c b/src/pc/gfx/gfx_opengl_legacy.c index d64674aa..4ab19c87 100644 --- a/src/pc/gfx/gfx_opengl_legacy.c +++ b/src/pc/gfx/gfx_opengl_legacy.c @@ -1,4 +1,4 @@ -#ifdef LEGACY_GL +#ifdef RAPI_GL_LEGACY #include #include @@ -603,4 +603,4 @@ struct GfxRenderingAPI 
gfx_opengl_api = { gfx_opengl_shutdown }; -#endif // LEGACY_GL +#endif // RAPI_GL_LEGACY diff --git a/src/pc/gfx/gfx_pc.c b/src/pc/gfx/gfx_pc.c index aec9df32..20430019 100644 --- a/src/pc/gfx/gfx_pc.c +++ b/src/pc/gfx/gfx_pc.c @@ -1719,9 +1719,20 @@ void gfx_get_dimensions(uint32_t *width, uint32_t *height) { } void gfx_init(struct GfxWindowManagerAPI *wapi, struct GfxRenderingAPI *rapi) { + char window_title[96] = + #ifndef USE_GLES + "Super Mario 64 PC port (OpenGL)" + #else + "Super Mario 64 PC port (OpenGL_ES2)" + #endif + #ifdef NIGHTLY + " nightly " GIT_HASH + #endif + ; + gfx_wapi = wapi; gfx_rapi = rapi; - gfx_wapi->init(); + gfx_wapi->init(window_title); gfx_rapi->init(); // Used in the 120 star TAS diff --git a/src/pc/gfx/gfx_sdl2.c b/src/pc/gfx/gfx_sdl2.c index 78833092..94020e62 100644 --- a/src/pc/gfx/gfx_sdl2.c +++ b/src/pc/gfx/gfx_sdl2.c @@ -1,3 +1,5 @@ +#ifdef WAPI_SDL2 + #ifdef __MINGW32__ #define FOR_WINDOWS 1 #else @@ -50,33 +52,36 @@ static SDL_GLContext ctx = NULL; static int inverted_scancode_table[512]; static Uint32 frame_start = 0; -const SDL_Scancode windows_scancode_table[] = -{ - /* 0 1 2 3 4 5 6 7 */ - /* 8 9 A B C D E F */ - SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_ESCAPE, SDL_SCANCODE_1, SDL_SCANCODE_2, SDL_SCANCODE_3, SDL_SCANCODE_4, SDL_SCANCODE_5, SDL_SCANCODE_6, /* 0 */ - SDL_SCANCODE_7, SDL_SCANCODE_8, SDL_SCANCODE_9, SDL_SCANCODE_0, SDL_SCANCODE_MINUS, SDL_SCANCODE_EQUALS, SDL_SCANCODE_BACKSPACE, SDL_SCANCODE_TAB, /* 0 */ +static kb_callback_t kb_key_down = NULL; +static kb_callback_t kb_key_up = NULL; +static void (*kb_all_keys_up)(void) = NULL; - SDL_SCANCODE_Q, SDL_SCANCODE_W, SDL_SCANCODE_E, SDL_SCANCODE_R, SDL_SCANCODE_T, SDL_SCANCODE_Y, SDL_SCANCODE_U, SDL_SCANCODE_I, /* 1 */ - SDL_SCANCODE_O, SDL_SCANCODE_P, SDL_SCANCODE_LEFTBRACKET, SDL_SCANCODE_RIGHTBRACKET, SDL_SCANCODE_RETURN, SDL_SCANCODE_LCTRL, SDL_SCANCODE_A, SDL_SCANCODE_S, /* 1 */ +const SDL_Scancode windows_scancode_table[] = { + /* 0 1 2 3 4 5 6 7 */ + /* 8 9 A B C 
D E F */ + SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_ESCAPE, SDL_SCANCODE_1, SDL_SCANCODE_2, SDL_SCANCODE_3, SDL_SCANCODE_4, SDL_SCANCODE_5, SDL_SCANCODE_6, /* 0 */ + SDL_SCANCODE_7, SDL_SCANCODE_8, SDL_SCANCODE_9, SDL_SCANCODE_0, SDL_SCANCODE_MINUS, SDL_SCANCODE_EQUALS, SDL_SCANCODE_BACKSPACE, SDL_SCANCODE_TAB, /* 0 */ - SDL_SCANCODE_D, SDL_SCANCODE_F, SDL_SCANCODE_G, SDL_SCANCODE_H, SDL_SCANCODE_J, SDL_SCANCODE_K, SDL_SCANCODE_L, SDL_SCANCODE_SEMICOLON, /* 2 */ - SDL_SCANCODE_APOSTROPHE, SDL_SCANCODE_GRAVE, SDL_SCANCODE_LSHIFT, SDL_SCANCODE_BACKSLASH, SDL_SCANCODE_Z, SDL_SCANCODE_X, SDL_SCANCODE_C, SDL_SCANCODE_V, /* 2 */ + SDL_SCANCODE_Q, SDL_SCANCODE_W, SDL_SCANCODE_E, SDL_SCANCODE_R, SDL_SCANCODE_T, SDL_SCANCODE_Y, SDL_SCANCODE_U, SDL_SCANCODE_I, /* 1 */ + SDL_SCANCODE_O, SDL_SCANCODE_P, SDL_SCANCODE_LEFTBRACKET, SDL_SCANCODE_RIGHTBRACKET, SDL_SCANCODE_RETURN, SDL_SCANCODE_LCTRL, SDL_SCANCODE_A, SDL_SCANCODE_S, /* 1 */ - SDL_SCANCODE_B, SDL_SCANCODE_N, SDL_SCANCODE_M, SDL_SCANCODE_COMMA, SDL_SCANCODE_PERIOD, SDL_SCANCODE_SLASH, SDL_SCANCODE_RSHIFT, SDL_SCANCODE_PRINTSCREEN,/* 3 */ - SDL_SCANCODE_LALT, SDL_SCANCODE_SPACE, SDL_SCANCODE_CAPSLOCK, SDL_SCANCODE_F1, SDL_SCANCODE_F2, SDL_SCANCODE_F3, SDL_SCANCODE_F4, SDL_SCANCODE_F5, /* 3 */ + SDL_SCANCODE_D, SDL_SCANCODE_F, SDL_SCANCODE_G, SDL_SCANCODE_H, SDL_SCANCODE_J, SDL_SCANCODE_K, SDL_SCANCODE_L, SDL_SCANCODE_SEMICOLON, /* 2 */ + SDL_SCANCODE_APOSTROPHE, SDL_SCANCODE_GRAVE, SDL_SCANCODE_LSHIFT, SDL_SCANCODE_BACKSLASH, SDL_SCANCODE_Z, SDL_SCANCODE_X, SDL_SCANCODE_C, SDL_SCANCODE_V, /* 2 */ - SDL_SCANCODE_F6, SDL_SCANCODE_F7, SDL_SCANCODE_F8, SDL_SCANCODE_F9, SDL_SCANCODE_F10, SDL_SCANCODE_NUMLOCKCLEAR, SDL_SCANCODE_SCROLLLOCK, SDL_SCANCODE_HOME, /* 4 */ - SDL_SCANCODE_UP, SDL_SCANCODE_PAGEUP, SDL_SCANCODE_KP_MINUS, SDL_SCANCODE_LEFT, SDL_SCANCODE_KP_5, SDL_SCANCODE_RIGHT, SDL_SCANCODE_KP_PLUS, SDL_SCANCODE_END, /* 4 */ + SDL_SCANCODE_B, SDL_SCANCODE_N, SDL_SCANCODE_M, SDL_SCANCODE_COMMA, SDL_SCANCODE_PERIOD, 
SDL_SCANCODE_SLASH, SDL_SCANCODE_RSHIFT, SDL_SCANCODE_PRINTSCREEN,/* 3 */ + SDL_SCANCODE_LALT, SDL_SCANCODE_SPACE, SDL_SCANCODE_CAPSLOCK, SDL_SCANCODE_F1, SDL_SCANCODE_F2, SDL_SCANCODE_F3, SDL_SCANCODE_F4, SDL_SCANCODE_F5, /* 3 */ - SDL_SCANCODE_DOWN, SDL_SCANCODE_PAGEDOWN, SDL_SCANCODE_INSERT, SDL_SCANCODE_DELETE, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_NONUSBACKSLASH,SDL_SCANCODE_F11, /* 5 */ - SDL_SCANCODE_F12, SDL_SCANCODE_PAUSE, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_LGUI, SDL_SCANCODE_RGUI, SDL_SCANCODE_APPLICATION, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, /* 5 */ + SDL_SCANCODE_F6, SDL_SCANCODE_F7, SDL_SCANCODE_F8, SDL_SCANCODE_F9, SDL_SCANCODE_F10, SDL_SCANCODE_NUMLOCKCLEAR, SDL_SCANCODE_SCROLLLOCK, SDL_SCANCODE_HOME, /* 4 */ + SDL_SCANCODE_UP, SDL_SCANCODE_PAGEUP, SDL_SCANCODE_KP_MINUS, SDL_SCANCODE_LEFT, SDL_SCANCODE_KP_5, SDL_SCANCODE_RIGHT, SDL_SCANCODE_KP_PLUS, SDL_SCANCODE_END, /* 4 */ - SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_F13, SDL_SCANCODE_F14, SDL_SCANCODE_F15, SDL_SCANCODE_F16, /* 6 */ - SDL_SCANCODE_F17, SDL_SCANCODE_F18, SDL_SCANCODE_F19, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, /* 6 */ + SDL_SCANCODE_DOWN, SDL_SCANCODE_PAGEDOWN, SDL_SCANCODE_INSERT, SDL_SCANCODE_DELETE, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_NONUSBACKSLASH, SDL_SCANCODE_F11, /* 5 */ + SDL_SCANCODE_F12, SDL_SCANCODE_PAUSE, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_LGUI, SDL_SCANCODE_RGUI, SDL_SCANCODE_APPLICATION, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, /* 5 */ - SDL_SCANCODE_INTERNATIONAL2, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_INTERNATIONAL1, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, /* 7 */ - SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_INTERNATIONAL4, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_INTERNATIONAL5, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_INTERNATIONAL3, 
SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN /* 7 */ + SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_F13, SDL_SCANCODE_F14, SDL_SCANCODE_F15, SDL_SCANCODE_F16, /* 6 */ + SDL_SCANCODE_F17, SDL_SCANCODE_F18, SDL_SCANCODE_F19, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, /* 6 */ + + SDL_SCANCODE_INTERNATIONAL2, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_INTERNATIONAL1, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN, /* 7 */ + SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_INTERNATIONAL4, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_INTERNATIONAL5, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_INTERNATIONAL3, SDL_SCANCODE_UNKNOWN, SDL_SCANCODE_UNKNOWN /* 7 */ }; const SDL_Scancode scancode_rmapping_extended[][2] = { @@ -140,7 +145,7 @@ static void gfx_sdl_reset_dimension_and_pos() { SDL_GL_SetSwapInterval(configWindow.vsync); // in case vsync changed } -static void gfx_sdl_init(void) { +static void gfx_sdl_init(const char *window_title) { SDL_Init(SDL_INIT_VIDEO); SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 24); @@ -160,17 +165,6 @@ static void gfx_sdl_init(void) { else if (gCLIOpts.FullScreen == 2) configWindow.fullscreen = false; - char window_title[96] = - #ifndef USE_GLES - "Super Mario 64 PC port (OpenGL)"; - #else - "Super Mario 64 PC port (OpenGL_ES2)"; - #endif - - #ifdef NIGHTLY - strcat(window_title, " nightly " GIT_HASH); - #endif - wnd = SDL_CreateWindow( window_title, configWindow.x, configWindow.y, configWindow.w, configWindow.h, @@ -223,7 +217,8 @@ static int translate_scancode(int scancode) { } static void gfx_sdl_onkeydown(int scancode) { - keyboard_on_key_down(translate_scancode(scancode)); + if (kb_key_down) + kb_key_down(translate_scancode(scancode)); const Uint8 *state = SDL_GetKeyboardState(NULL); @@ -234,7 +229,8 @@ static void gfx_sdl_onkeydown(int scancode) { } static void gfx_sdl_onkeyup(int scancode) { - 
keyboard_on_key_up(translate_scancode(scancode)); + if (kb_key_up) + kb_key_up(translate_scancode(scancode)); } static void gfx_sdl_handle_events(void) { @@ -277,6 +273,12 @@ static void gfx_sdl_handle_events(void) { gfx_sdl_set_fullscreen(); } +static void gfx_sdl_set_keyboard_callbacks(kb_callback_t on_key_down, kb_callback_t on_key_up, void (*on_all_keys_up)(void)) { + kb_key_down = on_key_down; + kb_key_up = on_key_up; + kb_all_keys_up = on_all_keys_up; +} + static bool gfx_sdl_start_frame(void) { frame_start = SDL_GetTicks(); return true; @@ -304,6 +306,7 @@ static void gfx_sdl_shutdown(void) { struct GfxWindowManagerAPI gfx_sdl = { gfx_sdl_init, + gfx_sdl_set_keyboard_callbacks, gfx_sdl_main_loop, gfx_sdl_get_dimensions, gfx_sdl_handle_events, @@ -313,3 +316,5 @@ struct GfxWindowManagerAPI gfx_sdl = { gfx_sdl_get_time, gfx_sdl_shutdown }; + +#endif // WAPI_SDL2 diff --git a/src/pc/gfx/gfx_window_manager_api.h b/src/pc/gfx/gfx_window_manager_api.h index 45826711..44260a97 100644 --- a/src/pc/gfx/gfx_window_manager_api.h +++ b/src/pc/gfx/gfx_window_manager_api.h @@ -4,8 +4,11 @@ #include #include +typedef bool (*kb_callback_t)(int code); + struct GfxWindowManagerAPI { - void (*init)(void); + void (*init)(const char *window_title); + void (*set_keyboard_callbacks)(kb_callback_t on_key_down, kb_callback_t on_key_up, void (*on_all_keys_up)(void)); void (*main_loop)(void (*run_one_game_iter)(void)); void (*get_dimensions)(uint32_t *width, uint32_t *height); void (*handle_events)(void); diff --git a/src/pc/pc_main.c b/src/pc/pc_main.c index dc85b17e..c3e156d8 100644 --- a/src/pc/pc_main.c +++ b/src/pc/pc_main.c @@ -13,6 +13,8 @@ #include "gfx/gfx_pc.h" #include "gfx/gfx_opengl.h" +#include "gfx/gfx_direct3d11.h" +#include "gfx/gfx_direct3d12.h" #include "gfx/gfx_sdl.h" #include "audio/audio_api.h" @@ -23,6 +25,7 @@ #include "cliopts.h" #include "configfile.h" #include "controller/controller_api.h" +#include "controller/controller_keyboard.h" #include "fs/fs.h"
#include "game/game_init.h" @@ -162,9 +165,26 @@ void main_func(void) { configfile_load(configfile_name()); + #if defined(WAPI_SDL1) || defined(WAPI_SDL2) wm_api = &gfx_sdl; + #elif defined(WAPI_DXGI) + wm_api = &gfx_dxgi; + #else + #error No window API! + #endif + + #if defined(RAPI_D3D11) + rendering_api = &gfx_d3d11_api; + #elif defined(RAPI_D3D12) + rendering_api = &gfx_d3d12_api; + #elif defined(RAPI_GL) || defined(RAPI_GL_LEGACY) rendering_api = &gfx_opengl_api; + #else + #error No rendering API! + #endif + gfx_init(wm_api, rendering_api); + wm_api->set_keyboard_callbacks(keyboard_on_key_down, keyboard_on_key_up, keyboard_on_all_keys_up); if (audio_api == NULL && audio_sdl.init()) audio_api = &audio_sdl; @@ -193,7 +213,8 @@ void main_func(void) { emscripten_set_main_loop(em_main_loop, 0, 0); request_anim_frame(on_anim_frame); #else - wm_api->main_loop(produce_one_frame); + while (true) + wm_api->main_loop(produce_one_frame); #endif } diff --git a/src/pc/platform.c b/src/pc/platform.c index d6bfd6e2..6fc86c9e 100644 --- a/src/pc/platform.c +++ b/src/pc/platform.c @@ -79,7 +79,7 @@ void sys_fatal(const char *fmt, ...) { sys_fatal_impl(msg); } -#if USE_SDL +#ifdef HAVE_SDL2 // we can just ask SDL for most of this shit if we have it #include