diff -rupN dolphin.orig/CMake/FindWaylandProtocols.cmake dolphin/CMake/FindWaylandProtocols.cmake --- dolphin.orig/CMake/FindWaylandProtocols.cmake 1970-01-01 00:00:00.000000000 +0000 +++ dolphin/CMake/FindWaylandProtocols.cmake 2023-09-26 18:02:22.836042087 +0000 @@ -0,0 +1,28 @@ +# from https://github.com/glfw/glfw/blob/master/CMake/modules/FindWaylandProtocols.cmake + +find_package(PkgConfig) + +pkg_check_modules(WaylandProtocols QUIET wayland-protocols>=${WaylandProtocols_FIND_VERSION}) + +execute_process(COMMAND ${PKG_CONFIG_EXECUTABLE} --variable=pkgdatadir wayland-protocols + OUTPUT_VARIABLE WaylandProtocols_PKGDATADIR + RESULT_VARIABLE _pkgconfig_failed) +if (_pkgconfig_failed) + message(FATAL_ERROR "Missing wayland-protocols pkgdatadir") +endif() + +string(REGEX REPLACE "[\r\n]" "" WaylandProtocols_PKGDATADIR "${WaylandProtocols_PKGDATADIR}") + +find_package_handle_standard_args(WaylandProtocols + FOUND_VAR + WaylandProtocols_FOUND + REQUIRED_VARS + WaylandProtocols_PKGDATADIR + VERSION_VAR + WaylandProtocols_VERSION + HANDLE_COMPONENTS +) + +set(WAYLAND_PROTOCOLS_FOUND ${WaylandProtocols_FOUND}) +set(WAYLAND_PROTOCOLS_PKGDATADIR ${WaylandProtocols_PKGDATADIR}) +set(WAYLAND_PROTOCOLS_VERSION ${WaylandProtocols_VERSION}) diff -rupN dolphin.orig/CMake/FindXKBCommon.cmake dolphin/CMake/FindXKBCommon.cmake --- dolphin.orig/CMake/FindXKBCommon.cmake 1970-01-01 00:00:00.000000000 +0000 +++ dolphin/CMake/FindXKBCommon.cmake 2023-09-26 18:02:22.836042087 +0000 @@ -0,0 +1,33 @@ +# - Try to find XKBCommon +# Once done, this will define +# +# XKBCOMMON_FOUND - System has XKBCommon +# XKBCOMMON_INCLUDE_DIRS - The XKBCommon include directories +# XKBCOMMON_LIBRARIES - The libraries needed to use XKBCommon +# XKBCOMMON_DEFINITIONS - Compiler switches required for using XKBCommon + +find_package(PkgConfig) +pkg_check_modules(PC_XKBCOMMON QUIET xkbcommon) +set(XKBCOMMON_DEFINITIONS ${PC_XKBCOMMON_CFLAGS_OTHER}) + +find_path(XKBCOMMON_INCLUDE_DIR + NAMES 
xkbcommon/xkbcommon.h + HINTS ${PC_XKBCOMMON_INCLUDE_DIR} ${PC_XKBCOMMON_INCLUDE_DIRS} +) + +find_library(XKBCOMMON_LIBRARY + NAMES xkbcommon + HINTS ${PC_XKBCOMMON_LIBDIR} ${PC_XKBCOMMON_LIBRARY_DIRS} +) + +set(XKBCOMMON_LIBRARIES ${XKBCOMMON_LIBRARY}) +set(XKBCOMMON_LIBRARY_DIRS ${PC_XKBCOMMON_LIBRARY_DIRS}) +set(XKBCOMMON_INCLUDE_DIRS ${XKBCOMMON_INCLUDE_DIR}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(XKBCommon DEFAULT_MSG + XKBCOMMON_LIBRARY + XKBCOMMON_INCLUDE_DIR +) + +mark_as_advanced(XKBCOMMON_LIBRARY XKBCOMMON_INCLUDE_DIR) diff -rupN dolphin.orig/CMakeLists.txt dolphin/CMakeLists.txt --- dolphin.orig/CMakeLists.txt 2023-09-26 17:58:02.593990718 +0000 +++ dolphin/CMakeLists.txt 2023-09-26 18:02:22.836042087 +0000 @@ -47,6 +47,7 @@ set(DOLPHIN_DEFAULT_UPDATE_TRACK "" CACH if(UNIX AND NOT APPLE AND NOT ANDROID) option(ENABLE_X11 "Enables X11 Support" ON) + option(ENABLE_WAYLAND "Enables Wayland Support" OFF) endif() if(NOT WIN32 AND NOT APPLE AND NOT HAIKU) option(ENABLE_EGL "Enables EGL OpenGL Interface" ON) @@ -543,6 +544,17 @@ if(ENABLE_X11) endif() endif() +if(ENABLE_WAYLAND) + find_package(ECM REQUIRED NO_MODULE) + list(APPEND CMAKE_MODULE_PATH "${ECM_MODULE_PATH}") + find_package(Wayland REQUIRED Client Egl) + find_package(WaylandScanner REQUIRED) + find_package(WaylandProtocols 1.15 REQUIRED) + find_package(XKBCommon REQUIRED) + add_definitions(-DHAVE_WAYLAND=1) + message(STATUS "Wayland support enabled") +endif() + if(ENABLE_EGL) find_package(EGL) if(EGL_FOUND) diff -rupN dolphin.orig/Source/Core/Common/CMakeLists.txt dolphin/Source/Core/Common/CMakeLists.txt --- dolphin.orig/Source/Core/Common/CMakeLists.txt 2023-09-26 17:58:02.801995519 +0000 +++ dolphin/Source/Core/Common/CMakeLists.txt 2023-09-26 18:02:22.836042087 +0000 @@ -253,11 +253,20 @@ if(ENABLE_EGL AND EGL_FOUND) GL/GLInterface/EGLAndroid.cpp GL/GLInterface/EGLAndroid.h ) - elseif(ENABLE_X11 AND X11_FOUND) - target_sources(common PRIVATE - 
GL/GLInterface/EGLX11.cpp - GL/GLInterface/EGLX11.h - ) + else() + if(ENABLE_X11 AND X11_FOUND) + target_sources(common PRIVATE + GL/GLInterface/EGLX11.cpp + GL/GLInterface/EGLX11.h + ) + endif() + if(ENABLE_WAYLAND AND WAYLAND_FOUND) + target_sources(common PRIVATE + GL/GLInterface/EGLWayland.cpp + GL/GLInterface/EGLWayland.h + ) + target_link_libraries(common PRIVATE Wayland::Egl) + endif() endif() target_include_directories(common PRIVATE ${EGL_INCLUDE_DIRS}) target_link_libraries(common PUBLIC ${EGL_LIBRARIES}) diff -rupN dolphin.orig/Source/Core/Common/GL/GLContext.cpp dolphin/Source/Core/Common/GL/GLContext.cpp --- dolphin.orig/Source/Core/Common/GL/GLContext.cpp 2023-09-26 17:58:02.801995519 +0000 +++ dolphin/Source/Core/Common/GL/GLContext.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -25,6 +25,9 @@ #if defined(ANDROID) #include "Common/GL/GLInterface/EGLAndroid.h" #endif +#if HAVE_WAYLAND +#include "Common/GL/GLInterface/EGLWayland.h" +#endif #endif const std::array, 9> GLContext::s_desktop_opengl_versions = { @@ -57,11 +60,11 @@ bool GLContext::ClearCurrent() return false; } -void GLContext::Update() +void GLContext::UpdateDimensions(int window_width, int window_height) { } -void GLContext::UpdateSurface(void* window_handle) +void GLContext::UpdateSurface(void* window_handle, int window_width, int window_height) { } @@ -113,6 +116,10 @@ std::unique_ptr GLContext::Cr #endif } #endif +#if HAVE_WAYLAND + if (wsi.type == WindowSystemType::Wayland) + context = std::make_unique(); +#endif #if HAVE_EGL if (wsi.type == WindowSystemType::Headless || wsi.type == WindowSystemType::FBDev) context = std::make_unique(); diff -rupN dolphin.orig/Source/Core/Common/GL/GLContext.h dolphin/Source/Core/Common/GL/GLContext.h --- dolphin.orig/Source/Core/Common/GL/GLContext.h 2023-09-26 17:58:02.801995519 +0000 +++ dolphin/Source/Core/Common/GL/GLContext.h 2023-09-26 18:02:22.836042087 +0000 @@ -36,8 +36,8 @@ public: virtual bool MakeCurrent(); virtual bool ClearCurrent(); - 
virtual void Update(); - virtual void UpdateSurface(void* window_handle); + virtual void UpdateDimensions(int window_width, int window_height); + virtual void UpdateSurface(void* window_handle, int window_width, int window_height); virtual void Swap(); virtual void SwapInterval(int interval); diff -rupN dolphin.orig/Source/Core/Common/GL/GLInterface/AGL.h dolphin/Source/Core/Common/GL/GLInterface/AGL.h --- dolphin.orig/Source/Core/Common/GL/GLInterface/AGL.h 2023-09-26 17:58:02.801995519 +0000 +++ dolphin/Source/Core/Common/GL/GLInterface/AGL.h 2023-09-26 18:02:22.836042087 +0000 @@ -27,6 +27,7 @@ public: bool MakeCurrent() override; bool ClearCurrent() override; + + void UpdateDimensions(int window_width, int window_height) override; - void Update() override; diff -rupN dolphin.orig/Source/Core/Common/GL/GLInterface/AGL.mm dolphin/Source/Core/Common/GL/GLInterface/AGL.mm --- dolphin.orig/Source/Core/Common/GL/GLInterface/AGL.mm 2023-09-26 17:58:02.801995519 +0000 +++ dolphin/Source/Core/Common/GL/GLInterface/AGL.mm 2023-09-26 18:02:22.836042087 +0000 @@ -144,7 +144,7 @@ bool GLContextAGL::ClearCurrent() return true; } -void GLContextAGL::Update() +void GLContextAGL::UpdateDimensions(int window_width, int window_height) { if (!m_view) return; diff -rupN dolphin.orig/Source/Core/Common/GL/GLInterface/EGL.cpp dolphin/Source/Core/Common/GL/GLInterface/EGL.cpp --- dolphin.orig/Source/Core/Common/GL/GLInterface/EGL.cpp 2023-09-26 17:58:02.805995611 +0000 +++ dolphin/Source/Core/Common/GL/GLInterface/EGL.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -292,8 +292,8 @@ bool GLContextEGL::CreateWindowSurface() { if (!IsHeadless()) { - EGLNativeWindowType native_window = GetEGLNativeWindow(m_config); - m_egl_surface = eglCreateWindowSurface(m_egl_display, m_config, native_window, nullptr); + m_native_window = GetEGLNativeWindow(m_config); + m_egl_surface = eglCreateWindowSurface(m_egl_display, m_config, m_native_window, nullptr); if (!m_egl_surface) { INFO_LOG_FMT(VIDEO, "Error: 
eglCreateWindowSurface failed"); @@ -301,15 +301,7 @@ bool GLContextEGL::CreateWindowSurface() } // Get dimensions from the surface. - EGLint surface_width = 1, surface_height = 1; - if (!eglQuerySurface(m_egl_display, m_egl_surface, EGL_WIDTH, &surface_width) || - !eglQuerySurface(m_egl_display, m_egl_surface, EGL_HEIGHT, &surface_height)) - { - WARN_LOG_FMT(VIDEO, - "Failed to get surface dimensions via eglQuerySurface. Size may be incorrect."); - } - m_backbuffer_width = static_cast(surface_width); - m_backbuffer_height = static_cast(surface_height); + QueryDimensions(); } else if (!m_supports_surfaceless) { @@ -347,9 +339,16 @@ bool GLContextEGL::MakeCurrent() return eglMakeCurrent(m_egl_display, m_egl_surface, m_egl_surface, m_egl_context); } -void GLContextEGL::UpdateSurface(void* window_handle) +void GLContextEGL::UpdateDimensions(int window_width, int window_height) +{ + QueryDimensions(); +} + +void GLContextEGL::UpdateSurface(void* window_handle, int window_width, int window_height) { m_wsi.render_surface = window_handle; + m_wsi.render_surface_width = window_width; + m_wsi.render_surface_height = window_height; ClearCurrent(); DestroyWindowSurface(); CreateWindowSurface(); @@ -376,3 +375,15 @@ void GLContextEGL::DestroyContext() m_egl_context = EGL_NO_CONTEXT; m_egl_display = EGL_NO_DISPLAY; } + +void GLContextEGL::QueryDimensions() +{ + EGLint surface_width = 1, surface_height = 1; + if (!eglQuerySurface(m_egl_display, m_egl_surface, EGL_WIDTH, &surface_width) || + !eglQuerySurface(m_egl_display, m_egl_surface, EGL_HEIGHT, &surface_height)) + { + INFO_LOG_FMT(VIDEO, "Failed to get surface dimensions via eglQuerySurface. 
Size may be incorrect."); + } + m_backbuffer_width = static_cast(surface_width); + m_backbuffer_height = static_cast(surface_height); +} diff -rupN dolphin.orig/Source/Core/Common/GL/GLInterface/EGL.h dolphin/Source/Core/Common/GL/GLInterface/EGL.h --- dolphin.orig/Source/Core/Common/GL/GLInterface/EGL.h 2023-09-26 17:58:02.805995611 +0000 +++ dolphin/Source/Core/Common/GL/GLInterface/EGL.h 2023-09-26 18:02:22.836042087 +0000 @@ -22,7 +22,8 @@ public: bool MakeCurrent() override; bool ClearCurrent() override; - void UpdateSurface(void* window_handle) override; + void UpdateDimensions(int window_width, int window_height) override; + void UpdateSurface(void* window_handle, int window_width, int window_height) override; void Swap() override; void SwapInterval(int interval) override; @@ -39,9 +40,12 @@ protected: void DestroyWindowSurface(); void DetectMode(); void DestroyContext(); + void QueryDimensions(); WindowSystemInfo m_wsi = {}; + EGLNativeWindowType m_native_window = {}; + EGLConfig m_config; bool m_supports_surfaceless = false; std::vector m_attribs; diff -rupN dolphin.orig/Source/Core/Common/GL/GLInterface/EGLWayland.cpp dolphin/Source/Core/Common/GL/GLInterface/EGLWayland.cpp --- dolphin.orig/Source/Core/Common/GL/GLInterface/EGLWayland.cpp 1970-01-01 00:00:00.000000000 +0000 +++ dolphin/Source/Core/Common/GL/GLInterface/EGLWayland.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -0,0 +1,36 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include "Common/GL/GLInterface/EGLWayland.h" +#include + +GLContextEGLWayland::~GLContextEGLWayland() +{ + if (m_native_window) + wl_egl_window_destroy(reinterpret_cast(m_native_window)); +} + +EGLDisplay GLContextEGLWayland::OpenEGLDisplay() +{ + return eglGetDisplay(reinterpret_cast(m_wsi.display_connection)); +} + +void GLContextEGLWayland::UpdateDimensions(int window_width, int window_height) +{ + wl_egl_window_resize(reinterpret_cast(m_native_window), window_width, + window_height, 0, 0); + m_backbuffer_width = window_width; + m_backbuffer_height = window_height; +} + +EGLNativeWindowType GLContextEGLWayland::GetEGLNativeWindow(EGLConfig config) +{ + wl_egl_window* window = + wl_egl_window_create(static_cast(m_wsi.render_surface), + m_wsi.render_surface_width, m_wsi.render_surface_height); + if (!window) + return {}; + + return reinterpret_cast(window); +} diff -rupN dolphin.orig/Source/Core/Common/GL/GLInterface/EGLWayland.h dolphin/Source/Core/Common/GL/GLInterface/EGLWayland.h --- dolphin.orig/Source/Core/Common/GL/GLInterface/EGLWayland.h 1970-01-01 00:00:00.000000000 +0000 +++ dolphin/Source/Core/Common/GL/GLInterface/EGLWayland.h 2023-09-26 18:02:22.836042087 +0000 @@ -0,0 +1,19 @@ +// Copyright 2019 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include "Common/GL/GLInterface/EGL.h" + +class GLContextEGLWayland : public GLContextEGL +{ +public: + ~GLContextEGLWayland(); + + void UpdateDimensions(int window_width, int window_height) override; + +protected: + EGLDisplay OpenEGLDisplay() override; + EGLNativeWindowType GetEGLNativeWindow(EGLConfig config) override; +}; diff -rupN dolphin.orig/Source/Core/Common/GL/GLInterface/EGLX11.cpp dolphin/Source/Core/Common/GL/GLInterface/EGLX11.cpp --- dolphin.orig/Source/Core/Common/GL/GLInterface/EGLX11.cpp 2023-09-26 17:58:02.805995611 +0000 +++ dolphin/Source/Core/Common/GL/GLInterface/EGLX11.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -11,7 +11,7 @@ GLContextEGLX11::~GLContextEGLX11() m_render_window.reset(); } -void GLContextEGLX11::Update() +void GLContextEGLX11::UpdateDimensions(int window_width, int window_height) { m_render_window->UpdateDimensions(); m_backbuffer_width = m_render_window->GetWidth(); diff -rupN dolphin.orig/Source/Core/Common/GL/GLInterface/EGLX11.h dolphin/Source/Core/Common/GL/GLInterface/EGLX11.h --- dolphin.orig/Source/Core/Common/GL/GLInterface/EGLX11.h 2023-09-26 17:58:02.805995611 +0000 +++ dolphin/Source/Core/Common/GL/GLInterface/EGLX11.h 2023-09-26 18:02:22.836042087 +0000 @@ -13,7 +13,7 @@ class GLContextEGLX11 final : public GLC public: ~GLContextEGLX11() override; - void Update() override; +void UpdateDimensions(int window_width, int window_height) override; protected: EGLDisplay OpenEGLDisplay() override; diff -rupN dolphin.orig/Source/Core/Common/GL/GLInterface/GLX.cpp dolphin/Source/Core/Common/GL/GLInterface/GLX.cpp --- dolphin.orig/Source/Core/Common/GL/GLInterface/GLX.cpp 2023-09-26 17:58:02.805995611 +0000 +++ dolphin/Source/Core/Common/GL/GLInterface/GLX.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -310,7 +310,7 @@ bool GLContextGLX::ClearCurrent() return glXMakeCurrent(m_display, None, nullptr); } -void GLContextGLX::Update() +void GLContextGLX::UpdateDimensions(int window_width, int window_height) { 
m_render_window->UpdateDimensions(); m_backbuffer_width = m_render_window->GetWidth(); diff -rupN dolphin.orig/Source/Core/Common/GL/GLInterface/GLX.h dolphin/Source/Core/Common/GL/GLInterface/GLX.h --- dolphin.orig/Source/Core/Common/GL/GLInterface/GLX.h 2023-09-26 17:58:02.805995611 +0000 +++ dolphin/Source/Core/Common/GL/GLInterface/GLX.h 2023-09-26 18:02:22.836042087 +0000 @@ -24,7 +24,7 @@ public: bool MakeCurrent() override; bool ClearCurrent() override; - void Update() override; + void UpdateDimensions(int window_width, int window_height) override; void SwapInterval(int Interval) override; void Swap() override; diff -rupN dolphin.orig/Source/Core/Common/GL/GLInterface/WGL.cpp dolphin/Source/Core/Common/GL/GLInterface/WGL.cpp --- dolphin.orig/Source/Core/Common/GL/GLInterface/WGL.cpp 2023-09-26 17:58:02.805995611 +0000 +++ dolphin/Source/Core/Common/GL/GLInterface/WGL.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -480,7 +480,7 @@ bool GLContextWGL::ClearCurrent() } // Update window width, size and etc. 
Called from Render.cpp -void GLContextWGL::Update() +void GLContextWGL::UpdateDimensions(int window_width, int window_height) { RECT rcWindow; GetClientRect(m_window_handle, &rcWindow); diff -rupN dolphin.orig/Source/Core/Common/GL/GLInterface/WGL.h dolphin/Source/Core/Common/GL/GLInterface/WGL.h --- dolphin.orig/Source/Core/Common/GL/GLInterface/WGL.h 2023-09-26 17:58:02.805995611 +0000 +++ dolphin/Source/Core/Common/GL/GLInterface/WGL.h 2023-09-26 18:02:22.836042087 +0000 @@ -19,7 +19,7 @@ public: bool MakeCurrent() override; bool ClearCurrent() override; - void Update() override; + void UpdateDimensions(int window_width, int window_height) override; void Swap() override; void SwapInterval(int interval) override; diff -rupN dolphin.orig/Source/Core/Common/WindowSystemInfo.h dolphin/Source/Core/Common/WindowSystemInfo.h --- dolphin.orig/Source/Core/Common/WindowSystemInfo.h 2023-09-26 17:58:02.805995611 +0000 +++ dolphin/Source/Core/Common/WindowSystemInfo.h 2023-09-26 18:02:22.836042087 +0000 @@ -40,7 +40,11 @@ struct WindowSystemInfo // This is kept seperate as input may require a different handle to rendering, and // during video backend startup the surface pointer may change (MoltenVK). void* render_surface = nullptr; - + + // Dimensions of the render surface, if this is determined by the frontend. + int render_surface_width = 0; + int render_surface_height = 0; + // Scale of the render surface. For hidpi systems, this will be >1. 
float render_surface_scale = 1.0f; }; diff -rupN dolphin.orig/Source/Core/Core/Core.cpp dolphin/Source/Core/Core/Core.cpp --- dolphin.orig/Source/Core/Core/Core.cpp 2023-09-26 17:58:02.809995703 +0000 +++ dolphin/Source/Core/Core/Core.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -474,6 +474,8 @@ static void EmuThread(std::unique_ptrAddInput(ControllerEmu::Translate, START_BUTTON, _trans("START")); + // Hotkey Button + m_buttons->AddInput(ControllerEmu::Translate, HOTKEY_BUTTON, _trans("HOTKEY")); + // sticks groups.emplace_back(m_main_stick = new ControllerEmu::OctagonAnalogStick( MAIN_STICK_GROUP, _trans("Control Stick"), MAIN_STICK_GATE_RADIUS)); diff -rupN dolphin.orig/Source/Core/Core/HW/GCPadEmu.h dolphin/Source/Core/Core/HW/GCPadEmu.h --- dolphin.orig/Source/Core/Core/HW/GCPadEmu.h 2023-09-26 17:58:02.817995888 +0000 +++ dolphin/Source/Core/Core/HW/GCPadEmu.h 2023-09-26 18:02:22.836042087 +0000 @@ -65,6 +65,7 @@ public: static constexpr const char* X_BUTTON = "X"; static constexpr const char* Y_BUTTON = "Y"; static constexpr const char* Z_BUTTON = "Z"; + static constexpr const char* HOTKEY_BUTTON = "Hotkey"; static constexpr const char* START_BUTTON = "Start"; // i18n: The left trigger button (labeled L on real controllers) diff -rupN dolphin.orig/Source/Core/DolphinLib.props dolphin/Source/Core/DolphinLib.props --- dolphin.orig/Source/Core/DolphinLib.props 2023-09-26 17:58:02.829996165 +0000 +++ dolphin/Source/Core/DolphinLib.props 2023-09-26 18:02:22.836042087 +0000 @@ -1207,6 +1207,7 @@ + diff -rupN dolphin.orig/Source/Core/DolphinNoGUI/CMakeLists.txt dolphin/Source/Core/DolphinNoGUI/CMakeLists.txt --- dolphin.orig/Source/Core/DolphinNoGUI/CMakeLists.txt 2023-09-26 17:58:02.833996257 +0000 +++ dolphin/Source/Core/DolphinNoGUI/CMakeLists.txt 2023-09-26 18:02:22.836042087 +0000 @@ -17,6 +17,22 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" target_sources(dolphin-nogui PRIVATE PlatformFBDev.cpp) endif() +if(ENABLE_WAYLAND AND WAYLAND_FOUND) + 
set(WAYLAND_PLATFORM_SRCS PlatformWayland.cpp) + + # Generate the xdg-shell and xdg-decoration protocols at build-time. + ecm_add_wayland_client_protocol(WAYLAND_PLATFORM_SRCS + PROTOCOL "${WAYLAND_PROTOCOLS_PKGDATADIR}/stable/xdg-shell/xdg-shell.xml" + BASENAME xdg-shell) + ecm_add_wayland_client_protocol(WAYLAND_PLATFORM_SRCS + PROTOCOL "${WAYLAND_PROTOCOLS_PKGDATADIR}/unstable/xdg-decoration/xdg-decoration-unstable-v1.xml" + BASENAME xdg-decoration) + + target_include_directories(dolphin-nogui PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") + target_sources(dolphin-nogui PRIVATE "${WAYLAND_PLATFORM_SRCS}") + target_link_libraries(dolphin-nogui PRIVATE Wayland::Client) +endif() + set_target_properties(dolphin-nogui PROPERTIES OUTPUT_NAME dolphin-emu-nogui) target_link_libraries(dolphin-nogui diff -rupN dolphin.orig/Source/Core/DolphinNoGUI/MainNoGUI.cpp dolphin/Source/Core/DolphinNoGUI/MainNoGUI.cpp --- dolphin.orig/Source/Core/DolphinNoGUI/MainNoGUI.cpp 2023-09-26 17:58:02.833996257 +0000 +++ dolphin/Source/Core/DolphinNoGUI/MainNoGUI.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -155,6 +155,11 @@ static std::unique_ptr GetPlat { std::string platform_name = static_cast(options.get("platform")); +#if HAVE_WAYLAND + if (platform_name == "wayland") + return Platform::CreateWaylandPlatform(); +#endif + #if HAVE_X11 if (platform_name == "x11" || platform_name.empty()) return Platform::CreateX11Platform(); @@ -200,6 +205,10 @@ int main(int argc, char* argv[]) , "win32" #endif +#ifdef HAVE_WAYLAND + , + "wayland" +#endif }); optparse::Values& options = CommandLineParse::ParseArguments(parser.get(), argc, argv); diff -rupN dolphin.orig/Source/Core/DolphinNoGUI/Platform.h dolphin/Source/Core/DolphinNoGUI/Platform.h --- dolphin.orig/Source/Core/DolphinNoGUI/Platform.h 2023-09-26 17:58:02.833996257 +0000 +++ dolphin/Source/Core/DolphinNoGUI/Platform.h 2023-09-26 18:02:22.836042087 +0000 @@ -35,6 +35,10 @@ public: static std::unique_ptr CreateX11Platform(); #endif +#ifdef HAVE_WAYLAND 
+ static std::unique_ptr CreateWaylandPlatform(); +#endif + #ifdef __linux__ static std::unique_ptr CreateFBDevPlatform(); #endif diff -rupN dolphin.orig/Source/Core/DolphinNoGUI/PlatformWayland.cpp dolphin/Source/Core/DolphinNoGUI/PlatformWayland.cpp --- dolphin.orig/Source/Core/DolphinNoGUI/PlatformWayland.cpp 1970-01-01 00:00:00.000000000 +0000 +++ dolphin/Source/Core/DolphinNoGUI/PlatformWayland.cpp 2023-09-26 18:27:27.334792936 +0000 @@ -0,0 +1,364 @@ +// Copyright 2018 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include + +#include "DolphinNoGUI/Platform.h" + +#include "Common/MsgHandler.h" +#include "Core/Config/MainSettings.h" +#include "Core/Core.h" +#include "Core/State.h" + +#include "Core/HW/GCPad.h" +#include "InputCommon/GCPadStatus.h" +#include +#include "Core/Config/GraphicsSettings.h" +#include "VideoCommon/VideoConfig.h" +#include "VideoCommon/OnScreenDisplay.h" + +#include +#include +#include + +#include +#include "wayland-xdg-decoration-client-protocol.h" +#include "wayland-xdg-shell-client-protocol.h" + +#include "UICommon/X11Utils.h" +#include "VideoCommon/RenderBase.h" + +namespace +{ +class PlatformWayland : public Platform +{ +public: + ~PlatformWayland() override; + + bool Init() override; + void SetTitle(const std::string& string) override; + void MainLoop() override; + + WindowSystemInfo GetWindowSystemInfo() const; + +private: + void ProcessEvents(); + + static void GlobalRegistryHandler(void* data, wl_registry* registry, uint32_t id, + const char* interface, uint32_t version); + static void GlobalRegistryRemover(void* data, wl_registry* registry, uint32_t id); + static void XDGWMBasePing(void* data, struct xdg_wm_base* xdg_wm_base, uint32_t serial); + static void XDGSurfaceConfigure(void* data, struct xdg_surface* xdg_surface, uint32_t serial); + static void TopLevelConfigure(void* data, struct xdg_toplevel* xdg_toplevel, int32_t width, + int32_t height, struct wl_array* states); + 
static void TopLevelClose(void* data, struct xdg_toplevel* xdg_toplevel); + + wl_display* m_display = nullptr; + wl_registry* m_registry = nullptr; + wl_compositor* m_compositor = nullptr; + xdg_wm_base* m_xdg_wm_base = nullptr; + wl_surface* m_surface = nullptr; + wl_region* m_region = nullptr; + xdg_surface* m_xdg_surface = nullptr; + xdg_toplevel* m_xdg_toplevel = nullptr; + zxdg_decoration_manager_v1* m_decoration_manager = nullptr; + zxdg_toplevel_decoration_v1* m_toplevel_decoration = nullptr; + + int m_surface_width = 0; + int m_surface_height = 0; +}; + +PlatformWayland::~PlatformWayland() +{ + if (m_xdg_toplevel) + xdg_toplevel_destroy(m_xdg_toplevel); + if (m_xdg_surface) + xdg_surface_destroy(m_xdg_surface); + if (m_surface) + wl_surface_destroy(m_surface); + if (m_region) + wl_region_destroy(m_region); + if (m_xdg_wm_base) + xdg_wm_base_destroy(m_xdg_wm_base); + if (m_compositor) + wl_compositor_destroy(m_compositor); + if (m_registry) + wl_registry_destroy(m_registry); + if (m_display) + wl_display_disconnect(m_display); +} + +void PlatformWayland::GlobalRegistryHandler(void* data, wl_registry* registry, uint32_t id, + const char* interface, uint32_t version) +{ + PlatformWayland* platform = static_cast(data); + if (std::strcmp(interface, wl_compositor_interface.name) == 0) + { + platform->m_compositor = static_cast( + wl_registry_bind(platform->m_registry, id, &wl_compositor_interface, 1)); + } + else if (std::strcmp(interface, xdg_wm_base_interface.name) == 0) + { + platform->m_xdg_wm_base = static_cast( + wl_registry_bind(platform->m_registry, id, &xdg_wm_base_interface, 1)); + } + else if (std::strcmp(interface, zxdg_decoration_manager_v1_interface.name) == 0) + { + platform->m_decoration_manager = static_cast( + wl_registry_bind(platform->m_registry, id, &zxdg_decoration_manager_v1_interface, 1)); + } +} + +void PlatformWayland::GlobalRegistryRemover(void* data, wl_registry* registry, uint32_t id) +{ +} + +void PlatformWayland::XDGWMBasePing(void* 
data, struct xdg_wm_base* xdg_wm_base, uint32_t serial) +{ + xdg_wm_base_pong(xdg_wm_base, serial); +} + +void PlatformWayland::XDGSurfaceConfigure(void* data, struct xdg_surface* xdg_surface, + uint32_t serial) +{ + xdg_surface_ack_configure(xdg_surface, serial); +} + +void PlatformWayland::TopLevelConfigure(void* data, struct xdg_toplevel* xdg_toplevel, + int32_t width, int32_t height, struct wl_array* states) +{ + // If this is zero, it's asking us to set the size. + if (width == 0 || height == 0) + return; + + PlatformWayland* platform = static_cast(data); + platform->m_surface_width = width; + platform->m_surface_height = height; + if (g_renderer) + g_renderer->ResizeSurface(width, height); + //if (g_controller_interface.IsInit()) + //g_controller_interface.OnWindowResized(width, height); +} + +void PlatformWayland::TopLevelClose(void* data, struct xdg_toplevel* xdg_toplevel) +{ + PlatformWayland* platform = static_cast(data); + platform->Stop(); +} + +bool PlatformWayland::Init() +{ + m_display = wl_display_connect(nullptr); + if (!m_display) + { + //PanicAlert("Failed to connect to Wayland display."); + return false; + } + + static const wl_registry_listener registry_listener = {GlobalRegistryHandler, + GlobalRegistryRemover}; + m_registry = wl_display_get_registry(m_display); + wl_registry_add_listener(m_registry, ®istry_listener, this); + + // Call back to registry listener to get compositor/shell. + wl_display_dispatch(m_display); + wl_display_roundtrip(m_display); + + // We need a shell/compositor, or at least one we understand. + if (!m_compositor || !m_display || !m_xdg_wm_base) + { + std::fprintf(stderr, "Missing Wayland shell/compositor\n"); + return false; + } + + // Create the compositor and shell surface. 
+ if (!(m_surface = wl_compositor_create_surface(m_compositor)) || + !(m_xdg_surface = xdg_wm_base_get_xdg_surface(m_xdg_wm_base, m_surface)) || + !(m_xdg_toplevel = xdg_surface_get_toplevel(m_xdg_surface))) + { + std::fprintf(stderr, "Failed to create compositor/shell surfaces\n"); + return false; + } + + static const xdg_wm_base_listener xdg_wm_base_listener = {XDGWMBasePing}; + xdg_wm_base_add_listener(m_xdg_wm_base, &xdg_wm_base_listener, this); + + static const xdg_surface_listener shell_surface_listener = {XDGSurfaceConfigure}; + xdg_surface_add_listener(m_xdg_surface, &shell_surface_listener, this); + + static const xdg_toplevel_listener toplevel_listener = {TopLevelConfigure, TopLevelClose}; + xdg_toplevel_add_listener(m_xdg_toplevel, &toplevel_listener, this); + + // Create region in the surface to draw into. + m_surface_width = Config::Get(Config::MAIN_RENDER_WINDOW_WIDTH); + m_surface_height = Config::Get(Config::MAIN_RENDER_WINDOW_HEIGHT); + m_region = wl_compositor_create_region(m_compositor); + wl_region_add(m_region, 0, 0, m_surface_width, m_surface_height); + wl_surface_set_opaque_region(m_surface, m_region); + wl_surface_commit(m_surface); + + // This doesn't seem to have any effect on kwin... 
+ xdg_surface_set_window_geometry(m_xdg_surface, Config::Get(Config::MAIN_RENDER_WINDOW_XPOS), + Config::Get(Config::MAIN_RENDER_WINDOW_YPOS), + Config::Get(Config::MAIN_RENDER_WINDOW_WIDTH), + Config::Get(Config::MAIN_RENDER_WINDOW_HEIGHT)); + + if (m_decoration_manager) + { + m_toplevel_decoration = + zxdg_decoration_manager_v1_get_toplevel_decoration(m_decoration_manager, m_xdg_toplevel); + if (m_toplevel_decoration) + zxdg_toplevel_decoration_v1_set_mode(m_toplevel_decoration, + ZXDG_TOPLEVEL_DECORATION_V1_MODE_SERVER_SIDE); + } + + return true; +} + +void PlatformWayland::SetTitle(const std::string& string) +{ + xdg_toplevel_set_title(m_xdg_toplevel, string.c_str()); +} + +void PlatformWayland::MainLoop() +{ + while (IsRunning()) + { + static int hotkey = 0; + static int slot = 0; + static int fps = 0; + static int aspect = 0; + static int fforward = 0; + static int ires = 0; + + UpdateRunningFlag(); + Core::HostDispatchJobs(); + ProcessEvents(); + + if(Pad::IsInitialized()) { + GCPadStatus x = Pad::GetStatus(0); + + if( (x.button & PAD_BUTTON_HOTKEY) == PAD_BUTTON_HOTKEY) { // hotkey pressed + if(hotkey == 1) { + hotkey = 2; + } + } else { + hotkey = 1; // assure hotkey is released between actions + } + + if(hotkey == 2) { // hotkey pressed + if( (x.button & PAD_BUTTON_START) == PAD_BUTTON_START) { + RequestShutdown(); + hotkey = 0; + } + + if( (x.button & PAD_TRIGGER_L) == PAD_TRIGGER_L) { + State::Load(slot); + hotkey = 0; + } + if( (x.button & PAD_TRIGGER_R) == PAD_TRIGGER_R) { + State::Save(slot); + hotkey = 0; + } + if( (x.button & PAD_BUTTON_DOWN) == PAD_BUTTON_DOWN) { + if(slot > 0) slot--; + Core::DisplayMessage(fmt::format("Slot {} selected", slot), 4000); + hotkey = 0; + } + if( (x.button & PAD_BUTTON_UP) == PAD_BUTTON_UP) { + if(slot < 10) slot++; + Core::DisplayMessage(fmt::format("Slot {} selected", slot), 4000); + hotkey = 0; + } + if( (x.button & PAD_BUTTON_A) == PAD_BUTTON_A) { + Core::SaveScreenShot(); + hotkey = 0; + } + if( (x.button & 
PAD_BUTTON_Y) == PAD_BUTTON_Y) { + if(fps == 0) { + Config::SetCurrent(Config::GFX_SHOW_FPS, true); + fps = 1; + } else { + Config::SetCurrent(Config::GFX_SHOW_FPS, false); + fps = 0; + } + hotkey = 0; + } + if( (x.button & PAD_BUTTON_X) == PAD_BUTTON_X) { + if(aspect == 0) { + Config::SetCurrent(Config::GFX_ASPECT_RATIO, AspectMode::Stretch); + aspect = 1; + } else { + Config::SetCurrent(Config::GFX_ASPECT_RATIO, AspectMode::Auto); + aspect = 0; + } + hotkey = 0; + } + if( (x.button & PAD_BUTTON_B) == PAD_BUTTON_B) { + if(ires == 0) { + Config::SetCurrent(Config::GFX_EFB_SCALE, 2); + OSD::AddMessage("Internal Resolution: 480P"); + ires = 2; + } + else if(ires == 2) { + Config::SetCurrent(Config::GFX_EFB_SCALE, 4); + OSD::AddMessage("Internal Resolution: 720P"); + ires = 4; + } + else if(ires == 4) { + Config::SetCurrent(Config::GFX_EFB_SCALE, 6); + OSD::AddMessage("Internal Resolution: 1080P"); + ires = 6; + } else { + Config::SetCurrent(Config::GFX_EFB_SCALE, 1); + OSD::AddMessage("Internal Resolution: 240P"); + ires = 0; + } + hotkey = 0; + } + if( (x.button & PAD_TRIGGER_Z) == PAD_TRIGGER_Z) { + if(fforward == 0) { + auto speed = Config::Get(Config::MAIN_EMULATION_SPEED) + 1.0; + speed = (speed >= 0.95 && speed <= 1.05) ? 1.0 : speed; + Config::SetCurrent(Config::MAIN_EMULATION_SPEED, speed); + OSD::AddMessage("Fast Forward: ON"); + fforward = 1; + } else { + auto speed = Config::Get(Config::MAIN_EMULATION_SPEED) - 1.0; + speed = (speed <= 0 || (speed >= 0.95 && speed <= 1.05)) ? 1.0 : speed; + Config::SetCurrent(Config::MAIN_EMULATION_SPEED, speed); + OSD::AddMessage("Fast Forward: OFF"); + fforward = 0; + } + hotkey = 0; + } + } + } + + // TODO: Is this sleep appropriate? 
+ std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } +} + +WindowSystemInfo PlatformWayland::GetWindowSystemInfo() const +{ + WindowSystemInfo wsi; + wsi.type = WindowSystemType::Wayland; + wsi.display_connection = static_cast(m_display); + wsi.render_surface = reinterpret_cast(m_surface); + wsi.render_surface_width = m_surface_width; + wsi.render_surface_height = m_surface_height; + return wsi; +} + +void PlatformWayland::ProcessEvents() +{ + wl_display_dispatch_pending(m_display); +} +} // namespace + +std::unique_ptr Platform::CreateWaylandPlatform() +{ + return std::make_unique(); +} diff -rupN dolphin.orig/Source/Core/DolphinNoGUI/PlatformX11.cpp dolphin/Source/Core/DolphinNoGUI/PlatformX11.cpp --- dolphin.orig/Source/Core/DolphinNoGUI/PlatformX11.cpp 2023-09-26 17:58:02.833996257 +0000 +++ dolphin/Source/Core/DolphinNoGUI/PlatformX11.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -57,8 +57,8 @@ private: #endif int m_window_x = Config::Get(Config::MAIN_RENDER_WINDOW_XPOS); int m_window_y = Config::Get(Config::MAIN_RENDER_WINDOW_YPOS); - unsigned int m_window_width = Config::Get(Config::MAIN_RENDER_WINDOW_WIDTH); - unsigned int m_window_height = Config::Get(Config::MAIN_RENDER_WINDOW_HEIGHT); + int m_window_width = Config::Get(Config::MAIN_RENDER_WINDOW_WIDTH); + int m_window_height = Config::Get(Config::MAIN_RENDER_WINDOW_HEIGHT); }; PlatformX11::~PlatformX11() @@ -166,6 +166,8 @@ WindowSystemInfo PlatformX11::GetWindowS wsi.display_connection = static_cast(m_display); wsi.render_window = reinterpret_cast(m_window); wsi.render_surface = reinterpret_cast(m_window); + wsi.render_surface_width = m_window_width; + wsi.render_surface_height = m_window_height; return wsi; } @@ -176,8 +178,9 @@ void PlatformX11::UpdateWindowPosition() Window winDummy; unsigned int borderDummy, depthDummy; - XGetGeometry(m_display, m_window, &winDummy, &m_window_x, &m_window_y, &m_window_width, - &m_window_height, &borderDummy, &depthDummy); + XGetGeometry(m_display, m_window, 
&winDummy, &m_window_x, &m_window_y, + reinterpret_cast(&m_window_width), + reinterpret_cast(&m_window_height), &borderDummy, &depthDummy); } void PlatformX11::ProcessEvents() @@ -264,7 +267,10 @@ void PlatformX11::ProcessEvents() case ConfigureNotify: { if (g_renderer) - g_renderer->ResizeSurface(); + { + UpdateWindowPosition(); + g_renderer->ResizeSurface(m_window_width, m_window_height); + } } break; } diff -rupN dolphin.orig/Source/Core/InputCommon/GCPadStatus.h dolphin/Source/Core/InputCommon/GCPadStatus.h --- dolphin.orig/Source/Core/InputCommon/GCPadStatus.h 2023-09-26 17:58:02.845996536 +0000 +++ dolphin/Source/Core/InputCommon/GCPadStatus.h 2023-09-26 18:02:22.836042087 +0000 @@ -26,6 +26,7 @@ enum PadButton PAD_BUTTON_X = 0x0400, PAD_BUTTON_Y = 0x0800, PAD_BUTTON_START = 0x1000, + PAD_BUTTON_HOTKEY = 0x2000, }; struct GCPadStatus diff -rupN dolphin.orig/Source/Core/VideoBackends/OGL/OGLRender.cpp dolphin/Source/Core/VideoBackends/OGL/OGLRender.cpp --- dolphin.orig/Source/Core/VideoBackends/OGL/OGLRender.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/OGL/OGLRender.cpp 2023-09-26 21:41:39.568275473 +0000 @@ -471,11 +471,7 @@ Renderer::Renderer(std::unique_ptrIsGLES()) { - g_ogl_config.SupportedESPointSize = GLExtensions::Supports("GL_OES_geometry_point_size") ? 1 : - GLExtensions::Supports("GL_EXT_geometry_point_size") ? 2 : - 0; + g_ogl_config.SupportedESPointSize = + GLExtensions::Supports("GL_OES_geometry_point_size") ? EsPointSizeType::PointSizeOes : + GLExtensions::Supports("GL_EXT_geometry_point_size") ? EsPointSizeType::PointSizeExt : + EsPointSizeType::PointSizeNone; g_ogl_config.SupportedESTextureBuffer = GLExtensions::Supports("VERSION_GLES_3_2") ? EsTexbufType::TexbufCore : GLExtensions::Supports("GL_OES_texture_buffer") ? 
EsTexbufType::TexbufOes : @@ -548,21 +545,16 @@ Renderer::Renderer(std::unique_ptr 0; + g_Config.backend_info.bSupportsGeometryShaders && + g_ogl_config.SupportedESPointSize != EsPointSizeType::PointSizeNone; g_Config.backend_info.bSupportsSSAA = g_ogl_config.bSupportsAEP; g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true; g_ogl_config.bSupportsMSAA = true; g_ogl_config.bSupportsTextureStorage = true; - g_ogl_config.bSupports2DTextureStorageMultisample = true; + if (GLExtensions::Supports("GL_OES_texture_storage_multisample_2d_array")) + g_ogl_config.SupportedMultisampleTexStorage = MultisampleTexStorageType::TexStorageOes; g_Config.backend_info.bSupportsBitfield = true; g_Config.backend_info.bSupportsDynamicSamplerIndexing = g_ogl_config.bSupportsAEP; - if (g_ActiveConfig.stereo_mode != StereoMode::Off && g_ActiveConfig.iMultisamples > 1 && - !g_ogl_config.bSupports3DTextureStorageMultisample) - { - // GLES 3.1 can't support stereo rendering and MSAA - OSD::AddMessage("MSAA Stereo rendering isn't supported by your GPU.", 10000); - Config::SetCurrent(Config::GFX_MSAA, UINT32_C(1)); - } } else { @@ -572,7 +564,8 @@ Renderer::Renderer(std::unique_ptr 0; + g_Config.backend_info.bSupportsGSInstancing = + g_ogl_config.SupportedESPointSize != EsPointSizeType::PointSizeNone; g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsSSAA = true; g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true; @@ -581,8 +574,7 @@ Renderer::Renderer(std::unique_ptrUpdateSurface(m_new_surface_handle); + m_main_gl_context->UpdateSurface(m_new_surface_handle, m_new_surface_width, m_new_surface_height); m_new_surface_handle = nullptr; // With a surface change, the window likely has new dimensions. 
@@ -1093,7 +1088,7 @@ void Renderer::CheckForSurfaceResize() if (!m_surface_resized.TestAndClear()) return; - m_main_gl_context->Update(); + m_main_gl_context->UpdateDimensions(m_new_surface_width, m_new_surface_height); m_backbuffer_width = m_main_gl_context->GetBackBufferWidth(); m_backbuffer_height = m_main_gl_context->GetBackBufferHeight(); m_system_framebuffer->UpdateDimensions(m_backbuffer_width, m_backbuffer_height); diff -rupN dolphin.orig/Source/Core/VideoBackends/OGL/OGLRender.h dolphin/Source/Core/VideoBackends/OGL/OGLRender.h --- dolphin.orig/Source/Core/VideoBackends/OGL/OGLRender.h 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/OGL/OGLRender.h 2023-09-26 21:32:17.039048706 +0000 @@ -31,6 +31,14 @@ enum GlslVersion GlslEs310, // GLES 3.1 GlslEs320, // GLES 3.2 }; + +enum class EsPointSizeType +{ + PointSizeNone, + PointSizeOes, + PointSizeExt, +}; + enum class EsTexbufType { TexbufNone, @@ -46,6 +54,13 @@ enum class EsFbFetchType FbFetchArm, }; +enum class MultisampleTexStorageType +{ + TexStorageNone, + TexStorageCore, + TexStorageOes, +}; + // ogl-only config, so not in VideoConfig.h struct VideoConfig { @@ -60,11 +75,10 @@ struct VideoConfig bool bSupportsAEP; bool bSupportsDebug; bool bSupportsCopySubImage; - u8 SupportedESPointSize; + EsPointSizeType SupportedESPointSize; EsTexbufType SupportedESTextureBuffer; bool bSupportsTextureStorage; - bool bSupports2DTextureStorageMultisample; - bool bSupports3DTextureStorageMultisample; + MultisampleTexStorageType SupportedMultisampleTexStorage; bool bSupportsConservativeDepth; bool bSupportsImageLoadStore; bool bSupportsAniso; diff -rupN dolphin.orig/Source/Core/VideoBackends/OGL/OGLTexture.cpp dolphin/Source/Core/VideoBackends/OGL/OGLTexture.cpp --- dolphin.orig/Source/Core/VideoBackends/OGL/OGLTexture.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/OGL/OGLTexture.cpp 2023-09-26 21:34:37.398249032 +0000 @@ -128,12 +128,18 @@ 
OGLTexture::OGLTexture(const TextureConf GLenum gl_internal_format = GetGLInternalFormatForTextureFormat(m_config.format, true); if (tex_config.IsMultisampled()) { - if (g_ogl_config.bSupportsTextureStorage) + ASSERT(g_ogl_config.bSupportsMSAA); + if (g_ogl_config.SupportedMultisampleTexStorage != MultisampleTexStorageType::TexStorageNone) + { glTexStorage3DMultisample(target, tex_config.samples, gl_internal_format, m_config.width, m_config.height, m_config.layers, GL_FALSE); + } else + { + ASSERT(!g_ogl_config.bIsES); glTexImage3DMultisample(target, tex_config.samples, gl_internal_format, m_config.width, m_config.height, m_config.layers, GL_FALSE); + } } else if (g_ogl_config.bSupportsTextureStorage) { diff -rupN dolphin.orig/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp dolphin/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp --- dolphin.orig/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp 2023-09-26 21:37:11.705777665 +0000 @@ -661,12 +661,13 @@ void ProgramShaderCache::CreateHeader() std::string SupportedESTextureBuffer; switch (g_ogl_config.SupportedESPointSize) { - case 1: + case EsPointSizeType::PointSizeOes: SupportedESPointSize = "#extension GL_OES_geometry_point_size : enable"; break; - case 2: + case EsPointSizeType::PointSizeExt: SupportedESPointSize = "#extension GL_EXT_geometry_point_size : enable"; break; + case EsPointSizeType::PointSizeNone: default: SupportedESPointSize = ""; break; @@ -718,6 +719,13 @@ void ProgramShaderCache::CreateHeader() break; } + // The sampler2DMSArray keyword is reserved in GLSL ES 3.0 and 3.1, but is available in 3.2 and + // with GL_OES_texture_storage_multisample_2d_array for 3.1. + // See https://bugs.dolphin-emu.org/issues/13198. 
+ const bool use_multisample_2d_array_precision = + v >= GlslEs320 || + g_ogl_config.SupportedMultisampleTexStorage != MultisampleTexStorageType::TexStorageNone; + std::string shader_shuffle_string; if (g_ogl_config.bSupportsShaderThreadShuffleNV) { @@ -762,6 +770,7 @@ void ProgramShaderCache::CreateHeader() "{}\n" // shader thread shuffle "{}\n" // derivative control "{}\n" // query levels + "{}\n" // OES multisample texture storage // Precision defines for GLSL ES "{}\n" @@ -847,12 +856,18 @@ void ProgramShaderCache::CreateHeader() g_ActiveConfig.backend_info.bSupportsTextureQueryLevels ? "#extension GL_ARB_texture_query_levels : enable" : "", + // Note: GL_ARB_texture_storage_multisample doesn't have an #extension, as it doesn't + // need to change GLSL, but on GLES 3.1 sampler2DMSArray is a reserved keyword unless + // the extension is enabled. Thus, we don't need to check TexStorageCore/have an ARB version. + g_ogl_config.SupportedMultisampleTexStorage == MultisampleTexStorageType::TexStorageOes ? + "#extension GL_OES_texture_storage_multisample_2d_array : enable" : + "", is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "", is_glsles ? "precision highp sampler2DArray;" : "", (is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ? "precision highp usamplerBuffer;" : "", - v > GlslEs300 ? "precision highp sampler2DMSArray;" : "", + use_multisample_2d_array_precision ? "precision highp sampler2DMSArray;" : "", v >= GlslEs310 ? 
"precision highp image2DArray;" : ""); } diff -rupN dolphin.orig/Source/Core/VideoBackends/Software/SWOGLWindow.cpp dolphin/Source/Core/VideoBackends/Software/SWOGLWindow.cpp --- dolphin.orig/Source/Core/VideoBackends/Software/SWOGLWindow.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Software/SWOGLWindow.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -32,6 +32,16 @@ bool SWOGLWindow::IsHeadless() const return m_gl_context->IsHeadless(); } +u32 SWOGLWindow::GetWidth() const +{ + return m_gl_context->GetBackBufferWidth(); +} + +u32 SWOGLWindow::GetHeight() const +{ + return m_gl_context->GetBackBufferHeight(); +} + bool SWOGLWindow::Initialize(const WindowSystemInfo& wsi) { m_gl_context = GLContext::Create(wsi); @@ -84,11 +94,17 @@ bool SWOGLWindow::Initialize(const Windo return true; } + +void SWOGLWindow::UpdateDimensions(int window_width, int window_height) +{ + // just updates the render window position and the backbuffer size + m_gl_context->UpdateDimensions(window_width, window_height); +} + void SWOGLWindow::ShowImage(const AbstractTexture* image, const MathUtil::Rectangle& xfb_region) { const SW::SWTexture* sw_image = static_cast(image); - m_gl_context->Update(); // just updates the render window position and the backbuffer size GLsizei glWidth = (GLsizei)m_gl_context->GetBackBufferWidth(); GLsizei glHeight = (GLsizei)m_gl_context->GetBackBufferHeight(); diff -rupN dolphin.orig/Source/Core/VideoBackends/Software/SWOGLWindow.h dolphin/Source/Core/VideoBackends/Software/SWOGLWindow.h --- dolphin.orig/Source/Core/VideoBackends/Software/SWOGLWindow.h 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Software/SWOGLWindow.h 2023-09-26 18:02:22.836042087 +0000 @@ -20,6 +20,10 @@ public: GLContext* GetContext() const { return m_gl_context.get(); } bool IsHeadless() const; + u32 GetWidth() const; + u32 GetHeight() const; + void UpdateDimensions(int window_width, int window_height); + // Image to show, will be swapped 
immediately void ShowImage(const AbstractTexture* image, const MathUtil::Rectangle& xfb_region); diff -rupN dolphin.orig/Source/Core/VideoBackends/Software/SWRenderer.cpp dolphin/Source/Core/VideoBackends/Software/SWRenderer.cpp --- dolphin.orig/Source/Core/VideoBackends/Software/SWRenderer.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Software/SWRenderer.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -60,17 +60,17 @@ SWRenderer::CreateFramebuffer(AbstractTe static_cast(depth_attachment)); } -void SWRenderer::BindBackbuffer(const ClearColor& clear_color) -{ +//void SWRenderer::BindBackbuffer(const ClearColor& clear_color) +//{ // Look for framebuffer resizes - if (!m_surface_resized.TestAndClear()) - return; + //if (!m_surface_resized.TestAndClear()) + //return; - GLContext* context = m_window->GetContext(); - context->Update(); - m_backbuffer_width = context->GetBackBufferWidth(); - m_backbuffer_height = context->GetBackBufferHeight(); -} + //GLContext* context = m_window->GetContext(); + //context->UpdateDimensions(window_width, window_height); + //m_backbuffer_width = context->GetBackBufferWidth(); + //m_backbuffer_height = context->GetBackBufferHeight(); +//} class SWShader final : public AbstractShader { @@ -108,7 +108,7 @@ std::unique_ptr SWRend { return std::make_unique(); } - + // Called on the GPU thread void SWRenderer::RenderXFBToScreen(const MathUtil::Rectangle& target_rc, const AbstractTexture* source_texture, @@ -118,6 +118,16 @@ void SWRenderer::RenderXFBToScreen(const m_window->ShowImage(source_texture, source_rc); } +void SWRenderer::CheckForSurfaceResize() +{ + if (!m_surface_resized.TestAndClear()) + return; + + m_window->UpdateDimensions(m_new_surface_width, m_new_surface_height); + m_backbuffer_width = static_cast(m_window->GetWidth()); + m_backbuffer_height = static_cast(m_window->GetHeight()); +} + u32 SWRenderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 InputData) { u32 value = 0; diff -rupN 
dolphin.orig/Source/Core/VideoBackends/Software/SWRenderer.h dolphin/Source/Core/VideoBackends/Software/SWRenderer.h --- dolphin.orig/Source/Core/VideoBackends/Software/SWRenderer.h 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Software/SWRenderer.h 2023-09-26 18:02:22.836042087 +0000 @@ -29,7 +29,7 @@ public: std::unique_ptr CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override; - void BindBackbuffer(const ClearColor& clear_color = {}) override; + //void BindBackbuffer(const ClearColor& clear_color = {}) override; std::unique_ptr CreateShaderFromSource(ShaderStage stage, std::string_view source, std::string_view name) override; @@ -64,6 +64,8 @@ protected: std::unique_ptr CreateBoundingBox() const override; private: +void CheckForSurfaceResize(); + std::unique_ptr m_window; }; } // namespace SW diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/CMakeLists.txt dolphin/Source/Core/VideoBackends/Vulkan/CMakeLists.txt --- dolphin.orig/Source/Core/VideoBackends/Vulkan/CMakeLists.txt 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/CMakeLists.txt 2023-09-26 18:02:22.836042087 +0000 @@ -35,6 +35,8 @@ add_library(videovulkan VulkanContext.h VulkanLoader.cpp VulkanLoader.h + VKScheduler.h + VKScheduler.cpp ) target_link_libraries(videovulkan diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp dolphin/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp --- dolphin.orig/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -10,24 +10,24 @@ #include "Common/MsgHandler.h" #include "Common/Thread.h" +#include "VideoBackends/Vulkan/StateTracker.h" #include "VideoBackends/Vulkan/VulkanContext.h" namespace Vulkan { -CommandBufferManager::CommandBufferManager(bool use_threaded_submission) - : 
m_use_threaded_submission(use_threaded_submission) +CommandBufferManager::CommandBufferManager() + : m_state_tracker(std::make_unique(this)), m_last_present_done(true) { } CommandBufferManager::~CommandBufferManager() { // If the worker thread is enabled, stop and block until it exits. - if (m_use_threaded_submission) - { - WaitForWorkerThreadIdle(); - m_submit_loop->Stop(); - m_submit_thread.join(); - } + WaitForSubmitWorkerThreadIdle(); + m_submit_loop->Stop(); + m_fence_loop->Stop(); + m_submit_thread.join(); + m_fence_thread.join(); DestroyCommandBuffers(); } @@ -37,10 +37,13 @@ bool CommandBufferManager::Initialize() if (!CreateCommandBuffers()) return false; - if (m_use_threaded_submission && !CreateSubmitThread()) + if (!CreateFenceThread()) return false; - return true; + if (!CreateSubmitThread()) + return false; + + return m_state_tracker->Initialize(); } bool CommandBufferManager::CreateCommandBuffers() @@ -86,13 +89,6 @@ bool CommandBufferManager::CreateCommand LOG_VULKAN_ERROR(res, "vkCreateFence failed: "); return false; } - - res = vkCreateSemaphore(device, &semaphore_create_info, nullptr, &resources.semaphore); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: "); - return false; - } } res = vkCreateSemaphore(device, &semaphore_create_info, nullptr, &m_present_semaphore); @@ -125,9 +121,6 @@ void CommandBufferManager::DestroyComman for (auto& it : resources.cleanup_resources) it(); - if (resources.semaphore != VK_NULL_HANDLE) - vkDestroySemaphore(device, resources.semaphore, nullptr); - if (resources.fence != VK_NULL_HANDLE) vkDestroyFence(device, resources.fence, nullptr); } @@ -215,11 +208,42 @@ VkDescriptorSet CommandBufferManager::Al return descriptor_set; } +bool CommandBufferManager::CreateFenceThread() +{ + m_fence_loop = std::make_unique(); + m_fence_thread = std::thread([this]() { + Common::SetCurrentThreadName("Vulkan FenceThread"); + m_fence_loop->Run([this]() { + PendingFenceCounter fence; + { + 
std::lock_guard guard(m_pending_fences_lock); + if (m_pending_fences.empty()) + { + m_fence_condvar.notify_all(); + m_fence_loop->AllowSleep(); + return; + } + + fence = m_pending_fences.front(); + m_pending_fences.pop_front(); + } + + vkWaitForFences(g_vulkan_context->GetDevice(), 1, &fence.fence, true, ~0ul); + + std::lock_guard guard(m_pending_fences_lock); + m_completed_fence_counter.store(fence.counter, std::memory_order_release); + m_fence_condvar.notify_all(); + }); + }); + + return true; +} + bool CommandBufferManager::CreateSubmitThread() { m_submit_loop = std::make_unique(); m_submit_thread = std::thread([this]() { - Common::SetCurrentThreadName("Vulkan CommandBufferManager SubmitThread"); + Common::SetCurrentThreadName("Vulkan SubmitThread"); m_submit_loop->Run([this]() { PendingCommandBufferSubmit submit; @@ -227,9 +251,9 @@ bool CommandBufferManager::CreateSubmitT std::lock_guard guard(m_pending_submit_lock); if (m_pending_submits.empty()) { - m_submit_loop->AllowSleep(); m_submit_worker_idle = true; m_submit_worker_condvar.notify_all(); + m_submit_loop->AllowSleep(); return; } @@ -239,16 +263,12 @@ bool CommandBufferManager::CreateSubmitT SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain, submit.present_image_index); - CmdBufferResources& resources = m_command_buffers[submit.command_buffer_index]; - resources.waiting_for_submit.store(false, std::memory_order_release); + std::lock_guard guard(m_pending_submit_lock); + if (m_pending_submits.empty()) { - std::lock_guard guard(m_pending_submit_lock); - if (m_pending_submits.empty()) - { - m_submit_worker_idle = true; - m_submit_worker_condvar.notify_all(); - } + m_submit_worker_idle = true; + m_submit_worker_condvar.notify_all(); } }); }); @@ -256,55 +276,28 @@ bool CommandBufferManager::CreateSubmitT return true; } -void CommandBufferManager::WaitForWorkerThreadIdle() +void CommandBufferManager::WaitForSubmitWorkerThreadIdle() { - if (!m_use_threaded_submission) - return; - 
std::unique_lock lock{m_pending_submit_lock}; m_submit_worker_condvar.wait(lock, [&] { return m_submit_worker_idle; }); } void CommandBufferManager::WaitForFenceCounter(u64 fence_counter) { - if (m_completed_fence_counter >= fence_counter) + if (m_completed_fence_counter.load(std::memory_order_relaxed) >= fence_counter) [[likely]] return; - // Find the first command buffer which covers this counter value. - u32 index = (m_current_cmd_buffer + 1) % NUM_COMMAND_BUFFERS; - while (index != m_current_cmd_buffer) - { - if (m_command_buffers[index].fence_counter >= fence_counter) - break; - - index = (index + 1) % NUM_COMMAND_BUFFERS; - } - - ASSERT(index != m_current_cmd_buffer); - WaitForCommandBufferCompletion(index); + std::unique_lock lock{m_pending_fences_lock}; + m_fence_condvar.wait(lock, [&] { + return m_completed_fence_counter.load(std::memory_order_relaxed) >= fence_counter; + }); } -void CommandBufferManager::WaitForCommandBufferCompletion(u32 index) +void CommandBufferManager::CleanupCompletedCommandBuffers() { - CmdBufferResources& resources = m_command_buffers[index]; - - // Ensure this command buffer has been submitted. - if (resources.waiting_for_submit.load(std::memory_order_acquire)) - { - WaitForWorkerThreadIdle(); - ASSERT_MSG(VIDEO, !resources.waiting_for_submit.load(std::memory_order_relaxed), - "Submit thread is idle but command buffer is still waiting for submission!"); - } - - // Wait for this command buffer to be completed. - VkResult res = - vkWaitForFences(g_vulkan_context->GetDevice(), 1, &resources.fence, VK_TRUE, UINT64_MAX); - if (res != VK_SUCCESS) - LOG_VULKAN_ERROR(res, "vkWaitForFences failed: "); - // Clean up any resources for command buffers between the last known completed buffer and this // now-completed command buffer. If we use >2 buffers, this may be more than one buffer. 
- const u64 now_completed_counter = resources.fence_counter; + const u64 now_completed_counter = m_completed_fence_counter.load(std::memory_order_acquire); u32 cleanup_index = (m_current_cmd_buffer + 1) % NUM_COMMAND_BUFFERS; while (cleanup_index != m_current_cmd_buffer) { @@ -312,26 +305,22 @@ void CommandBufferManager::WaitForComman if (cleanup_resources.fence_counter > now_completed_counter) break; - if (cleanup_resources.fence_counter > m_completed_fence_counter) - { - for (auto& it : cleanup_resources.cleanup_resources) - it(); - cleanup_resources.cleanup_resources.clear(); - } + for (auto& it : cleanup_resources.cleanup_resources) + it(); + cleanup_resources.cleanup_resources.clear(); cleanup_index = (cleanup_index + 1) % NUM_COMMAND_BUFFERS; } - - m_completed_fence_counter = now_completed_counter; } -void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread, +void CommandBufferManager::SubmitCommandBuffer(u64 fence_counter, bool submit_on_worker_thread, bool wait_for_completion, VkSwapchainKHR present_swap_chain, uint32_t present_image_index) { // End the current command buffer. CmdBufferResources& resources = GetCurrentCmdBufferResources(); + resources.fence_counter = fence_counter; for (VkCommandBuffer command_buffer : resources.command_buffers) { VkResult res = vkEndCommandBuffer(command_buffer); @@ -344,32 +333,37 @@ void CommandBufferManager::SubmitCommand } // Submitting off-thread? - if (m_use_threaded_submission && submit_on_worker_thread && !wait_for_completion) + if (submit_on_worker_thread && !wait_for_completion) { - resources.waiting_for_submit.store(true, std::memory_order_relaxed); // Push to the pending submit queue. { std::lock_guard guard(m_pending_submit_lock); m_submit_worker_idle = false; m_pending_submits.push_back({present_swap_chain, present_image_index, m_current_cmd_buffer}); - } - // Wake up the worker thread for a single iteration. - m_submit_loop->Wakeup(); + // Wake up the worker thread for a single iteration. 
+ m_submit_loop->Wakeup(); + } } else { - WaitForWorkerThreadIdle(); + WaitForSubmitWorkerThreadIdle(); // Pass through to normal submission path. SubmitCommandBuffer(m_current_cmd_buffer, present_swap_chain, present_image_index); if (wait_for_completion) - WaitForCommandBufferCompletion(m_current_cmd_buffer); + { + std::unique_lock lock{m_pending_fences_lock}; + m_fence_condvar.wait(lock, [&] { + return m_completed_fence_counter.load(std::memory_order_relaxed) >= resources.fence_counter; + }); + } } if (present_swap_chain != VK_NULL_HANDLE) { m_current_frame = (m_current_frame + 1) % NUM_FRAMES_IN_FLIGHT; + const u64 now_completed_counter = m_completed_fence_counter.load(std::memory_order_acquire); // Wait for all command buffers that used the descriptor pool to finish u32 cmd_buffer_index = (m_current_cmd_buffer + 1) % NUM_COMMAND_BUFFERS; @@ -377,9 +371,9 @@ void CommandBufferManager::SubmitCommand { CmdBufferResources& cmd_buffer = m_command_buffers[cmd_buffer_index]; if (cmd_buffer.frame_index == m_current_frame && cmd_buffer.fence_counter != 0 && - cmd_buffer.fence_counter > m_completed_fence_counter) + cmd_buffer.fence_counter > now_completed_counter) { - WaitForCommandBufferCompletion(cmd_buffer_index); + WaitForFenceCounter(cmd_buffer.fence_counter); } cmd_buffer_index = (cmd_buffer_index + 1) % NUM_COMMAND_BUFFERS; } @@ -411,6 +405,7 @@ void CommandBufferManager::SubmitCommand // Switch to next cmdbuffer. 
BeginCommandBuffer(); + m_state_tracker->InvalidateCachedState(); } void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index, @@ -455,8 +450,15 @@ void CommandBufferManager::SubmitCommand if (res != VK_SUCCESS) { LOG_VULKAN_ERROR(res, "vkQueueSubmit failed: "); - PanicAlertFmt("Failed to submit command buffer: {} ({})", VkResultToString(res), - static_cast(res)); + PanicAlertFmt("Failed to submit command buffer: {} ({}), semaphore used: {}, has present sc {}", + VkResultToString(res), static_cast(res), resources.semaphore_used, + present_swap_chain != VK_NULL_HANDLE); + } + + { + std::lock_guard guard(m_pending_fences_lock); + m_pending_fences.push_back({resources.fence, resources.fence_counter}); + m_fence_loop->Wakeup(); } // Do we have a swap chain to present? @@ -471,28 +473,27 @@ void CommandBufferManager::SubmitCommand &present_swap_chain, &present_image_index, nullptr}; - - m_last_present_result = vkQueuePresentKHR(g_vulkan_context->GetPresentQueue(), &present_info); - m_last_present_done.Set(); - if (m_last_present_result != VK_SUCCESS) + res = vkQueuePresentKHR(g_vulkan_context->GetPresentQueue(), &present_info); + if (res != VK_SUCCESS) { // VK_ERROR_OUT_OF_DATE_KHR is not fatal, just means we need to recreate our swap chain. - if (m_last_present_result != VK_ERROR_OUT_OF_DATE_KHR && - m_last_present_result != VK_SUBOPTIMAL_KHR && - m_last_present_result != VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT) + if (res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR && + res != VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT) { - LOG_VULKAN_ERROR(m_last_present_result, "vkQueuePresentKHR failed: "); + LOG_VULKAN_ERROR(res, "vkQueuePresentKHR failed: "); } // Don't treat VK_SUBOPTIMAL_KHR as fatal on Android. Android 10+ requires prerotation. 
// See https://twitter.com/Themaister/status/1207062674011574273 #ifdef VK_USE_PLATFORM_ANDROID_KHR - if (m_last_present_result != VK_SUBOPTIMAL_KHR) + if (res != VK_SUBOPTIMAL_KHR) m_last_present_failed.Set(); #else m_last_present_failed.Set(); #endif } + m_last_present_result.store(res); + m_last_present_done.Set(); } } @@ -503,8 +504,11 @@ void CommandBufferManager::BeginCommandB CmdBufferResources& resources = m_command_buffers[next_buffer_index]; // Wait for the GPU to finish with all resources for this command buffer. - if (resources.fence_counter > m_completed_fence_counter) - WaitForCommandBufferCompletion(next_buffer_index); + if (resources.fence_counter > m_completed_fence_counter.load(std::memory_order_acquire) && + resources.fence_counter != 0) + WaitForFenceCounter(resources.fence_counter); + + CleanupCompletedCommandBuffers(); // Reset fence to unsignaled before starting. VkResult res = vkResetFences(g_vulkan_context->GetDevice(), 1, &resources.fence); @@ -526,10 +530,9 @@ void CommandBufferManager::BeginCommandB LOG_VULKAN_ERROR(res, "vkBeginCommandBuffer failed: "); } - // Reset upload command buffer state + // Reset command buffer state resources.init_command_buffer_used = false; resources.semaphore_used = false; - resources.fence_counter = m_next_fence_counter++; resources.frame_index = m_current_frame; m_current_cmd_buffer = next_buffer_index; } @@ -569,6 +572,4 @@ void CommandBufferManager::DeferImageVie cmd_buffer_resources.cleanup_resources.push_back( [object]() { vkDestroyImageView(g_vulkan_context->GetDevice(), object, nullptr); }); } - -std::unique_ptr g_command_buffer_mgr; } // namespace Vulkan diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h dolphin/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h --- dolphin.orig/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h 2023-09-26 18:02:22.836042087 
+0000 @@ -22,10 +22,12 @@ namespace Vulkan { +class StateTracker; + class CommandBufferManager { public: - explicit CommandBufferManager(bool use_threaded_submission); + explicit CommandBufferManager(); ~CommandBufferManager(); bool Initialize(); @@ -50,39 +52,37 @@ public: // If the last completed fence counter is greater or equal to N, it means that the work // associated counter N has been completed by the GPU. The value of N to associate with // commands can be retreived by calling GetCurrentFenceCounter(). - u64 GetCompletedFenceCounter() const { return m_completed_fence_counter; } - - // Gets the fence that will be signaled when the currently executing command buffer is - // queued and executed. Do not wait for this fence before the buffer is executed. - u64 GetCurrentFenceCounter() const + // THREAD SAFE + u64 GetCompletedFenceCounter() const { - auto& resources = m_command_buffers[m_current_cmd_buffer]; - return resources.fence_counter; + return m_completed_fence_counter.load(std::memory_order_acquire); } // Returns the semaphore for the current command buffer, which can be used to ensure the // swap chain image is ready before the command buffer executes. - VkSemaphore GetCurrentCommandBufferSemaphore() + void SetWaitSemaphoreForCurrentCommandBuffer(VkSemaphore semaphore) { auto& resources = m_command_buffers[m_current_cmd_buffer]; resources.semaphore_used = true; - return resources.semaphore; + resources.semaphore = semaphore; } // Ensure that the worker thread has submitted any previous command buffers and is idle. - void WaitForWorkerThreadIdle(); + void WaitForSubmitWorkerThreadIdle(); // Wait for a fence to be completed. // Also invokes callbacks for completion. 
+ // THREAD SAFE void WaitForFenceCounter(u64 fence_counter); - void SubmitCommandBuffer(bool submit_on_worker_thread, bool wait_for_completion, + void SubmitCommandBuffer(u64 fence_counter, bool submit_on_worker_thread, + bool wait_for_completion, VkSwapchainKHR present_swap_chain = VK_NULL_HANDLE, uint32_t present_image_index = 0xFFFFFFFF); // Was the last present submitted to the queue a failure? If so, we must recreate our swapchain. bool CheckLastPresentFail() { return m_last_present_failed.TestAndClear(); } - VkResult GetLastPresentResult() const { return m_last_present_result; } + VkResult GetLastPresentResult() const { return m_last_present_result.load(); } bool CheckLastPresentDone() { return m_last_present_done.TestAndClear(); } // Schedule a vulkan resource for destruction later on. This will occur when the command buffer @@ -93,17 +93,21 @@ public: void DeferImageDestruction(VkImage object, VmaAllocation alloc); void DeferImageViewDestruction(VkImageView object); + StateTracker* GetStateTracker() { return m_state_tracker.get(); } + private: bool CreateCommandBuffers(); void DestroyCommandBuffers(); + bool CreateFenceThread(); bool CreateSubmitThread(); - void WaitForCommandBufferCompletion(u32 command_buffer_index); void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain, u32 present_image_index); void BeginCommandBuffer(); + void CleanupCompletedCommandBuffers(); + VkDescriptorPool CreateDescriptorPool(u32 descriptor_sizes); const u32 DESCRIPTOR_SETS_PER_POOL = 1024; @@ -118,7 +122,6 @@ private: u64 fence_counter = 0; bool init_command_buffer_used = false; bool semaphore_used = false; - std::atomic waiting_for_submit{false}; u32 frame_index = 0; std::vector> cleanup_resources; @@ -137,14 +140,15 @@ private: return m_command_buffers[m_current_cmd_buffer]; } - u64 m_next_fence_counter = 1; - u64 m_completed_fence_counter = 0; + std::atomic m_completed_fence_counter = 0; std::array m_frame_resources; std::array 
m_command_buffers; u32 m_current_frame = 0; u32 m_current_cmd_buffer = 0; + std::unique_ptr m_state_tracker; + // Threaded command buffer execution std::thread m_submit_thread; std::unique_ptr m_submit_loop; @@ -161,11 +165,20 @@ private: bool m_submit_worker_idle = true; Common::Flag m_last_present_failed; Common::Flag m_last_present_done; - VkResult m_last_present_result = VK_SUCCESS; - bool m_use_threaded_submission = false; + std::atomic m_last_present_result = VK_SUCCESS; u32 m_descriptor_set_count = DESCRIPTOR_SETS_PER_POOL; -}; -extern std::unique_ptr g_command_buffer_mgr; + // Fence thread + std::thread m_fence_thread; + std::unique_ptr m_fence_loop; + struct PendingFenceCounter + { + VkFence fence; + u64 counter; + }; + std::deque m_pending_fences; + std::mutex m_pending_fences_lock; + std::condition_variable m_fence_condvar; +}; } // namespace Vulkan diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/Constants.h dolphin/Source/Core/VideoBackends/Vulkan/Constants.h --- dolphin.orig/Source/Core/VideoBackends/Vulkan/Constants.h 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/Constants.h 2023-09-26 18:02:22.836042087 +0000 @@ -12,7 +12,7 @@ namespace Vulkan { // Number of command buffers. 
-constexpr size_t NUM_COMMAND_BUFFERS = 8; +constexpr size_t NUM_COMMAND_BUFFERS = 16; // Number of frames in flight, will be used to decide how many descriptor pools are used constexpr size_t NUM_FRAMES_IN_FLIGHT = 2; diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp dolphin/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp --- dolphin.orig/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -389,6 +389,8 @@ VkSampler ObjectCache::GetSampler(const VkRenderPass ObjectCache::GetRenderPass(VkFormat color_format, VkFormat depth_format, u32 multisamples, VkAttachmentLoadOp load_op) { + std::scoped_lock lock(m_render_pass_mutex); + auto key = std::tie(color_format, depth_format, multisamples, load_op); auto it = m_render_pass_cache.find(key); if (it != m_render_pass_cache.end()) @@ -467,6 +469,8 @@ VkRenderPass ObjectCache::GetRenderPass( void ObjectCache::DestroyRenderPassCache() { + std::scoped_lock lock(m_render_pass_mutex); + for (auto& it : m_render_pass_cache) vkDestroyRenderPass(g_vulkan_context->GetDevice(), it.second, nullptr); m_render_pass_cache.clear(); diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/ObjectCache.h dolphin/Source/Core/VideoBackends/Vulkan/ObjectCache.h --- dolphin.orig/Source/Core/VideoBackends/Vulkan/ObjectCache.h 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/ObjectCache.h 2023-09-26 18:02:22.836042087 +0000 @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -102,6 +103,7 @@ private: std::unique_ptr m_dummy_texture; // Render pass cache + std::mutex m_render_pass_mutex; using RenderPassCacheKey = std::tuple; std::map m_render_pass_cache; diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp dolphin/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp --- 
dolphin.orig/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -8,6 +8,7 @@ #include "Common/Assert.h" +#include "VKScheduler.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/VulkanContext.h" #include "VideoCommon/DriverDetails.h" @@ -26,7 +27,10 @@ StagingBuffer::~StagingBuffer() if (m_map_pointer) Unmap(); - g_command_buffer_mgr->DeferBufferDestruction(m_buffer, m_alloc); + g_scheduler->Record( + [c_alloc = m_alloc, c_buffer = m_buffer](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->DeferBufferDestruction(c_buffer, c_alloc); + }); } void StagingBuffer::BufferMemoryBarrier(VkCommandBuffer command_buffer, VkBuffer buffer, diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/StateTracker.cpp dolphin/Source/Core/VideoBackends/Vulkan/StateTracker.cpp --- dolphin.orig/Source/Core/VideoBackends/Vulkan/StateTracker.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/StateTracker.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -16,59 +16,93 @@ namespace Vulkan { -static std::unique_ptr s_state_tracker; - -StateTracker::StateTracker() = default; - -StateTracker::~StateTracker() = default; +StateTracker::StateTracker(CommandBufferManager* command_buffer_mgr) + : m_command_buffer_mgr(command_buffer_mgr) +{ +} -StateTracker* StateTracker::GetInstance() +StateTracker::~StateTracker() { - return s_state_tracker.get(); + vkDestroyImageView(g_vulkan_context->GetDevice(), m_dummy_view, nullptr); + vmaDestroyImage(g_vulkan_context->GetMemoryAllocator(), m_dummy_image, m_dummy_alloc); } -bool StateTracker::CreateInstance() +bool StateTracker::Initialize() { - ASSERT(!s_state_tracker); - s_state_tracker = std::make_unique(); - if (!s_state_tracker->Initialize()) + // Create a dummy texture which can be used in place of a real binding. 
+ VkImageCreateInfo dummy_info; + dummy_info.pNext = nullptr; + dummy_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + dummy_info.extent = {1, 1, 1}; + dummy_info.arrayLayers = 1; + dummy_info.mipLevels = 1; + dummy_info.flags = 0; + dummy_info.imageType = VK_IMAGE_TYPE_2D; + dummy_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + dummy_info.samples = VK_SAMPLE_COUNT_1_BIT; + dummy_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + dummy_info.format = VK_FORMAT_R8G8B8A8_UNORM; + dummy_info.tiling = VK_IMAGE_TILING_OPTIMAL; + dummy_info.queueFamilyIndexCount = 0; + dummy_info.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT; + dummy_info.pQueueFamilyIndices = nullptr; + + VmaAllocationCreateInfo alloc_create_info = {}; + alloc_create_info.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT; + alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + alloc_create_info.pool = VK_NULL_HANDLE; + alloc_create_info.pUserData = nullptr; + alloc_create_info.priority = 0.0; + alloc_create_info.requiredFlags = 0; + alloc_create_info.preferredFlags = 0; + + VkResult res = vmaCreateImage(g_vulkan_context->GetMemoryAllocator(), &dummy_info, + &alloc_create_info, &m_dummy_image, &m_dummy_alloc, nullptr); + if (res != VK_SUCCESS) { - s_state_tracker.reset(); + LOG_VULKAN_ERROR(res, "vmaCreateImage failed: "); return false; } - return true; -} -void StateTracker::DestroyInstance() -{ - if (!s_state_tracker) - return; + VkImageViewCreateInfo dummy_view_info; + dummy_view_info.pNext = nullptr; + dummy_view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + dummy_view_info.flags = 0; + dummy_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + dummy_view_info.format = VK_FORMAT_R8G8B8A8_UNORM; + dummy_view_info.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; + dummy_view_info.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + dummy_view_info.image = 
m_dummy_image; - // When the dummy texture is destroyed, it unbinds itself, then references itself. - // Clear everything out so this doesn't happen. - for (auto& it : s_state_tracker->m_bindings.samplers) - it.imageView = VK_NULL_HANDLE; - s_state_tracker->m_bindings.image_texture.imageView = VK_NULL_HANDLE; - s_state_tracker->m_dummy_texture.reset(); - - s_state_tracker.reset(); -} - -bool StateTracker::Initialize() -{ - // Create a dummy texture which can be used in place of a real binding. - m_dummy_texture = - VKTexture::Create(TextureConfig(1, 1, 1, 1, 1, AbstractTextureFormat::RGBA8, 0), ""); - if (!m_dummy_texture) + res = vkCreateImageView(g_vulkan_context->GetDevice(), &dummy_view_info, nullptr, &m_dummy_view); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateImageView failed: "); return false; - m_dummy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + VkImageMemoryBarrier img_barrier; + img_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + img_barrier.pNext = nullptr; + img_barrier.srcAccessMask = 0; + img_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + img_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + img_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + img_barrier.image = m_dummy_image; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + img_barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + + vkCmdPipelineBarrier(m_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, + nullptr, 0, nullptr, 1, &img_barrier); // Initialize all samplers to point by default for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++) { m_bindings.samplers[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - m_bindings.samplers[i].imageView = m_dummy_texture->GetView(); + 
m_bindings.samplers[i].imageView = m_dummy_view; m_bindings.samplers[i].sampler = g_object_cache->GetPointSampler(); } @@ -107,11 +141,17 @@ void StateTracker::SetIndexBuffer(VkBuff m_dirty_flags |= DIRTY_FLAG_INDEX_BUFFER; } -void StateTracker::SetFramebuffer(VKFramebuffer* framebuffer) +void StateTracker::SetFramebuffer(VkFramebuffer framebuffer, VkRect2D render_area, + VkRenderPass load_render_pass, VkRenderPass clear_render_pass, + VkRenderPass discard_render_pass) { // Should not be changed within a render pass. ASSERT(!InRenderPass()); m_framebuffer = framebuffer; + m_framebuffer_render_area = render_area; + m_framebuffer_load_render_pass = load_render_pass; + m_framebuffer_clear_render_pass = clear_render_pass; + m_framebuffer_discard_render_pass = discard_render_pass; } void StateTracker::SetPipeline(const VKPipeline* pipeline) @@ -232,14 +272,14 @@ void StateTracker::UnbindTexture(VkImage { if (it.imageView == view) { - it.imageView = m_dummy_texture->GetView(); + it.imageView = m_dummy_view; it.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } } if (m_bindings.image_texture.imageView == view) { - m_bindings.image_texture.imageView = m_dummy_texture->GetView(); + m_bindings.image_texture.imageView = m_dummy_view; m_bindings.image_texture.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } } @@ -263,18 +303,18 @@ void StateTracker::BeginRenderPass() if (InRenderPass()) return; - m_current_render_pass = m_framebuffer->GetLoadRenderPass(); - m_framebuffer_render_area = m_framebuffer->GetRect(); + m_current_render_pass = m_framebuffer_load_render_pass; + m_render_area = m_framebuffer_render_area; VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, m_current_render_pass, - m_framebuffer->GetFB(), - m_framebuffer_render_area, + m_framebuffer, + m_render_area, 0, nullptr}; - vkCmdBeginRenderPass(g_command_buffer_mgr->GetCurrentCommandBuffer(), &begin_info, + 
vkCmdBeginRenderPass(m_command_buffer_mgr->GetCurrentCommandBuffer(), &begin_info, VK_SUBPASS_CONTENTS_INLINE); } @@ -283,18 +323,18 @@ void StateTracker::BeginDiscardRenderPas if (InRenderPass()) return; - m_current_render_pass = m_framebuffer->GetDiscardRenderPass(); - m_framebuffer_render_area = m_framebuffer->GetRect(); + m_current_render_pass = m_framebuffer_discard_render_pass; + m_render_area = m_framebuffer_render_area; VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, m_current_render_pass, - m_framebuffer->GetFB(), - m_framebuffer_render_area, + m_framebuffer, + m_render_area, 0, nullptr}; - vkCmdBeginRenderPass(g_command_buffer_mgr->GetCurrentCommandBuffer(), &begin_info, + vkCmdBeginRenderPass(m_command_buffer_mgr->GetCurrentCommandBuffer(), &begin_info, VK_SUBPASS_CONTENTS_INLINE); } @@ -303,7 +343,7 @@ void StateTracker::EndRenderPass() if (!InRenderPass()) return; - vkCmdEndRenderPass(g_command_buffer_mgr->GetCurrentCommandBuffer()); + vkCmdEndRenderPass(m_command_buffer_mgr->GetCurrentCommandBuffer()); m_current_render_pass = VK_NULL_HANDLE; } @@ -312,18 +352,18 @@ void StateTracker::BeginClearRenderPass( { ASSERT(!InRenderPass()); - m_current_render_pass = m_framebuffer->GetClearRenderPass(); - m_framebuffer_render_area = area; + m_current_render_pass = m_framebuffer_clear_render_pass; + m_render_area = area; VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, m_current_render_pass, - m_framebuffer->GetFB(), - m_framebuffer_render_area, + m_framebuffer, + m_render_area, num_clear_values, clear_values}; - vkCmdBeginRenderPass(g_command_buffer_mgr->GetCurrentCommandBuffer(), &begin_info, + vkCmdBeginRenderPass(m_command_buffer_mgr->GetCurrentCommandBuffer(), &begin_info, VK_SUBPASS_CONTENTS_INLINE); } @@ -352,7 +392,7 @@ bool StateTracker::Bind() return false; // Check the render area if we were in a clear pass. 
- if (m_current_render_pass == m_framebuffer->GetClearRenderPass() && !IsViewportWithinRenderArea()) + if (m_current_render_pass == m_framebuffer_clear_render_pass && !IsViewportWithinRenderArea()) EndRenderPass(); // Get a new descriptor set if any parts have changed @@ -363,7 +403,7 @@ bool StateTracker::Bind() BeginRenderPass(); // Re-bind parts of the pipeline - const VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); + const VkCommandBuffer command_buffer = m_command_buffer_mgr->GetCurrentCommandBuffer(); const bool needs_vertex_buffer = !g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader || m_pipeline->GetUsage() != AbstractPipelineUsage::GXUber; if (needs_vertex_buffer && (m_dirty_flags & DIRTY_FLAG_VERTEX_BUFFER)) @@ -398,7 +438,7 @@ bool StateTracker::BindCompute() if (InRenderPass()) EndRenderPass(); - const VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); + const VkCommandBuffer command_buffer = m_command_buffer_mgr->GetCurrentCommandBuffer(); if (m_dirty_flags & DIRTY_FLAG_COMPUTE_SHADER) { vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, @@ -414,10 +454,10 @@ bool StateTracker::IsWithinRenderArea(s3 { // Check that the viewport does not lie outside the render area. // If it does, we need to switch to a normal load/store render pass. 
- s32 left = m_framebuffer_render_area.offset.x; - s32 top = m_framebuffer_render_area.offset.y; - s32 right = left + static_cast(m_framebuffer_render_area.extent.width); - s32 bottom = top + static_cast(m_framebuffer_render_area.extent.height); + s32 left = m_render_area.offset.x; + s32 top = m_render_area.offset.y; + s32 right = left + static_cast(m_render_area.extent.width); + s32 bottom = top + static_cast(m_render_area.extent.height); s32 test_left = x; s32 test_top = y; s32 test_right = test_left + static_cast(width); @@ -434,7 +474,7 @@ bool StateTracker::IsViewportWithinRende void StateTracker::EndClearRenderPass() { - if (m_current_render_pass != m_framebuffer->GetClearRenderPass()) + if (m_current_render_pass != m_framebuffer_clear_render_pass) return; // End clear render pass. Bind() will call BeginRenderPass() which @@ -463,7 +503,7 @@ void StateTracker::UpdateGXDescriptorSet if (m_dirty_flags & DIRTY_FLAG_GX_UBOS || m_gx_descriptor_sets[0] == VK_NULL_HANDLE) { - m_gx_descriptor_sets[0] = g_command_buffer_mgr->AllocateDescriptorSet( + m_gx_descriptor_sets[0] = m_command_buffer_mgr->AllocateDescriptorSet( g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS)); for (size_t i = 0; i < NUM_UBO_DESCRIPTOR_SET_BINDINGS; i++) @@ -490,7 +530,7 @@ void StateTracker::UpdateGXDescriptorSet if (m_dirty_flags & DIRTY_FLAG_GX_SAMPLERS || m_gx_descriptor_sets[1] == VK_NULL_HANDLE) { - m_gx_descriptor_sets[1] = g_command_buffer_mgr->AllocateDescriptorSet( + m_gx_descriptor_sets[1] = m_command_buffer_mgr->AllocateDescriptorSet( g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_STANDARD_SAMPLERS)); writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, @@ -516,7 +556,7 @@ void StateTracker::UpdateGXDescriptorSet (m_dirty_flags & DIRTY_FLAG_GX_SSBO || m_gx_descriptor_sets[2] == VK_NULL_HANDLE)) { m_gx_descriptor_sets[2] = - g_command_buffer_mgr->AllocateDescriptorSet(g_object_cache->GetDescriptorSetLayout( + 
m_command_buffer_mgr->AllocateDescriptorSet(g_object_cache->GetDescriptorSetLayout( DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS)); writes[num_writes++] = { @@ -546,7 +586,7 @@ void StateTracker::UpdateGXDescriptorSet if (m_dirty_flags & DIRTY_FLAG_DESCRIPTOR_SETS) { - vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + vkCmdBindDescriptorSets(m_command_buffer_mgr->GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, needs_ssbo ? NUM_GX_DESCRIPTOR_SETS : (NUM_GX_DESCRIPTOR_SETS - 1), m_gx_descriptor_sets.data(), @@ -558,7 +598,7 @@ void StateTracker::UpdateGXDescriptorSet else if (m_dirty_flags & DIRTY_FLAG_GX_UBO_OFFSETS) { vkCmdBindDescriptorSets( - g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, + m_command_buffer_mgr->GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, 1, m_gx_descriptor_sets.data(), needs_gs_ubo ? NUM_UBO_DESCRIPTOR_SET_BINDINGS : (NUM_UBO_DESCRIPTOR_SET_BINDINGS - 1), m_bindings.gx_ubo_offsets.data()); @@ -575,7 +615,7 @@ void StateTracker::UpdateUtilityDescript // Allocate descriptor sets. 
if (m_dirty_flags & DIRTY_FLAG_UTILITY_UBO || m_utility_descriptor_sets[0] == VK_NULL_HANDLE) { - m_utility_descriptor_sets[0] = g_command_buffer_mgr->AllocateDescriptorSet( + m_utility_descriptor_sets[0] = m_command_buffer_mgr->AllocateDescriptorSet( g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_UTILITY_UNIFORM_BUFFER)); dswrites[writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, @@ -594,7 +634,7 @@ void StateTracker::UpdateUtilityDescript if (m_dirty_flags & DIRTY_FLAG_UTILITY_BINDINGS || m_utility_descriptor_sets[1] == VK_NULL_HANDLE) { - m_utility_descriptor_sets[1] = g_command_buffer_mgr->AllocateDescriptorSet( + m_utility_descriptor_sets[1] = m_command_buffer_mgr->AllocateDescriptorSet( g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_UTILITY_SAMPLERS)); dswrites[writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, @@ -626,7 +666,7 @@ void StateTracker::UpdateUtilityDescript if (m_dirty_flags & DIRTY_FLAG_DESCRIPTOR_SETS) { - vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + vkCmdBindDescriptorSets(m_command_buffer_mgr->GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, NUM_UTILITY_DESCRIPTOR_SETS, m_utility_descriptor_sets.data(), 1, &m_bindings.utility_ubo_offset); @@ -634,7 +674,7 @@ void StateTracker::UpdateUtilityDescript } else if (m_dirty_flags & DIRTY_FLAG_UTILITY_UBO_OFFSET) { - vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + vkCmdBindDescriptorSets(m_command_buffer_mgr->GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, 1, m_utility_descriptor_sets.data(), 1, &m_bindings.utility_ubo_offset); m_dirty_flags &= ~(DIRTY_FLAG_DESCRIPTOR_SETS | DIRTY_FLAG_UTILITY_UBO_OFFSET); @@ -649,7 +689,7 @@ void StateTracker::UpdateComputeDescript // Allocate descriptor sets. 
if (m_dirty_flags & DIRTY_FLAG_COMPUTE_BINDINGS) { - m_compute_descriptor_set = g_command_buffer_mgr->AllocateDescriptorSet( + m_compute_descriptor_set = m_command_buffer_mgr->AllocateDescriptorSet( g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_COMPUTE)); dswrites[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, @@ -700,7 +740,7 @@ void StateTracker::UpdateComputeDescript if (m_dirty_flags & DIRTY_FLAG_COMPUTE_DESCRIPTOR_SET) { - vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + vkCmdBindDescriptorSets(m_command_buffer_mgr->GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_COMPUTE, g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE), 0, 1, &m_compute_descriptor_set, 1, &m_bindings.utility_ubo_offset); diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/StateTracker.h dolphin/Source/Core/VideoBackends/Vulkan/StateTracker.h --- dolphin.orig/Source/Core/VideoBackends/Vulkan/StateTracker.h 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/StateTracker.h 2023-09-26 18:02:22.836042087 +0000 @@ -13,28 +13,24 @@ namespace Vulkan { -class VKFramebuffer; class VKShader; class VKPipeline; -class VKTexture; -class StreamBuffer; -class VertexFormat; +class CommandBufferManager; class StateTracker { public: - StateTracker(); + StateTracker(CommandBufferManager* command_buffer_mgr); ~StateTracker(); - static StateTracker* GetInstance(); - static bool CreateInstance(); - static void DestroyInstance(); + bool Initialize(); - VKFramebuffer* GetFramebuffer() const { return m_framebuffer; } const VKPipeline* GetPipeline() const { return m_pipeline; } void SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset, u32 size); void SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType type); - void SetFramebuffer(VKFramebuffer* framebuffer); + void SetFramebuffer(VkFramebuffer framebuffer, VkRect2D render_area, + VkRenderPass load_render_pass, VkRenderPass clear_render_pass, + VkRenderPass 
discard_render_pass); void SetPipeline(const VKPipeline* pipeline); void SetComputeShader(const VKShader* shader); void SetGXUniformBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); @@ -110,8 +106,6 @@ private: DIRTY_FLAG_UTILITY_BINDINGS | DIRTY_FLAG_COMPUTE_BINDINGS }; - bool Initialize(); - // Check that the specified viewport is within the render area. // If not, ends the render pass if it is a clear render pass. bool IsViewportWithinRenderArea() const; @@ -121,6 +115,8 @@ private: void UpdateUtilityDescriptorSet(); void UpdateComputeDescriptorSet(); + CommandBufferManager* m_command_buffer_mgr; + // Which bindings/state has to be updated before the next draw. u32 m_dirty_flags = 0; @@ -157,10 +153,17 @@ private: VkRect2D m_scissor = {{0, 0}, {1, 1}}; // uniform buffers - std::unique_ptr m_dummy_texture; + VkImage m_dummy_image; + VkImageView m_dummy_view; + VmaAllocation m_dummy_alloc; - VKFramebuffer* m_framebuffer = nullptr; - VkRenderPass m_current_render_pass = VK_NULL_HANDLE; + VkFramebuffer m_framebuffer = VK_NULL_HANDLE; VkRect2D m_framebuffer_render_area = {}; + VkRenderPass m_framebuffer_load_render_pass = VK_NULL_HANDLE; + VkRenderPass m_framebuffer_discard_render_pass = VK_NULL_HANDLE; + VkRenderPass m_framebuffer_clear_render_pass = VK_NULL_HANDLE; + + VkRenderPass m_current_render_pass = VK_NULL_HANDLE; + VkRect2D m_render_area = {}; }; } // namespace Vulkan diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VKBoundingBox.cpp dolphin/Source/Core/VideoBackends/Vulkan/VKBoundingBox.cpp --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VKBoundingBox.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VKBoundingBox.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -12,6 +12,7 @@ #include "VideoBackends/Vulkan/StagingBuffer.h" #include "VideoBackends/Vulkan/StateTracker.h" #include "VideoBackends/Vulkan/VKRenderer.h" +#include "VideoBackends/Vulkan/VKScheduler.h" #include "VideoBackends/Vulkan/VulkanContext.h" 
namespace Vulkan @@ -33,36 +34,43 @@ bool VKBoundingBox::Initialize() return false; // Bind bounding box to state tracker - StateTracker::GetInstance()->SetSSBO(m_gpu_buffer, 0, BUFFER_SIZE); + g_scheduler->Record([c_gpu_buffer = m_gpu_buffer](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetSSBO(c_gpu_buffer, 0, BUFFER_SIZE); + }); return true; } std::vector VKBoundingBox::Read(u32 index, u32 length) { - // Can't be done within a render pass. - StateTracker::GetInstance()->EndRenderPass(); - - // Ensure all writes are completed to the GPU buffer prior to the transfer. - StagingBuffer::BufferMemoryBarrier( - g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, 0, - BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); - m_readback_buffer->PrepareForGPUWrite(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_ACCESS_TRANSFER_WRITE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT); - - // Copy from GPU -> readback buffer. - VkBufferCopy region = {0, 0, BUFFER_SIZE}; - vkCmdCopyBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, - m_readback_buffer->GetBuffer(), 1, &region); - - // Restore GPU buffer access. - StagingBuffer::BufferMemoryBarrier( - g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_READ_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); - m_readback_buffer->FlushGPUCache(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + std::vector values(length); + // We can just take a reference here, we'll sync immediately afterwards + g_scheduler->Record([&](CommandBufferManager* command_buffer_mgr) { + // Can't be done within a render pass. 
+ command_buffer_mgr->GetStateTracker()->EndRenderPass(); + + // Ensure all writes are completed to the GPU buffer prior to the transfer. + StagingBuffer::BufferMemoryBarrier( + command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, 0, + BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + + m_readback_buffer->PrepareForGPUWrite(command_buffer_mgr->GetCurrentCommandBuffer(), + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT); + + // Copy from GPU -> readback buffer. + VkBufferCopy region = {0, 0, BUFFER_SIZE}; + vkCmdCopyBuffer(command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, + m_readback_buffer->GetBuffer(), 1, &region); + + // Restore GPU buffer access. + StagingBuffer::BufferMemoryBarrier( + command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_READ_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + m_readback_buffer->FlushGPUCache(command_buffer_mgr->GetCurrentCommandBuffer(), + VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + }); // Wait until these commands complete. Renderer::GetInstance()->ExecuteCommandBuffer(false, true); @@ -71,34 +79,37 @@ std::vector VKBoundingBox::Rea m_readback_buffer->InvalidateCPUCache(); // Read out the values and return - std::vector values(length); m_readback_buffer->Read(index * sizeof(BBoxType), values.data(), length * sizeof(BBoxType), false); + return values; } void VKBoundingBox::Write(u32 index, const std::vector& values) { - // We can't issue vkCmdUpdateBuffer within a render pass. - // However, the writes must be serialized, so we can't put it in the init buffer. - StateTracker::GetInstance()->EndRenderPass(); - - // Ensure GPU buffer is in a state where it can be transferred to. 
- StagingBuffer::BufferMemoryBarrier( - g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, 0, - BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); - - // Write the values to the GPU buffer - vkCmdUpdateBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, - index * sizeof(BBoxType), values.size() * sizeof(BBoxType), - reinterpret_cast(values.data())); - - // Restore fragment shader access to the buffer. - StagingBuffer::BufferMemoryBarrier( - g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + g_scheduler->Record([c_gpu_buffer = m_gpu_buffer, c_values = values, + c_index = index](CommandBufferManager* command_buffer_mgr) { + // We can't issue vkCmdUpdateBuffer within a render pass. + // However, the writes must be serialized, so we can't put it in the init buffer. + command_buffer_mgr->GetStateTracker()->EndRenderPass(); + + // Ensure GPU buffer is in a state where it can be transferred to. + StagingBuffer::BufferMemoryBarrier( + command_buffer_mgr->GetCurrentCommandBuffer(), c_gpu_buffer, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, 0, + BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + + // Write the values to the GPU buffer + vkCmdUpdateBuffer(command_buffer_mgr->GetCurrentCommandBuffer(), c_gpu_buffer, + c_index * sizeof(BBoxType), c_values.size() * sizeof(BBoxType), + reinterpret_cast(c_values.data())); + + // Restore fragment shader access to the buffer. 
+ StagingBuffer::BufferMemoryBarrier( + command_buffer_mgr->GetCurrentCommandBuffer(), c_gpu_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + }); } bool VKBoundingBox::CreateGPUBuffer() diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VKMain.cpp dolphin/Source/Core/VideoBackends/Vulkan/VKMain.cpp --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VKMain.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VKMain.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -18,6 +18,7 @@ #include "VideoBackends/Vulkan/VKVertexManager.h" #include "VideoBackends/Vulkan/VulkanContext.h" +#include "VKScheduler.h" #include "VideoCommon/FramebufferManager.h" #include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoBackendBase.h" @@ -196,14 +197,7 @@ bool VideoBackend::Initialize(const Wind // With the backend information populated, we can now initialize videocommon. InitializeShared(); - // Create command buffers. We do this separately because the other classes depend on it. - g_command_buffer_mgr = std::make_unique(g_Config.bBackendMultithreading); - if (!g_command_buffer_mgr->Initialize()) - { - PanicAlertFmt("Failed to create Vulkan command buffers"); - Shutdown(); - return false; - } + g_scheduler = std::make_unique(); // Remaining classes are also dependent on object cache. g_object_cache = std::make_unique(); @@ -214,6 +208,14 @@ bool VideoBackend::Initialize(const Wind return false; } + // Has to be initialized after the object cache + if (!g_scheduler->Initialize()) + { + PanicAlertFmt("Failed to initialize Vulkan scheduler."); + Shutdown(); + return false; + } + // Create swap chain. This has to be done early so that the target size is correct for auto-scale. 
std::unique_ptr swap_chain; if (surface != VK_NULL_HANDLE) @@ -227,13 +229,6 @@ bool VideoBackend::Initialize(const Wind } } - if (!StateTracker::CreateInstance()) - { - PanicAlertFmt("Failed to create state tracker"); - Shutdown(); - return false; - } - // Create main wrapper instances. g_renderer = std::make_unique(std::move(swap_chain), wsi.render_surface_scale); g_vertex_manager = std::make_unique(); @@ -257,6 +252,9 @@ bool VideoBackend::Initialize(const Wind void VideoBackend::Shutdown() { + if (g_scheduler) + g_scheduler->SyncWorker(); + if (g_vulkan_context) vkDeviceWaitIdle(g_vulkan_context->GetDevice()); @@ -269,6 +267,9 @@ void VideoBackend::Shutdown() if (g_renderer) g_renderer->Shutdown(); + if (g_scheduler) + g_scheduler->Shutdown(); + g_perf_query.reset(); g_texture_cache.reset(); g_framebuffer_manager.reset(); @@ -276,8 +277,7 @@ void VideoBackend::Shutdown() g_vertex_manager.reset(); g_renderer.reset(); g_object_cache.reset(); - StateTracker::DestroyInstance(); - g_command_buffer_mgr.reset(); + g_scheduler.reset(); g_vulkan_context.reset(); ShutdownShared(); UnloadVulkanLibrary(); diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VKPerfQuery.cpp dolphin/Source/Core/VideoBackends/Vulkan/VKPerfQuery.cpp --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VKPerfQuery.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VKPerfQuery.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -11,6 +11,7 @@ #include "Common/Logging/Log.h" #include "Common/MsgHandler.h" +#include "VKScheduler.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/StateTracker.h" #include "VideoBackends/Vulkan/VKRenderer.h" @@ -49,9 +50,11 @@ void PerfQuery::EnableQuery(PerfQueryGro if (query_count > m_query_buffer.size() / 2) PartialFlush(query_count == PERF_QUERY_BUFFER_SIZE); - // Ensure command buffer is ready to go before beginning the query, that way we don't submit - // a buffer with open queries. 
- StateTracker::GetInstance()->Bind(); + g_scheduler->Record([](CommandBufferManager* command_buffer_mgr) { + // Ensure command buffer is ready to go before beginning the query, that way we don't submit + // a buffer with open queries. + command_buffer_mgr->GetStateTracker()->Bind(); + }); if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP) { @@ -59,15 +62,16 @@ void PerfQuery::EnableQuery(PerfQueryGro DEBUG_ASSERT(!entry.has_value); entry.has_value = true; entry.query_group = group; - - // Use precise queries if supported, otherwise boolean (which will be incorrect). - VkQueryControlFlags flags = - g_vulkan_context->SupportsPreciseOcclusionQueries() ? VK_QUERY_CONTROL_PRECISE_BIT : 0; - - // Ensure the query starts within a render pass. - StateTracker::GetInstance()->BeginRenderPass(); - vkCmdBeginQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos, - flags); + g_scheduler->Record([c_query_pool = m_query_pool, + c_pos = m_query_next_pos](CommandBufferManager* command_buffer_mgr) { + // Use precise queries if supported, otherwise boolean (which will be incorrect). + VkQueryControlFlags flags = + g_vulkan_context->SupportsPreciseOcclusionQueries() ? VK_QUERY_CONTROL_PRECISE_BIT : 0; + + // Ensure the query starts within a render pass. 
+ command_buffer_mgr->GetStateTracker()->BeginRenderPass(); + vkCmdBeginQuery(command_buffer_mgr->GetCurrentCommandBuffer(), c_query_pool, c_pos, flags); + }); } } @@ -75,10 +79,13 @@ void PerfQuery::DisableQuery(PerfQueryGr { if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP) { - vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos); - ActiveQuery& entry = m_query_buffer[m_query_next_pos]; - entry.fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter(); + g_scheduler->Record([c_query_pool = m_query_pool, c_pos = m_query_next_pos + ](CommandBufferManager* command_buffer_mgr) { + vkCmdEndQuery(command_buffer_mgr->GetCurrentCommandBuffer(), c_query_pool, c_pos); + }); + ActiveQuery& entry = m_query_buffer[m_query_next_pos]; + entry.fence_counter = g_scheduler->GetCurrentFenceCounter(); m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE; m_query_count.fetch_add(1, std::memory_order_relaxed); } @@ -93,10 +100,11 @@ void PerfQuery::ResetQuery() m_results[i].store(0, std::memory_order_relaxed); // Reset entire query pool, ensuring all queries are ready to write to. - StateTracker::GetInstance()->EndRenderPass(); - vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, 0, - PERF_QUERY_BUFFER_SIZE); - + g_scheduler->Record([c_query_pool = m_query_pool](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->EndRenderPass(); + vkCmdResetQueryPool(command_buffer_mgr->GetCurrentCommandBuffer(), c_query_pool, 0, + PERF_QUERY_BUFFER_SIZE); + }); std::memset(m_query_buffer.data(), 0, sizeof(ActiveQuery) * m_query_buffer.size()); } @@ -160,7 +168,7 @@ bool PerfQuery::CreateQueryPool() void PerfQuery::ReadbackQueries() { - const u64 completed_fence_counter = g_command_buffer_mgr->GetCompletedFenceCounter(); + const u64 completed_fence_counter = g_scheduler->GetCompletedFenceCounter(); // Need to save these since ProcessResults will modify them. 
const u32 outstanding_queries = m_query_count.load(std::memory_order_relaxed); @@ -201,9 +209,12 @@ void PerfQuery::ReadbackQueries(u32 quer if (res != VK_SUCCESS) LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: "); - StateTracker::GetInstance()->EndRenderPass(); - vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, - m_query_readback_pos, query_count); + g_scheduler->Record([c_query_pool = m_query_pool, c_query_readback_pos = m_query_readback_pos, + query_count](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->EndRenderPass(); + vkCmdResetQueryPool(command_buffer_mgr->GetCurrentCommandBuffer(), c_query_pool, + c_query_readback_pos, query_count); + }); // Remove pending queries. for (u32 i = 0; i < query_count; i++) @@ -231,8 +242,8 @@ void PerfQuery::ReadbackQueries(u32 quer void PerfQuery::PartialFlush(bool blocking) { // Submit a command buffer in the background if the front query is not bound to one. - if (blocking || m_query_buffer[m_query_readback_pos].fence_counter == - g_command_buffer_mgr->GetCurrentFenceCounter()) + if (blocking || + m_query_buffer[m_query_readback_pos].fence_counter == g_scheduler->GetCurrentFenceCounter()) { Renderer::GetInstance()->ExecuteCommandBuffer(true, blocking); } diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp dolphin/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VKRenderer.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -31,6 +31,7 @@ #include "VideoBackends/Vulkan/VKVertexFormat.h" #include "VideoBackends/Vulkan/VulkanContext.h" +#include "VKScheduler.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/FramebufferManager.h" #include "VideoCommon/RenderState.h" @@ -124,7 +125,10 @@ std::unique_ptr Ren void Renderer::SetPipeline(const AbstractPipeline* pipeline) { - 
StateTracker::GetInstance()->SetPipeline(static_cast(pipeline)); + g_scheduler->Record([c_pipeline = static_cast(pipeline)]( + CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetPipeline(c_pipeline); + }); } std::unique_ptr Renderer::CreateBoundingBox() const @@ -138,17 +142,6 @@ void Renderer::ClearScreen(const MathUti g_framebuffer_manager->FlushEFBPokes(); g_framebuffer_manager->FlagPeekCacheAsOutOfDate(); - // Native -> EFB coordinates - MathUtil::Rectangle target_rc = Renderer::ConvertEFBRectangle(rc); - - // Size we pass this size to vkBeginRenderPass, it has to be clamped to the framebuffer - // dimensions. The other backends just silently ignore this case. - target_rc.ClampUL(0, 0, m_target_width, m_target_height); - - VkRect2D target_vk_rc = { - {target_rc.left, target_rc.top}, - {static_cast(target_rc.GetWidth()), static_cast(target_rc.GetHeight())}}; - // Determine whether the EFB has an alpha channel. If it doesn't, we can clear the alpha // channel to 0xFF. This hopefully allows us to use the fast path in most cases. if (bpmem.zcontrol.pixel_format == PixelFormat::RGB565_Z16 || @@ -161,84 +154,98 @@ void Renderer::ClearScreen(const MathUti color &= 0x00FFFFFF; } - // Convert RGBA8 -> floating-point values. 
- VkClearValue clear_color_value = {}; - VkClearValue clear_depth_value = {}; - clear_color_value.color.float32[0] = static_cast((color >> 16) & 0xFF) / 255.0f; - clear_color_value.color.float32[1] = static_cast((color >> 8) & 0xFF) / 255.0f; - clear_color_value.color.float32[2] = static_cast((color >> 0) & 0xFF) / 255.0f; - clear_color_value.color.float32[3] = static_cast((color >> 24) & 0xFF) / 255.0f; - clear_depth_value.depthStencil.depth = static_cast(z & 0xFFFFFF) / 16777216.0f; - if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) - clear_depth_value.depthStencil.depth = 1.0f - clear_depth_value.depthStencil.depth; - - // If we're not in a render pass (start of the frame), we can use a clear render pass - // to discard the data, rather than loading and then clearing. - bool use_clear_attachments = (color_enable && alpha_enable) || z_enable; - bool use_clear_render_pass = - !StateTracker::GetInstance()->InRenderPass() && color_enable && alpha_enable && z_enable; - - // The NVIDIA Vulkan driver causes the GPU to lock up, or throw exceptions if MSAA is enabled, - // a non-full clear rect is specified, and a clear loadop or vkCmdClearAttachments is used. - if (g_ActiveConfig.iMultisamples > 1 && - DriverDetails::HasBug(DriverDetails::BUG_BROKEN_MSAA_CLEAR)) - { - use_clear_render_pass = false; - use_clear_attachments = false; - } - - // This path cannot be used if the driver implementation doesn't guarantee pixels with no drawn - // geometry in "this" renderpass won't be cleared - if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_CLEAR_LOADOP_RENDERPASS)) - use_clear_render_pass = false; - - // Fastest path: Use a render pass to clear the buffers. 
- if (use_clear_render_pass) - { - const std::array clear_values = {{clear_color_value, clear_depth_value}}; - StateTracker::GetInstance()->BeginClearRenderPass(target_vk_rc, clear_values.data(), - static_cast(clear_values.size())); - return; - } + // Native -> EFB coordinates + MathUtil::Rectangle target_rc = Renderer::ConvertEFBRectangle(rc); + + // Size we pass this size to vkBeginRenderPass, it has to be clamped to the framebuffer + // dimensions. The other backends just silently ignore this case. + target_rc.ClampUL(0, 0, m_target_width, m_target_height); + + g_scheduler->Record([color, z, color_enable, alpha_enable, z_enable, + target_rc](CommandBufferManager* command_buffer_mgr) mutable { + VkRect2D target_vk_rc = {{target_rc.left, target_rc.top}, + {static_cast(target_rc.GetWidth()), + static_cast(target_rc.GetHeight())}}; + + // Convert RGBA8 -> floating-point values. + VkClearValue clear_color_value = {}; + VkClearValue clear_depth_value = {}; + clear_color_value.color.float32[0] = static_cast((color >> 16) & 0xFF) / 255.0f; + clear_color_value.color.float32[1] = static_cast((color >> 8) & 0xFF) / 255.0f; + clear_color_value.color.float32[2] = static_cast((color >> 0) & 0xFF) / 255.0f; + clear_color_value.color.float32[3] = static_cast((color >> 24) & 0xFF) / 255.0f; + clear_depth_value.depthStencil.depth = static_cast(z & 0xFFFFFF) / 16777216.0f; + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) + clear_depth_value.depthStencil.depth = 1.0f - clear_depth_value.depthStencil.depth; + + // If we're not in a render pass (start of the frame), we can use a clear render pass + // to discard the data, rather than loading and then clearing. 
+ bool use_clear_attachments = (color_enable && alpha_enable) || z_enable; + bool use_clear_render_pass = !command_buffer_mgr->GetStateTracker()->InRenderPass() && + color_enable && alpha_enable && z_enable; + + // The NVIDIA Vulkan driver causes the GPU to lock up, or throw exceptions if MSAA is enabled, + // a non-full clear rect is specified, and a clear loadop or vkCmdClearAttachments is used. + if (g_ActiveConfig.iMultisamples > 1 && + DriverDetails::HasBug(DriverDetails::BUG_BROKEN_MSAA_CLEAR)) + { + use_clear_render_pass = false; + use_clear_attachments = false; + } + + // This path cannot be used if the driver implementation doesn't guarantee pixels with no drawn + // geometry in "this" renderpass won't be cleared + if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_CLEAR_LOADOP_RENDERPASS)) + use_clear_render_pass = false; - // Fast path: Use vkCmdClearAttachments to clear the buffers within a render path - // We can't use this when preserving alpha but clearing color. - if (use_clear_attachments) - { - VkClearAttachment clear_attachments[2]; - uint32_t num_clear_attachments = 0; - if (color_enable && alpha_enable) - { - clear_attachments[num_clear_attachments].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - clear_attachments[num_clear_attachments].colorAttachment = 0; - clear_attachments[num_clear_attachments].clearValue = clear_color_value; - num_clear_attachments++; - color_enable = false; - alpha_enable = false; - } - if (z_enable) - { - clear_attachments[num_clear_attachments].aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - clear_attachments[num_clear_attachments].colorAttachment = 0; - clear_attachments[num_clear_attachments].clearValue = clear_depth_value; - num_clear_attachments++; - z_enable = false; - } - if (num_clear_attachments > 0) - { - VkClearRect vk_rect = {target_vk_rc, 0, g_framebuffer_manager->GetEFBLayers()}; - if (!StateTracker::GetInstance()->IsWithinRenderArea( - target_vk_rc.offset.x, target_vk_rc.offset.y, target_vk_rc.extent.width, - 
target_vk_rc.extent.height)) + // Fastest path: Use a render pass to clear the buffers. + if (use_clear_render_pass) + { + const std::array clear_values = {{clear_color_value, clear_depth_value}}; + command_buffer_mgr->GetStateTracker()->BeginClearRenderPass( + target_vk_rc, clear_values.data(), static_cast(clear_values.size())); + return; + } + + // Fast path: Use vkCmdClearAttachments to clear the buffers within a render path + // We can't use this when preserving alpha but clearing color. + if (use_clear_attachments) + { + VkClearAttachment clear_attachments[2]; + uint32_t num_clear_attachments = 0; + if (color_enable && alpha_enable) + { + clear_attachments[num_clear_attachments].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + clear_attachments[num_clear_attachments].colorAttachment = 0; + clear_attachments[num_clear_attachments].clearValue = clear_color_value; + num_clear_attachments++; + color_enable = false; + alpha_enable = false; + } + if (z_enable) { - StateTracker::GetInstance()->EndClearRenderPass(); + clear_attachments[num_clear_attachments].aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + clear_attachments[num_clear_attachments].colorAttachment = 0; + clear_attachments[num_clear_attachments].clearValue = clear_depth_value; + num_clear_attachments++; + z_enable = false; } - StateTracker::GetInstance()->BeginRenderPass(); + if (num_clear_attachments > 0) + { + VkClearRect vk_rect = {target_vk_rc, 0, g_framebuffer_manager->GetEFBLayers()}; + if (!command_buffer_mgr->GetStateTracker()->IsWithinRenderArea( + target_vk_rc.offset.x, target_vk_rc.offset.y, target_vk_rc.extent.width, + target_vk_rc.extent.height)) + { + command_buffer_mgr->GetStateTracker()->EndClearRenderPass(); + } + command_buffer_mgr->GetStateTracker()->BeginRenderPass(); - vkCmdClearAttachments(g_command_buffer_mgr->GetCurrentCommandBuffer(), num_clear_attachments, - clear_attachments, 1, &vk_rect); + vkCmdClearAttachments(command_buffer_mgr->GetCurrentCommandBuffer(), num_clear_attachments, + 
clear_attachments, 1, &vk_rect); + } } - } + }); // Anything left over for the slow path? if (!color_enable && !alpha_enable && !z_enable) @@ -259,10 +266,11 @@ void Renderer::WaitForGPUIdle() void Renderer::BindBackbuffer(const ClearColor& clear_color) { - StateTracker::GetInstance()->EndRenderPass(); - - if (!g_command_buffer_mgr->CheckLastPresentDone()) - g_command_buffer_mgr->WaitForWorkerThreadIdle(); + g_scheduler->Record([](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->EndRenderPass(); + }); + if (!g_scheduler->CheckLastPresentDone()) + g_scheduler->SynchronizeSubmissionThread(); // Handle host window resizes. CheckForSurfaceChange(); @@ -276,9 +284,21 @@ void Renderer::BindBackbuffer(const Clea m_swap_chain->SetNextFullscreenState(m_swap_chain->GetCurrentFullscreenState()); } - const bool present_fail = g_command_buffer_mgr->CheckLastPresentFail(); - VkResult res = present_fail ? g_command_buffer_mgr->GetLastPresentResult() : - m_swap_chain->AcquireNextImage(); + VkSemaphore semaphore = VK_NULL_HANDLE; + VkResult res; + const bool present_fail = g_scheduler->CheckLastPresentFail(); + if (!present_fail) + { + semaphore = m_swap_chain->GetNextSemaphore(); + g_scheduler->Record([c_semaphore = semaphore](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->SetWaitSemaphoreForCurrentCommandBuffer(c_semaphore); + }); + res = m_swap_chain->AcquireNextImage(semaphore); + } + else + { + res = g_scheduler->GetLastPresentResult(); + } if (res == VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT && !m_swap_chain->GetCurrentFullscreenState()) @@ -289,8 +309,13 @@ void Renderer::BindBackbuffer(const Clea res = VK_SUCCESS; if (present_fail) { + semaphore = m_swap_chain->GetNextSemaphore(); + g_scheduler->Record([c_semaphore = semaphore](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->SetWaitSemaphoreForCurrentCommandBuffer(c_semaphore); + }); + // We still need to acquire an image. 
- res = m_swap_chain->AcquireNextImage(); + res = m_swap_chain->AcquireNextImage(semaphore); } } @@ -309,7 +334,7 @@ void Renderer::BindBackbuffer(const Clea else if (res == VK_SUBOPTIMAL_KHR || res == VK_ERROR_OUT_OF_DATE_KHR) { INFO_LOG_FMT(VIDEO, "Resizing swap chain due to suboptimal/out-of-date"); - m_swap_chain->ResizeSwapChain(); + m_swap_chain->ResizeSwapChain(m_backbuffer_width, m_backbuffer_height); } else { @@ -318,7 +343,12 @@ void Renderer::BindBackbuffer(const Clea m_swap_chain->RecreateSwapChain(); } - res = m_swap_chain->AcquireNextImage(); + semaphore = m_swap_chain->GetNextSemaphore(); + g_scheduler->Record([c_semaphore = semaphore](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->SetWaitSemaphoreForCurrentCommandBuffer(c_semaphore); + }); + + res = m_swap_chain->AcquireNextImage(semaphore); if (res != VK_SUCCESS) PanicAlertFmt("Failed to grab image from swap chain: {:#010X} {}", static_cast(res), VkResultToString(res)); @@ -328,8 +358,7 @@ void Renderer::BindBackbuffer(const Clea // color attachment ready for writing. These transitions must occur outside // a render pass, unless the render pass declares a self-dependency. 
m_swap_chain->GetCurrentTexture()->OverrideImageLayout(VK_IMAGE_LAYOUT_UNDEFINED); - m_swap_chain->GetCurrentTexture()->TransitionToLayout( - g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + m_swap_chain->GetCurrentTexture()->TransitionToLayout(VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); SetAndClearFramebuffer(m_swap_chain->GetCurrentFramebuffer(), ClearColor{{0.0f, 0.0f, 0.0f, 1.0f}}); } @@ -337,22 +366,20 @@ void Renderer::BindBackbuffer(const Clea void Renderer::PresentBackbuffer() { // End drawing to backbuffer - StateTracker::GetInstance()->EndRenderPass(); + g_scheduler->Record([](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->EndRenderPass(); + }); // Transition the backbuffer to PRESENT_SRC to ensure all commands drawing // to it have finished before present. - m_swap_chain->GetCurrentTexture()->TransitionToLayout( - g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_PRESENT_SRC_KHR); + m_swap_chain->GetCurrentTexture()->TransitionToLayout(VK_IMAGE_LAYOUT_PRESENT_SRC_KHR); // Submit the current command buffer, signaling rendering finished semaphore when it's done // Because this final command buffer is rendering to the swap chain, we need to wait for // the available semaphore to be signaled before executing the buffer. This final submission // can happen off-thread in the background while we're preparing the next frame. - g_command_buffer_mgr->SubmitCommandBuffer(true, false, m_swap_chain->GetSwapChain(), - m_swap_chain->GetCurrentImageIndex()); - - // New cmdbuffer, so invalidate state. 
- StateTracker::GetInstance()->InvalidateCachedState(); + g_scheduler->SubmitCommandBuffer(true, false, m_swap_chain->GetSwapChain(), + m_swap_chain->GetCurrentImageIndex()); } void Renderer::SetFullscreen(bool enable_fullscreen) @@ -370,11 +397,7 @@ bool Renderer::IsFullscreen() const void Renderer::ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_completion) { - StateTracker::GetInstance()->EndRenderPass(); - - g_command_buffer_mgr->SubmitCommandBuffer(submit_off_thread, wait_for_completion); - - StateTracker::GetInstance()->InvalidateCachedState(); + g_scheduler->SubmitCommandBuffer(submit_off_thread, wait_for_completion); } void Renderer::CheckForSurfaceChange() @@ -382,15 +405,20 @@ void Renderer::CheckForSurfaceChange() if (!m_surface_changed.TestAndClear() || !m_swap_chain) return; + g_scheduler->SyncWorker(); + // Submit the current draws up until rendering the XFB. ExecuteCommandBuffer(false, true); // Clear the present failed flag, since we don't want to resize after recreating. - g_command_buffer_mgr->CheckLastPresentFail(); + g_scheduler->CheckLastPresentFail(); // Recreate the surface. If this fails we're in trouble. - if (!m_swap_chain->RecreateSurface(m_new_surface_handle)) + if (!m_swap_chain->RecreateSurface(m_new_surface_handle, m_new_surface_width, + m_new_surface_height)) + { PanicAlertFmt("Failed to recreate Vulkan surface. Cannot continue."); + } m_new_surface_handle = nullptr; // Handle case where the dimensions are now different. @@ -402,6 +430,8 @@ void Renderer::CheckForSurfaceResize() if (!m_surface_resized.TestAndClear()) return; + g_scheduler->SyncWorker(); + // If we don't have a surface, how can we resize the swap chain? // CheckForSurfaceChange should handle this case. if (!m_swap_chain) @@ -414,17 +444,21 @@ void Renderer::CheckForSurfaceResize() ExecuteCommandBuffer(false, true); // Clear the present failed flag, since we don't want to resize after recreating. 
- g_command_buffer_mgr->CheckLastPresentFail(); + g_scheduler->CheckLastPresentFail(); // Resize the swap chain. - m_swap_chain->RecreateSwapChain(); + m_swap_chain->ResizeSwapChain(m_new_surface_width, m_new_surface_height); OnSwapChainResized(); } void Renderer::OnConfigChanged(u32 bits) { if (bits & CONFIG_CHANGE_BIT_HOST_CONFIG) - g_object_cache->ReloadPipelineCache(); + { + g_scheduler->Record([](CommandBufferManager* command_buffer_manager) { + g_object_cache->ReloadPipelineCache(); + }); + } // For vsync, we need to change the present mode, which means recreating the swap chain. if (m_swap_chain && bits & CONFIG_CHANGE_BIT_VSYNC) @@ -456,22 +490,32 @@ void Renderer::OnSwapChainResized() void Renderer::BindFramebuffer(VKFramebuffer* fb) { - StateTracker::GetInstance()->EndRenderPass(); + g_scheduler->Record([fb](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->EndRenderPass(); - // Shouldn't be bound as a texture. - if (fb->GetColorAttachment()) - { - StateTracker::GetInstance()->UnbindTexture( - static_cast(fb->GetColorAttachment())->GetView()); - } - if (fb->GetDepthAttachment()) - { - StateTracker::GetInstance()->UnbindTexture( - static_cast(fb->GetDepthAttachment())->GetView()); - } + // Shouldn't be bound as a texture. 
+ if (fb->GetColorAttachment()) + { + command_buffer_mgr->GetStateTracker()->UnbindTexture( + static_cast(fb->GetColorAttachment())->GetView()); + } + if (fb->GetDepthAttachment()) + { + command_buffer_mgr->GetStateTracker()->UnbindTexture( + static_cast(fb->GetDepthAttachment())->GetView()); + } + }); fb->TransitionForRender(); - StateTracker::GetInstance()->SetFramebuffer(fb); + + g_scheduler->Record([c_framebuffer = fb->GetFB(), c_rect = fb->GetRect(), + c_load_render_pass = fb->GetLoadRenderPass(), + c_clear_render_pass = fb->GetClearRenderPass(), + c_discard_render_pass = + fb->GetDiscardRenderPass()](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetFramebuffer( + c_framebuffer, c_rect, c_load_render_pass, c_clear_render_pass, c_discard_render_pass); + }); m_current_framebuffer = fb; } @@ -494,7 +538,9 @@ void Renderer::SetAndDiscardFramebuffer( // If we're discarding, begin the discard pass, then switch to a load pass. // This way if the command buffer is flushed, we don't start another discard pass. 
- StateTracker::GetInstance()->BeginDiscardRenderPass(); + g_scheduler->Record([](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->BeginDiscardRenderPass(); + }); } void Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, @@ -503,22 +549,25 @@ void Renderer::SetAndClearFramebuffer(Ab VKFramebuffer* vkfb = static_cast(framebuffer); BindFramebuffer(vkfb); - std::array clear_values; - u32 num_clear_values = 0; - if (vkfb->GetColorFormat() != AbstractTextureFormat::Undefined) - { - std::memcpy(clear_values[num_clear_values].color.float32, color_value.data(), - sizeof(clear_values[num_clear_values].color.float32)); - num_clear_values++; - } - if (vkfb->GetDepthFormat() != AbstractTextureFormat::Undefined) - { - clear_values[num_clear_values].depthStencil.depth = depth_value; - clear_values[num_clear_values].depthStencil.stencil = 0; - num_clear_values++; - } - StateTracker::GetInstance()->BeginClearRenderPass(vkfb->GetRect(), clear_values.data(), - num_clear_values); + g_scheduler->Record( + [vkfb, c_color_value = color_value, depth_value](CommandBufferManager* command_buffer_mgr) { + std::array clear_values; + u32 num_clear_values = 0; + if (vkfb->GetColorFormat() != AbstractTextureFormat::Undefined) + { + std::memcpy(clear_values[num_clear_values].color.float32, c_color_value.data(), + sizeof(clear_values[num_clear_values].color.float32)); + num_clear_values++; + } + if (vkfb->GetDepthFormat() != AbstractTextureFormat::Undefined) + { + clear_values[num_clear_values].depthStencil.depth = depth_value; + clear_values[num_clear_values].depthStencil.stencil = 0; + num_clear_values++; + } + command_buffer_mgr->GetStateTracker()->BeginClearRenderPass( + vkfb->GetRect(), clear_values.data(), num_clear_values); + }); } void Renderer::SetTexture(u32 index, const AbstractTexture* texture) @@ -526,25 +575,31 @@ void Renderer::SetTexture(u32 index, con // Texture should always be in SHADER_READ_ONLY layout prior to use. 
// This is so we don't need to transition during render passes. const VKTexture* tex = static_cast(texture); + if (tex) { if (tex->GetLayout() != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { - if (StateTracker::GetInstance()->InRenderPass()) - { - WARN_LOG_FMT(VIDEO, "Transitioning image in render pass in Renderer::SetTexture()"); - StateTracker::GetInstance()->EndRenderPass(); - } + g_scheduler->Record([](CommandBufferManager* command_buffer_mgr) { + if (command_buffer_mgr->GetStateTracker()->InRenderPass()) + { + WARN_LOG_FMT(VIDEO, "Transitioning image in render pass in Renderer::SetTexture()"); + command_buffer_mgr->GetStateTracker()->EndRenderPass(); + } + }); - tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + tex->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); } - StateTracker::GetInstance()->SetTexture(index, tex->GetView()); + g_scheduler->Record([c_view = tex->GetView(), index](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetTexture(index, c_view); + }); } else { - StateTracker::GetInstance()->SetTexture(0, VK_NULL_HANDLE); + g_scheduler->Record([](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetTexture(0, VK_NULL_HANDLE); + }); } } @@ -554,15 +609,17 @@ void Renderer::SetSamplerState(u32 index if (m_sampler_states[index] == state) return; - // Look up new state and replace in state tracker. - VkSampler sampler = g_object_cache->GetSampler(state); - if (sampler == VK_NULL_HANDLE) - { - ERROR_LOG_FMT(VIDEO, "Failed to create sampler"); - sampler = g_object_cache->GetPointSampler(); - } + g_scheduler->Record([index, c_sampler_state = state](CommandBufferManager* command_buffer_mgr) { + // Look up new state and replace in state tracker. 
+ VkSampler sampler = g_object_cache->GetSampler(c_sampler_state); + if (sampler == VK_NULL_HANDLE) + { + ERROR_LOG_FMT(VIDEO, "Failed to create sampler"); + sampler = g_object_cache->GetPointSampler(); + } - StateTracker::GetInstance()->SetSampler(index, sampler); + command_buffer_mgr->GetStateTracker()->SetSampler(index, sampler); + }); m_sampler_states[index] = state; } @@ -571,87 +628,116 @@ void Renderer::SetComputeImageTexture(Ab VKTexture* vk_texture = static_cast(texture); if (vk_texture) { - StateTracker::GetInstance()->EndRenderPass(); - StateTracker::GetInstance()->SetImageTexture(vk_texture->GetView()); - vk_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - read ? (write ? VKTexture::ComputeImageLayout::ReadWrite : + g_scheduler->Record([c_view = vk_texture->GetView()](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->EndRenderPass(); + command_buffer_mgr->GetStateTracker()->SetImageTexture(c_view); + }); + + vk_texture->TransitionToLayout(read ? (write ? VKTexture::ComputeImageLayout::ReadWrite : VKTexture::ComputeImageLayout::ReadOnly) : VKTexture::ComputeImageLayout::WriteOnly); } else { - StateTracker::GetInstance()->SetImageTexture(VK_NULL_HANDLE); + g_scheduler->Record([](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetImageTexture(VK_NULL_HANDLE); + }); } } void Renderer::UnbindTexture(const AbstractTexture* texture) { - StateTracker::GetInstance()->UnbindTexture(static_cast(texture)->GetView()); + g_scheduler->Record([c_view = static_cast(texture)->GetView()]( + CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->UnbindTexture(c_view); + }); } void Renderer::ResetSamplerStates() { - // Invalidate all sampler states, next draw will re-initialize them. 
for (u32 i = 0; i < m_sampler_states.size(); i++) { m_sampler_states[i] = RenderState::GetPointSamplerState(); - StateTracker::GetInstance()->SetSampler(i, g_object_cache->GetPointSampler()); } - // Invalidate all sampler objects (some will be unused now). - g_object_cache->ClearSamplerCache(); + g_scheduler->Record( + [c_sampler_count = m_sampler_states.size()](CommandBufferManager* command_buffer_mgr) { + // Invalidate all sampler states, next draw will re-initialize them. + for (u32 i = 0; i < c_sampler_count; i++) + { + command_buffer_mgr->GetStateTracker()->SetSampler(i, g_object_cache->GetPointSampler()); + } + + // Invalidate all sampler objects (some will be unused now). + g_object_cache->ClearSamplerCache(); + }); } void Renderer::SetScissorRect(const MathUtil::Rectangle& rc) { - VkRect2D scissor = {{rc.left, rc.top}, - {static_cast(rc.GetWidth()), static_cast(rc.GetHeight())}}; - - // See Vulkan spec for vkCmdSetScissor: - // The x and y members of offset must be greater than or equal to 0. - if (scissor.offset.x < 0) - { - scissor.extent.width -= -scissor.offset.x; - scissor.offset.x = 0; - } - if (scissor.offset.y < 0) - { - scissor.extent.height -= -scissor.offset.y; - scissor.offset.y = 0; - } - StateTracker::GetInstance()->SetScissor(scissor); + g_scheduler->Record([c_rc = rc](CommandBufferManager* command_buffer_mgr) { + VkRect2D scissor = {{c_rc.left, c_rc.top}, + {static_cast(c_rc.GetWidth()), static_cast(c_rc.GetHeight())}}; + + // See Vulkan spec for vkCmdSetScissor: + // The x and y members of offset must be greater than or equal to 0. 
+ if (scissor.offset.x < 0) + { + scissor.extent.width -= -scissor.offset.x; + scissor.offset.x = 0; + } + if (scissor.offset.y < 0) + { + scissor.extent.height -= -scissor.offset.y; + scissor.offset.y = 0; + } + command_buffer_mgr->GetStateTracker()->SetScissor(scissor); + }); } void Renderer::SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) { VkViewport viewport = {x, y, width, height, near_depth, far_depth}; - StateTracker::GetInstance()->SetViewport(viewport); + g_scheduler->Record([viewport](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetViewport(viewport); + }); } void Renderer::Draw(u32 base_vertex, u32 num_vertices) { - if (!StateTracker::GetInstance()->Bind()) - return; + g_scheduler->Record([base_vertex, num_vertices](CommandBufferManager* command_buffer_mgr) { + if (!command_buffer_mgr->GetStateTracker()->Bind()) + return; - vkCmdDraw(g_command_buffer_mgr->GetCurrentCommandBuffer(), num_vertices, 1, base_vertex, 0); + vkCmdDraw(command_buffer_mgr->GetCurrentCommandBuffer(), num_vertices, 1, base_vertex, 0); + }); } void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) { - if (!StateTracker::GetInstance()->Bind()) - return; - - vkCmdDrawIndexed(g_command_buffer_mgr->GetCurrentCommandBuffer(), num_indices, 1, base_index, - base_vertex, 0); + g_scheduler->Record( + [base_vertex, num_indices, base_index](CommandBufferManager* command_buffer_mgr) { + if (!command_buffer_mgr->GetStateTracker()->Bind()) + return; + + vkCmdDrawIndexed(command_buffer_mgr->GetCurrentCommandBuffer(), num_indices, 1, base_index, + base_vertex, 0); + }); } void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groupsize_x, u32 groupsize_y, u32 groupsize_z, u32 groups_x, u32 groups_y, u32 groups_z) { - StateTracker::GetInstance()->SetComputeShader(static_cast(shader)); - if (StateTracker::GetInstance()->BindCompute()) - 
vkCmdDispatch(g_command_buffer_mgr->GetCurrentCommandBuffer(), groups_x, groups_y, groups_z); + g_scheduler->Record([groups_x, groups_y, groups_z, + shader](CommandBufferManager* command_buffer_mgr) { + // Only compute state is needed here; like the code this replaces, do not require + // a bound graphics pipeline before dispatching. + + command_buffer_mgr->GetStateTracker()->SetComputeShader(static_cast(shader)); + if (command_buffer_mgr->GetStateTracker()->BindCompute()) + vkCmdDispatch(command_buffer_mgr->GetCurrentCommandBuffer(), groups_x, groups_y, groups_z); + }); } } // namespace Vulkan diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VKScheduler.cpp dolphin/Source/Core/VideoBackends/Vulkan/VKScheduler.cpp --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VKScheduler.cpp 1970-01-01 00:00:00.000000000 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VKScheduler.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -0,0 +1,155 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "VKScheduler.h" +#include +#include "Common/Thread.h" +#include "StateTracker.h" + +namespace Vulkan +{ +Scheduler::Scheduler() + : m_commandBufferManager(std::make_unique()), + m_submit_loop(std::make_unique()) +{ + AcquireNewChunk(); + + m_worker = std::thread([this]() { + Common::SetCurrentThreadName("Vulkan CS Thread"); + WorkerThread(); + }); +} + +Scheduler::~Scheduler() +{ + m_submit_loop->Stop(); + m_worker.join(); +} + +bool Scheduler::Initialize() +{ + return m_commandBufferManager->Initialize(); +} + +void Scheduler::CommandChunk::ExecuteAll(CommandBufferManager* cmdbuf) +{ + auto command = first; + while (command != nullptr) + { + auto next = command->GetNext(); + command->Execute(cmdbuf); + command->~Command(); + command = next; + } + command_offset = 0; + first = nullptr; + last = nullptr; +} + +void Scheduler::AcquireNewChunk() +{ + std::scoped_lock lock{m_reserve_mutex}; + if (m_chunk_reserve.empty()) + { + m_chunk = std::make_unique(); + return; + } + m_chunk = 
std::move(m_chunk_reserve.back()); + m_chunk_reserve.pop_back(); +} + +void Scheduler::Flush() +{ + if (m_chunk->Empty()) + return; + + { + std::scoped_lock lock{m_work_mutex}; + m_worker_idle = false; + m_work_queue.push(std::move(m_chunk)); + m_submit_loop->Wakeup(); + } + AcquireNewChunk(); +} + +void Scheduler::SyncWorker() +{ + Flush(); + std::unique_lock lock{m_work_mutex}; + m_idle_condvar.wait(lock, [this] { return m_worker_idle; }); +} + +void Scheduler::WorkerThread() +{ + m_submit_loop->Run([this]() { + std::unique_ptr work; + { + std::scoped_lock lock{m_work_mutex}; + if (m_work_queue.empty()) + { + m_worker_idle = true; + m_idle_condvar.notify_all(); + m_submit_loop->AllowSleep(); + return; + } + work = std::move(m_work_queue.front()); + m_work_queue.pop(); + } + + work->ExecuteAll(m_commandBufferManager.get()); + { + std::scoped_lock reserve_lock{m_reserve_mutex}; + m_chunk_reserve.push_back(std::move(work)); + } + { + std::scoped_lock lock{m_work_mutex}; + if (m_work_queue.empty()) + { + m_worker_idle = true; + m_idle_condvar.notify_all(); + } + } + }); +} + +void Scheduler::Shutdown() +{ + SyncWorker(); + SynchronizeSubmissionThread(); +} + +void Scheduler::SynchronizeSubmissionThread() +{ + SyncWorker(); + m_commandBufferManager->WaitForSubmitWorkerThreadIdle(); +} + +void Scheduler::WaitForFenceCounter(u64 counter) +{ + if (m_commandBufferManager->GetCompletedFenceCounter() >= counter) + return; + + SyncWorker(); + m_commandBufferManager->WaitForFenceCounter(counter); +} + +void Scheduler::SubmitCommandBuffer(bool submit_on_worker_thread, bool wait_for_completion, + VkSwapchainKHR present_swap_chain, uint32_t present_image_index) +{ + const u64 fence_counter = ++m_current_fence_counter; + Record([fence_counter, submit_on_worker_thread, wait_for_completion, present_swap_chain, + present_image_index](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->EndRenderPass(); + 
command_buffer_mgr->SubmitCommandBuffer(fence_counter, submit_on_worker_thread, + wait_for_completion, present_swap_chain, + present_image_index); + }); + + if (wait_for_completion) [[unlikely]] + g_scheduler->WaitForFenceCounter(fence_counter); + else + Flush(); +} + +std::unique_ptr g_scheduler; +} // namespace Vulkan diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VKScheduler.h dolphin/Source/Core/VideoBackends/Vulkan/VKScheduler.h --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VKScheduler.h 1970-01-01 00:00:00.000000000 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VKScheduler.h 2023-09-26 18:02:22.836042087 +0000 @@ -0,0 +1,160 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "CommandBufferManager.h" +#include "Common/Align.h" + +namespace Vulkan +{ +class Scheduler +{ + class Command + { + public: + virtual ~Command() = default; + + virtual void Execute(CommandBufferManager* cmdbuf) = 0; + + Command* GetNext() const { return next; } + + void SetNext(Command* next_) { next = next_; } + + private: + Command* next = nullptr; + }; + + template + class alignas(16) TypedCommand final : public Command + { + public: + explicit TypedCommand(T&& command_) : command{std::move(command_)} {} + + ~TypedCommand() override = default; + + TypedCommand(TypedCommand&&) = delete; + + TypedCommand& operator=(TypedCommand&&) = delete; + + void Execute(CommandBufferManager* cmdbuf) override { command(cmdbuf); } + + private: + T command; + }; + + class CommandChunk final + { + public: + void ExecuteAll(CommandBufferManager* cmdbuf); + + template + bool Record(T& command) + { + using FuncType = TypedCommand; + static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large"); + + command_offset = Common::AlignUp(command_offset, alignof(FuncType)); + if (command_offset > sizeof(data) - sizeof(FuncType)) [[unlikely]] + { + return false; + } + Command* const current_last = 
last; + last = new (data.data() + command_offset) FuncType(std::move(command)); + + if (current_last) [[likely]] + { + current_last->SetNext(last); + } + else + { + first = last; + } + command_offset += sizeof(FuncType); + return true; + } + + bool Empty() const { return command_offset == 0; } + + private: + Command* first = nullptr; + Command* last = nullptr; + + size_t command_offset = 0; + alignas(64) std::array data{}; + }; + +public: + Scheduler(); + ~Scheduler(); + + bool Initialize(); + + void Flush(); + void SyncWorker(); + + void Shutdown(); + + template + void Record(T&& command) + { +#ifdef VULKAN_DISABLE_THREADING + command(m_commandBufferManager.get()); + return; +#endif + + if (m_chunk->Record(command)) [[likely]] + return; + + Flush(); + (void)m_chunk->Record(command); + } + + uint64_t GetCompletedFenceCounter() const + { + return m_commandBufferManager->GetCompletedFenceCounter(); + } + + uint64_t GetCurrentFenceCounter() const + { + return m_current_fence_counter.load(std::memory_order_acquire); + } + + void WaitForFenceCounter(u64 counter); + void SynchronizeSubmissionThread(); + + bool CheckLastPresentFail() { return m_commandBufferManager->CheckLastPresentFail(); } + VkResult GetLastPresentResult() const { return m_commandBufferManager->GetLastPresentResult(); } + bool CheckLastPresentDone() { return m_commandBufferManager->CheckLastPresentDone(); } + + void SubmitCommandBuffer(bool submit_on_worker_thread, bool wait_for_completion, + VkSwapchainKHR present_swap_chain = VK_NULL_HANDLE, + uint32_t present_image_index = 0xFFFFFFFF); + +private: + void WorkerThread(); + void AcquireNewChunk(); + + std::unique_ptr m_commandBufferManager; + + std::unique_ptr m_chunk; + + std::thread m_worker; + std::unique_ptr m_submit_loop; + + std::atomic m_current_fence_counter = 1; + + std::queue> m_work_queue; + std::mutex m_work_mutex; + std::condition_variable m_idle_condvar; + bool m_worker_idle{true}; + + std::vector> m_chunk_reserve; + std::mutex 
m_reserve_mutex; +}; + +extern std::unique_ptr g_scheduler; +} // namespace Vulkan diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VKStreamBuffer.cpp dolphin/Source/Core/VideoBackends/Vulkan/VKStreamBuffer.cpp --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VKStreamBuffer.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VKStreamBuffer.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -12,6 +12,7 @@ #include "Common/MsgHandler.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/VKScheduler.h" #include "VideoBackends/Vulkan/VulkanContext.h" namespace Vulkan @@ -24,7 +25,13 @@ StreamBuffer::~StreamBuffer() { // VMA_ALLOCATION_CREATE_MAPPED_BIT automatically handles unmapping for us if (m_buffer != VK_NULL_HANDLE) - g_command_buffer_mgr->DeferBufferDestruction(m_buffer, m_alloc); + { + g_scheduler->Record( + [c_buffer = m_buffer, c_alloc = m_alloc](CommandBufferManager* command_buffer_mgr) { + if (c_buffer != VK_NULL_HANDLE) + command_buffer_mgr->DeferBufferDestruction(c_buffer, c_alloc); + }); + } } std::unique_ptr StreamBuffer::Create(VkBufferUsageFlags usage, u32 size) @@ -75,8 +82,11 @@ bool StreamBuffer::AllocateBuffer() // Destroy the backings for the buffer after the command buffer executes // VMA_ALLOCATION_CREATE_MAPPED_BIT automatically handles unmapping for us - if (m_buffer != VK_NULL_HANDLE) - g_command_buffer_mgr->DeferBufferDestruction(m_buffer, m_alloc); + g_scheduler->Record( + [c_buffer = m_buffer, c_alloc = m_alloc](CommandBufferManager* command_buffer_mgr) { + if (c_buffer != VK_NULL_HANDLE) + command_buffer_mgr->DeferBufferDestruction(c_buffer, c_alloc); + }); // Replace with the new buffer m_buffer = buffer; @@ -175,7 +185,7 @@ void StreamBuffer::UpdateCurrentFencePos return; // Has the offset changed since the last fence? 
- const u64 counter = g_command_buffer_mgr->GetCurrentFenceCounter(); + const u64 counter = g_scheduler->GetCurrentFenceCounter(); if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter) { // Still haven't executed a command buffer, so just update the offset. @@ -193,7 +203,7 @@ void StreamBuffer::UpdateGPUPosition() auto start = m_tracked_fences.begin(); auto end = start; - const u64 completed_counter = g_command_buffer_mgr->GetCompletedFenceCounter(); + const u64 completed_counter = g_scheduler->GetCompletedFenceCounter(); while (end != m_tracked_fences.end() && completed_counter >= end->first) { m_current_gpu_position = end->second; @@ -266,14 +276,13 @@ bool StreamBuffer::WaitForClearSpace(u32 // Did any fences satisfy this condition? // Has the command buffer been executed yet? If not, the caller should execute it. - if (iter == m_tracked_fences.end() || - iter->first == g_command_buffer_mgr->GetCurrentFenceCounter()) + if (iter == m_tracked_fences.end() || iter->first == g_scheduler->GetCurrentFenceCounter()) { return false; } // Wait until this fence is signaled. This will fire the callback, updating the GPU position. - g_command_buffer_mgr->WaitForFenceCounter(iter->first); + g_scheduler->WaitForFenceCounter(iter->first); m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? 
m_tracked_fences.end() : ++iter); m_current_offset = new_offset; diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VKSwapChain.cpp dolphin/Source/Core/VideoBackends/Vulkan/VKSwapChain.cpp --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VKSwapChain.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VKSwapChain.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -13,9 +13,9 @@ #include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/VKScheduler.h" #include "VideoBackends/Vulkan/VKTexture.h" #include "VideoBackends/Vulkan/VulkanContext.h" -#include "VideoCommon/RenderBase.h" #if defined(VK_USE_PLATFORM_XLIB_KHR) #include @@ -25,15 +25,18 @@ namespace Vulkan { SwapChain::SwapChain(const WindowSystemInfo& wsi, VkSurfaceKHR surface, bool vsync) : m_wsi(wsi), m_surface(surface), m_vsync_enabled(vsync), - m_fullscreen_supported(g_vulkan_context->SupportsExclusiveFullscreen(wsi, surface)) + m_fullscreen_supported(g_vulkan_context->SupportsExclusiveFullscreen(wsi, surface)), + m_width(wsi.render_surface_width), m_height(wsi.render_surface_height) { } SwapChain::~SwapChain() { + g_scheduler->SyncWorker(); DestroySwapChainImages(); DestroySwapChain(); DestroySurface(); + DestroySemaphores(); } VkSurfaceKHR SwapChain::CreateVulkanSurface(VkInstance instance, const WindowSystemInfo& wsi) @@ -84,6 +87,29 @@ VkSurfaceKHR SwapChain::CreateVulkanSurf } #endif +#if defined(VK_USE_PLATFORM_WAYLAND_KHR) + if (wsi.type == WindowSystemType::Wayland) + { + VkWaylandSurfaceCreateInfoKHR surface_create_info = { + VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkWaylandSurfaceCreateFlagsKHR flags + static_cast(wsi.display_connection), // struct wl_display* display + static_cast(wsi.render_surface) // struct wl_surface* surface + }; + + VkSurfaceKHR surface; + VkResult res = vkCreateWaylandSurfaceKHR(instance, 
&surface_create_info, nullptr, &surface); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateWaylandSurfaceKHR failed: "); + return VK_NULL_HANDLE; + } + + return surface; + } +#endif + #if defined(VK_USE_PLATFORM_ANDROID_KHR) if (wsi.type == WindowSystemType::Android) { @@ -132,7 +158,8 @@ std::unique_ptr SwapChain::Cr bool vsync) { std::unique_ptr swap_chain = std::make_unique(wsi, surface, vsync); - if (!swap_chain->CreateSwapChain() || !swap_chain->SetupSwapChainImages()) + if (!swap_chain->CreateSwapChain() || !swap_chain->SetupSwapChainImages() || + !swap_chain->CreateSemaphores()) return nullptr; return swap_chain; @@ -237,6 +264,23 @@ bool SwapChain::SelectPresentMode() return true; } +bool SwapChain::CreateSemaphores() +{ + static constexpr VkSemaphoreCreateInfo semaphore_create_info = { + VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr, 0}; + for (VkSemaphore& semaphore : m_semaphores) + { + VkResult res = vkCreateSemaphore(g_vulkan_context->GetDevice(), &semaphore_create_info, nullptr, + &semaphore); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: "); + return false; + } + } + return true; +} + bool SwapChain::CreateSwapChain() { // Look up surface properties to determine image count and dimensions @@ -265,8 +309,8 @@ bool SwapChain::CreateSwapChain() VkExtent2D size = surface_capabilities.currentExtent; if (size.width == UINT32_MAX) { - size.width = std::max(g_renderer->GetBackbufferWidth(), 1); - size.height = std::max(g_renderer->GetBackbufferHeight(), 1); + size.width = static_cast(m_wsi.render_surface_width); + size.height = static_cast(m_wsi.render_surface_height); } size.width = std::clamp(size.width, surface_capabilities.minImageExtent.width, surface_capabilities.maxImageExtent.width); @@ -452,20 +496,33 @@ void SwapChain::DestroySwapChain() m_swap_chain = VK_NULL_HANDLE; } -VkResult SwapChain::AcquireNextImage() +void SwapChain::DestroySemaphores() { - VkResult res = 
vkAcquireNextImageKHR(g_vulkan_context->GetDevice(), m_swap_chain, UINT64_MAX, - g_command_buffer_mgr->GetCurrentCommandBufferSemaphore(), - VK_NULL_HANDLE, &m_current_swap_chain_image_index); + for (VkSemaphore semaphore : m_semaphores) + { + if (semaphore != VK_NULL_HANDLE) + { + vkDestroySemaphore(g_vulkan_context->GetDevice(), semaphore, nullptr); + } + } +} + +VkResult SwapChain::AcquireNextImage(VkSemaphore semaphore) +{ + VkResult res = + vkAcquireNextImageKHR(g_vulkan_context->GetDevice(), m_swap_chain, UINT64_MAX, semaphore, + VK_NULL_HANDLE, &m_current_swap_chain_image_index); if (res != VK_SUCCESS && res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR) LOG_VULKAN_ERROR(res, "vkAcquireNextImageKHR failed: "); return res; } -bool SwapChain::ResizeSwapChain() +bool SwapChain::ResizeSwapChain(int window_width, int window_height) { DestroySwapChainImages(); + m_wsi.render_surface_width = window_width; + m_wsi.render_surface_height = window_height; if (!CreateSwapChain() || !SetupSwapChainImages()) { PanicAlertFmt("Failed to re-configure swap chain images, this is fatal (for now)"); @@ -531,7 +588,7 @@ bool SwapChain::SetFullscreenState(bool #endif } -bool SwapChain::RecreateSurface(void* native_handle) +bool SwapChain::RecreateSurface(void* native_handle, int window_width, int window_height) { // Destroy the old swap chain, images, and surface. 
DestroySwapChainImages(); @@ -540,6 +597,8 @@ bool SwapChain::RecreateSurface(void* na // Re-create the surface with the new native handle m_wsi.render_surface = native_handle; + m_wsi.render_surface_width = window_width; + m_wsi.render_surface_height = window_height; m_surface = CreateVulkanSurface(g_vulkan_context->GetVulkanInstance(), m_wsi); if (m_surface == VK_NULL_HANDLE) return false; diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VKSwapChain.h dolphin/Source/Core/VideoBackends/Vulkan/VKSwapChain.h --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VKSwapChain.h 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VKSwapChain.h 2023-09-26 18:02:22.836042087 +0000 @@ -3,6 +3,7 @@ #pragma once +#include #include #include @@ -50,10 +51,10 @@ public: { return m_swap_chain_images[m_current_swap_chain_image_index].framebuffer.get(); } - VkResult AcquireNextImage(); + VkResult AcquireNextImage(VkSemaphore semaphore); - bool RecreateSurface(void* native_handle); - bool ResizeSwapChain(); + bool RecreateSurface(void* native_handle, int window_width, int window_height); + bool ResizeSwapChain(int window_width, int window_height); bool RecreateSwapChain(); // Change vsync enabled state. This may fail as it causes a swapchain recreation. @@ -70,10 +71,19 @@ public: // Updates the fullscreen state. Must call on-thread. 
bool SetFullscreenState(bool state); + VkSemaphore GetNextSemaphore() + { + m_semaphore_index = (m_semaphore_index + 1) % NUM_COMMAND_BUFFERS; + return m_semaphores[m_semaphore_index]; + } + private: bool SelectSurfaceFormat(); bool SelectPresentMode(); + bool CreateSemaphores(); + void DestroySemaphores(); + bool CreateSwapChain(); void DestroySwapChain(); @@ -103,6 +113,9 @@ private: std::vector m_swap_chain_images; u32 m_current_swap_chain_image_index = 0; + std::array m_semaphores = {}; + u32 m_semaphore_index = 0; + u32 m_width = 0; u32 m_height = 0; u32 m_layers = 0; diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VKTexture.cpp dolphin/Source/Core/VideoBackends/Vulkan/VKTexture.cpp --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VKTexture.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VKTexture.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -21,6 +21,7 @@ #include "VideoBackends/Vulkan/VKStreamBuffer.h" #include "VideoBackends/Vulkan/VulkanContext.h" +#include "VKScheduler.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/VideoConfig.h" @@ -45,14 +46,17 @@ VKTexture::VKTexture(const TextureConfig VKTexture::~VKTexture() { - StateTracker::GetInstance()->UnbindTexture(m_view); - g_command_buffer_mgr->DeferImageViewDestruction(m_view); + g_scheduler->Record([c_view = m_view, c_image = m_image, + c_alloc = m_alloc](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->UnbindTexture(c_view); + command_buffer_mgr->DeferImageViewDestruction(c_view); - // If we don't have device memory allocated, the image is not owned by us (e.g. swapchain) - if (m_alloc != VK_NULL_HANDLE) - { - g_command_buffer_mgr->DeferImageDestruction(m_image, m_alloc); - } + // If we don't have device memory allocated, the image is not owned by us (e.g. 
swapchain) + if (c_alloc != VK_NULL_HANDLE) + { + command_buffer_mgr->DeferImageDestruction(c_image, c_alloc); + } + }); } std::unique_ptr VKTexture::Create(const TextureConfig& tex_config, std::string_view name) @@ -265,30 +269,35 @@ void VKTexture::CopyRectangleFromTexture static_cast(dst_rect.GetHeight()) <= m_config.height, "Dest rect is too large for CopyRectangleFromTexture"); - const u32 copy_layer_count = 1; - - VkImageCopy image_copy = { - {VK_IMAGE_ASPECT_COLOR_BIT, src_level, src_layer, copy_layer_count}, - {src_rect.left, src_rect.top, 0}, - {VK_IMAGE_ASPECT_COLOR_BIT, dst_level, dst_layer, copy_layer_count}, - {dst_rect.left, dst_rect.top, 0}, - {static_cast(src_rect.GetWidth()), static_cast(src_rect.GetHeight()), 1}}; - - // Must be called outside of a render pass. - StateTracker::GetInstance()->EndRenderPass(); + g_scheduler->Record([](CommandBufferManager* command_buffer_mgr) { + // Must be called outside of a render pass. + command_buffer_mgr->GetStateTracker()->EndRenderPass(); + }); const VkImageLayout old_src_layout = src_texture->GetLayout(); - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - vkCmdCopyImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), src_texture->m_image, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); + src_texture->TransitionToLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + TransitionToLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + // Copy a single array layer; src_layer/dst_layer select which one. + g_scheduler->Record([c_src_image = src_texture->GetImage(), c_image = m_image, + c_src_level = src_level, c_src_layer = src_layer, + c_src_rect = src_rect, c_dst_level = dst_level, + c_dst_layer = dst_layer, + c_dst_rect = dst_rect](CommandBufferManager* command_buffer_mgr) { 
+ VkImageCopy image_copy = {{VK_IMAGE_ASPECT_COLOR_BIT, c_src_level, c_src_layer, 1}, + {c_src_rect.left, c_src_rect.top, 0}, + {VK_IMAGE_ASPECT_COLOR_BIT, c_dst_level, c_dst_layer, 1}, + {c_dst_rect.left, c_dst_rect.top, 0}, + {static_cast(c_src_rect.GetWidth()), + static_cast(c_src_rect.GetHeight()), 1}}; + + vkCmdCopyImage(command_buffer_mgr->GetCurrentCommandBuffer(), c_src_image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, c_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); + }); // Only restore the source layout. Destination is restored by FinishedRendering(). - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_src_layout); + src_texture->TransitionToLayout(old_src_layout); } void VKTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, @@ -301,24 +310,28 @@ void VKTexture::ResolveFromTexture(const rect.top + rect.GetHeight() <= static_cast(srcentry->m_config.height)); // Resolving is considered to be a transfer operation. 
- StateTracker::GetInstance()->EndRenderPass(); + g_scheduler->Record([](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->EndRenderPass(); + }); VkImageLayout old_src_layout = srcentry->m_layout; - srcentry->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - VkImageResolve resolve = { - {VK_IMAGE_ASPECT_COLOR_BIT, level, layer, 1}, // srcSubresource - {rect.left, rect.top, 0}, // srcOffset - {VK_IMAGE_ASPECT_COLOR_BIT, level, layer, 1}, // dstSubresource - {rect.left, rect.top, 0}, // dstOffset - {static_cast(rect.GetWidth()), static_cast(rect.GetHeight()), 1} // extent - }; - vkCmdResolveImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), srcentry->m_image, - srcentry->m_layout, m_image, m_layout, 1, &resolve); + srcentry->TransitionToLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + TransitionToLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - srcentry->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_src_layout); + g_scheduler->Record([c_src_image = srcentry->GetImage(), c_image = m_image, + c_src_layout = srcentry->GetLayout(), c_dst_layout = m_layout, c_rect = rect, + c_layer = layer, c_level = level](CommandBufferManager* command_buffer_mgr) { + VkImageResolve resolve = { + {VK_IMAGE_ASPECT_COLOR_BIT, c_level, c_layer, 1}, // srcSubresource + {c_rect.left, c_rect.top, 0}, // srcOffset + {VK_IMAGE_ASPECT_COLOR_BIT, c_level, c_layer, 1}, // dstSubresource + {c_rect.left, c_rect.top, 0}, // dstOffset + {static_cast(c_rect.GetWidth()), static_cast(c_rect.GetHeight()), 1} // extent + }; + vkCmdResolveImage(command_buffer_mgr->GetCurrentCommandBuffer(), c_src_image, c_src_layout, + c_image, c_dst_layout, 1, &resolve); + }); + + srcentry->TransitionToLayout(old_src_layout); } void VKTexture::Load(u32 level, u32 width, u32 height, u32 
row_length, const u8* buffer, @@ -344,8 +357,7 @@ void VKTexture::Load(u32 level, u32 widt // When the last mip level is uploaded, we transition to SHADER_READ_ONLY, ready for use. This is // because we can't transition in a render pass, and we don't necessarily know when this texture // is going to be used. - TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + TransitionToLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, true); // For unaligned textures, we can save some memory in the transfer buffer by skipping the rows // that lie outside of the texture's dimensions. @@ -396,26 +408,30 @@ void VKTexture::Load(u32 level, u32 widt temp_buffer->Unmap(); } - // Copy from the streaming buffer to the actual image. - VkBufferImageCopy image_copy = { - upload_buffer_offset, // VkDeviceSize bufferOffset - row_length, // uint32_t bufferRowLength - 0, // uint32_t bufferImageHeight - {VK_IMAGE_ASPECT_COLOR_BIT, level, layer, 1}, // VkImageSubresourceLayers imageSubresource - {0, 0, 0}, // VkOffset3D imageOffset - {width, height, 1} // VkExtent3D imageExtent - }; - vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), upload_buffer, - m_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); + g_scheduler->Record([c_upload_buffer_offset = upload_buffer_offset, c_row_length = row_length, + c_level = level, c_layer = layer, c_width = width, c_height = height, + c_upload_buffer = upload_buffer, + c_image = m_image](CommandBufferManager* command_buffer_mgr) { + // Copy from the streaming buffer into the requested mip level and array layer of the image. 
+ VkBufferImageCopy image_copy = { + c_upload_buffer_offset, // VkDeviceSize bufferOffset + c_row_length, // uint32_t bufferRowLength + 0, // uint32_t bufferImageHeight + {VK_IMAGE_ASPECT_COLOR_BIT, c_level, c_layer, 1}, // VkImageSubresourceLayers imageSubresource + {0, 0, 0}, // VkOffset3D imageOffset + {c_width, c_height, 1} // VkExtent3D imageExtent + }; + vkCmdCopyBufferToImage(command_buffer_mgr->GetCurrentInitCommandBuffer(), c_upload_buffer, + c_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); + }); // Preemptively transition to shader read only after uploading the last mip level, as we're // likely finished with writes to this texture for now. We can't do this in common with a // FinishedRendering() call because the upload happens in the init command buffer, and we // don't want to interrupt the render pass with calls which were executed ages before. if (level == (m_config.levels - 1) && layer == (m_config.layers - 1)) { - TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, true); } } @@ -424,9 +440,10 @@ void VKTexture::FinishedRendering() if (m_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) return; - StateTracker::GetInstance()->EndRenderPass(); - TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + g_scheduler->Record([](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->EndRenderPass(); + }); + TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); } void VKTexture::OverrideImageLayout(VkImageLayout new_layout) @@ -434,9 +451,9 @@ void VKTexture::OverrideImageLayout(VkIm m_layout = new_layout; } -void VKTexture::TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout) const +void VKTexture::TransitionToLayout(VkImageLayout new_layout, bool 
init_command_buffer) const { - if (m_layout == new_layout) + if (m_layout == new_layout) [[likely]] return; VkImageMemoryBarrier barrier = { @@ -573,14 +590,19 @@ void VKTexture::TransitionToLayout(VkCom } m_compute_layout = ComputeImageLayout::Undefined; - vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, - &barrier); + g_scheduler->Record([c_src_stage_mask = srcStageMask, c_dst_stage_mask = dstStageMask, + c_barrier = barrier, + init_command_buffer](CommandBufferManager* command_buffer_mgr) { + vkCmdPipelineBarrier(init_command_buffer ? command_buffer_mgr->GetCurrentInitCommandBuffer() : + command_buffer_mgr->GetCurrentCommandBuffer(), + c_src_stage_mask, c_dst_stage_mask, 0, 0, nullptr, 0, nullptr, 1, + &c_barrier); + }); m_layout = new_layout; } -void VKTexture::TransitionToLayout(VkCommandBuffer command_buffer, - ComputeImageLayout new_layout) const +void VKTexture::TransitionToLayout(ComputeImageLayout new_layout, bool init_command_buffer) const { ASSERT(new_layout != ComputeImageLayout::Undefined); if (m_compute_layout == new_layout) @@ -678,8 +700,14 @@ void VKTexture::TransitionToLayout(VkCom m_layout = barrier.newLayout; m_compute_layout = new_layout; - vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, - &barrier); + g_scheduler->Record([c_src_stage_mask = srcStageMask, c_dst_stage_mask = dstStageMask, + c_barrier = barrier, + init_command_buffer](CommandBufferManager* command_buffer_mgr) { + vkCmdPipelineBarrier(init_command_buffer ? 
command_buffer_mgr->GetCurrentInitCommandBuffer() : + command_buffer_mgr->GetCurrentCommandBuffer(), + c_src_stage_mask, c_dst_stage_mask, 0, 0, nullptr, 0, nullptr, 1, + &c_barrier); + }); } VKStagingTexture::VKStagingTexture(PrivateTag, StagingTextureType type, const TextureConfig& config, @@ -694,7 +722,11 @@ VKStagingTexture::~VKStagingTexture() { if (m_linear_image != VK_NULL_HANDLE) { - g_command_buffer_mgr->DeferImageDestruction(m_linear_image, m_linear_image_alloc); + g_scheduler->Record( + [c_linear_image = m_linear_image, + c_linear_image_alloc = m_linear_image_alloc](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->DeferImageDestruction(c_linear_image, c_linear_image_alloc); + }); } } @@ -816,12 +848,12 @@ void VKStagingTexture::CopyFromTexture(c src_rect.top >= 0 && static_cast(src_rect.bottom) <= src_tex->GetHeight()); ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= m_config.width && dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= m_config.height); - - StateTracker::GetInstance()->EndRenderPass(); + g_scheduler->Record([](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->EndRenderPass(); + }); VkImageLayout old_layout = src_tex->GetLayout(); - src_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + src_tex->TransitionToLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); // Issue the image->buffer copy, but delay it for now. 
VkBufferImageCopy image_copy = {}; @@ -844,15 +876,17 @@ void VKStagingTexture::CopyFromTexture(c image_copy.imageOffset = {0, 0, 0}; } - vkCmdCopyImageToBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), src_image, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_staging_buffer->GetBuffer(), 1, - &image_copy); + g_scheduler->Record([c_src_image = src_image, c_dst_buffer = m_staging_buffer->GetBuffer(), + c_image_copy = image_copy](CommandBufferManager* command_buffer_mgr) { + vkCmdCopyImageToBuffer(command_buffer_mgr->GetCurrentCommandBuffer(), c_src_image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, c_dst_buffer, 1, &c_image_copy); + }); // Restore old source texture layout. - src_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout); + src_tex->TransitionToLayout(old_layout); m_needs_flush = true; - m_flush_fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter(); + m_flush_fence_counter = g_scheduler->GetCurrentFenceCounter(); } void VKStagingTexture::CopyFromTextureToLinearImage(const VKTexture* src_tex, @@ -864,44 +898,49 @@ void VKStagingTexture::CopyFromTextureTo // with optimal tiling (VK_IMAGE_TILING_OPTIMAL) to a buffer. // That allocation is very slow, so we just do it ourself and reuse the intermediate image. 
- const VkImageAspectFlags aspect = VKTexture::GetImageViewAspectForFormat(src_tex->GetFormat()); - - VkImageMemoryBarrier linear_image_barrier = {}; - linear_image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - linear_image_barrier.pNext = nullptr; - linear_image_barrier.srcAccessMask = 0; - linear_image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; - linear_image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - linear_image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - linear_image_barrier.image = m_linear_image; - linear_image_barrier.subresourceRange = {aspect, 0, 1, 0, 1}; - vkCmdPipelineBarrier(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, - nullptr, 0, nullptr, 1, &linear_image_barrier); - - VkImageBlit blit; - blit.srcSubresource = {aspect, src_level, src_layer, 1}; - blit.dstSubresource.layerCount = 1; - blit.dstSubresource.baseArrayLayer = 0; - blit.dstSubresource.mipLevel = 0; - blit.dstSubresource.aspectMask = linear_image_barrier.subresourceRange.aspectMask; - blit.srcOffsets[0] = {src_rect.left, src_rect.top, 0}; - blit.srcOffsets[1] = {static_cast(blit.srcOffsets[0].x + src_rect.GetWidth()), - static_cast(blit.srcOffsets[0].y + src_rect.GetHeight()), 1}; - blit.dstOffsets[0] = {0, 0, 0}; - blit.dstOffsets[1] = {dst_rect.GetWidth(), dst_rect.GetHeight(), 1u}; - - vkCmdBlitImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), src_tex->GetImage(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_linear_image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, VK_FILTER_NEAREST); - - linear_image_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - linear_image_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - linear_image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; - - vkCmdPipelineBarrier(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_PIPELINE_STAGE_TRANSFER_BIT, 
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, - nullptr, 0, nullptr, 1, &linear_image_barrier); + g_scheduler->Record([c_linear_image = m_linear_image, c_src_image = src_tex->GetImage(), + c_src_format = src_tex->GetFormat(), c_src_rect = src_rect, + c_dst_rect = dst_rect, c_src_layer = src_layer, + c_src_level = src_level](CommandBufferManager* command_buffer_mgr) { + const VkImageAspectFlags aspect = VKTexture::GetImageViewAspectForFormat(c_src_format); + + VkImageMemoryBarrier linear_image_barrier = {}; + linear_image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + linear_image_barrier.pNext = nullptr; + linear_image_barrier.srcAccessMask = 0; + linear_image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT; + linear_image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + linear_image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + linear_image_barrier.image = c_linear_image; + linear_image_barrier.subresourceRange = {aspect, 0, 1, 0, 1}; + vkCmdPipelineBarrier(command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, + nullptr, 0, nullptr, 1, &linear_image_barrier); + + VkImageBlit blit; + blit.srcSubresource = {aspect, c_src_level, c_src_layer, 1}; + blit.dstSubresource.layerCount = 1; + blit.dstSubresource.baseArrayLayer = 0; + blit.dstSubresource.mipLevel = 0; + blit.dstSubresource.aspectMask = linear_image_barrier.subresourceRange.aspectMask; + blit.srcOffsets[0] = {c_src_rect.left, c_src_rect.top, 0}; + blit.srcOffsets[1] = {static_cast(blit.srcOffsets[0].x + c_src_rect.GetWidth()), + static_cast(blit.srcOffsets[0].y + c_src_rect.GetHeight()), 1}; + blit.dstOffsets[0] = {0, 0, 0}; + blit.dstOffsets[1] = {c_dst_rect.GetWidth(), c_dst_rect.GetHeight(), 1u}; + + vkCmdBlitImage(command_buffer_mgr->GetCurrentCommandBuffer(), c_src_image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, c_linear_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, 
VK_FILTER_NEAREST); + + linear_image_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + linear_image_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + linear_image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + + vkCmdPipelineBarrier(command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, + nullptr, 0, nullptr, 1, &linear_image_barrier); + }); } void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, @@ -919,32 +958,40 @@ void VKStagingTexture::CopyToTexture(con // Flush caches before copying. m_staging_buffer->FlushCPUCache(); - StateTracker::GetInstance()->EndRenderPass(); + + g_scheduler->Record([](CommandBufferManager* command_buffer_manager) { + command_buffer_manager->GetStateTracker()->EndRenderPass(); + }); VkImageLayout old_layout = dst_tex->GetLayout(); - dst_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + dst_tex->TransitionToLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - // Issue the image->buffer copy, but delay it for now. 
- VkBufferImageCopy image_copy = {}; - image_copy.bufferOffset = - static_cast(static_cast(src_rect.top) * m_config.GetStride() + - static_cast(src_rect.left) * m_texel_size); - image_copy.bufferRowLength = static_cast(m_config.width); - image_copy.bufferImageHeight = 0; - image_copy.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, dst_level, dst_layer, 1}; - image_copy.imageOffset = {dst_rect.left, dst_rect.top, 0}; - image_copy.imageExtent = {static_cast(dst_rect.GetWidth()), - static_cast(dst_rect.GetHeight()), 1u}; - vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), - m_staging_buffer->GetBuffer(), dst_tex->GetImage(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); + g_scheduler->Record( + [c_src_rect = src_rect, c_dst_rect = dst_rect, c_dst_layer = dst_layer, + c_dst_level = dst_level, c_width = m_config.width, c_height = m_config.height, + c_stride = m_config.GetStride(), c_texel_size = m_texel_size, + c_staging_buffer = m_staging_buffer->GetBuffer(), + c_dst_image = dst_tex->GetImage()](CommandBufferManager* command_buffer_mgr) { + // Issue the image->buffer copy, but delay it for now. + VkBufferImageCopy image_copy = {}; + image_copy.bufferOffset = + static_cast(static_cast(c_src_rect.top) * c_stride + + static_cast(c_src_rect.left) * c_texel_size); + image_copy.bufferRowLength = static_cast(c_width); + image_copy.bufferImageHeight = 0; + image_copy.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, c_dst_level, c_dst_layer, 1}; + image_copy.imageOffset = {c_dst_rect.left, c_dst_rect.top, 0}; + image_copy.imageExtent = {static_cast(c_dst_rect.GetWidth()), + static_cast(c_dst_rect.GetHeight()), 1u}; + vkCmdCopyBufferToImage(command_buffer_mgr->GetCurrentCommandBuffer(), c_staging_buffer, + c_dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); + }); // Restore old source texture layout. 
- dst_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout); + dst_tex->TransitionToLayout(old_layout); m_needs_flush = true; - m_flush_fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter(); + m_flush_fence_counter = g_scheduler->GetCurrentFenceCounter(); } bool VKStagingTexture::Map() @@ -964,7 +1011,7 @@ void VKStagingTexture::Flush() return; // Is this copy in the current command buffer? - if (g_command_buffer_mgr->GetCurrentFenceCounter() == m_flush_fence_counter) + if (g_scheduler->GetCurrentFenceCounter() == m_flush_fence_counter) { // Execute the command buffer and wait for it to finish. Renderer::GetInstance()->ExecuteCommandBuffer(false, true); @@ -972,7 +1019,7 @@ void VKStagingTexture::Flush() else { // Wait for the GPU to finish with it. - g_command_buffer_mgr->WaitForFenceCounter(m_flush_fence_counter); + g_scheduler->WaitForFenceCounter(m_flush_fence_counter); } // For readback textures, invalidate the CPU cache as there is new data there. 
@@ -998,7 +1045,11 @@ VKFramebuffer::VKFramebuffer(VKTexture* VKFramebuffer::~VKFramebuffer() { - g_command_buffer_mgr->DeferFramebufferDestruction(m_fb); + g_scheduler->SyncWorker(); // TODO + + g_scheduler->Record([c_fb = m_fb](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->DeferFramebufferDestruction(c_fb); + }); } std::unique_ptr VKFramebuffer::Create(VKTexture* color_attachment, @@ -1067,15 +1118,13 @@ void VKFramebuffer::TransitionForRender( if (m_color_attachment) { static_cast(m_color_attachment) - ->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + ->TransitionToLayout(VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); } if (m_depth_attachment) { static_cast(m_depth_attachment) - ->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + ->TransitionToLayout(VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); } } } // namespace Vulkan diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VKTexture.h dolphin/Source/Core/VideoBackends/Vulkan/VKTexture.h --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VKTexture.h 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VKTexture.h 2023-09-26 18:02:22.836042087 +0000 @@ -67,8 +67,8 @@ public: // irrelevant and will not be loaded. 
void OverrideImageLayout(VkImageLayout new_layout); - void TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout) const; - void TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout) const; + void TransitionToLayout(VkImageLayout new_layout, bool init_command_buffer = false) const; + void TransitionToLayout(ComputeImageLayout new_layout, bool init_command_buffer = false) const; private: bool CreateView(VkImageViewType type); diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VKVertexManager.cpp dolphin/Source/Core/VideoBackends/Vulkan/VKVertexManager.cpp --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VKVertexManager.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VKVertexManager.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -15,6 +15,7 @@ #include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/StateTracker.h" #include "VideoBackends/Vulkan/VKRenderer.h" +#include "VideoBackends/Vulkan/VKScheduler.h" #include "VideoBackends/Vulkan/VKStreamBuffer.h" #include "VideoBackends/Vulkan/VKVertexFormat.h" #include "VideoBackends/Vulkan/VulkanContext.h" @@ -67,11 +68,11 @@ bool VertexManager::Initialize() m_vertex_stream_buffer = StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - VERTEX_STREAM_BUFFER_SIZE); + VERTEX_STREAM_BUFFER_SIZE * 2); m_index_stream_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_STREAM_BUFFER_SIZE); + StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_STREAM_BUFFER_SIZE * 2); m_uniform_stream_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, UNIFORM_STREAM_BUFFER_SIZE); + StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, UNIFORM_STREAM_BUFFER_SIZE * 2); if (!m_vertex_stream_buffer || !m_index_stream_buffer || !m_uniform_stream_buffer) { PanicAlertFmt("Failed to allocate streaming buffers"); @@ -121,13 +122,18 @@ bool 
VertexManager::Initialize() // Bind the buffers to all the known spots even if it's not used, to keep the driver happy. UploadAllConstants(); - StateTracker::GetInstance()->SetUtilityUniformBuffer(m_uniform_stream_buffer->GetBuffer(), 0, - sizeof(VertexShaderConstants)); - for (u32 i = 0; i < NUM_COMPUTE_TEXEL_BUFFERS; i++) - { - StateTracker::GetInstance()->SetTexelBuffer(i, - m_texel_buffer_views[TEXEL_BUFFER_FORMAT_R8_UINT]); - } + + g_scheduler->Record([c_buffer = m_uniform_stream_buffer->GetBuffer(), + c_texel_buffer_view = m_texel_buffer_views[TEXEL_BUFFER_FORMAT_R8_UINT]]( + CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetUtilityUniformBuffer(c_buffer, 0, + sizeof(VertexShaderConstants)); + + for (u32 i = 0; i < NUM_COMPUTE_TEXEL_BUFFERS; i++) + { + command_buffer_mgr->GetStateTracker()->SetTexelBuffer(i, c_texel_buffer_view); + } + }); return true; } @@ -189,10 +195,13 @@ void VertexManager::CommitBuffer(u32 num ADDSTAT(g_stats.this_frame.bytes_vertex_streamed, static_cast(vertex_data_size)); ADDSTAT(g_stats.this_frame.bytes_index_streamed, static_cast(index_data_size)); - StateTracker::GetInstance()->SetVertexBuffer(m_vertex_stream_buffer->GetBuffer(), 0, - VERTEX_STREAM_BUFFER_SIZE); - StateTracker::GetInstance()->SetIndexBuffer(m_index_stream_buffer->GetBuffer(), 0, - VK_INDEX_TYPE_UINT16); + g_scheduler->Record([c_vertex_buffer = m_vertex_stream_buffer->GetBuffer(), + c_index_buffer = m_index_stream_buffer->GetBuffer()]( + CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetVertexBuffer(c_vertex_buffer, 0, + VERTEX_STREAM_BUFFER_SIZE); + command_buffer_mgr->GetStateTracker()->SetIndexBuffer(c_index_buffer, 0, VK_INDEX_TYPE_UINT16); + }); } void VertexManager::UploadUniforms() @@ -210,9 +219,12 @@ void VertexManager::UpdateVertexShaderCo if (!vertex_shader_manager.dirty || !ReserveConstantStorage()) return; - StateTracker::GetInstance()->SetGXUniformBuffer( - 
UBO_DESCRIPTOR_SET_BINDING_VS, m_uniform_stream_buffer->GetBuffer(), - m_uniform_stream_buffer->GetCurrentOffset(), sizeof(VertexShaderConstants)); + g_scheduler->Record([c_buffer = m_uniform_stream_buffer->GetBuffer(), + c_offset = m_uniform_stream_buffer->GetCurrentOffset()]( + CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_VS, c_buffer, c_offset, sizeof(VertexShaderConstants)); + }); std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &vertex_shader_manager.constants, sizeof(VertexShaderConstants)); m_uniform_stream_buffer->CommitMemory(sizeof(VertexShaderConstants)); @@ -228,9 +240,13 @@ void VertexManager::UpdateGeometryShader if (!geometry_shader_manager.dirty || !ReserveConstantStorage()) return; - StateTracker::GetInstance()->SetGXUniformBuffer( - UBO_DESCRIPTOR_SET_BINDING_GS, m_uniform_stream_buffer->GetBuffer(), - m_uniform_stream_buffer->GetCurrentOffset(), sizeof(GeometryShaderConstants)); + g_scheduler->Record([c_buffer = m_uniform_stream_buffer->GetBuffer(), + c_offset = m_uniform_stream_buffer->GetCurrentOffset()]( + CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_GS, c_buffer, c_offset, sizeof(GeometryShaderConstants)); + }); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &geometry_shader_manager.constants, sizeof(GeometryShaderConstants)); m_uniform_stream_buffer->CommitMemory(sizeof(GeometryShaderConstants)); @@ -246,9 +262,13 @@ void VertexManager::UpdatePixelShaderCon if (!pixel_shader_manager.dirty || !ReserveConstantStorage()) return; - StateTracker::GetInstance()->SetGXUniformBuffer( - UBO_DESCRIPTOR_SET_BINDING_PS, m_uniform_stream_buffer->GetBuffer(), - m_uniform_stream_buffer->GetCurrentOffset(), sizeof(PixelShaderConstants)); + g_scheduler->Record([c_buffer = m_uniform_stream_buffer->GetBuffer(), + c_offset = 
m_uniform_stream_buffer->GetCurrentOffset()]( + CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_PS, c_buffer, c_offset, sizeof(PixelShaderConstants)); + }); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &pixel_shader_manager.constants, sizeof(PixelShaderConstants)); m_uniform_stream_buffer->CommitMemory(sizeof(PixelShaderConstants)); @@ -299,18 +319,21 @@ void VertexManager::UploadAllConstants() auto& geometry_shader_manager = system.GetGeometryShaderManager(); // Update bindings - StateTracker::GetInstance()->SetGXUniformBuffer( - UBO_DESCRIPTOR_SET_BINDING_PS, m_uniform_stream_buffer->GetBuffer(), - m_uniform_stream_buffer->GetCurrentOffset() + pixel_constants_offset, - sizeof(PixelShaderConstants)); - StateTracker::GetInstance()->SetGXUniformBuffer( - UBO_DESCRIPTOR_SET_BINDING_VS, m_uniform_stream_buffer->GetBuffer(), - m_uniform_stream_buffer->GetCurrentOffset() + vertex_constants_offset, - sizeof(VertexShaderConstants)); - StateTracker::GetInstance()->SetGXUniformBuffer( - UBO_DESCRIPTOR_SET_BINDING_GS, m_uniform_stream_buffer->GetBuffer(), - m_uniform_stream_buffer->GetCurrentOffset() + geometry_constants_offset, - sizeof(GeometryShaderConstants)); + g_scheduler->Record( + [c_buffer = m_uniform_stream_buffer->GetBuffer(), + c_ps_offset = m_uniform_stream_buffer->GetCurrentOffset() + pixel_constants_offset, + c_vs_offset = m_uniform_stream_buffer->GetCurrentOffset() + vertex_constants_offset, + c_gs_offset = m_uniform_stream_buffer->GetCurrentOffset() + + geometry_constants_offset](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_PS, c_buffer, c_ps_offset, sizeof(PixelShaderConstants)); + + command_buffer_mgr->GetStateTracker()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_VS, c_buffer, c_vs_offset, sizeof(VertexShaderConstants)); + + 
command_buffer_mgr->GetStateTracker()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_GS, c_buffer, c_gs_offset, sizeof(GeometryShaderConstants)); + }); // Copy the actual data in std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + pixel_constants_offset, @@ -340,8 +363,11 @@ void VertexManager::UploadUtilityUniform Renderer::GetInstance()->ExecuteCommandBuffer(false); } - StateTracker::GetInstance()->SetUtilityUniformBuffer( - m_uniform_stream_buffer->GetBuffer(), m_uniform_stream_buffer->GetCurrentOffset(), data_size); + g_scheduler->Record([c_buffer = m_uniform_stream_buffer->GetBuffer(), + c_offset = m_uniform_stream_buffer->GetCurrentOffset(), + c_size = data_size](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetUtilityUniformBuffer(c_buffer, c_offset, c_size); + }); std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), data, data_size); m_uniform_stream_buffer->CommitMemory(data_size); ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, data_size); @@ -370,7 +396,10 @@ bool VertexManager::UploadTexelBuffer(co *out_offset = static_cast(m_texel_stream_buffer->GetCurrentOffset()) / elem_size; m_texel_stream_buffer->CommitMemory(data_size); ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, data_size); - StateTracker::GetInstance()->SetTexelBuffer(0, m_texel_buffer_views[format]); + g_scheduler->Record([c_texel_buffer_view = + m_texel_buffer_views[format]](CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetTexelBuffer(0, c_texel_buffer_view); + }); return true; } @@ -407,8 +436,13 @@ bool VertexManager::UploadTexelBuffer(co m_texel_stream_buffer->CommitMemory(palette_byte_offset + palette_size); ADDSTAT(g_stats.this_frame.bytes_uniform_streamed, palette_byte_offset + palette_size); - StateTracker::GetInstance()->SetTexelBuffer(0, m_texel_buffer_views[format]); - StateTracker::GetInstance()->SetTexelBuffer(1, m_texel_buffer_views[palette_format]); + +
g_scheduler->Record([c_texel_buffer_view = m_texel_buffer_views[format], + c_palette_texel_buffer_view = m_texel_buffer_views[palette_format]]( + CommandBufferManager* command_buffer_mgr) { + command_buffer_mgr->GetStateTracker()->SetTexelBuffer(0, c_texel_buffer_view); + command_buffer_mgr->GetStateTracker()->SetTexelBuffer(1, c_palette_texel_buffer_view); + }); return true; } } // namespace Vulkan diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp dolphin/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -274,6 +274,13 @@ bool VulkanContext::SelectInstanceExtens return false; } #endif +#if defined(VK_USE_PLATFORM_WAYLAND_KHR) + if (wstype == WindowSystemType::Wayland && + !AddExtension(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, true)) + { + return false; + } +#endif #if defined(VK_USE_PLATFORM_ANDROID_KHR) if (wstype == WindowSystemType::Android && !AddExtension(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME, true)) diff -rupN dolphin.orig/Source/Core/VideoBackends/Vulkan/VulkanEntryPoints.inl dolphin/Source/Core/VideoBackends/Vulkan/VulkanEntryPoints.inl --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VulkanEntryPoints.inl 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VulkanEntryPoints.inl 2023-09-26 18:02:22.836042087 +0000 @@ -49,6 +49,11 @@ VULKAN_INSTANCE_ENTRY_POINT(vkCreateXlib VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceXlibPresentationSupportKHR, false) #endif +#if defined(VK_USE_PLATFORM_WAYLAND_KHR) +VULKAN_INSTANCE_ENTRY_POINT(vkCreateWaylandSurfaceKHR, false) +VULKAN_INSTANCE_ENTRY_POINT(vkGetPhysicalDeviceWaylandPresentationSupportKHR, false) +#endif + #if defined(VK_USE_PLATFORM_ANDROID_KHR) VULKAN_INSTANCE_ENTRY_POINT(vkCreateAndroidSurfaceKHR, false) #endif diff -rupN 
dolphin.orig/Source/Core/VideoBackends/Vulkan/VulkanLoader.h dolphin/Source/Core/VideoBackends/Vulkan/VulkanLoader.h --- dolphin.orig/Source/Core/VideoBackends/Vulkan/VulkanLoader.h 2023-09-26 17:58:02.853996721 +0000 +++ dolphin/Source/Core/VideoBackends/Vulkan/VulkanLoader.h 2023-09-26 18:02:22.836042087 +0000 @@ -13,6 +13,10 @@ #define VK_USE_PLATFORM_XLIB_KHR #endif +#if defined(HAVE_WAYLAND) +#define VK_USE_PLATFORM_WAYLAND_KHR +#endif + #if defined(ANDROID) #define VK_USE_PLATFORM_ANDROID_KHR #endif diff -rupN dolphin.orig/Source/Core/VideoCommon/FramebufferManager.cpp dolphin/Source/Core/VideoCommon/FramebufferManager.cpp --- dolphin.orig/Source/Core/VideoCommon/FramebufferManager.cpp 2023-09-26 17:58:02.857996813 +0000 +++ dolphin/Source/Core/VideoCommon/FramebufferManager.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -630,7 +630,7 @@ void FramebufferManager::DestroyReadback bool FramebufferManager::CreateReadbackFramebuffer() { - if (g_renderer->GetEFBScale() != 1) + if (!g_renderer->IsUnscaled()) { const TextureConfig color_config(IsUsingTiledEFBCache() ? m_efb_cache_tile_size : EFB_WIDTH, IsUsingTiledEFBCache() ? m_efb_cache_tile_size : EFB_HEIGHT, 1, @@ -651,7 +651,7 @@ bool FramebufferManager::CreateReadbackF (IsUsingTiledEFBCache() && !g_ActiveConfig.backend_info.bSupportsPartialDepthCopies) || !AbstractTexture::IsCompatibleDepthAndColorFormats(m_efb_depth_texture->GetFormat(), GetEFBDepthCopyFormat()) || - g_renderer->GetEFBScale() != 1) + !g_renderer->IsUnscaled()) { const TextureConfig depth_config(IsUsingTiledEFBCache() ? m_efb_cache_tile_size : EFB_WIDTH, IsUsingTiledEFBCache() ? m_efb_cache_tile_size : EFB_HEIGHT, 1, @@ -731,7 +731,7 @@ void FramebufferManager::PopulateEFBCach const MathUtil::Rectangle native_rect = g_renderer->ConvertEFBRectangle(rect); AbstractTexture* src_texture = depth ?
ResolveEFBDepthTexture(native_rect) : ResolveEFBColorTexture(native_rect); - if (g_renderer->GetEFBScale() != 1 || force_intermediate_copy) + if (!g_renderer->IsUnscaled() || force_intermediate_copy) { // Downsample from internal resolution to 1x. // TODO: This won't produce correct results at IRs above 2x. More samples are required. @@ -918,7 +918,7 @@ void FramebufferManager::CreatePokeVerti // GPU will expand the point to a quad. const float cs_x = (static_cast(x) + 0.5f) * cs_pixel_width - 1.0f; const float cs_y = 1.0f - (static_cast(y) + 0.5f) * cs_pixel_height; - const float point_size = static_cast(g_renderer->GetEFBScale()); + const float point_size = g_renderer->GetEFBScalef(); destination_list->push_back({{cs_x, cs_y, z, point_size}, color}); return; } diff -rupN dolphin.orig/Source/Core/VideoCommon/RenderBase.cpp dolphin/Source/Core/VideoCommon/RenderBase.cpp --- dolphin.orig/Source/Core/VideoCommon/RenderBase.cpp 2023-09-26 17:58:02.857996813 +0000 +++ dolphin/Source/Core/VideoCommon/RenderBase.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -364,19 +364,24 @@ void Renderer::RenderToXFB(u32 xfbAddr, return; } -unsigned int Renderer::GetEFBScale() const +bool Renderer::IsUnscaled() const { - return m_efb_scale; + return m_efb_scale == 2; +} + +float Renderer::GetEFBScalef() const +{ + return m_efb_scale / 2.0f; } int Renderer::EFBToScaledX(int x) const { - return x * static_cast(m_efb_scale); + return x * static_cast(m_efb_scale) / 2; } int Renderer::EFBToScaledY(int y) const { - return y * static_cast(m_efb_scale); + return y * static_cast(m_efb_scale) / 2; } float Renderer::EFBToScaledXf(float x) const @@ -391,7 +396,7 @@ float Renderer::EFBToScaledYf(float y) c std::tuple Renderer::CalculateTargetScale(int x, int y) const { - return std::make_tuple(x * static_cast(m_efb_scale), y * static_cast(m_efb_scale)); + return std::make_tuple(x * static_cast(m_efb_scale) / 2, y * static_cast(m_efb_scale) / 2); } // return true if target size changed @@ -711,16
+716,20 @@ bool Renderer::IsHeadless() const return true; } -void Renderer::ChangeSurface(void* new_surface_handle) +void Renderer::ChangeSurface(void* new_surface_handle, int new_width, int new_height) { std::lock_guard lock(m_swap_mutex); m_new_surface_handle = new_surface_handle; + m_new_surface_width = new_width; + m_new_surface_height = new_height; m_surface_changed.Set(); } -void Renderer::ResizeSurface() +void Renderer::ResizeSurface(int new_width, int new_height) { std::lock_guard lock(m_swap_mutex); + m_new_surface_width = new_width; + m_new_surface_height = new_height; m_surface_resized.Set(); } diff -rupN dolphin.orig/Source/Core/VideoCommon/RenderBase.h dolphin/Source/Core/VideoCommon/RenderBase.h --- dolphin.orig/Source/Core/VideoCommon/RenderBase.h 2023-09-26 17:58:02.857996813 +0000 +++ dolphin/Source/Core/VideoCommon/RenderBase.h 2023-09-26 18:02:22.836042087 +0000 @@ -193,7 +193,8 @@ public: std::tuple, MathUtil::Rectangle> ConvertStereoRectangle(const MathUtil::Rectangle& rc) const; - unsigned int GetEFBScale() const; + bool IsUnscaled() const; + float GetEFBScalef() const; // Use this to upscale native EFB coordinates to IDEAL internal resolution int EFBToScaledX(int x) const; @@ -246,8 +247,8 @@ public: VideoCommon::PostProcessing* GetPostProcessor() const { return m_post_processor.get(); } // Final surface changing // This is called when the surface is resized (WX) or the window changes (Android). 
- void ChangeSurface(void* new_surface_handle); - void ResizeSurface(); + void ChangeSurface(void* new_surface_handle, int new_width, int new_height); + void ResizeSurface(int new_width, int new_height); bool UseVertexDepthRange() const; void DoState(PointerWrap& p); @@ -342,6 +343,8 @@ protected: std::unique_ptr m_post_processor; void* m_new_surface_handle = nullptr; + int m_new_surface_width = 0; + int m_new_surface_height = 0; Common::Flag m_surface_changed; Common::Flag m_surface_resized; std::mutex m_swap_mutex; @@ -357,7 +360,8 @@ private: std::tuple CalculateOutputDimensions(int width, int height) const; PixelFormat m_prev_efb_format = PixelFormat::INVALID_FMT; - unsigned int m_efb_scale = 1; + // Scale in steps of 0.5x. Value of 2 is scale of 1 (unscaled). + unsigned int m_efb_scale = 2; // These will be set on the first call to SetWindowSize. int m_last_window_request_width = 0; diff -rupN dolphin.orig/Source/Core/VideoCommon/TextureCacheBase.cpp dolphin/Source/Core/VideoCommon/TextureCacheBase.cpp --- dolphin.orig/Source/Core/VideoCommon/TextureCacheBase.cpp 2023-09-26 17:58:02.857996813 +0000 +++ dolphin/Source/Core/VideoCommon/TextureCacheBase.cpp 2023-09-26 18:02:22.836042087 +0000 @@ -1046,7 +1046,7 @@ static void SetSamplerState(u32 index, f // that have arbitrary contents, eg. are used for fog effects where the // distance they kick in at is important to preserve at any resolution. // Correct this with the upscaling factor of custom textures. 
- s32 lod_offset = std::log2(g_renderer->GetEFBScale() / custom_tex_scale) * 256.f; + s64 lod_offset = std::log2(g_renderer->GetEFBScalef() / custom_tex_scale) * (s64) 256.f; state.tm0.lod_bias = std::clamp(state.tm0.lod_bias + lod_offset, -32768, 32767); // Anisotropic also pushes mips farther away so it cannot be used either @@ -2221,7 +2221,7 @@ void TextureCacheBase::CopyRenderTargetT // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more // complex down filtering to average all pixels and produce the correct result. const bool linear_filter = - !is_depth_copy && (scaleByHalf || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f); + !is_depth_copy && (scaleByHalf || !g_renderer->IsUnscaled() || y_scale > 1.0f); TCacheEntry* entry = nullptr; if (copy_to_vram)