Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Async specialized shaders #574

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
11 changes: 6 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ include_directories(third_party/stb)
include_directories(third_party/opengl)
include_directories(third_party/miniaudio)
include_directories(third_party/mio/single_include)
include_directories(third_party/lockfree)

add_compile_definitions(NOMINMAX) # Make windows.h not define min/max macros because third-party deps don't like it
add_compile_definitions(WIN32_LEAN_AND_MEAN) # Make windows.h not include literally everything
Expand Down Expand Up @@ -301,7 +302,7 @@ if(ENABLE_QT_GUI)
set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/gl/context_wgl.cpp)
else()
set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/gl/context_egl.cpp third_party/duckstation/gl/context_egl_wayland.cpp
third_party/duckstation/gl/context_egl_x11.cpp third_party/duckstation/gl/context_glx.cpp third_party/duckstation/gl/x11_window.cpp)
third_party/duckstation/gl/context_egl_x11.cpp third_party/duckstation/gl/x11_window.cpp)
endif()
endif()

Expand All @@ -325,14 +326,14 @@ if(ENABLE_OPENGL)
set(RENDERER_GL_INCLUDE_FILES third_party/opengl/opengl.hpp
include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp
include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp
include/renderer_gl/gl_state.hpp
include/renderer_gl/gl_state.hpp include/renderer_gl/async_compiler.hpp
)

set(RENDERER_GL_SOURCE_FILES src/core/renderer_gl/renderer_gl.cpp
src/core/renderer_gl/textures.cpp src/core/renderer_gl/etc1.cpp
src/core/renderer_gl/gl_state.cpp src/host_shaders/opengl_display.frag
src/host_shaders/opengl_display.vert src/host_shaders/opengl_vertex_shader.vert
src/host_shaders/opengl_fragment_shader.frag
src/core/renderer_gl/gl_state.cpp src/core/renderer_gl/async_compiler.cpp
src/host_shaders/opengl_display.frag src/host_shaders/opengl_display.vert
src/host_shaders/opengl_vertex_shader.vert src/host_shaders/opengl_fragment_shader.frag
)

set(HEADER_FILES ${HEADER_FILES} ${RENDERER_GL_INCLUDE_FILES})
Expand Down
23 changes: 22 additions & 1 deletion include/PICA/pica_frag_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ namespace PICA {
bumpSelector = Helpers::getBits<22, 2>(config0);
clampHighlights = Helpers::getBit<27>(config0);
bumpMode = Helpers::getBits<28, 2>(config0);
bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor
bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor

for (int i = 0; i < totalLightCount; i++) {
auto& light = lights[i];
Expand Down Expand Up @@ -206,6 +206,27 @@ namespace PICA {
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
}

FragmentConfig& operator=(const FragmentConfig& config) {
	// Entry counts of the lighting LUT and light arrays that are copied element by element below
	constexpr int lutCount = 7;
	constexpr int lightCount = 8;

	// Guard against the struct growing without this copy-assignment operator being updated:
	// the sizes of everything copied here must add up to the size of the whole struct
	static_assert(
		sizeof(FragmentConfig) == sizeof(outConfig.raw) + sizeof(texConfig) + sizeof(fogConfig.raw) + sizeof(lighting.raw) +
									  lutCount * sizeof(LightingLUTConfig) + lightCount * sizeof(Light)
	);

	// The BitField members cannot be copied directly (their copy constructor is deleted),
	// so every field is transferred by hand through its raw storage
	outConfig.raw = config.outConfig.raw;
	texConfig = config.texConfig;
	fogConfig.raw = config.fogConfig.raw;

	lighting.raw = config.lighting.raw;
	for (int lut = 0; lut != lutCount; ++lut) {
		lighting.luts[lut].raw = config.lighting.luts[lut].raw;
	}
	for (int light = 0; light != lightCount; ++light) {
		lighting.lights[light].raw = config.lighting.lights[light].raw;
	}

	return *this;
}

FragmentConfig(const std::array<u32, 0x300>& regs) : lighting(regs) {
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
Expand Down
10 changes: 5 additions & 5 deletions include/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ struct EmulatorConfig {
static constexpr bool shaderJitDefault = false;
#endif

// For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are
// horrible. On other platforms we default to ubershader + shadergen fallback for lights
// For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are
// horrible. On other platforms we default to ubershader + shadergen fallback for lights
#if defined(__ANDROID__) || defined(__APPLE__)
static constexpr bool ubershaderDefault = false;
static constexpr ShaderMode defaultShaderMode = ShaderMode::Specialized;
#else
static constexpr bool ubershaderDefault = true;
static constexpr ShaderMode defaultShaderMode = ShaderMode::Ubershader;
#endif

bool shaderJitEnabled = shaderJitDefault;
bool discordRpcEnabled = false;
bool useUbershaders = ubershaderDefault;
ShaderMode shaderMode = defaultShaderMode;
bool accurateShaderMul = false;

// Toggles whether to force shadergen when there's more than N lights active and we're using the ubershader, for better performance
Expand Down
4 changes: 2 additions & 2 deletions include/emulator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class Emulator {
static constexpr u32 width = 400;
static constexpr u32 height = 240 * 2; // * 2 because 2 screens
ROMType romType = ROMType::None;
bool running = false; // Is the emulator running a game?
bool running = false; // Is the emulator running a game?

private:
#ifdef PANDA3DS_ENABLE_HTTP_SERVER
Expand Down Expand Up @@ -109,7 +109,7 @@ class Emulator {

#ifdef PANDA3DS_FRONTEND_QT
// For passing the GL context from Qt to the renderer
void initGraphicsContext(GL::Context* glContext) { gpu.initGraphicsContext(nullptr); }
void initGraphicsContext(GL::Context* glContext) { gpu.initGraphicsContext(glContext); }
#else
void initGraphicsContext(SDL_Window* window) { gpu.initGraphicsContext(window); }
#endif
Expand Down
12 changes: 10 additions & 2 deletions include/renderer.hpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#pragma once
#include <array>
#include <optional>
#include <span>
#include <string>
#include <optional>

#include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp"
Expand All @@ -20,6 +20,12 @@ enum class RendererType : s8 {
Software = 3,
};

// Selects which kind of fragment shader the renderer uses for drawing.
// The enumerator values are the ones the compiler would assign implicitly; they are spelled out for clarity.
enum class ShaderMode {
	// Use shaders generated for the specific fragment configuration
	Specialized = 0,
	// Use the general-purpose ubershader
	Ubershader = 1,
	// NOTE(review): presumably the ubershader is used while a specialized shader is still being
	// compiled asynchronously - confirm against the renderer implementation
	Hybrid = 2,
};

struct EmulatorConfig;
class GPU;
struct SDL_Window;
Expand Down Expand Up @@ -56,6 +62,8 @@ class Renderer {
static constexpr u32 vertexBufferSize = 0x10000;
static std::optional<RendererType> typeFromString(std::string inString);
static const char* typeToString(RendererType rendererType);
static std::optional<ShaderMode> shaderModeFromString(std::string inString);
static const char* shaderModeToString(ShaderMode shaderMode);

virtual void reset() = 0;
virtual void display() = 0; // Display the 3DS screen contents to the window
Expand All @@ -77,7 +85,7 @@ class Renderer {
virtual std::string getUbershader() { return ""; }
virtual void setUbershader(const std::string& shader) {}

virtual void setUbershaderSetting(bool value) {}
virtual void setShaderMode(ShaderMode shaderMode) {}

// Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window
#ifdef PANDA3DS_FRONTEND_QT
Expand Down
50 changes: 50 additions & 0 deletions include/renderer_gl/async_compiler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#pragma once

#include <atomic>
#include <thread>

#include "PICA/pica_frag_config.hpp"
#include "lockfree/spsc/queue.hpp"
#include "opengl.hpp"
#include "renderer_gl/renderer_gl.hpp"

namespace PICA::ShaderGen {
class FragmentGenerator;
}

// Hooks for creating, activating and destroying the graphics context used by the async shader compiler.
// Only the declarations live in this header.
// NOTE(review): presumably each frontend/platform provides the definitions - confirm where they are implemented.
// NOTE(review): userdata and the context handle are opaque void* values; their concrete types are not
// visible from this header - confirm against the implementation before casting them.
namespace AsyncCompiler {
void* createContext(void* userdata);
void makeCurrent(void* userdata, void* context);
void destroyContext(void* context);
} // namespace AsyncCompiler

// One unit of work for the async compiler: the cache entry to fill in and the fragment
// configuration to generate its shader from.
// Both pointers default to nullptr so that a freshly created entry never holds an indeterminate
// pointer. The struct remains an aggregate with the same member order, so existing
// brace-initialization keeps working.
// NOTE(review): ownership of the pointed-to objects is not visible from this header - confirm in the implementation.
struct CompilingProgram {
	CachedProgram* program = nullptr;
	PICA::FragmentConfig* config = nullptr;
};

// Owns the background thread and the work queue used to compile fragment shaders for queued
// fragment configurations.
// NOTE(review): the member functions are only declared here; the threading behaviour described below
// should be confirmed against the implementation file.
struct AsyncCompilerThread {
// fragShaderGen is stored by reference (see the member below), so it must outlive this object.
// NOTE(review): userdata is opaque here; presumably it is what gets handed to the AsyncCompiler
// context hooks - confirm.
explicit AsyncCompilerThread(PICA::ShaderGen::FragmentGenerator& fragShaderGen, void* userdata);
~AsyncCompilerThread();

// Called from the emulator thread to queue a fragment configuration for compilation
// NOTE(review): this function returns void, so a full queue cannot be reported through the return
// value - confirm how the implementation behaves when the queue has no free slot.
void PushFragmentConfig(const PICA::FragmentConfig& config, CachedProgram* cachedProgram);

// Wait for all queued fragment configurations to be compiled
void Finish();

private:
PICA::ShaderGen::FragmentGenerator& fragShaderGen;
OpenGL::Shader defaultShadergenVs;

// Our lockfree queue only allows for trivial types, so we preallocate enough structs
// to avoid dynamic allocation on each push
// NOTE(review): preallocatedProgramsIndex has no initializer in this header; it must be set by the
// constructor before first use - confirm.
int preallocatedProgramsIndex;
static constexpr int preallocatedProgramsSize = 256;
std::array<CompilingProgram*, preallocatedProgramsSize> preallocatedPrograms;
// The queue is sized one smaller than the preallocated pool.
// NOTE(review): presumably so queued entries can never outnumber the preallocated structs - confirm.
lockfree::spsc::Queue<CompilingProgram*, preallocatedProgramsSize - 1> programQueue;
// NOTE(review): running and hasWork have no initializers here; presumably the constructor sets them
// before the thread starts - confirm.
std::atomic_bool running;
std::atomic_bool hasWork;
// Members are constructed in declaration order, so every member above exists before this one is
// constructed. Keep it last if the constructor starts the thread from its initializer list.
std::thread thread;
};
28 changes: 18 additions & 10 deletions include/renderer_gl/renderer_gl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen.hpp"
#include "config.hpp"
#include "gl_state.hpp"
#include "helpers.hpp"
#include "logger.hpp"
Expand All @@ -22,6 +23,15 @@
// More circular dependencies!
class GPU;

// Entry of the renderer's shader cache: a recompiled fragment shader program plus its bookkeeping flags
struct CachedProgram {
	OpenGL::Program program;
	// Atomic flag, initially false.
	// NOTE(review): presumably set while another thread is still compiling `program` - confirm.
	std::atomic_bool compiling{false};
	// Starts out true for every new entry.
	// NOTE(review): presumably cleared once the program's one-time setup has been done - confirm.
	bool needsInitialization{true};
};

struct AsyncCompilerThread;

class RendererGL final : public Renderer {
GLStateManager gl = {};

Expand All @@ -30,9 +40,9 @@ class RendererGL final : public Renderer {

OpenGL::VertexArray vao;
OpenGL::VertexBuffer vbo;
bool enableUbershader = true;
ShaderMode shaderMode = EmulatorConfig::defaultShaderMode;

// Data
// Data
struct {
// TEV configuration uniform locations
GLint textureEnvSourceLoc = -1;
Expand Down Expand Up @@ -71,12 +81,10 @@ class RendererGL final : public Renderer {
OpenGL::Shader defaultShadergenVs;
GLuint shadergenFragmentUBO;

// Cached recompiled fragment shader
struct CachedProgram {
OpenGL::Program program;
};
std::unordered_map<PICA::FragmentConfig, CachedProgram> shaderCache;

AsyncCompilerThread* asyncCompiler = nullptr;

OpenGL::Framebuffer getColourFBO();
OpenGL::Texture getTexture(Texture& tex);
OpenGL::Program& getSpecializedShader();
Expand Down Expand Up @@ -104,15 +112,15 @@ class RendererGL final : public Renderer {
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; // Clear a GPU buffer in VRAM
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; // Perform display transfer
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override; // Draw the given vertices
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override; // Draw the given vertices
void deinitGraphicsContext() override;

virtual bool supportsShaderReload() override { return true; }
virtual std::string getUbershader() override;
virtual void setUbershader(const std::string& shader) override;

virtual void setUbershaderSetting(bool value) override { enableUbershader = value; }
virtual void setShaderMode(ShaderMode mode) override { shaderMode = mode; }

std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);

// Note: The caller is responsible for deleting the currently bound FBO before calling this
Expand All @@ -122,7 +130,7 @@ class RendererGL final : public Renderer {
void initUbershader(OpenGL::Program& program);

#ifdef PANDA3DS_FRONTEND_QT
virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override { initGraphicsContextInternal(); }
void initGraphicsContext(GL::Context* context) override;
#endif

// Take a screenshot of the screen and store it in a file
Expand Down
15 changes: 12 additions & 3 deletions src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,18 @@ void EmulatorConfig::load() {
rendererType = RendererType::OpenGL;
}

auto shaderModeName = toml::find_or<std::string>(gpu, "ShaderMode", Renderer::shaderModeToString(defaultShaderMode));
auto configShaderMode = Renderer::shaderModeFromString(shaderModeName);

if (configShaderMode.has_value()) {
shaderMode = configShaderMode.value();
} else {
Helpers::warn("Invalid shader mode specified: %s\n", shaderModeName.c_str());
shaderMode = defaultShaderMode;
}

shaderJitEnabled = toml::find_or<toml::boolean>(gpu, "EnableShaderJIT", shaderJitDefault);
vsyncEnabled = toml::find_or<toml::boolean>(gpu, "EnableVSync", true);
useUbershaders = toml::find_or<toml::boolean>(gpu, "UseUbershaders", ubershaderDefault);
accurateShaderMul = toml::find_or<toml::boolean>(gpu, "AccurateShaderMultiplication", false);

forceShadergenForLights = toml::find_or<toml::boolean>(gpu, "ForceShadergenForLighting", true);
Expand Down Expand Up @@ -127,12 +136,12 @@ void EmulatorConfig::save() {
data["General"]["EnableDiscordRPC"] = discordRpcEnabled;
data["General"]["UsePortableBuild"] = usePortableBuild;
data["General"]["DefaultRomPath"] = defaultRomPath.string();

data["GPU"]["EnableShaderJIT"] = shaderJitEnabled;
data["GPU"]["Renderer"] = std::string(Renderer::typeToString(rendererType));
data["GPU"]["EnableVSync"] = vsyncEnabled;
data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul;
data["GPU"]["UseUbershaders"] = useUbershaders;
data["GPU"]["ShaderMode"] = std::string(Renderer::shaderModeToString(shaderMode));
data["GPU"]["ForceShadergenForLighting"] = forceShadergenForLights;
data["GPU"]["ShadergenLightThreshold"] = lightShadergenThreshold;

Expand Down
4 changes: 2 additions & 2 deletions src/core/PICA/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ void GPU::reset() {
externalRegs[Framebuffer1Config] = static_cast<u32>(PICA::ColorFmt::RGB8);
externalRegs[Framebuffer1Select] = 0;

renderer->setUbershaderSetting(config.useUbershaders);
renderer->setShaderMode(config.shaderMode);
renderer->reset();
}

Expand Down Expand Up @@ -365,7 +365,7 @@ PICA::Vertex GPU::getImmediateModeVertex() {

// Run VS and return vertex data. TODO: Don't hardcode offsets for each attribute
shaderUnit.vs.run();

// Map shader outputs to fixed function properties
const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
for (int i = 0; i < totalShaderOutputs; i++) {
Expand Down
Loading
Loading