Skip to content

Commit

Permalink
Add Betsy to speed up BC6 compression
Browse files Browse the repository at this point in the history
  • Loading branch information
BlueCube3310 committed Aug 15, 2024
1 parent c2a4942 commit a08d1e5
Show file tree
Hide file tree
Showing 17 changed files with 1,348 additions and 2 deletions.
21 changes: 21 additions & 0 deletions core/io/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

#include "image.h"

#include "core/config/project_settings.h"
#include "core/error/error_list.h"
#include "core/error/error_macros.h"
#include "core/io/image_loader.h"
Expand Down Expand Up @@ -2630,6 +2631,25 @@ Error Image::compress(CompressMode p_mode, CompressSource p_source, ASTCFormat p
Error Image::compress_from_channels(CompressMode p_mode, UsedChannels p_channels, ASTCFormat p_astc_format) {
ERR_FAIL_COND_V(data.is_empty(), ERR_INVALID_DATA);

// RenderingDevice only.
if (GLOBAL_GET("rendering/textures/vram_compression/compress_with_gpu")) {
switch (p_mode) {
case COMPRESS_BPTC: {
// BC7 is unsupported currently.
if (format >= FORMAT_RF && format <= FORMAT_RGBE9995) {
ERR_FAIL_NULL_V(_image_compress_bptc_rd_func, ERR_UNAVAILABLE);
_image_compress_bptc_rd_func(this, p_channels);

return OK;
}

} break;

default: {
}
}
}

switch (p_mode) {
case COMPRESS_S3TC: {
ERR_FAIL_NULL_V(_image_compress_bc_func, ERR_UNAVAILABLE);
Expand Down Expand Up @@ -3011,6 +3031,7 @@ void (*Image::_image_compress_bptc_func)(Image *, Image::UsedChannels) = nullptr
// Codec hooks held as static function pointers. Every one of them starts out as nullptr,
// so a caller has to null-check a hook before invoking it.
void (*Image::_image_compress_etc1_func)(Image *) = nullptr;
void (*Image::_image_compress_etc2_func)(Image *, Image::UsedChannels) = nullptr;
void (*Image::_image_compress_astc_func)(Image *, Image::ASTCFormat) = nullptr;
// BPTC compression hook for the GPU path (takes the same arguments as the etc2 hook above).
// NOTE(review): presumably assigned by the betsy module at registration time — confirm.
void (*Image::_image_compress_bptc_rd_func)(Image *, Image::UsedChannels) = nullptr;
void (*Image::_image_decompress_bc)(Image *) = nullptr;
void (*Image::_image_decompress_bptc)(Image *) = nullptr;
void (*Image::_image_decompress_etc1)(Image *) = nullptr;
Expand Down
2 changes: 2 additions & 0 deletions core/io/image.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ class Image : public Resource {
static void (*_image_compress_etc2_func)(Image *, UsedChannels p_channels);
static void (*_image_compress_astc_func)(Image *, ASTCFormat p_format);

// BPTC compression hook that receives the image and its used-channel set.
// NOTE(review): the "_rd" suffix presumably stands for RenderingDevice (GPU path) — confirm.
static void (*_image_compress_bptc_rd_func)(Image *, UsedChannels p_channels);

// Decompression hooks; each receives only the image to operate on.
static void (*_image_decompress_bc)(Image *);
static void (*_image_decompress_bptc)(Image *);
static void (*_image_decompress_etc1)(Image *);
Expand Down
5 changes: 5 additions & 0 deletions doc/classes/ProjectSettings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2870,6 +2870,11 @@
<member name="rendering/textures/lossless_compression/force_png" type="bool" setter="" getter="" default="false">
If [code]true[/code], the texture importer will import lossless textures using the PNG format. Otherwise, it will default to using WebP.
</member>
<member name="rendering/textures/vram_compression/compress_with_gpu" type="bool" setter="" getter="" default="true">
If [code]true[/code], the texture importer will utilize the GPU for compressing textures, which makes large textures import significantly faster.
[b]Note:[/b] This setting requires either Vulkan or D3D12 to be available as a rendering backend.
[b]Note:[/b] Currently this only affects BC6H compression, which is used on Desktop and Console for HDR images.
</member>
<member name="rendering/textures/vram_compression/import_etc2_astc" type="bool" setter="" getter="" default="false">
If [code]true[/code], the texture importer will import VRAM-compressed textures using the Ericsson Texture Compression 2 algorithm for lower quality textures and normal maps and Adaptable Scalable Texture Compression algorithm for high quality textures (in 4×4 block size).
[b]Note:[/b] This setting is an override. The texture importer will always import the format the host platform needs, even if this is set to [code]false[/code].
Expand Down
2 changes: 0 additions & 2 deletions editor/import/resource_importer_layered_texture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,6 @@ Error ResourceImporterLayeredTexture::import(const String &p_source_file, const
}

if (compress_mode == COMPRESS_VRAM_COMPRESSED) {
mipmaps = true;

//if using video ram, optimize
if (channel_pack == 0) {
//remove alpha if not needed, so compression is more efficient
Expand Down
76 changes: 76 additions & 0 deletions modules/betsy/CrossPlatformSettings_piece_all.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@

// Compatibility macros: every define below rewrites a non-GLSL spelling (type alias,
// intrinsic name or OGRE_* helper) into the GLSL construct on its right-hand side.

// Three-operand min/max built from the two-operand built-ins.
#define min3(a, b, c) min(a, min(b, c))
#define max3(a, b, c) max(a, max(b, c))

// Float vector aliases.
#define float2 vec2
#define float3 vec3
#define float4 vec4

// Signed integer vector aliases.
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4

// Unsigned integer vector aliases.
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4

// Matrix aliases. Note that ogre_float4x3 expands to mat3x4, not mat4x3.
#define float2x2 mat2
#define float3x3 mat3
#define float4x4 mat4
#define ogre_float4x3 mat3x4

// The "ushort" names are plain 32-bit uint types here.
#define ushort uint
#define ushort3 uint3
#define ushort4 uint4

//Short used for read operations. It's an int in GLSL & HLSL. An ushort in Metal
#define rshort int
#define rshort2 int2
#define rint int
//Short used for write operations. It's an int in GLSL. An ushort in HLSL & Metal
#define wshort2 int2
#define wshort3 int3

// 3x3 matrix construction helpers; both forward straight to the mat3 constructor.
#define toFloat3x3(x) mat3(x)
#define buildFloat3x3(row0, row1, row2) mat3(row0, row1, row2)

// Intrinsic names mapped to GLSL operators/built-ins. mul() is a plain '*' with both
// operands parenthesized; saturate() clamps to [0.0, 1.0].
#define mul(x, y) ((x) * (y))
#define saturate(x) clamp((x), 0.0, 1.0)
#define lerp mix
#define rsqrt inversesqrt
// INLINE and NO_INTERPOLATION_SUFFIX expand to nothing; the prefix becomes 'flat'.
#define INLINE
#define NO_INTERPOLATION_PREFIX flat
#define NO_INTERPOLATION_SUFFIX

// Extra-parameter hooks; both expand to nothing.
#define PARAMS_ARG_DECL
#define PARAMS_ARG

#define reversebits bitfieldReverse

// Texture sampling helpers. The 'sampler' argument is accepted but dropped from the
// expansion: only 'tex' reaches the GLSL texture function.
#define OGRE_Sample(tex, sampler, uv) texture(tex, uv)
#define OGRE_SampleLevel(tex, sampler, uv, lod) textureLod(tex, uv, lod)
#define OGRE_SampleArray2D(tex, sampler, uv, arrayIdx) texture(tex, vec3(uv, arrayIdx))
#define OGRE_SampleArray2DLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec3(uv, arrayIdx), lod)
#define OGRE_SampleArrayCubeLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec4(uv, arrayIdx), lod)
#define OGRE_SampleGrad(tex, sampler, uv, ddx, ddy) textureGrad(tex, uv, ddx, ddy)
#define OGRE_SampleArray2DGrad(tex, sampler, uv, arrayIdx, ddx, ddy) textureGrad(tex, vec3(uv, arrayIdx), ddx, ddy)
// Screen-space derivatives.
#define OGRE_ddx(val) dFdx(val)
#define OGRE_ddy(val) dFdy(val)
// Texel fetches by integer coordinate. The array variant appends arrayIdx as the third
// coordinate; the multisample variant passes the subsample index as the last argument.
#define OGRE_Load2D(tex, iuv, lod) texelFetch(tex, iuv, lod)
#define OGRE_LoadArray2D(tex, iuv, arrayIdx, lod) texelFetch(tex, ivec3(iuv, arrayIdx), lod)
#define OGRE_Load2DMS(tex, iuv, subsample) texelFetch(tex, iuv, subsample)

#define OGRE_Load3D(tex, iuv, lod) texelFetch(tex, ivec3(iuv), lod)

// Gather helpers: the trailing 0/1/2 selects the component handed to textureGather.
#define OGRE_GatherRed(tex, sampler, uv) textureGather(tex, uv, 0)
#define OGRE_GatherGreen(tex, sampler, uv) textureGather(tex, uv, 1)
#define OGRE_GatherBlue(tex, sampler, uv) textureGather(tex, uv, 2)

// Fetches one element and keeps only its .x component.
#define bufferFetch1(buffer, idx) texelFetch(buffer, idx).x

// Sampler argument hooks; both expand to nothing, so samplerName is discarded.
#define OGRE_SAMPLER_ARG_DECL(samplerName)
#define OGRE_SAMPLER_ARG(samplerName)

#define OGRE_Texture3D_float4 sampler3D
// Parameter declarations using the 'out' / 'inout' qualifiers.
#define OGRE_OUT_REF(declType, variableName) out declType variableName
#define OGRE_INOUT_REF(declType, variableName) inout declType variableName
24 changes: 24 additions & 0 deletions modules/betsy/SCsub
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env python
# SCons build script for the betsy module: registers the bc6h.glsl shader header and
# adds the module's *.cpp files to the shared module sources.
Import("env")
Import("env_modules")

# Clone the modules environment so the CPPPATH change below is made on env_betsy only.
env_betsy = env_modules.Clone()
# Hand bc6h.glsl to the GLSL_HEADER builder.
# NOTE(review): presumably this produces bc6h.glsl.gen.h — confirm against glsl_builders.py.
env_betsy.GLSL_HEADER("bc6h.glsl")
# Declare the generated *.glsl.gen.h files as dependent on the builder script.
env_betsy.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"])

# Thirdparty source files
# NOTE(review): thirdparty_obj stays empty and env_thirdparty is never used to compile
# anything in this script — confirm whether thirdparty sources were meant to be added.
thirdparty_obj = []
thirdparty_dir = "#thirdparty/betsy/"
env_betsy.Prepend(CPPPATH=[thirdparty_dir])

env_thirdparty = env_betsy.Clone()
env_thirdparty.disable_warnings()
env.modules_sources += thirdparty_obj

# Godot source files
module_obj = []
env_betsy.add_source_files(module_obj, "*.cpp")
env.modules_sources += module_obj

# Needed to force rebuilding the module files when the thirdparty library is updated.
env.Depends(module_obj, thirdparty_obj)
17 changes: 17 additions & 0 deletions modules/betsy/UavCrossPlatform_piece_all.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@

// Image load/store helpers: each OGRE_image* macro expands to imageLoad/imageStore with
// the coordinate converted to int2 or int3.

// Loads: 2D images take an int2 coordinate, 2D arrays an int3 coordinate.
#define OGRE_imageLoad2D(inImage, iuv) imageLoad(inImage, int2(iuv))
#define OGRE_imageLoad2DArray(inImage, iuvw) imageLoad(inImage, int3(iuvw))

// 2D writes. The trailing digit is the number of components supplied in 'value'; the
// 1- and 2-component forms are padded with zeros up to a float4 before the store.
#define OGRE_imageWrite2D1(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0, 0))
#define OGRE_imageWrite2D2(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0))
#define OGRE_imageWrite2D4(outImage, iuv, value) imageStore(outImage, int2(iuv), value)

#define OGRE_imageLoad3D(inImage, iuv) imageLoad(inImage, int3(iuv))

// NOTE(review): unlike OGRE_imageWrite2D1, the "1" variants for 3D and 2DArray below forward
// 'value' unchanged (no float4 padding) — confirm callers pass a 4-component value.
#define OGRE_imageWrite3D1(outImage, iuv, value) imageStore(outImage, int3(iuv), value)
#define OGRE_imageWrite3D4(outImage, iuv, value) imageStore(outImage, int3(iuv), value)

#define OGRE_imageWrite2DArray1(outImage, iuvw, value) imageStore(outImage, int3(iuvw), value)
#define OGRE_imageWrite2DArray4(outImage, iuvw, value) imageStore(outImage, int3(iuvw), value)

// Disabled: shared-memory barrier helper, kept for reference only.
//#define sharedOnlyBarrier memoryBarrierShared();barrier();
Loading

0 comments on commit a08d1e5

Please sign in to comment.