Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[4.4 backport] Assorted import optimizations #122

Merged
merged 10 commits into from
Nov 5, 2024
156 changes: 155 additions & 1 deletion core/io/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

#include "image.h"

#include "core/config/project_settings.h"
#include "core/error/error_list.h"
#include "core/error/error_macros.h"
#include "core/io/image_loader.h"
Expand Down Expand Up @@ -501,6 +502,38 @@ static void _convert(int p_width, int p_height, const uint8_t *p_src, uint8_t *p
}
}

// Converts tightly packed pixel data between two channel counts that share the
// same component type `T`.
//
// For each of the `p_width * p_height` pixels, the first
// min(read_channels, write_channels) components are copied unchanged from
// `p_src` to `p_dst`. When the destination has more channels than the source,
// the additional channels are taken from { def_zero, def_zero, def_zero, def_one }
// indexed by channel: channels 0-2 receive `def_zero`, channel 3 receives `def_one`.
//
// `p_src` must hold `p_width * p_height * read_channels` components and `p_dst`
// must have room for `p_width * p_height * write_channels` components.
// Nothing is written when either dimension is zero or negative.
template <typename T, uint32_t read_channels, uint32_t write_channels, T def_zero, T def_one>
static void _convert_fast(int p_width, int p_height, const T *p_src, T *p_dst) {
	// The fill table below only has four entries.
	static_assert(write_channels <= 4, "_convert_fast() supports at most 4 destination channels.");

	if (p_width <= 0 || p_height <= 0) {
		return;
	}

	constexpr uint32_t shared_channels = read_channels < write_channels ? read_channels : write_channels;

	// Count pixels in 64 bits so that the multiplication cannot overflow `int`.
	const uint64_t pixel_count = uint64_t(p_width) * uint64_t(p_height);

	const T *src = p_src;
	T *dst = p_dst;

	for (uint64_t i = 0; i < pixel_count; i++) {
		memcpy(dst, src, shared_channels * sizeof(T));

		if constexpr (write_channels > read_channels) {
			// Default values for the channels the source does not provide.
			constexpr T def_value[4] = { def_zero, def_zero, def_zero, def_one };
			memcpy(dst + read_channels, &def_value[read_channels], (write_channels - read_channels) * sizeof(T));
		}

		dst += write_channels;
		src += read_channels;
	}
}

// Returns true when both formats fall into the same group, where the groups are:
//  - every format up to and including FORMAT_RGBA8,
//  - FORMAT_RH through FORMAT_RGBAH,
//  - FORMAT_RF through FORMAT_RGBAF.
// Image::convert() falls back to its slower per-pixel path when this returns false.
static bool _are_formats_compatible(Image::Format p_format0, Image::Format p_format1) {
	// Only an upper bound is checked for the first group.
	const bool both_up_to_rgba8 = p_format0 <= Image::FORMAT_RGBA8 && p_format1 <= Image::FORMAT_RGBA8;

	const bool first_in_h_range = p_format0 >= Image::FORMAT_RH && p_format0 <= Image::FORMAT_RGBAH;
	const bool second_in_h_range = p_format1 >= Image::FORMAT_RH && p_format1 <= Image::FORMAT_RGBAH;

	const bool first_in_f_range = p_format0 >= Image::FORMAT_RF && p_format0 <= Image::FORMAT_RGBAF;
	const bool second_in_f_range = p_format1 >= Image::FORMAT_RF && p_format1 <= Image::FORMAT_RGBAF;

	return both_up_to_rgba8 || (first_in_h_range && second_in_h_range) || (first_in_f_range && second_in_f_range);
}

void Image::convert(Format p_new_format) {
ERR_FAIL_INDEX_MSG(p_new_format, FORMAT_MAX, "The Image format specified (" + itos(p_new_format) + ") is out of range. See Image's Format enum.");
if (data.size() == 0) {
Expand All @@ -517,7 +550,7 @@ void Image::convert(Format p_new_format) {
if (Image::is_format_compressed(format) || Image::is_format_compressed(p_new_format)) {
ERR_FAIL_MSG("Cannot convert to <-> from compressed formats. Use compress() and decompress() instead.");

} else if (format > FORMAT_RGBA8 || p_new_format > FORMAT_RGBA8) {
} else if (!_are_formats_compatible(format, p_new_format)) {
//use put/set pixel which is slower but works with non byte formats
Image new_img(width, height, mipmaps, p_new_format);

Expand Down Expand Up @@ -648,6 +681,78 @@ void Image::convert(Format p_new_format) {
case FORMAT_RGBA8 | (FORMAT_RGB8 << 8):
_convert<3, true, 3, false, false, false>(mip_width, mip_height, rptr, wptr);
break;
case FORMAT_RH | (FORMAT_RGH << 8):
_convert_fast<uint16_t, 1, 2, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RH | (FORMAT_RGBH << 8):
_convert_fast<uint16_t, 1, 3, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RH | (FORMAT_RGBAH << 8):
_convert_fast<uint16_t, 1, 4, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGH | (FORMAT_RH << 8):
_convert_fast<uint16_t, 2, 1, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGH | (FORMAT_RGBH << 8):
_convert_fast<uint16_t, 2, 3, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGH | (FORMAT_RGBAH << 8):
_convert_fast<uint16_t, 2, 4, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGBH | (FORMAT_RH << 8):
_convert_fast<uint16_t, 3, 1, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGBH | (FORMAT_RGH << 8):
_convert_fast<uint16_t, 3, 2, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGBH | (FORMAT_RGBAH << 8):
_convert_fast<uint16_t, 3, 4, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGBAH | (FORMAT_RH << 8):
_convert_fast<uint16_t, 4, 1, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGBAH | (FORMAT_RGH << 8):
_convert_fast<uint16_t, 4, 2, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGBAH | (FORMAT_RGBH << 8):
_convert_fast<uint16_t, 4, 3, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RF | (FORMAT_RGF << 8):
_convert_fast<uint32_t, 1, 2, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RF | (FORMAT_RGBF << 8):
_convert_fast<uint32_t, 1, 3, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RF | (FORMAT_RGBAF << 8):
_convert_fast<uint32_t, 1, 4, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGF | (FORMAT_RF << 8):
_convert_fast<uint32_t, 2, 1, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGF | (FORMAT_RGBF << 8):
_convert_fast<uint32_t, 2, 3, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGF | (FORMAT_RGBAF << 8):
_convert_fast<uint32_t, 2, 4, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGBF | (FORMAT_RF << 8):
_convert_fast<uint32_t, 3, 1, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGBF | (FORMAT_RGF << 8):
_convert_fast<uint32_t, 3, 2, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGBF | (FORMAT_RGBAF << 8):
_convert_fast<uint32_t, 3, 4, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGBAF | (FORMAT_RF << 8):
_convert_fast<uint32_t, 4, 1, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGBAF | (FORMAT_RGF << 8):
_convert_fast<uint32_t, 4, 2, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGBAF | (FORMAT_RGBF << 8):
_convert_fast<uint32_t, 4, 3, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
}
}

Expand Down Expand Up @@ -2630,6 +2735,27 @@ Error Image::compress(CompressMode p_mode, CompressSource p_source, ASTCFormat p
Error Image::compress_from_channels(CompressMode p_mode, UsedChannels p_channels, ASTCFormat p_astc_format) {
ERR_FAIL_COND_V(data.is_empty(), ERR_INVALID_DATA);

// RenderingDevice only.
if (GLOBAL_GET("rendering/textures/vram_compression/compress_with_gpu")) {
switch (p_mode) {
case COMPRESS_BPTC: {
// BC7 is unsupported currently.
if ((format >= FORMAT_RF && format <= FORMAT_RGBE9995) && _image_compress_bptc_rd_func) {
Error result = _image_compress_bptc_rd_func(this, p_channels);

// If the image was compressed successfully, we return here. If not, we fall back to the default compression scheme.
if (result == OK) {
return OK;
}
}

} break;

default: {
}
}
}

switch (p_mode) {
case COMPRESS_S3TC: {
ERR_FAIL_NULL_V(_image_compress_bc_func, ERR_UNAVAILABLE);
Expand Down Expand Up @@ -3011,6 +3137,7 @@ void (*Image::_image_compress_bptc_func)(Image *, Image::UsedChannels) = nullptr
void (*Image::_image_compress_etc1_func)(Image *) = nullptr;
void (*Image::_image_compress_etc2_func)(Image *, Image::UsedChannels) = nullptr;
void (*Image::_image_compress_astc_func)(Image *, Image::ASTCFormat) = nullptr;
Error (*Image::_image_compress_bptc_rd_func)(Image *, Image::UsedChannels) = nullptr;
void (*Image::_image_decompress_bc)(Image *) = nullptr;
void (*Image::_image_decompress_bptc)(Image *) = nullptr;
void (*Image::_image_decompress_etc1)(Image *) = nullptr;
Expand Down Expand Up @@ -3696,6 +3823,33 @@ void Image::bump_map_to_normal_map(float bump_scale) {
data = result_image;
}

// Scans the image for a component whose sign bit is set and whose remaining
// bits are not all zero (so a negative zero does not count).
//
// Only formats in the FORMAT_RH..FORMAT_RGBAH range (read as 16-bit words) and
// in the FORMAT_RF..FORMAT_RGBAF range (read as 32-bit words) are inspected;
// any other format returns false. Compressed images fail with an error and
// return false.
//
// When `p_include_mips` is true the whole data buffer is scanned, otherwise
// only the first `width * height` pixels are.
bool Image::detect_signed(bool p_include_mips) const {
	ERR_FAIL_COND_V(is_compressed(), false);

	if (format >= Image::FORMAT_RH && format <= Image::FORMAT_RGBAH) {
		const uint16_t *img_data = reinterpret_cast<const uint16_t *>(data.ptr());
		// Widen before multiplying: width * height * pixel size can exceed the range of `int`.
		const uint64_t img_bytes = p_include_mips ? uint64_t(data.size()) : uint64_t(width) * uint64_t(height) * uint64_t(get_format_pixel_size(format));
		const uint64_t img_size = img_bytes / 2;

		for (uint64_t i = 0; i < img_size; i++) {
			if ((img_data[i] & 0x8000) != 0 && (img_data[i] & 0x7fff) != 0) {
				return true;
			}
		}

	} else if (format >= Image::FORMAT_RF && format <= Image::FORMAT_RGBAF) {
		const uint32_t *img_data = reinterpret_cast<const uint32_t *>(data.ptr());
		// Widen before multiplying: width * height * pixel size can exceed the range of `int`.
		const uint64_t img_bytes = p_include_mips ? uint64_t(data.size()) : uint64_t(width) * uint64_t(height) * uint64_t(get_format_pixel_size(format));
		const uint64_t img_size = img_bytes / 4;

		for (uint64_t i = 0; i < img_size; i++) {
			if ((img_data[i] & 0x80000000) != 0 && (img_data[i] & 0x7fffffff) != 0) {
				return true;
			}
		}
	}

	return false;
}

void Image::srgb_to_linear() {
if (data.size() == 0) {
return;
Expand Down
4 changes: 4 additions & 0 deletions core/io/image.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ class Image : public Resource {
static void (*_image_compress_etc2_func)(Image *, UsedChannels p_channels);
static void (*_image_compress_astc_func)(Image *, ASTCFormat p_format);

static Error (*_image_compress_bptc_rd_func)(Image *, UsedChannels p_channels);

static void (*_image_decompress_bc)(Image *);
static void (*_image_decompress_bptc)(Image *);
static void (*_image_decompress_etc1)(Image *);
Expand Down Expand Up @@ -388,6 +390,8 @@ class Image : public Resource {
Ref<Image> get_image_from_mipmap(int p_mipmap) const;
void bump_map_to_normal_map(float bump_scale = 1.0);

bool detect_signed(bool p_include_mips = true) const;

void blit_rect(const Ref<Image> &p_src, const Rect2i &p_src_rect, const Point2i &p_dest);
void blit_rect_mask(const Ref<Image> &p_src, const Ref<Image> &p_mask, const Rect2i &p_src_rect, const Point2i &p_dest);
void blend_rect(const Ref<Image> &p_src, const Rect2i &p_src_rect, const Point2i &p_dest);
Expand Down
67 changes: 40 additions & 27 deletions core/math/color.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,33 +129,46 @@ struct [[nodiscard]] Color {
}

// Packs the red, green and blue components into a 32-bit shared-exponent value:
// red in bits 0-8, green in bits 9-17, blue in bits 18-26 and a 5-bit shared
// exponent in bits 27-31. Each channel is clamped to [0, 65408] first.
//
// Based on the MiniEngine implementation:
// https://github.com/microsoft/DirectX-Graphics-Samples/blob/v10.0.19041.0/MiniEngine/Core/Color.cpp
_FORCE_INLINE_ uint32_t to_rgbe9995() const {
	constexpr float kMaxVal = float(0x1FF << 7); // 65408, the largest value that is kept.
	constexpr float kMinVal = float(1.f / (1 << 16)); // 2^-16.

	// Clamp RGB to [0, kMaxVal].
	const float _r = CLAMP(r, 0.0f, kMaxVal);
	const float _g = CLAMP(g, 0.0f, kMaxVal);
	const float _b = CLAMP(b, 0.0f, kMaxVal);

	// Compute the maximum channel, no less than kMinVal.
	const float MaxChannel = MAX(MAX(_r, _g), MAX(_b, kMinVal));

	// The integer view is unsigned on purpose: the shifts below push bits out
	// of the top of the word, which is only well-defined for unsigned types.
	union {
		float f;
		uint32_t i;
	} R, G, B, E;

	// Take the exponent of the maximum channel (rounding up the 9th bit) and
	// add 15 to it. When added to the channels, it causes the implicit '1.0'
	// bit and the first 8 mantissa bits to be shifted down to the low 9 bits
	// of the mantissa, rounding the truncated bits.
	E.f = MaxChannel;
	E.i += 0x07804000; // Add 15 to the exponent and 0x4000 to the mantissa.
	E.i &= 0x7F800000; // Zero the mantissa.

	// This shifts the 9-bit values we need into the lowest bits, rounding as
	// needed. Note that if the channel has a smaller exponent than the max
	// channel, it will shift even more. This is intentional.
	R.f = _r + E.f;
	G.f = _g + E.f;
	B.f = _b + E.f;

	// Convert the bias to the correct exponent in the upper 5 bits.
	E.i <<= 4;
	E.i += 0x10000000;

	// Combine the fields. RGB floats have unwanted data in the upper 9
	// bits. Only red needs to mask them off because green and blue shift
	// it out to the left.
	return E.i | (B.i << 18) | (G.i << 9) | (R.i & 511);
}

_FORCE_INLINE_ Color blend(const Color &p_over) const {
Expand Down
5 changes: 5 additions & 0 deletions doc/classes/ProjectSettings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2882,6 +2882,11 @@
<member name="rendering/textures/lossless_compression/force_png" type="bool" setter="" getter="" default="false">
If [code]true[/code], the texture importer will import lossless textures using the PNG format. Otherwise, it will default to using WebP.
</member>
<member name="rendering/textures/vram_compression/compress_with_gpu" type="bool" setter="" getter="" default="true">
If [code]true[/code], the texture importer will utilize the GPU for compressing textures, which makes large textures import significantly faster.
[b]Note:[/b] This setting requires either Vulkan or D3D12 to be available as a rendering backend.
[b]Note:[/b] Currently, this only affects BC6H compression, which is used on desktop and console platforms for HDR images.
</member>
<member name="rendering/textures/vram_compression/import_etc2_astc" type="bool" setter="" getter="" default="false">
If [code]true[/code], the texture importer will import VRAM-compressed textures using the Ericsson Texture Compression 2 algorithm for lower quality textures and normal maps and Adaptable Scalable Texture Compression algorithm for high quality textures (in 4×4 block size).
[b]Note:[/b] This setting is an override. The texture importer will always import the format the host platform needs, even if this is set to [code]false[/code].
Expand Down
3 changes: 0 additions & 3 deletions doc/classes/ResourceImporterOBJ.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@
<member name="offset_mesh" type="Vector3" setter="" getter="" default="Vector3(0, 0, 0)">
Offsets the mesh's data by the specified value. This can be used to work around misaligned meshes without having to modify the source file.
</member>
<member name="optimize_mesh" type="bool" setter="" getter="" default="true">
Unused parameter. This currently has no effect.
</member>
<member name="scale_mesh" type="Vector3" setter="" getter="" default="Vector3(1, 1, 1)">
Scales the mesh's data by the specified value. This can be used to work around misscaled meshes without having to modify the source file.
</member>
Expand Down
10 changes: 9 additions & 1 deletion editor/editor_file_system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2277,10 +2277,18 @@ void EditorFileSystem::update_files(const Vector<String> &p_script_paths) {
if (!is_scanning()) {
_process_update_pending();
}
call_deferred(SNAME("emit_signal"), "filesystem_changed"); // Update later
if (!filesystem_changed_queued) {
filesystem_changed_queued = true;
callable_mp(this, &EditorFileSystem::_notify_filesystem_changed).call_deferred();
}
}
}

// Deferred callback queued by update_files(): emits `filesystem_changed` once
// for all the updates that were requested while the notification was pending.
void EditorFileSystem::_notify_filesystem_changed() {
	// Clear the pending flag before emitting. If a signal handler calls
	// update_files() during the emission, it must be able to queue a fresh
	// notification; with the flag still set that request would be dropped.
	filesystem_changed_queued = false;
	emit_signal("filesystem_changed");
}

// Returns the `valid_extensions` set by value, so the caller receives its own copy.
HashSet<String> EditorFileSystem::get_valid_extensions() const {
return valid_extensions;
}
Expand Down
2 changes: 2 additions & 0 deletions editor/editor_file_system.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ class EditorFileSystem : public Node {
EditorFileSystemDirectory *new_filesystem = nullptr;
ScannedDirectory *first_scan_root_dir = nullptr;

bool filesystem_changed_queued = false;
bool scanning = false;
bool importing = false;
bool first_scan = true;
Expand All @@ -189,6 +190,7 @@ class EditorFileSystem : public Node {
bool revalidate_import_files = false;
int nb_files_total = 0;

void _notify_filesystem_changed();
void _scan_filesystem();
void _first_scan_filesystem();
void _first_scan_process_scripts(const ScannedDirectory *p_scan_dir, HashSet<String> &p_existing_class_names);
Expand Down
7 changes: 3 additions & 4 deletions editor/import/3d/resource_importer_obj.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ static Error _parse_material_library(const String &p_path, HashMap<String, Ref<S
return OK;
}

static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes, bool p_single_mesh, bool p_generate_tangents, bool p_optimize, Vector3 p_scale_mesh, Vector3 p_offset_mesh, bool p_disable_compression, List<String> *r_missing_deps) {
static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes, bool p_single_mesh, bool p_generate_tangents, Vector3 p_scale_mesh, Vector3 p_offset_mesh, bool p_disable_compression, List<String> *r_missing_deps) {
Ref<FileAccess> f = FileAccess::open(p_path, FileAccess::READ);
ERR_FAIL_COND_V_MSG(f.is_null(), ERR_CANT_OPEN, vformat("Couldn't open OBJ file '%s', it may not exist or not be readable.", p_path));

Expand Down Expand Up @@ -512,7 +512,7 @@ static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes,
Node *EditorOBJImporter::import_scene(const String &p_path, uint32_t p_flags, const HashMap<StringName, Variant> &p_options, List<String> *r_missing_deps, Error *r_err) {
List<Ref<ImporterMesh>> meshes;

Error err = _parse_obj(p_path, meshes, false, p_flags & IMPORT_GENERATE_TANGENT_ARRAYS, false, Vector3(1, 1, 1), Vector3(0, 0, 0), p_flags & IMPORT_FORCE_DISABLE_MESH_COMPRESSION, r_missing_deps);
Error err = _parse_obj(p_path, meshes, false, p_flags & IMPORT_GENERATE_TANGENT_ARRAYS, Vector3(1, 1, 1), Vector3(0, 0, 0), p_flags & IMPORT_FORCE_DISABLE_MESH_COMPRESSION, r_missing_deps);

if (err != OK) {
if (r_err) {
Expand Down Expand Up @@ -583,7 +583,6 @@ void ResourceImporterOBJ::get_import_options(const String &p_path, List<ImportOp
r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "generate_tangents"), true));
r_options->push_back(ImportOption(PropertyInfo(Variant::VECTOR3, "scale_mesh"), Vector3(1, 1, 1)));
r_options->push_back(ImportOption(PropertyInfo(Variant::VECTOR3, "offset_mesh"), Vector3(0, 0, 0)));
r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "optimize_mesh"), true));
r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "force_disable_mesh_compression"), false));
}

Expand All @@ -594,7 +593,7 @@ bool ResourceImporterOBJ::get_option_visibility(const String &p_path, const Stri
Error ResourceImporterOBJ::import(const String &p_source_file, const String &p_save_path, const HashMap<StringName, Variant> &p_options, List<String> *r_platform_variants, List<String> *r_gen_files, Variant *r_metadata) {
List<Ref<ImporterMesh>> meshes;

Error err = _parse_obj(p_source_file, meshes, true, p_options["generate_tangents"], p_options["optimize_mesh"], p_options["scale_mesh"], p_options["offset_mesh"], p_options["force_disable_mesh_compression"], nullptr);
Error err = _parse_obj(p_source_file, meshes, true, p_options["generate_tangents"], p_options["scale_mesh"], p_options["offset_mesh"], p_options["force_disable_mesh_compression"], nullptr);

ERR_FAIL_COND_V(err != OK, err);
ERR_FAIL_COND_V(meshes.size() != 1, ERR_BUG);
Expand Down
Loading
Loading