Skip to content

Commit

Permalink
Merge pull request #122 from jss2a98aj/cherry-pick-4.4-import
Browse files Browse the repository at this point in the history
[4.4 backport] Assorted import optimizations
  • Loading branch information
Bioblaze authored Nov 5, 2024
2 parents 5906fc4 + 779804a commit 0336f4c
Show file tree
Hide file tree
Showing 28 changed files with 1,616 additions and 74 deletions.
156 changes: 155 additions & 1 deletion core/io/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

#include "image.h"

#include "core/config/project_settings.h"
#include "core/error/error_list.h"
#include "core/error/error_macros.h"
#include "core/io/image_loader.h"
Expand Down Expand Up @@ -501,6 +502,38 @@ static void _convert(int p_width, int p_height, const uint8_t *p_src, uint8_t *p
}
}

template <typename T, uint32_t read_channels, uint32_t write_channels, T def_zero, T def_one>
static void _convert_fast(int p_width, int p_height, const T *p_src, T *p_dst) {
uint32_t dst_count = 0;
uint32_t src_count = 0;

const int resolution = p_width * p_height;

for (int i = 0; i < resolution; i++) {
memcpy(p_dst + dst_count, p_src + src_count, MIN(read_channels, write_channels) * sizeof(T));

if constexpr (write_channels > read_channels) {
const T def_value[4] = { def_zero, def_zero, def_zero, def_one };
memcpy(p_dst + dst_count + read_channels, &def_value[read_channels], (write_channels - read_channels) * sizeof(T));
}

dst_count += write_channels;
src_count += read_channels;
}
}

static bool _are_formats_compatible(Image::Format p_format0, Image::Format p_format1) {
if (p_format0 <= Image::FORMAT_RGBA8 && p_format1 <= Image::FORMAT_RGBA8) {
return true;
} else if (p_format0 <= Image::FORMAT_RGBAH && p_format0 >= Image::FORMAT_RH && p_format1 <= Image::FORMAT_RGBAH && p_format1 >= Image::FORMAT_RH) {
return true;
} else if (p_format0 <= Image::FORMAT_RGBAF && p_format0 >= Image::FORMAT_RF && p_format1 <= Image::FORMAT_RGBAF && p_format1 >= Image::FORMAT_RF) {
return true;
}

return false;
}

void Image::convert(Format p_new_format) {
ERR_FAIL_INDEX_MSG(p_new_format, FORMAT_MAX, "The Image format specified (" + itos(p_new_format) + ") is out of range. See Image's Format enum.");
if (data.size() == 0) {
Expand All @@ -517,7 +550,7 @@ void Image::convert(Format p_new_format) {
if (Image::is_format_compressed(format) || Image::is_format_compressed(p_new_format)) {
ERR_FAIL_MSG("Cannot convert to <-> from compressed formats. Use compress() and decompress() instead.");

} else if (format > FORMAT_RGBA8 || p_new_format > FORMAT_RGBA8) {
} else if (!_are_formats_compatible(format, p_new_format)) {
//use put/set pixel which is slower but works with non byte formats
Image new_img(width, height, mipmaps, p_new_format);

Expand Down Expand Up @@ -648,6 +681,78 @@ void Image::convert(Format p_new_format) {
case FORMAT_RGBA8 | (FORMAT_RGB8 << 8):
_convert<3, true, 3, false, false, false>(mip_width, mip_height, rptr, wptr);
break;
case FORMAT_RH | (FORMAT_RGH << 8):
_convert_fast<uint16_t, 1, 2, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RH | (FORMAT_RGBH << 8):
_convert_fast<uint16_t, 1, 3, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RH | (FORMAT_RGBAH << 8):
_convert_fast<uint16_t, 1, 4, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGH | (FORMAT_RH << 8):
_convert_fast<uint16_t, 2, 1, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGH | (FORMAT_RGBH << 8):
_convert_fast<uint16_t, 2, 3, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGH | (FORMAT_RGBAH << 8):
_convert_fast<uint16_t, 2, 4, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGBH | (FORMAT_RH << 8):
_convert_fast<uint16_t, 3, 1, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGBH | (FORMAT_RGH << 8):
_convert_fast<uint16_t, 3, 2, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGBH | (FORMAT_RGBAH << 8):
_convert_fast<uint16_t, 3, 4, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGBAH | (FORMAT_RH << 8):
_convert_fast<uint16_t, 4, 1, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGBAH | (FORMAT_RGH << 8):
_convert_fast<uint16_t, 4, 2, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RGBAH | (FORMAT_RGBH << 8):
_convert_fast<uint16_t, 4, 3, 0x0000, 0x3C00>(mip_width, mip_height, (const uint16_t *)rptr, (uint16_t *)wptr);
break;
case FORMAT_RF | (FORMAT_RGF << 8):
_convert_fast<uint32_t, 1, 2, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RF | (FORMAT_RGBF << 8):
_convert_fast<uint32_t, 1, 3, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RF | (FORMAT_RGBAF << 8):
_convert_fast<uint32_t, 1, 4, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGF | (FORMAT_RF << 8):
_convert_fast<uint32_t, 2, 1, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGF | (FORMAT_RGBF << 8):
_convert_fast<uint32_t, 2, 3, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGF | (FORMAT_RGBAF << 8):
_convert_fast<uint32_t, 2, 4, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGBF | (FORMAT_RF << 8):
_convert_fast<uint32_t, 3, 1, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGBF | (FORMAT_RGF << 8):
_convert_fast<uint32_t, 3, 2, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGBF | (FORMAT_RGBAF << 8):
_convert_fast<uint32_t, 3, 4, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGBAF | (FORMAT_RF << 8):
_convert_fast<uint32_t, 4, 1, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGBAF | (FORMAT_RGF << 8):
_convert_fast<uint32_t, 4, 2, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
case FORMAT_RGBAF | (FORMAT_RGBF << 8):
_convert_fast<uint32_t, 4, 3, 0x00000000, 0x3F800000>(mip_width, mip_height, (const uint32_t *)rptr, (uint32_t *)wptr);
break;
}
}

Expand Down Expand Up @@ -2630,6 +2735,27 @@ Error Image::compress(CompressMode p_mode, CompressSource p_source, ASTCFormat p
Error Image::compress_from_channels(CompressMode p_mode, UsedChannels p_channels, ASTCFormat p_astc_format) {
ERR_FAIL_COND_V(data.is_empty(), ERR_INVALID_DATA);

// RenderingDevice only.
if (GLOBAL_GET("rendering/textures/vram_compression/compress_with_gpu")) {
switch (p_mode) {
case COMPRESS_BPTC: {
// BC7 is unsupported currently.
if ((format >= FORMAT_RF && format <= FORMAT_RGBE9995) && _image_compress_bptc_rd_func) {
Error result = _image_compress_bptc_rd_func(this, p_channels);

// If the image was compressed successfully, we return here. If not, we fall back to the default compression scheme.
if (result == OK) {
return OK;
}
}

} break;

default: {
}
}
}

switch (p_mode) {
case COMPRESS_S3TC: {
ERR_FAIL_NULL_V(_image_compress_bc_func, ERR_UNAVAILABLE);
Expand Down Expand Up @@ -3011,6 +3137,7 @@ void (*Image::_image_compress_bptc_func)(Image *, Image::UsedChannels) = nullptr
void (*Image::_image_compress_etc1_func)(Image *) = nullptr;
void (*Image::_image_compress_etc2_func)(Image *, Image::UsedChannels) = nullptr;
void (*Image::_image_compress_astc_func)(Image *, Image::ASTCFormat) = nullptr;
Error (*Image::_image_compress_bptc_rd_func)(Image *, Image::UsedChannels) = nullptr;
void (*Image::_image_decompress_bc)(Image *) = nullptr;
void (*Image::_image_decompress_bptc)(Image *) = nullptr;
void (*Image::_image_decompress_etc1)(Image *) = nullptr;
Expand Down Expand Up @@ -3696,6 +3823,33 @@ void Image::bump_map_to_normal_map(float bump_scale) {
data = result_image;
}

bool Image::detect_signed(bool p_include_mips) const {
ERR_FAIL_COND_V(is_compressed(), false);

if (format >= Image::FORMAT_RH && format <= Image::FORMAT_RGBAH) {
const uint16_t *img_data = reinterpret_cast<const uint16_t *>(data.ptr());
const uint64_t img_size = p_include_mips ? (data.size() / 2) : (width * height * get_format_pixel_size(format) / 2);

for (uint64_t i = 0; i < img_size; i++) {
if ((img_data[i] & 0x8000) != 0 && (img_data[i] & 0x7fff) != 0) {
return true;
}
}

} else if (format >= Image::FORMAT_RF && format <= Image::FORMAT_RGBAF) {
const uint32_t *img_data = reinterpret_cast<const uint32_t *>(data.ptr());
const uint64_t img_size = p_include_mips ? (data.size() / 4) : (width * height * get_format_pixel_size(format) / 4);

for (uint64_t i = 0; i < img_size; i++) {
if ((img_data[i] & 0x80000000) != 0 && (img_data[i] & 0x7fffffff) != 0) {
return true;
}
}
}

return false;
}

void Image::srgb_to_linear() {
if (data.size() == 0) {
return;
Expand Down
4 changes: 4 additions & 0 deletions core/io/image.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ class Image : public Resource {
static void (*_image_compress_etc2_func)(Image *, UsedChannels p_channels);
static void (*_image_compress_astc_func)(Image *, ASTCFormat p_format);

static Error (*_image_compress_bptc_rd_func)(Image *, UsedChannels p_channels);

static void (*_image_decompress_bc)(Image *);
static void (*_image_decompress_bptc)(Image *);
static void (*_image_decompress_etc1)(Image *);
Expand Down Expand Up @@ -388,6 +390,8 @@ class Image : public Resource {
Ref<Image> get_image_from_mipmap(int p_mipmap) const;
void bump_map_to_normal_map(float bump_scale = 1.0);

bool detect_signed(bool p_include_mips = true) const;

void blit_rect(const Ref<Image> &p_src, const Rect2i &p_src_rect, const Point2i &p_dest);
void blit_rect_mask(const Ref<Image> &p_src, const Ref<Image> &p_mask, const Rect2i &p_src_rect, const Point2i &p_dest);
void blend_rect(const Ref<Image> &p_src, const Rect2i &p_src_rect, const Point2i &p_dest);
Expand Down
67 changes: 40 additions & 27 deletions core/math/color.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,33 +129,46 @@ struct [[nodiscard]] Color {
}

_FORCE_INLINE_ uint32_t to_rgbe9995() const {
const float pow2to9 = 512.0f;
const float B = 15.0f;
const float N = 9.0f;

float sharedexp = 65408.000f; // Result of: ((pow2to9 - 1.0f) / pow2to9) * powf(2.0f, 31.0f - 15.0f)

float cRed = MAX(0.0f, MIN(sharedexp, r));
float cGreen = MAX(0.0f, MIN(sharedexp, g));
float cBlue = MAX(0.0f, MIN(sharedexp, b));

float cMax = MAX(cRed, MAX(cGreen, cBlue));

float expp = MAX(-B - 1.0f, floor(Math::log(cMax) / (real_t)Math_LN2)) + 1.0f + B;

float sMax = (float)floor((cMax / Math::pow(2.0f, expp - B - N)) + 0.5f);

float exps = expp + 1.0f;

if (0.0f <= sMax && sMax < pow2to9) {
exps = expp;
}

float sRed = Math::floor((cRed / pow(2.0f, exps - B - N)) + 0.5f);
float sGreen = Math::floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f);
float sBlue = Math::floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f);

return (uint32_t(Math::fast_ftoi(sRed)) & 0x1FF) | ((uint32_t(Math::fast_ftoi(sGreen)) & 0x1FF) << 9) | ((uint32_t(Math::fast_ftoi(sBlue)) & 0x1FF) << 18) | ((uint32_t(Math::fast_ftoi(exps)) & 0x1F) << 27);
// https://github.com/microsoft/DirectX-Graphics-Samples/blob/v10.0.19041.0/MiniEngine/Core/Color.cpp
static const float kMaxVal = float(0x1FF << 7);
static const float kMinVal = float(1.f / (1 << 16));

// Clamp RGB to [0, 1.FF*2^16]
const float _r = CLAMP(r, 0.0f, kMaxVal);
const float _g = CLAMP(g, 0.0f, kMaxVal);
const float _b = CLAMP(b, 0.0f, kMaxVal);

// Compute the maximum channel, no less than 1.0*2^-15
const float MaxChannel = MAX(MAX(_r, _g), MAX(_b, kMinVal));

// Take the exponent of the maximum channel (rounding up the 9th bit) and
// add 15 to it. When added to the channels, it causes the implicit '1.0'
// bit and the first 8 mantissa bits to be shifted down to the low 9 bits
// of the mantissa, rounding the truncated bits.
union {
float f;
int32_t i;
} R, G, B, E;

E.f = MaxChannel;
E.i += 0x07804000; // Add 15 to the exponent and 0x4000 to the mantissa
E.i &= 0x7F800000; // Zero the mantissa

// This shifts the 9-bit values we need into the lowest bits, rounding as
// needed. Note that if the channel has a smaller exponent than the max
// channel, it will shift even more. This is intentional.
R.f = _r + E.f;
G.f = _g + E.f;
B.f = _b + E.f;

// Convert the Bias to the correct exponent in the upper 5 bits.
E.i <<= 4;
E.i += 0x10000000;

// Combine the fields. RGB floats have unwanted data in the upper 9
// bits. Only red needs to mask them off because green and blue shift
// it out to the left.
return E.i | (B.i << 18) | (G.i << 9) | (R.i & 511);
}

_FORCE_INLINE_ Color blend(const Color &p_over) const {
Expand Down
5 changes: 5 additions & 0 deletions doc/classes/ProjectSettings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2882,6 +2882,11 @@
<member name="rendering/textures/lossless_compression/force_png" type="bool" setter="" getter="" default="false">
If [code]true[/code], the texture importer will import lossless textures using the PNG format. Otherwise, it will default to using WebP.
</member>
<member name="rendering/textures/vram_compression/compress_with_gpu" type="bool" setter="" getter="" default="true">
If [code]true[/code], the texture importer will utilize the GPU for compressing textures, which makes large textures import significantly faster.
[b]Note:[/b] This setting requires either Vulkan or D3D12 available as a rendering backend.
[b]Note:[/b] Currently this only affects BC6H compression, which is used on Desktop and Console for HDR images.
</member>
<member name="rendering/textures/vram_compression/import_etc2_astc" type="bool" setter="" getter="" default="false">
If [code]true[/code], the texture importer will import VRAM-compressed textures using the Ericsson Texture Compression 2 algorithm for lower quality textures and normal maps and Adaptable Scalable Texture Compression algorithm for high quality textures (in 4×4 block size).
[b]Note:[/b] This setting is an override. The texture importer will always import the format the host platform needs, even if this is set to [code]false[/code].
Expand Down
3 changes: 0 additions & 3 deletions doc/classes/ResourceImporterOBJ.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@
<member name="offset_mesh" type="Vector3" setter="" getter="" default="Vector3(0, 0, 0)">
Offsets the mesh's data by the specified value. This can be used to work around misaligned meshes without having to modify the source file.
</member>
<member name="optimize_mesh" type="bool" setter="" getter="" default="true">
Unused parameter. This currently has no effect.
</member>
<member name="scale_mesh" type="Vector3" setter="" getter="" default="Vector3(1, 1, 1)">
Scales the mesh's data by the specified value. This can be used to work around misscaled meshes without having to modify the source file.
</member>
Expand Down
10 changes: 9 additions & 1 deletion editor/editor_file_system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2277,10 +2277,18 @@ void EditorFileSystem::update_files(const Vector<String> &p_script_paths) {
if (!is_scanning()) {
_process_update_pending();
}
call_deferred(SNAME("emit_signal"), "filesystem_changed"); // Update later
if (!filesystem_changed_queued) {
filesystem_changed_queued = true;
callable_mp(this, &EditorFileSystem::_notify_filesystem_changed).call_deferred();
}
}
}

void EditorFileSystem::_notify_filesystem_changed() {
emit_signal("filesystem_changed");
filesystem_changed_queued = false;
}

HashSet<String> EditorFileSystem::get_valid_extensions() const {
return valid_extensions;
}
Expand Down
2 changes: 2 additions & 0 deletions editor/editor_file_system.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ class EditorFileSystem : public Node {
EditorFileSystemDirectory *new_filesystem = nullptr;
ScannedDirectory *first_scan_root_dir = nullptr;

bool filesystem_changed_queued = false;
bool scanning = false;
bool importing = false;
bool first_scan = true;
Expand All @@ -189,6 +190,7 @@ class EditorFileSystem : public Node {
bool revalidate_import_files = false;
int nb_files_total = 0;

void _notify_filesystem_changed();
void _scan_filesystem();
void _first_scan_filesystem();
void _first_scan_process_scripts(const ScannedDirectory *p_scan_dir, HashSet<String> &p_existing_class_names);
Expand Down
7 changes: 3 additions & 4 deletions editor/import/3d/resource_importer_obj.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ static Error _parse_material_library(const String &p_path, HashMap<String, Ref<S
return OK;
}

static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes, bool p_single_mesh, bool p_generate_tangents, bool p_optimize, Vector3 p_scale_mesh, Vector3 p_offset_mesh, bool p_disable_compression, List<String> *r_missing_deps) {
static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes, bool p_single_mesh, bool p_generate_tangents, Vector3 p_scale_mesh, Vector3 p_offset_mesh, bool p_disable_compression, List<String> *r_missing_deps) {
Ref<FileAccess> f = FileAccess::open(p_path, FileAccess::READ);
ERR_FAIL_COND_V_MSG(f.is_null(), ERR_CANT_OPEN, vformat("Couldn't open OBJ file '%s', it may not exist or not be readable.", p_path));

Expand Down Expand Up @@ -512,7 +512,7 @@ static Error _parse_obj(const String &p_path, List<Ref<ImporterMesh>> &r_meshes,
Node *EditorOBJImporter::import_scene(const String &p_path, uint32_t p_flags, const HashMap<StringName, Variant> &p_options, List<String> *r_missing_deps, Error *r_err) {
List<Ref<ImporterMesh>> meshes;

Error err = _parse_obj(p_path, meshes, false, p_flags & IMPORT_GENERATE_TANGENT_ARRAYS, false, Vector3(1, 1, 1), Vector3(0, 0, 0), p_flags & IMPORT_FORCE_DISABLE_MESH_COMPRESSION, r_missing_deps);
Error err = _parse_obj(p_path, meshes, false, p_flags & IMPORT_GENERATE_TANGENT_ARRAYS, Vector3(1, 1, 1), Vector3(0, 0, 0), p_flags & IMPORT_FORCE_DISABLE_MESH_COMPRESSION, r_missing_deps);

if (err != OK) {
if (r_err) {
Expand Down Expand Up @@ -583,7 +583,6 @@ void ResourceImporterOBJ::get_import_options(const String &p_path, List<ImportOp
r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "generate_tangents"), true));
r_options->push_back(ImportOption(PropertyInfo(Variant::VECTOR3, "scale_mesh"), Vector3(1, 1, 1)));
r_options->push_back(ImportOption(PropertyInfo(Variant::VECTOR3, "offset_mesh"), Vector3(0, 0, 0)));
r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "optimize_mesh"), true));
r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "force_disable_mesh_compression"), false));
}

Expand All @@ -594,7 +593,7 @@ bool ResourceImporterOBJ::get_option_visibility(const String &p_path, const Stri
Error ResourceImporterOBJ::import(const String &p_source_file, const String &p_save_path, const HashMap<StringName, Variant> &p_options, List<String> *r_platform_variants, List<String> *r_gen_files, Variant *r_metadata) {
List<Ref<ImporterMesh>> meshes;

Error err = _parse_obj(p_source_file, meshes, true, p_options["generate_tangents"], p_options["optimize_mesh"], p_options["scale_mesh"], p_options["offset_mesh"], p_options["force_disable_mesh_compression"], nullptr);
Error err = _parse_obj(p_source_file, meshes, true, p_options["generate_tangents"], p_options["scale_mesh"], p_options["offset_mesh"], p_options["force_disable_mesh_compression"], nullptr);

ERR_FAIL_COND_V(err != OK, err);
ERR_FAIL_COND_V(meshes.size() != 1, ERR_BUG);
Expand Down
Loading

0 comments on commit 0336f4c

Please sign in to comment.