diff --git a/CMakeLists.txt b/CMakeLists.txt index 492233ae..45ebfbad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ option(SAN "sanitize" FALSE) # For MSVC builds default to SSE enabled, and determine if it's a 64-bit (-A x64) vs. 32-bit (-A Win32) build. if (MSVC) option(SSE "SSE 4.1 support" TRUE) - if ( CMAKE_GENERATOR_PLATFORM STREQUAL Win32 ) + if ( CMAKE_GENERATOR_PLATFORM STREQUAL Win32 ) set(BUILD_X64 0) else() set(BUILD_X64 1) @@ -63,7 +63,7 @@ endif() if (NOT MSVC) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -g") - + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}") @@ -73,7 +73,7 @@ if (NOT MSVC) endif() set(CMAKE_CXX_FLAGS -std=c++11) - set(GCC_COMPILE_FLAGS "-fvisibility=hidden -fPIC -fno-strict-aliasing -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64 -Wall -Wextra -Wno-unused-local-typedefs -Wno-unused-value -Wno-unused-parameter -Wno-unused-variable") + set(GCC_COMPILE_FLAGS "-fvisibility=hidden -fPIC -fno-strict-aliasing -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64 -Wall -Wextra -Wno-unused-local-typedefs -Wno-unused-parameter -Wno-unused-variable") if (NOT BUILD_X64) set(GCC_COMPILE_FLAGS "${GCC_COMPILE_FLAGS} -m32") @@ -92,7 +92,7 @@ if (NOT MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBASISU_SUPPORT_SSE=0") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBASISU_SUPPORT_SSE=0") endif() - + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_LINK_FLAGS} -static-libgcc -static-libstdc++ -static") else() if (SSE) @@ -102,7 +102,7 @@ if (NOT MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBASISU_SUPPORT_SSE=0") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBASISU_SUPPORT_SSE=0") endif() - + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_LINK_FLAGS} -Wl,-rpath .") endif() @@ -123,7 +123,7 @@ else() endif() endif() -set(BASISU_SRC_LIST ${COMMON_SRC_LIST} +set(BASISU_SRC_LIST ${COMMON_SRC_LIST} 
basisu_tool.cpp encoder/basisu_backend.cpp encoder/basisu_basis_file.cpp @@ -169,8 +169,8 @@ if (NOT MSVC) # For Non-Windows builds, let cmake try and find the system OpenCL headers/libs for us. if (OPENCL_FOUND) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBASISU_SUPPORT_OPENCL=1") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBASISU_SUPPORT_OPENCL=1") - + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBASISU_SUPPORT_OPENCL=1") + target_include_directories( basisu PRIVATE ${OpenCL_INCLUDE_DIRS} ) set(BASISU_EXTRA_LIBS ${OpenCL_LIBRARIES}) endif() @@ -179,8 +179,8 @@ else() # For Windows builds, we use our local copies of the OpenCL import lib and Khronos headers. if (OPENCL) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBASISU_SUPPORT_OPENCL=1") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBASISU_SUPPORT_OPENCL=1") - + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBASISU_SUPPORT_OPENCL=1") + target_include_directories( basisu PRIVATE "OpenCL" ) if ( BUILD_X64 ) @@ -190,7 +190,7 @@ else() endif() endif() -endif() +endif() if (NOT MSVC) target_link_libraries(basisu m pthread ${BASISU_EXTRA_LIBS}) @@ -198,7 +198,7 @@ endif() if (NOT EMSCRIPTEN) install(TARGETS basisu DESTINATION bin) - + if (UNIX) if (CMAKE_BUILD_TYPE STREQUAL Release) if (APPLE) diff --git a/OpenCL/license.txt b/OpenCL/license.txt index 9d69cfcb..af230774 100644 --- a/OpenCL/license.txt +++ b/OpenCL/license.txt @@ -1,4 +1,4 @@ -These optional files (which are only needed when compiling with OpenCL support enabled in the encoder) are from the +These optional files (which are only needed when compiling with OpenCL support enabled in the encoder) are from the Khronos Group OpenCL headers github repo. They are Copyright (c) 2008-2020 The Khronos Group Inc. 
https://github.com/KhronosGroup/OpenCL-Headers diff --git a/README.md b/README.md index 00c12c7c..8214ce6d 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Basis Universal Supercompressed GPU Texture Codec [![Build status](https://ci.appveyor.com/api/projects/status/87eb0o96pjho4sh0?svg=true)](https://ci.appveyor.com/project/BinomialLLC/basis-universal) -Basis Universal is a ["supercompressed"](http://gamma.cs.unc.edu/GST/gst.pdf) GPU texture data interchange system that supports two highly compressed intermediate file formats (.basis or the [.KTX2 open standard from the Khronos Group](https://github.khronos.org/KTX-Specification/)) that can be quickly transcoded to a [very wide variety](https://github.com/BinomialLLC/basis_universal/wiki/OpenGL-texture-format-enums-table) of GPU compressed and uncompressed pixel formats: ASTC 4x4 L/LA/RGB/RGBA, PVRTC1 4bpp RGB/RGBA, PVRTC2 RGB/RGBA, BC7 mode 6 RGB, BC7 mode 5 RGB/RGBA, BC1-5 RGB/RGBA/X/XY, ETC1 RGB, ETC2 RGBA, ATC RGB/RGBA, ETC2 EAC R11 and RG11, FXT1 RGB, and uncompressed raster image formats 8888/565/4444. +Basis Universal is a ["supercompressed"](http://gamma.cs.unc.edu/GST/gst.pdf) GPU texture data interchange system that supports two highly compressed intermediate file formats (.basis or the [.KTX2 open standard from the Khronos Group](https://github.khronos.org/KTX-Specification/)) that can be quickly transcoded to a [very wide variety](https://github.com/BinomialLLC/basis_universal/wiki/OpenGL-texture-format-enums-table) of GPU compressed and uncompressed pixel formats: ASTC 4x4 L/LA/RGB/RGBA, PVRTC1 4bpp RGB/RGBA, PVRTC2 RGB/RGBA, BC7 mode 6 RGB, BC7 mode 5 RGB/RGBA, BC1-5 RGB/RGBA/X/XY, ETC1 RGB, ETC2 RGBA, ATC RGB/RGBA, ETC2 EAC R11 and RG11, FXT1 RGB, and uncompressed raster image formats 8888/565/4444. 
The system now supports two modes: a high quality mode which is internally based off the [UASTC compressed texture format](https://richg42.blogspot.com/2020/01/uastc-block-format-encoding.html), and the original lower quality mode which is based off a subset of ETC1 called "ETC1S". UASTC is for extremely high quality (similar to BC7 quality) textures, and ETC1S is for very small files. The ETC1S system includes built-in data compression, while the UASTC system includes an optional Rate Distortion Optimization (RDO) post-process stage that conditions the encoded UASTC texture data in the .basis file so it can be more effectively LZ compressed by the end user. More technical details about UASTC integration are [here](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-implementation-details). @@ -15,7 +15,7 @@ The system's bitrate depends on the quality setting and image content, but commo The .basis and .KTX2 transcoders have been fuzz tested using [zzuf](https://www.linux.com/news/fuzz-testing-zzuf). -So far, we've compiled the code using MSVC 2019, under Ubuntu 18.04 and 20 x64 using cmake with either clang 3.8 or gcc 5.4, and emscripten 1.35 to asm.js. (Be sure to use this version or later of emcc, as earlier versions fail with internal errors/exceptions during compilation.) +So far, we've compiled the code using MSVC 2019, under Ubuntu 18.04 and 20 x64 using cmake with either clang 3.8 or gcc 5.4, and emscripten 1.35 to asm.js. (Be sure to use this version or later of emcc, as earlier versions fail with internal errors/exceptions during compilation.) Basis Universal supports "skip blocks" in ETC1S compressed texture arrays, which makes it useful for basic [compressed texture video](http://gamma.cs.unc.edu/MPTC/) applications. Note that Basis Universal is still at heart a GPU texture compression system, not a dedicated video codec, so bitrates will be larger than even MPEG1. 
1/10/21 release notes: @@ -32,11 +32,11 @@ https://www.losinglena.com/ ### Quick Introduction -Probably the most important concept to understand about Basis Universal before using it: The system supports **two** very different universal texture modes: The original "ETC1S" mode is low/medium quality, but the resulting file sizes are very small because the system has built-in compression for ETC1S texture format files. This is the command line encoding tool's default mode. ETC1S textures work best on images, photos, map data, or albedo/specular/etc. textures, but don't work as well on normal maps. +Probably the most important concept to understand about Basis Universal before using it: The system supports **two** very different universal texture modes: The original "ETC1S" mode is low/medium quality, but the resulting file sizes are very small because the system has built-in compression for ETC1S texture format files. This is the command line encoding tool's default mode. ETC1S textures work best on images, photos, map data, or albedo/specular/etc. textures, but don't work as well on normal maps. There's the second "UASTC" mode, which is significantly higher quality (comparable to BC7 and highest quality LDR ASTC 4x4), and is usable on all texture types including complex normal maps. UASTC mode purposely does not have built-in file compression like ETC1S mode does, so the resulting files are quite large (8-bits/texel - same as BC7) compared to ETC1S mode. The UASTC encoder has an optional Rate Distortion Optimization (RDO) encoding mode (implemented as a post-process over the encoded UASTC texture data), which conditions the output texture data in a way that results in better lossless compression when UASTC .basis files are compressed with Deflate/Zstd, etc. In UASTC mode, you must losslessly compress .basis files yourself. 
.KTX2 files have built-in lossless compression support using [Zstandard](https://facebook.github.io/zstd/), which is used by default on UASTC textures. -Basis Universal is not an image compression codec, but a GPU texture compression codec. It can be used just like an image compression codec, but that's not the only use case. Here's a [good intro](http://renderingpipeline.com/2012/07/texture-compression/) to GPU texture compression. If you're looking to primarily use the system as an image compression codec on sRGB photographic content, use the default ETC1S mode, because it has built-in compression. +Basis Universal is not an image compression codec, but a GPU texture compression codec. It can be used just like an image compression codec, but that's not the only use case. Here's a [good intro](http://renderingpipeline.com/2012/07/texture-compression/) to GPU texture compression. If you're looking to primarily use the system as an image compression codec on sRGB photographic content, use the default ETC1S mode, because it has built-in compression. **The "-q X" option controls the output quality in ETC1S mode.** The default is quality level 128. "-q 255" will increase quality quite a bit. If you want even higher quality, try "-max_selectors 16128 -max_endpoints 16128" instead of -q. -q internally tries to set the codebook sizes (or the # of quantization intervals for endpoints/selectors) for you. You need to experiment with the quality level on your content. @@ -52,7 +52,7 @@ The encoder optionally uses Zstandard's single source file compressor (in zstd/z ### Command Line Compression Tool -The command line tool used to create, validate, and transcode/unpack .basis/.KTX2 files is named "basisu". Run basisu without any parameters for help. +The command line tool used to create, validate, and transcode/unpack .basis/.KTX2 files is named "basisu". Run basisu without any parameters for help. 
The library and command line tool have no other 3rd party dependencies (that are not already in the repo), so it's pretty easy to build. @@ -151,7 +151,7 @@ The mipmapped or cubemap .KTX files will be in a wide variety of compressed GPU After compression, the compressor transcodes all slices in the output .basis file to validate that the file decompresses correctly. It also validates all header, compressed data, and slice data CRC16's. -For best quality, you must **supply basisu with original uncompressed source images**. Any other type of lossy compression applied before basisu (including ETC1/BC1-5, BC7, JPEG, etc.) will cause multi-generational artifacts to appear in the final output textures. +For best quality, you must **supply basisu with original uncompressed source images**. Any other type of lossy compression applied before basisu (including ETC1/BC1-5, BC7, JPEG, etc.) will cause multi-generational artifacts to appear in the final output textures. For the maximum possible achievable ETC1S mode quality with the current format and encoder (completely ignoring encoding speed!), use: @@ -168,7 +168,7 @@ To compress small video sequences, say using tools like ffmpeg and VirtualDub: For video, the more cores your machine has, the better. Basis is intended for smaller videos of a few dozen seconds or so. If you are very patient and have a Threadripper or Xeon workstation, you should be able to encode up to a few thousand 720P frames. The "webgl_videotest" directory contains a very simple video viewer. For texture video, use -comp_level 2 or 3. The default is 1, which isn't quite good enough for texture video. Higher comp_level's result in reduced ETC1S artifacts. -The .basis file will contain multiple images (all using the same global codebooks), which you can retrieve using the transcoder's image API. The system now supports [conditional replenisment](https://en.wikipedia.org/wiki/MPEG-1) (CR, or "skip blocks"). 
CR can reduce the bitrate of some videos (highly dependent on how dynamic the content is) by over 50%. For videos using CR, the images must be requested from the transcoder in sequence from first to last, and random access is only allowed to I-Frames. +The .basis file will contain multiple images (all using the same global codebooks), which you can retrieve using the transcoder's image API. The system now supports [conditional replenisment](https://en.wikipedia.org/wiki/MPEG-1) (CR, or "skip blocks"). CR can reduce the bitrate of some videos (highly dependent on how dynamic the content is) by over 50%. For videos using CR, the images must be requested from the transcoder in sequence from first to last, and random access is only allowed to I-Frames. If you are doing rate distortion comparisons vs. other similar systems, be sure to experiment with increasing the endpoint RDO threshold (-endpoint_rdo_thresh X). This setting controls how aggressively the compressor's backend will combine together nearby blocks so they use the same block endpoint codebook vectors, for better coding efficiency. X defaults to a modest 1.5, which means the backend is allowed to increase the overall color distance by 1.5x while searching for merge candidates. The higher this setting, the better the compression, with the tradeoff of more block artifacts. Settings up to ~2.25 can work well, and make the codec more competitive. "-endpoint_rdo_thresh 1.75" is a good setting on many textures. @@ -214,7 +214,7 @@ Compress a 20 sRGB source image video sequence (x01.png, x02.png, x03.png, etc.) `basisu -comp_level 2 -q 255 -file x.png -mipmap -y_flip`\ Compress a mipmapped x.basis file from an sRGB image named x.png, Y flip each source image, set encoder to level 2 for slightly higher quality (but slower encoding). 
-### WebGL test +### WebGL test The "WebGL" directory contains three simple WebGL demos that use the transcoder and compressor compiled to wasm with [emscripten](https://emscripten.org/). See more details [here](webgl/README.md). @@ -238,7 +238,7 @@ The Basis Universal port in vcpkg is kept up to date by Microsoft team members a Both the transcoder and now the compressor (as of 12/17/2020) may be compiled using emscripten to WebAssembly and used on the web. Currently, multithreading is not supported by the compressor when compiled with emscripten. A simple Web compression demo is in webgl/encode_test. All compressor features, including texture video, are supported and fully exposed. -To enable compression support compile the JavaScript wrappers in `webgl/transcoding/basis_wrappers.cpp` with `BASISU_SUPPORT_ENCODING` set to 1. See the webgl/encoding directory. +To enable compression support compile the JavaScript wrappers in `webgl/transcoding/basis_wrappers.cpp` with `BASISU_SUPPORT_ENCODING` set to 1. See the webgl/encoding directory. ### Low-level C++ encoder API @@ -275,7 +275,7 @@ bool test() basisCompressorParams.m_debug = true; basisCompressorParams.m_status_output = true; basisCompressorParams.m_compute_stats = true; - + basisu::job_pool jpool(1); basisCompressorParams.m_pJob_pool = &jpool; diff --git a/appveyor.yml b/appveyor.yml index d790c2e5..6502a0a4 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,6 @@ --- -image: +image: - macos - Ubuntu2004 - Visual Studio 2019 @@ -16,7 +16,7 @@ install: sudo apt-get update -y sudo apt-get install -y dos2unix recode fi - + build_script: - ps: | New-Item -Path . -Name "build" -ItemType "directory" diff --git a/basisu_tool.cpp b/basisu_tool.cpp index 59675620..78e8049c 100644 --- a/basisu_tool.cpp +++ b/basisu_tool.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#if _MSC_VER -// For sprintf(), strcpy() +// For sprintf(), strcpy() #define _CRT_SECURE_NO_WARNINGS (1) #endif @@ -66,7 +66,7 @@ enum tool_mode static void print_usage() { printf("\nUsage: basisu filename [filename ...] \n"); - + puts("\n" "The default mode is compression of one or more PNG/BMP/TGA/JPG files to a .basis file. Alternate modes:\n" " -unpack: Use transcoder to unpack .basis file to one or more .ktx/.png files\n" @@ -246,7 +246,7 @@ static bool load_listing_file(const std::string &f, basisu::vector { if (read_filename[0] == ' ') read_filename.erase(0, 1); - else + else break; } @@ -255,7 +255,7 @@ static bool load_listing_file(const std::string &f, basisu::vector const char c = read_filename.back(); if ((c == ' ') || (c == '\n') || (c == '\r')) read_filename.erase(read_filename.size() - 1, 1); - else + else break; } @@ -292,8 +292,8 @@ class command_line_params m_individual(true), m_no_ktx(false), m_ktx_only(false), - m_write_out(false), m_format_only(-1), + m_write_out(false), m_etc1_only(false), m_fuzz_testing(false), m_compare_ssim(false), @@ -431,13 +431,13 @@ class command_line_params int uastc_level = atoi(arg_v[arg_index + 1]); uastc_level = clamp(uastc_level, 0, TOTAL_PACK_UASTC_LEVELS - 1); - + static_assert(TOTAL_PACK_UASTC_LEVELS == 5, "TOTAL_PACK_UASTC_LEVELS==5"); static const uint32_t s_level_flags[TOTAL_PACK_UASTC_LEVELS] = { cPackUASTCLevelFastest, cPackUASTCLevelFaster, cPackUASTCLevelDefault, cPackUASTCLevelSlower, cPackUASTCLevelVerySlow }; - + m_comp_params.m_pack_uastc_flags &= ~cPackUASTCLevelMask; m_comp_params.m_pack_uastc_flags |= s_level_flags[uastc_level]; - + arg_count++; } else if (strcasecmp(pArg, "-resample") == 0) @@ -656,7 +656,7 @@ class command_line_params { REMAINING_ARGS_CHECK(1); m_comp_params.m_mip_filter = arg_v[arg_index + 1]; - // TODO: Check filter + // TODO: Check filter arg_count++; } else if (strcasecmp(pArg, "-mip_renorm") == 0) @@ -777,7 +777,7 @@ class command_line_params arg_index += arg_count; } - + 
if (m_comp_params.m_quality_level != -1) { m_comp_params.m_max_endpoint_clusters = 0; @@ -799,7 +799,7 @@ class command_line_params else m_comp_params.m_mip_srgb = false; } - + return true; } @@ -830,12 +830,12 @@ class command_line_params new_input_alpha_filenames.push_back(m_input_alpha_filenames[i]); } new_input_alpha_filenames.swap(m_input_alpha_filenames); - + return true; } basis_compressor_params m_comp_params; - + tool_mode m_mode; bool m_ktx2_mode; @@ -844,7 +844,7 @@ class command_line_params uint32_t m_ktx2_animdata_duration; uint32_t m_ktx2_animdata_timescale; uint32_t m_ktx2_animdata_loopcount; - + basisu::vector m_input_filenames; basisu::vector m_input_alpha_filenames; @@ -862,9 +862,9 @@ class command_line_params std::string m_etc1s_use_global_codebooks_file; std::string m_test_file_dir; - + uint32_t m_max_threads; - + bool m_individual; bool m_no_ktx; bool m_ktx_only; @@ -881,13 +881,13 @@ static bool expand_multifile(command_line_params &opts) { if (!opts.m_multifile_printf.size()) return true; - + if (!opts.m_multifile_num) { error_printf("-multifile_printf specified, but not -multifile_num\n"); return false; } - + std::string fmt(opts.m_multifile_printf); // Workaround for MSVC debugger issues. Questionable to leave in here. 
size_t x = fmt.find_first_of('!'); @@ -899,15 +899,15 @@ static bool expand_multifile(command_line_params &opts) error_printf("Must include C-style printf() format character '%%' in -multifile_printf string\n"); return false; } - + for (uint32_t i = opts.m_multifile_first; i < opts.m_multifile_first + opts.m_multifile_num; i++) { char buf[1024]; -#ifdef _WIN32 +#ifdef _WIN32 sprintf_s(buf, sizeof(buf), fmt.c_str(), i); #else snprintf(buf, sizeof(buf), fmt.c_str(), i); -#endif +#endif if (buf[0]) opts.m_input_filenames.push_back(buf); @@ -918,8 +918,8 @@ static bool expand_multifile(command_line_params &opts) struct basis_data { - basis_data() : - m_transcoder() + basis_data() : + m_transcoder() { } uint8_vec m_file_data; @@ -984,7 +984,7 @@ static bool compress_mode(command_line_params &opts) job_pool compressor_jpool(opts.m_parallel_compression ? 1 : num_threads); if (!opts.m_parallel_compression) opts.m_comp_params.m_pJob_pool = &compressor_jpool; - + if (!expand_multifile(opts)) { error_printf("-multifile expansion failed!\n"); @@ -996,7 +996,7 @@ static bool compress_mode(command_line_params &opts) error_printf("No input files to process!\n"); return false; } - + basis_data* pGlobal_codebook_data = nullptr; if (opts.m_etc1s_use_global_codebooks_file.size()) { @@ -1040,7 +1040,7 @@ static bool compress_mode(command_line_params &opts) pGlobal_codebook_data2 = nullptr; #endif } - + basis_compressor_params &params = opts.m_comp_params; if (opts.m_ktx2_mode) @@ -1050,7 +1050,7 @@ static bool compress_mode(command_line_params &opts) params.m_ktx2_uastc_supercompression = basist::KTX2_SS_ZSTANDARD; else params.m_ktx2_uastc_supercompression = basist::KTX2_SS_NONE; - + params.m_ktx2_srgb_transfer_func = opts.m_comp_params.m_perceptual; if (params.m_tex_type == basist::basis_texture_type::cBASISTexTypeVideoFrames) @@ -1062,7 +1062,7 @@ static bool compress_mode(command_line_params &opts) const char* pAD = "KTXanimData"; kv.m_key.resize(strlen(pAD) + 1); 
strcpy((char*)kv.m_key.data(), pAD); - + basist::ktx2_animdata ad; ad.m_duration = opts.m_ktx2_animdata_duration; ad.m_timescale = opts.m_ktx2_animdata_timescale; @@ -1073,14 +1073,14 @@ static bool compress_mode(command_line_params &opts) params.m_ktx2_key_values.push_back(kv); } - + // TODO- expose this to command line. params.m_ktx2_zstd_supercompression_level = opts.m_ktx2_zstandard_level; } params.m_read_source_images = true; params.m_write_output_basis_files = true; - params.m_pGlobal_codebooks = pGlobal_codebook_data ? &pGlobal_codebook_data->m_transcoder.get_lowlevel_etc1s_decoder() : nullptr; + params.m_pGlobal_codebooks = pGlobal_codebook_data ? &pGlobal_codebook_data->m_transcoder.get_lowlevel_etc1s_decoder() : nullptr; FILE *pCSV_file = nullptr; if (opts.m_csv_file.size()) { @@ -1096,7 +1096,7 @@ static bool compress_mode(command_line_params &opts) } printf("Processing %u total file(s)\n", (uint32_t)opts.m_input_filenames.size()); - + interval_timer all_tm; all_tm.start(); @@ -1143,7 +1143,7 @@ static bool compress_mode(command_line_params &opts) params.m_source_filenames = opts.m_input_filenames; params.m_source_alpha_filenames = opts.m_input_alpha_filenames; } - + if (opts.m_output_filename.size()) params.m_out_filename = opts.m_output_filename; else @@ -1337,8 +1337,8 @@ static bool compress_mode(command_line_params &opts) total_failures++; - error_printf("File %u (first source image: \"%s\", output file: \"%s\") failed with error code %i!\n", i, - comp_params_vec[i].m_source_filenames[0].c_str(), + error_printf("File %u (first source image: \"%s\", output file: \"%s\") failed with error code %i!\n", i, + comp_params_vec[i].m_source_filenames[0].c_str(), comp_params_vec[i].m_out_filename.c_str(), (int)results[i].m_error_code); } @@ -1351,7 +1351,7 @@ static bool compress_mode(command_line_params &opts) printf("Total successes: %u failures: %u\n", total_successes, total_failures); } // if (opts.m_parallel_compression) - + all_tm.stop(); if 
(total_files > 1) @@ -1362,9 +1362,9 @@ static bool compress_mode(command_line_params &opts) fclose(pCSV_file); pCSV_file = nullptr; } - delete pGlobal_codebook_data; + delete pGlobal_codebook_data; pGlobal_codebook_data = nullptr; - + return result; } @@ -1397,16 +1397,16 @@ static bool unpack_and_validate_ktx2_file( error_printf("ktx2_transcoder::start_transcoding() failed! File either uses an unsupported feature, is invalid, was corrupted, or this is a bug.\n"); return false; } - + printf("Resolution: %ux%u\n", dec.get_width(), dec.get_height()); printf("Mipmap Levels: %u\n", dec.get_levels()); printf("Texture Array Size (layers): %u\n", dec.get_layers()); printf("Total Faces: %u (%s)\n", dec.get_faces(), (dec.get_faces() == 6) ? "CUBEMAP" : "2D"); printf("Is Texture Video: %u\n", dec.is_video()); - + const bool is_etc1s = dec.get_format() == basist::basis_tex_format::cETC1S; printf("Supercompression Format: %s\n", is_etc1s ? "ETC1S" : "UASTC"); - + printf("Supercompression Scheme: "); switch (dec.get_header().m_supercompression_scheme) { @@ -1436,7 +1436,7 @@ static bool unpack_and_validate_ktx2_file( } else printf("DFD chan0: %s\n", basist::ktx2_get_uastc_df_channel_id_str(dec.get_dfd_channel_id0())); - + printf("DFD hex values:\n"); for (uint32_t i = 0; i < dec.get_dfd().size(); i++) { @@ -1454,13 +1454,13 @@ static bool unpack_and_validate_ktx2_file( if (dec.get_key_values()[i].m_value.size() > 256) continue; - + bool is_ascii = true; for (uint32_t j = 0; j < dec.get_key_values()[i].m_value.size(); j++) { uint8_t c = dec.get_key_values()[i].m_value[j]; - if (!( - ((c >= ' ') && (c < 0x80)) || + if (!( + ((c >= ' ') && (c < 0x80)) || ((j == dec.get_key_values()[i].m_value.size() - 1) && (!c)) )) { @@ -1786,7 +1786,7 @@ static bool unpack_and_validate_basis_file( uint32_t file_index, const std::string &base_filename, uint8_vec &basis_file_data, - command_line_params& opts, + command_line_params& opts, FILE *pCSV_file, basis_data* pGlobal_codebook_data, 
uint32_t &total_unpack_warnings, @@ -1809,7 +1809,7 @@ static bool unpack_and_validate_basis_file( if (!dec.validate_file_checksums(&basis_file_data[0], (uint32_t)basis_file_data.size(), true)) { error_printf("File version is unsupported, or file failed one or more CRC checks!\n"); - + return false; } } @@ -1928,7 +1928,7 @@ static bool unpack_and_validate_basis_file( printf("start_transcoding time: %3.3f ms\n", start_transcoding_time_ms); basisu::vector< gpu_image_vec > gpu_images[(int)basist::transcoder_texture_format::cTFTotalTextureFormats]; - + double total_format_transcoding_time_ms[(int)basist::transcoder_texture_format::cTFTotalTextureFormats]; clear_obj(total_format_transcoding_time_ms); @@ -2074,7 +2074,7 @@ static bool unpack_and_validate_basis_file( tm.start(); if (!dec.transcode_slice( - &basis_file_data[0], (uint32_t)basis_file_data.size(), + &basis_file_data[0], (uint32_t)basis_file_data.size(), level_info.m_first_slice_index, gi.get_ptr(), gi.get_total_blocks(), basist::block_format::cUASTC_4x4, gi.get_bytes_per_block())) { error_printf("Failed transcoding image level (%u %u) to UASTC!\n", image_index, level_index); @@ -2509,7 +2509,7 @@ static bool unpack_and_validate_mode(command_line_params &opts) tm.start(); //const bool validate_flag = (opts.m_mode == cValidate); - + basis_data* pGlobal_codebook_data = nullptr; if (opts.m_etc1s_use_global_codebooks_file.size()) { @@ -2576,7 +2576,7 @@ static bool unpack_and_validate_mode(command_line_params &opts) delete pGlobal_codebook_data; pGlobal_codebook_data = nullptr; return false; } - + bool is_ktx2 = false; if (file_data.size() >= sizeof(basist::g_ktx2_file_identifier)) { @@ -2613,10 +2613,10 @@ static bool unpack_and_validate_mode(command_line_params &opts) if (!status) { - if (pCSV_file) + if (pCSV_file) fclose(pCSV_file); - delete pGlobal_codebook_data; + delete pGlobal_codebook_data; pGlobal_codebook_data = nullptr; return false; @@ -2639,7 +2639,7 @@ static bool 
unpack_and_validate_mode(command_line_params &opts) fclose(pCSV_file); pCSV_file = nullptr; } - delete pGlobal_codebook_data; + delete pGlobal_codebook_data; pGlobal_codebook_data = nullptr; return true; @@ -2707,7 +2707,7 @@ static bool compare_mode(command_line_params &opts) im.calc(a, b, 0, 0, true, true); im.print("Y 601 " ); - + if (opts.m_compare_ssim) { vec4F s_rgb(compute_ssim(a, b, false, false)); @@ -2750,7 +2750,7 @@ static bool compare_mode(command_line_params &opts) save_png("delta_img_rgb.png", delta_img, cImageSaveIgnoreAlpha); printf("Wrote delta_img_rgb.png\n"); - + save_png("delta_img_a.png", delta_img, cImageSaveGrayscale, 3); printf("Wrote delta_img_a.png\n"); @@ -2932,7 +2932,7 @@ static bool compare_mode(command_line_params &opts) } } // display_plot - + return true; } @@ -2971,7 +2971,7 @@ static bool split_image_mode(command_line_params& opts) } printf("Wrote file %s\n", buf); } - + return true; } @@ -3016,7 +3016,7 @@ static bool combine_images_mode(command_line_params& opts) const char* pOutput_filename = "combined.png"; if (opts.m_output_filename.size()) pOutput_filename = opts.m_output_filename.c_str(); - + if (!save_png(pOutput_filename, combined_img)) { fprintf(stderr, "Failed writing file %s\n", pOutput_filename); @@ -3590,7 +3590,7 @@ static bool bench_mode(command_line_params& opts) // HACK HACK const uint32_t max_rdo_jobs = 4; - + char rdo_fname[256]; FILE* pFile = nullptr; for (uint32_t try_index = 0; try_index < 100; try_index++) @@ -3602,7 +3602,7 @@ static bool bench_mode(command_line_params& opts) fclose(pFile); continue; } - + pFile = fopen(rdo_fname, "w"); if (!pFile) printf("Cannot open CSV file %s\n", rdo_fname); @@ -3621,7 +3621,7 @@ static bool bench_mode(command_line_params& opts) p.m_lambda = q; p.m_max_allowed_rms_increase_ratio = 10.0f; p.m_skip_block_rms_thresh = 8.0f; - + bool rdo_status = uastc_rdo((uint32_t)ublocks.size(), &ublocks[0], &orig_block_pixels[0], p, flags, &jpool, max_rdo_jobs); if (!rdo_status) { @@ 
-3685,7 +3685,7 @@ static bool bench_mode(command_line_params& opts) } if (pFile) fclose(pFile); - + { size_t comp_size = 0; void* pComp_data = tdefl_compress_mem_to_heap(&ublocks[0], ublocks.size() * 16, &comp_size, TDEFL_MAX_PROBES_MASK);// TDEFL_DEFAULT_MAX_PROBES); @@ -3711,7 +3711,7 @@ static bool bench_mode(command_line_params& opts) total_rdo_raw_size += ublocks.size() * 16; total_comp_blocks += ublocks.size(); } - + printf("Total blocks: %u\n", total_blocks); printf("Total BC1 hint 0's: %u %3.1f%%\n", total_bc1_hint0s, total_bc1_hint0s * 100.0f / total_blocks); printf("Total BC1 hint 1's: %u %3.1f%%\n", total_bc1_hint1s, total_bc1_hint1s * 100.0f / total_blocks); @@ -3756,7 +3756,7 @@ static bool bench_mode(command_line_params& opts) c[i] = (uint8_t)v; } - + } #endif @@ -3877,7 +3877,7 @@ static bool bench_mode(command_line_params& opts) em.print("RDOUASTC RGBA "); total_rdo_uastc_rgba_psnr += basisu::minimum(99.0f, em.m_psnr); - // UASTC2 + // UASTC2 em.calc(img, uastc2_img, 0, 3); em.print("UASTC2 RGB "); total_uastc2_psnr += basisu::minimum(99.0f, em.m_psnr); @@ -3970,7 +3970,7 @@ static bool bench_mode(command_line_params& opts) total_obc1_psnr += basisu::minimum(99.0f, em.m_psnr); total_obc1_psnr_sq += basisu::minimum(99.0f, em.m_psnr) * basisu::minimum(99.0f, em.m_psnr); #endif - + em.calc(img, opt_bc1_2_img, 0, 3); em.print("OBC1 2 RGB "); total_obc1_2_psnr += basisu::minimum(99.0f, em.m_psnr); @@ -4104,7 +4104,7 @@ static bool bench_mode(command_line_params& opts) } // image_index printf("Total time: %f secs\n", otm.get_elapsed_secs()); - + printf("Total Non-RDO UASTC size: %llu, compressed size: %llu, %3.2f bits/texel\n", (unsigned long long)total_raw_size, (unsigned long long)total_comp_size, @@ -4215,10 +4215,10 @@ const struct test_file uint32_t m_etc1s_size; float m_etc1s_psnr; float m_uastc_psnr; - + uint32_t m_etc1s_128_size; float m_etc1s_128_psnr; -} g_test_files[] = +} g_test_files[] = { { "black_1x1.png", 189, 100.0f, 100.0f, 189, 100.0f 
}, { "kodim01.png", 30993, 27.40f, 44.14f, 58354, 30.356064f }, @@ -4290,7 +4290,7 @@ static bool test_mode(command_line_params& opts) // Test ETC1S flags_and_quality = (opts.m_comp_params.m_multithreading ? cFlagThreaded : 0) | cFlagPrintStats | cFlagPrintStatus; - + { printf("**** Testing ETC1S non-OpenCL level 1\n"); @@ -4430,7 +4430,7 @@ static bool test_mode(command_line_params& opts) static bool clbench_mode(command_line_params& opts) { BASISU_NOTE_UNUSED(opts); - + bool opencl_failed = false; bool use_cl = basis_benchmark_etc1s_opencl(&opencl_failed); if (use_cl) @@ -4471,19 +4471,19 @@ static int main_internal(int argc, const char **argv) #endif basisu_encoder_init(use_opencl, opencl_force_serialization); - + //printf("Encoder and transcoder libraries initialized in %3.3f ms\n", tm.get_elapsed_ms()); #if defined(DEBUG) || defined(_DEBUG) printf("DEBUG build\n"); #endif - + if (argc == 1) { print_usage(); return EXIT_FAILURE; } - + command_line_params opts; if (!opts.parse(argc, argv)) { @@ -4496,7 +4496,7 @@ static int main_internal(int argc, const char **argv) #else printf("Multithreading: %u, Zstandard support: %u, OpenCL: %u\n", (uint32_t)opts.m_comp_params.m_multithreading, basist::basisu_transcoder_supports_ktx2_zstd(), opencl_is_available()); #endif - + if (!opts.process_listing_files()) return EXIT_FAILURE; @@ -4567,7 +4567,7 @@ int main(int argc, const char** argv) #ifdef _DEBUG printf("DEBUG\n"); #endif - + int status = EXIT_FAILURE; #if BASISU_CATCH_EXCEPTIONS diff --git a/bin/ocl_kernels.cl b/bin/ocl_kernels.cl index 6eda24f1..e1339e9e 100644 --- a/bin/ocl_kernels.cl +++ b/bin/ocl_kernels.cl @@ -112,7 +112,7 @@ uint color_distance(bool perceptual, color_rgba e1, color_rgba e2, bool alpha) int da = (e1.w - e2.w) << 7; id += ((uint)(da * da) >> 7U); } - + return id; #endif } @@ -120,7 +120,7 @@ uint color_distance(bool perceptual, color_rgba e1, color_rgba e2, bool alpha) { int dr = e1.x - e2.x; int dg = e1.y - e2.y; - int db = e1.z - e2.z; + int 
db = e1.z - e2.z; int da = e1.w - e2.w; return dr * dr + dg * dg + db * db + da * da; } @@ -128,7 +128,7 @@ uint color_distance(bool perceptual, color_rgba e1, color_rgba e2, bool alpha) { int dr = e1.x - e2.x; int dg = e1.y - e2.y; - int db = e1.z - e2.z; + int db = e1.z - e2.z; return dr * dr + dg * dg + db * db; } } @@ -137,7 +137,7 @@ typedef struct __attribute__ ((packed)) etc_block_tag { // big endian uint64: // bit ofs: 56 48 40 32 24 16 8 0 - // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 union { uint64_t m_uint64; @@ -252,7 +252,7 @@ constant int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] constant uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; constant uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; -uint32_t etc_block_get_byte_bits(const etc_block *p, uint32_t ofs, uint32_t num) +uint32_t etc_block_get_byte_bits(const etc_block *p, uint32_t ofs, uint32_t num) { assert((ofs + num) <= 64U); assert(num && (num <= 8U)); @@ -275,7 +275,7 @@ void etc_block_set_byte_bits(etc_block *p, uint32_t ofs, uint32_t num, uint32_t p->m_bytes[byte_ofs] |= (bits << byte_bit_ofs); } -bool etc_block_get_flip_bit(const etc_block *p) +bool etc_block_get_flip_bit(const etc_block *p) { return (p->m_bytes[3] & 1) != 0; } @@ -286,7 +286,7 @@ void etc_block_set_flip_bit(etc_block *p, bool flip) p->m_bytes[3] |= (uint8_t)(flip); } -bool etc_block_get_diff_bit(const etc_block *p) +bool etc_block_get_diff_bit(const etc_block *p) { return (p->m_bytes[3] & 2) != 0; } @@ -299,7 +299,7 @@ void etc_block_set_diff_bit(etc_block *p, bool diff) // Returns intensity modifier table (0-7) used by subblock subblock_id. 
// subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2) -uint32_t etc_block_get_inten_table(const etc_block *p, uint32_t subblock_id) +uint32_t etc_block_get_inten_table(const etc_block *p, uint32_t subblock_id) { assert(subblock_id < 2); const uint32_t ofs = subblock_id ? 2 : 5; @@ -322,7 +322,7 @@ void etc_block_set_inten_tables_etc1s(etc_block *p, uint32_t t) etc_block_set_inten_table(p, 1, t); } -uint32_t etc_block_get_raw_selector(const etc_block *pBlock, uint32_t x, uint32_t y) +uint32_t etc_block_get_raw_selector(const etc_block *pBlock, uint32_t x, uint32_t y) { assert((x | y) < 4); @@ -337,7 +337,7 @@ uint32_t etc_block_get_raw_selector(const etc_block *pBlock, uint32_t x, uint32_ } // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. -uint32_t etc_block_get_selector(const etc_block *pBlock, uint32_t x, uint32_t y) +uint32_t etc_block_get_selector(const etc_block *pBlock, uint32_t x, uint32_t y) { return g_etc1_to_selector_index[etc_block_get_raw_selector(pBlock, x, y)]; } @@ -354,7 +354,7 @@ void etc_block_set_selector(etc_block *pBlock, uint32_t x, uint32_t y, uint32_t const uint32_t mask = 1 << byte_bit_ofs; const uint32_t etc1_val = g_selector_index_to_etc1[val]; - + const uint32_t lsb = etc1_val & 1; const uint32_t msb = etc1_val >> 1; @@ -381,7 +381,7 @@ void etc_block_set_base4_color(etc_block *pBlock, uint32_t idx, uint16_t c) } } -uint16_t etc_block_get_base4_color(const etc_block *pBlock, uint32_t idx) +uint16_t etc_block_get_base4_color(const etc_block *pBlock, uint32_t idx) { uint32_t r, g, b; if (idx) @@ -421,7 +421,7 @@ void etc_block_set_delta3_color(etc_block *pBlock, uint16_t c) etc_block_set_byte_bits(pBlock, cETC1DeltaColor3BBitOffset, 3, c & 7); } -uint16_t etc_block_get_delta3_color(const etc_block *pBlock) +uint16_t etc_block_get_delta3_color(const etc_block *pBlock) { const uint32_t r = etc_block_get_byte_bits(pBlock, cETC1DeltaColor3RBitOffset, 3); const uint32_t g = 
etc_block_get_byte_bits(pBlock, cETC1DeltaColor3GBitOffset, 3); @@ -504,7 +504,7 @@ color_rgba etc_block_unpack_color4(uint16_t packed_color4, bool scaled, uint32_t } // false if didn't clamp, true if any component clamped -bool etc_block_get_block_colors(const etc_block *pBlock, color_rgba* pBlock_colors, uint32_t subblock_index) +bool etc_block_get_block_colors(const etc_block *pBlock, color_rgba* pBlock_colors, uint32_t subblock_index) { color_rgba b; @@ -530,7 +530,7 @@ bool etc_block_get_block_colors(const etc_block *pBlock, color_rgba* pBlock_colo return dc; } -void get_block_colors5(color_rgba *pBlock_colors, const color_rgba *pBase_color5, uint32_t inten_table, bool scaled /* false */) +void get_block_colors5(color_rgba *pBlock_colors, const color_rgba *pBase_color5, uint32_t inten_table, bool scaled /* false */) { color_rgba b = *pBase_color5; @@ -695,7 +695,7 @@ void etc_block_set_block_color5(etc_block *pBlock, color_rgba c0_unscaled, color void etc_block_set_block_color5_etc1s(etc_block *pBlock, color_rgba c_unscaled) { etc_block_set_diff_bit(pBlock, true); - + etc_block_set_base5_color(pBlock, etc_block_pack_color5(c_unscaled, false)); etc_block_set_delta3_color(pBlock, etc_block_pack_delta3(0, 0, 0)); } @@ -729,9 +729,9 @@ void etc_block_pack_raw_selectors(etc_block *pBlock, const uint8_t *pSelectors) { const uint32_t bit_index = x * 4 + y; const uint32_t s = pSelectors[x + y * 4]; - + const uint32_t lsb = s & 1, msb = s >> 1; - + word3 |= (lsb << bit_index); word2 |= (msb << bit_index); } @@ -764,14 +764,14 @@ typedef struct etc1s_optimizer_solution_coordinates_tag uint32_t m_inten_table; } etc1s_optimizer_solution_coordinates; -color_rgba get_scaled_color(color_rgba unscaled_color) +color_rgba get_scaled_color(color_rgba unscaled_color) { int br, bg, bb; - + br = (unscaled_color.x >> 2) | (unscaled_color.x << 3); bg = (unscaled_color.y >> 2) | (unscaled_color.y << 3); bb = (unscaled_color.z >> 2) | (unscaled_color.z << 3); - + return 
(color_rgba)((uint8_t)br, (uint8_t)bg, (uint8_t)bb, 255); } @@ -779,7 +779,7 @@ typedef struct etc1s_optimizer_potential_solution_tag { uint64_t m_error; etc1s_optimizer_solution_coordinates m_coords; - + uint8_t m_selectors[16]; bool m_valid; } etc1s_optimizer_potential_solution; @@ -795,20 +795,20 @@ typedef struct etc1s_optimizer_state_tag bool etc1s_optimizer_evaluate_solution( etc1s_optimizer_state *pState, const global encode_etc1s_param_struct *pParams, - uint64_t num_pixels, const global color_rgba *pPixels, + uint64_t num_pixels, const global color_rgba *pPixels, const global uint32_t *pWeights, - etc1s_optimizer_solution_coordinates coords, - etc1s_optimizer_potential_solution* pTrial_solution, + etc1s_optimizer_solution_coordinates coords, + etc1s_optimizer_potential_solution* pTrial_solution, etc1s_optimizer_potential_solution* pBest_solution) { uint8_t temp_selectors[16]; pTrial_solution->m_valid = false; - + const color_rgba base_color = get_scaled_color(coords.m_unscaled_color); - + pTrial_solution->m_error = INT64_MAX; - + for (uint32_t inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) { // TODO: This check is equivalent to medium quality in the C++ version. @@ -825,7 +825,7 @@ bool etc1s_optimizer_evaluate_solution( } uint64_t total_error = 0; - + for (uint64_t c = 0; c < num_pixels; c++) { color_rgba src_pixel = pPixels[c]; @@ -858,7 +858,7 @@ bool etc1s_optimizer_evaluate_solution( temp_selectors[c] = (uint8_t)(best_selector_index); total_error += pWeights ? 
(best_error * (uint64_t)pWeights[c]) : best_error; - + if (total_error >= pTrial_solution->m_error) break; } @@ -886,23 +886,23 @@ bool etc1s_optimizer_evaluate_solution( success = true; } } - + return success; } void etc1s_optimizer_init( etc1s_optimizer_state *pState, const global encode_etc1s_param_struct *pParams, - uint64_t num_pixels, const global color_rgba *pPixels, + uint64_t num_pixels, const global color_rgba *pPixels, const global uint32_t *pWeights) { const int LIMIT = 31; - + color_rgba min_color = 255; color_rgba max_color = 0; uint64_t total_weight = 0; uint64_t sum_r = 0, sum_g = 0, sum_b = 0; - + for (uint64_t i = 0; i < num_pixels; i++) { const color_rgba c = pPixels[i]; @@ -917,7 +917,7 @@ void etc1s_optimizer_init( sum_r += weight * c.x; sum_g += weight * c.y; sum_b += weight * c.z; - + total_weight += weight; } else @@ -929,7 +929,7 @@ void etc1s_optimizer_init( total_weight++; } } - + float3 avg_color; avg_color.x = (float)sum_r / total_weight; avg_color.y = (float)sum_g / total_weight; @@ -937,7 +937,7 @@ void etc1s_optimizer_init( pState->m_avg_color = avg_color; pState->m_max_comp_spread = max(max((int)max_color.x - (int)min_color.x, (int)max_color.y - (int)min_color.y), (int)max_color.z - (int)min_color.z); - + // TODO: The rounding here could be improved, like with DXT1/BC1. 
pState->m_br = clamp((int)(avg_color.x * (LIMIT / 255.0f) + .5f), 0, LIMIT); pState->m_bg = clamp((int)(avg_color.y * (LIMIT / 255.0f) + .5f), 0, LIMIT); @@ -961,7 +961,7 @@ void etc1s_optimizer_internal_cluster_fit( etc1s_optimizer_solution_coordinates cur_coords; cur_coords.m_unscaled_color = (color_rgba)(pState->m_br, pState->m_bg, pState->m_bb, 255); etc1s_optimizer_evaluate_solution(pState, pParams, num_pixels, pPixels, pWeights, cur_coords, &trial_solution, &pState->m_best_solution); - + if (pState->m_best_solution.m_error == 0) return; @@ -993,11 +993,11 @@ void etc1s_optimizer_internal_cluster_fit( const int br1 = clamp((int)((pState->m_avg_color.x - avg_delta_r_f) * (LIMIT / 255.0f) + .5f), 0, LIMIT); const int bg1 = clamp((int)((pState->m_avg_color.y - avg_delta_g_f) * (LIMIT / 255.0f) + .5f), 0, LIMIT); const int bb1 = clamp((int)((pState->m_avg_color.z - avg_delta_b_f) * (LIMIT / 255.0f) + .5f), 0, LIMIT); - + cur_coords.m_unscaled_color = (color_rgba)(br1, bg1, bb1, 255); etc1s_optimizer_evaluate_solution(pState, pParams, num_pixels, pPixels, pWeights, cur_coords, &trial_solution, &pState->m_best_solution); - + if (pState->m_best_solution.m_error == 0) break; } @@ -1005,24 +1005,24 @@ void etc1s_optimizer_internal_cluster_fit( // Encode an ETC1S block given a 4x4 pixel block. 
kernel void encode_etc1s_blocks( - const global encode_etc1s_param_struct *pParams, + const global encode_etc1s_param_struct *pParams, const global pixel_block *pInput_blocks, global etc_block *pOutput_blocks) { const uint32_t block_index = get_global_id(0); - + const global pixel_block *pInput_block = &pInput_blocks[block_index]; etc1s_optimizer_state state; etc1s_optimizer_init(&state, pParams, 16, pInput_block->m_pixels, NULL); etc1s_optimizer_internal_cluster_fit(pParams->m_total_perms, &state, pParams, 16, pInput_block->m_pixels, NULL); - + etc_block blk; etc_block_set_flip_bit(&blk, true); etc_block_set_block_color5_etc1s(&blk, state.m_best_solution.m_coords.m_unscaled_color); etc_block_set_inten_tables_etc1s(&blk, state.m_best_solution.m_coords.m_inten_table); etc_block_pack_raw_selectors(&blk, state.m_best_solution.m_selectors); - + pOutput_blocks[block_index] = blk; } @@ -1034,14 +1034,14 @@ typedef struct __attribute__ ((packed)) pixel_cluster_tag // Determine the optimal ETC1S color5/intensity given an arbitrary large array of 4x4 input pixel blocks. 
kernel void encode_etc1s_from_pixel_cluster( - const global encode_etc1s_param_struct *pParams, + const global encode_etc1s_param_struct *pParams, const global pixel_cluster *pInput_pixel_clusters, const global color_rgba *pInput_pixels, const global uint32_t *pInput_weights, global etc_block *pOutput_blocks) { const uint32_t cluster_index = get_global_id(0); - + const global pixel_cluster *pInput_cluster = &pInput_pixel_clusters[cluster_index]; uint64_t total_pixels = pInput_cluster->m_total_pixels; @@ -1051,12 +1051,12 @@ kernel void encode_etc1s_from_pixel_cluster( etc1s_optimizer_state state; etc1s_optimizer_init(&state, pParams, total_pixels, pPixels, pWeights); etc1s_optimizer_internal_cluster_fit(pParams->m_total_perms, &state, pParams, total_pixels, pPixels, pWeights); - + etc_block blk; etc_block_set_flip_bit(&blk, true); etc_block_set_block_color5_etc1s(&blk, state.m_best_solution.m_coords.m_unscaled_color); etc_block_set_inten_tables_etc1s(&blk, state.m_best_solution.m_coords.m_inten_table); - + pOutput_blocks[cluster_index] = blk; } @@ -1084,7 +1084,7 @@ typedef struct __attribute__ ((packed)) rec_param_struct_tag // For each input block: find the best endpoint cluster that encodes it. 
kernel void refine_endpoint_clusterization( - const rec_param_struct params, + const rec_param_struct params, const global pixel_block *pInput_blocks, const global rec_block_struct *pInput_block_info, const global rec_endpoint_cluster_struct *pInput_clusters, @@ -1096,7 +1096,7 @@ kernel void refine_endpoint_clusterization( const int perceptual = params.m_perceptual; const global pixel_block *pInput_block = &pInput_blocks[block_index]; - + pixel_block priv_pixel_block; priv_pixel_block = *pInput_block; @@ -1104,7 +1104,7 @@ kernel void refine_endpoint_clusterization( const uint32_t num_clusters = pInput_block_info[block_index].m_num_clusters; const uint32_t cur_block_cluster_index = pInput_block_info[block_index].m_cur_cluster_index; const uint32_t cur_block_cluster_etc_inten = pInput_block_info[block_index].m_cur_cluster_etc_inten; - + uint64_t overall_best_err = UINT64_MAX; uint32_t best_cluster_index = 0; @@ -1122,7 +1122,7 @@ kernel void refine_endpoint_clusterization( get_block_colors5(block_colors, &unscaled_color, etc_inten, false); uint64_t total_error = 0; - + for (uint32_t c = 0; c < 16; c++) { color_rgba src_pixel = priv_pixel_block.m_pixels[c]; @@ -1140,7 +1140,7 @@ kernel void refine_endpoint_clusterization( trial_error = color_distance(perceptual, src_pixel, block_colors[3], false); if (trial_error < best_error) best_error = trial_error; - + total_error += best_error; } @@ -1180,7 +1180,7 @@ typedef struct __attribute__ ((packed)) fosc_param_struct_tag // For each input block: Find the quantized selector which results in the lowest error. 
kernel void find_optimal_selector_clusters_for_each_block( - const fosc_param_struct params, + const fosc_param_struct params, const global pixel_block *pInput_blocks, const global fosc_block_struct *pInput_block_info, const global fosc_selector_struct *pInput_selectors, @@ -1188,10 +1188,10 @@ kernel void find_optimal_selector_clusters_for_each_block( global uint32_t *pOutput_selector_cluster_indices) { const uint32_t block_index = get_global_id(0); - + const global color_rgba *pBlock_pixels = pInput_blocks[block_index].m_pixels; const global fosc_block_struct *pBlock_info = &pInput_block_info[block_index]; - + const global fosc_selector_struct *pSelectors = &pInput_selectors[pBlock_info->m_first_selector]; const uint32_t num_selectors = pBlock_info->m_num_selectors; @@ -1220,7 +1220,7 @@ kernel void find_optimal_selector_clusters_for_each_block( for (uint32_t sel_index = 0; sel_index < num_selectors; sel_index++) { uint32_t sels = pSelectors[sel_index].m_packed_selectors; - + uint64_t total_err = 0; for (uint32_t i = 0; i < 16; i++, sels >>= 2) total_err += trial_errors[sels & 3][i]; @@ -1246,15 +1246,15 @@ typedef struct __attribute__ ((packed)) ds_param_struct_tag int m_perceptual; } ds_param_struct; -// For each input block: Determine the ETC1S selectors that result in the lowest error, given each block's predetermined ETC1S color5/intensities. +// For each input block: Determine the ETC1S selectors that result in the lowest error, given each block's predetermined ETC1S color5/intensities. 
kernel void determine_selectors( - const ds_param_struct params, + const ds_param_struct params, const global pixel_block *pInput_blocks, const global color_rgba *pInput_etc_color5_and_inten, global etc_block *pOutput_blocks) { const uint32_t block_index = get_global_id(0); - + const global color_rgba *pBlock_pixels = pInput_blocks[block_index].m_pixels; color_rgba etc_color5_inten = pInput_etc_color5_and_inten[block_index]; diff --git a/contrib/previewers/lib/basisu_transcoder.cpp b/contrib/previewers/lib/basisu_transcoder.cpp index 37640ee1..55d1ce17 100644 --- a/contrib/previewers/lib/basisu_transcoder.cpp +++ b/contrib/previewers/lib/basisu_transcoder.cpp @@ -9,7 +9,7 @@ * Transcoder build options for known platforms (iOS has ETC, ASTC and PVRTC; * Emscripten adds DXT to iOS's options; Android adds PVRTC2 to Emscripten's * options; other platforms build all except FXT1). - * + * * See https://github.com/BinomialLLC/basis_universal#shrinking-the-transcoders-compiled-size */ #ifdef __APPLE__ @@ -73,7 +73,7 @@ #define BASISD_SUPPORT_KTX2 1 #endif -// Set BASISD_SUPPORT_KTX2_ZSTD to 0 to disable Zstd usage and KTX2 UASTC Zstd supercompression support +// Set BASISD_SUPPORT_KTX2_ZSTD to 0 to disable Zstd usage and KTX2 UASTC Zstd supercompression support #ifndef BASISD_SUPPORT_KTX2_ZSTD #define BASISD_SUPPORT_KTX2_ZSTD 1 #endif @@ -539,28 +539,28 @@ namespace basisu // operator[] will assert on out of range indices, but in final builds there is (and will never be) any range checking on this method. 
//BASISU_FORCE_INLINE const T& operator[] (uint32_t i) const { assert(i < m_size); return m_p[i]; } //BASISU_FORCE_INLINE T& operator[] (uint32_t i) { assert(i < m_size); return m_p[i]; } - + #if !BASISU_VECTOR_FORCE_CHECKING BASISU_FORCE_INLINE const T& operator[] (size_t i) const { assert(i < m_size); return m_p[i]; } BASISU_FORCE_INLINE T& operator[] (size_t i) { assert(i < m_size); return m_p[i]; } #else - BASISU_FORCE_INLINE const T& operator[] (size_t i) const - { + BASISU_FORCE_INLINE const T& operator[] (size_t i) const + { if (i >= m_size) { fprintf(stderr, "operator[] invalid index: %u, max entries %u, type size %u\n", (uint32_t)i, m_size, (uint32_t)sizeof(T)); abort(); } - return m_p[i]; + return m_p[i]; } - BASISU_FORCE_INLINE T& operator[] (size_t i) - { + BASISU_FORCE_INLINE T& operator[] (size_t i) + { if (i >= m_size) { fprintf(stderr, "operator[] invalid index: %u, max entries %u, type size %u\n", (uint32_t)i, m_size, (uint32_t)sizeof(T)); abort(); } - return m_p[i]; + return m_p[i]; } #endif @@ -568,7 +568,7 @@ namespace basisu // The first element is returned if the index is out of range. BASISU_FORCE_INLINE const T& at(size_t i) const { assert(i < m_size); return (i >= m_size) ? m_p[0] : m_p[i]; } BASISU_FORCE_INLINE T& at(size_t i) { assert(i < m_size); return (i >= m_size) ? 
m_p[0] : m_p[i]; } - + #if !BASISU_VECTOR_FORCE_CHECKING BASISU_FORCE_INLINE const T& front() const { assert(m_size); return m_p[0]; } BASISU_FORCE_INLINE T& front() { assert(m_size); return m_p[0]; } @@ -576,42 +576,42 @@ namespace basisu BASISU_FORCE_INLINE const T& back() const { assert(m_size); return m_p[m_size - 1]; } BASISU_FORCE_INLINE T& back() { assert(m_size); return m_p[m_size - 1]; } #else - BASISU_FORCE_INLINE const T& front() const - { + BASISU_FORCE_INLINE const T& front() const + { if (!m_size) { fprintf(stderr, "front: vector is empty, type size %u\n", (uint32_t)sizeof(T)); abort(); } - return m_p[0]; + return m_p[0]; } - BASISU_FORCE_INLINE T& front() - { + BASISU_FORCE_INLINE T& front() + { if (!m_size) { fprintf(stderr, "front: vector is empty, type size %u\n", (uint32_t)sizeof(T)); abort(); } - return m_p[0]; + return m_p[0]; } - BASISU_FORCE_INLINE const T& back() const - { + BASISU_FORCE_INLINE const T& back() const + { if(!m_size) { fprintf(stderr, "back: vector is empty, type size %u\n", (uint32_t)sizeof(T)); abort(); } - return m_p[m_size - 1]; + return m_p[m_size - 1]; } - BASISU_FORCE_INLINE T& back() - { + BASISU_FORCE_INLINE T& back() + { if (!m_size) { fprintf(stderr, "back: vector is empty, type size %u\n", (uint32_t)sizeof(T)); abort(); } - return m_p[m_size - 1]; + return m_p[m_size - 1]; } #endif @@ -902,7 +902,7 @@ namespace basisu insert(m_size, p, n); return *this; } - + inline void erase(uint32_t start, uint32_t n) { assert((start + n) <= m_size); @@ -933,7 +933,7 @@ namespace basisu } else { - // Type is not bitwise copyable or movable. + // Type is not bitwise copyable or movable. // Move them down one at a time by using the equals operator, and destroying anything that's left over at the end. 
T* pDst_end = pDst + num_to_move; while (pDst != pDst_end) @@ -1153,7 +1153,7 @@ namespace basisu if (!m) break; cmp = -cmp; i += (((m + 1) >> 1) ^ cmp) - cmp; - if (i < 0) + if (i < 0) break; } } @@ -1298,7 +1298,7 @@ namespace basisu public: class iterator; class const_iterator; - + private: friend class iterator; friend class const_iterator; @@ -1486,7 +1486,7 @@ namespace basisu if (new_hash_size > m_values.size()) rehash((uint32_t)new_hash_size); } - + class iterator { friend class hash_map; @@ -1921,7 +1921,7 @@ namespace basisu inline void grow() { uint64_t n = m_values.size() * 3ULL; // was * 2 - + if (!helpers::is_power_of_2(n)) n = helpers::next_pow2(n); @@ -2138,11 +2138,11 @@ namespace basisu template struct bitwise_movable< hash_map > { enum { cFlag = true }; }; - + #if BASISU_HASHMAP_TEST extern void hash_map_test(); #endif - + } // namespace basisu namespace std @@ -2213,7 +2213,7 @@ namespace basisu void enable_debug_printf(bool enabled); void debug_printf(const char *pFmt, ...); - + template inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); } @@ -2222,7 +2222,7 @@ namespace basisu template inline S maximum(S a, S b) { return (a > b) ? a : b; } template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } - + template inline S minimum(S a, S b) { return (a < b) ? a : b; } template inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } template inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } @@ -2246,7 +2246,7 @@ namespace basisu inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } inline uint64_t iabs64(int64_t i) { return (i < 0) ? 
static_cast(-i) : static_cast(i); } - template inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } + template inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } template inline typename T::value_type *enlarge_vector(T &vec, size_t n) { size_t cs = vec.size(); vec.resize(cs + n); return &vec[cs]; } inline bool is_pow2(uint32_t x) { return x && ((x & (x - 1U)) == 0U); } @@ -2259,8 +2259,8 @@ namespace basisu template inline T saturate(T val) { return clamp(val, 0.0f, 1.0f); } - template inline void append_vector(T &vec, const R *pObjs, size_t n) - { + template inline void append_vector(T &vec, const R *pObjs, size_t n) + { if (n) { if (vec.size()) @@ -2311,7 +2311,7 @@ namespace basisu for (size_t i = 0; i < vec.size(); i++) vec[i] = obj; } - + inline uint64_t read_be64(const void *p) { uint64_t val = 0; @@ -2372,7 +2372,7 @@ namespace basisu pBytes[2] = (uint8_t)(val >> 16U); pBytes[3] = (uint8_t)(val >> 24U); } - + // Always little endian 1-8 byte unsigned int template struct packed_uint @@ -2382,17 +2382,17 @@ namespace basisu inline packed_uint() { static_assert(NumBytes <= sizeof(uint64_t), "Invalid NumBytes"); } inline packed_uint(uint64_t v) { *this = v; } inline packed_uint(const packed_uint& other) { *this = other; } - - inline packed_uint& operator= (uint64_t v) - { - for (uint32_t i = 0; i < NumBytes; i++) - m_bytes[i] = static_cast(v >> (i * 8)); - return *this; + + inline packed_uint& operator= (uint64_t v) + { + for (uint32_t i = 0; i < NumBytes; i++) + m_bytes[i] = static_cast(v >> (i * 8)); + return *this; } - inline packed_uint& operator= (const packed_uint& rhs) - { - memcpy(m_bytes, rhs.m_bytes, sizeof(m_bytes)); + inline packed_uint& operator= (const packed_uint& rhs) + { + memcpy(m_bytes, rhs.m_bytes, sizeof(m_bytes)); return *this; } @@ -2400,19 +2400,19 @@ namespace basisu { switch (NumBytes) { - case 1: + case 1: { return m_bytes[0]; } - case 2: + case 2: { return (m_bytes[1] << 8U) | m_bytes[0]; } - 
case 3: + case 3: { return (m_bytes[2] << 16U) | (m_bytes[1] << 8U) | m_bytes[0]; } - case 4: + case 4: { return read_le_dword(m_bytes); } @@ -2434,13 +2434,13 @@ namespace basisu uint32_t h = (m_bytes[6] << 16U) | (m_bytes[5] << 8U) | m_bytes[4]; return static_cast(l) | (static_cast(h) << 32U); } - case 8: + case 8: { uint32_t l = read_le_dword(m_bytes); uint32_t h = read_le_dword(m_bytes + 4); return static_cast(l) | (static_cast(h) << 32U); } - default: + default: { assert(0); return 0; @@ -2451,14 +2451,14 @@ namespace basisu enum eZero { cZero }; enum eNoClamp { cNoClamp }; - + // Rice/Huffman entropy coding - + // This is basically Deflate-style canonical Huffman, except we allow for a lot more symbols. enum { - cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, - cHuffmanFastLookupBits = 10, + cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, + cHuffmanFastLookupBits = 10, cHuffmanMaxSymsLog2 = 14, cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2, // Small zero runs @@ -2484,13 +2484,13 @@ namespace basisu enum class texture_format { cInvalidTextureFormat = -1, - + // Block-based formats cETC1, // ETC1 cETC1S, // ETC1 (subset: diff colors only, no subblocks) cETC2_RGB, // ETC2 color block (basisu doesn't support ETC2 planar/T/H modes - just basic ETC1) cETC2_RGBA, // ETC2 EAC alpha block followed by ETC2 color block - cETC2_ALPHA, // ETC2 EAC alpha block + cETC2_ALPHA, // ETC2 EAC alpha block cBC1, // DXT1 cBC3, // DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block) cBC4, // DXT5A @@ -2505,10 +2505,10 @@ namespace basisu cPVRTC2_4_RGBA, cETC2_R11_EAC, cETC2_RG11_EAC, - cUASTC4x4, + cUASTC4x4, cBC1_NV, cBC1_AMD, - + // Uncompressed/raw pixels cRGBA32, cRGB565, @@ -2566,7 +2566,7 @@ namespace basisu BASISU_NOTE_UNUSED(fmt); return 4; } - + } // namespace basisu /**** ended inlining basisu.h ****/ @@ -2584,9 +2584,9 @@ namespace basist // You probably don't care about these enum's unless you are going pretty 
low-level and calling the transcoder to decode individual slices. enum class block_format { - cETC1, // ETC1S RGB + cETC1, // ETC1S RGB cETC2_RGBA, // full ETC2 EAC RGBA8 block - cBC1, // DXT1 RGB + cBC1, // DXT1 RGB cBC3, // BC4 block followed by a four color BC1 block cBC4, // DXT5A (alpha block only) cBC5, // two BC4 blocks @@ -2596,9 +2596,9 @@ namespace basist cBC7_M5_COLOR, // RGB BC7 mode 5 color (writes an opaque mode 5 block) cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.) cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format) - cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC + cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC // data. If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking. 
- + cATC_RGB, cATC_RGBA_INTERPOLATED_ALPHA, cFXT1_RGB, // Opaque-only, has oddball 8x4 pixel block size @@ -2608,21 +2608,21 @@ namespace basist cETC2_EAC_R11, cETC2_EAC_RG11, - + cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits) cRGB32, // Writes RGB components to 32bpp output pixels cRGBA32, // Writes RGB255 components to 32bpp output pixels cA32, // Writes alpha component to 32bpp output pixels - + cRGB565, cBGR565, - + cRGBA4444_COLOR, cRGBA4444_ALPHA, cRGBA4444_COLOR_OPAQUE, cRGBA4444, - + cTotalBlockFormats }; @@ -2643,9 +2643,9 @@ namespace basist const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); - + uint16_t crc16(const void *r, size_t size, uint16_t crc); - + class huffman_decoding_table { friend class bitwise_decoder; @@ -2763,7 +2763,7 @@ namespace basist return false; else if (idx >= (int)m_tree.size()) m_tree.resize(idx + 1); - + if (!m_tree[idx]) { m_tree[idx] = (int16_t)tree_next; @@ -2932,14 +2932,14 @@ namespace basist for (;;) { uint32_t k = peek_bits(16); - + uint32_t l = 0; while (k & 1) { l++; k >>= 1; } - + q += l; remove_bits(l); @@ -2957,7 +2957,7 @@ namespace basist const uint32_t chunk_size = 1 << chunk_bits; const uint32_t chunk_mask = chunk_size - 1; - + uint32_t v = 0; uint32_t ofs = 0; @@ -2969,7 +2969,7 @@ namespace basist if ((s & chunk_size) == 0) break; - + if (ofs >= 32) { assert(0); @@ -2985,7 +2985,7 @@ namespace basist assert(ct.m_code_sizes.size()); const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; - + while (m_bit_buf_size < 16) { uint32_t c = 0; @@ -2996,7 +2996,7 @@ namespace basist m_bit_buf_size += 8; assert(m_bit_buf_size <= 32); } - + int code_len; int sym; @@ -3181,7 +3181,7 @@ namespace basist }; struct decoder_etc_block; - + inline uint8_t clamp255(int32_t i) { 
return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); @@ -3205,7 +3205,7 @@ namespace basist }; uint8_t c[4]; - + uint32_t m; }; @@ -3327,7 +3327,7 @@ namespace basist }; bool basis_block_format_is_uncompressed(block_format tex_type); - + } // namespace basist @@ -3340,8 +3340,8 @@ namespace basist namespace basist { struct color_quad_u8 - { - uint8_t m_c[4]; + { + uint8_t m_c[4]; }; const uint32_t TOTAL_UASTC_MODES = 19; @@ -3436,9 +3436,9 @@ namespace basist int m_ccs; // color component selector (dual plane only) bool m_dual_plane; // true if dual plane - // Weight and endpoint BISE values. + // Weight and endpoint BISE values. // Note these values are NOT linear, they must be BISE encoded. See Table 97 and Table 107. - uint8_t m_endpoints[18]; // endpoint values, in RR GG BB etc. order + uint8_t m_endpoints[18]; // endpoint values, in RR GG BB etc. order uint8_t m_weights[64]; // weight index values, raster order, in P0 P1, P0 P1, etc. or P0, P0, P0, P0, etc. order }; @@ -3533,7 +3533,7 @@ namespace basist #ifdef _DEBUG int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block); #endif - + struct uastc_block { union @@ -3573,10 +3573,10 @@ namespace basist }; color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock); - + struct decoder_etc_block; struct eac_block; - + bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb); bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb); @@ -3598,7 +3598,7 @@ namespace basist // Packs 16 scalar values to BC4. Same PSNR as stb_dxt's BC4 encoder, around 13% faster. 
void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride); - + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb); enum @@ -3608,7 +3608,7 @@ namespace basist cEncodeBC1UseSelectors = 4, }; void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags); - + // Alternate PCA-free encoder, around 15% faster, same (or slightly higher) avg. PSNR void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags); @@ -3625,7 +3625,7 @@ namespace basist bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha); bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality); - + // uastc_init() MUST be called before using this module. void uastc_init(); @@ -3654,10 +3654,10 @@ namespace basist enum basis_slice_desc_flags { cSliceDescFlagsHasAlpha = 1, - + // Video only: Frame doesn't refer to previous frame (no usage of conditional replenishment pred symbols) // Currently the first frame is always an I-Frame, all subsequent frames are P-Frames. This will eventually be changed to periodic I-Frames. - cSliceDescFlagsFrameIsIFrame = 2 + cSliceDescFlagsFrameIsIFrame = 2 }; #pragma pack(push) @@ -3672,7 +3672,7 @@ namespace basist basisu::packed_uint<2> m_orig_height; // The original image height (may not be a multiple of 4 pixels) basisu::packed_uint<2> m_num_blocks_x; // The slice's block X dimensions. Each block is 4x4 pixels. The slice's pixel resolution may or may not be a power of 2. - basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. + basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. 
basisu::packed_uint<4> m_file_ofs; // Offset from the start of the file to the start of the slice's data basisu::packed_uint<4> m_file_size; // The size of the compressed slice data in bytes @@ -3684,24 +3684,24 @@ namespace basist enum basis_header_flags { // Always set for ETC1S files. Not set for UASTC files. - cBASISHeaderFlagETC1S = 1, - + cBASISHeaderFlagETC1S = 1, + // Set if the texture had to be Y flipped before encoding. The actual interpretation of this (is Y up or down?) is up to the user. - cBASISHeaderFlagYFlipped = 2, - + cBASISHeaderFlagYFlipped = 2, + // Set if any slices contain alpha (for ETC1S, if the odd slices contain alpha data) - cBASISHeaderFlagHasAlphaSlices = 4, - - // For ETC1S files, this will be true if the file utilizes a codebook from another .basis file. - cBASISHeaderFlagUsesGlobalCodebook = 8, - - // Set if the texture data is sRGB, otherwise it's linear. + cBASISHeaderFlagHasAlphaSlices = 4, + + // For ETC1S files, this will be true if the file utilizes a codebook from another .basis file. + cBASISHeaderFlagUsesGlobalCodebook = 8, + + // Set if the texture data is sRGB, otherwise it's linear. // In reality, we have no idea if the texture data is actually linear or sRGB. This is the m_perceptual parameter passed to the compressor. - cBASISHeaderFlagSRGB = 16, + cBASISHeaderFlagSRGB = 16, }; // The image type field attempts to describe how to interpret the image data in a Basis file. - // The encoder library doesn't really do anything special or different with these texture types, this is mostly here for the benefit of the user. + // The encoder library doesn't really do anything special or different with these texture types, this is mostly here for the benefit of the user. 
// We do make sure the various constraints are followed (2DArray/cubemap/videoframes/volume implies that each image has the same resolution and # of mipmap levels, etc., cubemap implies that the # of image slices is a multiple of 6) enum basis_texture_type { @@ -3744,7 +3744,7 @@ namespace basist basisu::packed_uint<3> m_total_slices; // The total # of compressed slices (1 slice per image, or 2 for alpha .basis files) basisu::packed_uint<3> m_total_images; // The total # of images - + basisu::packed_uint<1> m_tex_format; // enum basis_tex_format basisu::packed_uint<2> m_flags; // enum basist::header_flags basisu::packed_uint<1> m_tex_type; // enum basist::basis_texture_type @@ -3754,11 +3754,11 @@ namespace basist basisu::packed_uint<4> m_userdata0; // For client use basisu::packed_uint<4> m_userdata1; // For client use - basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook + basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook basisu::packed_uint<4> m_endpoint_cb_file_ofs; // The compressed endpoint codebook's file offset relative to the start of the file basisu::packed_uint<3> m_endpoint_cb_file_size; // The compressed endpoint codebook's size in bytes - basisu::packed_uint<2> m_total_selectors; // The number of selectors in the endpoint codebook + basisu::packed_uint<2> m_total_selectors; // The number of selectors in the endpoint codebook basisu::packed_uint<4> m_selector_cb_file_ofs; // The compressed selectors codebook's file offset relative to the start of the file basisu::packed_uint<3> m_selector_cb_file_size; // The compressed selector codebook's size in bytes @@ -3766,7 +3766,7 @@ namespace basist basisu::packed_uint<4> m_tables_file_size; // The file size in bytes of the compressed huffman codelength tables basisu::packed_uint<4> m_slice_desc_file_ofs; // The file offset to the slice description array, usually follows the header - + basisu::packed_uint<4> 
m_extended_file_ofs; // The file offset of the "extended" header and compressed data, for future use basisu::packed_uint<4> m_extended_file_size; // The file size in bytes of the "extended" header and compressed data, for future use }; @@ -3780,7 +3780,7 @@ namespace basist // High-level composite texture formats supported by the transcoder. // Each of these texture formats directly correspond to OpenGL/D3D/Vulkan etc. texture formats. // Notes: - // - If you specify a texture format that supports alpha, but the .basis file doesn't have alpha, the transcoder will automatically output a + // - If you specify a texture format that supports alpha, but the .basis file doesn't have alpha, the transcoder will automatically output a // fully opaque (255) alpha channel. // - The PVRTC1 texture formats only support power of 2 dimension .basis files, but this may be relaxed in a future version. // - The PVRTC1 transcoders are real-time encoders, so don't expect the highest quality. We may add a slower encoder with improved quality. @@ -3809,7 +3809,7 @@ namespace basist // ATC (mobile, Adreno devices, this is a niche format) cTFATC_RGB = 11, // Opaque, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. ATI ATC (GL_ATC_RGB_AMD) - cTFATC_RGBA = 12, // Opaque+alpha, alpha channel will be opaque for opaque .basis files. ATI ATC (GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD) + cTFATC_RGBA = 12, // Opaque+alpha, alpha channel will be opaque for opaque .basis files. ATI ATC (GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD) // FXT1 (desktop, Intel devices, this is a super obscure format) cTFFXT1_RGB = 17, // Opaque only, uses exclusively CC_MIXED blocks. Notable for having a 8x4 block size. GL_3DFX_texture_compression_FXT1 is supported on Intel integrated GPU's (such as HD 630). 
@@ -3908,7 +3908,7 @@ namespace basist basisu::vector m_block_endpoint_preds[2]; enum { cMaxPrevFrameLevels = 16 }; - basisu::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] + basisu::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] void clear() { @@ -3986,13 +3986,13 @@ namespace basist typedef basisu::vector selector_vec; const selector_vec& get_selectors() const { return m_local_selectors; } - + private: const basisu_lowlevel_etc1s_transcoder* m_pGlobal_codebook; endpoint_vec m_local_endpoints; selector_vec m_local_selectors; - + huffman_decoding_table m_endpoint_pred_model, m_delta_endpoint_model, m_selector_model, m_selector_history_buf_rle_model; uint32_t m_selector_history_buf_size; @@ -4013,7 +4013,7 @@ namespace basist // This flag is used internally when decoding to BC3. cDecodeFlagsBC1ForbidThreeColorBlocks = 8, - // The output buffer contains alpha endpoint/selector indices. + // The output buffer contains alpha endpoint/selector indices. // Used internally when decoding formats like ASTC that require both color and alpha data to be available when transcoding to the output format. cDecodeFlagsOutputHasAlphaIndices = 16, @@ -4220,11 +4220,11 @@ namespace basist // transcode_image_level() decodes a single mipmap level from the .basis file to any of the supported output texture formats. // It'll first find the slice(s) to transcode, then call transcode_slice() one or two times to decode both the color and alpha texture data (or RG texture data from two slices for BC5). // If the .basis file doesn't have alpha slices, the output alpha blocks will be set to fully opaque (all 255's). - // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. + // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. 
// output_blocks_buf_size_in_blocks_or_pixels should be at least the image level's total_blocks (num_blocks_x * num_blocks_y), or the total number of output pixels if fmt==cTFRGBA32. // output_row_pitch_in_blocks_or_pixels: Number of blocks or pixels per row. If 0, the transcoder uses the slice's num_blocks_x or orig_width (NOT num_blocks_x * 4). Ignored for PVRTC1 (due to texture swizzling). // output_rows_in_pixels: Ignored unless fmt is uncompressed (cRGBA32, etc.). The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). - // Notes: + // Notes: // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. // - This method assumes the output texture buffer is readable. In some cases to handle alpha, the transcoder will write temporary data to the output texture in // a first pass, which will be read in a second pass. @@ -4279,7 +4279,7 @@ namespace basist // basisu_transcoder_init() MUST be called before a .basis file can be transcoded. void basisu_transcoder_init(); - + enum debug_flags_t { cDebugFlagVisCRs = 1, @@ -4289,10 +4289,10 @@ namespace basist uint32_t get_debug_flags(); void set_debug_flags(uint32_t f); - // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ // Optional .KTX2 file format support // KTX2 reading optionally requires miniz or Zstd decompressors for supercompressed UASTC files. 
- // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ #if BASISD_SUPPORT_KTX2 #pragma pack(push) #pragma pack(1) @@ -4435,12 +4435,12 @@ namespace basist { case KTX2_DF_PRIMARIES_UNSPECIFIED: return "UNSPECIFIED"; case KTX2_DF_PRIMARIES_BT709: return "BT709"; - case KTX2_DF_PRIMARIES_BT601_EBU: return "EBU"; + case KTX2_DF_PRIMARIES_BT601_EBU: return "EBU"; case KTX2_DF_PRIMARIES_BT601_SMPTE: return "SMPTE"; case KTX2_DF_PRIMARIES_BT2020: return "BT2020"; case KTX2_DF_PRIMARIES_CIEXYZ: return "CIEXYZ"; case KTX2_DF_PRIMARIES_ACES: return "ACES"; - case KTX2_DF_PRIMARIES_ACESCC: return "ACESCC"; + case KTX2_DF_PRIMARIES_ACESCC: return "ACESCC"; case KTX2_DF_PRIMARIES_NTSC1953: return "NTSC1953"; case KTX2_DF_PRIMARIES_PAL525: return "PAL525"; case KTX2_DF_PRIMARIES_DISPLAYP3: return "DISPLAYP3"; @@ -4448,7 +4448,7 @@ namespace basist default: break; } return "?"; - } + } // Information about a single 2D texture "image" in a KTX2 file. struct ktx2_image_level_info @@ -4479,7 +4479,7 @@ namespace basist // true if the image is an I-Frame. Currently, for ETC1S textures, the first frame will always be an I-Frame, and subsequent frames will always be P-Frames. bool m_iframe_flag; }; - + // Thread-specific ETC1S/supercompressed UASTC transcoder state. (If you're not doing multithreading transcoding you can ignore this.) struct ktx2_transcoder_state { @@ -4497,9 +4497,9 @@ namespace basist // This class is quite similar to basisu_transcoder. It treats KTX2 files as a simple container for ETC1S/UASTC texture data. // It does not support 1D or 3D textures. - // It only supports 2D and cubemap textures, with or without mipmaps, texture arrays of 2D/cubemap textures, and texture video files. 
+ // It only supports 2D and cubemap textures, with or without mipmaps, texture arrays of 2D/cubemap textures, and texture video files. // It only supports raw non-supercompressed UASTC, ETC1S, UASTC+Zstd, or UASTC+zlib compressed files. - // DFD (Data Format Descriptor) parsing is purposely as simple as possible. + // DFD (Data Format Descriptor) parsing is purposely as simple as possible. // If you need to know how to interpret the texture channels you'll need to parse the DFD yourself after calling get_dfd(). class ktx2_transcoder { @@ -4540,7 +4540,7 @@ namespace basist uint32_t get_layers() const { return m_header.m_layer_count; } // Returns cETC1S or cUASTC4x4. Valid after init(). - basist::basis_tex_format get_format() const { return m_format; } + basist::basis_tex_format get_format() const { return m_format; } bool is_etc1s() const { return get_format() == basist::basis_tex_format::cETC1S; } @@ -4559,7 +4559,7 @@ namespace basist // Returns the DFD color primary. // We do not validate the color primaries, so the returned value may not be in the ktx2_df_color_primaries enum. ktx2_df_color_primaries get_dfd_color_primaries() const { return m_dfd_color_prims; } - + // Returns KTX2_KHR_DF_TRANSFER_LINEAR or KTX2_KHR_DF_TRANSFER_SRGB. uint32_t get_dfd_transfer_func() const { return m_dfd_transfer_func; } @@ -4567,9 +4567,9 @@ namespace basist // Returns 1 (ETC1S/UASTC) or 2 (ETC1S with an internal alpha channel). uint32_t get_dfd_total_samples() const { return m_dfd_samples; } - - // Returns the channel mapping for each DFD "sample". UASTC always has 1 sample, ETC1S can have one or two. - // Note the returned value SHOULD be one of the ktx2_df_channel_id enums, but we don't validate that. + + // Returns the channel mapping for each DFD "sample". UASTC always has 1 sample, ETC1S can have one or two. + // Note the returned value SHOULD be one of the ktx2_df_channel_id enums, but we don't validate that. 
// It's up to the caller to decide what to do if the value isn't in the enum. ktx2_df_channel_id get_dfd_channel_id0() const { return m_dfd_chan0; } ktx2_df_channel_id get_dfd_channel_id1() const { return m_dfd_chan1; } @@ -4607,18 +4607,18 @@ namespace basist // is_video() is only valid after start_transcoding() is called. // For ETC1S data, if this returns true you must currently transcode the file from first to last frame, in order, without skipping any frames. bool is_video() const { return m_is_video; } - + // start_transcoding() MUST be called before calling transcode_image(). // This method decompresses the ETC1S global endpoint/selector codebooks, which is not free, so try to avoid calling it excessively. bool start_transcoding(); - + // get_image_level_info() be called after init(), but the m_iframe_flag's won't be valid until start_transcoding() is called. // You can call this method before calling transcode_image_level() to retrieve basic information about the mipmap level's dimensions, etc. bool get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const; // transcode_image_level() transcodes a single 2D texture or cubemap face from the KTX2 file. // Internally it uses the same low-level transcode API's as basisu_transcoder::transcode_image_level(). - // If the file is UASTC and is supercompressed with Zstandard, and the file is a texture array or cubemap, it's highly recommended that each mipmap level is + // If the file is UASTC and is supercompressed with Zstandard, and the file is a texture array or cubemap, it's highly recommended that each mipmap level is // completely transcoded before switching to another level. Every time the mipmap level is changed all supercompressed level data must be decompressed using Zstandard as a single unit. // Currently ETC1S videos must always be transcoded from first to last frame (or KTX2 "layer"), in order, with no skipping of frames. 
// By default this method is not thread safe unless you specify a pointer to a user allocated thread-specific transcoder_state struct. @@ -4628,7 +4628,7 @@ namespace basist basist::transcoder_texture_format fmt, uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, ktx2_transcoder_state *pState = nullptr); - + private: const uint8_t* m_pData; uint32_t m_data_size; @@ -4637,22 +4637,22 @@ namespace basist basisu::vector m_levels; basisu::uint8_vec m_dfd; key_value_vec m_key_values; - + ktx2_etc1s_global_data_header m_etc1s_header; basisu::vector m_etc1s_image_descs; basist::basis_tex_format m_format; - + uint32_t m_dfd_color_model; ktx2_df_color_primaries m_dfd_color_prims; uint32_t m_dfd_transfer_func; uint32_t m_dfd_flags; uint32_t m_dfd_samples; ktx2_df_channel_id m_dfd_chan0, m_dfd_chan1; - + basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder; basist::basisu_lowlevel_uastc_transcoder m_uastc_transcoder; - + ktx2_transcoder_state m_def_transcoder_state; bool m_has_alpha; @@ -4712,7 +4712,7 @@ namespace basisu abort(); } } - + const size_t desired_size = element_size * new_capacity; size_t actual_size = 0; if (!pMover) @@ -4776,7 +4776,7 @@ namespace basisu if (m_p) free(m_p); - + m_p = new_p; } @@ -5140,7 +5140,7 @@ namespace basisu void debug_printf(const char* pFmt, ...) 
{ -#if BASISU_FORCE_DEVEL_MESSAGES +#if BASISU_FORCE_DEVEL_MESSAGES g_debug_printf = true; #endif if (g_debug_printf) @@ -5202,7 +5202,7 @@ namespace basist return static_cast(~crc); } - + enum etc_constants { cETC1BytesPerBlock = 8U, @@ -5275,14 +5275,14 @@ namespace basist //const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; - + static const uint8_t g_etc_5_to_8[32] = { 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255 }; struct decoder_etc_block { // big endian uint64: // bit ofs: 56 48 40 32 24 16 8 0 - // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 union { uint64_t m_uint64; @@ -5550,7 +5550,7 @@ namespace basist { return (m_bytes[3] & 2) != 0; } - + inline uint32_t get_inten_table(uint32_t subblock_id) const { assert(subblock_id < 2); @@ -5565,7 +5565,7 @@ namespace basist const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); return static_cast(b | (g << 3U) | (r << 6U)); } - + void get_block_colors(color32* pBlock_colors, uint32_t subblock_index) const { color32 b; @@ -5683,7 +5683,7 @@ namespace basist g = c.g; b = c.b; } - + static void unpack_color5(color32& result, uint16_t packed_color5, bool scaled) { result = unpack_color5(packed_color5, scaled, 255); @@ -5812,7 +5812,7 @@ namespace basist static void get_block_color5_r(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t &r) { assert(index < 4); - + uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2); const int* pInten_table = g_etc1_inten_tables[inten_table]; @@ -5988,7 +5988,7 @@ namespace basist { 1, 2, 2, 2 }, { 1, 2, 3, 3 }, }; - + static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256]; static uint8_t 
g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256]; @@ -7370,9 +7370,9 @@ namespace basist return best_err; } #endif // BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES - + static -#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES +#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES const #endif etc1_g_to_eac_conversion s_etc1_g_to_etc2_a8[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] = @@ -7862,18 +7862,18 @@ namespace basist #endif static bool g_transcoder_initialized; - + // Library global initialization. Requires ~9 milliseconds when compiled and executed natively on a Core i7 2.2 GHz. // If this is too slow, these computed tables can easilky be moved to be compiled in. void basisu_transcoder_init() { if (g_transcoder_initialized) { - BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); return; } - - BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); + + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); #if BASISD_SUPPORT_UASTC uastc_init(); @@ -7882,7 +7882,7 @@ namespace basist #if BASISD_SUPPORT_ASTC transcoder_init_astc(); #endif - + #if BASISD_WRITE_NEW_ASTC_TABLES create_etc1_to_astc_conversion_table_0_47(); create_etc1_to_astc_conversion_table_0_255(); @@ -8138,7 +8138,7 @@ namespace basist std::swap(l, h); pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[best_mapping][0]; } - + pDst_block->set_low_color(static_cast(l)); pDst_block->set_high_color(static_cast(h)); @@ -8298,7 +8298,7 @@ namespace basist fxt1_block* pBlock = static_cast(pDst); // CC_MIXED is basically DXT1 with different encoding tricks. - // So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless. + // So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless. 
// (It's not completely lossless because FXT1 rounds in its color lerps while DXT1 doesn't, but it should be good enough.) dxt1_block blk; convert_etc1s_to_dxt1(&blk, pEndpoints, pSelectors, false); @@ -8311,7 +8311,7 @@ namespace basist uint32_t g0 = color0.g & 1; uint32_t g1 = color1.g & 1; - + color0.g >>= 1; color1.g >>= 1; @@ -8319,7 +8319,7 @@ namespace basist blk.m_selectors[1] = conv_dxt1_to_fxt1_sels(blk.m_selectors[1]); blk.m_selectors[2] = conv_dxt1_to_fxt1_sels(blk.m_selectors[2]); blk.m_selectors[3] = conv_dxt1_to_fxt1_sels(blk.m_selectors[3]); - + if ((blk.get_selector(0, 0) >> 1) != (g0 ^ g1)) { std::swap(color0, color1); @@ -8333,7 +8333,7 @@ namespace basist if (fxt1_subblock == 0) { - pBlock->m_hi.m_mode = 1; + pBlock->m_hi.m_mode = 1; pBlock->m_hi.m_alpha = 0; pBlock->m_hi.m_glsb = g1 | (g1 << 1); pBlock->m_hi.m_r0 = color0.r; @@ -8654,7 +8654,7 @@ namespace basist { uint32_t r; decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r); - + pDst_block->set_low_alpha(r); pDst_block->set_high_alpha(r); pDst_block->m_selectors[0] = 0; @@ -8737,7 +8737,7 @@ namespace basist static const uint8_t g_pvrtc_4[16] = { 0,16,33,49,66,82,99,115,140,156,173,189,206,222,239,255 }; static const uint8_t g_pvrtc_3[8] = { 0,33,74,107,148,181,222,255 }; static const uint8_t g_pvrtc_alpha[9] = { 0,34,68,102,136,170,204,238,255 }; - + static const uint8_t g_pvrtc_5_floor[256] = { 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3, @@ -8761,7 +8761,7 @@ namespace basist 24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28, 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31,31,31,31,31 }; - + static const uint8_t g_pvrtc_4_floor[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -8785,7 +8785,7 @@ namespace basist 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14, 
14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15 }; - + static const uint8_t g_pvrtc_3_floor[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -8809,7 +8809,7 @@ namespace basist 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; - + static const uint8_t g_pvrtc_alpha_floor[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -8916,10 +8916,10 @@ namespace basist } assert((r < 32) && (g < 32) && (b < 32) && (a < 16)); - + return color32(r, g, b, a); } - + inline color32 get_endpoint_8888(uint32_t endpoint_index) const { assert(endpoint_index < 2); @@ -8966,7 +8966,7 @@ namespace basist a = g_pvrtc_alpha[a]; } - + return color32(r, g, b, a); } @@ -8975,7 +8975,7 @@ namespace basist color32 c(get_endpoint_8888(endpoint_index)); return c.r + c.g + c.b + c.a; } - + inline uint32_t get_opaque_endpoint_l0() const { uint32_t packed = m_endpoints & 0xFFFE; @@ -9090,7 +9090,7 @@ namespace basist else m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; } - + // opaque endpoints: 554 or 555 // transparent endpoints: 3443 or 3444 inline void set_endpoint_raw(uint32_t endpoint_index, const color32& c, bool opaque_endpoint) @@ -9143,7 +9143,7 @@ namespace basist else m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; } - + inline void set_endpoint_floor(uint32_t endpoint_index, const color32& c) { assert(endpoint_index < 2); @@ -9368,7 +9368,7 @@ namespace basist for (int ey = 0; ey < 3; ey++) { - int by = y + ey - 1; + int by = y + ey - 1; const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; @@ -9376,7 +9376,7 @@ namespace basist for (int ex = 0; ex < 3; ex++) { - int bx = 0 + ex - 1; + int bx = 0 + ex - 1; const uint32_t e = pE[bx & x_mask]; @@ -9525,8 +9525,8 @@ namespace basist } static void fixup_pvrtc1_4_modulation_rgba( - const decoder_etc_block* pETC_Blocks, - const uint32_t* 
pPVRTC_endpoints, + const decoder_etc_block* pETC_Blocks, + const uint32_t* pPVRTC_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, void *pAlpha_blocks, const endpoint* pEndpoints, const selector* pSelectors) { @@ -9549,7 +9549,7 @@ namespace basist for (int ey = 0; ey < 3; ey++) { - int by = y + ey - 1; + int by = y + ey - 1; const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; @@ -9557,7 +9557,7 @@ namespace basist for (int ex = 0; ex < 3; ex++) { - int bx = 0 + ex - 1; + int bx = 0 + ex - 1; const uint32_t e = pE[bx & x_mask]; @@ -9571,13 +9571,13 @@ namespace basist for (int x = 0; x < static_cast(num_blocks_x); x++, block_index++) { const decoder_etc_block& src_block = pETC_Blocks[block_index]; - + const uint16_t* pSrc_alpha_block = reinterpret_cast(static_cast(pAlpha_blocks) + x + (y * num_blocks_x)); const endpoint* pAlpha_endpoints = &pEndpoints[pSrc_alpha_block[0]]; const selector* pAlpha_selectors = &pSelectors[pSrc_alpha_block[1]]; - + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); - + uint32_t swizzled = x_swizzle | y_swizzle; if (num_blocks_x != num_blocks_y) { @@ -9720,7 +9720,7 @@ namespace basist const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5_selector_ranges) / sizeof(g_etc1_to_bc7_m5_selector_ranges[0]); static uint32_t g_etc1_to_bc7_m5_selector_range_index[4][4]; - + const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS = 10; static const uint8_t g_etc1_to_bc7_m5_selector_mappings[NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS][4] = { @@ -9742,7 +9742,7 @@ namespace basist uint8_t m_hi; uint16_t m_err; }; - + static const etc1_to_bc7_m5_solution g_etc1_to_bc7_m5_color[32 * 8 * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS * NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES] = { /**** start inlining basisu_transcoder_tables_bc7_m5_color.inc ****/ 
{0,7,18},{0,5,2},{0,4,1},{0,3,8},{0,4,35},{0,3,24},{0,3,12},{0,2,29},{0,2,36},{0,2,30},{0,7,18},{0,5,2},{0,4,1},{0,3,8},{2,0,35},{0,3,24},{0,3,12},{0,2,29},{4,0,35},{0,2,29},{0,3,0},{0,3,0},{0,3,0},{0,1,1},{0,1,2},{0,1,2},{0,1,2},{0,1,1},{1,0,3},{0,1,2},{0,3,0}, @@ -10228,7 +10228,7 @@ namespace basist {5,127,1413}, /**** ended inlining basisu_transcoder_tables_bc7_m5_color.inc ****/ }; - + static dxt_selector_range g_etc1_to_bc7_m5a_selector_ranges[] = { { 0, 3 }, @@ -10303,7 +10303,7 @@ namespace basist {208,5,2}, /**** ended inlining basisu_transcoder_tables_bc7_m5_alpha.inc ****/ }; - + static inline uint32_t set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t cur_ofs) { assert(num_bits < 32); @@ -10450,7 +10450,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1_to_bc7_m5_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -10529,7 +10529,7 @@ namespace basist int mapping_err = block_colors[s].g - colors[k]; mapping_err *= mapping_err; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. 
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) mapping_err *= 5; @@ -10540,7 +10540,7 @@ namespace basist best_k = k; } } // k - + total_err += best_mapping_err; output_selectors |= (best_k << (s * 2)); } // s @@ -10555,7 +10555,7 @@ namespace basist } // lo } // hi - + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, best_output_selectors); n++; if ((n & 31) == 31) @@ -10594,7 +10594,7 @@ namespace basist {127,104},{126,105},{126,106},{127,106},{127,107},{126,108},{125,109},{127,109},{126,110},{126,111},{127,111},{127,112},{126,113},{126,114},{127,114},{127,115}, {126,116},{126,117},{127,117},{127,118},{126,119},{126,120},{127,120},{127,121},{126,122},{126,123},{127,123},{127,124},{126,125},{126,126},{127,126},{127,127} }; - + static void transcoder_init_bc7_mode5() { #if 0 @@ -10622,9 +10622,9 @@ namespace basist } } // hi - + } // lo - + printf("{%u,%u},", g_bc7_m5_equals_1[i].m_hi, g_bc7_m5_equals_1[i].m_lo); if ((i & 15) == 15) printf("\n"); } @@ -10648,7 +10648,7 @@ namespace basist static void convert_etc1s_to_bc7_m5_color(void* pDst, const endpoint* pEndpoints, const selector* pSelector) { bc7_mode_5* pDst_block = static_cast(pDst); - + // First ensure the block is cleared to all 0's static_cast(pDst)[0] = 0; static_cast(pDst)[1] = 0; @@ -10774,7 +10774,7 @@ namespace basist pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_lo; pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_lo; pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_lo; - + s_inv = 3; } else @@ -10795,7 +10795,7 @@ namespace basist for (uint32_t x = 0; x < 4; x++) { const uint32_t s = pSelector->get_selector(x, y); - + const uint32_t os = pSelectors_xlat[s] ^ s_inv; output_bits |= (os << output_bit_ofs); @@ -10825,7 +10825,7 @@ namespace basist pDst_block->m_lo.m_a0 = r; pDst_block->m_lo.m_a1_0 = r & 63; pDst_block->m_hi.m_a1_1 = r >> 6; - + return; } else if (pSelector->m_num_unique_selectors == 2) @@ -10875,7 +10875,7 @@ namespace basist } const 
uint32_t selector_range_table = g_etc1_to_bc7_m5a_selector_range_index[low_selector][high_selector]; - + const etc1_g_to_bc7_m5a_conversion* pTable = &g_etc1_g_to_bc7_m5a[inten_table * (32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES) + base_color_r * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES + selector_range_table]; pDst_block->m_lo.m_a0 = pTable->m_lo; @@ -11827,7 +11827,7 @@ namespace basist // The best selector mapping to use given a base base+inten table and used selector range for converting grayscale data. static uint8_t g_etc1_to_astc_best_grayscale_mapping[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES]; - + #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY static const etc1_to_astc_solution g_etc1_to_astc_0_255[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] = { /**** start inlining basisu_transcoder_tables_astc_0_255.inc ****/ @@ -12374,7 +12374,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 8; @@ -12395,7 +12395,7 @@ namespace basist mapping_best_high[m] = best_hi; mapping_best_err[m] = best_err; highest_best_err = basisu::maximum(highest_best_err, best_err); - + } // m for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++) @@ -12471,7 +12471,7 @@ namespace basist { int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]]; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. 
In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. int err_scale = 1; if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) @@ -12500,9 +12500,9 @@ namespace basist uint64_t err = mapping_best_err[m]; err = basisu::minimum(err, 0xFFFF); - + fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err); - + n++; if ((n & 31) == 31) fprintf(pFile, "\n"); @@ -12585,14 +12585,14 @@ namespace basist struct astc_block_params { // 2 groups of 5, but only a max of 8 are used (RRGGBBAA00) - uint8_t m_endpoints[10]; + uint8_t m_endpoints[10]; uint8_t m_weights[32]; }; - - // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2). + + // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2). // We're always going to output blocks containing alpha, even if the input doesn't have alpha, for simplicity. // Each block always has 4x4 weights, uses range 13 BISE encoding on the endpoints (0-47), and each weight ranges from 0-3. This encoding should be roughly equal in quality vs. BC1 for color. - // 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47. + // 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47. // Note the input [0,47] endpoint values are not linear - they are encoded as outlined in the ASTC spec: // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization // 32 total weights, stored as 16 CA CA, each ranging from 0-3. 
@@ -12614,7 +12614,7 @@ namespace basist astc_encode_trits(pOutput, pBlock->m_endpoints + 5, bit_pos, 4); // Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order. - + for (uint32_t i = 0; i < 32; i++) { static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 }; @@ -12623,7 +12623,7 @@ namespace basist } } - // CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights + // CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights // This ASTC mode is basically block truncation coding (BTC) using 1-bit weights and 8-bit/component endpoints - very convenient. static void astc_pack_block_cem_12_weight_range0(uint32_t* pOutput, const astc_block_params* pBlock) { @@ -12661,7 +12661,7 @@ namespace basist // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x00; pBytes[3] = 0x00; pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0; - + pOutput[2] = 0; pOutput[3] = 0; @@ -12687,7 +12687,7 @@ namespace basist // Write constant block mode, color component selector, number of partitions, color endpoint mode // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode pBytes[0] = 0x42; pBytes[1] = 0x00; pBytes[2] = 0x01; pBytes[3] = 0x00; - + pOutput[1] = 0; pOutput[2] = 0; pOutput[3] = 0; @@ -12715,7 +12715,7 @@ namespace basist { uint8_t m_lo, m_hi; } g_astc_single_color_encoding_1[256]; - + static void transcoder_init_astc() { for (uint32_t base_color = 0; base_color < 32; base_color++) @@ -12793,7 +12793,7 @@ namespace basist g_ise_to_unquant[bit | (trit << 4)] = unq; } } - + // Compute table used for optimal single color encoding. 
for (int i = 0; i < 256; i++) { @@ -12808,9 +12808,9 @@ namespace basist int l = lo_v | (lo_v << 8); int h = hi_v | (hi_v << 8); - + int v = ((l * (64 - 21) + (h * 21) + 32) / 64) >> 8; - + int e = abs(v - i); if (e < lowest_e) @@ -12832,7 +12832,7 @@ namespace basist for (int lo = 0; lo < 48; lo++) { const int lo_v = g_ise_to_unquant[lo]; - + int e = abs(lo_v - i); if (e < lowest_e) @@ -12847,7 +12847,7 @@ namespace basist // Converts opaque or color+alpha ETC1S block to ASTC 4x4. // This function tries to use the best ASTC mode given the block's actual contents. - static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector, + static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector, bool transcode_alpha, const endpoint *pEndpoint_codebook, const selector *pSelector_codebook) { astc_block_params blk; @@ -12891,7 +12891,7 @@ namespace basist // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-void-extent-blocks uint32_t r, g, b; decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); - + uint32_t* pOutput = static_cast(pDst_block); uint8_t* pBytes = reinterpret_cast(pDst_block); @@ -12911,7 +12911,7 @@ namespace basist } else if ((pSelector->m_num_unique_selectors <= 2) && (num_unique_alpha_selectors <= 2)) { - // Both color and alpha use <= 2 unique selectors each. + // Both color and alpha use <= 2 unique selectors each. // Use block truncation coding, which is lossless with ASTC (8-bit endpoints, 1-bit weights). color32 block_colors[4]; decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table); @@ -12958,7 +12958,7 @@ namespace basist { uint32_t s = alpha_selectors.get_selector(x, y); s = (s == alpha_high_selector) ? 
1 : 0; - + blk.m_weights[(x + y * 4) * 2 + 1] = static_cast(s); } // x } // y @@ -12991,12 +12991,12 @@ namespace basist return; } - + // Either alpha and/or color use > 2 unique selectors each, so we must do something more complex. - + #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY // The optional higher quality modes use 8-bits endpoints vs. [0,47] endpoints. - + // If the block's base color is grayscale, all pixels are grayscale, so encode the block as Luminance+Alpha. if ((base_color.r == base_color.g) && (base_color.r == base_color.b)) { @@ -13030,7 +13030,7 @@ namespace basist { // Convert ETC1S alpha const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; @@ -13038,7 +13038,7 @@ namespace basist blk.m_endpoints[2] = pTable_g[best_mapping].m_lo; blk.m_endpoints[3] = pTable_g[best_mapping].m_hi; - + const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0]; for (uint32_t y = 0; y < 4; y++) @@ -13082,10 +13082,10 @@ namespace basist { // Convert ETC1S alpha const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; - + const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[base_color.g][inten_table][selector_range_table]; blk.m_endpoints[0] = pTable_g[best_mapping].m_lo; @@ -13227,7 +13227,7 @@ namespace basist { // Convert ETC1S alpha const uint32_t 
alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; @@ -13271,7 +13271,7 @@ namespace basist const uint32_t r = block_colors[low_selector].r; const uint32_t g = block_colors[low_selector].g; const uint32_t b = block_colors[low_selector].b; - + blk.m_endpoints[0] = g_astc_single_color_encoding_1[r].m_lo; blk.m_endpoints[1] = g_astc_single_color_encoding_1[r].m_hi; @@ -13373,7 +13373,7 @@ namespace basist blk.m_endpoints[4] = pTable_b[best_mapping].m_lo; blk.m_endpoints[5] = pTable_b[best_mapping].m_hi; - + int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]]; int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]]; bool invert = false; @@ -15466,8 +15466,8 @@ namespace basist static void transcoder_init_atc() { prepare_atc_single_color_table(g_pvrtc2_match45_equals_1, 16, 32, 1); - prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1); - prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1); + prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1); + prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1); prepare_atc_single_color_table(g_pvrtc2_match4, 1, 16, 3); prepare_atc_single_color_table(g_atc_match5, 1, 32, 3); @@ -15521,7 +15521,7 @@ namespace basist pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match56_equals_1[g].m_lo, g_atc_match55_equals_1[b].m_lo); pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match56_equals_1[g].m_hi, g_atc_match55_equals_1[b].m_hi); - + pBlock->m_sels[0] = 0x55; pBlock->m_sels[1] = 0x55; 
pBlock->m_sels[2] = 0x55; @@ -15656,7 +15656,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -15730,7 +15730,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -15760,7 +15760,7 @@ namespace basist } // inten fclose(pFile); - + // PVRTC2 45 fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_45.inc", "w"); @@ -15805,7 +15805,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. 
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -15882,7 +15882,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -15959,7 +15959,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -16036,7 +16036,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. 
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -16164,12 +16164,12 @@ namespace basist { uint8_t m_l, m_h; } g_pvrtc2_trans_match44[256]; - + static struct { uint8_t m_l, m_h; } g_pvrtc2_alpha_match33[256]; - + static struct { uint8_t m_l, m_h; @@ -16179,7 +16179,7 @@ namespace basist { uint8_t m_l, m_h; } g_pvrtc2_alpha_match33_3[256]; - + // PVRTC2 can be forced to look like a slightly weaker variant of ATC/BC1, so that's what we do here for simplicity. static void convert_etc1s_to_pvrtc2_rgb(void* pDst, const endpoint* pEndpoints, const selector* pSelector) { @@ -16293,7 +16293,7 @@ namespace basist } typedef struct { float c[4]; } vec4F; - + static inline vec4F* vec4F_set_scalar(vec4F* pV, float x) { pV->c[0] = x; pV->c[1] = x; pV->c[2] = x; pV->c[3] = x; return pV; } static inline vec4F* vec4F_set(vec4F* pV, float x, float y, float z, float w) { pV->c[0] = x; pV->c[1] = y; pV->c[2] = z; pV->c[3] = w; return pV; } static inline vec4F* vec4F_saturate_in_place(vec4F* pV) { pV->c[0] = saturate(pV->c[0]); pV->c[1] = saturate(pV->c[1]); pV->c[2] = saturate(pV->c[2]); pV->c[3] = saturate(pV->c[3]); return pV; } @@ -16311,9 +16311,9 @@ namespace basist } static inline int sq(int x) { return x * x; } - - // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0. - // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! + + // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0. + // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! // And there's nothing straightforward we can do because using the other modes is too expensive/complex. 
I can see why Apple didn't adopt it. static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook) { @@ -16368,13 +16368,13 @@ namespace basist const uint32_t high_selector = pSelector->m_hi_selector; const int num_unique_color_selectors = pSelector->m_num_unique_selectors; - + // We need to reencode the block at the pixel level, unfortunately, from two ETC1S planes. // Do 4D incremental PCA, project all pixels to this hyperline, then quantize to packed endpoints and compute the modulation values. const int br = (base_color.r << 3) | (base_color.r >> 2); const int bg = (base_color.g << 3) | (base_color.g >> 2); const int bb = (base_color.b << 3) | (base_color.b >> 2); - + color32 block_cols[4]; for (uint32_t i = 0; i < 4; i++) { @@ -16403,14 +16403,14 @@ namespace basist decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); // Mod 0 - uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255; + uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255; uint32_t la0 = g_pvrtc2_alpha_match33_0[constant_alpha_val].m_l; uint32_t cr0 = (lr0 << 1) | (lr0 >> 3); uint32_t cg0 = (lg0 << 1) | (lg0 >> 3); uint32_t cb0 = (lb0 << 2) | (lb0 >> 1); uint32_t ca0 = (la0 << 1); - + cr0 = (cr0 << 3) | (cr0 >> 2); cg0 = (cg0 << 3) | (cg0 >> 2); cb0 = (cb0 << 3) | (cb0 >> 2); @@ -16439,14 +16439,14 @@ namespace basist uint32_t cg3 = (lg3 << 1) | (lg3 >> 3); uint32_t cb3 = (lb3 << 1) | (lb3 >> 3); uint32_t ca3 = (la3 << 1) | 1; - + cr3 = (cr3 << 3) | (cr3 >> 2); cg3 = (cg3 << 3) | (cg3 >> 2); cb3 = (cb3 << 3) | (cb3 >> 2); ca3 = (ca3 << 4) | ca3; uint32_t err3 = sq(cr3 - r) + sq(cg3 - g) + sq(cb3 - b) + sq(ca3 - constant_alpha_val) * 2; - + // Mod 1 uint32_t lr1 = g_pvrtc2_trans_match44[r].m_l, lg1 = g_pvrtc2_trans_match44[g].m_l, lb1 = g_pvrtc2_trans_match34[b].m_l; uint32_t 
hr1 = g_pvrtc2_trans_match44[r].m_h, hg1 = g_pvrtc2_trans_match44[g].m_h, hb1 = g_pvrtc2_trans_match34[b].m_h; @@ -16521,7 +16521,7 @@ namespace basist // It's a solid color block. uint32_t low_a = block_cols[alpha_selectors.m_lo_selector].a; uint32_t high_a = block_cols[alpha_selectors.m_hi_selector].a; - + const float S = 1.0f / 255.0f; vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, low_a * S); vec4F_set(&maxColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, high_a * S); @@ -16533,7 +16533,7 @@ namespace basist vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, constant_alpha_val * S); vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, constant_alpha_val * S); } - // See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis). + // See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis). // To keep quality up we need to use full 4D PCA in this case. 
else if ((block_cols[low_selector].c[0] == 0) || (block_cols[high_selector].c[0] == 255) || (block_cols[low_selector].c[1] == 0) || (block_cols[high_selector].c[1] == 255) || @@ -16584,7 +16584,7 @@ namespace basist } vec4F_normalize_in_place(&axis); - + if (vec4F_dot(&axis, &axis) < .5f) vec4F_set_scalar(&axis, .5f); @@ -16684,10 +16684,10 @@ namespace basist // 4433 4443 color32 trialMinColor, trialMaxColor; - + trialMinColor.set_clamped((int)(minColor.c[0] * 15.0f + .5f), (int)(minColor.c[1] * 15.0f + .5f), (int)(minColor.c[2] * 7.0f + .5f), (int)(minColor.c[3] * 7.0f + .5f)); trialMaxColor.set_clamped((int)(maxColor.c[0] * 15.0f + .5f), (int)(maxColor.c[1] * 15.0f + .5f), (int)(maxColor.c[2] * 15.0f + .5f), (int)(maxColor.c[3] * 7.0f + .5f)); - + pBlock->set_trans_low_color(trialMinColor.r, trialMinColor.g, trialMinColor.b, trialMinColor.a); pBlock->set_trans_high_color(trialMaxColor.r, trialMaxColor.g, trialMaxColor.b, trialMaxColor.a); @@ -16760,7 +16760,7 @@ namespace basist } } } - + static void transcoder_init_pvrtc2() { for (uint32_t v = 0; v < 256; v++) @@ -16866,7 +16866,7 @@ namespace basist g_pvrtc2_trans_match34[v].m_l = (uint8_t)best_l; g_pvrtc2_trans_match34[v].m_h = (uint8_t)best_h; } - + for (uint32_t v = 0; v < 256; v++) { int best_l = 0, best_h = 0, lowest_err = INT_MAX; @@ -16994,7 +16994,7 @@ namespace basist sym_codec.stop(); m_local_selectors.resize(num_selectors); - + if (!sym_codec.init(pSelectors_data, selectors_data_size)) { BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 5\n"); @@ -17019,7 +17019,7 @@ namespace basist BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: hybrid global selector codebooks are unsupported\n"); return false; } - + const bool used_raw_encoding = (sym_codec.get_bits(1) == 1); if (used_raw_encoding) @@ -17200,7 +17200,7 @@ namespace basist if (!output_rows_in_pixels) output_rows_in_pixels = orig_height; } - + basisu::vector* pPrev_frame_indices = nullptr; if 
(is_video) { @@ -17228,12 +17228,12 @@ namespace basist } approx_move_to_front selector_history_buf(m_selector_history_buf_size); - + uint32_t cur_selector_rle_count = 0; decoder_etc_block block; memset(&block, 0, sizeof(block)); - + //block.set_flip_bit(true); // Setting the flip bit to false to be compatible with the Khronos KDFS. block.set_flip_bit(false); @@ -17481,7 +17481,7 @@ namespace basist case block_format::cETC1: { decoder_etc_block* pDst_block = reinterpret_cast(static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes); - + block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false)); block.set_inten_table(0, pEndpoints->m_inten5); block.set_inten_table(1, pEndpoints->m_inten5); @@ -17532,7 +17532,7 @@ namespace basist const uint32_t low_selector = pSelector->m_lo_selector; const uint32_t high_selector = pSelector->m_hi_selector; - // Get block's RGB bounding box + // Get block's RGB bounding box color32 block_colors[2]; decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector); @@ -17548,7 +17548,7 @@ namespace basist pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints; #else assert(0); -#endif +#endif break; } @@ -17556,7 +17556,7 @@ namespace basist { #if BASISD_SUPPORT_PVRTC1 assert(pAlpha_blocks); - + block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false)); block.set_inten_table(0, pEndpoints->m_inten5); block.set_inten_table(1, pEndpoints->m_inten5); @@ -17564,7 +17564,7 @@ namespace basist ((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block; - // Get block's RGBA bounding box + // Get block's RGBA bounding box const color32& base_color = pEndpoints->m_color5; const uint32_t inten_table = pEndpoints->m_inten5; const uint32_t low_selector = pSelector->m_lo_selector; @@ -17599,7 +17599,7 @@ namespace basist pPVRTC_endpoints[block_x + 
block_y * num_blocks_x] = temp.m_endpoints; #else assert(0); -#endif +#endif break; } @@ -17683,7 +17683,7 @@ namespace basist assert(transcode_alpha); void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; - + convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &endpoints[0], &selectors[0]); #endif break; @@ -17699,10 +17699,10 @@ namespace basist { assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); - + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); - + int colors[4]; decoder_etc_block::get_block_colors5_g(colors, pEndpoints->m_color5, pEndpoints->m_inten5); @@ -17716,7 +17716,7 @@ namespace basist pDst_pixels[3+4] = static_cast(colors[(s >> 2) & 3]); pDst_pixels[3+8] = static_cast(colors[(s >> 4) & 3]); pDst_pixels[3+12] = static_cast(colors[(s >> 6) & 3]); - + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t); } } @@ -17745,7 +17745,7 @@ namespace basist color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); - + for (uint32_t y = 0; y < max_y; y++) { const uint32_t s = pSelector->m_selectors[y]; @@ -17866,7 +17866,7 @@ namespace basist cur = byteswap_uint16(cur); cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3]; - + if (BASISD_IS_BIG_ENDIAN) cur = byteswap_uint16(cur); @@ -17998,7 +17998,7 @@ namespace basist if (!output_row_pitch_in_blocks_or_pixels) output_row_pitch_in_blocks_or_pixels = orig_width; - if (!output_rows_in_pixels) + if (!output_rows_in_pixels) output_rows_in_pixels = orig_height; // Now make sure the output buffer is large enough, or we'll overwrite memory. 
@@ -18078,7 +18078,7 @@ namespace basist // Switch to PVRTC1 RGB if the input doesn't have alpha. target_format = transcoder_texture_format::cTFPVRTC1_4_RGB; } - + const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; @@ -18109,7 +18109,7 @@ namespace basist { //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); - + if (!status) { BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); @@ -18234,7 +18234,7 @@ namespace basist if (basis_file_has_alpha_slices) { - // First decode the alpha data + // First decode the alpha data //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_A8, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); } @@ -18272,8 +18272,8 @@ namespace basist return false; #else assert(bytes_per_block_or_pixel == 16); - - // First decode the alpha data + + // First decode the alpha data if 
(basis_file_has_alpha_slices) { //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); @@ -18402,7 +18402,7 @@ namespace basist #else assert(bytes_per_block_or_pixel == 16); - // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) { //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); @@ -18462,7 +18462,7 @@ namespace basist } else { - // Now decode the color data and transcode to PVRTC2 RGBA. + // Now decode the color data and transcode to PVRTC2 RGBA. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels); } @@ -18483,7 +18483,7 @@ namespace basist { // Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. 
- // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cA32, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); @@ -18524,7 +18524,7 @@ namespace basist { // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. - // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); @@ -18626,7 +18626,7 @@ namespace basist return status; } - + basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder() { } @@ -18692,7 +18692,7 @@ namespace basist for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y) { void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; - + for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t *)pDst_block + output_block_or_pixel_stride_in_bytes) 
{ switch (fmt) @@ -18722,7 +18722,7 @@ namespace basist } case block_format::cBC4: { - if (channel0 < 0) + if (channel0 < 0) channel0 = 0; status = transcode_uastc_to_bc4(*pSource_block, pDst_block, high_quality, channel0); break; @@ -18885,7 +18885,7 @@ namespace basist return false; #endif } - + bool basisu_lowlevel_uastc_transcoder::transcode_image( transcoder_texture_format target_format, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, @@ -18907,7 +18907,7 @@ namespace basist { BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: source data buffer too small\n"); return false; - } + } if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA)) { @@ -18934,7 +18934,7 @@ namespace basist BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: output buffer size too small\n"); return false; } - + bool status = false; // UASTC4x4 @@ -18945,7 +18945,7 @@ namespace basist //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC1, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1); - + if (!status) { BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); @@ -19162,7 +19162,7 @@ namespace basist return status; } - + basisu_transcoder::basisu_transcoder() : m_ready_to_transcode(false) { @@ -19190,7 +19190,7 @@ namespace basist return false; } } -#endif +#endif return true; } @@ -19277,7 +19277,7 @@ namespace basist return false; } } - + // This flag dates back to pre-Basis Universal, 
when .basis supported full ETC1 too. if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0) { @@ -19293,7 +19293,7 @@ namespace basist return false; } } - + if ((pHeader->m_slice_desc_file_ofs >= data_size) || ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices)) ) @@ -19409,12 +19409,12 @@ namespace basist image_info.m_image_index = image_index; image_info.m_total_levels = total_levels; - + image_info.m_alpha_flag = false; // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) - image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; else image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; @@ -19537,13 +19537,13 @@ namespace basist image_info.m_image_index = image_index; image_info.m_level_index = level_index; - + // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. 
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; else image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; - + image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0; image_info.m_width = slice_desc.m_num_blocks_x * 4; image_info.m_height = slice_desc.m_num_blocks_y * 4; @@ -19601,7 +19601,7 @@ namespace basist file_info.m_tex_format = static_cast(static_cast(pHeader->m_tex_format)); file_info.m_etc1s = (pHeader->m_tex_format == (int)basis_tex_format::cETC1S); - + file_info.m_y_flipped = (pHeader->m_flags & cBASISHeaderFlagYFlipped) != 0; file_info.m_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; @@ -19666,7 +19666,7 @@ namespace basist return true; } - + bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size) { if (!validate_header_quick(pData, data_size)) @@ -19774,7 +19774,7 @@ namespace basist m_lowlevel_etc1s_decoder.clear(); } } - + m_ready_to_transcode = true; return true; @@ -19785,7 +19785,7 @@ namespace basist m_lowlevel_etc1s_decoder.clear(); m_ready_to_transcode = false; - + return true; } @@ -19824,7 +19824,7 @@ namespace basist const basis_slice_desc& slice_desc = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_index]; uint32_t total_4x4_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; - + if (basis_block_format_is_uncompressed(fmt)) { // Assume the output buffer is orig_width by orig_height @@ -19887,7 +19887,7 @@ namespace basist BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_size, or passed in buffer too small\n"); return false; } - + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) { return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, @@ -19975,7 +19975,7 @@ namespace basist if 
(!output_row_pitch_in_blocks_or_pixels) output_row_pitch_in_blocks_or_pixels = num_blocks_x; - + if ((fmt == block_format::cETC2_EAC_A8) || (fmt == block_format::cETC2_EAC_R11)) { #if BASISD_SUPPORT_ETC2_EAC_A8 @@ -20061,7 +20061,7 @@ namespace basist if (slice_index < 0) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: failed finding slice index\n"); - // Unable to find the requested image/level + // Unable to find the requested image/level return false; } @@ -20070,7 +20070,7 @@ namespace basist // Switch to PVRTC1 RGB if the input doesn't have alpha. fmt = transcoder_texture_format::cTFPVRTC1_4_RGB; } - + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) { if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsHasAlpha) @@ -20107,7 +20107,7 @@ namespace basist } } } - + bool status = false; const uint32_t total_slice_blocks = pSlice_descs[slice_index].m_num_blocks_x * pSlice_descs[slice_index].m_num_blocks_y; @@ -20115,11 +20115,11 @@ namespace basist if (((fmt == transcoder_texture_format::cTFPVRTC1_4_RGB) || (fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA)) && (output_blocks_buf_size_in_blocks_or_pixels > total_slice_blocks)) { // The transcoder doesn't write beyond total_slice_blocks, so we need to clear the rest ourselves. - // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. + // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. // However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block_or_pixel. This is all the transcoder actually writes to memory. 
memset(static_cast(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel); } - + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) { const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; @@ -20131,7 +20131,7 @@ namespace basist pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); } - else + else { // ETC1S const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; @@ -20157,14 +20157,14 @@ namespace basist decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); } // if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) - + if (!status) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning false\n"); } else { - //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n"); + //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n"); } return status; @@ -20378,13 +20378,13 @@ namespace basist } return false; } - + uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt) { switch (fmt) { case transcoder_texture_format::cTFRGBA32: - return sizeof(uint32_t); + return sizeof(uint32_t); case transcoder_texture_format::cTFRGB565: case transcoder_texture_format::cTFBGR565: case transcoder_texture_format::cTFRGBA4444: @@ -20394,7 +20394,7 @@ namespace basist } return 0; } - + uint32_t basis_get_block_width(transcoder_texture_format tex_type) { switch (tex_type) @@ -20412,7 +20412,7 @@ namespace basist BASISU_NOTE_UNUSED(tex_type); return 4; } - + bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt) { if (fmt == basis_tex_format::cUASTC4x4) @@ 
-20470,7 +20470,7 @@ namespace basist case transcoder_texture_format::cTFETC2_RGBA: return true; #endif -#if BASISD_SUPPORT_ASTC +#if BASISD_SUPPORT_ASTC case transcoder_texture_format::cTFASTC_4x4_RGBA: return true; #endif @@ -20501,9 +20501,9 @@ namespace basist return false; } - // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ // UASTC - // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ #if BASISD_SUPPORT_UASTC const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2] = @@ -21228,7 +21228,7 @@ namespace basist if (group_size) { - // Range has trits or quints - pack each group of 5 or 3 values + // Range has trits or quints - pack each group of 5 or 3 values const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3); for (int group_index = 0; group_index < total_groups; group_index++) @@ -21518,7 +21518,7 @@ namespace basist bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints) { //memset(&unpacked, 0, sizeof(unpacked)); - + #if 0 uint8_t table[128]; memset(table, 0xFF, sizeof(table)); @@ -21573,7 +21573,7 @@ namespace basist return true; } - + if (read_hints) { if (g_uastc_mode_has_bc1_hint0[mode]) @@ -21606,7 +21606,7 @@ namespace basist } else bit_ofs += g_uastc_mode_total_hint_bits[mode]; - + uint32_t subsets = 1; switch (mode) { @@ -21819,7 +21819,7 @@ namespace basist { // All other modes have <= 64 weight bits. 
uint64_t bits; - + // Read the weight bits if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS)) bits = read_bits64(blk.m_bytes, bit_ofs, basisu::minimum(64, 128 - (int)bit_ofs)); @@ -21831,31 +21831,31 @@ namespace basist #else bits = blk.m_qwords[1]; #endif - + if (bit_ofs >= 64U) bits >>= (bit_ofs - 64U); else { assert(bit_ofs >= 56U); - + uint32_t bits_needed = 64U - bit_ofs; bits <<= bits_needed; bits |= (blk.m_bytes[7] >> (8U - bits_needed)); } } - + bit_ofs = 0; const uint32_t mask = (1U << weight_bits) - 1U; const uint32_t anchor_mask = (1U << (weight_bits - 1U)) - 1U; - + if (total_planes == 2) { // Dual plane modes always have a single subset, and the first 2 weights are anchors. unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); bit_ofs += (weight_bits - 1); - + unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); bit_ofs += (weight_bits - 1); @@ -21873,7 +21873,7 @@ namespace basist if (weight_bits == 4) { assert(bit_ofs == 0); - + // Specialize the most common case: 4-bit weights. unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits) & 7); unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> 3) & 15); @@ -22419,7 +22419,7 @@ namespace basist } case 2: { - // 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1 + // 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1 dst_blk.m_mode = 1; dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; @@ -23358,7 +23358,7 @@ namespace basist bool flip = pack_etc1_y_estimate_flipped(&block_y[0][0], upper_avg, lower_avg, left_avg, right_avg); // non-flipped: | | - // vs. + // vs. 
// flipped: -- // -- @@ -23969,7 +23969,7 @@ namespace basist static const uint8_t s_uastc2_to_bc1[4] = { 0, 2, 3, 1 }; static const uint8_t s_uastc1_to_bc1[2] = { 0, 1 }; const uint8_t* s_uastc_to_bc1_weights[6] = { nullptr, s_uastc1_to_bc1, s_uastc2_to_bc1, s_uastc3_to_bc1, s_uastc4_to_bc1, s_uastc5_to_bc1 }; - + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride) { uint32_t min0_v, max0_v, min1_v, max1_v,min2_v, max2_v, min3_v, max3_v; @@ -24057,7 +24057,7 @@ namespace basist a2 |= (s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U); a3 |= (s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U); } - + { const int v0 = pPixels[8 * stride] * 14 + bias; const int v1 = pPixels[9 * stride] * 14 + bias; @@ -24081,7 +24081,7 @@ namespace basist } const uint64_t f = a0 | a1 | a2 | a3; - + pDst_bytes[2] = (uint8_t)f; pDst_bytes[3] = (uint8_t)(f >> 8U); pDst_bytes[4] = (uint8_t)(f >> 16U); @@ -24104,7 +24104,7 @@ namespace basist int dots[4]; for (uint32_t i = 0; i < 4; i++) dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; - + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; ar *= 2; ag *= 2; ab *= 2; @@ -24113,7 +24113,7 @@ namespace basist { const int d = pSrc_pixels[i].r * ar + pSrc_pixels[i].g * ag + pSrc_pixels[i].b * ab; static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; - + // Rounding matters here! // d <= t0: <=, not <, to the later LS step "sees" a wider range of selectors. It matters for quality. sels[i] = s_sels[(d <= t0) + (d < t1) + (d < t2)]; @@ -24156,11 +24156,11 @@ namespace basist } struct vec3F { float c[3]; }; - + static bool compute_least_squares_endpoints_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh) { // Derived from bc7enc16's LS function. 
- // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. uint32_t uq00_r = 0, uq10_r = 0, ut_r = 0, uq00_g = 0, uq10_g = 0, ut_g = 0, uq00_b = 0, uq10_b = 0, ut_b = 0; @@ -24234,7 +24234,7 @@ namespace basist return true; } - void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) { dxt1_block* pDst_block = static_cast(pDst); @@ -24286,19 +24286,19 @@ namespace basist { const color32* pSrc_pixels = (const color32*)pPixels; dxt1_block* pDst_block = static_cast(pDst); - + int avg_r = -1, avg_g = 0, avg_b = 0; int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0; uint8_t sels[16]; - + const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0; if (use_sels) { // Caller is jamming in their own selectors for us to try. const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24); - + static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 }; - + for (uint32_t i = 0; i < 16; i++) sels[i] = s_sel_tran[(s >> (i * 2)) & 3]; } @@ -24310,13 +24310,13 @@ namespace basist for (j = 1; j < 16; j++) if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) break; - + if (j == 16) { encode_bc1_solid_block(pDst, fr, fg, fb); return; } - + // Select 2 colors along the principle axis. (There must be a faster/simpler way.) int total_r = fr, total_g = fg, total_b = fb; int max_r = fr, max_g = fg, max_b = fb; @@ -24350,7 +24350,7 @@ namespace basist float cov[6]; for (uint32_t i = 0; i < 6; i++) cov[i] = static_cast(icov[i])* (1.0f / 255.0f); - + #if 0 // Seems silly to use full PCA to choose 2 colors. The diff in avg. 
PSNR between using PCA vs. not is small (~.025 difference). // TODO: Try 2 or 3 different normalized diagonal vectors, choose the one that results in the largest dot delta @@ -24382,7 +24382,7 @@ namespace basist saxis_b = (int)(xb * m); } #endif - + int low_dot = INT_MAX, high_dot = INT_MIN, low_c = 0, high_c = 0; for (uint32_t i = 0; i < 16; i++) { @@ -24406,7 +24406,7 @@ namespace basist hr = to_5(pSrc_pixels[high_c].r); hg = to_6(pSrc_pixels[high_c].g); hb = to_5(pSrc_pixels[high_c].b); - + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); } // if (use_sels) @@ -24453,13 +24453,13 @@ namespace basist hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); } - + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); } uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb); uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb); - + // Always forbid 3 color blocks if (lc16 == hc16) { @@ -24511,7 +24511,7 @@ namespace basist pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; } } - + void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags) { const color32* pSrc_pixels = (const color32*)pPixels; @@ -24560,8 +24560,8 @@ namespace basist min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); total_r += r; total_g += g; total_b += b; } - - if (grayscale_flag) + + if (grayscale_flag) { // Grayscale blocks are a common enough case to specialize. 
if ((max_r - min_r) < 2) @@ -24878,7 +24878,7 @@ namespace basist // Always forbid 3 color blocks uint16_t lc16 = (uint16_t)b.get_low_color(); uint16_t hc16 = (uint16_t)b.get_high_color(); - + uint8_t mask = 0; // Make l > h @@ -25108,7 +25108,7 @@ namespace basist blk.m_base = static_cast(a); blk.m_table = 13; blk.m_multiplier = 0; - + memcpy(blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); return; @@ -25798,7 +25798,7 @@ namespace basist if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) return false; - // Get block's RGB bounding box + // Get block's RGB bounding box color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); if (from_alpha) @@ -25857,7 +25857,7 @@ namespace basist if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) return false; - // Get block's RGBA bounding box + // Get block's RGBA bounding box color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); for (uint32_t i = 0; i < 16; i++) @@ -25973,9 +25973,9 @@ namespace basist #endif // #if BASISD_SUPPORT_UASTC -// ------------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------------ // KTX2 -// ------------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------------ #if BASISD_SUPPORT_KTX2 const uint8_t g_ktx2_file_identifier[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; @@ -25997,7 +25997,7 @@ namespace basist m_key_values.clear(); memset(&m_etc1s_header, 0, sizeof(m_etc1s_header)); m_etc1s_image_descs.clear(); - + m_format = basist::basis_tex_format::cETC1S; m_dfd_color_model = 0; @@ -26009,9 +26009,9 @@ namespace basist m_dfd_chan1 = KTX2_DF_CHANNEL_UASTC_RGB; m_etc1s_transcoder.clear(); - + 
m_def_transcoder_state.clear(); - + m_has_alpha = false; m_is_video = false; } @@ -26082,7 +26082,7 @@ namespace basist return false; } } - + // 3.7 levelCount: "levelCount=0 is allowed, except for block-compressed formats" if (m_header.m_level_count < 1) { @@ -26139,7 +26139,7 @@ namespace basist } memcpy(&m_levels[0], m_pData + sizeof(ktx2_header), level_index_size_in_bytes); - + // Sanity check the level offsets and byte sizes for (uint32_t i = 0; i < m_levels.size(); i++) { @@ -26159,9 +26159,9 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset and/or length\n"); return false; } - + const uint64_t MAX_SANE_LEVEL_UNCOMP_SIZE = 2048ULL * 1024ULL * 1024ULL; - + if (m_levels[i].m_uncompressed_byte_length >= MAX_SANE_LEVEL_UNCOMP_SIZE) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too large)\n"); @@ -26198,7 +26198,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD offset and/or length\n"); return false; } - + const uint8_t* pDFD = m_pData + m_header.m_dfd_byte_offset; if (!m_dfd.try_resize(m_header.m_dfd_byte_length)) @@ -26208,17 +26208,17 @@ namespace basist } memcpy(m_dfd.data(), pDFD, m_header.m_dfd_byte_length); - + // This is all hard coded for only ETC1S and UASTC. 
uint32_t dfd_total_size = basisu::read_le_dword(pDFD); - + // 3.10.3: Sanity check if (dfd_total_size != m_header.m_dfd_byte_length) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (1)\n"); return false; } - + // 3.10.3: More sanity checking if (m_header.m_kvd_byte_length) { @@ -26231,7 +26231,7 @@ namespace basist const uint32_t dfd_bits = basisu::read_le_dword(pDFD + 3 * sizeof(uint32_t)); const uint32_t sample_channel0 = basisu::read_le_dword(pDFD + 7 * sizeof(uint32_t)); - + m_dfd_color_model = dfd_bits & 255; m_dfd_color_prims = (ktx2_df_color_primaries)((dfd_bits >> 8) & 255); m_dfd_transfer_func = (dfd_bits >> 16) & 255; @@ -26247,11 +26247,11 @@ namespace basist if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ETC1S) { m_format = basist::basis_tex_format::cETC1S; - + // 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD’s sample count." // If m_has_alpha is true it may be 2-channel RRRG or 4-channel RGBA, but we let the caller deal with that. m_has_alpha = (m_header.m_dfd_byte_length == 60); - + m_dfd_samples = m_has_alpha ? 2 : 1; m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); @@ -26267,7 +26267,7 @@ namespace basist m_dfd_samples = 1; m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); - + // We're assuming "DATA" means RGBA so it has alpha. 
m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG); } @@ -26277,7 +26277,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD color model\n"); return false; } - + if (!read_key_values()) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: read_key_values() failed\n"); @@ -26321,7 +26321,7 @@ namespace basist return nullptr; } - + bool ktx2_transcoder::start_transcoding() { if (!m_pData) @@ -26330,7 +26330,7 @@ namespace basist return false; } - if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) { // Check if we've already decompressed the ETC1S global data. If so don't unpack it again. if (!m_etc1s_transcoder.get_endpoints().empty()) @@ -26341,7 +26341,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: decompress_etc1s_global_data() failed\n"); return false; } - + if (!m_is_video) { // See if there are any P-frames. If so it must be a video, even if there wasn't a KTXanimData key. 
@@ -26397,7 +26397,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: layer_index >= maximum(m_header.m_layer_count, 1)\n"); return false; } - + const uint32_t level_width = basisu::maximum(m_header.m_pixel_width >> level_index, 1); const uint32_t level_height = basisu::maximum(m_header.m_pixel_height >> level_index, 1); const uint32_t num_blocks_x = (level_width + 3) >> 2; @@ -26427,9 +26427,9 @@ namespace basist return true; } - + bool ktx2_transcoder::transcode_image_level( - uint32_t level_index, uint32_t layer_index, uint32_t face_index, + uint32_t level_index, uint32_t layer_index, uint32_t face_index, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, basist::transcoder_texture_format fmt, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, int channel0, int channel1, @@ -26443,7 +26443,7 @@ namespace basist if (!pState) pState = &m_def_transcoder_state; - + if (level_index >= m_levels.size()) { BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: level_index >= m_levels.size()\n"); @@ -26472,7 +26472,7 @@ namespace basist const uint8_t* pComp_level_data = m_pData + m_levels[level_index].m_byte_offset; uint64_t comp_level_data_size = m_levels[level_index].m_byte_length; - + const uint8_t* pUncomp_level_data = pComp_level_data; uint64_t uncomp_level_data_size = comp_level_data_size; @@ -26481,7 +26481,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_level_data_size > UINT32_MAX\n"); return false; } - + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) { // Check if we've already decompressed this level's supercompressed data. 
@@ -26499,12 +26499,12 @@ namespace basist pUncomp_level_data = pState->m_level_uncomp_data.data(); uncomp_level_data_size = pState->m_level_uncomp_data.size(); } - + const uint32_t level_width = basisu::maximum(m_header.m_pixel_width >> level_index, 1); const uint32_t level_height = basisu::maximum(m_header.m_pixel_height >> level_index, 1); const uint32_t num_blocks_x = (level_width + 3) >> 2; const uint32_t num_blocks_y = (level_height + 3) >> 2; - + if (m_format == basist::basis_tex_format::cETC1S) { // Ensure start_transcoding() was called. @@ -26518,7 +26518,7 @@ namespace basist (level_index * basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count) + layer_index * m_header.m_face_count + face_index; - + // Sanity check if (etc1s_image_index >= m_etc1s_image_descs.size()) { @@ -26553,7 +26553,7 @@ namespace basist // Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices. assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length); const uint32_t total_2D_image_size = num_blocks_x * num_blocks_y * KTX2_UASTC_BLOCK_SIZE; - + const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size; // Sanity checks @@ -26589,12 +26589,12 @@ namespace basist return true; } - + bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data) { const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData; const uint64_t comp_size = m_levels[level_index].m_byte_length; - + const uint64_t uncomp_size = m_levels[level_index].m_uncompressed_byte_length; if (((size_t)comp_size) != comp_size) @@ -26613,7 +26613,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Out of memory\n"); return false; } - + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) { #if BASISD_SUPPORT_KTX2_ZSTD @@ -26636,7 +26636,7 @@ namespace basist return true; } - + bool 
ktx2_transcoder::decompress_etc1s_global_data() { // Note: we don't actually support 3D textures in here yet @@ -26675,13 +26675,13 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: SGD byte length is too small, file is invalid or corrupted\n"); return false; } - + if (!m_etc1s_image_descs.try_resize(image_count)) { BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Out of memory\n"); return false; } - + memcpy(m_etc1s_image_descs.data(), pSrc, sizeof(ktx2_etc1s_image_desc) * image_count); pSrc += sizeof(ktx2_etc1s_image_desc) * image_count; @@ -26715,7 +26715,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_tables() failed, file is invalid or corrupted\n"); return false; } - + if (!m_etc1s_transcoder.decode_palettes( m_etc1s_header.m_endpoint_count, pEndpoint_data, m_etc1s_header.m_endpoints_byte_length, m_etc1s_header.m_selector_count, pSelector_data, m_etc1s_header.m_selectors_byte_length)) @@ -26723,7 +26723,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_palettes() failed, file is likely corrupted\n"); return false; } - + return true; } @@ -26764,7 +26764,7 @@ namespace basist while (src_left > sizeof(uint32_t)) { uint32_t l = basisu::read_le_dword(pSrc); - + pSrc += sizeof(uint32_t); src_left -= sizeof(uint32_t); @@ -26785,7 +26785,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); return false; } - + basisu::uint8_vec& key_data = m_key_values.back().m_key; basisu::uint8_vec& value_data = m_key_values.back().m_value; @@ -26807,7 +26807,7 @@ namespace basist l--; } while (key_data.back()); - + if (!value_data.try_resize(l)) { BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); @@ -26836,7 +26836,7 @@ namespace basist return true; } - + #endif // BASISD_SUPPORT_KTX2 bool basisu_transcoder_supports_ktx2() @@ -26863,7 +26863,7 @@ namespace basist /** * 
Collection of unused functions and const variables to work around \c * -Wunused-function and \c -Wunused-const-variable warnings. - * + * * \todo LTO does its thing so any unused are removed but is there a better way? */ void _basisu_translib_dummy() { diff --git a/contrib/previewers/win/basisthumbprovider.h b/contrib/previewers/win/basisthumbprovider.h index e73747cb..cca93a8f 100644 --- a/contrib/previewers/win/basisthumbprovider.h +++ b/contrib/previewers/win/basisthumbprovider.h @@ -4,7 +4,7 @@ #include /** - * + * */ class BasisThumbProvider : public IInitializeWithStream, public IThumbnailProvider { @@ -16,23 +16,23 @@ class BasisThumbProvider : public IInitializeWithStream, public IThumbnailProvid IFACEMETHODIMP_(ULONG) AddRef() override; // IUnknown::Release() IFACEMETHODIMP_(ULONG) Release() override; - + // IInitializeWithStream::Initialize() IFACEMETHODIMP Initialize(IStream *pStream, DWORD grfMode) override; - + // IThumbnailProvider::GetThumbnail() IFACEMETHODIMP GetThumbnail(UINT cx, HBITMAP *phbmp, WTS_ALPHATYPE *pdwAlpha) override; protected: virtual ~BasisThumbProvider(); - + private: LONG count; IStream* stream; }; /** - * + * */ class BasisThumbProviderFactory : public IClassFactory { @@ -44,15 +44,15 @@ class BasisThumbProviderFactory : public IClassFactory IFACEMETHODIMP_(ULONG) AddRef() override; // IUnknown::Release() IFACEMETHODIMP_(ULONG) Release() override; - + // IClassFactory::CreateInstance() IFACEMETHODIMP CreateInstance(IUnknown *pUnkOuter, REFIID riid, void **ppv) override; // IClassFactory::LockServer() IFACEMETHODIMP LockServer(BOOL fLock) override; - + protected: virtual ~BasisThumbProviderFactory(); - + private: LONG count; }; diff --git a/contrib/previewers/win/helpers.cpp b/contrib/previewers/win/helpers.cpp index d4fab038..82100940 100644 --- a/contrib/previewers/win/helpers.cpp +++ b/contrib/previewers/win/helpers.cpp @@ -22,7 +22,7 @@ HBITMAP rgbToBitmap(const uint32_t* src, uint32_t const imgW, uint32_t const img * Creates 
a bitmap (a DIB) for the passed-in pixel size. Note that * negation of the height means top-down, origin upper-left, which is the * regular case. - * + * * TODO: 16-bit variant instead? */ assert(src && imgW && imgH); @@ -38,7 +38,7 @@ HBITMAP rgbToBitmap(const uint32_t* src, uint32_t const imgW, uint32_t const img HBITMAP hbmp = CreateDIBSection(NULL, &bmi, DIB_RGB_COLORS, &pixels, NULL, 0); /* * RGBA to BGRA conversion. - * + * * Note: we keep the alpha. */ if (hbmp && pixels) { diff --git a/contrib/previewers/win/helpers.h b/contrib/previewers/win/helpers.h index 691d5610..4fd23722 100644 --- a/contrib/previewers/win/helpers.h +++ b/contrib/previewers/win/helpers.h @@ -6,15 +6,15 @@ /** * Write a formatted string to the connected debugger (e.g. DebugView). - * + * * \param[in] fmt content to write in \c printf format (followed by optional arguments) */ void dprintf(char* const fmt, ...); /** * Converts raw RGBA data to a Windows BGRA bitmap. - * - * \param[in] src raw RGBA data + * + * \param[in] src raw RGBA data * \param[in] imgW width of the decoded image * \param[in] imgH height of the decoded image * \return handle to a bitmap (ownership passed to the caller) diff --git a/contrib/single_file_transcoder/README.md b/contrib/single_file_transcoder/README.md index 80c7fe17..73ae7b79 100644 --- a/contrib/single_file_transcoder/README.md +++ b/contrib/single_file_transcoder/README.md @@ -20,7 +20,7 @@ The combiner script can also generate separate amalgamated header and source fil ``` python3 combine.py -r ../../transcoder -o basisu_transcoder.h -p ../../transcoder/basisu_transcoder.h -python3 combine.py -r ../../transcoder -x basisu_transcoder_tables_bc7_m6.inc -k basisu_transcoder.h -o basisu_transcoder.cpp basisu_transcoder-in.cpp +python3 combine.py -r ../../transcoder -x basisu_transcoder_tables_bc7_m6.inc -k basisu_transcoder.h -o basisu_transcoder.cpp basisu_transcoder-in.cpp ``` @@ -29,6 +29,6 @@ Note: the amalgamation script was tested on Windows and 
Mac, requiring Python 3. Why? ---- -Because all it now takes to support Basis Universal is the addition of a single file, two if using the header, with no configuration or further build steps (the out-of-the-box defaults tailor the included formats for various platforms). +Because all it now takes to support Basis Universal is the addition of a single file, two if using the header, with no configuration or further build steps (the out-of-the-box defaults tailor the included formats for various platforms). The library is small, adding, for example, around 250kB to an Emscripten compiled WebAssembly project (with transcoding disabled for BC7 and ATC; disabling ASTC will remove a further 64kB, and `gzip` will approximately half the `wasm` file). diff --git a/contrib/single_file_transcoder/basisu_transcoder-in.cpp b/contrib/single_file_transcoder/basisu_transcoder-in.cpp index 27bc278b..b073a378 100644 --- a/contrib/single_file_transcoder/basisu_transcoder-in.cpp +++ b/contrib/single_file_transcoder/basisu_transcoder-in.cpp @@ -9,7 +9,7 @@ * Transcoder build options for known platforms (iOS has ETC, ASTC and PVRTC; * Emscripten adds DXT to iOS's options; Android adds PVRTC2 to Emscripten's * options; other platforms build all except FXT1). - * + * * See https://github.com/BinomialLLC/basis_universal#shrinking-the-transcoders-compiled-size */ #ifdef __APPLE__ @@ -38,7 +38,7 @@ /** * Collection of unused functions and const variables to work around \c * -Wunused-function and \c -Wunused-const-variable warnings. - * + * * \todo LTO does its thing so any unused are removed but is there a better way? */ void _basisu_translib_dummy() { diff --git a/contrib/single_file_transcoder/combine.py b/contrib/single_file_transcoder/combine.py index 3d1018d5..829d4331 100755 --- a/contrib/single_file_transcoder/combine.py +++ b/contrib/single_file_transcoder/combine.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # Tool to bundle multiple C/C++ source files, inlining any includes. 
-# +# # Note: there are two types of exclusion options: the '-x' flag, which besides # excluding a file also adds an #error directive in place of the #include, and # the '-k' flag, which keeps the #include and doesn't inline the file. The @@ -10,10 +10,10 @@ # occurrence adds the error, and '-k' for headers that we wish to manually # include, such as a project's public API, for which occurrences after the first # are removed. -# +# # Todo: the error handling could be better, which currently throws and halts # (which is functional just not very friendly). -# +# # Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain) import argparse, re, sys @@ -42,29 +42,29 @@ found: Set[Path] = set() # Compiled regex Patern to handle the following type of file includes: -# +# # #include "file" # #include "file" # # include "file" # #include "file" # #include "file" // comment # #include "file" // comment with quote " -# +# # And all combinations of, as well as ignoring the following: -# +# # #include # //#include "file" # /*#include "file"*/ -# +# # We don't try to catch errors since the compiler will do this (and the code is # expected to be valid before processing) and we don't care what follows the # file (whether it's a valid comment or not, since anything after the quoted # string is ignored) -# +# include_regex: Pattern = re.compile(r'^\s*#\s*include\s*"(.+?)"') # Simple tests to prove include_regex's cases. -# +# def test_match_include() -> bool: if (include_regex.match('#include "file"') and include_regex.match(' #include "file"') and @@ -81,19 +81,19 @@ def test_match_include() -> bool: return False # Compiled regex Patern to handle "#pragma once" in various formats: -# +# # #pragma once # #pragma once # # pragma once # #pragma once # #pragma once // comment -# +# # Ignoring commented versions, same as include_regex. -# +# pragma_regex: Pattern = re.compile(r'^\s*#\s*pragma\s*once\s*') # Simple tests to prove pragma_regex's cases. 
-# +# def text_match_pragma() -> bool: if (pragma_regex.match('#pragma once') and pragma_regex.match(' #pragma once') and @@ -109,7 +109,7 @@ def text_match_pragma() -> bool: # Finds 'file'. First the list of 'root' paths are searched, followed by the # the currently processing file's 'parent' path, returning a valid Path in # canonical form. If no match is found None is returned. -# +# def resolve_include(file: str, parent: Optional[Path] = None) -> Optional[Path]: for root in roots: found = root.joinpath(file).resolve() @@ -127,7 +127,7 @@ def resolve_include(file: str, parent: Optional[Path] = None) -> Optional[Path]: # and each entry resolved to its canonical path (like any include entry, either # from the list of root paths or the owning file's 'parent', which in this case # is case is the input file). The results are stored in 'resolved'. -# +# def resolve_excluded_files(file_list: Optional[List[str]], resolved: Set[Path], parent: Optional[Path] = None) -> None: if (file_list): for filename in file_list: @@ -138,23 +138,23 @@ def resolve_excluded_files(file_list: Optional[List[str]], resolved: Set[Path], error_line(f'Warning: excluded file not found: {filename}') # Writes 'line' to the open 'destn' (or stdout). -# +# def write_line(line: str) -> None: print(line, file=destn) # Logs 'line' to stderr. This is also used for general notifications that we # don't want to go to stdout (so the source can be piped). -# +# def error_line(line: Any) -> None: print(line, file=sys.stderr) # Inline the contents of 'file' (with any of its includes also inlined, etc.). -# +# # Note: text encoding errors are ignored and replaced with ? when reading the # input files. This isn't ideal, but it's more than likely in the comments than # code and a) the text editor has probably also failed to read the same content, # and b) the compiler probably did too. 
-# +# def add_file(file: Path, file_name: str = None) -> None: if (file.is_file()): if (not file_name): diff --git a/contrib/single_file_transcoder/combine.sh b/contrib/single_file_transcoder/combine.sh index aedae922..2b9ab2c5 100755 --- a/contrib/single_file_transcoder/combine.sh +++ b/contrib/single_file_transcoder/combine.sh @@ -1,13 +1,13 @@ #!/bin/sh -e # Tool to bundle multiple C/C++ source files, inlining any includes. -# +# # Note: this POSIX-compliant script is many times slower than the original bash # implementation (due to the grep calls) but it runs and works everywhere. -# +# # TODO: ROOTS, FOUND, etc., as arrays (since they fail on paths with spaces) # TODO: revert to Bash-only regex (the grep ones being too slow) -# +# # Script released under a CC0 license. # Common file roots diff --git a/contrib/single_file_transcoder/examples/emscripten.cpp b/contrib/single_file_transcoder/examples/emscripten.cpp index e56ae6af..ebdf9975 100644 --- a/contrib/single_file_transcoder/examples/emscripten.cpp +++ b/contrib/single_file_transcoder/examples/emscripten.cpp @@ -131,7 +131,7 @@ static GLchar const fragShader2D[] = /** * Helper to compile a shader. - * + * * \param type shader type * \param text shader source * \return the shader ID (or zero if compilation failed) @@ -199,7 +199,7 @@ struct posTex2d { /* * Possibly missing GL enums. - * + * * Note: GL_COMPRESSED_RGB_ETC1_WEBGL is the same as GL_ETC1_RGB8_OES */ #ifndef GL_ETC1_RGB8_OES @@ -233,7 +233,7 @@ static etc1_global_selector_codebook* globalCodebook = NULL; /** * Returns a supported compressed texture format for a given context. - * + * * \param[in] ctx WebGL context * \param[in] alpha \c true if the texture has an alpha channel * \return corresponding Basis format @@ -243,7 +243,7 @@ static transcoder_texture_format supports(EMSCRIPTEN_WEBGL_CONTEXT_HANDLE const /* * Test for both prefixed and non-prefixed versions. This should grab iOS * and other ImgTec GPUs first as a preference. 
- * + * * TODO: do older iOS expose ASTC to the browser and does it transcode to RGBA? */ static bool const pvr = GL_HAS_EXT(ctx, "WEBKIT_WEBGL_compressed_texture_pvrtc") @@ -291,7 +291,7 @@ static transcoder_texture_format supports(EMSCRIPTEN_WEBGL_CONTEXT_HANDLE const #endif /* * Finally ETC1, falling back on RGBA. - * + * * TODO: we might just prefer to transcode to dithered 565 once available */ static bool const etc1 = GL_HAS_EXT(ctx, "WEBGL_compressed_texture_etc1"); @@ -306,10 +306,10 @@ static transcoder_texture_format supports(EMSCRIPTEN_WEBGL_CONTEXT_HANDLE const /** * Returns the equivalent GL type given a BasisU type. - * + * * \note This relies on \c #supports() returning the supported formats, and so * only converts to the GL equivalents (without further testing for support). - * + * * \param[in] type BasisU transcode target * \return equivalent GL type */ @@ -340,13 +340,13 @@ static GLenum toGlType(transcoder_texture_format const type) { /** * Uploads the texture. - * + * * \param[in] ctx ctx WebGL context * \param[in] name texture \e name * \param[in] data \c .basis file content * \param[in] size number of bytes in \a data * \return \c true if the texture was decoded and created - * + * * \todo reuse the decode buffer (the first mips level should be able to contain the rest) */ bool upload(EMSCRIPTEN_WEBGL_CONTEXT_HANDLE const ctx, GLuint const name, const uint8_t* const data, size_t const size) { @@ -477,7 +477,7 @@ static EM_BOOL initContext() { static void tick() { glClearColor(1.0f, 0.0f, 1.0f, 1.0f); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - + if (uRotId >= 0) { glUniform1f(uRotId, rotDeg); rotDeg += 0.1f; @@ -486,7 +486,7 @@ static void tick() { } glBindTexture(GL_TEXTURE_2D, txName[(lround(rotDeg / 45) & 1) != 0]); } - + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); glFlush(); } @@ -501,10 +501,10 @@ int main() { if ((progId = glCreateProgram())) { vertId = compileShader(GL_VERTEX_SHADER, vertShader2D); fragId = 
compileShader(GL_FRAGMENT_SHADER, fragShader2D); - + glBindAttribLocation(progId, GL_VERT_POSXY_ID, "aPos"); glBindAttribLocation(progId, GL_VERT_TXUV0_ID, "aUV0"); - + glAttachShader(progId, vertId); glAttachShader(progId, fragId); glLinkProgram (progId); @@ -514,7 +514,7 @@ int main() { if (uTx0Id >= 0) { glUniform1i(uTx0Id, 0); } - + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); glEnable(GL_BLEND); glDisable(GL_DITHER); @@ -522,7 +522,7 @@ int main() { glCullFace(GL_BACK); glEnable(GL_CULL_FACE); } - + GLuint vertsBuf = 0; GLuint indexBuf = 0; // Create the textured quad (vert positions then UVs) @@ -551,14 +551,14 @@ int main() { sizeof(index2d), index2d, GL_STATIC_DRAW); glEnableVertexAttribArray(GL_VERT_POSXY_ID); glEnableVertexAttribArray(GL_VERT_TXUV0_ID); - + glGenTextures(2, txName); if (upload(glCtx, txName[0], srcRgb, sizeof srcRgb) && upload(glCtx, txName[1], srcRgba, sizeof srcRgba)) { printf("Decoded!\n"); } - + emscripten_set_main_loop(tick, 0, EM_FALSE); emscripten_exit_with_live_runtime(); } else { diff --git a/contrib/single_file_transcoder/examples/simple.cpp b/contrib/single_file_transcoder/examples/simple.cpp index 64c8b091..7a802f0f 100644 --- a/contrib/single_file_transcoder/examples/simple.cpp +++ b/contrib/single_file_transcoder/examples/simple.cpp @@ -8,7 +8,7 @@ * \code * cc -std=c++11 -lstdc++ simple.cpp * \endcode - * + * * Example code released under a CC0 license. 
*/ #include "../basisu_transcoder.cpp" @@ -37,7 +37,7 @@ static uint8_t const srcRgb[] = { */ int main() { basisu_transcoder_init(); - + basisu_transcoder transcoder; if (transcoder.validate_header(srcRgb, sizeof srcRgb)) { basisu_file_info fileInfo; diff --git a/encoder/basisu_backend.cpp b/encoder/basisu_backend.cpp index abb61750..d1477ccb 100644 --- a/encoder/basisu_backend.cpp +++ b/encoder/basisu_backend.cpp @@ -54,7 +54,7 @@ namespace basisu m_pFront_end = pFront_end; m_params = params; m_slices = slice_descs; - + debug_printf("basisu_backend::Init: Slices: %u, ETC1S: %u, EndpointRDOQualityThresh: %f, SelectorRDOQualityThresh: %f\n", m_slices.size(), params.m_etc1s, @@ -196,7 +196,7 @@ namespace basisu m_endpoint_remap_table_old_to_new = reorderer.get_remap_table(); } - // For endpoints, old_to_new[] may not be bijective! + // For endpoints, old_to_new[] may not be bijective! // Some "old" entries may be unused and don't get remapped into the "new" array. m_old_endpoint_was_used.clear(); @@ -220,13 +220,13 @@ namespace basisu } // slice_index debug_printf("basisu_backend::reoptimize_and_sort_endpoints_codebook: First old entry index: %u\n", first_old_entry_index); - + m_new_endpoint_was_used.clear(); m_new_endpoint_was_used.resize(r.get_total_endpoint_clusters()); m_endpoint_remap_table_new_to_old.clear(); m_endpoint_remap_table_new_to_old.resize(r.get_total_endpoint_clusters()); - + // Set unused entries in the new array to point to the first used entry in the old array. 
m_endpoint_remap_table_new_to_old.set_all(first_old_entry_index); @@ -235,7 +235,7 @@ namespace basisu if (m_old_endpoint_was_used[old_index]) { const uint32_t new_index = m_endpoint_remap_table_old_to_new[old_index]; - + m_new_endpoint_was_used[new_index] = true; m_endpoint_remap_table_new_to_old[new_index] = old_index; @@ -612,7 +612,7 @@ namespace basisu sort_selector_codebook(); check_for_valid_cr_blocks(); - + debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); } @@ -669,11 +669,11 @@ namespace basisu gi.unpack(gi_unpacked); char buf[256]; -#ifdef _WIN32 +#ifdef _WIN32 sprintf_s(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index); #else snprintf(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index); -#endif +#endif save_png(buf, gi_unpacked); } @@ -682,7 +682,7 @@ namespace basisu //uint32_t g_color_delta_hist[255 * 3 + 1]; //uint32_t g_color_delta_bad_hist[255 * 3 + 1]; - + // TODO: Split this into multiple methods. bool basisu_backend::encode_image() { @@ -718,7 +718,7 @@ namespace basisu const int COLOR_DELTA_THRESH = 8; const int SEL_DIFF_THRESHOLD = 11; - + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) { //const int prev_frame_slice_index = is_video ? 
find_video_frame(slice_index, -1) : -1; @@ -764,7 +764,7 @@ namespace basisu } // block_x } // block_y - + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) { for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) @@ -842,7 +842,7 @@ namespace basisu const uint32_t cur_inten5 = etc_blk.get_inten_table(0); const etc1_endpoint_palette_entry& cur_endpoints = m_endpoint_palette[m.m_endpoint_index]; - + if (cur_err) { const float endpoint_remap_thresh = maximum(1.0f, m_params.m_endpoint_rdo_quality_thresh); @@ -858,7 +858,7 @@ namespace basisu int best_trial_idx = 0; etc_block trial_etc_blk(etc_blk); - + const int search_dist = minimum(iabs(endpoint_delta) - 1, MAX_ENDPOINT_SEARCH_DIST); for (int d = -search_dist; d < search_dist; d++) { @@ -876,7 +876,7 @@ namespace basisu continue; const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]]; - + if (m_params.m_compression_level <= 1) { if (p.m_inten5 > cur_inten5) @@ -886,7 +886,7 @@ namespace basisu int delta_g = iabs(cur_endpoints.m_color5.g - p.m_color5.g); int delta_b = iabs(cur_endpoints.m_color5.b - p.m_color5.b); int color_delta = delta_r + delta_g + delta_b; - + if (color_delta > COLOR_DELTA_THRESH) continue; } @@ -924,7 +924,7 @@ namespace basisu const int64_t initial_best_trial_err = INT64_MAX; int64_t best_trial_err = initial_best_trial_err; int best_trial_idx = 0; - + const int search_dist = minimum(iabs(endpoint_delta) - 1, MAX_ENDPOINT_SEARCH_DIST); for (int d = -search_dist; d < search_dist; d++) { @@ -942,7 +942,7 @@ namespace basisu continue; const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]]; - + if (m_params.m_compression_level <= 1) { if (p.m_inten5 > cur_inten5) @@ -952,7 +952,7 @@ namespace basisu int delta_g = iabs(cur_endpoints.m_color5.g - p.m_color5.g); int delta_b = iabs(cur_endpoints.m_color5.b - p.m_color5.b); int color_delta = delta_r + delta_g + delta_b; - + if (color_delta 
> COLOR_DELTA_THRESH) continue; } @@ -992,7 +992,7 @@ namespace basisu } #endif // BASISU_SUPPORT_SSE } // if (!g_cpu_supports_sse41) - + } // if (cur_err) } // if ((m_params.m_endpoint_rdo_quality_thresh > 1.0f) && (iabs(endpoint_delta) > 1) && (!block_endpoints_are_referenced(block_x, block_y))) @@ -1011,7 +1011,7 @@ namespace basisu if ((!is_video) || (m.m_endpoint_predictor != basist::CR_ENDPOINT_PRED_INDEX)) { int new_selector_index = m_selector_remap_table_old_to_new[m.m_selector_index]; - + const float selector_remap_thresh = maximum(1.0f, m_params.m_selector_rdo_quality_thresh); //2.5f; int selector_history_buf_index = -1; @@ -1060,7 +1060,7 @@ namespace basisu for (uint32_t p = 0; p < 16; p++) cur_err += color_distance(false, src_pixels.get_ptr()[p], block_colors[pCur_selectors[p]], false); } - + const uint64_t limit_err = (uint64_t)ceilf(cur_err * selector_remap_thresh); // Even if cur_err==limit_err, we still want to scan the history buffer because there may be equivalent entries that are cheaper to code. @@ -1091,7 +1091,7 @@ namespace basisu if (sel_diff >= SEL_DIFF_THRESHOLD) continue; } - + const uint64_t thresh_err = minimum(limit_err, best_trial_err); uint64_t trial_err = 0; @@ -1266,7 +1266,7 @@ namespace basisu //{ // printf("%u, %u, %f\n", g_color_delta_bad_hist[i], g_color_delta_hist[i], g_color_delta_hist[i] ? 
g_color_delta_bad_hist[i] / (float)g_color_delta_hist[i] : 0); //} - + double total_prep_time = tm.get_elapsed_secs(); debug_printf("basisu_backend::encode_image: Total prep time: %3.2f\n", total_prep_time); @@ -1521,7 +1521,7 @@ namespace basisu if (old_endpoint_was_used[old_endpoint_index]) { const uint32_t new_endpoint_index = m_endpoint_remap_table_old_to_new[old_endpoint_index]; - + new_endpoint_was_used[new_endpoint_index] = true; endpoint_remap_table_new_to_old[new_endpoint_index] = old_endpoint_index; @@ -1660,7 +1660,7 @@ namespace basisu bool basisu_backend::encode_selector_palette() { const basisu_frontend& r = *m_pFront_end; - + histogram delta_selector_pal_histogram(256); for (uint32_t q = 0; q < r.get_total_selector_clusters(); q++) diff --git a/encoder/basisu_backend.h b/encoder/basisu_backend.h index 07778aeb..0bac61e1 100644 --- a/encoder/basisu_backend.h +++ b/encoder/basisu_backend.h @@ -103,8 +103,8 @@ namespace basisu { clear(); } - - uint32_t m_endpoint_predictor; + + uint32_t m_endpoint_predictor; int m_endpoint_index; int m_selector_index; @@ -115,10 +115,10 @@ namespace basisu void clear() { m_endpoint_predictor = 0; - + m_endpoint_index = 0; m_selector_index = 0; - + m_selector_history_buf_index = 0; m_is_cr_target = false; } @@ -137,7 +137,7 @@ namespace basisu color_rgba m_color5; uint32_t m_inten5; bool m_color5_valid; - + void clear() { clear_obj(*this); @@ -153,7 +153,7 @@ namespace basisu float m_endpoint_rdo_quality_thresh; float m_selector_rdo_quality_thresh; uint32_t m_compression_level; - + bool m_used_global_codebooks; bool m_validate; @@ -285,7 +285,7 @@ namespace basisu basisu_backend_params m_params; basisu_backend_slice_desc_vec m_slices; basisu_backend_output m_output; - + etc1_endpoint_palette_entry_vec m_endpoint_palette; etc1_selector_palette_entry_vec m_selector_palette; @@ -331,12 +331,12 @@ namespace basisu return slice.m_first_block_index + block_y * slice.m_num_blocks_x + block_x; } - + uint32_t 
get_total_blocks(uint32_t slice_index) const { return m_slices[slice_index].m_num_blocks_x * m_slices[slice_index].m_num_blocks_y; } - + uint32_t get_total_blocks() const { uint32_t total_blocks = 0; diff --git a/encoder/basisu_basis_file.cpp b/encoder/basisu_basis_file.cpp index f4c77bef..3db73af8 100644 --- a/encoder/basisu_basis_file.cpp +++ b/encoder/basisu_basis_file.cpp @@ -27,14 +27,14 @@ namespace basisu m_header.m_data_size = m_total_file_size - sizeof(basist::basis_file_header); m_header.m_total_slices = (uint32_t)encoder_output.m_slice_desc.size(); - + m_header.m_total_images = 0; for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++) m_header.m_total_images = maximum(m_header.m_total_images, encoder_output.m_slice_desc[i].m_source_file_index + 1); - + m_header.m_tex_format = (int)encoder_output.m_tex_format; m_header.m_flags = 0; - + if (encoder_output.m_etc1s) { assert(encoder_output.m_tex_format == basist::basis_tex_format::cETC1S); @@ -51,7 +51,7 @@ namespace basisu m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagUsesGlobalCodebook; if (encoder_output.m_srgb) m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagSRGB; - + for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++) { if (encoder_output.m_slice_desc[i].m_alpha) @@ -108,7 +108,7 @@ namespace basisu m_images_descs[i].m_image_index = slice_descs[i].m_source_file_index; m_images_descs[i].m_level_index = slice_descs[i].m_mip_index; - + if (slice_descs[i].m_alpha) m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsHasAlpha; if (slice_descs[i].m_iframe) @@ -186,7 +186,7 @@ namespace basisu pHeader->m_data_size = m_total_file_size - sizeof(basist::basis_file_header); pHeader->m_data_crc16 = basist::crc16(&m_comp_data[0] + sizeof(basist::basis_file_header), m_total_file_size - sizeof(basist::basis_file_header), 0); - + pHeader->m_header_crc16 = basist::crc16(&pHeader->m_data_size, sizeof(basist::basis_file_header) - 
BASISU_OFFSETOF(basist::basis_file_header, m_data_size), 0); pHeader->m_sig = basist::basis_file_header::cBASISSigValue; @@ -242,7 +242,7 @@ namespace basisu m_tables_file_ofs = 0; m_first_image_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); } - + uint64_t total_file_size = m_first_image_file_ofs; for (uint32_t i = 0; i < encoder_output.m_slice_image_data.size(); i++) total_file_size += encoder_output.m_slice_image_data[i].size(); diff --git a/encoder/basisu_bc7enc.cpp b/encoder/basisu_bc7enc.cpp index 22fdfa60..b15e1b10 100644 --- a/encoder/basisu_bc7enc.cpp +++ b/encoder/basisu_bc7enc.cpp @@ -100,24 +100,24 @@ static void astc_init() { if (!astc_is_valid_endpoint_range(range)) continue; - + const uint32_t levels = astc_get_levels(range); uint32_t vals[256]; // TODO for (uint32_t i = 0; i < levels; i++) vals[i] = (unquant_astc_endpoint_val(i, range) << 8) | i; - + std::sort(vals, vals + levels); for (uint32_t i = 0; i < levels; i++) { uint32_t order = vals[i] & 0xFF; uint32_t unq = vals[i] >> 8; - + g_astc_sorted_order_unquant[range][i].m_unquant = (uint8_t)unq; g_astc_sorted_order_unquant[range][i].m_index = (uint8_t)order; - + } // i #if 0 @@ -186,7 +186,7 @@ static inline uint32_t astc_interpolate_linear(uint32_t l, uint32_t h, uint32_t void bc7enc_compress_block_init() { astc_init(); - + // BC7 666.1 for (int c = 0; c < 256; c++) { @@ -224,11 +224,11 @@ void bc7enc_compress_block_init() for (uint32_t l = 0; l < 16; l++) { uint32_t low = (l << 4) | l; - + for (uint32_t h = 0; h < 16; h++) { uint32_t high = (h << 4) | h; - + const int k = astc_interpolate_linear(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); @@ -240,9 +240,9 @@ void bc7enc_compress_block_init() } } // h } // l - + g_astc_4bit_3bit_optimal_endpoints[c] = best; - + } // c // ASTC [0,15] 2-bit @@ -253,11 +253,11 @@ void bc7enc_compress_block_init() for (uint32_t l = 0; l < 16; l++) { uint32_t 
low = (l << 4) | l; - + for (uint32_t h = 0; h < 16; h++) { uint32_t high = (h << 4) | h; - + const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); @@ -269,9 +269,9 @@ void bc7enc_compress_block_init() } } // h } // l - + g_astc_4bit_2bit_optimal_endpoints[c] = best; - + } // c // ASTC range 7 [0,11] 2-bit @@ -282,11 +282,11 @@ void bc7enc_compress_block_init() for (uint32_t l = 0; l < 12; l++) { uint32_t low = g_astc_sorted_order_unquant[7][l].m_unquant; - + for (uint32_t h = 0; h < 12; h++) { uint32_t high = g_astc_sorted_order_unquant[7][h].m_unquant; - + const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); @@ -298,9 +298,9 @@ void bc7enc_compress_block_init() } } // h } // l - + g_astc_range7_2bit_optimal_endpoints[c] = best; - + } // c // ASTC range 13 [0,47] 4-bit @@ -311,11 +311,11 @@ void bc7enc_compress_block_init() for (uint32_t l = 0; l < 48; l++) { uint32_t low = g_astc_sorted_order_unquant[13][l].m_unquant; - + for (uint32_t h = 0; h < 48; h++) { uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant; - + const int k = astc_interpolate_linear(low, high, g_astc_weights4[BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); @@ -327,9 +327,9 @@ void bc7enc_compress_block_init() } } // h } // l - + g_astc_range13_4bit_optimal_endpoints[c] = best; - + } // c // ASTC range 13 [0,47] 2-bit @@ -340,11 +340,11 @@ void bc7enc_compress_block_init() for (uint32_t l = 0; l < 48; l++) { uint32_t low = g_astc_sorted_order_unquant[13][l].m_unquant; - + for (uint32_t h = 0; h < 48; h++) { uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant; - + const int k = astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); @@ -356,9 +356,9 @@ void bc7enc_compress_block_init() } } // h } // l - + 
g_astc_range13_2bit_optimal_endpoints[c] = best; - + } // c // ASTC range 11 [0,31] 5-bit @@ -393,14 +393,14 @@ void bc7enc_compress_block_init() static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSelectors, const bc7enc_vec4F* pSelector_weights, bc7enc_vec4F* pXl, bc7enc_vec4F* pXh, const color_quad_u8 *pColors) { - // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. double z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; double q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; double q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; double q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f; double q00_a = 0.0f, q10_a = 0.0f, t_a = 0.0f; - + for (uint32_t i = 0; i < N; i++) { const uint32_t sel = pSelectors[i]; @@ -648,7 +648,7 @@ static uint64_t pack_astc_4bit_3bit_to_one_color(const color_cell_compressor_par { uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i]; uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i]; - + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); } p.m_c[3] = 255; @@ -687,10 +687,10 @@ static uint64_t pack_astc_4bit_2bit_to_one_color_rgba(const color_cell_compresso { uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i]; uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i]; - + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); } - + uint64_t total_err = 0; for (uint32_t i = 0; i < pParams->m_num_pixels; i++) total_err += compute_color_distance_rgba(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); @@ -726,7 +726,7 @@ 
static uint64_t pack_astc_range7_2bit_to_one_color(const color_cell_compressor_p { uint32_t low = g_astc_sorted_order_unquant[7][pResults->m_low_endpoint.m_c[i]].m_unquant; uint32_t high = g_astc_sorted_order_unquant[7][pResults->m_high_endpoint.m_c[i]].m_unquant; - + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); } p.m_c[3] = 255; @@ -747,7 +747,7 @@ static uint64_t pack_astc_range13_2bit_to_one_color(const color_cell_compressor_ const endpoint_err *pEr = &g_astc_range13_2bit_optimal_endpoints[r]; const endpoint_err *pEg = &g_astc_range13_2bit_optimal_endpoints[g]; const endpoint_err *pEb = &g_astc_range13_2bit_optimal_endpoints[b]; - + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 47); color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 47); pResults->m_pbits[0] = 0; @@ -766,10 +766,10 @@ static uint64_t pack_astc_range13_2bit_to_one_color(const color_cell_compressor_ { uint32_t low = g_astc_sorted_order_unquant[13][pResults->m_low_endpoint.m_c[i]].m_unquant; uint32_t high = g_astc_sorted_order_unquant[13][pResults->m_high_endpoint.m_c[i]].m_unquant; - + p.m_c[i] = (uint8_t)astc_interpolate_linear(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); } - + uint64_t total_err = 0; for (uint32_t i = 0; i < pParams->m_num_pixels; i++) total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); @@ -878,18 +878,18 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 const int dr = actualMaxColor.m_c[0] - lr; const int dg = actualMaxColor.m_c[1] - lg; const int db = actualMaxColor.m_c[2] - lb; - + uint64_t total_err = 0; - + if (pParams->m_pForce_selectors) { for (uint32_t i = 0; i < pParams->m_num_pixels; i++) { const color_quad_u8* pC = &pParams->m_pPixels[i]; - + const uint8_t sel = pParams->m_pForce_selectors[i]; assert(sel < N); - + total_err += 
(pParams->m_has_alpha ? compute_color_distance_rgba : compute_color_distance_rgb)(&weightedColors[sel], pC, pParams->m_perceptual, pParams->m_weights); pResults->m_pSelectors_temp[i] = sel; @@ -930,7 +930,7 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 --best_sel; } total_err += err1; - + pResults->m_pSelectors_temp[i] = (uint8_t)best_sel; } } @@ -1027,7 +1027,7 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); } - + return total_err; } @@ -1257,7 +1257,7 @@ static uint64_t find_optimal_solution(uint32_t mode, bc7enc_vec4F xl, bc7enc_vec } } } - + fixDegenerateEndpoints(mode, &bestMinColor, &bestMaxColor, &xl, &xh, iscalep >> 1, 0); if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&bestMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&bestMaxColor, &pResults->m_high_endpoint) || (best_pbits[0] != pResults->m_pbits[0]) || (best_pbits[1] != pResults->m_pbits[1])) @@ -1296,7 +1296,7 @@ void check_best_overall_error(const color_cell_compressor_params *pParams, color colors[n-1].m_c[c] = g_astc_unquant[pParams->m_astc_endpoint_range][pResults->m_astc_high_endpoint.m_c[c]].m_unquant; assert(colors[n-1].m_c[c] == g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[c]].m_unquant); } - + for (uint32_t i = 1; i < pParams->m_num_selector_weights - 1; i++) for (uint32_t c = 0; c < 4; c++) colors[i].m_c[c] = (uint8_t)astc_interpolate_linear(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]); @@ -1306,14 +1306,14 @@ void check_best_overall_error(const color_cell_compressor_params *pParams, color { const color_quad_u8 &orig = pParams->m_pPixels[p]; const color_quad_u8 &packed = colors[pResults->m_pSelectors[p]]; - + if (pParams->m_has_alpha) total_err += compute_color_distance_rgba(&orig, 
&packed, pParams->m_perceptual, pParams->m_weights); else total_err += compute_color_distance_rgb(&orig, &packed, pParams->m_perceptual, pParams->m_weights); } assert(total_err == pResults->m_best_overall_err); - + // HACK HACK //if (total_err != pResults->m_best_overall_err) // printf("X"); @@ -1416,12 +1416,12 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param bc7enc_vec4F color = vec4F_from_color(&pParams->m_pPixels[i]); meanColor = vec4F_add(&meanColor, &color); } - + bc7enc_vec4F meanColorScaled = vec4F_mul(&meanColor, 1.0f / (float)(pParams->m_num_pixels)); meanColor = vec4F_mul(&meanColor, 1.0f / (float)(pParams->m_num_pixels * 255.0f)); vec4F_saturate_in_place(&meanColor); - + if (pParams->m_has_alpha) { // Use incremental PCA for RGBA PCA, because it's simple. @@ -1484,7 +1484,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param vec4F_set(&axis, xr, xg, xb, 0); } } - + if (vec4F_dot(&axis, &axis) < .5f) { if (pParams->m_perceptual) @@ -1493,7 +1493,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param vec4F_set(&axis, 1.0f, 1.0f, 1.0f, pParams->m_has_alpha ? 1.0f : 0); vec4F_normalize_in_place(&axis); } - + bc7enc_vec4F minColor, maxColor; float l = 1e+9f, h = -1e+9f; @@ -1518,7 +1518,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param bc7enc_vec4F c1 = vec4F_add(&meanColor, &b1); minColor = vec4F_saturate(&c0); maxColor = vec4F_saturate(&c1); - + bc7enc_vec4F whiteVec; vec4F_set_scalar(&whiteVec, 1.0f); if (vec4F_dot(&minColor, &whiteVec) > vec4F_dot(&maxColor, &whiteVec)) @@ -1542,7 +1542,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param // First find a solution using the block's PCA. 
if (!find_optimal_solution(mode, minColor, maxColor, pParams, pResults)) return 0; - + for (uint32_t i = 0; i < pComp_params->m_least_squares_passes; i++) { // Now try to refine the solution using least squares by computing the optimal endpoints from the current selectors. @@ -1556,11 +1556,11 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) return 0; } - + if ((!pParams->m_pForce_selectors) && (pComp_params->m_uber_level > 0)) { // In uber level 1, try varying the selectors a little, somewhat like cluster fit would. First try incrementing the minimum selectors, @@ -1597,7 +1597,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) return 0; @@ -1616,7 +1616,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) return 0; @@ -1673,7 +1673,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param } } } - + if (!pParams->m_pForce_selectors) { // Try encoding the partition as a single color by using the optimal single colors tables to encode the block to its mean. 
@@ -1754,13 +1754,13 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param #if BC7ENC_CHECK_OVERALL_ERROR check_best_overall_error(pParams, pResults); #endif - + return pResults->m_best_overall_err; } uint64_t color_cell_compression_est_astc( uint32_t num_weights, uint32_t num_comps, const uint32_t *pWeight_table, - uint32_t num_pixels, const color_quad_u8* pPixels, + uint32_t num_pixels, const color_quad_u8* pPixels, uint64_t best_err_so_far, const uint32_t weights[4]) { assert(num_comps == 3 || num_comps == 4); @@ -1807,7 +1807,7 @@ uint64_t color_cell_compression_est_astc( color_quad_u8_set(&lowColor, lr, lg, lb, la); color_quad_u8_set(&highColor, hr, hg, hb, ha); - // Place endpoints at bbox diagonals and compute interpolated colors + // Place endpoints at bbox diagonals and compute interpolated colors color_quad_u8 weightedColors[32]; weightedColors[0] = lowColor; diff --git a/encoder/basisu_bc7enc.h b/encoder/basisu_bc7enc.h index 8d8b7888..44d37dc8 100644 --- a/encoder/basisu_bc7enc.h +++ b/encoder/basisu_bc7enc.h @@ -26,7 +26,7 @@ namespace basisu #define BC7ENC_TRUE (1) #define BC7ENC_FALSE (0) - + typedef struct { float m_c[4]; } bc7enc_vec4F; extern const float g_bc7_weights1x[2 * 4]; @@ -36,9 +36,9 @@ namespace basisu extern const float g_astc_weights4x[16 * 4]; extern const float g_astc_weights5x[32 * 4]; extern const float g_astc_weights_3levelsx[3 * 4]; - + extern basist::astc_quant_bin g_astc_sorted_order_unquant[basist::BC7ENC_TOTAL_ASTC_RANGES][256]; // [sorted unquantized order] - + struct color_cell_compressor_params { uint32_t m_num_pixels; @@ -94,12 +94,12 @@ namespace basisu }; uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_params* pParams, color_cell_compressor_results* pResults, const bc7enc_compress_block_params* pComp_params); - + uint64_t color_cell_compression_est_astc( uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeight_table, uint32_t num_pixels, const 
basist::color_quad_u8* pPixels, uint64_t best_err_so_far, const uint32_t weights[4]); - + inline void bc7enc_compress_block_params_init_linear_weights(bc7enc_compress_block_params* p) { p->m_perceptual = BC7ENC_FALSE; @@ -128,5 +128,5 @@ namespace basisu // bc7enc_compress_block_init() MUST be called before calling bc7enc_compress_block() (or you'll get artifacts). void bc7enc_compress_block_init(); - + } // namespace basisu diff --git a/encoder/basisu_comp.cpp b/encoder/basisu_comp.cpp index 41eae2b7..9ee453b8 100644 --- a/encoder/basisu_comp.cpp +++ b/encoder/basisu_comp.cpp @@ -55,7 +55,7 @@ namespace basisu m_opencl_failed(false) { debug_printf("basis_compressor::basis_compressor\n"); - + assert(g_library_initialized); } @@ -71,7 +71,7 @@ namespace basisu bool basis_compressor::init(const basis_compressor_params ¶ms) { debug_printf("basis_compressor::init\n"); - + if (!g_library_initialized) { error_printf("basis_compressor::init: basisu_encoder_init() MUST be called before using any encoder functionality!\n"); @@ -83,9 +83,9 @@ namespace basisu error_printf("basis_compressor::init: A non-null job_pool pointer must be specified\n"); return false; } - + m_params = params; - + if (m_params.m_debug) { debug_printf("basis_compressor::init:\n"); @@ -94,7 +94,7 @@ namespace basisu #define PRINT_INT_VALUE(v) debug_printf("%s: %i %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); #define PRINT_UINT_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); #define PRINT_FLOAT_VALUE(v) debug_printf("%s: %f %u\n", BASISU_STRINGIZE2(v), static_cast(m_params.v), m_params.v.was_changed()); - + debug_printf("Source images: %u, source filenames: %u, source alpha filenames: %i, Source mipmap images: %u\n", m_params.m_source_images.size(), m_params.m_source_filenames.size(), m_params.m_source_alpha_filenames.size(), m_params.m_source_mipmap_images.size()); @@ -129,10 +129,10 @@ namespace basisu 
PRINT_BOOL_VALUE(m_renormalize); PRINT_BOOL_VALUE(m_multithreading); PRINT_BOOL_VALUE(m_disable_hierarchical_endpoint_codebooks); - + PRINT_FLOAT_VALUE(m_endpoint_rdo_thresh); PRINT_FLOAT_VALUE(m_selector_rdo_thresh); - + PRINT_BOOL_VALUE(m_mip_gen); PRINT_BOOL_VALUE(m_mip_renormalize); PRINT_BOOL_VALUE(m_mip_wrapping); @@ -151,7 +151,7 @@ namespace basisu debug_printf("m_userdata0: 0x%X, m_userdata1: 0x%X\n", m_params.m_userdata0, m_params.m_userdata1); debug_printf("m_us_per_frame: %i (%f fps)\n", m_params.m_us_per_frame, m_params.m_us_per_frame ? 1.0f / (m_params.m_us_per_frame / 1000000.0f) : 0); debug_printf("m_pack_uastc_flags: 0x%X\n", m_params.m_pack_uastc_flags); - + PRINT_BOOL_VALUE(m_rdo_uastc); PRINT_FLOAT_VALUE(m_rdo_uastc_quality_scalar); PRINT_INT_VALUE(m_rdo_uastc_dict_size); @@ -165,7 +165,7 @@ namespace basisu PRINT_INT_VALUE(m_resample_width); PRINT_INT_VALUE(m_resample_height); PRINT_FLOAT_VALUE(m_resample_factor); - + debug_printf("Has global codebooks: %u\n", m_params.m_pGlobal_codebooks ? 
1 : 0); if (m_params.m_pGlobal_codebooks) { @@ -185,7 +185,7 @@ namespace basisu } PRINT_BOOL_VALUE(m_validate_output_data); - + #undef PRINT_BOOL_VALUE #undef PRINT_INT_VALUE #undef PRINT_UINT_VALUE @@ -201,7 +201,7 @@ namespace basisu if ((m_params.m_compute_stats) && (!m_params.m_validate_output_data)) { m_params.m_validate_output_data = true; - + debug_printf("Note: m_compute_stats is true, so forcing m_validate_output_data to true as well\n"); } @@ -214,7 +214,7 @@ namespace basisu return true; } - + basis_compressor::error_code basis_compressor::process() { debug_printf("basis_compressor::process\n"); @@ -254,7 +254,7 @@ namespace basisu if (!create_basis_file_and_transcode()) return cECFailedCreateBasisFile; - + if (m_params.m_create_ktx2_file) { if (!create_ktx2_file()) @@ -280,7 +280,7 @@ namespace basisu m_uastc_backend_output.m_slice_desc = m_slice_descs; m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size()); m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size()); - + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) { gpu_image& tex = m_uastc_slice_textures[slice_index]; @@ -291,7 +291,7 @@ namespace basisu const uint32_t num_blocks_y = tex.get_blocks_y(); const uint32_t total_blocks = tex.get_total_blocks(); const image& source_image = m_slice_images[slice_index]; - + std::atomic total_blocks_processed; total_blocks_processed = 0; @@ -307,7 +307,7 @@ namespace basisu { #endif BASISU_NOTE_UNUSED(num_blocks_y); - + uint32_t uastc_flags = m_params.m_pack_uastc_flags; if ((m_params.m_rdo_uastc) && (m_params.m_rdo_uastc_favor_simpler_modes_in_rdo_mode)) uastc_flags |= cPackUASTCFavorSimplerModes; @@ -326,7 +326,7 @@ namespace basisu encode_uastc(&block_pixels[0][0].r, dest_block, uastc_flags); total_blocks_processed++; - + uint32_t val = total_blocks_processed; if ((val & 16383) == 16383) { @@ -354,7 +354,7 @@ namespace basisu rdo_params.m_lz_dict_size = m_params.m_rdo_uastc_dict_size; 
rdo_params.m_smooth_block_max_error_scale = m_params.m_rdo_uastc_max_smooth_block_error_scale; rdo_params.m_max_smooth_block_std_dev = m_params.m_rdo_uastc_smooth_block_max_std_dev; - + bool status = uastc_rdo(tex.get_total_blocks(), (basist::uastc_block*)tex.get_ptr(), (const color_rgba *)m_source_blocks[slice_desc.m_first_block_index].m_pixels, rdo_params, m_params.m_pack_uastc_flags, m_params.m_rdo_uastc_multithreading ? m_params.m_pJob_pool : nullptr, (m_params.m_rdo_uastc_multithreading && m_params.m_pJob_pool) ? basisu::minimum(4, (uint32_t)m_params.m_pJob_pool->get_total_threads()) : 0); @@ -366,11 +366,11 @@ namespace basisu m_uastc_backend_output.m_slice_image_data[slice_index].resize(tex.get_size_in_bytes()); memcpy(&m_uastc_backend_output.m_slice_image_data[slice_index][0], tex.get_ptr(), tex.get_size_in_bytes()); - + m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(tex.get_ptr(), tex.get_size_in_bytes(), 0); - + } // slice_index - + return cECSuccess; } @@ -411,12 +411,12 @@ namespace basisu image &level_img = *enlarge_vector(mips, 1); level_img.resize(level_width, level_height); - - int result = stbir_resize_uint8_generic( + + int result = stbir_resize_uint8_generic( (const uint8_t *)img.get_ptr(), img.get_width(), img.get_height(), img.get_pitch() * sizeof(color_rgba), (uint8_t *)level_img.get_ptr(), level_img.get_width(), level_img.get_height(), level_img.get_pitch() * sizeof(color_rgba), has_alpha ? 4 : 3, has_alpha ? 3 : STBIR_ALPHA_CHANNEL_NONE, m_params.m_mip_premultiplied ? STBIR_FLAG_ALPHA_PREMULTIPLIED : 0, - m_params.m_mip_wrapping ? STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR, + m_params.m_mip_wrapping ? STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? 
STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR, nullptr); if (result == 0) @@ -424,7 +424,7 @@ namespace basisu error_printf("basis_compressor::generate_mipmaps: stbir_resize_uint8_generic() failed!\n"); return false; } - + if (m_params.m_mip_renormalize) level_img.renormalize_normal_map(); } @@ -482,14 +482,14 @@ namespace basisu basisu::vector source_images; basisu::vector source_filenames; - + // First load all source images, and determine if any have an alpha channel. for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) { const char *pSource_filename = ""; image file_image; - + if (m_params.m_read_source_images) { pSource_filename = m_params.m_source_filenames[source_file_index].c_str(); @@ -551,7 +551,7 @@ namespace basisu } alpha_swizzled = m_params.m_swizzle[3] != 3; } - + bool has_alpha = false; if (m_params.m_force_alpha || alpha_swizzled) has_alpha = true; @@ -564,7 +564,7 @@ namespace basisu m_any_source_image_has_alpha = true; debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, file_image.get_width(), file_image.get_height(), has_alpha); - + if (m_params.m_y_flip) file_image.flip_y(); @@ -621,7 +621,7 @@ namespace basisu source_filenames.push_back(pSource_filename); } - // Check if the caller has generated their own mipmaps. + // Check if the caller has generated their own mipmaps. if (m_params.m_source_mipmap_images.size()) { // Make sure they've passed us enough mipmap chains. @@ -662,15 +662,15 @@ namespace basisu // Now, for each source image, create the slices corresponding to that image. basisu::vector slices; - + slices.reserve(32); - + // The first (largest) mipmap level. image& file_image = source_images[source_file_index]; - + // Reserve a slot for mip0. slices.resize(1); - + if (m_params.m_source_mipmap_images.size()) { // User-provided mipmaps for each layer or image in the texture array. 
@@ -709,10 +709,10 @@ namespace basisu uint_vec mip_indices(slices.size()); for (uint32_t i = 0; i < slices.size(); i++) mip_indices[i] = i; - + if ((m_any_source_image_has_alpha) && (!m_params.m_uastc)) { - // For ETC1S, if source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB. + // For ETC1S, if source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB. basisu::vector alpha_slices; uint_vec new_mip_indices; @@ -731,7 +731,7 @@ namespace basisu lvl_a(x, y).set_noclamp_rgba(a, a, a, 255); } } - + lvl_rgb.set_alpha(255); alpha_slices.push_back(lvl_rgb); @@ -746,7 +746,7 @@ namespace basisu } assert(slices.size() == mip_indices.size()); - + for (uint32_t slice_index = 0; slice_index < slices.size(); slice_index++) { image& slice_image = slices[slice_index]; @@ -779,11 +779,11 @@ namespace basisu enlarge_vector(m_stats, 1); enlarge_vector(m_slice_images, 1); enlarge_vector(m_slice_descs, 1); - + m_stats[dest_image_index].m_filename = source_filename.c_str(); m_stats[dest_image_index].m_width = orig_width; m_stats[dest_image_index].m_height = orig_height; - + debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n", m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(), orig_width, orig_height, slice_image.get_width(), slice_image.get_height()); basisu_backend_slice_desc &slice_desc = m_slice_descs[dest_image_index]; @@ -803,7 +803,7 @@ namespace basisu slice_desc.m_num_macroblocks_y = (slice_desc.m_num_blocks_y + 1) >> 1; slice_desc.m_source_file_index = source_file_index; - + slice_desc.m_mip_index = mip_indices[slice_index]; slice_desc.m_alpha = is_alpha_slice; @@ -819,7 +819,7 @@ namespace basisu // Finally, swap in the slice's image to avoid copying it. // NOTE: slice_image is now blank. 
m_slice_images[dest_image_index].swap(slice_image); - + } // slice_index } // source_file_index @@ -832,7 +832,7 @@ namespace basisu error_printf("Too many slices!\n"); return false; } - + // Basic sanity check on the slices for (uint32_t i = 1; i < m_slice_descs.size(); i++) { @@ -842,7 +842,7 @@ namespace basisu // Make sure images are in order int image_delta = (int)slice_desc.m_source_file_index - (int)prev_slice_desc.m_source_file_index; if (image_delta > 1) - return false; + return false; // Make sure mipmap levels are in order if (!image_delta) @@ -914,20 +914,20 @@ namespace basisu } // Do some basic validation for 2D arrays, cubemaps, video, and volumes. - bool basis_compressor::validate_texture_type_constraints() + bool basis_compressor::validate_texture_type_constraints() { debug_printf("basis_compressor::validate_texture_type_constraints\n"); // In 2D mode anything goes (each image may have a different resolution and # of mipmap levels). if (m_params.m_tex_type == basist::cBASISTexType2D) return true; - + uint32_t total_basis_images = 0; for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) { const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - + total_basis_images = maximum(total_basis_images, slice_desc.m_source_file_index + 1); } @@ -950,7 +950,7 @@ namespace basisu const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; image_mipmap_levels[slice_desc.m_source_file_index] = maximum(image_mipmap_levels[slice_desc.m_source_file_index], slice_desc.m_mip_index + 1); - + if (slice_desc.m_mip_index != 0) continue; @@ -1004,7 +1004,7 @@ namespace basisu bool basis_compressor::process_frontend() { debug_printf("basis_compressor::process_frontend\n"); - + #if 0 // TODO basis_etc1_pack_params pack_params; @@ -1055,21 +1055,21 @@ namespace basisu error_printf("Too many selector clusters! 
(%u but max is %u)\n", selector_clusters, basisu_frontend::cMaxSelectorClusters); return false; } - + if (m_params.m_quality_level != -1) { const float quality = saturate(m_params.m_quality_level / 255.0f); - + const float bits_per_endpoint_cluster = 14.0f; const float max_desired_endpoint_cluster_bits_per_texel = 1.0f; // .15f int max_endpoints = static_cast((max_desired_endpoint_cluster_bits_per_texel * total_texels) / bits_per_endpoint_cluster); - + const float mid = 128.0f / 255.0f; float color_endpoint_quality = quality; const float endpoint_split_point = 0.5f; - + // In v1.2 and in previous versions, the endpoint codebook size at quality 128 was 3072. This wasn't quite large enough. const int ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE = 4800; const int MAX_ENDPOINT_CODEBOOK_SIZE = 8192; @@ -1080,7 +1080,7 @@ namespace basisu max_endpoints = clamp(max_endpoints, 256, ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE); max_endpoints = minimum(max_endpoints, m_total_blocks); - + if (max_endpoints < 64) max_endpoints = 64; endpoint_clusters = clamp((uint32_t)(.5f + lerp(32, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); @@ -1091,12 +1091,12 @@ namespace basisu max_endpoints = clamp(max_endpoints, 256, MAX_ENDPOINT_CODEBOOK_SIZE); max_endpoints = minimum(max_endpoints, m_total_blocks); - + if (max_endpoints < ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE) max_endpoints = ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE; endpoint_clusters = clamp((uint32_t)(.5f + lerp(ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); } - + float bits_per_selector_cluster = 14.0f; const float max_desired_selector_cluster_bits_per_texel = 1.0f; // .15f @@ -1120,7 +1120,7 @@ namespace basisu { if (!m_params.m_endpoint_rdo_thresh.was_changed()) m_params.m_endpoint_rdo_thresh *= .25f; - + if (!m_params.m_selector_rdo_thresh.was_changed()) 
m_params.m_selector_rdo_thresh *= .25f; } @@ -1147,12 +1147,12 @@ namespace basisu if (!m_params.m_endpoint_rdo_thresh.was_changed()) m_params.m_endpoint_rdo_thresh *= lerp(1.0f, .75f, l); - + if (!m_params.m_selector_rdo_thresh.was_changed()) m_params.m_selector_rdo_thresh *= lerp(1.0f, .75f, l); } } - + basisu_frontend::params p; p.m_num_source_blocks = m_total_blocks; p.m_pSource_blocks = &m_source_blocks[0]; @@ -1168,7 +1168,7 @@ namespace basisu p.m_validate = m_params.m_validate_etc1s; p.m_pJob_pool = m_params.m_pJob_pool; p.m_pGlobal_codebooks = m_params.m_pGlobal_codebooks; - + // Don't keep trying to use OpenCL if it ever fails. p.m_pOpenCL_context = !m_opencl_failed ? m_pOpenCL_context : nullptr; @@ -1177,7 +1177,7 @@ namespace basisu error_printf("basisu_frontend::init() failed!\n"); return false; } - + m_frontend.compress(); if (m_frontend.get_opencl_failed()) @@ -1188,18 +1188,18 @@ namespace basisu for (uint32_t i = 0; i < m_slice_descs.size(); i++) { char filename[1024]; -#ifdef _WIN32 +#ifdef _WIN32 sprintf_s(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); #else snprintf(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); -#endif +#endif m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, true); #ifdef _WIN32 sprintf_s(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); #else snprintf(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); -#endif +#endif m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, false); } } @@ -1263,13 +1263,13 @@ namespace basisu backend_params.m_debug_images = m_params.m_debug_images; backend_params.m_etc1s = true; backend_params.m_compression_level = m_params.m_compression_level; - + if (!m_params.m_no_endpoint_rdo) backend_params.m_endpoint_rdo_quality_thresh = 
m_params.m_endpoint_rdo_thresh; if (!m_params.m_no_selector_rdo) backend_params.m_selector_rdo_quality_thresh = m_params.m_selector_rdo_thresh; - + backend_params.m_used_global_codebooks = m_frontend.get_params().m_pGlobal_codebooks != nullptr; backend_params.m_validate = m_params.m_validate_output_data; @@ -1298,7 +1298,7 @@ namespace basisu error_printf("basis_compressor::create_basis_file_and_transcode: basisu_backend:init() failed!\n"); return false; } - + const uint8_vec &comp_data = m_basis_file.get_compressed_data(); m_output_basis_file = comp_data; @@ -1439,7 +1439,7 @@ namespace basisu assert(m_decoded_output_textures[slice_index].get_total_blocks() == total_blocks); } } // if (m_params.m_validate_output_data) - + return true; } @@ -1482,7 +1482,7 @@ namespace basisu uint32_t total_texels = 0; for (uint32_t i = 0; i < m_slice_descs.size(); i++) total_texels += (m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y) * 16; - + m_basis_bits_per_texel = comp_size * 8.0f / total_texels; debug_printf(".basis file size: %u, LZ compressed file size: %u, %3.2f bits/texel\n", @@ -1492,7 +1492,7 @@ namespace basisu } m_stats.resize(m_slice_descs.size()); - + if (m_params.m_validate_output_data) { for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) @@ -1681,10 +1681,10 @@ namespace basisu } } } // if (m_params.m_validate_output_data) - + return true; } - + // Make sure all the mip 0's have the same dimensions and number of mipmap levels, or we can't encode the KTX2 file. 
bool basis_compressor::validate_ktx2_constraints() { @@ -1730,7 +1730,7 @@ namespace basisu static uint8_t g_ktx2_etc1s_alpha_dfd[60] = { 0x3C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x38,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF,0x40,0x0,0x3F,0xF,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; static uint8_t g_ktx2_uastc_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x4,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; static uint8_t g_ktx2_uastc_alpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; - + void basis_compressor::get_dfd(uint8_vec &dfd, const basist::ktx2_header &header) { const uint8_t* pDFD; @@ -1762,14 +1762,14 @@ namespace basisu dfd_len = sizeof(g_ktx2_etc1s_nonalpha_dfd); } } - + assert(dfd_len >= 44); dfd.resize(dfd_len); memcpy(dfd.data(), pDFD, dfd_len); uint32_t dfd_bits = basisu::read_le_dword(dfd.data() + 3 * sizeof(uint32_t)); - + dfd_bits &= ~(0xFF << 16); if (m_params.m_ktx2_srgb_transfer_func) @@ -1794,7 +1794,7 @@ namespace basisu if (m_params.m_uastc) { dfd_chan0 &= ~(0xF << 24); - + // TODO: Allow the caller to override this if (m_any_source_image_has_alpha) dfd_chan0 |= (basist::KTX2_DF_CHANNEL_UASTC_RGBA << 24); @@ -1818,7 +1818,7 @@ namespace basisu // Determine the width/height, number of array layers, mipmap levels, and the number of faces (1 for 2D, 6 for cubemap). // This does not support 1D or 3D. 
uint32_t base_width = 0, base_height = 0, total_layers = 0, total_levels = 0, total_faces = 1; - + for (uint32_t i = 0; i < m_slice_descs.size(); i++) { if ((m_slice_descs[i].m_mip_index == 0) && (!base_width)) @@ -1836,7 +1836,7 @@ namespace basisu if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray) { assert((total_layers % 6) == 0); - + total_layers /= 6; assert(total_layers >= 1); @@ -1922,7 +1922,7 @@ namespace basisu // No supercompression compressed_level_data_bytes = level_data_bytes; } - + uint8_vec etc1s_global_data; // Create ETC1S global supercompressed data @@ -1976,14 +1976,14 @@ namespace basisu append_vector(etc1s_global_data, backend_output.m_endpoint_palette); append_vector(etc1s_global_data, backend_output.m_selector_palette); append_vector(etc1s_global_data, backend_output.m_slice_image_tables); - + header.m_supercompression_scheme = basist::KTX2_SS_BASISLZ; } // Key values basist::ktx2_transcoder::key_value_vec key_values(m_params.m_ktx2_key_values); key_values.enlarge(1); - + const char* pKTXwriter = "KTXwriter"; key_values.back().m_key.resize(strlen(pKTXwriter) + 1); memcpy(key_values.back().m_key.data(), pKTXwriter, strlen(pKTXwriter) + 1); @@ -2044,7 +2044,7 @@ namespace basisu #if BASISU_DISABLE_KTX2_ALIGNMENT_WORKAROUND break; #endif - + // Hack to ensure the KVD block ends on a 16 byte boundary, because we have no other official way of aligning the data. uint32_t kvd_end_file_offset = kvd_file_offset + key_value_data.size(); uint32_t bytes_needed_to_pad = (16 - (kvd_end_file_offset & 15)) & 15; @@ -2062,13 +2062,13 @@ namespace basisu bytes_needed_to_pad += 16; printf("WARNING: Due to a KTX2 validator bug related to mipPadding, we must insert a dummy key into the KTX2 file of %u bytes\n", bytes_needed_to_pad); - - // We're not good - need to add a dummy key large enough to force file alignment so the mip level array gets aligned. 
+ + // We're not good - need to add a dummy key large enough to force file alignment so the mip level array gets aligned. // We can't just add some bytes before the mip level array because ktx2check will see that as extra data in the file that shouldn't be there in ktxValidator::validateDataSize(). key_values.enlarge(1); for (uint32_t i = 0; i < (bytes_needed_to_pad - 4 - 1 - 1); i++) key_values.back().m_key.push_back(127); - + key_values.back().m_key.push_back(0); key_values.back().m_value.push_back(0); @@ -2076,13 +2076,13 @@ namespace basisu key_values.sort(); key_value_data.resize(0); - + // Try again } basisu::vector level_index_array(total_levels); memset(level_index_array.data(), 0, level_index_array.size_in_bytes()); - + m_output_ktx2_file.clear(); m_output_ktx2_file.reserve(m_output_basis_file.size()); @@ -2091,7 +2091,7 @@ namespace basisu // Level index array append_vector(m_output_ktx2_file, (const uint8_t*)level_index_array.data(), level_index_array.size_in_bytes()); - + // DFD const uint8_t* pDFD = dfd.data(); uint32_t dfd_len = dfd.size(); @@ -2151,7 +2151,7 @@ namespace basisu level_index_array[level].m_byte_offset = m_output_ktx2_file.size(); append_vector(m_output_ktx2_file, compressed_level_data_bytes[level]); } - + // Write final header memcpy(m_output_ktx2_file.data(), &header, sizeof(header)); @@ -2185,7 +2185,7 @@ namespace basisu std::atomic result; result = true; - + std::atomic opencl_failed; opencl_failed = false; @@ -2200,19 +2200,19 @@ namespace basisu tm.start(); basis_compressor c; - + // Dummy job pool job_pool task_jpool(1); params.m_pJob_pool = &task_jpool; // TODO: Remove this flag entirely - params.m_multithreading = true; - + params.m_multithreading = true; + // Stop using OpenCL if a failure ever occurs. 
if (opencl_failed) params.m_use_opencl = false; bool status = c.init(params); - + if (c.get_opencl_failed()) opencl_failed = true; @@ -2241,7 +2241,7 @@ namespace basisu else { results.m_error_code = basis_compressor::cECFailedInitializing; - + result = false; } @@ -2301,7 +2301,7 @@ namespace basisu for (uint32_t i = 1; i < source_images.size(); i++) comp_params.m_source_mipmap_images[0][i - 1] = source_images[i]; } - + comp_params.m_multithreading = (flags_and_quality & cFlagThreaded) != 0; comp_params.m_use_opencl = (flags_and_quality & cFlagUseOpenCL) != 0; @@ -2321,9 +2321,9 @@ namespace basisu } else comp_params.m_quality_level = basisu::maximum(1, flags_and_quality & 255); - + comp_params.m_create_ktx2_file = (flags_and_quality & cFlagKTX2) != 0; - + if (comp_params.m_create_ktx2_file) { // Set KTX2 specific parameters. @@ -2432,7 +2432,7 @@ namespace basisu const uint32_t W = 1024, H = 1024; basisu::vector images; image& img = images.enlarge(1)->resize(W, H); - + const uint32_t NUM_RAND_LETTERS = 6000;// 40000; rand r; @@ -2473,7 +2473,7 @@ namespace basisu error_printf("basis_benchmark_etc1s_opencl: basis_compress() failed (CPU)!\n"); return false; } - + best_cpu_time = minimum(best_cpu_time, cpu_time); basis_free_data(pComp_data); @@ -2515,7 +2515,7 @@ namespace basisu } printf("Best GPU time: %3.3f\n", best_gpu_time); - + return best_gpu_time < best_cpu_time; } diff --git a/encoder/basisu_comp.h b/encoder/basisu_comp.h index b6c9fef9..747b02dd 100644 --- a/encoder/basisu_comp.h +++ b/encoder/basisu_comp.h @@ -41,10 +41,10 @@ namespace basisu const uint32_t BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION = 16384; // Allow block's color distance to increase by 1.5 while searching for an alternative nearby endpoint. - const float BASISU_DEFAULT_ENDPOINT_RDO_THRESH = 1.5f; - + const float BASISU_DEFAULT_ENDPOINT_RDO_THRESH = 1.5f; + // Allow block's color distance to increase by 1.25 while searching the selector history buffer for a close enough match. 
- const float BASISU_DEFAULT_SELECTOR_RDO_THRESH = 1.25f; + const float BASISU_DEFAULT_SELECTOR_RDO_THRESH = 1.25f; const int BASISU_DEFAULT_QUALITY = 128; const float BASISU_DEFAULT_HYBRID_SEL_CB_QUALITY_THRESH = 2.0f; @@ -73,7 +73,7 @@ namespace basisu m_filename.clear(); m_width = 0; m_height = 0; - + m_basis_rgb_avg_psnr = 0.0f; m_basis_rgba_avg_psnr = 0.0f; m_basis_a_avg_psnr = 0.0f; @@ -87,7 +87,7 @@ namespace basisu m_bc7_luma_709_psnr = 0.0f; m_bc7_luma_601_psnr = 0.0f; m_bc7_luma_709_ssim = 0.0f; - + m_best_etc1s_rgb_avg_psnr = 0.0f; m_best_etc1s_luma_709_psnr = 0.0f; m_best_etc1s_luma_601_psnr = 0.0f; @@ -115,7 +115,7 @@ namespace basisu float m_bc7_luma_709_psnr; float m_bc7_luma_601_psnr; float m_bc7_luma_709_ssim; - + // Highest achievable quality ETC1S statistics float m_best_etc1s_rgb_avg_psnr; float m_best_etc1s_luma_709_psnr; @@ -272,7 +272,7 @@ namespace basisu m_no_endpoint_rdo.clear(); m_endpoint_rdo_thresh.clear(); - + m_mip_gen.clear(); m_mip_scale.clear(); m_mip_filter = "kaiser"; @@ -319,24 +319,24 @@ namespace basisu m_pJob_pool = nullptr; } - + // True to generate UASTC .basis file data, otherwise ETC1S. bool_param m_uastc; bool_param m_use_opencl; - // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG images to read. + // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG images to read. // Otherwise, the compressor processes the images in m_source_images. basisu::vector m_source_filenames; basisu::vector m_source_alpha_filenames; - + basisu::vector m_source_images; - + // Stores mipmaps starting from level 1. Level 0 is still stored in m_source_images, as usual. // If m_source_mipmaps isn't empty, automatic mipmap generation isn't done. m_source_mipmaps.size() MUST equal m_source_images.size() or the compressor returns an error. 
// The compressor applies the user-provided swizzling (in m_swizzle) to these images. basisu::vector< basisu::vector > m_source_mipmap_images; - + // Filename of the output basis file std::string m_out_filename; @@ -347,19 +347,19 @@ namespace basisu // If true, the compressor will print basis status to stdout during compression. bool_param m_status_output; - + // Output debug information during compression bool_param m_debug; bool_param m_validate_etc1s; - + // m_debug_images is pretty slow bool_param m_debug_images; - // ETC1S compression level, from 0 to BASISU_MAX_COMPRESSION_LEVEL (higher is slower). + // ETC1S compression level, from 0 to BASISU_MAX_COMPRESSION_LEVEL (higher is slower). // This parameter controls numerous internal encoding speed vs. compression efficiency/performance tradeoffs. // Note this is NOT the same as the ETC1S quality level, and most users shouldn't change this. param m_compression_level; - + // Use perceptual sRGB colorspace metrics instead of linear bool_param m_perceptual; @@ -375,20 +375,20 @@ namespace basisu // Write the output basis file to disk using m_out_filename bool_param m_write_output_basis_files; - - // Compute and display image metrics + + // Compute and display image metrics bool_param m_compute_stats; // Print stats to stdout, if m_compute_stats is true. bool_param m_print_stats; - + // Check to see if any input image has an alpha channel, if so then the output basis file will have alpha channels bool_param m_check_for_alpha; - + // Always put alpha slices in the output basis file, even when the input doesn't have alpha - bool_param m_force_alpha; + bool_param m_force_alpha; bool_param m_multithreading; - + // Split the R channel to RGB and the G channel to alpha, then write a basis file with alpha channels char m_swizzle[4]; @@ -397,25 +397,25 @@ namespace basisu // If true the front end will not use 2 level endpoint codebook searching, for slightly higher quality but much slower execution. 
// Note some m_compression_level's disable this automatically. bool_param m_disable_hierarchical_endpoint_codebooks; - + // mipmap generation parameters bool_param m_mip_gen; param m_mip_scale; std::string m_mip_filter; bool_param m_mip_srgb; bool_param m_mip_premultiplied; // not currently supported - bool_param m_mip_renormalize; + bool_param m_mip_renormalize; bool_param m_mip_wrapping; bool_param m_mip_fast; param m_mip_smallest_dimension; - - // Codebook size (quality) control. + + // Codebook size (quality) control. // If m_quality_level != -1, it controls the quality level. It ranges from [1,255] or [BASISU_QUALITY_MIN, BASISU_QUALITY_MAX]. // Otherwise m_max_endpoint_clusters/m_max_selector_clusters controls the codebook sizes directly. uint32_t m_max_endpoint_clusters; uint32_t m_max_selector_clusters; int m_quality_level; - + // m_tex_type, m_userdata0, m_userdata1, m_framerate - These fields go directly into the Basis file header. basist::basis_texture_type m_tex_type; uint32_t m_userdata0; @@ -464,7 +464,7 @@ namespace basisu // Note it *should* be possible to call init() multiple times with different inputs, but this scenario isn't well tested. Ideally, create 1 object, compress, then delete it. bool init(const basis_compressor_params ¶ms); - + enum error_code { cECSuccess = 0, @@ -485,7 +485,7 @@ namespace basisu // The output .basis file will always be valid of process() succeeded. const uint8_vec &get_output_basis_file() const { return m_output_basis_file; } - + // The output .ktx2 file will only be valid if m_create_ktx2_file was true and process() succeeded. 
const uint8_vec& get_output_ktx2_file() const { return m_output_ktx2_file; } @@ -493,27 +493,27 @@ namespace basisu uint32_t get_basis_file_size() const { return m_basis_file_size; } double get_basis_bits_per_texel() const { return m_basis_bits_per_texel; } - + bool get_any_source_image_has_alpha() const { return m_any_source_image_has_alpha; } bool get_opencl_failed() const { return m_opencl_failed; } - + private: basis_compressor_params m_params; opencl_context_ptr m_pOpenCL_context; - + basisu::vector m_slice_images; basisu::vector m_stats; uint32_t m_basis_file_size; double m_basis_bits_per_texel; - + basisu_backend_slice_desc_vec m_slice_descs; uint32_t m_total_blocks; - + basisu_frontend m_frontend; pixel_block_vec m_source_blocks; @@ -533,7 +533,7 @@ namespace basisu uint8_vec m_output_basis_file; uint8_vec m_output_ktx2_file; - + basisu::vector m_uastc_slice_textures; basisu_backend_output m_uastc_backend_output; @@ -555,8 +555,8 @@ namespace basisu void get_dfd(uint8_vec& dfd, const basist::ktx2_header& hdr); bool create_ktx2_file(); }; - - // Alternative simple C-style wrapper API around the basis_compressor class. + + // Alternative simple C-style wrapper API around the basis_compressor class. // This doesn't expose every encoder feature, but it's enough to get going. // Important: basisu_encoder_init() MUST be called first before calling these functions. // @@ -565,15 +565,15 @@ namespace basisu // OR // pImageRGBA: pointer to a 32-bpp RGBx or RGBA raster image, R first in memory, A last. Top scanline first in memory. // width/height/pitch_in_pixels: dimensions of pImageRGBA - // + // // flags_and_quality: Combination of the above flags logically OR'd with the ETC1S or UASTC level, i.e. "cFlagSRGB | cFlagGenMipsClamp | cFlagThreaded | 128" or "cFlagSRGB | cFlagGenMipsClamp | cFlagUASTC | cFlagThreaded | cPackUASTCLevelDefault". 
// In ETC1S mode, the lower 8-bits are the ETC1S quality level which ranges from [1,255] (higher=better quality/larger files) - // In UASTC mode, the lower 8-bits are the UASTC pack level (see cPackUASTCLevelFastest, etc.). Fastest/lowest quality is 0, so be sure to set it correctly. - // + // In UASTC mode, the lower 8-bits are the UASTC pack level (see cPackUASTCLevelFastest, etc.). Fastest/lowest quality is 0, so be sure to set it correctly. + // // uastc_rdo_quality: Float UASTC RDO quality level (0=no change, higher values lower quality but increase compressibility, initially try .5-1.5) - // + // // pSize: Returns the output data's compressed size in bytes - // + // // Return value is the compressed .basis or .ktx2 file data, or nullptr on failure. Must call basis_free() to free it. enum { @@ -587,17 +587,17 @@ namespace basisu cFlagSRGB = 1 << 13, // input texture is sRGB, use perceptual colorspace metrics, also use sRGB filtering during mipmap gen, and also sets KTX2 output transfer func to sRGB cFlagGenMipsClamp = 1 << 14, // generate mipmaps with clamp addressing cFlagGenMipsWrap = 1 << 15, // generate mipmaps with wrap addressing - + cFlagYFlip = 1 << 16, // flip source image on Y axis before compression - + cFlagUASTC = 1 << 17, // use UASTC compression vs. ETC1S cFlagUASTCRDO = 1 << 18, // use RDO postprocessing when generating UASTC files (must set uastc_rdo_quality to the quality scalar) - + cFlagPrintStats = 1 << 19, // print image stats to stdout cFlagPrintStatus = 1 << 20 // print status to stdout }; - // This function accepts an array of source images. + // This function accepts an array of source images. // If more than one image is provided, it's assumed the images form a mipmap pyramid and automatic mipmap generation is disabled. // Returns a pointer to the compressed .basis or .ktx2 file data. *pSize is the size of the compressed data. The returned block must be freed using basis_free_data(). // basisu_encoder_init() MUST be called first! 
@@ -634,7 +634,7 @@ namespace basisu double m_basis_bits_per_texel; bool m_any_source_image_has_alpha; - parallel_results() + parallel_results() { clear(); } @@ -650,7 +650,7 @@ namespace basisu m_any_source_image_has_alpha = false; } }; - + // Compresses an array of input textures across total_threads threads using the basis_compressor class. // Compressing multiple textures at a time is substantially more efficient than just compressing one at a time. // total_threads must be >= 1. @@ -658,6 +658,6 @@ namespace basisu uint32_t total_threads, const basisu::vector ¶ms_vec, basisu::vector< parallel_results > &results_vec); - + } // namespace basisu diff --git a/encoder/basisu_enc.cpp b/encoder/basisu_enc.cpp index 99ef7ab1..9f86e3cd 100644 --- a/encoder/basisu_enc.cpp +++ b/encoder/basisu_enc.cpp @@ -63,7 +63,7 @@ namespace basisu // This is a Public Domain 8x8 font from here: // https://github.com/dhepper/font8x8/blob/master/font8x8_basic.h - const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] = + const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] = { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0020 ( ) { 0x18, 0x3C, 0x3C, 0x18, 0x18, 0x00, 0x18, 0x00}, // U+0021 (!) 
@@ -203,7 +203,7 @@ namespace basisu { char buf[8192]; -#ifdef _WIN32 +#ifdef _WIN32 vsprintf_s(buf, sizeof(buf), pFmt, args); #else vsnprintf(buf, sizeof(buf), pFmt, args); @@ -256,7 +256,7 @@ namespace basisu #else #error TODO #endif - + interval_timer::interval_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false) { if (!g_timer_freq) @@ -290,7 +290,7 @@ namespace basisu timer_ticks delta = stop_time - m_start_time; return delta * g_timer_freq; } - + void interval_timer::init() { if (!g_timer_freq) @@ -316,21 +316,21 @@ namespace basisu init(); return ticks * g_timer_freq; } - + const uint32_t MAX_32BIT_ALLOC_SIZE = 250000000; - + bool load_tga(const char* pFilename, image& img) { int w = 0, h = 0, n_chans = 0; uint8_t* pImage_data = read_tga(pFilename, w, h, n_chans); - + if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4))) { error_printf("Failed loading .TGA image \"%s\"!\n", pFilename); if (pImage_data) free(pImage_data); - + return false; } @@ -346,7 +346,7 @@ namespace basisu return false; } } - + img.resize(w, h); const uint8_t *pSrc = pImage_data; @@ -375,7 +375,7 @@ namespace basisu { interval_timer tm; tm.start(); - + if (!buf_size) return false; @@ -393,7 +393,7 @@ namespace basisu return true; } - + bool load_png(const char* pFilename, image& img) { uint8_vec buffer; @@ -412,9 +412,9 @@ namespace basisu uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering); if (!pImage_data) return false; - + img.init(pImage_data, width, height, 4); - + free(pImage_data); return true; @@ -438,12 +438,12 @@ namespace basisu return false; } - + bool save_png(const char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp) { if (!img.get_total_pixels()) return false; - + void* pPNG_data = nullptr; size_t PNG_data_size = 0; @@ -461,7 +461,7 @@ namespace basisu else { bool has_alpha = false; - + if 
((image_save_flags & cImageSaveIgnoreAlpha) == 0) has_alpha = img.has_alpha(); @@ -478,7 +478,7 @@ namespace basisu pDst[0] = pSrc->r; pDst[1] = pSrc->g; pDst[2] = pSrc->b; - + pSrc++; pDst += 3; } @@ -502,10 +502,10 @@ namespace basisu } free(pPNG_data); - + return status; } - + bool read_file_to_vec(const char* pFilename, uint8_vec& data) { FILE* pFile = nullptr; @@ -516,7 +516,7 @@ namespace basisu #endif if (!pFile) return false; - + fseek(pFile, 0, SEEK_END); #ifdef _WIN32 int64_t filesize = _ftelli64(pFile); @@ -601,17 +601,17 @@ namespace basisu } bool image_resample(const image &src, image &dst, bool srgb, - const char *pFilter, float filter_scale, + const char *pFilter, float filter_scale, bool wrapping, uint32_t first_comp, uint32_t num_comps) { assert((first_comp + num_comps) <= 4); const int cMaxComps = 4; - + const uint32_t src_w = src.get_width(), src_h = src.get_height(); const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height(); - + if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION) { printf("Image is too large!\n"); @@ -620,10 +620,10 @@ namespace basisu if (!src_w || !src_h || !dst_w || !dst_h) return false; - + if ((num_comps < 1) || (num_comps > cMaxComps)) return false; - + if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION)) { printf("Image is too large!\n"); @@ -654,7 +654,7 @@ namespace basisu std::vector samples[cMaxComps]; Resampler *resamplers[cMaxComps]; - + resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h, wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0); @@ -714,7 +714,7 @@ namespace basisu break; const bool linear_flag = !srgb || (comp_index == 3); - + color_rgba *pDst = &dst(0, dst_y); for (uint32_t x = 0; x < dst_w; x++) @@ -758,9 +758,9 @@ namespace basisu A[0].m_key = 1; return; } - + A[0].m_key += A[1].m_key; - + int s = 2, r = 0, next; for (next = 1; next < (num_syms - 1); ++next) { @@ -852,7 +852,7 @@ namespace basisu for (i = 0; i < num_syms; i++) { uint32_t freq = pSyms0[i].m_key; - + // We scale all input frequencies to 16-bits. assert(freq <= UINT16_MAX); @@ -1043,7 +1043,7 @@ namespace basisu uint32_t total_used = tab.get_total_used_codes(); put_bits(total_used, cHuffmanMaxSymsLog2); - + if (!total_used) return 0; @@ -1107,7 +1107,7 @@ namespace basisu const uint32_t l = syms[i] & 63, e = syms[i] >> 6; put_code(l, ct); - + if (l == cHuffmanSmallZeroRunCode) put_bits(e, cHuffmanSmallZeroRunExtraBits); else if (l == cHuffmanBigZeroRunCode) @@ -1134,7 +1134,7 @@ namespace basisu huffman_encoding_table etab; etab.init(h, 16); - + { bitwise_coder c; c.init(1024); @@ -1269,9 +1269,9 @@ namespace basisu // We now have chosen an entry to place in the picked list, now determine which side it goes on. 
const uint32_t entry_to_move = m_entries_to_do[best_entry]; - + float side = pick_side(num_syms, entry_to_move, pDist_func, pCtx, dist_func_weight); - + // Put entry_to_move either on the "left" or "right" side of the picked entries if (side <= 0) m_entries_picked.push_back(entry_to_move); @@ -1451,7 +1451,7 @@ namespace basisu uint32_t hash_hsieh(const uint8_t *pBuf, size_t len) { - if (!pBuf || !len) + if (!pBuf || !len) return 0; uint32_t h = static_cast(len); @@ -1464,23 +1464,23 @@ namespace basisu const uint16_t *pWords = reinterpret_cast(pBuf); h += pWords[0]; - + const uint32_t t = (pWords[1] << 11) ^ h; h = (h << 16) ^ t; - + pBuf += sizeof(uint32_t); - + h += h >> 11; } switch (bytes_left) { - case 1: + case 1: h += *reinterpret_cast(pBuf); h ^= h << 10; h += h >> 1; break; - case 2: + case 2: h += *reinterpret_cast(pBuf); h ^= h << 11; h += h >> 17; @@ -1494,7 +1494,7 @@ namespace basisu default: break; } - + h ^= h << 3; h += h >> 5; h ^= h << 4; @@ -1505,7 +1505,7 @@ namespace basisu return h; } - job_pool::job_pool(uint32_t num_threads) : + job_pool::job_pool(uint32_t num_threads) : m_num_active_jobs(0), m_kill_flag(false) { @@ -1525,17 +1525,17 @@ namespace basisu job_pool::~job_pool() { debug_printf("job_pool::~job_pool\n"); - + // Notify all workers that they need to die right now. m_kill_flag = true; - + m_has_work.notify_all(); // Wait for all workers to die. 
for (uint32_t i = 0; i < m_threads.size(); i++) m_threads[i].join(); } - + void job_pool::add_job(const std::function& job) { std::unique_lock lock(m_mutex); @@ -1555,7 +1555,7 @@ namespace basisu std::unique_lock lock(m_mutex); m_queue.emplace_back(std::move(job)); - + const size_t queue_size = m_queue.size(); lock.unlock(); @@ -1591,7 +1591,7 @@ namespace basisu { BASISU_NOTE_UNUSED(index); //debug_printf("job_pool::job_thread: starting %u\n", index); - + while (true) { std::unique_lock lock(m_mutex); @@ -1617,9 +1617,9 @@ namespace basisu --m_num_active_jobs; - // Now check if there are no more jobs remaining. + // Now check if there are no more jobs remaining. const bool all_done = m_queue.empty() && !m_num_active_jobs; - + lock.unlock(); if (all_done) @@ -1678,7 +1678,7 @@ namespace basisu // Simple validation if ((hdr.m_cmap != 0) && (hdr.m_cmap != 1)) return nullptr; - + if (hdr.m_cmap) { if ((hdr.m_cmap_bpp == 0) || (hdr.m_cmap_bpp > 32)) @@ -1837,13 +1837,13 @@ namespace basisu bytes_remaining += bytes_to_skip; } } - + width = hdr.m_width; height = hdr.m_height; const uint32_t source_pitch = width * tga_bytes_per_pixel; const uint32_t dest_pitch = width * n_chans; - + uint8_t *pImage = (uint8_t *)malloc(dest_pitch * height); if (!pImage) return nullptr; @@ -1865,7 +1865,7 @@ namespace basisu int pixels_remaining = width; uint8_t *pDst = &input_line_buf[0]; - do + do { if (!run_remaining) { @@ -2050,7 +2050,7 @@ namespace basisu if (!filedata.size() || (filedata.size() > UINT32_MAX)) return nullptr; - + return read_tga(&filedata[0], (uint32_t)filedata.size(), width, height, n_chans); } @@ -2060,7 +2060,7 @@ namespace basisu va_list args; va_start(args, pFmt); -#ifdef _WIN32 +#ifdef _WIN32 vsprintf_s(buf, sizeof(buf), pFmt, args); #else vsnprintf(buf, sizeof(buf), pFmt, args); @@ -2085,7 +2085,7 @@ namespace basisu for (uint32_t x = 0; x < 8; x++) { const uint32_t q = row_bits & (1 << x); - + const color_rgba* pColor = q ? 
&fg : pBG; if (!pColor) continue; @@ -2105,5 +2105,5 @@ namespace basisu } } } - + } // namespace basisu diff --git a/encoder/basisu_enc.h b/encoder/basisu_enc.h index 0efeaa46..bf9e02e9 100644 --- a/encoder/basisu_enc.h +++ b/encoder/basisu_enc.h @@ -62,7 +62,7 @@ namespace basisu void error_vprintf(const char* pFmt, va_list args); void error_printf(const char *pFmt, ...); - + // Helpers inline uint8_t clamp255(int32_t i) @@ -70,18 +70,18 @@ namespace basisu return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } - inline int32_t clampi(int32_t value, int32_t low, int32_t high) - { - if (value < low) - value = low; - else if (value > high) - value = high; - return value; + inline int32_t clampi(int32_t value, int32_t low, int32_t high) + { + if (value < low) + value = low; + else if (value > high) + value = high; + return value; } inline uint8_t mul_8(uint32_t v, uint32_t a) { - v = v * a + 128; + v = v * a + 128; return (uint8_t)((v + (v >> 8)) >> 8); } @@ -130,10 +130,10 @@ namespace basisu return bits; } - + // Hashing - - inline uint32_t bitmix32c(uint32_t v) + + inline uint32_t bitmix32c(uint32_t v) { v = (v + 0x7ed55d16) + (v << 12); v = (v ^ 0xc761c23c) ^ (v >> 19); @@ -144,7 +144,7 @@ namespace basisu return v; } - inline uint32_t bitmix32(uint32_t v) + inline uint32_t bitmix32(uint32_t v) { v -= (v << 6); v ^= (v >> 17); @@ -361,7 +361,7 @@ namespace basisu inline const T *get_ptr() const { return reinterpret_cast(&m_v[0]); } inline T *get_ptr() { return reinterpret_cast(&m_v[0]); } - + inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; } inline vec operator+ () const { return *this; } inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; } @@ -370,14 +370,14 @@ namespace basisu inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; } inline vec &operator/= (T s) { for (uint32_t 
i = 0; i < N; i++) m_v[i] /= s; return *this; } inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; } - + friend inline vec operator+(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; } friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; } friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; } friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; } friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; } friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; } - + static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; } inline T dot(const vec &rhs) const { return dot_product(*this, rhs); } @@ -428,7 +428,7 @@ namespace basisu typedef vec<1, float> vec1F; typedef vec<16, float> vec16F; - + template class matrix { @@ -689,7 +689,7 @@ namespace basisu } #undef BASISU_GET_KEY - + // Very simple job pool with no dependencies. class job_pool { @@ -699,24 +699,24 @@ namespace basisu // num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc. 
job_pool(uint32_t num_threads); ~job_pool(); - + void add_job(const std::function& job); void add_job(std::function&& job); void wait_for_all(); size_t get_total_threads() const { return 1 + m_threads.size(); } - + private: std::vector m_threads; std::vector > m_queue; - + std::mutex m_mutex; std::condition_variable m_has_work; std::condition_variable m_no_more_jobs; - + uint32_t m_num_active_jobs; - + std::atomic m_kill_flag; void job_thread(uint32_t index); @@ -759,7 +759,7 @@ namespace basisu return *this; } }; - + class color_rgba { public: @@ -883,7 +883,7 @@ namespace basisu inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; } inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; } - + inline void clear() { m_comps[0] = 0; @@ -919,7 +919,7 @@ namespace basisu } inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; } - inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } + inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); } inline basist::color32 get_color32() const @@ -956,7 +956,7 @@ namespace basisu else return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b); } - + // TODO: Allow user to control channel weightings. 
inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha) { @@ -977,7 +977,7 @@ namespace basisu const float dcb = cb1 - cb2; uint32_t d = static_cast(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb); - + if (alpha) { int da = static_cast(e1.a) - static_cast(e2.a); @@ -994,7 +994,7 @@ namespace basisu int delta_l = dr * 27 + dg * 92 + db * 9; int delta_cr = dr * 128 - delta_l; int delta_cb = db * 128 - delta_l; - + uint32_t id = ((uint32_t)(delta_l * delta_l) >> 7U) + ((((uint32_t)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) + ((((uint32_t)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U); @@ -1102,11 +1102,11 @@ namespace basisu va_list args; va_start(args, pFmt); -#ifdef _WIN32 +#ifdef _WIN32 vsprintf_s(buf, sizeof(buf), pFmt, args); #else vsnprintf(buf, sizeof(buf), pFmt, args); -#endif +#endif va_end(args); return std::string(buf); @@ -1154,7 +1154,7 @@ namespace basisu char fname_buf[_MAX_FNAME] = { 0 }; char ext_buf[_MAX_EXT] = { 0 }; - errno_t error = _splitpath_s(p, + errno_t error = _splitpath_s(p, pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, pFilename ? fname_buf : NULL, pFilename ? 
_MAX_FNAME : 0, @@ -1186,7 +1186,7 @@ namespace basisu if ((pDir->size()) && (pDir->back() != '/')) *pDir += "/"; } - + if (pFilename) { *pFilename = pBaseName; @@ -1213,7 +1213,7 @@ namespace basisu return (c == '/'); #endif } - + inline bool is_drive_separator(char c) { #ifdef _WIN32 @@ -1241,7 +1241,7 @@ namespace basisu string_combine_path(dst, p, q); string_combine_path(dst, dst.c_str(), r); } - + inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt) { string_combine_path(dst, p, q, r); @@ -1443,7 +1443,7 @@ namespace basisu codebook.resize(0); codebook.reserve(max_clusters); - + uint32_t node_index = 0; while (true) @@ -1454,7 +1454,7 @@ namespace basisu { codebook.resize(codebook.size() + 1); codebook.back() = cur.m_training_vecs; - + if (node_stack.empty()) break; @@ -1462,7 +1462,7 @@ namespace basisu node_stack.pop_back(); continue; } - + node_stack.push_back(cur.m_right_index); node_index = cur.m_left_index; } @@ -1503,7 +1503,7 @@ namespace basisu assert(node.is_leaf()); var_heap.delete_top(); - + if (node.m_training_vecs.size() > 1) { if (split_node(node_index, var_heap, l_children, r_children)) @@ -1592,7 +1592,7 @@ namespace basisu m_nodes[node_index].m_left_index = l_child_index; m_nodes[node_index].m_right_index = r_child_index; - + m_nodes[node_index].m_codebook_index = m_next_codebook_index; m_next_codebook_index++; @@ -1606,7 +1606,7 @@ namespace basisu if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1)) { TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first); - + for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++) { if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first)) @@ -1633,10 +1633,10 @@ namespace basisu if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1)) var_heap.add_heap(l_child_index, l_child.m_var); - + if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1)) 
var_heap.add_heap(r_child_index, r_child.m_var); - + return true; } @@ -1732,7 +1732,7 @@ namespace basisu for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) { const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; - + l = TrainingVectorType::component_min(l, v); h = TrainingVectorType::component_max(h, v); } @@ -1813,8 +1813,8 @@ namespace basisu const uint32_t cMaxIters = 6; for (uint32_t iter = 0; iter < cMaxIters; iter++) { - l_children.resize(0); - r_children.resize(0); + l_children.resize(0); + r_children.resize(0); TrainingVectorType new_l_child(cZero), new_r_child(cZero); @@ -1866,7 +1866,7 @@ namespace basisu { const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second; - + if ((!i) || (v == firstVec)) { firstVec = v; @@ -1968,7 +1968,7 @@ namespace basisu } Quantizer quantizers[cMaxThreads]; - + bool success_flags[cMaxThreads]; clear_obj(success_flags); @@ -2070,10 +2070,10 @@ namespace basisu bool even_odd_input_pairs_equal) { typedef bit_hasher training_vec_bit_hasher; - - typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group, + + typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group, training_vec_bit_hasher> group_hash; - + //interval_timer tm; //tm.start(); @@ -2082,7 +2082,7 @@ namespace basisu unique_vecs.reserve(20000); weighted_block_group g; - + if (even_odd_input_pairs_equal) { g.m_indices.resize(2); @@ -2167,7 +2167,7 @@ namespace basisu typename group_hash::const_iterator group_iter = unique_vec_iters[group_index]; const uint_vec& training_vec_indices = group_iter->second.m_indices; - + append_vector(codebook.back(), training_vec_indices); } } @@ -2244,7 +2244,7 @@ namespace basisu const double inv_total = 1.0f / total; const double neg_inv_log2 = -1.0f / log(2.0f); - + double e = 0.0f; for (uint32_t i = 0; i < m_hist.size(); i++) if (m_hist[i]) @@ 
-2253,7 +2253,7 @@ namespace basisu return e; } }; - + struct sym_freq { uint32_t m_key; @@ -2263,7 +2263,7 @@ namespace basisu sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1); void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms); void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size); - + class huffman_encoding_table { public: @@ -2284,7 +2284,7 @@ namespace basisu bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size); bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size); - + inline const uint16_vec &get_codes() const { return m_codes; } inline const uint8_vec &get_code_sizes() const { return m_code_sizes; } @@ -2343,7 +2343,7 @@ namespace basisu m_bit_buffer = 0; m_bit_buffer_size = 0; - + return 8; } @@ -2392,7 +2392,7 @@ namespace basisu if (v < u) return put_bits(v, k); - + uint32_t x = v + u; assert((x >> 1) >= u); @@ -2404,20 +2404,20 @@ namespace basisu inline uint32_t put_rice(uint32_t v, uint32_t m) { assert(m); - + const uint64_t start_bits = m_total_bits; uint32_t q = v >> m, r = v & ((1 << m) - 1); // rice coding sanity check assert(q <= 64); - + for (; q > 16; q -= 16) put_bits(0xFFFF, 16); put_bits((1 << q) - 1, q); put_bits(r << 1, m + 1); - + return (uint32_t)(m_total_bits - start_bits); } @@ -2427,13 +2427,13 @@ namespace basisu const uint32_t chunk_size = 1 << chunk_bits; const uint32_t chunk_mask = chunk_size - 1; - + uint32_t total_bits = 0; for ( ; ; ) { uint32_t next_v = v >> chunk_bits; - + total_bits += put_bits((v & chunk_mask) | (next_v ? 
chunk_size : 0), chunk_bits + 1); if (!next_v) break; @@ -2445,7 +2445,7 @@ namespace basisu } uint32_t emit_huffman_table(const huffman_encoding_table &tab); - + private: uint8_vec m_bytes; uint32_t m_bit_buffer, m_bit_buffer_size; @@ -2473,7 +2473,7 @@ namespace basisu inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group) { assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1); - + m_bits_per_sym = bits_per_sym; m_total_syms_per_group = total_syms_per_group; m_cur_sym_bits = 0; @@ -2527,7 +2527,7 @@ namespace basisu return true; } - + inline uint32_t emit_next_sym(bitwise_coder &c) { uint32_t bits = 0; @@ -2557,7 +2557,7 @@ namespace basisu bool huffman_test(int rand_seed); // VQ index reordering - + class palette_index_reorderer { public: @@ -2578,7 +2578,7 @@ namespace basisu typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx); void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); - + // Table remaps old to new symbol indices inline const uint_vec &get_remap_table() const { return m_remap_table; } @@ -2599,12 +2599,12 @@ namespace basisu class image { public: - image() : + image() : m_width(0), m_height(0), m_pitch(0) { } - image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : m_width(0), m_height(0), m_pitch(0) { resize(w, h, p); @@ -2645,7 +2645,7 @@ namespace basisu image &clear() { - m_width = 0; + m_width = 0; m_height = 0; m_pitch = 0; clear_vector(m_pixels); @@ -2667,7 +2667,7 @@ namespace basisu void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) { assert(comps >= 1 && comps <= 4); - + resize(width, height); for (uint32_t y = 0; y < height; y++) @@ -2753,7 +2753,7 @@ namespace basisu p = w; clear(); - + if ((!p) || (!w) || (!h)) return *this; @@ -2832,8 +2832,8 @@ namespace basisu y = wrap_v ? 
posmod(y, m_height) : clamp(y, 0, m_height - 1); return m_pixels[x + y * m_pitch]; } - - inline image &set_clipped(int x, int y, const color_rgba &c) + + inline image &set_clipped(int x, int y, const color_rgba &c) { if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) (*this)(x, y) = c; @@ -2989,7 +2989,7 @@ namespace basisu } void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...); - + private: uint32_t m_width, m_height, m_pitch; // all in pixels color_rgba_vec m_pixels; @@ -3002,12 +3002,12 @@ namespace basisu class imagef { public: - imagef() : + imagef() : m_width(0), m_height(0), m_pitch(0) { } - imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : m_width(0), m_height(0), m_pitch(0) { resize(w, h, p); @@ -3042,7 +3042,7 @@ namespace basisu imagef &clear() { - m_width = 0; + m_width = 0; m_height = 0; m_pitch = 0; clear_vector(m_pixels); @@ -3092,7 +3092,7 @@ namespace basisu set_clipped(x + ix, y + iy, c); return *this; } - + imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1)) { if (p == UINT32_MAX) @@ -3111,7 +3111,7 @@ namespace basisu cur_state.swap(m_pixels); m_pixels.resize(p * h); - + for (uint32_t y = 0; y < h; y++) { for (uint32_t x = 0; x < w; x++) @@ -3149,8 +3149,8 @@ namespace basisu y = wrap_v ? 
posmod(y, m_height) : clamp(y, 0, m_height - 1); return m_pixels[x + y * m_pitch]; } - - inline imagef &set_clipped(int x, int y, const vec4F &c) + + inline imagef &set_clipped(int x, int y, const vec4F &c) { if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) (*this)(x, y) = c; @@ -3213,14 +3213,14 @@ namespace basisu inline const vec4F *get_ptr() const { return &m_pixels[0]; } inline vec4F *get_ptr() { return &m_pixels[0]; } - + private: uint32_t m_width, m_height, m_pitch; // all in pixels vec4F_vec m_pixels; }; // Image metrics - + class image_metrics { public: @@ -3258,14 +3258,14 @@ namespace basisu bool load_jpg(const char *pFilename, image& img); inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); } - + // Currently loads .PNG, .TGA, or .JPG bool load_image(const char* pFilename, image& img); inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); } uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans); uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans); - + enum { cImageSaveGrayscale = 1, @@ -3274,23 +3274,23 @@ namespace basisu bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0); inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); } - + bool read_file_to_vec(const char* pFilename, uint8_vec& data); - + bool write_data_to_file(const char* pFilename, const void* pData, size_t len); - + inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? 
write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); } float linear_to_srgb(float l); float srgb_to_linear(float s); bool image_resample(const image &src, image &dst, bool srgb = false, - const char *pFilter = "lanczos4", float filter_scale = 1.0f, + const char *pFilter = "lanczos4", float filter_scale = 1.0f, bool wrapping = false, uint32_t first_comp = 0, uint32_t num_comps = 4); // Timing - + typedef uint64_t timer_ticks; class interval_timer @@ -3303,7 +3303,7 @@ namespace basisu double get_elapsed_secs() const; inline double get_elapsed_ms() const { return 1000.0f* get_elapsed_secs(); } - + static void init(); static inline timer_ticks get_ticks_per_sec() { return g_freq; } static timer_ticks get_ticks(); @@ -3371,8 +3371,8 @@ namespace basisu inline const T &operator[] (uint32_t i) const { return m_values[i]; } inline T &operator[] (uint32_t i) { return m_values[i]; } - - inline const T &at_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width), clamp(y, 0, m_height)); } + + inline const T &at_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width), clamp(y, 0, m_height)); } inline T &at_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width), clamp(y, 0, m_height)); } void clear() @@ -3450,7 +3450,7 @@ namespace basisu } }; typedef basisu::vector pixel_block_vec; - + } // namespace basisu diff --git a/encoder/basisu_etc.cpp b/encoder/basisu_etc.cpp index f8bd0f12..02ab0423 100644 --- a/encoder/basisu_etc.cpp +++ b/encoder/basisu_etc.cpp @@ -39,7 +39,7 @@ namespace basisu { -16,-48,-64,-80,8,40,56,72 }, { -16,-40,-64,-80,8,32,56,72 }, { -16,-32,-64,-80,8,24,56,72 }, { -16,-40,-56,-80,8,32,48,72 }, { -24,-32,-56,-80,16,24,48,72 }, { -8,-16,-24,-80,0,8,16,72 }, { -32,-48,-64,-72,24,40,56,64 }, { -24,-40,-56,-72,16,32,48,64 } }; - + // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. 
static uint16_t g_etc1_inverse_lookup[2 * 8 * 4][256]; // [ diff/inten_table/selector][desired_color ] @@ -113,7 +113,7 @@ namespace basisu static uint32_t etc1_decode_value(uint32_t diff, uint32_t inten, uint32_t selector, uint32_t packed_c) { - const uint32_t limit = diff ? 32 : 16; + const uint32_t limit = diff ? 32 : 16; BASISU_NOTE_UNUSED(limit); assert((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit)); int c; @@ -261,7 +261,7 @@ namespace basisu return best_error; } - + const uint32_t BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE = 165; static const struct { uint8_t m_v[4]; } g_cluster_fit_order_tab[BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE] = @@ -300,7 +300,7 @@ namespace basisu { { 2, 1, 2, 3 } },{ { 4, 1, 0, 3 } },{ { 3, 1, 1, 3 } },{ { 1, 1, 2, 4 } },{ { 2, 1, 0, 5 } }, { { 1, 0, 1, 6 } },{ { 0, 2, 1, 5 } },{ { 0, 2, 0, 6 } },{ { 1, 1, 1, 5 } },{ { 1, 1, 0, 6 } } }; - + const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = { { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, @@ -600,7 +600,7 @@ namespace basisu const int y3 = pInten_modifer_table[3]; pDst[3].set(ir + y3, ig + y3, ib + y3, 255); } - + bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha) { const bool diff_flag = block.get_diff_bit(); @@ -723,7 +723,7 @@ namespace basisu { return (n << 4) | n; } - + uint64_t etc_block::evaluate_etc1_error(const color_rgba* pBlock_pixels, bool perceptual, int subblock_index) const { color_rgba unpacked_block[16]; @@ -772,7 +772,7 @@ namespace basisu } } } - + bool etc1_optimizer::compute() { assert(m_pResult->m_pSelectors); @@ -817,19 +817,19 @@ namespace basisu const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; uint64_t actual_error = 0; - + bool perceptual; if (m_pParams->m_quality >= cETCQualityMedium) perceptual = m_pParams->m_perceptual; else perceptual = (m_pParams->m_quality == cETCQualityFast) ? 
false : m_pParams->m_perceptual; - + for (uint32_t i = 0; i < n; i++) actual_error += color_distance(perceptual, pSrc_pixels[i], block_colors[pSelectors[i]], false); assert(actual_error == m_best_solution.m_error); } -#endif +#endif m_pResult->m_error = m_best_solution.m_error; @@ -1014,10 +1014,10 @@ namespace basisu m_luma.resize(n); m_sorted_luma_indices.resize(n); m_sorted_luma.resize(n); - + int min_r = 255, min_g = 255, min_b = 255; int max_r = 0, max_g = 0, max_b = 0; - + for (uint32_t i = 0; i < n; i++) { const color_rgba& c = m_pParams->m_pSrc_pixels[i]; @@ -1055,7 +1055,7 @@ namespace basisu m_pSorted_luma = &m_sorted_luma[0]; m_pSorted_luma_indices = &m_sorted_luma_indices[0]; - + for (uint32_t i = 0; i < n; i++) m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; } @@ -1086,7 +1086,7 @@ namespace basisu return true; } - + static uint8_t g_eval_dist_tables[8][256] = { // 99% threshold @@ -1255,7 +1255,7 @@ namespace basisu } trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; - + #if BASISU_DEBUG_ETC_ENCODER_DEEPER printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); #endif @@ -1269,7 +1269,7 @@ namespace basisu success = true; } } - + return success; } @@ -1300,14 +1300,14 @@ namespace basisu } const color_rgba base_color(coords.get_scaled_color()); - + const uint32_t n = m_pParams->m_num_src_pixels; assert(trial_solution.m_selectors.size() == n); trial_solution.m_error = UINT64_MAX; - + const bool perceptual = (m_pParams->m_quality == cETCQualityFast) ? 
false : m_pParams->m_perceptual; - + for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) { const int* pInten_table = g_etc1_inten_tables[inten_table]; @@ -1327,10 +1327,10 @@ namespace basisu // 0 1 2 3 // 01 12 23 const uint32_t block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; - + uint64_t total_error = 0; const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; - + if (perceptual) { if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) diff --git a/encoder/basisu_etc.h b/encoder/basisu_etc.h index 208f2aac..f987b69d 100644 --- a/encoder/basisu_etc.h +++ b/encoder/basisu_etc.h @@ -76,7 +76,7 @@ namespace basisu // 000 001 010 011 100 101 110 111 // 0 1 2 3 -4 -3 -2 -1 }; - + extern const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues]; extern const uint8_t g_etc1_to_selector_index[cETC1SelectorValues]; extern const uint8_t g_selector_index_to_etc1[cETC1SelectorValues]; @@ -92,7 +92,7 @@ namespace basisu { // big endian uint64: // bit ofs: 56 48 40 32 24 16 8 0 - // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 union { uint64_t m_uint64; @@ -275,7 +275,7 @@ namespace basisu const uint32_t byte_bit_ofs = bit_index & 7; const uint32_t mask = 1 << byte_bit_ofs; - + const uint32_t lsb = etc1_val & 1; const uint32_t msb = etc1_val >> 1; @@ -510,7 +510,7 @@ namespace basisu b.r = (base5_color.r << 3U) | (base5_color.r >> 2U); b.g = (base5_color.g << 3U) | (base5_color.g >> 2U); b.b = (base5_color.b << 3U) | (base5_color.b >> 2U); - + const int* pInten_table = g_etc1_inten_tables[inten_table]; pBlock_colors[0].set(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); @@ -646,7 +646,7 @@ namespace basisu void set_block_color5_etc1s(const color_rgba &c_unscaled) { set_diff_bit(true); - + set_base5_color(pack_color5(c_unscaled, false)); 
set_delta3_color(pack_delta3(0, 0, 0)); } @@ -679,11 +679,11 @@ namespace basisu int dr = c1_unscaled.r - c0_unscaled.r; int dg = c1_unscaled.g - c0_unscaled.g; int db = c1_unscaled.b - c0_unscaled.b; - + dr = clamp(dr, cETC1ColorDeltaMin, cETC1ColorDeltaMax); dg = clamp(dg, cETC1ColorDeltaMin, cETC1ColorDeltaMax); db = clamp(db, cETC1ColorDeltaMin, cETC1ColorDeltaMax); - + set_delta3_color(pack_delta3(dr, dg, db)); return true; @@ -785,12 +785,12 @@ namespace basisu return static_cast(x); } }; - + typedef basisu::vector etc_block_vec; // Returns false if the unpack fails (could be bogus data or ETC2) bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha = false); - + enum basis_etc_quality { cETCQualityFast, @@ -1077,13 +1077,13 @@ namespace basisu enum { cSolutionsTriedHashBits = 10, cTotalSolutionsTriedHashSize = 1 << cSolutionsTriedHashBits, cSolutionsTriedHashMask = cTotalSolutionsTriedHashSize - 1 }; uint8_t m_solutions_tried[cTotalSolutionsTriedHashSize / 8]; - + void get_nearby_inten_tables(uint32_t idx, int &first_inten_table, int &last_inten_table) { first_inten_table = maximum(idx - 1, 0); last_inten_table = minimum(cETC1IntenModifierValues, idx + 1); } - + bool check_for_redundant_solution(const etc1_solution_coordinates& coords); bool evaluate_solution_slow(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); @@ -1105,7 +1105,7 @@ namespace basisu { etc1_optimizer m_optimizer; }; - + void pack_etc1_solid_color_init(); uint64_t pack_etc1_block_solid_color(etc_block& block, const uint8_t* pColor); @@ -1177,5 +1177,5 @@ namespace basisu uint64_t pack_eac_a8(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask = UINT32_MAX); void 
pack_eac_a8(eac_a8_block* pBlock, const uint8_t* pPixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask = UINT32_MAX); - + } // namespace basisu diff --git a/encoder/basisu_frontend.cpp b/encoder/basisu_frontend.cpp index 1f30a33c..8c2f6969 100644 --- a/encoder/basisu_frontend.cpp +++ b/encoder/basisu_frontend.cpp @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. // -// TODO: +// TODO: // This code originally supported full ETC1 and ETC1S, so there's some legacy stuff to be cleaned up in here. // Add endpoint tiling support (where we force adjacent blocks to use the same endpoints during quantization), for a ~10% or more increase in bitrate at same SSIM. The backend already supports this. // @@ -40,20 +40,20 @@ namespace basisu const uint32_t BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE = 16; const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 = 32; const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT = 16; - + // TODO - How to handle internal verifies in the basisu lib static inline void handle_verify_failure(int line) { error_printf("basisu_frontend: verify check failed at line %i!\n", line); abort(); } - + bool basisu_frontend::init(const params &p) { debug_printf("basisu_frontend::init: Multithreaded: %u, Job pool total threads: %u, NumEndpointClusters: %u, NumSelectorClusters: %u, Perceptual: %u, CompressionLevel: %u\n", p.m_multithreaded, p.m_pJob_pool ? 
p.m_pJob_pool->get_total_threads() : 0, p.m_max_endpoint_clusters, p.m_max_selector_clusters, p.m_perceptual, p.m_compression_level); - + if ((p.m_max_endpoint_clusters < 1) || (p.m_max_endpoint_clusters > cMaxEndpointClusters)) return false; if ((p.m_max_selector_clusters < 1) || (p.m_max_selector_clusters > cMaxSelectorClusters)) @@ -61,9 +61,9 @@ namespace basisu m_source_blocks.resize(0); append_vector(m_source_blocks, p.m_pSource_blocks, p.m_num_source_blocks); - + m_params = p; - + if (m_params.m_pOpenCL_context) { BASISU_ASSUME(sizeof(cl_pixel_block) == sizeof(pixel_block)); @@ -80,7 +80,7 @@ namespace basisu m_encoded_blocks.resize(m_params.m_num_source_blocks); memset(&m_encoded_blocks[0], 0, m_encoded_blocks.size() * sizeof(m_encoded_blocks[0])); - + m_num_endpoint_codebook_iterations = 1; m_num_selector_codebook_iterations = 1; @@ -150,7 +150,7 @@ namespace basisu if (m_params.m_disable_hierarchical_endpoint_codebooks) m_use_hierarchical_endpoint_codebooks = false; - debug_printf("Endpoint refinement: %u, Hierarchical endpoint codebooks: %u, Hierarchical selector codebooks: %u, Endpoint codebook iters: %u, Selector codebook iters: %u\n", + debug_printf("Endpoint refinement: %u, Hierarchical endpoint codebooks: %u, Hierarchical selector codebooks: %u, Endpoint codebook iters: %u, Selector codebook iters: %u\n", m_endpoint_refinement, m_use_hierarchical_endpoint_codebooks, m_use_hierarchical_selector_codebooks, m_num_endpoint_codebook_iterations, m_num_selector_codebook_iterations); return true; @@ -238,7 +238,7 @@ namespace basisu { BASISU_FRONTEND_VERIFY(validate_endpoint_cluster_hierarchy(false)); } - + eliminate_redundant_or_empty_endpoint_clusters(); if (m_params.m_validate) @@ -252,7 +252,7 @@ namespace basisu if (early_out) break; } - + if (m_params.m_validate) { BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); @@ -268,13 +268,13 @@ namespace basisu if (m_use_hierarchical_selector_codebooks) compute_selector_clusters_within_each_parent_cluster(); 
- + if (m_params.m_compression_level == 0) { create_optimized_selector_codebook(0); find_optimal_selector_clusters_for_each_block(); - + introduce_special_selector_clusters(); } else @@ -295,7 +295,7 @@ namespace basisu } } } - + optimize_selector_codebook(); if (m_params.m_debug_stats) @@ -321,7 +321,7 @@ namespace basisu const basist::basisu_lowlevel_etc1s_transcoder::endpoint_vec& endpoints = pTranscoder->get_endpoints(); const basist::basisu_lowlevel_etc1s_transcoder::selector_vec& selectors = pTranscoder->get_selectors(); - + m_endpoint_cluster_etc_params.resize(endpoints.size()); for (uint32_t i = 0; i < endpoints.size(); i++) { @@ -428,7 +428,7 @@ namespace basisu #ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job([this, first_index, last_index, pass] { #endif - + for (uint32_t block_index = first_index; block_index < last_index; block_index++) { const etc_block& blk = pass ? m_encoded_blocks[block_index] : m_etc1_blocks_etc1s[block_index]; @@ -441,7 +441,7 @@ namespace basisu uint64_t best_err = UINT64_MAX; uint32_t best_index = 0; etc_block best_block(trial_blk); - + for (uint32_t i = 0; i < m_endpoint_cluster_etc_params.size(); i++) { if (m_endpoint_cluster_etc_params[i].m_inten_table[0] > blk.get_inten_table(0)) @@ -564,7 +564,7 @@ namespace basisu m_selector_cluster_block_indices.resize(selectors.size()); for (uint32_t block_index = 0; block_index < m_etc1_blocks_etc1s.size(); block_index++) m_selector_cluster_block_indices[m_block_selector_cluster_index[block_index]].push_back(block_index); - + return true; } @@ -598,9 +598,9 @@ namespace basisu const uint32_t new_selector_cluster_index = (uint32_t)m_optimized_cluster_selectors.size(); m_optimized_cluster_selectors.push_back(blk); - + vector_ensure_element_is_valid(m_selector_cluster_block_indices, new_selector_cluster_index); - + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) { if (m_orig_encoded_blocks[block_index].get_raw_selector_bits() != blk.get_raw_selector_bits()) 
@@ -608,7 +608,7 @@ namespace basisu // See if using flat selectors actually decreases the block's error. const uint32_t old_selector_cluster_index = m_block_selector_cluster_index[block_index]; - + etc_block cur_blk; const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, 0); cur_blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(endpoint_cluster_index, false)); @@ -624,10 +624,10 @@ namespace basisu if (new_err >= cur_err) continue; - + // Change the block to use the new cluster m_block_selector_cluster_index[block_index] = new_selector_cluster_index; - + m_selector_cluster_block_indices[new_selector_cluster_index].push_back(block_index); block_relocated_flags[block_index] = true; @@ -703,7 +703,7 @@ namespace basisu old_to_new[i] = (find_res.first)->second; continue; } - + old_to_new[i] = total_new_entries++; new_to_old.push_back(i); } @@ -732,7 +732,7 @@ namespace basisu { new_selector_cluster_indices[m_block_selector_cluster_index[i]].push_back(i); } - + m_optimized_cluster_selectors.swap(new_optimized_cluster_selectors); m_selector_cluster_block_indices.swap(new_selector_cluster_indices); @@ -743,7 +743,7 @@ namespace basisu for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++) m_selector_clusters_within_each_parent_cluster[i][j] = old_to_new[m_selector_clusters_within_each_parent_cluster[i][j]]; } - + debug_printf("optimize_selector_codebook: Before: %u After: %u\n", orig_total_selector_clusters, total_new_entries); } @@ -753,11 +753,11 @@ namespace basisu interval_timer tm; tm.start(); - + m_etc1_blocks_etc1s.resize(m_total_blocks); bool use_cpu = true; - + if (m_params.m_pOpenCL_context) { uint32_t total_perms = 64; @@ -767,7 +767,7 @@ namespace basisu total_perms = 16; else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) total_perms = OPENCL_ENCODE_ETC1S_MAX_PERMS; - + bool status = opencl_encode_etc1s_blocks(m_params.m_pOpenCL_context, 
m_etc1_blocks_etc1s.data(), m_params.m_perceptual, total_perms); if (status) use_cpu = false; @@ -778,7 +778,7 @@ namespace basisu m_opencl_failed = true; } } - + if (use_cpu) { const uint32_t N = 4096; @@ -841,16 +841,16 @@ namespace basisu #endif } // use_cpu - + debug_printf("init_etc1_images: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); } void basisu_frontend::init_endpoint_training_vectors() { debug_printf("init_endpoint_training_vectors\n"); - + vec6F_quantizer::array_of_weighted_training_vecs &training_vecs = m_endpoint_clusterizer.get_training_vecs(); - + training_vecs.resize(m_total_blocks * 2); const uint32_t N = 16384; @@ -864,12 +864,12 @@ namespace basisu #endif for (uint32_t block_index = first_index; block_index < last_index; block_index++) - { + { const etc_block &blk = m_etc1_blocks_etc1s[block_index]; color_rgba block_colors[2]; blk.get_block_low_high_colors(block_colors, 0); - + vec6F v; v[0] = block_colors[0].r * (1.0f / 255.0f); v[1] = block_colors[0].g * (1.0f / 255.0f); @@ -877,7 +877,7 @@ namespace basisu v[3] = block_colors[1].r * (1.0f / 255.0f); v[4] = block_colors[1].g * (1.0f / 255.0f); v[5] = block_colors[1].b * (1.0f / 255.0f); - + training_vecs[block_index * 2 + 0] = std::make_pair(v, 1); training_vecs[block_index * 2 + 1] = std::make_pair(v, 1); @@ -949,12 +949,12 @@ namespace basisu for (uint32_t cluster_index = 0; cluster_index < m_endpoint_clusters.size(); cluster_index++) { const uint_vec &cluster = m_endpoint_clusters[cluster_index]; - + uint32_t parent_cluster_index = 0; for (uint32_t j = 0; j < cluster.size(); j++) { const uint32_t block_index = cluster[j] >> 1; - + BASISU_FRONTEND_VERIFY(block_index < m_block_parent_endpoint_cluster.size()); if (!j) @@ -968,7 +968,7 @@ namespace basisu } } } - + if (m_params.m_debug_stats) debug_printf("Total endpoint clusters: %u, parent clusters: %u\n", (uint32_t)m_endpoint_clusters.size(), (uint32_t)m_endpoint_parent_clusters.size()); } @@ -1026,7 +1026,7 @@ namespace basisu 
BASISU_FRONTEND_VERIFY(cluster_indices.size()); vector_sort(cluster_indices); - + auto last = std::unique(cluster_indices.begin(), cluster_indices.end()); cluster_indices.erase(last, cluster_indices.end()); } @@ -1039,8 +1039,8 @@ namespace basisu const uint32_t N = 512; for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N) { - const uint32_t first_index = cluster_index_iter; - const uint32_t last_index = minimum((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N); + const uint32_t first_index = cluster_index_iter; + const uint32_t last_index = minimum((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N); #ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index] { @@ -1071,7 +1071,7 @@ namespace basisu const endpoint_cluster_etc_params &etc_params = m_endpoint_cluster_etc_params[cluster_index]; assert(etc_params.m_valid); - + color_rgba block_colors[4]; etc_block::get_block_colors5(block_colors, etc_params.m_color_unscaled[0], etc_params.m_inten_table[0], true); @@ -1103,7 +1103,7 @@ namespace basisu quant_err.m_cluster_subblock_index = cluster_indices_iter; quant_err.m_block_index = block_index; quant_err.m_subblock_index = subblock_index; - + { std::lock_guard lock(m_lock); @@ -1124,7 +1124,7 @@ namespace basisu vector_sort(m_subblock_endpoint_quant_err_vec); } - + void basisu_frontend::introduce_new_endpoint_clusters() { debug_printf("introduce_new_endpoint_clusters\n"); @@ -1195,9 +1195,9 @@ namespace basisu BASISU_FRONTEND_VERIFY(cluster_sizes[subblock_to_move.m_cluster_index] >= 2); cluster_sizes[subblock_to_move.m_cluster_index] -= 2; - + ignore_cluster.insert(subblock_to_move.m_cluster_index); - + total_new_clusters++; num_new_endpoint_clusters--; @@ -1233,23 +1233,23 @@ namespace basisu inline std::size_t operator()(const color_rgba& k) const { uint32_t v = *(const uint32_t*)&k; - + //return bitmix32(v); - + //v ^= (v << 10); //v ^= (v >> 12); - + 
return v; } }; - + // Given each endpoint cluster, gather all the block pixels which are in that cluster and compute optimized ETC1S endpoints for them. // TODO: Don't optimize endpoint clusters which haven't changed. // If step>=1, we check to ensure the new endpoint values actually decrease quantization error. void basisu_frontend::generate_endpoint_codebook(uint32_t step) { debug_printf("generate_endpoint_codebook\n"); - + interval_timer tm; tm.start(); @@ -1262,7 +1262,7 @@ namespace basisu const uint32_t total_clusters = m_endpoint_clusters.size(); basisu::vector pixel_clusters(total_clusters); - + std::vector input_pixels; input_pixels.reserve(m_total_blocks * 16); @@ -1295,7 +1295,7 @@ namespace basisu pixel_weights.resize(pixel_weights.size() + total_pixels); uint64_t dst_ofs = first_pixel_index; - + uint64_t total_r = 0, total_g = 0, total_b = 0; for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { @@ -1347,7 +1347,7 @@ namespace basisu const uint64_t first_pixel_index = input_pixels.size(); uint32_t prev_color = 0, cur_weight = 0; - + for (uint32_t i = 0; i < colors.size(); i++) { uint32_t cur_color = pSorted[i]; @@ -1395,7 +1395,7 @@ namespace basisu uint32_t *pPrev_weight = nullptr; color_rgba prev_color; - + { color_rgba cur_color = pBlock_pixels[0]; auto res = color_hasher.insert(cur_color, 0); @@ -1407,7 +1407,7 @@ namespace basisu prev_color = cur_color; pPrev_weight = &(res.first)->second; } - + for (uint32_t i = 1; i < 16; i++) { color_rgba cur_color = pBlock_pixels[i]; @@ -1440,9 +1440,9 @@ namespace basisu input_pixels.resize(first_pixel_index + total_unique_pixels); pixel_weights.resize(first_pixel_index + total_unique_pixels); - + uint32_t j = 0; - + for (auto it = color_hasher.begin(); it != color_hasher.end(); ++it, ++j) { input_pixels[first_pixel_index + j] = it->first; @@ -1492,7 +1492,7 @@ namespace basisu for (uint32_t old_cluster_index = 0; old_cluster_index < 
m_endpoint_clusters.size(); old_cluster_index++) { const uint32_t new_cluster_index = sorted_cluster_indices_old_to_new[old_cluster_index]; - + const etc_block& blk = output_blocks[new_cluster_index]; endpoint_cluster_etc_params& prev_etc_params = m_endpoint_cluster_etc_params[old_cluster_index]; @@ -1500,7 +1500,7 @@ namespace basisu prev_etc_params.m_valid = true; etc_block::unpack_color5(prev_etc_params.m_color_unscaled[0], blk.get_base5_color(), false); prev_etc_params.m_inten_table[0] = blk.get_inten_table(0); - prev_etc_params.m_color_error[0] = 0; // dummy value - we don't actually use this + prev_etc_params.m_color_error[0] = 0; // dummy value - we don't actually use this } use_cpu = false; @@ -1689,7 +1689,7 @@ namespace basisu uint32_t basisu_frontend::refine_endpoint_clusterization() { debug_printf("refine_endpoint_clusterization\n"); - + if (m_use_hierarchical_endpoint_codebooks) compute_endpoint_clusters_within_each_parent_cluster(); @@ -1710,9 +1710,9 @@ namespace basisu } // cluster_indices_iter } - + //---------------------------------------------------------- - + // Create a new endpoint clusterization interval_timer tm; @@ -1729,7 +1729,7 @@ namespace basisu const uint32_t total_parent_clusters = m_endpoint_clusters_within_each_parent_cluster.size(); basisu::vector cl_block_info_structs(m_total_blocks); - + // the size of each parent cluster, in total clusters uint_vec parent_cluster_sizes(total_parent_clusters); for (uint32_t i = 0; i < total_parent_clusters; i++) @@ -1743,7 +1743,7 @@ namespace basisu cur_ofs += parent_cluster_sizes[i]; } - + // Note: total_actual_endpoint_clusters is not necessarly equal to m_endpoint_clusters.size(), because clusters may live in multiple parent clusters after the first refinement step. 
BASISU_FRONTEND_VERIFY(cur_ofs >= m_endpoint_clusters.size()); const uint32_t total_actual_endpoint_clusters = cur_ofs; @@ -1769,11 +1769,11 @@ namespace basisu cl_endpoint_cluster_structs[dst_ofs + j].m_cluster_index = (uint16_t)endpoint_cluster_index; } } - + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) { const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster[block_index]; - + cl_block_info_structs[block_index].m_num_clusters = (uint16_t)(parent_cluster_sizes[block_parent_endpoint_cluster_index]); cl_block_info_structs[block_index].m_first_cluster_ofs = (uint16_t)(first_parent_cluster_ofs[block_parent_endpoint_cluster_index]); @@ -1788,7 +1788,7 @@ namespace basisu uint_vec sorted_block_indices(m_total_blocks); indirect_sort(m_total_blocks, sorted_block_indices.data(), block_cluster_indices.data()); - + bool status = opencl_refine_endpoint_clusterization( m_params.m_pOpenCL_context, cl_block_info_structs.data(), @@ -1946,7 +1946,7 @@ namespace basisu break; } } // j - + best_cluster_indices[block_index] = best_cluster_index; } // block_index @@ -1960,9 +1960,9 @@ namespace basisu #ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); #endif - + } // use_cpu - + debug_printf("refine_endpoint_clusterization time: %3.3f secs\n", tm.get_elapsed_secs()); basisu::vector > optimized_endpoint_clusters(m_endpoint_clusters.size()); @@ -2005,7 +2005,7 @@ namespace basisu basisu::vector > new_endpoint_clusters(m_endpoint_clusters.size()); basisu::vector new_subblock_etc_params(m_endpoint_clusters.size()); - + for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) { uint32_t j = sorted_endpoint_cluster_indices[i]; @@ -2020,7 +2020,7 @@ namespace basisu new_endpoint_clusters.resize(0); new_subblock_etc_params.resize(0); - + for (int i = 0; i < (int)m_endpoint_clusters.size(); ) { if (!m_endpoint_clusters[i].size()) @@ -2038,7 +2038,7 @@ namespace basisu 
new_endpoint_clusters.push_back(m_endpoint_clusters[i]); new_subblock_etc_params.push_back(m_endpoint_cluster_etc_params[i]); - + for (int k = i + 1; k < j; k++) { append_vector(new_endpoint_clusters.back(), m_endpoint_clusters[k]); @@ -2046,7 +2046,7 @@ namespace basisu i = j; } - + if (m_endpoint_clusters.size() != new_endpoint_clusters.size()) { if (m_params.m_debug_stats) @@ -2061,7 +2061,7 @@ namespace basisu void basisu_frontend::create_initial_packed_texture() { debug_printf("create_initial_packed_texture\n"); - + interval_timer tm; tm.start(); @@ -2074,7 +2074,7 @@ namespace basisu for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) { uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0]; - + const color_rgba& color_unscaled = m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0]; uint32_t inten = m_endpoint_cluster_etc_params[cluster0].m_inten_table[0]; @@ -2142,7 +2142,7 @@ namespace basisu #endif } // use_cpu - + m_orig_encoded_blocks = m_encoded_blocks; debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); @@ -2159,7 +2159,7 @@ namespace basisu for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { const uint32_t block_index = cluster_indices[cluster_indices_iter]; - + block_selector_cluster_indices[block_index] = cluster_index; } // cluster_indices_iter @@ -2184,7 +2184,7 @@ namespace basisu BASISU_FRONTEND_VERIFY(cluster_indices.size()); vector_sort(cluster_indices); - + auto last = std::unique(cluster_indices.begin(), cluster_indices.end()); cluster_indices.erase(last, cluster_indices.end()); } @@ -2193,11 +2193,11 @@ namespace basisu void basisu_frontend::generate_selector_clusters() { debug_printf("generate_selector_clusters\n"); - + typedef tree_vector_quant vec16F_clusterizer; - + vec16F_clusterizer::array_of_weighted_training_vecs training_vecs(m_total_blocks); - + const uint32_t N = 4096; for (uint32_t block_index_iter = 0; 
block_index_iter < m_total_blocks; block_index_iter += N) { @@ -2227,10 +2227,10 @@ namespace basisu const uint32_t cColorDistToWeight = 300; const uint32_t cMaxWeight = 4096; uint32_t weight = clamp(dist / cColorDistToWeight, 1, cMaxWeight); - + training_vecs[block_index].first = v; training_vecs[block_index].second = weight; - + } // block_index #ifndef __EMSCRIPTEN__ @@ -2295,7 +2295,7 @@ namespace basisu for (uint32_t cluster_index = 0; cluster_index < m_selector_cluster_block_indices.size(); cluster_index++) { const uint_vec &cluster = m_selector_cluster_block_indices[cluster_index]; - + uint32_t parent_cluster_index = 0; for (uint32_t j = 0; j < cluster.size(); j++) { @@ -2327,7 +2327,7 @@ namespace basisu debug_printf("Total selector clusters (from m_selector_cluster_block_indices.size()): %u\n", (uint32_t)m_selector_cluster_block_indices.size()); m_optimized_cluster_selectors.resize(total_selector_clusters); - + // For each selector codebook entry, and for each of the 4x4 selectors, determine which selector minimizes the error across all the blocks that use that quantized selector. 
const uint32_t N = 256; for (uint32_t cluster_index_iter = 0; cluster_index_iter < total_selector_clusters; cluster_index_iter += N) @@ -2335,7 +2335,7 @@ namespace basisu const uint32_t first_index = cluster_index_iter; const uint32_t last_index = minimum((uint32_t)total_selector_clusters, cluster_index_iter + N); -#ifndef __EMSCRIPTEN__ +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job([this, first_index, last_index] { #endif @@ -2416,7 +2416,7 @@ namespace basisu #endif debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); - + if (m_params.m_debug_images) { uint32_t max_selector_cluster_size = 0; @@ -2442,7 +2442,7 @@ namespace basisu uint32_t block_index = cluster_block_indices[i]; const etc_block &blk = m_orig_encoded_blocks[block_index]; - + for (uint32_t y = 0; y < 4; y++) for (uint32_t x = 0; x < 4; x++) selector_cluster_vis.set_clipped(x_spacer_len + x + 5 * i, selector_cluster_index * 5 + y, color_rgba((blk.get_selector(x, y) * 255) / 3)); @@ -2464,7 +2464,7 @@ namespace basisu interval_timer tm; tm.start(); - + if (m_params.m_validate) { // Sanity checks @@ -2479,7 +2479,7 @@ namespace basisu } m_block_selector_cluster_index.resize(m_total_blocks); - + if (m_params.m_compression_level == 0) { // Just leave the blocks in their original selector clusters. 
@@ -2500,7 +2500,7 @@ namespace basisu return; } - + bool use_cpu = true; if ((m_params.m_pOpenCL_context) && m_use_hierarchical_selector_codebooks) @@ -2509,17 +2509,17 @@ namespace basisu basisu::vector selector_structs; selector_structs.reserve(m_optimized_cluster_selectors.size()); - + uint_vec parent_selector_cluster_offsets(num_parent_clusters); uint_vec selector_cluster_indices; selector_cluster_indices.reserve(m_optimized_cluster_selectors.size()); - + uint32_t cur_ofs = 0; for (uint32_t parent_index = 0; parent_index < num_parent_clusters; parent_index++) { parent_selector_cluster_offsets[parent_index] = cur_ofs; - + for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[parent_index].size(); j++) { const uint32_t selector_cluster_index = m_selector_clusters_within_each_parent_cluster[parent_index][j]; @@ -2529,7 +2529,7 @@ namespace basisu sel_bits |= (m_optimized_cluster_selectors[selector_cluster_index].get_selector(p & 3, p >> 2) << (p * 2)); selector_structs.enlarge(1)->m_packed_selectors = sel_bits; - + selector_cluster_indices.push_back(selector_cluster_index); } @@ -2537,7 +2537,7 @@ namespace basisu } const uint32_t total_input_selectors = cur_ofs; - + basisu::vector block_structs(m_total_blocks); for (uint32_t i = 0; i < m_total_blocks; i++) { @@ -2561,7 +2561,7 @@ namespace basisu selector_cluster_indices.data(), output_selector_cluster_indices.data(), m_params.m_perceptual); - + if (!status) { error_printf("basisu_frontend::find_optimal_selector_clusters_for_each_block: opencl_find_optimal_selector_clusters_for_each_block() failed! 
Using CPU.\n"); @@ -2575,7 +2575,7 @@ namespace basisu m_selector_cluster_block_indices[i].resize(0); m_selector_cluster_block_indices[i].reserve(128); } - + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) { etc_block& blk = m_encoded_blocks[block_index]; @@ -2607,7 +2607,7 @@ namespace basisu } } } - + const uint32_t N = 2048; for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) { @@ -2623,7 +2623,7 @@ namespace basisu for (uint32_t block_index = first_index; block_index < last_index; block_index++) { const pixel_block& block = get_source_pixel_block(block_index); - + etc_block& blk = m_encoded_blocks[block_index]; if ((block_index > first_index) && (block == get_source_pixel_block(block_index - 1))) @@ -2631,18 +2631,18 @@ namespace basisu blk.set_raw_selector_bits(m_optimized_cluster_selectors[prev_best_cluster_index].get_raw_selector_bits()); m_block_selector_cluster_index[block_index] = prev_best_cluster_index; - + continue; } - + const color_rgba* pBlock_pixels = block.get_ptr(); - + color_rgba trial_block_colors[4]; blk.get_block_colors_etc1s(trial_block_colors); // precompute errors for the i-th block pixel and selector sel: [sel][i] uint32_t trial_errors[4][16]; - + if (m_params.m_perceptual) { for (uint32_t sel = 0; sel < 4; ++sel) @@ -2719,7 +2719,7 @@ namespace basisu for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++) { const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? 
(*pCluster_indices)[cluster_iter] : cluster_iter; - + const uint8_t* pSels = &unpacked_optimized_cluster_selectors[cluster_index * 16]; uint64_t trial_err = (uint64_t)trial_errors[pSels[0]][0] + trial_errors[pSels[1]][1] + trial_errors[pSels[2]][2] + trial_errors[pSels[3]][3]; @@ -2752,7 +2752,7 @@ namespace basisu m_block_selector_cluster_index[block_index] = best_cluster_index; prev_best_cluster_index = best_cluster_index; - + } // block_index #ifndef __EMSCRIPTEN__ @@ -2764,7 +2764,7 @@ namespace basisu #ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); #endif - + for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++) { m_selector_cluster_block_indices[i].resize(0); @@ -2778,7 +2778,7 @@ namespace basisu vector_ensure_element_is_valid(m_selector_cluster_block_indices, best_cluster_index); m_selector_cluster_block_indices[best_cluster_index].push_back(block_index); } - + } // if (use_cpu) debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); @@ -2788,7 +2788,7 @@ namespace basisu uint32_t basisu_frontend::refine_block_endpoints_given_selectors() { debug_printf("refine_block_endpoints_given_selectors\n"); - + for (int block_index = 0; block_index < static_cast(m_total_blocks); block_index++) { //uint32_t selector_cluster = m_block_selector_cluster_index(block_x, block_y); @@ -2969,7 +2969,7 @@ namespace basisu if (m_params.m_debug_stats) debug_printf("Total subblock endpoints refined: %u (%3.1f%%)\n", total_subblocks_refined, total_subblocks_refined * 100.0f / total_subblocks_examined); - + return total_subblocks_refined; } @@ -3061,7 +3061,7 @@ namespace basisu } // The backend has remapped the block endpoints while optimizing the output symbols for better rate distortion performance, so let's go and reoptimize the endpoint codebook. - // This is currently the only place where the backend actually goes and changes the quantization and calls the frontend to fix things up. 
+ // This is currently the only place where the backend actually goes and changes the quantization and calls the frontend to fix things up. // This is basically a bottom up clusterization stage, where some leaves can be combined. void basisu_frontend::reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices) { @@ -3073,12 +3073,12 @@ namespace basisu basisu::vector cluster_valid(new_endpoint_cluster_block_indices.size()); basisu::vector cluster_improved(new_endpoint_cluster_block_indices.size()); - + const uint32_t N = 256; for (uint32_t cluster_index_iter = 0; cluster_index_iter < new_endpoint_cluster_block_indices.size(); cluster_index_iter += N) { - const uint32_t first_index = cluster_index_iter; - const uint32_t last_index = minimum((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N); + const uint32_t first_index = cluster_index_iter; + const uint32_t last_index = minimum((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N); #ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &cluster_improved, &cluster_valid, &new_endpoint_cluster_block_indices, &pBlock_selector_indices ] { @@ -3100,13 +3100,13 @@ namespace basisu blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(cluster_index, false)); blk.set_inten_tables_etc1s(get_endpoint_cluster_inten_table(cluster_index, false)); blk.set_flip_bit(true); - + uint64_t cur_err = 0; for (uint32_t cluster_block_indices_iter = 0; cluster_block_indices_iter < cluster_block_indices.size(); cluster_block_indices_iter++) { const uint32_t block_index = cluster_block_indices[cluster_block_indices_iter]; - + const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr(); memcpy(&cluster_pixels[cluster_block_indices_iter * 16], pBlock_pixels, 16 * sizeof(color_rgba)); @@ -3118,14 +3118,14 @@ namespace 
basisu blk.set_raw_selector_bits(blk_selectors.get_raw_selector_bits()); cur_err += blk.evaluate_etc1_error(pBlock_pixels, m_params.m_perceptual); - + for (uint32_t y = 0; y < 4; y++) for (uint32_t x = 0; x < 4; x++) force_selectors[cluster_block_indices_iter * 16 + x + y * 4] = static_cast(blk_selectors.get_selector(x, y)); } endpoint_cluster_etc_params new_endpoint_cluster_etc_params; - + { etc1_optimizer optimizer; etc1_solution_coordinates solutions[2]; @@ -3164,7 +3164,7 @@ namespace basisu if (new_endpoint_cluster_etc_params.m_color_error[0] < cur_err) { m_endpoint_cluster_etc_params[cluster_index] = new_endpoint_cluster_etc_params; - + cluster_improved[cluster_index] = true; } @@ -3181,13 +3181,13 @@ namespace basisu #ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); #endif - + uint32_t total_unused_clusters = 0; uint32_t total_improved_clusters = 0; - + old_to_new_endpoint_cluster_indices.resize(m_endpoint_clusters.size()); vector_set_all(old_to_new_endpoint_cluster_indices, -1); - + int total_new_endpoint_clusters = 0; for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++) @@ -3222,7 +3222,7 @@ namespace basisu for (uint32_t block_index = 0; block_index < new_block_endpoints.size(); block_index++) { const uint32_t old_endpoint_cluster_index = new_block_endpoints[block_index]; - + const int new_endpoint_cluster_index = old_to_new_endpoint_cluster_indices[old_endpoint_cluster_index]; BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index >= 0); @@ -3235,13 +3235,13 @@ namespace basisu new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 0); new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 1); - + m_block_endpoint_clusters_indices[block_index][0] = new_endpoint_cluster_index; m_block_endpoint_clusters_indices[block_index][1] = new_endpoint_cluster_index; } 
debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 2\n"); - + m_endpoint_clusters = new_endpoint_clusters; m_endpoint_cluster_etc_params = new_endpoint_cluster_etc_params; @@ -3277,7 +3277,7 @@ namespace basisu debug_printf("Final (post-RDO) endpoint clusters: %u\n", m_endpoint_clusters.size()); } - + //debug_printf("validate_output: %u\n", validate_output()); } @@ -3305,7 +3305,7 @@ namespace basisu // If the endpoint cluster lives in more than one parent node, that's wrong. if (subblock_parent_indices[subblock_index] != -1) return false; - + subblock_parent_indices[subblock_index] = parent_index; } } @@ -3329,7 +3329,7 @@ namespace basisu if (subblock_cluster_indices[subblock_index] != -1) return false; - + subblock_cluster_indices[subblock_index] = cluster_index; // There are transformations on the endpoint clusters that can break the strict tree requirement @@ -3343,7 +3343,7 @@ namespace basisu } } } - + // Make sure all endpoint clusters are present in the parent cluster. for (uint32_t i = 0; i < subblock_cluster_indices.size(); i++) { @@ -3368,7 +3368,7 @@ namespace basisu #define CHECK(x) BASISU_FRONTEND_VERIFY(x); CHECK(get_output_block(block_index).get_flip_bit() == true); - + const bool diff_flag = get_diff_flag(block_index); CHECK(diff_flag == true); @@ -3382,11 +3382,11 @@ namespace basisu // basisu only supports ETC1S, so these must be equal. 
CHECK(endpoint_cluster0_index == endpoint_cluster1_index); - + CHECK(blk.set_block_color5_check(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false))); CHECK(get_endpoint_cluster_color_is_used(endpoint_cluster0_index, false)); - + blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, false)); blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, false)); @@ -3406,7 +3406,7 @@ namespace basisu CHECK(rdo_output_block.get_base5_color() == blk.get_base5_color()); CHECK(rdo_output_block.get_delta3_color() == blk.get_delta3_color()); CHECK(rdo_output_block.get_raw_selector_bits() == blk.get_raw_selector_bits()); - + #undef CHECK } diff --git a/encoder/basisu_frontend.h b/encoder/basisu_frontend.h index cda73f39..ab285d20 100644 --- a/encoder/basisu_frontend.h +++ b/encoder/basisu_frontend.h @@ -61,7 +61,7 @@ namespace basisu enum { cMaxEndpointClusters = 16128, - + cMaxSelectorClusters = 16128, }; @@ -101,12 +101,12 @@ namespace basisu bool m_validate; bool m_multithreaded; bool m_disable_hierarchical_endpoint_codebooks; - + basist::basis_texture_type m_tex_type; const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; - + opencl_context_ptr m_pOpenCL_context; - + job_pool *m_pJob_pool; }; @@ -143,12 +143,12 @@ namespace basisu uint32_t get_total_selector_clusters() const { return static_cast(m_selector_cluster_block_indices.size()); } uint32_t get_block_selector_cluster_index(uint32_t block_index) const { return m_block_selector_cluster_index[block_index]; } const etc_block &get_selector_cluster_selector_bits(uint32_t cluster_index) const { return m_optimized_cluster_selectors[cluster_index]; } - + // Returns block indices using each selector cluster const uint_vec &get_selector_cluster_block_indices(uint32_t selector_cluster_index) const { return m_selector_cluster_block_indices[selector_cluster_index]; } void 
dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks); - + void reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices = nullptr); bool get_opencl_failed() const { return m_opencl_failed; } @@ -170,15 +170,15 @@ namespace basisu // The quantized ETC1S texture. etc_block_vec m_encoded_blocks; - + // Quantized blocks after endpoint quant, but before selector quant - etc_block_vec m_orig_encoded_blocks; - + etc_block_vec m_orig_encoded_blocks; + // Full quality ETC1S texture etc_block_vec m_etc1_blocks_etc1s; - + typedef vec<6, float> vec6F; - + // Endpoint clusterizer typedef tree_vector_quant vec6F_quantizer; vec6F_quantizer m_endpoint_clusterizer; @@ -187,16 +187,16 @@ namespace basisu basisu::vector m_endpoint_clusters; // Array of subblock indices for each parent endpoint cluster - // Note: Initially, each endpoint cluster will only live in a single parent cluster, in a shallow tree. + // Note: Initially, each endpoint cluster will only live in a single parent cluster, in a shallow tree. // As the endpoint clusters are manipulated this constraint gets broken. basisu::vector m_endpoint_parent_clusters; - + // Each block's parent endpoint cluster index - uint8_vec m_block_parent_endpoint_cluster; + uint8_vec m_block_parent_endpoint_cluster; // Array of endpoint cluster indices for each parent endpoint cluster basisu::vector m_endpoint_clusters_within_each_parent_cluster; - + struct endpoint_cluster_etc_params { endpoint_cluster_etc_params() @@ -266,13 +266,13 @@ namespace basisu }; typedef basisu::vector cluster_subblock_etc_params_vec; - - // Each endpoint cluster's ETC1S parameters + + // Each endpoint cluster's ETC1S parameters cluster_subblock_etc_params_vec m_endpoint_cluster_etc_params; // The endpoint cluster index used by each ETC1 subblock. 
basisu::vector m_block_endpoint_clusters_indices; - + // The block(s) within each selector cluster // Note: If you add anything here that uses selector cluster indicies, be sure to update optimize_selector_codebook()! basisu::vector m_selector_cluster_block_indices; @@ -282,13 +282,13 @@ namespace basisu // The block(s) within each parent selector cluster. basisu::vector m_selector_parent_cluster_block_indices; - + // Each block's parent selector cluster uint8_vec m_block_parent_selector_cluster; // Array of selector cluster indices for each parent selector cluster basisu::vector m_selector_clusters_within_each_parent_cluster; - + // Each block's selector cluster index basisu::vector m_block_selector_cluster_index; diff --git a/encoder/basisu_gpu_texture.cpp b/encoder/basisu_gpu_texture.cpp index dec769d5..65550b27 100644 --- a/encoder/basisu_gpu_texture.cpp +++ b/encoder/basisu_gpu_texture.cpp @@ -29,9 +29,9 @@ namespace basisu const eac_a8_block *pBlock = static_cast(pBlock_bits); const int8_t *pTable = g_etc2_eac_tables[pBlock->m_table]; - + const uint64_t selector_bits = pBlock->get_selector_bits(); - + const int32_t base = pBlock->m_base; const int32_t mul = pBlock->m_multiplier; @@ -63,16 +63,16 @@ namespace basisu uint8_t m_low_color[cTotalEndpointBytes]; uint8_t m_high_color[cTotalEndpointBytes]; uint8_t m_selectors[cTotalSelectorBytes]; - + inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } - static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) + static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) { r = (c >> 11) & 31; g = (c >> 5) & 63; b = c & 31; - + r = (r << 3) | (r >> 2); g = (g << 2) | (g >> 4); b = (b << 3) | (b >> 2); @@ -118,9 +118,9 @@ namespace basisu { for (uint32_t y = 0; y < 4; y++, pPixels += 4) { - pPixels[0] = c[pBlock->get_selector(0, y)]; - pPixels[1] = 
c[pBlock->get_selector(1, y)]; - pPixels[2] = c[pBlock->get_selector(2, y)]; + pPixels[0] = c[pBlock->get_selector(0, y)]; + pPixels[1] = c[pBlock->get_selector(1, y)]; + pPixels[2] = c[pBlock->get_selector(2, y)]; pPixels[3] = c[pBlock->get_selector(3, y)]; } } @@ -128,9 +128,9 @@ namespace basisu { for (uint32_t y = 0; y < 4; y++, pPixels += 4) { - pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); - pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); - pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); } } @@ -197,9 +197,9 @@ namespace basisu { for (uint32_t y = 0; y < 4; y++, pPixels += 4) { - pPixels[0] = c[pBlock->get_selector(0, y)]; - pPixels[1] = c[pBlock->get_selector(1, y)]; - pPixels[2] = c[pBlock->get_selector(2, y)]; + pPixels[0] = c[pBlock->get_selector(0, y)]; + pPixels[1] = c[pBlock->get_selector(1, y)]; + pPixels[2] = c[pBlock->get_selector(2, y)]; pPixels[3] = c[pBlock->get_selector(3, y)]; } } @@ -207,9 +207,9 @@ namespace basisu { for (uint32_t y = 0; y < 4; y++, pPixels += 4) { - pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); - pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); - pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); } } @@ -235,7 +235,7 @@ namespace basisu c[0].set_noclamp_rgba(r0, g0, b0, 255); c[1].set_noclamp_rgba(r1, g1, b1, 255); - + bool used_punchthrough = false; if (l > h) @@ -254,9 +254,9 @@ namespace basisu { for (uint32_t y = 0; y < 4; y++, pPixels += 4) { - pPixels[0] = c[pBlock->get_selector(0, y)]; - pPixels[1] = c[pBlock->get_selector(1, y)]; - pPixels[2] = c[pBlock->get_selector(2, y)]; + pPixels[0] 
= c[pBlock->get_selector(0, y)]; + pPixels[1] = c[pBlock->get_selector(1, y)]; + pPixels[2] = c[pBlock->get_selector(2, y)]; pPixels[3] = c[pBlock->get_selector(3, y)]; } } @@ -264,9 +264,9 @@ namespace basisu { for (uint32_t y = 0; y < 4; y++, pPixels += 4) { - pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); - pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); - pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); } } @@ -286,7 +286,7 @@ namespace basisu inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } inline uint64_t get_selector_bits() const - { + { return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) | ((uint32_t)m_selectors[3] << 24U))) | (((uint64_t)m_selectors[4]) << 32U) | (((uint64_t)m_selectors[5]) << 40U); @@ -297,7 +297,7 @@ namespace basisu assert((x < 4U) && (y < 4U)); return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1); } - + static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h) { pDst[0] = static_cast(l); @@ -352,7 +352,7 @@ namespace basisu pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)]; } } - + // Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3. 
bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels) { @@ -362,7 +362,7 @@ namespace basisu success = false; unpack_bc4(pBlock_bits, &pPixels[0].a, sizeof(color_rgba)); - + return success; } @@ -419,9 +419,9 @@ namespace basisu for (uint32_t i = 0; i < 16; i++) { const uint32_t s = sels & 3; - + pPixels[i] = c[s]; - + sels >>= 2; } } @@ -443,12 +443,12 @@ namespace basisu case 2: return bc7_interp2(l, h, w); case 3: return bc7_interp3(l, h, w); case 4: return bc7_interp4(l, h, w); - default: + default: break; } return 0; } - + bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) { //const uint32_t SUBSETS = 3; @@ -458,7 +458,7 @@ namespace basisu const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; const uint32_t PBITS = (mode == 0) ? 6 : 0; const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; - + uint32_t bit_offset = 0; const uint8_t* pBuf = static_cast(pBlock_bits); @@ -510,7 +510,7 @@ namespace basisu const uint32_t PBITS = (mode == 1) ? 2 : 4; const uint32_t SHARED_PBITS = (mode == 1) ? true : false; const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; - + uint32_t bit_offset = 0; const uint8_t* pBuf = static_cast(pBlock_bits); @@ -522,21 +522,21 @@ namespace basisu for (uint32_t c = 0; c < COMPS; c++) for (uint32_t e = 0; e < ENDPOINTS; e++) endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); - + uint32_t pbits[4]; for (uint32_t p = 0; p < PBITS; p++) pbits[p] = read_bits32(pBuf, bit_offset, 1); - + uint32_t weights[16]; for (uint32_t i = 0; i < 16; i++) weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); - + assert(bit_offset == 128); for (uint32_t e = 0; e < ENDPOINTS; e++) for (uint32_t c = 0; c < 4; c++) endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? 
(e >> 1) : e], ENDPOINT_BITS)); - + color_rgba block_colors[2][8]; for (uint32_t s = 0; s < 2; s++) for (uint32_t i = 0; i < WEIGHT_VALS; i++) @@ -575,11 +575,11 @@ namespace basisu for (uint32_t c = 0; c < COMPS; c++) for (uint32_t e = 0; e < ENDPOINTS; e++) endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); - + const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; - + uint32_t weights[16], a_weights[16]; - + for (uint32_t i = 0; i < 16; i++) (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0)); @@ -681,10 +681,10 @@ namespace basisu { const uint32_t w = basist::g_bc7_weights4[i]; const uint32_t iw = 64 - w; - vals[i].set_noclamp_rgba( - (r0 * iw + r1 * w + 32) >> 6, - (g0 * iw + g1 * w + 32) >> 6, - (b0 * iw + b1 * w + 32) >> 6, + vals[i].set_noclamp_rgba( + (r0 * iw + r1 * w + 32) >> 6, + (g0 * iw + g1 * w + 32) >> 6, + (b0 * iw + b1 * w + 32) >> 6, (a0 * iw + a1 * w + 32) >> 6); } @@ -697,7 +697,7 @@ namespace basisu pPixels[5] = vals[block.m_hi.m_s11]; pPixels[6] = vals[block.m_hi.m_s21]; pPixels[7] = vals[block.m_hi.m_s31]; - + pPixels[8] = vals[block.m_hi.m_s02]; pPixels[9] = vals[block.m_hi.m_s12]; pPixels[10] = vals[block.m_hi.m_s22]; @@ -741,7 +741,7 @@ namespace basisu return false; } - + struct fxt1_block { union @@ -842,7 +842,7 @@ namespace basisu return false; if (pBlock->m_hi.m_alpha == 1) return false; - + color_rgba colors[4]; colors[0].r = pBlock->m_hi.m_r0; @@ -892,7 +892,7 @@ namespace basisu for (uint32_t i = 0; i < 16; i++) { const uint32_t sel = (pBlock->m_sels[4 + (i >> 2)] >> ((i & 3) * 2)) & 3; - + const uint32_t x = i & 3; const uint32_t y = i >> 2; pPixels[4 + x + y * 8] = block1_colors[sel]; @@ -950,7 +950,7 @@ namespace basisu { return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), 255); } - + static 
color_rgba convert_rgba_5554_to_8888(const color_rgba& col) { return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), (col[3] << 4) | col[3]); @@ -973,10 +973,10 @@ namespace basisu { // colora=554 color_rgba color_a(pBlock->m_opaque_color_data.m_red_a, pBlock->m_opaque_color_data.m_green_a, (pBlock->m_opaque_color_data.m_blue_a << 1) | (pBlock->m_opaque_color_data.m_blue_a >> 3), 255); - + // colora=555 color_rgba color_b(pBlock->m_opaque_color_data.m_red_b, pBlock->m_opaque_color_data.m_green_b, pBlock->m_opaque_color_data.m_blue_b, 255); - + colors[0] = convert_rgb_555_to_888(color_a); colors[3] = convert_rgb_555_to_888(color_b); @@ -985,11 +985,11 @@ namespace basisu } else { - // colora=4433 + // colora=4433 color_rgba color_a( - (pBlock->m_trans_color_data.m_red_a << 1) | (pBlock->m_trans_color_data.m_red_a >> 3), + (pBlock->m_trans_color_data.m_red_a << 1) | (pBlock->m_trans_color_data.m_red_a >> 3), (pBlock->m_trans_color_data.m_green_a << 1) | (pBlock->m_trans_color_data.m_green_a >> 3), - (pBlock->m_trans_color_data.m_blue_a << 2) | (pBlock->m_trans_color_data.m_blue_a >> 1), + (pBlock->m_trans_color_data.m_blue_a << 2) | (pBlock->m_trans_color_data.m_blue_a >> 1), pBlock->m_trans_color_data.m_alpha_a << 1); //colorb=4443 @@ -1062,9 +1062,9 @@ namespace basisu for (uint32_t x = 0; x < 4; x++) { const uint32_t shift = 45 - ((y + x * 4) * 3); - + const uint32_t sel = (uint32_t)((sels >> shift) & 7); - + int val = base + g_etc2_eac_tables[table][sel] * mul; val = clamp(val, 0, 2047); @@ -1085,12 +1085,12 @@ namespace basisu unpack_etc2_eac_r(pBlock, pPixels, c); } } - + void unpack_uastc(const void* p, color_rgba* pPixels) { basist::unpack_uastc(*static_cast(p), (basist::color32 *)pPixels, false); } - + // Unpacks to RGBA, R, RG, or A bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels) { @@ -1217,10 +1217,10 @@ namespace basisu if ((m_fmt == texture_format::cPVRTC1_4_RGB) || 
(m_fmt == texture_format::cPVRTC1_4_RGBA)) { pvrtc4_image pi(m_width, m_height); - + if (get_total_blocks() != pi.get_total_blocks()) return false; - + memcpy(&pi.get_blocks()[0], get_ptr(), get_size_in_bytes()); pi.deswizzle(); @@ -1252,13 +1252,13 @@ namespace basisu return success; } - + static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; // KTX/GL enums enum { - KTX_ENDIAN = 0x04030201, + KTX_ENDIAN = 0x04030201, KTX_OPPOSITE_ENDIAN = 0x01020304, KTX_ETC1_RGB8_OES = 0x8D64, KTX_RED = 0x1903, @@ -1286,7 +1286,7 @@ namespace basisu KTX_COMPRESSED_R11_EAC = 0x9270, KTX_COMPRESSED_RG11_EAC = 0x9272 }; - + struct ktx_header { uint8_t m_identifier[12]; @@ -1496,17 +1496,17 @@ namespace basisu return false; } } - + ktx_header header; header.clear(); memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id)); header.m_endianness = KTX_ENDIAN; - + header.m_pixelWidth = width; header.m_pixelHeight = height; - + header.m_glTypeSize = 1; - + header.m_glInternalFormat = internal_fmt; header.m_glBaseInternalFormat = base_internal_fmt; @@ -1522,7 +1522,7 @@ namespace basisu for (uint32_t level_index = 0; level_index < total_levels; level_index++) { uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes(); - + if ((header.m_numberOfFaces == 1) || (header.m_numberOfArrayElements > 1)) { img_size = img_size * header.m_numberOfFaces * maximum(1, header.m_numberOfArrayElements); @@ -1542,10 +1542,10 @@ namespace basisu const gpu_image& img = gpu_images[cubemap_flag ? 
(array_index * 6 + face_index) : array_index][level_index]; append_vector(ktx_data, (uint8_t *)img.get_ptr(), img.get_size_in_bytes()); - + bytes_written += img.get_size_in_bytes(); } - + } // array_index } // level_index @@ -1591,7 +1591,7 @@ namespace basisu } //const uint32_t OUT_FILE_MAGIC = 'TEXC'; - struct out_file_header + struct out_file_header { packed_uint<4> m_magic; packed_uint<4> m_pad; @@ -1623,7 +1623,7 @@ namespace basisu fwrite(&hdr, sizeof(hdr), 1, pFile); fwrite(gi.get_ptr(), gi.get_size_in_bytes(), 1, pFile); - + return fclose(pFile) != EOF; } } // basisu diff --git a/encoder/basisu_gpu_texture.h b/encoder/basisu_gpu_texture.h index 619926f5..9a2c8475 100644 --- a/encoder/basisu_gpu_texture.h +++ b/encoder/basisu_gpu_texture.h @@ -48,11 +48,11 @@ namespace basisu } inline texture_format get_format() const { return m_fmt; } - + // Width/height in pixels inline uint32_t get_pixel_width() const { return m_width; } inline uint32_t get_pixel_height() const { return m_height; } - + // Width/height in blocks, row pitch is assumed to be m_blocks_x. 
inline uint32_t get_blocks_x() const { return m_blocks_x; } inline uint32_t get_blocks_y() const { return m_blocks_y; } @@ -67,7 +67,7 @@ namespace basisu inline uint32_t get_row_pitch_in_bytes() const { return get_bytes_per_block() * get_blocks_x(); } inline const uint64_vec &get_blocks() const { return m_blocks; } - + inline const uint64_t *get_ptr() const { return &m_blocks[0]; } inline uint64_t *get_ptr() { return &m_blocks[0]; } @@ -101,7 +101,7 @@ namespace basisu } bool unpack(image& img) const; - + void override_dimensions(uint32_t w, uint32_t h) { m_width = w; @@ -119,9 +119,9 @@ namespace basisu // KTX file writing bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector& gpu_images, bool cubemap_flag); - + bool write_compressed_texture_file(const char *pFilename, const basisu::vector& g, bool cubemap_flag); - + inline bool write_compressed_texture_file(const char *pFilename, const gpu_image_vec &g) { basisu::vector a; @@ -130,7 +130,7 @@ namespace basisu } bool write_compressed_texture_file(const char *pFilename, const gpu_image &g); - + bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi); // GPU texture block unpacking @@ -150,5 +150,5 @@ namespace basisu // unpack_block() is primarily intended to unpack texture data created by the transcoder. // For some texture formats (like ETC2 RGB, PVRTC2, FXT1) it's not a complete implementation. 
bool unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels); - + } // namespace basisu diff --git a/encoder/basisu_kernels_imp.h b/encoder/basisu_kernels_imp.h index dcf1ce06..c9e3fa4d 100644 --- a/encoder/basisu_kernels_imp.h +++ b/encoder/basisu_kernels_imp.h @@ -22,7 +22,7 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) void _call(int64_t* pDistance, const uint8_t* pSelectors, const color_rgba* pBlock_colors, - const color_rgba* pSrc_pixels, uint32_t n, + const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) { assert(early_out_err >= 0); @@ -110,7 +110,7 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) void _call(int64_t* pDistance, const uint8_t* pSelectors, const color_rgba* pBlock_colors, - const color_rgba* pSrc_pixels, uint32_t n, + const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) { assert(early_out_err >= 0); @@ -205,7 +205,7 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) void _call(int64_t* pDistance, uint8_t* pSelectors, const color_rgba* pBlock_colors, - const color_rgba* pSrc_pixels, uint32_t n, + const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) { assert(early_out_err >= 0); @@ -301,7 +301,7 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) void _call(int64_t* pDistance, uint8_t* pSelectors, const color_rgba* pBlock_colors, - const color_rgba* pSrc_pixels, uint32_t n, + const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) { assert(early_out_err >= 0); @@ -397,7 +397,7 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) void _call(int64_t* pDistance, const color_rgba* pBlock_colors, - const color_rgba* pSrc_pixels, uint32_t n, + const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error) { assert(early_out_error >= 0); @@ -453,7 +453,7 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) int id = ((delta_l * delta_l) >> 7) + ((((delta_cr * delta_cr) >> 7) * 26) >> 7) + ((((delta_cb * delta_cb) >> 7) * 3) >> 7); - + if (id < best_err) { best_err = id; @@ 
-554,10 +554,10 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) uint32_t num_vecs, const void* pWeighted_vecs_void, const void* pOrigin_void, const uint32_t* pVec_indices, void* pMatrix16x16_void) { const std::pair* pWeighted_vecs = static_cast< const std::pair *>(pWeighted_vecs_void); - + const float* pOrigin = static_cast(pOrigin_void); vfloat org0 = loadu_linear_all(pOrigin), org1 = loadu_linear_all(pOrigin + 4), org2 = loadu_linear_all(pOrigin + 8), org3 = loadu_linear_all(pOrigin + 12); - + vfloat mat[16][4]; vfloat vzero(zero_vfloat()); @@ -577,7 +577,7 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) vfloat weight((float)pWeighted_vecs[vec_index].second); vfloat vec[4] = { loadu_linear_all(pW) - org0, loadu_linear_all(pW + 4) - org1, loadu_linear_all(pW + 8) - org2, loadu_linear_all(pW + 12) - org3 }; - + vfloat wvec0 = vec[0] * weight, wvec1 = vec[1] * weight, wvec2 = vec[2] * weight, wvec3 = vec[3] * weight; for (uint32_t j = 0; j < 16; j++) diff --git a/encoder/basisu_miniz.h b/encoder/basisu_miniz.h index 18de9972..bc48bfee 100644 --- a/encoder/basisu_miniz.h +++ b/encoder/basisu_miniz.h @@ -1,8 +1,8 @@ /* miniz.c v1.15 - deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt - - Forked from the public domain/unlicense version at: https://code.google.com/archive/p/miniz/ - + + Forked from the public domain/unlicense version at: https://code.google.com/archive/p/miniz/ + Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -497,7 +497,7 @@ size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void // Compresses an image to a compressed PNG file in memory. // On entry: -// pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. +// pImage, w, h, and num_chans describe the image to compress. 
num_chans may be 1, 2, 3, or 4. // The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. // level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL // If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). @@ -799,7 +799,7 @@ mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) // This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) mz_uint64 a = 128ULL + (source_len * 110ULL) / 100ULL; mz_uint64 b = 128ULL + (mz_uint64)source_len + ((source_len / (31 * 1024)) + 1ULL) * 5ULL; - + mz_uint64 t = MZ_MAX(a, b); if (((mz_ulong)t) != t) t = (mz_ulong)(-1); @@ -1377,8 +1377,8 @@ tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_nex for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; } - r->m_check_adler32 = (s2 << 16) + s1; - if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) + r->m_check_adler32 = (s2 << 16) + s1; + if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) status = TINFL_STATUS_ADLER32_MISMATCH; } return status; diff --git a/encoder/basisu_opencl.cpp b/encoder/basisu_opencl.cpp index 81e3090a..d7f31070 100644 --- a/encoder/basisu_opencl.cpp +++ b/encoder/basisu_opencl.cpp @@ -54,10 +54,10 @@ namespace basisu class ocl { public: - ocl() + ocl() { memset(&m_dev_fp_config, 0, sizeof(m_dev_fp_config)); - + m_ocl_mutex.lock(); m_ocl_mutex.unlock(); } @@ -161,12 +161,12 @@ namespace basisu deinit(); return false; } - + printf("OpenCL init time: %3.3f secs\n", tm.get_elapsed_secs()); return true; } - + bool deinit() { if (m_program) @@ -364,7 +364,7 
@@ namespace basisu return obj; } - + bool destroy_buffer(cl_mem buf) { if (buf) @@ -678,7 +678,7 @@ namespace basisu cl_command_queue m_command_queue = nullptr; cl_program m_program = nullptr; cl_device_fp_config m_dev_fp_config; - + bool m_use_mutex = false; std::mutex m_ocl_mutex; @@ -704,7 +704,7 @@ namespace basisu private: ocl* m_p; }; - + cl_image_format get_image_format(uint32_t bytes_per_pixel, bool normalized) { cl_image_format fmt; @@ -721,10 +721,10 @@ namespace basisu return fmt; } }; - + // Library blobal state ocl g_ocl; - + bool opencl_init(bool force_serialization) { if (g_ocl.is_initialized()) @@ -753,11 +753,11 @@ namespace basisu g_ocl.deinit(); return false; } - + pKernel_src = (char*)kernel_src.data(); kernel_src_size = kernel_src.size(); #endif - + if (!kernel_src_size) { ocl_error_printf("opencl_init: Invalid OpenCL kernel source file \"%s\"\n", BASISU_OCL_KERNELS_FILENAME); @@ -771,7 +771,7 @@ namespace basisu g_ocl.deinit(); return false; } - + printf("OpenCL support initialized successfully\n"); return true; @@ -816,10 +816,10 @@ namespace basisu opencl_context* pContext = static_cast(calloc(sizeof(opencl_context), 1)); if (!pContext) return nullptr; - + // To avoid driver bugs in some drivers - serialize this. Likely not necessary, we don't know. 
// https://community.intel.com/t5/OpenCL-for-CPU/Bug-report-clCreateKernelsInProgram-is-not-thread-safe/td-p/1159771 - + pContext->m_command_queue = g_ocl.create_command_queue(); if (!pContext->m_command_queue) { @@ -890,7 +890,7 @@ namespace basisu g_ocl.destroy_kernel(pContext->m_ocl_refine_endpoint_clusterization_kernel); g_ocl.destroy_command_queue(pContext->m_command_queue); - + memset(pContext, 0, sizeof(opencl_context)); free(pContext); @@ -938,7 +938,7 @@ namespace basisu assert(pContext->m_ocl_pixel_blocks); if (!pContext->m_ocl_pixel_blocks) return false; - + cl_encode_etc1s_param_struct ps; ps.m_total_blocks = pContext->m_ocl_total_pixel_blocks; ps.m_perceptual = perceptual; @@ -948,7 +948,7 @@ namespace basisu cl_mem vars = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue , &ps, sizeof(ps)); cl_mem block_buf = g_ocl.alloc_write_buffer(sizeof(etc_block) * pContext->m_ocl_total_pixel_blocks); - + if (!vars || !block_buf) goto exit; @@ -986,7 +986,7 @@ namespace basisu interval_timer tm; tm.start(); - + cl_encode_etc1s_param_struct ps; ps.m_total_blocks = total_clusters; ps.m_perceptual = perceptual; @@ -1003,7 +1003,7 @@ namespace basisu return false; } } - + cl_mem vars = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue , &ps, sizeof(ps)); cl_mem input_clusters = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pClusters, (size_t)(sizeof(cl_pixel_cluster) * total_clusters)); cl_mem input_pixels = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pPixels, (size_t)(sizeof(color_rgba) * total_pixels)); @@ -1062,7 +1062,7 @@ namespace basisu assert(pContext->m_ocl_pixel_blocks); if (!pContext->m_ocl_pixel_blocks) return false; - + cl_rec_param_struct ps; ps.m_total_blocks = pContext->m_ocl_total_pixel_blocks; ps.m_perceptual = perceptual; @@ -1073,7 +1073,7 @@ namespace basisu cl_mem cluster_info = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pCluster_info, sizeof(cl_endpoint_cluster_struct) * 
total_clusters); cl_mem sorted_block_indices = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pSorted_block_indices, sizeof(uint32_t) * pContext->m_ocl_total_pixel_blocks); cl_mem output_buf = g_ocl.alloc_write_buffer(sizeof(uint32_t) * pContext->m_ocl_total_pixel_blocks); - + if (!pixel_block_info || !cluster_info || !sorted_block_indices || !output_buf) goto exit; @@ -1087,7 +1087,7 @@ namespace basisu goto exit; debug_printf("opencl_refine_endpoint_clusterization: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); - + status = true; exit: @@ -1121,7 +1121,7 @@ namespace basisu fosc_param_struct ps; ps.m_total_blocks = pContext->m_ocl_total_pixel_blocks; ps.m_perceptual = perceptual; - + bool status = false; cl_mem input_block_info = g_ocl.alloc_and_init_read_buffer(pContext->m_command_queue, pInput_block_info, sizeof(fosc_block_struct) * pContext->m_ocl_total_pixel_blocks); @@ -1192,9 +1192,9 @@ namespace basisu goto exit; debug_printf("opencl_determine_selectors: Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); - + status = true; - + exit: g_ocl.destroy_buffer(input_etc_color5_intens); g_ocl.destroy_buffer(output_blocks); @@ -1202,7 +1202,7 @@ namespace basisu return status; } -#else +#else namespace basisu { // No OpenCL support - all dummy functions that return false; @@ -1269,7 +1269,7 @@ namespace basisu BASISU_NOTE_UNUSED(pPixel_weights); BASISU_NOTE_UNUSED(perceptual); BASISU_NOTE_UNUSED(total_perms); - + return false; } diff --git a/encoder/basisu_opencl.h b/encoder/basisu_opencl.h index 4194a084..51fa75a1 100644 --- a/encoder/basisu_opencl.h +++ b/encoder/basisu_opencl.h @@ -59,7 +59,7 @@ namespace basisu bool opencl_encode_etc1s_pixel_clusters( opencl_context_ptr pContext, - etc_block* pOutput_blocks, + etc_block* pOutput_blocks, uint32_t total_clusters, const cl_pixel_cluster *pClusters, uint64_t total_pixels, @@ -92,7 +92,7 @@ namespace basisu uint32_t total_clusters, const cl_endpoint_cluster_struct *pCluster_info, const uint32_t 
*pSorted_block_indices, - uint32_t* pOutput_cluster_indices, + uint32_t* pOutput_cluster_indices, bool perceptual); // opencl_find_optimal_selector_clusters_for_each_block diff --git a/encoder/basisu_pvrtc1_4.cpp b/encoder/basisu_pvrtc1_4.cpp index 596fc197..fe2c5917 100644 --- a/encoder/basisu_pvrtc1_4.cpp +++ b/encoder/basisu_pvrtc1_4.cpp @@ -131,7 +131,7 @@ namespace basisu uint32_t pvrtc4_swizzle_uv(uint32_t width, uint32_t height, uint32_t x, uint32_t y) { assert((x < width) && (y < height) && basisu::is_pow2(height) && basisu::is_pow2(width)); - + uint32_t min_d = width, max_v = y; if (height < width) { @@ -148,7 +148,7 @@ namespace basisu } max_v >>= shift_ofs; - + // OR in the rest of the bits from the largest dimension swizzled |= (max_v << (2 * shift_ofs)); @@ -169,7 +169,7 @@ namespace basisu r = (packed >> 10) & 31; g = (packed >> 5) & 31; b = (packed >> 1) & 15; - + if (unpack) { b = (b << 1) | (b >> 3); @@ -198,7 +198,7 @@ namespace basisu { a = (a << 1); a = (a << 4) | a; - + r = (r << 1) | (r >> 3); g = (g << 1) | (g >> 3); b = (b << 2) | (b >> 1); @@ -272,7 +272,7 @@ namespace basisu b = (packed >> 1) & 7; a = a << 1; - + r = (r << 1) | (r >> 3); g = (g << 1) | (g >> 3); b = (b << 2) | (b >> 1); @@ -285,13 +285,13 @@ namespace basisu b = packed & 15; a = a << 1; - + r = (r << 1) | (r >> 3); g = (g << 1) | (g >> 3); b = (b << 1) | (b >> 3); } } - + assert((r < 32) && (g < 32) && (b < 32) && (a < 16)); return color_rgba(r, g, b, a); @@ -305,12 +305,12 @@ namespace basisu int block_x1 = block_x0 + 1; int block_y0 = (static_cast(y) - 2) >> 2; int block_y1 = block_y0 + 1; - + block_x0 = posmod(block_x0, m_block_width); block_x1 = posmod(block_x1, m_block_width); block_y0 = posmod(block_y0, m_block_height); block_y1 = posmod(block_y1, m_block_height); - + pColors[0] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, 
block_y1).get_endpoint_5554(0)); pColors[3] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); @@ -334,7 +334,7 @@ namespace basisu return false; } - + color_rgba pvrtc4_image::get_pixel(uint32_t x, uint32_t y, uint32_t m) const { assert((x < m_width) && (y < m_height)); @@ -343,12 +343,12 @@ namespace basisu int block_x1 = block_x0 + 1; int block_y0 = (static_cast(y) - 2) >> 2; int block_y1 = block_y0 + 1; - + block_x0 = posmod(block_x0, m_block_width); block_x1 = posmod(block_x1, m_block_width); block_y0 = posmod(block_y0, m_block_height); block_y1 = posmod(block_y1, m_block_height); - + if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) { if (m == 0) @@ -471,7 +471,7 @@ namespace basisu color_rgba color_1((int)colors[1][0], (int)colors[1][1], (int)colors[1][2], 0); pvrtc4_block cur_blocks[3][3]; - + for (int y = -1; y <= 1; y++) { for (int x = -1; x <= 1; x++) diff --git a/encoder/basisu_pvrtc1_4.h b/encoder/basisu_pvrtc1_4.h index db6985a4..afe71841 100644 --- a/encoder/basisu_pvrtc1_4.h +++ b/encoder/basisu_pvrtc1_4.h @@ -17,14 +17,14 @@ namespace basisu { - enum - { - PVRTC2_MIN_WIDTH = 16, - PVRTC2_MIN_HEIGHT = 8, - PVRTC4_MIN_WIDTH = 8, - PVRTC4_MIN_HEIGHT = 8 + enum + { + PVRTC2_MIN_WIDTH = 16, + PVRTC2_MIN_HEIGHT = 8, + PVRTC4_MIN_WIDTH = 8, + PVRTC4_MIN_HEIGHT = 8 }; - + struct pvrtc4_block { uint32_t m_modulation; @@ -56,9 +56,9 @@ namespace basisu // Returns raw endpoint or 8888 color_rgba get_endpoint(uint32_t endpoint_index, bool unpack) const; - + color_rgba get_endpoint_5554(uint32_t endpoint_index) const; - + static uint32_t get_component_precision_in_bits(uint32_t c, uint32_t endpoint_index, bool opaque_endpoint) { static const uint32_t s_comp_prec[4][4] = @@ -80,7 +80,7 @@ namespace basisu }; return s_color_prec[open_range_check(endpoint_index, 2U) + (opaque_endpoint 
* 2)]; } - + inline uint32_t get_modulation(uint32_t x, uint32_t y) const { assert((x < 4) && (y < 4)); @@ -121,7 +121,7 @@ namespace basisu assert(endpoint_index < 2); const uint32_t m = m_endpoints & 1; uint32_t r = c[0], g = c[1], b = c[2], a = c[3]; - + uint32_t packed; if (opaque_endpoint) @@ -243,7 +243,7 @@ namespace basisu { return m_blocks(bx, by).is_endpoint_opaque(endpoint_index); } - + color_rgba get_endpoint(uint32_t bx, uint32_t by, uint32_t endpoint_index, bool unpack) const { assert((bx < m_block_width) && (by < m_block_height)); @@ -255,12 +255,12 @@ namespace basisu assert((x < m_width) && (y < m_height)); return m_blocks(x >> 2, y >> 2).get_modulation(x & 3, y & 3); } - + // Returns true if the block uses transparent modulation. bool get_interpolated_colors(uint32_t x, uint32_t y, color_rgba* pColors) const; - + color_rgba get_pixel(uint32_t x, uint32_t y, uint32_t m) const; - + inline color_rgba get_pixel(uint32_t x, uint32_t y) const { assert((x < m_width) && (y < m_height)); @@ -445,12 +445,12 @@ namespace basisu return total_error; } - - public: + + public: uint32_t m_width, m_height; pvrtc4_block_vector2D m_blocks; uint32_t m_block_width, m_block_height; - + bool m_uses_alpha; }; diff --git a/encoder/basisu_resample_filters.cpp b/encoder/basisu_resample_filters.cpp index 597cb3f6..1125be87 100644 --- a/encoder/basisu_resample_filters.cpp +++ b/encoder/basisu_resample_filters.cpp @@ -310,21 +310,21 @@ namespace basisu const resample_filter g_resample_filters[] = { - { "box", box_filter, BOX_FILTER_SUPPORT }, - { "tent", tent_filter, TENT_FILTER_SUPPORT }, - { "bell", bell_filter, BELL_SUPPORT }, + { "box", box_filter, BOX_FILTER_SUPPORT }, + { "tent", tent_filter, TENT_FILTER_SUPPORT }, + { "bell", bell_filter, BELL_SUPPORT }, { "b-spline", B_spline_filter, B_SPLINE_SUPPORT }, - { "mitchell", mitchell_filter, MITCHELL_SUPPORT }, - { "blackman", blackman_filter, BLACKMAN_SUPPORT }, + { "mitchell", mitchell_filter, MITCHELL_SUPPORT }, + { 
"blackman", blackman_filter, BLACKMAN_SUPPORT }, { "lanczos3", lanczos3_filter, LANCZOS3_SUPPORT }, { "lanczos4", lanczos4_filter, LANCZOS4_SUPPORT }, - { "lanczos6", lanczos6_filter, LANCZOS6_SUPPORT }, - { "lanczos12", lanczos12_filter, LANCZOS12_SUPPORT }, - { "kaiser", kaiser_filter, KAISER_SUPPORT }, + { "lanczos6", lanczos6_filter, LANCZOS6_SUPPORT }, + { "lanczos12", lanczos12_filter, LANCZOS12_SUPPORT }, + { "kaiser", kaiser_filter, KAISER_SUPPORT }, { "gaussian", gaussian_filter, GAUSSIAN_SUPPORT }, - { "catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT }, - { "quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT }, - { "quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT }, + { "catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT }, + { "quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT }, + { "quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT }, { "quadratic_mix", quadratic_mix_filter, QUADRATIC_SUPPORT }, }; diff --git a/encoder/basisu_resampler.cpp b/encoder/basisu_resampler.cpp index f4cedf00..83708d03 100644 --- a/encoder/basisu_resampler.cpp +++ b/encoder/basisu_resampler.cpp @@ -139,7 +139,7 @@ namespace basisu n += (right - left + 1); } - // Allocate memory for contributors. + // Allocate memory for contributors. 
if ((n == 0) || ((Pcpool = (Contrib*)calloc(n, sizeof(Contrib))) == NULL)) { @@ -840,5 +840,5 @@ namespace basisu else return g_resample_filters[filter_num].name; } - + } // namespace basisu diff --git a/encoder/basisu_ssim.cpp b/encoder/basisu_ssim.cpp index cceb400b..8e8cdea7 100644 --- a/encoder/basisu_ssim.cpp +++ b/encoder/basisu_ssim.cpp @@ -26,7 +26,7 @@ namespace basisu float g = (1.0f / (sqrtf((float)(2.0f * M_PI * sigma_sqr)))) * pow; return g; } - + // size_x/y should be odd void compute_gaussian_kernel(float *pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags) { @@ -316,14 +316,14 @@ namespace basisu return avg; } - + // Reference: https://ece.uwaterloo.ca/~z70wang/research/ssim/index.html vec4F compute_ssim(const imagef &a, const imagef &b) { imagef axb, a_sq, b_sq, mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, s1_sq, s2_sq, s12, smap, t1, t2, t3; const float C1 = 6.50250f, C2 = 58.52250f; - + pow_image(a, a_sq, vec4F(2)); pow_image(b, b_sq, vec4F(2)); mul_image(a, b, axb, vec4F(1.0f)); diff --git a/encoder/basisu_uastc_enc.cpp b/encoder/basisu_uastc_enc.cpp index 271bbc6f..0b104cb9 100644 --- a/encoder/basisu_uastc_enc.cpp +++ b/encoder/basisu_uastc_enc.cpp @@ -227,7 +227,7 @@ namespace basisu default: break; } -#endif +#endif uint32_t total_planes = 1; switch (result.m_uastc_mode) @@ -457,7 +457,7 @@ namespace basisu printf("Total bits: %u, endpoint bits: %u, weight bits: %u\n", block_bit_offset, total_endpoint_bits, total_weight_bits); #endif } - + // MODE 0 // 0. DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 19 (192) MODE6 RGB // 18. 
DualPlane: 0, WeightRange: 11 (32), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 11 (32) MODE6 RGB @@ -508,7 +508,7 @@ namespace basisu astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; - + bool invert = false; if (pForce_selectors == nullptr) @@ -1129,7 +1129,7 @@ namespace basisu } // common_pattern } - // MODE 5 + // MODE 5 // DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) BC7 MODE 6 (or MODE 1 1-subset) static void astc_mode5(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) { @@ -1260,7 +1260,7 @@ namespace basisu ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0]; uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &comp_params); - + color_cell_compressor_params ccell_params_a; memset(&ccell_params_a, 0, sizeof(ccell_params_a)); @@ -1417,9 +1417,9 @@ namespace basisu for (uint32_t x = 0; x < 4; x++) { const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k); -#ifdef _DEBUG +#ifdef _DEBUG assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true)); -#endif +#endif part_pixel_index[y][x] = num_part_pixels[astc_part]; part_pixels[astc_part][num_part_pixels[astc_part]++] = block[y][x]; @@ -1584,7 +1584,7 @@ namespace basisu } #endif } - + // 9. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 12 (RGBA Direct), EndpointRange: 8 (16) - BC7 MODE 7 // 16. 
DualPlane: 0, WeightRange : 2 (4), Subsets : 2, CEM: 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE 7 static void astc_mode9_or_16(uint32_t mode, const color_rgba source_block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, uint32_t estimate_partition_list_size) @@ -2500,7 +2500,7 @@ namespace basisu total_results++; } } - + static void compute_block_error(const color_rgba block[4][4], const color_rgba decoded_block[4][4], uint64_t &total_rgb_err, uint64_t &total_rgba_err, uint64_t &total_la_err) { uint64_t total_err_r = 0, total_err_g = 0, total_err_b = 0, total_err_a = 0; @@ -2547,14 +2547,14 @@ namespace basisu color_rgba tblock_hint0_bc1[4][4]; color_rgba tblock_hint1_bc1[4][4]; - + etc_block etc1_blk; memset(&etc1_blk, 0, sizeof(etc1_blk)); eac_a8_block etc2_blk; memset(&etc2_blk, 0, sizeof(etc2_blk)); etc2_blk.m_multiplier = 1; - + // Pack to UASTC, then unpack, because the endpoints may be swapped. uastc_block temp_ublock; @@ -2562,7 +2562,7 @@ namespace basisu unpacked_uastc_block temp_ublock_unpacked; unpack_uastc(temp_ublock, temp_ublock_unpacked, false); - + unpacked_uastc_block ublock; memset(&ublock, 0, sizeof(ublock)); ublock.m_mode = best_results.m_uastc_mode; @@ -2591,7 +2591,7 @@ namespace basisu else { transcode_uastc_to_bc1_hint0(ublock, &b); - + unpack_block(texture_format::cBC1, &b, &tblock_hint0_bc1[0][0]); } @@ -2613,7 +2613,7 @@ namespace basisu const float err_thresh0 = 1.075f; const float err_thresh1 = 1.075f; - + if ((g_uastc_mode_has_bc1_hint0[best_mode]) && (t_err_hint0 <= t_err * err_thresh0)) bc1_hint0 = true; @@ -2780,7 +2780,7 @@ namespace basisu uint32_t first_flip = 0, last_flip = 2; uint32_t first_individ = 0, last_individ = 2; - + if (flags & cPackUASTCETC1DisableFlipAndIndividual) { last_flip = 1; @@ -2792,7 +2792,7 @@ namespace basisu first_flip = 1; last_flip = first_flip + 1; } - + for (uint32_t flip = first_flip; flip < last_flip; flip++) { 
trial_block.set_flip_bit(flip != 0); @@ -2800,7 +2800,7 @@ namespace basisu for (uint32_t individ = first_individ; individ < last_individ; individ++) { const uint32_t mul = individ ? 15 : 31; - + trial_block.set_diff_bit(individ == 0); color_rgba unbiased_block_colors[2]; @@ -2816,7 +2816,7 @@ namespace basisu { const etc_coord2 &c = g_etc1_pixel_coords[flip][subset][j]; const color_rgba& p = decoded_uastc_block[c.m_y][c.m_x]; - + avg_color[0] += p.r; avg_color[1] += p.g; avg_color[2] += p.b; @@ -2834,13 +2834,13 @@ namespace basisu unbiased_block_colors[subset][1] = (uint8_t)((avg_color[1] * mul + 1020) / (8 * 255)); unbiased_block_colors[subset][2] = (uint8_t)((avg_color[2] * mul + 1020) / (8 * 255)); unbiased_block_colors[subset][3] = 0; - + } // subset - + for (uint32_t bias_iter = 0; bias_iter < last_bias; bias_iter++) { const uint32_t bias = use_faster_bias_mode_table ? s_sorted_bias_modes[bias_iter] : bias_iter; - + color_rgba block_colors[2]; for (uint32_t subset = 0; subset < 2; subset++) block_colors[subset] = has_bias ? apply_etc1_bias((color32&)unbiased_block_colors[subset], bias, mul, subset) : unbiased_block_colors[subset]; @@ -2874,7 +2874,7 @@ namespace basisu uint64_t best_subset_err = UINT64_MAX; const uint32_t inten_table_limit = (level == cPackUASTCLevelVerySlow) ? 8 : ((range[subset] > 51) ? 8 : (range[subset] >= 7 ? 
4 : 2)); - + for (uint32_t inten_table = 0; inten_table < inten_table_limit; inten_table++) { trial_block.set_inten_table(subset, inten_table); @@ -3009,7 +3009,7 @@ namespace basisu uint32_t m_table; uint32_t m_multiplier; }; - + static uint64_t uastc_pack_eac_a8(uastc_pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask) { assert(num_pixels <= 16); @@ -3153,7 +3153,7 @@ namespace basisu solid_results.m_common_pattern = 0; solid_results.m_solid_color = first_color; memset(&solid_results.m_astc, 0, sizeof(solid_results.m_astc)); - + etc_block etc1_blk; uint32_t etc1_bias = 0; @@ -3169,16 +3169,16 @@ namespace basisu return; } - + int level = flags & 7; const bool favor_uastc_error = (flags & cPackUASTCFavorUASTCError) != 0; const bool favor_bc7_error = !favor_uastc_error && ((flags & cPackUASTCFavorBC7Error) != 0); //const bool etc1_perceptual = true; - + uastc_encode_results results[MAX_ENCODE_RESULTS]; - + level = clampi(level, cPackUASTCLevelFastest, cPackUASTCLevelVerySlow); - + // Set all options to slowest, then configure from there depending on the selected level. 
uint32_t mode_mask = UINT32_MAX; uint32_t uber_level = 6; @@ -3189,12 +3189,12 @@ namespace basisu uint32_t least_squares_passes = 2; bool bc1_hints = true; bool only_use_la_on_transparent_blocks = false; - + switch (level) { case cPackUASTCLevelFastest: { - mode_mask = (1 << 0) | (1 << 8) | + mode_mask = (1 << 0) | (1 << 8) | (1 << 11) | (1 << 12) | (1 << 15); always_try_alpha_modes = false; @@ -3220,7 +3220,7 @@ namespace basisu estimate_partition = true; break; } - case cPackUASTCLevelDefault: + case cPackUASTCLevelDefault: { mode_mask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 6) | (1 << 8) | (1 << 9) | (1 << 10) | (1 << 11) | (1 << 12) | (1 << 13) | @@ -3258,9 +3258,9 @@ namespace basisu // HACK HACK //mode_mask &= ~(1 << 18); //mode_mask = (1 << 18)| (1 << 10); - + uint32_t total_results = 0; - + if (only_use_la_on_transparent_blocks) { if ((is_la) && (!has_alpha)) @@ -3268,7 +3268,7 @@ namespace basisu } const bool try_alpha_modes = has_alpha || always_try_alpha_modes; - + bc7enc_compress_block_params comp_params; memset(&comp_params, 0, sizeof(comp_params)); comp_params.m_max_partitions_mode1 = 64; @@ -3343,7 +3343,7 @@ namespace basisu } assert(total_results); - + // Fix up the errors so we consistently have LA, RGB, or RGBA error. 
for (uint32_t i = 0; i < total_results; i++) { @@ -3377,7 +3377,7 @@ namespace basisu } } } - + unpacked_uastc_block unpacked_ublock; memset(&unpacked_ublock, 0, sizeof(unpacked_ublock)); @@ -3544,7 +3544,7 @@ namespace basisu const uastc_encode_results& best_results = results[best_index]; const uint32_t best_mode = best_results.m_uastc_mode; const astc_block_desc& best_astc_results = best_results.m_astc; - + color_rgba decoded_uastc_block[4][4]; bool success = unpack_uastc(best_mode, best_results.m_common_pattern, best_results.m_solid_color.get_color32(), best_astc_results, (basist::color32 *)&decoded_uastc_block[0][0], false); (void)success; @@ -3562,7 +3562,7 @@ namespace basisu basist::uastc_block temp_block; pack_uastc(temp_block, best_results, etc1_blk, 0, etc_eac_a8_blk, false, false); - + basist::color32 temp_block_unpacked[4][4]; success = basist::unpack_uastc(temp_block, (basist::color32 *)temp_block_unpacked, false); VALIDATE(success); @@ -3570,7 +3570,7 @@ namespace basisu #if BASISU_USE_ASTC_DECOMPRESS // Now round trip to packed ASTC and back, then decode to pixels. 
uint32_t astc_data[4]; - + if (best_results.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) pack_astc_solid_block(astc_data, (color32 &)best_results.m_solid_color); else @@ -3588,7 +3588,7 @@ namespace basisu for (uint32_t x = 0; x < 4; x++) { VALIDATE(decoded_astc_block[y][x] == decoded_uastc_block[y][x]); - + VALIDATE(temp_block_unpacked[y][x].c[0] == decoded_uastc_block[y][x].r); VALIDATE(temp_block_unpacked[y][x].c[1] == decoded_uastc_block[y][x].g); VALIDATE(temp_block_unpacked[y][x].c[2] == decoded_uastc_block[y][x].b); @@ -3603,7 +3603,7 @@ namespace basisu bool bc1_hint0 = false, bc1_hint1 = false; if (bc1_hints) compute_bc1_hints(bc1_hint0, bc1_hint1, best_results, block, decoded_uastc_block); - + eac_a8_block eac_a8_blk; if ((g_uastc_mode_has_alpha[best_mode]) && (best_mode != UASTC_MODE_INDEX_SOLID_COLOR)) { @@ -3615,7 +3615,7 @@ namespace basisu uastc_pack_eac_a8_results eac8_a8_results; memset(&eac8_a8_results, 0, sizeof(eac8_a8_results)); uastc_pack_eac_a8(eac8_a8_results, decoded_uastc_block_alpha, 16, 0, eac_a8_mul_search_rad, eac_a8_table_mask); - + // All we care about for hinting is the table and multiplier. 
eac_a8_blk.m_table = eac8_a8_results.m_table; eac_a8_blk.m_multiplier = eac8_a8_results.m_multiplier; @@ -3839,8 +3839,8 @@ namespace basisu uint64_t m_total; uint64_t m_total2; }; - - static bool uastc_rdo_blocks(uint32_t first_index, uint32_t last_index, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, + + static bool uastc_rdo_blocks(uint32_t first_index, uint32_t last_index, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, uint32_t &total_skipped, uint32_t &total_refined, uint32_t &total_modified, uint32_t &total_smooth) { debug_printf("uastc_rdo_blocks: Processing blocks %u to %u\n", first_index, last_index); @@ -3849,7 +3849,7 @@ namespace basisu const bool perceptual = false; std::unordered_map selector_history; - + for (uint32_t block_index = first_index; block_index < last_index; block_index++) { const basist::uastc_block& blk = pBlocks[block_index]; @@ -3899,7 +3899,7 @@ namespace basisu color_rgba decoded_b7_blk[4][4]; unpack_block(texture_format::cBC7, &b7_block, &decoded_b7_blk[0][0]); - + uint64_t bc7_err = 0; for (uint32_t i = 0; i < 16; i++) bc7_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_b7_blk)[i], true); @@ -3954,7 +3954,7 @@ namespace basisu float best_t = cur_ms_err * smooth_block_error_scale + cur_bits * params.m_lambda; - // Now scan through previous blocks, insert their selector bit patterns into the current block, and find + // Now scan through previous blocks, insert their selector bit patterns into the current block, and find // selector bit patterns which don't increase the overall block error too much. 
for (int prev_block_index = last_block_to_check; prev_block_index >= first_block_to_check; --prev_block_index) { @@ -4076,7 +4076,7 @@ namespace basisu color_rgba decoded_trial_uastc_block[4][4]; bool success = unpack_uastc(results.m_uastc_mode, results.m_common_pattern, results.m_solid_color.get_color32(), results.m_astc, (basist::color32*) & decoded_trial_uastc_block[0][0], false); assert(success); - + BASISU_NOTE_UNUSED(success); uint64_t trial_uastc_err = 0; @@ -4103,7 +4103,7 @@ namespace basisu // Write the modified block pBlocks[block_index] = best_block; - + } // if (best_block_index != block_index) { @@ -4119,8 +4119,8 @@ namespace basisu return true; } - - // This function implements a basic form of rate distortion optimization (RDO) for UASTC. + + // This function implements a basic form of rate distortion optimization (RDO) for UASTC. // It only changes selectors and then updates the hints. It uses very approximate LZ bitprice estimation. // There's A LOT that can be done better in here, but it's a start. // One nice advantage of the method used here is that it works for any input, no matter which or how many modes it uses. @@ -4161,7 +4161,7 @@ namespace basisu { std::lock_guard lck(stat_mutex); - + all_succeeded = all_succeeded && status; total_skipped += job_skipped; total_modified += job_modified; @@ -4184,7 +4184,7 @@ namespace basisu } debug_printf("uastc_rdo: Total modified: %3.2f%%, total skipped: %3.2f%%, total refined: %3.2f%%, total smooth: %3.2f%%\n", total_modified * 100.0f / num_blocks, total_skipped * 100.0f / num_blocks, total_refined * 100.0f / num_blocks, total_smooth * 100.0f / num_blocks); - + return status; } } // namespace basisu diff --git a/encoder/basisu_uastc_enc.h b/encoder/basisu_uastc_enc.h index ba39a558..bbc01659 100644 --- a/encoder/basisu_uastc_enc.h +++ b/encoder/basisu_uastc_enc.h @@ -25,15 +25,15 @@ namespace basisu { // Fastest is the lowest quality, although it's stil substantially higher quality vs. BC1/ETC1. 
It supports 5 modes. // The output may be somewhat blocky because this setting doesn't support 2/3-subset UASTC modes, but it should be less blocky vs. BC1/ETC1. - // This setting doesn't write BC1 hints, so BC1 transcoding will be slower. + // This setting doesn't write BC1 hints, so BC1 transcoding will be slower. // Transcoded ETC1 quality will be lower because it only considers 2 hints out of 32. // Avg. 43.45 dB cPackUASTCLevelFastest = 0, - + // Faster is ~3x slower than fastest. It supports 9 modes. // Avg. 46.49 dB cPackUASTCLevelFaster = 1, - + // Default is ~5.5x slower than fastest. It supports 14 modes. // Avg. 47.47 dB cPackUASTCLevelDefault = 2, @@ -42,7 +42,7 @@ namespace basisu // Avg. 48.01 dB cPackUASTCLevelSlower = 3, - // VerySlow is ~200x slower than fastest. + // VerySlow is ~200x slower than fastest. // The best quality the codec is capable of, but you'll need to be patient or have a lot of cores. // Avg. 48.24 dB cPackUASTCLevelVerySlow = 4, @@ -53,13 +53,13 @@ namespace basisu // These flags allow you to favor only optimizing for lowest UASTC error, or lowest BC7 error. cPackUASTCFavorUASTCError = 8, cPackUASTCFavorBC7Error = 16, - + cPackUASTCETC1FasterHints = 64, cPackUASTCETC1FastestHints = 128, cPackUASTCETC1DisableFlipAndIndividual = 256, - + // Favor UASTC modes 0 and 10 more than the others (this is experimental, it's useful for RDO compression) - cPackUASTCFavorSimplerModes = 512, + cPackUASTCFavorSimplerModes = 512, }; // pRGBAPixels: Pointer to source 4x4 block of RGBA pixels (R first in memory). 
@@ -75,18 +75,18 @@ namespace basisu color_rgba m_solid_color; uint64_t m_astc_err; }; - + void pack_uastc(basist::uastc_block& blk, const uastc_encode_results& result, const etc_block& etc1_blk, uint32_t etc1_bias, const eac_a8_block& etc_eac_a8_blk, bool bc1_hint0, bool bc1_hint1); const uint32_t UASCT_RDO_DEFAULT_LZ_DICT_SIZE = 4096; const float UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO = 10.0f; const float UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH = 8.0f; - + // The RDO encoder computes a smoothness factor, from [0,1], for each block. To do this it computes each block's maximum component variance, then it divides this by this factor and clamps the result. // Larger values will result in more blocks being protected from too much distortion. const float UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV = 18.0f; - + // The RDO encoder can artifically boost the error of smooth blocks, in order to suppress distortions on smooth areas of the texture. // The encoder will use this value as the maximum error scale to use on smooth blocks. The larger this value, the better smooth bocks will look. Set to 1.0 to disable this completely. const float UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE = 10.0f; @@ -106,30 +106,30 @@ namespace basisu m_skip_block_rms_thresh = UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH; m_endpoint_refinement = true; m_lz_literal_cost = 100; - + m_max_smooth_block_std_dev = UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV; m_smooth_block_max_error_scale = UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE; } - + // m_lz_dict_size: Size of LZ dictionary to simulate in bytes. The larger this value, the slower the encoder but the higher the quality per LZ compressed bit. uint32_t m_lz_dict_size; // m_lambda: The post-processor tries to reduce distortion+rate*lambda (rate is approximate LZ bits and distortion is scaled MS error). // Larger values push the postprocessor towards optimizing more for lower rate, and smaller values more for distortion. 0=minimal distortion. 
float m_lambda; - + // m_max_allowed_rms_increase_ratio: How much the RMS error of a block is allowed to increase before a trial is rejected. 1.0=no increase allowed, 1.05=5% increase allowed, etc. float m_max_allowed_rms_increase_ratio; - - // m_skip_block_rms_thresh: Blocks with this much RMS error or more are completely skipped by the RDO encoder. + + // m_skip_block_rms_thresh: Blocks with this much RMS error or more are completely skipped by the RDO encoder. float m_skip_block_rms_thresh; - // m_endpoint_refinement: If true, the post-process will attempt to refine the endpoints of blocks with modified selectors. + // m_endpoint_refinement: If true, the post-process will attempt to refine the endpoints of blocks with modified selectors. bool m_endpoint_refinement; float m_max_smooth_block_std_dev; float m_smooth_block_max_error_scale; - + uint32_t m_lz_literal_cost; }; diff --git a/encoder/cppspmd_flow.h b/encoder/cppspmd_flow.h index f6930476..cbb756fe 100644 --- a/encoder/cppspmd_flow.h +++ b/encoder/cppspmd_flow.h @@ -48,7 +48,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_return() m_kernel_exec = andnot(m_exec, m_kernel_exec); m_exec = exec_mask::all_off(); } - + template CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_unmasked(const UnmaskedBody& unmaskedBody) { @@ -61,7 +61,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_unmasked(const UnmaskedBody& unmaske m_kernel_exec = m_kernel_exec & orig_kernel_exec; m_exec = m_exec & orig_exec; - + check_masks(); } @@ -69,9 +69,9 @@ struct scoped_unmasked_restorer { spmd_kernel *m_pKernel; exec_mask m_orig_exec, m_orig_kernel_exec; - - CPPSPMD_FORCE_INLINE scoped_unmasked_restorer(spmd_kernel *pKernel) : - m_pKernel(pKernel), + + CPPSPMD_FORCE_INLINE scoped_unmasked_restorer(spmd_kernel *pKernel) : + m_pKernel(pKernel), m_orig_exec(pKernel->m_exec), m_orig_kernel_exec(pKernel->m_kernel_exec) { @@ -79,15 +79,15 @@ struct scoped_unmasked_restorer pKernel->m_exec = exec_mask::all_on(); } - CPPSPMD_FORCE_INLINE 
~scoped_unmasked_restorer() - { + CPPSPMD_FORCE_INLINE ~scoped_unmasked_restorer() + { m_pKernel->m_kernel_exec = m_pKernel->m_kernel_exec & m_orig_kernel_exec; m_pKernel->m_exec = m_pKernel->m_exec & m_orig_exec; m_pKernel->check_masks(); } }; -#define SPMD_UNMASKED_BEGIN { scoped_unmasked_restorer _unmasked_restorer(this); +#define SPMD_UNMASKED_BEGIN { scoped_unmasked_restorer _unmasked_restorer(this); #define SPMD_UNMASKED_END } #if 0 @@ -113,9 +113,9 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if_break(const vbool& cond) #ifdef _DEBUG assert(m_in_loop); #endif - + exec_mask cond_exec(cond); - + m_exec = andnot(m_exec & cond_exec, m_exec); check_masks(); @@ -157,7 +157,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_sifelse(const vbool& cond, const IfB m_exec = em; elseBody(); } - + m_exec = orig_exec; } @@ -165,7 +165,7 @@ template CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if(const vbool& cond, const IfBody& ifBody) { exec_mask cond_exec(cond); - + exec_mask pre_if_exec = cond_exec & m_exec; if (any(pre_if_exec)) @@ -188,7 +188,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_ifelse(const vbool& cond, const IfBo bool all_flag = false; exec_mask cond_exec(cond); - + { exec_mask pre_if_exec = cond_exec & m_exec; @@ -290,17 +290,17 @@ struct scoped_exec_restorer2 { spmd_kernel *m_pKernel; exec_mask m_unexecuted_lanes; - - CPPSPMD_FORCE_INLINE scoped_exec_restorer2(spmd_kernel *pKernel, const vbool &cond) : + + CPPSPMD_FORCE_INLINE scoped_exec_restorer2(spmd_kernel *pKernel, const vbool &cond) : m_pKernel(pKernel) - { + { exec_mask cond_exec(cond); m_unexecuted_lanes = andnot(cond_exec, pKernel->m_exec); pKernel->m_exec = cond_exec & pKernel->m_exec; } - CPPSPMD_FORCE_INLINE ~scoped_exec_restorer2() - { + CPPSPMD_FORCE_INLINE ~scoped_exec_restorer2() + { m_pKernel->m_exec = m_pKernel->m_exec | m_unexecuted_lanes; m_pKernel->check_masks(); } @@ -327,17 +327,17 @@ class scoped_exec_saver inline scoped_exec_saver(spmd_kernel *pKernel) : m_exec(pKernel->m_exec), 
m_kernel_exec(pKernel->m_kernel_exec), m_continue_mask(pKernel->m_continue_mask), m_pKernel(pKernel) - { + { #ifdef _DEBUG m_in_loop = pKernel->m_in_loop; #endif } - + inline ~scoped_exec_saver() - { - m_pKernel->m_exec = m_exec; - m_pKernel->m_continue_mask = m_continue_mask; - m_pKernel->m_kernel_exec = m_kernel_exec; + { + m_pKernel->m_exec = m_exec; + m_pKernel->m_continue_mask = m_continue_mask; + m_pKernel->m_kernel_exec = m_kernel_exec; #ifdef _DEBUG m_pKernel->m_in_loop = m_in_loop; m_pKernel->check_masks(); @@ -353,7 +353,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_foreach(int begin, int end, const Fo { if (begin == end) return; - + if (!any(m_exec)) return; @@ -362,12 +362,12 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_foreach(int begin, int end, const Fo std::swap(begin, end); exec_mask prev_continue_mask = m_continue_mask, prev_exec = m_exec; - + int total_full = (end - begin) / PROGRAM_COUNT; int total_partial = (end - begin) % PROGRAM_COUNT; lint_t loop_index = begin + program_index; - + const int total_loops = total_full + (total_partial ? 
1 : 0); m_continue_mask = exec_mask::all_off(); @@ -390,7 +390,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_foreach(int begin, int end, const Fo m_continue_mask = exec_mask::all_off(); check_masks(); - + store_all(loop_index, loop_index + PROGRAM_COUNT); } @@ -443,9 +443,9 @@ struct scoped_while_restorer #ifdef _DEBUG bool m_prev_in_loop; #endif - - CPPSPMD_FORCE_INLINE scoped_while_restorer(spmd_kernel *pKernel) : - m_pKernel(pKernel), + + CPPSPMD_FORCE_INLINE scoped_while_restorer(spmd_kernel *pKernel) : + m_pKernel(pKernel), m_orig_exec(pKernel->m_exec), m_orig_continue_mask(pKernel->m_continue_mask) { @@ -457,8 +457,8 @@ struct scoped_while_restorer #endif } - CPPSPMD_FORCE_INLINE ~scoped_while_restorer() - { + CPPSPMD_FORCE_INLINE ~scoped_while_restorer() + { m_pKernel->m_exec = m_orig_exec & m_pKernel->m_kernel_exec; m_pKernel->m_continue_mask = m_orig_continue_mask; #ifdef _DEBUG @@ -514,7 +514,7 @@ struct scoped_simple_while_restorer m_pKernel(pKernel), m_orig_exec(pKernel->m_exec) { - + #ifdef _DEBUG m_prev_in_loop = pKernel->m_in_loop; pKernel->m_in_loop = true; @@ -536,18 +536,18 @@ struct scoped_simple_while_restorer #define SPMD_SWHILE(cond) { scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ while(true) { \ exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; -#define SPMD_SWEND } } +#define SPMD_SWEND } } // Cannot use SPMD break, continue, or return inside simple do #define SPMD_SDO { scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { -#define SPMD_SEND_DO(cond) exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; } } +#define SPMD_SEND_DO(cond) exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if 
(!any(m_exec)) break; } } #undef SPMD_FOR #undef SPMD_END_FOR #define SPMD_FOR(for_init, for_cond) { for_init; scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(for_cond)); \ m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; #define SPMD_END_FOR(for_inc) m_exec = m_exec | m_continue_mask; m_continue_mask = exec_mask::all_off(); check_masks(); for_inc; } } - + template CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_for(const ForInitBody& forInitBody, const ForCondBody& forCondBody, const ForIncrBody& forIncrBody, const ForBody& forBody) { @@ -576,7 +576,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_for(const ForInitBody& forInitBody, m_exec = m_exec | m_continue_mask; m_continue_mask = exec_mask::all_off(); check_masks(); - + forIncrBody(); } diff --git a/encoder/cppspmd_math.h b/encoder/cppspmd_math.h index e7b3202b..0b46d6c6 100644 --- a/encoder/cppspmd_math.h +++ b/encoder/cppspmd_math.h @@ -15,21 +15,21 @@ // limitations under the License. // The general goal of these vectorized estimated math functions is scalability/performance. -// There are explictly no checks NaN's/Inf's on the input arguments. There are no assertions either. -// These are fast estimate functions - if you need more than that, use stdlib. Please do a proper +// There are explictly no checks NaN's/Inf's on the input arguments. There are no assertions either. +// These are fast estimate functions - if you need more than that, use stdlib. Please do a proper // engineering analysis before relying on them. // I have chosen functions written by others, ported them to CppSPMD, then measured their abs/rel errors. // I compared each to the ones in DirectXMath and stdlib's for accuracy/performance. 
-CPPSPMD_FORCE_INLINE vfloat fmod_inv(const vfloat& a, const vfloat& b, const vfloat& b_inv) -{ - vfloat c = frac(abs(a * b_inv)) * abs(b); - return spmd_ternaryf(a < 0, -c, c); +CPPSPMD_FORCE_INLINE vfloat fmod_inv(const vfloat& a, const vfloat& b, const vfloat& b_inv) +{ + vfloat c = frac(abs(a * b_inv)) * abs(b); + return spmd_ternaryf(a < 0, -c, c); } -CPPSPMD_FORCE_INLINE vfloat fmod_inv_p(const vfloat& a, const vfloat& b, const vfloat& b_inv) -{ - return frac(a * b_inv) * b; +CPPSPMD_FORCE_INLINE vfloat fmod_inv_p(const vfloat& a, const vfloat& b, const vfloat& b_inv) +{ + return frac(a * b_inv) * b; } // Avoids dividing by zero or very small values. @@ -87,13 +87,13 @@ inline vfloat spmd_kernel::log2_est(vfloat v) vint greater = ux1_i & 0x00400000; // true if signif > 1.5 SPMD_SIF(greater != 0) { - // signif >= 1.5 so need to divide by 2. Accomplish this by stuffing exp = 126 which corresponds to an exponent of -1 + // signif >= 1.5 so need to divide by 2. Accomplish this by stuffing exp = 126 which corresponds to an exponent of -1 store_all(ux2_i, (ux1_i & 0x007FFFFF) | 0x3f000000); store_all(ux2_f, cast_vint_to_vfloat(ux2_i)); // 126 instead of 127 compensates for division by 2 - store_all(fexp, vfloat(exp - 126)); + store_all(fexp, vfloat(exp - 126)); } SPMD_SELSE(greater != 0) { @@ -113,9 +113,9 @@ inline vfloat spmd_kernel::log2_est(vfloat v) vfloat xm1 = signif; vfloat xm1sqr = xm1 * xm1; - + return fexp + ((a * (xm1sqr * xm1) + b * xm1sqr + c * xm1) / (xm1sqr + d * xm1 + e)); - + // fma lowers accuracy for SSE4.1 - no idea why (compiler reordering?) //return fexp + ((vfma(a, (xm1sqr * xm1), vfma(b, xm1sqr, c * xm1))) / (xm1sqr + vfma(d, xm1, e))); } @@ -130,15 +130,15 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::reduce_expb(vfloat& arg, vfloat& two_int_ { // Assume we're using equation (2) store_all(adjustment, 0); - + // integer part of the input argument vint int_arg = (vint)arg; - + // if frac(arg) is in [0.5, 1.0]... 
- SPMD_SIF((arg - int_arg) > 0.5f) + SPMD_SIF((arg - int_arg) > 0.5f) { store(adjustment, 1); - + // then change it to [0.0, 0.5] store(arg, arg - 0.5f); } @@ -146,17 +146,17 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::reduce_expb(vfloat& arg, vfloat& two_int_ // arg == just the fractional part store_all(arg, arg - (vfloat)int_arg); - - // Now compute 2** (int) arg. + + // Now compute 2** (int) arg. store_all(int_arg, min(int_arg + 127, 254)); - + store_all(two_int_a, cast_vint_to_vfloat(VINT_SHIFT_LEFT(int_arg, 23))); } /* clang 9.0.0 for win /fp:precise release f range : -50.0000000000000000 49.9999940395355225, vals : 16777216 - + exp2_est(): Total passed near - zero check : 16777216 Total sign diffs : 0 @@ -164,7 +164,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::reduce_expb(vfloat& arg, vfloat& two_int_ max rel err: 0.0000015642030031 avg abs err: 10793794.4007573910057545 avg rel err: 0.0000003890893282 - + XMVectorExp2(): Total passed near-zero check: 16777216 Total sign diffs: 0 @@ -191,11 +191,11 @@ inline vfloat spmd_kernel::exp2_est(vfloat arg) const vfloat P01 = +0.0576900723731f; const vfloat Q00 = +20.8189237930062f; const vfloat Q01 = +1.0f; - const vfloat sqrt2 = 1.4142135623730950488f; // sqrt(2) for scaling + const vfloat sqrt2 = 1.4142135623730950488f; // sqrt(2) for scaling vfloat result = 0.0f; - // Return 0 if arg is too large. + // Return 0 if arg is too large. // We're not introducing inf/nan's into calculations, or risk doing so by returning huge default values. SPMD_IF(abs(arg) > 126.0f) { @@ -204,13 +204,13 @@ inline vfloat spmd_kernel::exp2_est(vfloat arg) SPMD_END_IF // 2**(int(a)) - vfloat two_int_a; - + vfloat two_int_a; + // set to 1 by reduce_expb vint adjustment; - + // 0 if arg is +; 1 if negative - vint negative = 0; + vint negative = 0; // If the input is negative, invert it. At the end we'll take the reciprocal, since n**(-1) = 1/(n**x). 
SPMD_SIF(arg < 0.0f) @@ -232,15 +232,15 @@ inline vfloat spmd_kernel::exp2_est(vfloat arg) // Q(x**2) vfloat Q = vfma(Q01, (arg * arg), Q00); - + // x*P(x**2) vfloat x_P = arg * (vfma(P01, arg * arg, P00)); - + vfloat answer = (Q + x_P) / (Q - x_P); // Now correct for the scaling factor of 2**(int(a)) store_all(answer, answer * two_int_a); - + // If the result had a fractional part > 0.5, correct for that store_all(answer, spmd_ternaryf(adjustment != 0, answer * sqrt2, answer)); @@ -295,35 +295,35 @@ inline vfloat spmd_kernel::sincos_est_a(vfloat a, bool sin_flag) store_all(r1_x, sin_flag ? vfms(c1_w, a, c1_x) : c1_w * a); - store_all(r1_y, frac(r1_x)); - - store_all(r2_x, (vfloat)(r1_y < c1_x)); + store_all(r1_y, frac(r1_x)); + + store_all(r2_x, (vfloat)(r1_y < c1_x)); - store_all(r2_y, (vfloat)(r1_y >= c1_y)); - store_all(r2_z, (vfloat)(r1_y >= c1_z)); + store_all(r2_y, (vfloat)(r1_y >= c1_y)); + store_all(r2_z, (vfloat)(r1_y >= c1_z)); store_all(r2_y, vfma(r2_x, c4_z, vfma(r2_y, c4_w, r2_z * c4_z))); - store_all(r0_x, c0_x - r1_y); - store_all(r0_y, c0_y - r1_y); - store_all(r0_z, c0_z - r1_y); - + store_all(r0_x, c0_x - r1_y); + store_all(r0_y, c0_y - r1_y); + store_all(r0_z, c0_z - r1_y); + store_all(r0_x, r0_x * r0_x); store_all(r0_y, r0_y * r0_y); store_all(r0_z, r0_z * r0_z); - store_all(r1_x, vfma(c2_x, r0_x, c2_z)); - store_all(r1_y, vfma(c2_y, r0_y, c2_w)); - store_all(r1_z, vfma(c2_x, r0_z, c2_z)); - + store_all(r1_x, vfma(c2_x, r0_x, c2_z)); + store_all(r1_y, vfma(c2_y, r0_y, c2_w)); + store_all(r1_z, vfma(c2_x, r0_z, c2_z)); + store_all(r1_x, vfma(r1_x, r0_x, c3_x)); store_all(r1_y, vfma(r1_y, r0_y, c3_y)); store_all(r1_z, vfma(r1_z, r0_z, c3_x)); - + store_all(r1_x, vfma(r1_x, r0_x, c3_z)); store_all(r1_y, vfma(r1_y, r0_y, c3_w)); store_all(r1_z, vfma(r1_z, r0_z, c3_z)); - + store_all(r1_x, vfma(r1_x, r0_x, c4_x)); store_all(r1_y, vfma(r1_y, r0_y, c4_y)); store_all(r1_z, vfma(r1_z, r0_z, c4_x)); @@ -347,9 +347,9 @@ CPPSPMD_FORCE_INLINE vfloat 
spmd_kernel::recip_est1(const vfloat& q) vfloat l = spmd_ternaryf(q >= fMinThresh, q, cast_vint_to_vfloat(vint(mag))); vint x_l = vint(mag) - cast_vfloat_to_vint(l); - + vfloat rcp_l = cast_vint_to_vfloat(x_l); - + return rcp_l * vfnma(rcp_l, q, 2.0f); } @@ -395,12 +395,12 @@ CPPSPMD_FORCE_INLINE vfloat spmd_kernel::atan2_est(vfloat y, vfloat x) { vfloat t1 = abs(y); vfloat t3 = abs(x); - + vfloat t0 = max(t3, t1); store_all(t1, min(t3, t1)); store_all(t3, t1 / t0); - + vfloat t4 = t3 * t3; store_all(t0, vfma(-0.013480470f, t4, 0.057477314f)); store_all(t0, vfms(t0, t4, 0.121239071f)); @@ -452,7 +452,7 @@ CPPSPMD_FORCE_INLINE vfloat spmd_kernel::atan2_est(vfloat y, vfloat x) max abs err: 0.8989131818294709 max rel err: 0.0573181403173166 avg rel err: 0.0000030791301203 - + Originally from: http://www.ganssle.com/approx.htm */ @@ -495,7 +495,7 @@ inline vfloat spmd_kernel::tan_est(vfloat x) vfloat z = tan82(y); vfloat r; - + vbool octant_one_or_two = (octant == 1) || (octant == 2); // SPMD optimization - skip costly divide if we can @@ -503,7 +503,7 @@ inline vfloat spmd_kernel::tan_est(vfloat x) { const float fDivThresh = .4371e-7f; vfloat one_over_z = 1.0f / spmd_ternaryf(abs(z) > fDivThresh, z, spmd_ternaryf(z < 0.0f, -fDivThresh, fDivThresh)); - + vfloat b = spmd_ternaryf(octant_one_or_two, one_over_z, z); store_all(r, spmd_ternaryf((octant & 2) != 0, -b, b)); } @@ -511,7 +511,7 @@ inline vfloat spmd_kernel::tan_est(vfloat x) { store_all(r, spmd_ternaryf(octant == 0, z, -z)); } - + // Small angle approximation, to decrease the max rel error near Pi. 
SPMD_SIF(x >= (1.0f - .0003125f*4.0f)) { @@ -523,25 +523,25 @@ inline vfloat spmd_kernel::tan_est(vfloat x) } inline void spmd_kernel::seed_rand(rand_context& x, vint seed) -{ - store(x.a, 0xf1ea5eed); - store(x.b, seed ^ 0xd8487b1f); - store(x.c, seed ^ 0xdbadef9a); - store(x.d, seed); - for (int i = 0; i < 20; ++i) - (void)get_randu(x); +{ + store(x.a, 0xf1ea5eed); + store(x.b, seed ^ 0xd8487b1f); + store(x.c, seed ^ 0xdbadef9a); + store(x.d, seed); + for (int i = 0; i < 20; ++i) + (void)get_randu(x); } // https://burtleburtle.net/bob/rand/smallprng.html // Returns 32-bit unsigned random numbers. inline vint spmd_kernel::get_randu(rand_context& x) -{ - vint e = x.a - VINT_ROT(x.b, 27); - store(x.a, x.b ^ VINT_ROT(x.c, 17)); - store(x.b, x.c + x.d); - store(x.c, x.d + e); - store(x.d, e + x.a); - return x.d; +{ + vint e = x.a - VINT_ROT(x.b, 27); + store(x.a, x.b ^ VINT_ROT(x.c, 17)); + store(x.b, x.c + x.d); + store(x.c, x.d + e); + store(x.d, e + x.a); + return x.d; } // Returns random numbers between [low, high), or low if low >= high @@ -552,7 +552,7 @@ inline vint spmd_kernel::get_randi(rand_context& x, vint low, vint high) vint range = high - low; vint rnd_range = mulhiu(rnd, range); - + return spmd_ternaryi(low < high, low + rnd_range, low); } @@ -637,25 +637,25 @@ CPPSPMD_FORCE_INLINE vint spmd_kernel::count_trailing_zeros(vint x) { // cast the least significant bit in v to a float vfloat f = (vfloat)(x & -x); - + // extract exponent and adjust return VUINT_SHIFT_RIGHT(cast_vfloat_to_vint(f), 23) - 0x7F; } CPPSPMD_FORCE_INLINE vint spmd_kernel::count_set_bits(vint x) { - vint v = x - (VUINT_SHIFT_RIGHT(x, 1) & 0x55555555); - vint v1 = (v & 0x33333333) + (VUINT_SHIFT_RIGHT(v, 2) & 0x33333333); + vint v = x - (VUINT_SHIFT_RIGHT(x, 1) & 0x55555555); + vint v1 = (v & 0x33333333) + (VUINT_SHIFT_RIGHT(v, 2) & 0x33333333); return VUINT_SHIFT_RIGHT(((v1 + VUINT_SHIFT_RIGHT(v1, 4) & 0xF0F0F0F) * 0x1010101), 24); } -CPPSPMD_FORCE_INLINE vint cmple_epu16(const vint 
&a, const vint &b) -{ - return cmpeq_epi16(subs_epu16(a, b), vint(0)); +CPPSPMD_FORCE_INLINE vint cmple_epu16(const vint &a, const vint &b) +{ + return cmpeq_epi16(subs_epu16(a, b), vint(0)); } -CPPSPMD_FORCE_INLINE vint cmpge_epu16(const vint &a, const vint &b) -{ +CPPSPMD_FORCE_INLINE vint cmpge_epu16(const vint &a, const vint &b) +{ return cmple_epu16(b, a); } @@ -679,29 +679,29 @@ CPPSPMD_FORCE_INLINE vint cmple_epi16(const vint &a, const vint &b) return cmpge_epi16(b, a); } -void spmd_kernel::print_vint(vint v) -{ - for (uint32_t i = 0; i < PROGRAM_COUNT; i++) - printf("%i ", extract(v, i)); - printf("\n"); +void spmd_kernel::print_vint(vint v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%i ", extract(v, i)); + printf("\n"); } -void spmd_kernel::print_vbool(vbool v) -{ - for (uint32_t i = 0; i < PROGRAM_COUNT; i++) - printf("%i ", extract(v, i) ? 1 : 0); - printf("\n"); +void spmd_kernel::print_vbool(vbool v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%i ", extract(v, i) ? 
1 : 0); + printf("\n"); } - -void spmd_kernel::print_vint_hex(vint v) -{ - for (uint32_t i = 0; i < PROGRAM_COUNT; i++) - printf("0x%X ", extract(v, i)); - printf("\n"); + +void spmd_kernel::print_vint_hex(vint v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("0x%X ", extract(v, i)); + printf("\n"); } -void spmd_kernel::print_active_lanes(const char *pPrefix) -{ +void spmd_kernel::print_active_lanes(const char *pPrefix) +{ CPPSPMD_DECL(int, flags[PROGRAM_COUNT]); memset(flags, 0, sizeof(flags)); storeu_linear(flags, vint(1)); @@ -709,17 +709,17 @@ void spmd_kernel::print_active_lanes(const char *pPrefix) if (pPrefix) printf("%s", pPrefix); - for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) { if (flags[i]) printf("%u ", i); } printf("\n"); } - -void spmd_kernel::print_vfloat(vfloat v) -{ - for (uint32_t i = 0; i < PROGRAM_COUNT; i++) - printf("%f ", extract(v, i)); - printf("\n"); + +void spmd_kernel::print_vfloat(vfloat v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%f ", extract(v, i)); + printf("\n"); } diff --git a/encoder/cppspmd_math_declares.h b/encoder/cppspmd_math_declares.h index cdb6447b..cb186f33 100644 --- a/encoder/cppspmd_math_declares.h +++ b/encoder/cppspmd_math_declares.h @@ -54,7 +54,7 @@ CPPSPMD_FORCE_INLINE vfloat atan2_est(vfloat y, vfloat x); CPPSPMD_FORCE_INLINE vfloat atan_est(vfloat x) { return atan2_est(x, vfloat(1.0f)); } -// Don't call this for angles close to 90/270! +// Don't call this for angles close to 90/270! 
inline vfloat tan_est(vfloat x); // https://burtleburtle.net/bob/rand/smallprng.html diff --git a/encoder/cppspmd_sse.h b/encoder/cppspmd_sse.h index 4c61bab7..75f2dc3c 100644 --- a/encoder/cppspmd_sse.h +++ b/encoder/cppspmd_sse.h @@ -131,8 +131,8 @@ CPPSPMD_DECL(const uint32_t, g_x_128[4]) = { UINT32_MAX, 0, 0, 0 }; CPPSPMD_DECL(const float, g_onef_128[4]) = { 1.0f, 1.0f, 1.0f, 1.0f }; CPPSPMD_DECL(const uint32_t, g_oneu_128[4]) = { 1, 1, 1, 1 }; -CPPSPMD_DECL(const uint32_t, g_lane_masks_128[4][4]) = -{ +CPPSPMD_DECL(const uint32_t, g_lane_masks_128[4][4]) = +{ { UINT32_MAX, 0, 0, 0 }, { 0, UINT32_MAX, 0, 0 }, { 0, 0, UINT32_MAX, 0 }, @@ -237,7 +237,7 @@ inline __m128i shuffle_epi8(const __m128i& a, const __m128i& b) // Just emulate _mm_shuffle_epi8. This is very slow, but what else can we do? CPPSPMD_ALIGN(16) uint8_t av[16]; _mm_store_si128((__m128i*)av, a); - + CPPSPMD_ALIGN(16) uint8_t bvi[16]; _mm_store_ps((float*)bvi, _mm_and_ps(_mm_castsi128_ps(b), _mm_castsi128_ps(_mm_set1_epi32(0x0F0F0F0F)))); @@ -247,7 +247,7 @@ inline __m128i shuffle_epi8(const __m128i& a, const __m128i& b) result[1] = av[bvi[1]]; result[2] = av[bvi[2]]; result[3] = av[bvi[3]]; - + result[4] = av[bvi[4]]; result[5] = av[bvi[5]]; result[6] = av[bvi[6]]; @@ -266,9 +266,9 @@ inline __m128i shuffle_epi8(const __m128i& a, const __m128i& b) return _mm_andnot_si128(_mm_cmplt_epi8(b, _mm_setzero_si128()), _mm_load_si128((__m128i*)result)); } #else -CPPSPMD_FORCE_INLINE __m128i shuffle_epi8(const __m128i& a, const __m128i& b) -{ - return _mm_shuffle_epi8(a, b); +CPPSPMD_FORCE_INLINE __m128i shuffle_epi8(const __m128i& a, const __m128i& b) +{ + return _mm_shuffle_epi8(a, b); } #endif @@ -387,7 +387,7 @@ struct spmd_kernel typedef int int_t; typedef vint vint_t; typedef lint lint_t; - + // Exec mask struct exec_mask { @@ -399,7 +399,7 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE explicit exec_mask(const __m128i& mask) : m_mask(mask) { } CPPSPMD_FORCE_INLINE void enable_lane(uint32_t lane) { m_mask 
= _mm_load_si128((const __m128i *)&g_lane_masks_128[lane][0]); } - + static CPPSPMD_FORCE_INLINE exec_mask all_on() { return exec_mask{ _mm_load_si128((const __m128i*)g_allones_128) }; } static CPPSPMD_FORCE_INLINE exec_mask all_off() { return exec_mask{ _mm_setzero_si128() }; } @@ -422,20 +422,20 @@ struct spmd_kernel friend CPPSPMD_FORCE_INLINE exec_mask operator^ (const exec_mask& a, const exec_mask& b); friend CPPSPMD_FORCE_INLINE exec_mask operator& (const exec_mask& a, const exec_mask& b); friend CPPSPMD_FORCE_INLINE exec_mask operator| (const exec_mask& a, const exec_mask& b); - + exec_mask m_exec; exec_mask m_kernel_exec; exec_mask m_continue_mask; #ifdef _DEBUG bool m_in_loop; #endif - + CPPSPMD_FORCE_INLINE uint32_t get_movemask() const { return m_exec.get_movemask(); } - + void init(const exec_mask& kernel_exec); - + // Varying bool - + struct vbool { __m128i m_value; @@ -448,25 +448,25 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE explicit operator vfloat() const; CPPSPMD_FORCE_INLINE explicit operator vint() const; - + private: vbool& operator=(const vbool&); }; friend vbool operator!(const vbool& v); - + CPPSPMD_FORCE_INLINE vbool& store(vbool& dst, const vbool& src) { dst.m_value = blendv_mask_epi32(dst.m_value, src.m_value, m_exec.m_mask); return dst; } - + CPPSPMD_FORCE_INLINE vbool& store_all(vbool& dst, const vbool& src) { dst.m_value = src.m_value; return dst; } - + // Varying float struct vfloat { @@ -495,7 +495,7 @@ struct spmd_kernel dst.m_value = blendv_mask_ps(dst.m_value, src.m_value, _mm_castsi128_ps(m_exec.m_mask)); return dst; } - + CPPSPMD_FORCE_INLINE vfloat& store_all(vfloat& dst, const vfloat& src) { dst.m_value = src.m_value; @@ -536,7 +536,7 @@ struct spmd_kernel _mm_storeu_ps(dst.m_pValue, blendv_mask_ps(_mm_loadu_ps(dst.m_pValue), src.m_value, _mm_castsi128_ps(m_exec.m_mask))); return dst; } - + CPPSPMD_FORCE_INLINE const float_lref& store_all(const float_lref& dst, const vfloat& src) { _mm_storeu_ps(dst.m_pValue, src.m_value); @@ 
-553,13 +553,13 @@ struct spmd_kernel { return vfloat{ _mm_and_ps(_mm_loadu_ps(src.m_pValue), _mm_castsi128_ps(m_exec.m_mask)) }; } - + // Varying ref to floats struct float_vref { __m128i m_vindex; float* m_pValue; - + private: float_vref& operator=(const float_vref&); }; @@ -569,7 +569,7 @@ struct spmd_kernel { __m128i m_vindex; vfloat* m_pValue; - + private: vfloat_vref& operator=(const vfloat_vref&); }; @@ -579,14 +579,14 @@ struct spmd_kernel { __m128i m_vindex; vint* m_pValue; - + private: vint_vref& operator=(const vint_vref&); }; CPPSPMD_FORCE_INLINE const float_vref& store(const float_vref& dst, const vfloat& src); CPPSPMD_FORCE_INLINE const float_vref& store(const float_vref&& dst, const vfloat& src); - + CPPSPMD_FORCE_INLINE const float_vref& store_all(const float_vref& dst, const vfloat& src); CPPSPMD_FORCE_INLINE const float_vref& store_all(const float_vref&& dst, const vfloat& src); @@ -626,7 +626,7 @@ struct spmd_kernel private: int_lref& operator=(const int_lref&); }; - + CPPSPMD_FORCE_INLINE const int_lref& store(const int_lref& dst, const vint& src) { int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); @@ -689,7 +689,7 @@ struct spmd_kernel dst.m_pValue[i] = static_cast(stored[i]); return dst; } - + CPPSPMD_FORCE_INLINE vint load(const int16_lref& src) { CPPSPMD_ALIGN(16) int values[4]; @@ -713,7 +713,7 @@ struct spmd_kernel return vint{ t }; } - + // Linear ref to constant ints struct cint_lref { @@ -734,7 +734,7 @@ struct spmd_kernel { return vint{ _mm_loadu_si128((const __m128i *)src.m_pValue) }; } - + // Varying ref to ints struct int_vref { @@ -774,7 +774,7 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE explicit vint(const vfloat& other) : m_value(_mm_cvttps_epi32(other.m_value)) { } - CPPSPMD_FORCE_INLINE explicit operator vbool() const + CPPSPMD_FORCE_INLINE explicit operator vbool() const { return vbool{ _mm_xor_si128( _mm_load_si128((const __m128i*)g_allones_128), _mm_cmpeq_epi32(m_value, _mm_setzero_si128())) }; } @@ -837,7 +837,7 
@@ struct spmd_kernel { _mm_store_si128((__m128i*)pDst, src.m_value); } - + CPPSPMD_FORCE_INLINE vint loadu_linear(const int *pSrc) { __m128i v = _mm_loadu_si128((const __m128i*)pSrc); @@ -882,7 +882,7 @@ struct spmd_kernel { _mm_store_ps((float*)pDst, src.m_value); } - + CPPSPMD_FORCE_INLINE vfloat loadu_linear(const float *pSrc) { __m128 v = _mm_loadu_ps((const float*)pSrc); @@ -901,7 +901,7 @@ struct spmd_kernel { return vfloat{ _mm_load_ps((float*)pSrc) }; } - + CPPSPMD_FORCE_INLINE vint& store(vint& dst, const vint& src) { dst.m_value = blendv_mask_epi32(dst.m_value, src.m_value, m_exec.m_mask); @@ -924,13 +924,13 @@ struct spmd_kernel } return dst; } - + CPPSPMD_FORCE_INLINE vint& store_all(vint& dst, const vint& src) { dst.m_value = src.m_value; return dst; } - + CPPSPMD_FORCE_INLINE const int_vref& store_all(const int_vref& dst, const vint& src) { CPPSPMD_ALIGN(16) int vindex[4]; @@ -961,7 +961,7 @@ struct spmd_kernel return vint{ _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(m_exec.m_mask), _mm_load_ps((const float*)values))) }; } - + CPPSPMD_FORCE_INLINE vint load_all(const int_vref& src) { CPPSPMD_ALIGN(16) int values[4]; @@ -974,7 +974,7 @@ struct spmd_kernel return vint{ _mm_castps_si128( _mm_load_ps((const float*)values)) }; } - + CPPSPMD_FORCE_INLINE vint load(const cint_vref& src) { CPPSPMD_ALIGN(16) int values[4]; @@ -991,7 +991,7 @@ struct spmd_kernel return vint{ _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(m_exec.m_mask), _mm_load_ps((const float*)values))) }; } - + CPPSPMD_FORCE_INLINE vint load_all(const cint_vref& src) { CPPSPMD_ALIGN(16) int values[4]; @@ -1034,7 +1034,7 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE void store_strided(int *pDst, uint32_t stride, const vint &v) { int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); - + if (mask & 1) pDst[0] = extract_x(v.m_value); if (mask & 2) pDst[stride] = extract_y(v.m_value); if (mask & 4) pDst[stride*2] = extract_z(v.m_value); @@ -1070,7 +1070,7 @@ struct spmd_kernel 
CPPSPMD_FORCE_INLINE vint load_strided(const int *pSrc, uint32_t stride) { int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); - + #if CPPSPMD_SSE2 CPPSPMD_ALIGN(16) int vals[4] = { 0, 0, 0, 0 }; if (mask & 1) vals[0] = pSrc[0]; @@ -1119,7 +1119,7 @@ struct spmd_kernel vals[2] = pSrc[stride * 2]; vals[3] = pSrc[stride * 3]; return vint{ _mm_load_si128((__m128i*)vals) }; -#else +#else const float* pSrcF = (const float*)pSrc; __m128 v = _mm_load_ss(pSrcF); v = _mm_insert_ps(v, _mm_load_ss(pSrcF + stride), 0x10); @@ -1151,7 +1151,7 @@ struct spmd_kernel { // TODO: There's surely a better way int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); - + if (mask & 1) ((int *)(&dst.m_pValue[extract_x(dst.m_vindex)]))[0] = extract_x(_mm_castps_si128(src.m_value)); if (mask & 2) ((int *)(&dst.m_pValue[extract_y(dst.m_vindex)]))[1] = extract_y(_mm_castps_si128(src.m_value)); if (mask & 4) ((int *)(&dst.m_pValue[extract_z(dst.m_vindex)]))[2] = extract_z(_mm_castps_si128(src.m_value)); @@ -1179,7 +1179,7 @@ struct spmd_kernel { // TODO: There's surely a better way int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); - + if (mask & 1) ((int *)(&dst.m_pValue[extract_x(dst.m_vindex)]))[0] = extract_x(src.m_value); if (mask & 2) ((int *)(&dst.m_pValue[extract_y(dst.m_vindex)]))[1] = extract_y(src.m_value); if (mask & 4) ((int *)(&dst.m_pValue[extract_z(dst.m_vindex)]))[2] = extract_z(src.m_value); @@ -1215,7 +1215,7 @@ struct spmd_kernel return vint{ k }; } - + // Linear integer struct lint { @@ -1235,7 +1235,7 @@ struct spmd_kernel return vint{ m_value }; } - CPPSPMD_FORCE_INLINE int get_first_value() const + CPPSPMD_FORCE_INLINE int get_first_value() const { return _mm_cvtsi128_si32(m_value); } @@ -1269,9 +1269,9 @@ struct spmd_kernel dst.m_value = src.m_value; return dst; } - + const lint program_index = lint{ _mm_set_epi32( 3, 2, 1, 0 ) }; - + // SPMD condition helpers template @@ -1298,7 +1298,7 @@ struct spmd_kernel template CPPSPMD_FORCE_INLINE void 
spmd_foreach(int begin, int end, const ForeachBody& foreachBody); - + #ifdef _DEBUG CPPSPMD_FORCE_INLINE void check_masks(); #else @@ -1307,9 +1307,9 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE void spmd_break(); CPPSPMD_FORCE_INLINE void spmd_continue(); - + CPPSPMD_FORCE_INLINE void spmd_return(); - + template CPPSPMD_FORCE_INLINE void spmd_unmasked(const UnmaskedBody& unmaskedBody); @@ -1321,8 +1321,8 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE void swap(vfloat &a, vfloat &b) { vfloat temp = a; store(a, b); store(b, temp); } CPPSPMD_FORCE_INLINE void swap(vbool &a, vbool &b) { vbool temp = a; store(a, b); store(b, temp); } - CPPSPMD_FORCE_INLINE float reduce_add(vfloat v) - { + CPPSPMD_FORCE_INLINE float reduce_add(vfloat v) + { __m128 k3210 = _mm_castsi128_ps(blendv_mask_epi32(_mm_setzero_si128(), _mm_castps_si128(v.m_value), m_exec.m_mask)); __m128 temp = _mm_add_ps(_mm_shuffle_ps(k3210, k3210, _MM_SHUFFLE(0, 1, 2, 3)), k3210); return _mm_cvtss_f32(_mm_add_ss(_mm_movehl_ps(temp, temp), temp)); @@ -1353,14 +1353,14 @@ using float_vref = spmd_kernel::float_vref; using vfloat_vref = spmd_kernel::vfloat_vref; using vint_vref = spmd_kernel::vint_vref; -CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vfloat() const -{ - return vfloat { _mm_and_ps( _mm_castsi128_ps(m_value), *(const __m128 *)g_onef_128 ) }; +CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vfloat() const +{ + return vfloat { _mm_and_ps( _mm_castsi128_ps(m_value), *(const __m128 *)g_onef_128 ) }; } - + // Returns UINT32_MAX's for true, 0 for false. (Should it return 1's?) 
-CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vint() const -{ +CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vint() const +{ return vint { m_value }; } @@ -1441,9 +1441,9 @@ CPPSPMD_FORCE_INLINE vfloat round_truncate(const vfloat& a) { __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU) ); __m128i has_fractional = _mm_cmplt_epi32(abs_a, _mm_castps_si128(_mm_set1_ps(8388608.0f))); - + __m128i ai = _mm_cvttps_epi32(a.m_value); - + __m128 af = _mm_cvtepi32_ps(ai); return vfloat{ blendv_mask_ps(a.m_value, af, _mm_castsi128_ps(has_fractional)) }; } @@ -1466,11 +1466,11 @@ CPPSPMD_FORCE_INLINE vfloat ceil(const vfloat& a) { __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU)); __m128i has_fractional = _mm_cmplt_epi32(abs_a, _mm_castps_si128(_mm_set1_ps(8388608.0f))); - + __m128i ai = _mm_cvtps_epi32(a.m_value); __m128 af = _mm_cvtepi32_ps(ai); __m128 changed = _mm_cvtepi32_ps(_mm_castps_si128(_mm_cmplt_ps(af, a.m_value))); - + af = _mm_sub_ps(af, changed); return vfloat{ blendv_mask_ps(a.m_value, af, _mm_castsi128_ps(has_fractional)) }; @@ -1503,12 +1503,12 @@ CPPSPMD_FORCE_INLINE vfloat round_nearest(const vfloat& a) __m128i sign_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x80000000U)); __m128 force_int = _mm_castsi128_ps(_mm_or_si128(no_fract_fp_bits, sign_a)); - + // Can't use individual _mm_add_ps/_mm_sub_ps - this will be optimized out with /fp:fast by clang and probably other compilers. 
//__m128 temp1 = _mm_add_ps(a.m_value, force_int); //__m128 temp2 = _mm_sub_ps(temp1, force_int); __m128 temp2 = add_sub(a.m_value, force_int); - + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU)); __m128i has_fractional = _mm_cmplt_epi32(abs_a, no_fract_fp_bits); return vfloat{ blendv_mask_ps(a.m_value, temp2, _mm_castsi128_ps(has_fractional)) }; @@ -1824,7 +1824,7 @@ CPPSPMD_FORCE_INLINE vint vuint_shift_right(const vint& a, const vint& b) #else //vint inv_shift = 32 - b; //vfloat f = cast_vint_to_vfloat(vint(_mm_slli_epi32(inv_shift.m_value, 23)) + cast_vfloat_to_vint(vfloat(1.0f))); - + // Take float rep of 1.0f (0x3f800000), subtract (32<<23), subtract (shift<<23), cast to float. vfloat f = cast_vint_to_vfloat(vint(_mm_sub_epi32(_mm_set1_epi32(0x4f800000), _mm_slli_epi32(b.m_value, 23)))); @@ -1843,7 +1843,7 @@ CPPSPMD_FORCE_INLINE vint vuint_shift_right_not_zero(const vint& a, const vint& { //vint inv_shift = 32 - b; //vfloat f = cast_vint_to_vfloat(vint(_mm_slli_epi32(inv_shift.m_value, 23)) + cast_vfloat_to_vint(vfloat(1.0f))); - + // Take float rep of 1.0f (0x3f800000), subtract (32<<23), subtract (shift<<23), cast to float. 
vfloat f = cast_vint_to_vfloat(vint(_mm_sub_epi32(_mm_set1_epi32(0x4f800000), _mm_slli_epi32(b.m_value, 23)))); @@ -1887,7 +1887,7 @@ CPPSPMD_FORCE_INLINE vint operator>> (const vint& a, const vint& b) // Shift left/right by a uniform immediate constant #define VINT_SHIFT_LEFT(a, b) vint(_mm_slli_epi32( (a).m_value, (b) ) ) -#define VINT_SHIFT_RIGHT(a, b) vint( _mm_srai_epi32( (a).m_value, (b) ) ) +#define VINT_SHIFT_RIGHT(a, b) vint( _mm_srai_epi32( (a).m_value, (b) ) ) #define VUINT_SHIFT_RIGHT(a, b) vint( _mm_srli_epi32( (a).m_value, (b) ) ) #define VINT_ROT(x, k) (VINT_SHIFT_LEFT((x), (k)) | VUINT_SHIFT_RIGHT((x), 32 - (k))) diff --git a/encoder/jpgd.cpp b/encoder/jpgd.cpp index fec8b714..dd267fb1 100644 --- a/encoder/jpgd.cpp +++ b/encoder/jpgd.cpp @@ -3,7 +3,7 @@ // Supports box and linear chroma upsampling. // // Released under two licenses. You are free to choose which license you want: -// License 1: +// License 1: // Public Domain // // License 2: @@ -138,7 +138,7 @@ namespace jpgd { { static void idct(int* pTemp, const jpgd_block_t* pSrc) { - (void)pTemp; + (void)pTemp; (void)pSrc; } }; @@ -253,10 +253,10 @@ namespace jpgd { 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8, }; - static const uint8 s_idct_col_table[] = - { - 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 + static const uint8 s_idct_col_table[] = + { + 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 }; // Scalar "fast pathing" IDCT. 
diff --git a/encoder/jpgd.h b/encoder/jpgd.h index 86a7814c..92e53335 100644 --- a/encoder/jpgd.h +++ b/encoder/jpgd.h @@ -10,7 +10,7 @@ #include #ifdef _MSC_VER -#define JPGD_NORETURN __declspec(noreturn) +#define JPGD_NORETURN __declspec(noreturn) #elif defined(__GNUC__) #define JPGD_NORETURN __attribute__ ((noreturn)) #else @@ -140,7 +140,7 @@ namespace jpgd int begin_decoding(); // Returns the next scan line. - // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). + // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4). // Returns JPGD_SUCCESS if a scan line has been returned. // Returns JPGD_DONE if all scan lines have been returned. diff --git a/encoder/pvpngreader.cpp b/encoder/pvpngreader.cpp index 46639f27..442735bf 100644 --- a/encoder/pvpngreader.cpp +++ b/encoder/pvpngreader.cpp @@ -1,12 +1,12 @@ // pngreader.cpp - Public Domain - see unlicense at bottom of file. // -// Notes: -// This is ancient code from ~1995 ported to C++. It was originally written for a -// DOS app with very limited memory. It's not as fast as it should be, but it works. -// The low-level PNG reader class was written assuming the PNG file could not fit +// Notes: +// This is ancient code from ~1995 ported to C++. It was originally written for a +// DOS app with very limited memory. It's not as fast as it should be, but it works. +// The low-level PNG reader class was written assuming the PNG file could not fit // entirely into memory, which dictated how it was written/structured. // It has been modified to use either zlib or miniz. 
-// It supports all PNG color types/bit depths/interlacing, however 16-bit/component +// It supports all PNG color types/bit depths/interlacing, however 16-bit/component // images are converted to 8-bit. // TRNS chunks are converted to alpha as needed. // GAMA chunk is read, but not applied. @@ -109,20 +109,20 @@ class png_memory_file : public png_file public: std::vector m_buf; uint64_t m_ofs; - - png_memory_file() : + + png_memory_file() : png_file(), m_ofs(0) - { + { } - + virtual ~png_memory_file() - { + { } std::vector& get_buf() { return m_buf; } const std::vector& get_buf() const { return m_buf; } - + void init() { m_ofs = 0; @@ -269,8 +269,8 @@ class png_cfile : public png_file { public: FILE* m_pFile; - - png_cfile() : + + png_cfile() : png_file(), m_pFile(nullptr) { @@ -284,9 +284,9 @@ class png_cfile : public png_file bool init(const char *pFilename, const char *pMode) { close(); - + m_pFile = nullptr; - + #ifdef _MSC_VER fopen_s(&m_pFile, pFilename, pMode); #else @@ -333,17 +333,17 @@ class png_cfile : public png_file int64_t cur_ofs = ftell64(m_pFile); if (cur_ofs < 0) return 0; - + if (fseek64(m_pFile, 0, SEEK_END) != 0) return 0; - + const int64_t cur_size = ftell64(m_pFile); if (cur_size < 0) return 0; if (fseek64(m_pFile, cur_ofs, SEEK_SET) != 0) return 0; - + return cur_size; } @@ -379,13 +379,13 @@ class png_decoder png_decoder(); ~png_decoder(); - // Scans the PNG file, but doesn't decode the IDAT data. + // Scans the PNG file, but doesn't decode the IDAT data. // Returns 0 on success, or an error code. // If the returned status is non-zero, or m_img_supported_flag==FALSE the image either the image is corrupted/not PNG or is unsupported in some way. int png_scan(png_file *pFile); // Decodes a single scanline of PNG image data. - // Returns a pointer to the scanline's pixel data and its size in bytes. + // Returns a pointer to the scanline's pixel data and its size in bytes. 
// This data is only minimally processed from the internal PNG pixel data. // The caller must use the ihdr, trns_flag and values, and the palette to actually decode the pixel data. // @@ -397,21 +397,21 @@ class png_decoder // // Returns 0 on success, a non-zero error code, or PNG_ALLDONE. int png_decode(void** ppImg_ptr, uint32_t* pImg_len); - + // Starts decoding. Returns 0 on success, otherwise an error code. int png_decode_start(); - + // Deinitializes the decoder, freeing all allocations. void png_decode_end(); png_file* m_pFile; - + // Image's 24bpp palette - 3 bytes per entry uint8_t m_plte_flag; uint8_t m_img_pal[768]; - + int m_img_supported_flag; - + ihdr_struct m_ihdr; uint8_t m_chunk_flag; @@ -442,7 +442,7 @@ class png_decoder uint8_t m_gama_flag; uint32_t m_gama_value; - + uint8_t m_trns_flag; uint32_t m_trns_value[256]; @@ -455,7 +455,7 @@ class png_decoder uint32_t m_inflate_dst_buf_ofs; int m_inflate_eof_flag; - + uint8_t m_gamma_table[256]; int m_pass_x_size; @@ -467,16 +467,16 @@ class png_decoder int m_adam7_pass_size_y[7]; std::vector m_adam7_image_buf; - + int m_adam7_decoded_flag; - + bool m_scanned_flag; - + int m_terminate_status; - + #define TEMP_BUF_SIZE (384) uint8_t m_temp_buf[TEMP_BUF_SIZE * 4]; - + void clear(); void uninitialize(); int terminate(int status); @@ -516,7 +516,7 @@ class png_decoder void png_decoder::uninitialize() { m_pFile = nullptr; - + for (int i = 0; i < PNG_MAX_ALLOC_BLOCKS; i++) { free(m_pMalloc_blocks[i]); @@ -600,7 +600,7 @@ int png_decoder::fetch_next_chunk_data(uint8_t* buf, int bytes) int status = block_read(buf, bytes); if (status != 0) return status; - + #if PVPNG_IDAT_CRC_CHECKING bool check_crc32 = true; #else @@ -680,7 +680,7 @@ int png_decoder::fetch_next_chunk_init() if (status != 0) return status; } - + int64_t n = block_read_dword(); if (n < 0) return (int)n; @@ -776,7 +776,7 @@ static void PixelDePack2(void* src, void* dst, int numbytes) while (numbytes) { uint8_t v = *src8++; - + for (uint32_t i = 
0; i < 8; i++) dst8[7 - i] = (v >> i) & 1; @@ -933,7 +933,7 @@ static int unpack_true_8(uint8_t* src, uint8_t* dst, int pixels, png_decoder* pw uint8_t r = src[i * 3 + 0]; uint8_t g = src[i * 3 + 1]; uint8_t b = src[i * 3 + 2]; - + dst[i * 4 + 0] = r; dst[i * 4 + 1] = g; dst[i * 4 + 2] = b; @@ -982,7 +982,7 @@ static int unpack_true_16(uint8_t* src, uint8_t* dst, int pixels, png_decoder* p dst[1] = src[2]; dst[2] = src[4]; dst[3] = 255; - + dst += 4; src += 6; } @@ -1180,15 +1180,15 @@ int png_decoder::decompress_line(uint32_t* bytes_decoded) m_inflator.next_in = inflate_src_buf + m_inflate_src_buf_ofs; m_inflator.avail_in = src_bytes_left; - + m_inflator.next_out = m_pCur_line_buf + m_inflate_dst_buf_ofs; m_inflator.avail_out = dst_bytes_left; - + status = buminiz::mz_inflate2(&m_inflator, buminiz::MZ_NO_FLUSH, PVPNG_ADLER32_CHECKING); const uint32_t src_bytes_consumed = src_bytes_left - m_inflator.avail_in; const uint32_t dst_bytes_written = dst_bytes_left - m_inflator.avail_out; - + m_inflate_src_buf_ofs += src_bytes_consumed; m_inflate_dst_buf_ofs += dst_bytes_written; @@ -1255,10 +1255,10 @@ int png_decoder::png_decode(void** ppImg_ptr, uint32_t* pImg_len) { if (m_pass_y_left == 0) return PNG_ALLDONE; - + *ppImg_ptr = &m_adam7_image_buf[(m_ihdr.m_height - m_pass_y_left) * m_dst_bytes_per_line]; *pImg_len = m_dst_bytes_per_line; - + m_pass_y_left--; return 0; @@ -1282,7 +1282,7 @@ int png_decoder::png_decode(void** ppImg_ptr, uint32_t* pImg_len) status = find_iend_chunk(); if (status < 0) return status; - + return PNG_ALLDONE; } @@ -1398,7 +1398,7 @@ int png_decoder::png_decode(void** ppImg_ptr, uint32_t* pImg_len) if ((*m_pProcess_func)(m_pCur_line_buf + 1, m_pPro_line_buf, m_pass_x_size, this)) decoded_line = m_pPro_line_buf; } - + if (m_ihdr.m_ilace_type == 0) { *ppImg_ptr = decoded_line; @@ -1499,17 +1499,17 @@ void png_decoder::png_decode_end() int png_decoder::png_decode_start() { int status; - + if (m_img_supported_flag != TRUE) return 
terminate(m_img_supported_flag); - + switch (m_ihdr.m_color_type) { case PNG_COLOR_TYPE_GREYSCALE: { if (m_ihdr.m_bit_depth == 16) { - // This is a special case. We can't pass back 8-bit samples and let the caller decide on transparency because the PNG is 16-bits. + // This is a special case. We can't pass back 8-bit samples and let the caller decide on transparency because the PNG is 16-bits. // So we expand to 8-bit Gray-Alpha and handle transparency during decoding. // We don't do this with all grayscale cases because that would require more code to deal with 1/2/4bpp expansion. m_dec_bytes_per_pixel = (m_ihdr.m_bit_depth + 7) / 8; @@ -1534,7 +1534,7 @@ int png_decoder::png_decode_start() m_pProcess_func = unpack_grey_2; else if (m_ihdr.m_bit_depth == 4) m_pProcess_func = unpack_grey_4; - else + else m_pProcess_func = unpack_grey_8; } @@ -1644,9 +1644,9 @@ int png_decoder::png_decode_start() m_adam7_pass_size_y[4] = adam7_pass_size(m_ihdr.m_height, 2, 4); m_adam7_pass_size_y[5] = adam7_pass_size(m_ihdr.m_height, 0, 2); m_adam7_pass_size_y[6] = adam7_pass_size(m_ihdr.m_height, 1, 2); - + m_adam7_image_buf.resize(m_dst_bytes_per_line * m_ihdr.m_height); - + m_adam7_pass_num = -1; m_pass_y_left = 0; @@ -1686,7 +1686,7 @@ int png_decoder::png_decode_start() m_pass_x_size = m_ihdr.m_width; m_pass_y_left = m_ihdr.m_height; } - + return 0; } @@ -1779,13 +1779,13 @@ int png_decoder::read_ihdr_chunk() if ((m_ihdr.m_height == 0) || (m_ihdr.m_height > MAX_SUPPORTED_RES)) return terminate(PNG_BAD_HEIGHT); - int v = fetch_next_chunk_byte(); - if (v < 0) + int v = fetch_next_chunk_byte(); + if (v < 0) return v; m_ihdr.m_bit_depth = (uint8_t)v; - v = fetch_next_chunk_byte(); - if (v < 0) + v = fetch_next_chunk_byte(); + if (v < 0) return v; m_ihdr.m_color_type = (uint8_t)v; @@ -1812,7 +1812,7 @@ int png_decoder::read_ihdr_chunk() if (m_ihdr.m_ilace_type > 1) m_img_supported_flag = PNG_UNS_ILACE; - + switch (m_ihdr.m_color_type) { case PNG_COLOR_TYPE_GREYSCALE: @@ -1903,7 
+1903,7 @@ int png_decoder::read_bkgd_chunk() if (v < 0) return v; m_bkgd_value[1] = v; - + v = fetch_next_chunk_word(); if (v < 0) return v; @@ -1922,7 +1922,7 @@ int png_decoder::read_gama_chunk() return (int)v; m_gama_value = (uint32_t)v; - + return 0; } @@ -1962,12 +1962,12 @@ int png_decoder::read_trns_chunk() if (v < 0) return v; m_trns_value[0] = v; - + v = fetch_next_chunk_word(); if (v < 0) return v; m_trns_value[1] = v; - + v = fetch_next_chunk_word(); if (v < 0) return v; @@ -2013,7 +2013,7 @@ int png_decoder::read_plte_chunk() if (v < 0) return v; *p++ = (uint8_t)v; - + v = fetch_next_chunk_byte(); if (v < 0) return v; @@ -2024,7 +2024,7 @@ int png_decoder::read_plte_chunk() return v; *p++ = (uint8_t)v; } - + return 0; } @@ -2142,7 +2142,7 @@ void png_decoder::clear() m_inflate_dst_buf_ofs = 0; m_inflate_eof_flag = FALSE; - + clear_obj(m_trns_value); m_pass_x_size = 0; @@ -2152,18 +2152,18 @@ void png_decoder::clear() m_adam7_pass_y = 0; clear_obj(m_adam7_pass_size_x); clear_obj(m_adam7_pass_size_y); - + m_adam7_decoded_flag = FALSE; - + m_scanned_flag = false; - + m_terminate_status = 0; } int png_decoder::png_scan(png_file *pFile) { m_pFile = pFile; - + m_img_supported_flag = TRUE; m_terminate_status = 0; @@ -2174,11 +2174,11 @@ int png_decoder::png_scan(png_file *pFile) res = read_ihdr_chunk(); if (res != 0) return res; - + res = find_idat_chunk(); if (res != 0) return res; - + if (m_gama_flag) calc_gamma_table(); @@ -2198,14 +2198,14 @@ int png_decoder::png_scan(png_file *pFile) } static inline uint8_t get_709_luma(uint32_t r, uint32_t g, uint32_t b) -{ +{ return (uint8_t)((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); } bool get_png_info(const void* pImage_buf, size_t buf_size, png_info &info) { memset(&info, 0, sizeof(info)); - + if ((!pImage_buf) || (buf_size < MIN_PNG_SIZE)) return false; @@ -2254,7 +2254,7 @@ void* load_png(const void* pImage_buf, size_t buf_size, uint32_t desired_chans, width = 0; height = 0; num_chans = 0; - + if 
((!pImage_buf) || (buf_size < MIN_PNG_SIZE)) { assert(0); @@ -2271,7 +2271,7 @@ void* load_png(const void* pImage_buf, size_t buf_size, uint32_t desired_chans, mf.init(pImage_buf, buf_size); png_decoder dec; - + int status = dec.png_scan(&mf); if ((status != 0) || (dec.m_img_supported_flag != TRUE)) return nullptr; @@ -2318,11 +2318,11 @@ void* load_png(const void* pImage_buf, size_t buf_size, uint32_t desired_chans, uint64_t total_size = (uint64_t)pitch * height; if (total_size > 0x7FFFFFFFULL) return nullptr; - + uint8_t* pBuf = (uint8_t*)malloc((size_t)total_size); if (!pBuf) return nullptr; - + if (dec.png_decode_start() != 0) { free(pBuf); diff --git a/encoder/pvpngreader.h b/encoder/pvpngreader.h index 4f3fe46b..b1850f1b 100644 --- a/encoder/pvpngreader.h +++ b/encoder/pvpngreader.h @@ -19,7 +19,7 @@ namespace pv_png { uint32_t m_width; uint32_t m_height; - + uint32_t m_num_chans; // The number of channels, factoring in transparency. Ranges from [1-4]. uint32_t m_bit_depth; // PNG ihdr bit depth: 1, 2, 4, 8 or 16 diff --git a/format.sh b/format.sh index 5760aec5..f8e84852 100755 --- a/format.sh +++ b/format.sh @@ -18,9 +18,9 @@ while IFS= read -rd '' f; do continue fi # Ensures that files are UTF-8 formatted. - recode UTF-8 $f 2> /dev/null + recode UTF-8 "$f" 2> /dev/null # Ensures that files have LF line endings. - dos2unix $f 2> /dev/null + dos2unix "$f" 2> /dev/null # Ensures that files do not contain a BOM. sed -i '1s/^\xEF\xBB\xBF//' "$f" # Ensures that files end with newline characters. diff --git a/spec/basis_spec.txt b/spec/basis_spec.txt index ba9fa4bc..4ccc24c8 100644 --- a/spec/basis_spec.txt +++ b/spec/basis_spec.txt @@ -7,24 +7,24 @@ Version 1.01 1.0 Introduction ---------------- -The Basis Universal GPU texture codec supports reading and writing ".basis" files. +The Basis Universal GPU texture codec supports reading and writing ".basis" files. The .basis file format supports ETC1S or UASTC 4x4 texture data. 
* ETC1S is a simplified subset of ETC1. -The mode is always differential (diff bit=1), the Rd, Gd, and Bd color deltas -are always (0,0,0), and the flip bit is always set. ETC1S texture data is fully -compliant with all existing software and hardware ETC1 decoders. Existing encoders +The mode is always differential (diff bit=1), the Rd, Gd, and Bd color deltas +are always (0,0,0), and the flip bit is always set. ETC1S texture data is fully +compliant with all existing software and hardware ETC1 decoders. Existing encoders can be easily modified to limit their output to ETC1S. -* UASTC 4x4 is a 19 mode subset of the ASTC texture format. Its specification is +* UASTC 4x4 is a 19 mode subset of the ASTC texture format. Its specification is [here](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-Texture-Specification). UASTC texture data can always be losslessly transcoded to ASTC. 2.0 High-Level File Structure ----------------------------- A .basis file consists of multiple sections. Apart from the header, which must always -be at the start of the file, the other sections may appear in any order. +be at the start of the file, the other sections may appear in any order. Here's the high level organization of a typical .basis file: @@ -32,8 +32,8 @@ Here's the high level organization of a typical .basis file: * Optional ETC1S compressed endpoint/selector codebooks * Optional ETC1S Huffman table information * A required "slice" description array describing the resolutions and file offset/compressed sizes of each texture slice present in the file -* 1 or more slices containing ETC1S or UASTC compressed texture data. -* For future expansion, the format supports an "extended" header which may be located anywhere in the file. This section contains .PNG-like chunked data. +* 1 or more slices containing ETC1S or UASTC compressed texture data. +* For future expansion, the format supports an "extended" header which may be located anywhere in the file. 
This section contains .PNG-like chunked data. 3.0 File Enums -------------- @@ -41,11 +41,11 @@ Here's the high level organization of a typical .basis file: // basis_file_header::m_tex_type enum basis_texture_type { - cBASISTexType2D = 0, - cBASISTexType2DArray = 1, - cBASISTexTypeCubemapArray = 2, - cBASISTexTypeVideoFrames = 3, - cBASISTexTypeVolume = 4, + cBASISTexType2D = 0, + cBASISTexType2DArray = 1, + cBASISTexTypeCubemapArray = 2, + cBASISTexTypeVideoFrames = 3, + cBASISTexTypeVolume = 4, cBASISTexTypeTotal }; @@ -53,17 +53,17 @@ enum basis_texture_type enum basis_slice_desc_flags { cSliceDescFlagsHasAlpha = 1, - cSliceDescFlagsFrameIsIFrame = 2 + cSliceDescFlagsFrameIsIFrame = 2 }; -// basis_file_header::m_tex_format +// basis_file_header::m_tex_format enum basis_tex_format { cETC1S = 0, cUASTC4x4 = 1 }; -// basis_file_header::m_flags +// basis_file_header::m_flags enum basis_header_flags { cBASISHeaderFlagETC1S = 1, @@ -74,7 +74,7 @@ enum basis_header_flags 4.0 File Structures ------------------- -All individual members in all file structures are byte aligned and little endian. The structs +All individual members in all file structures are byte aligned and little endian. The structs have no padding (i.e. they are declared with #pragma pack(1)). 
4.1 "basis_file_header" structure @@ -92,7 +92,7 @@ struct basis_file_header uint32 m_data_size; // The total size of all data after the header uint16 m_data_crc16; // The CRC16 of all data after the header - uint24 m_total_slices; // The number of compressed slices + uint24 m_total_slices; // The number of compressed slices uint24 m_total_images; // The total # of images byte m_tex_format; // enum basis_tex_format @@ -104,11 +104,11 @@ struct basis_file_header uint32 m_userdata0; // For client use uint32 m_userdata1; // For client use - uint16 m_total_endpoints; // ETC1S: The number of endpoints in the endpoint codebook + uint16 m_total_endpoints; // ETC1S: The number of endpoints in the endpoint codebook uint32 m_endpoint_cb_file_ofs; // ETC1S: The compressed endpoint codebook's file offset relative to the start of the file uint24 m_endpoint_cb_file_size; // ETC1S: The compressed endpoint codebook's size in bytes - uint16 m_total_selectors; // ETC1S: The number of selectors in the selector codebook + uint16 m_total_selectors; // ETC1S: The number of selectors in the selector codebook uint32 m_selector_cb_file_ofs; // ETC1S: The compressed selector codebook's file offset relative to the start of the file uint24 m_selector_cb_file_size; // ETC1S: The compressed selector codebook's size in bytes @@ -135,7 +135,7 @@ struct basis_file_header * m_us_per_frame: Microseconds per frame, only valid for cBASISTexTypeVideoFrames texture types. * m_total_endpoints, m_endpoint_cb_file_ofs, m_endpoint_cb_file_size: Information about the compressed ETC1S endpoint codebook: The total # of entries, the offset to the compressed data, and the compressed data's size. * m_total_selectors, m_selector_cb_file_ofs, m_selector_cb_file_size: Information about the compressed ETC1S selector codebook: The total # of entries, the offset to the compressed data, and the compressed data's size. 
-* m_tables_file_ofs, m_tables_file_size: The file offset and size of the compressed Huffman tables for ETC1S format files. +* m_tables_file_ofs, m_tables_file_size: The file offset and size of the compressed Huffman tables for ETC1S format files. * m_slice_desc_file_ofs: The file offset to the array of slice description structures. There will be m_total_slices structures at this file offset. * m_extended_file_ofs, m_extended_file_size: The "extended" header, for future expansion. Currently unused. @@ -144,20 +144,20 @@ struct basis_file_header struct basis_slice_desc { - uint24 m_image_index; - uint8 m_level_index; - uint8 m_flags; + uint24 m_image_index; + uint8 m_level_index; + uint8 m_flags; - uint16 m_orig_width; - uint16 m_orig_height; + uint16 m_orig_width; + uint16 m_orig_height; - uint16 m_num_blocks_x; - uint16 m_num_blocks_y; + uint16 m_num_blocks_x; + uint16 m_num_blocks_y; - uint32 m_file_ofs; - uint32 m_file_size; + uint32 m_file_ofs; + uint32 m_file_size; - uint16 m_slice_data_crc16; + uint16 m_slice_data_crc16; }; 4.2.1 Details: @@ -168,7 +168,7 @@ struct basis_slice_desc * m_orig_width: The original image width (may not be a multiple of 4 pixels) * m_orig_height: The original image height (may not be a multiple of 4 pixels) * m_num_blocks_x: The slice's block X dimensions. Each block is 4x4 pixels. The slice's pixel resolution may or may not be a power of 2. -* m_num_blocks_y: The slice's block Y dimensions. +* m_num_blocks_y: The slice's block Y dimensions. * m_file_ofs: Offset from the start of the file to the start of the slice's data * m_file_size: The size of the compressed slice data in bytes * m_slice_data_crc16: The CRC16 of the compressed slice data, for extra-paranoid use cases @@ -176,7 +176,7 @@ struct basis_slice_desc 5.0 CRC-16 Function ------------------- -.basis files use CRC-16/genibus(aka CRC-16 EPC, CRC-16 I-CODE, CRC-16 DARC) format CRC-16's. 
+.basis files use CRC-16/genibus(aka CRC-16 EPC, CRC-16 I-CODE, CRC-16 DARC) format CRC-16's. Here's an example function in C++: @@ -216,39 +216,39 @@ First, some enums: enum { // Max supported Huffman code size is 16-bits - cHuffmanMaxSupportedCodeSize = 16, + cHuffmanMaxSupportedCodeSize = 16, // The maximum number of symbols is 2^14 - cHuffmanMaxSymsLog2 = 14, + cHuffmanMaxSymsLog2 = 14, cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2, // Small zero runs may range from 3-10 entries - cHuffmanSmallZeroRunSizeMin = 3, - cHuffmanSmallZeroRunSizeMax = 10, + cHuffmanSmallZeroRunSizeMin = 3, + cHuffmanSmallZeroRunSizeMax = 10, cHuffmanSmallZeroRunExtraBits = 3, // Big zero runs may range from 11-138 entries - cHuffmanBigZeroRunSizeMin = 11, - cHuffmanBigZeroRunSizeMax = 138, + cHuffmanBigZeroRunSizeMin = 11, + cHuffmanBigZeroRunSizeMax = 138, cHuffmanBigZeroRunExtraBits = 7, // Small non-zero runs may range from 3-6 entries - cHuffmanSmallRepeatSizeMin = 3, - cHuffmanSmallRepeatSizeMax = 6, + cHuffmanSmallRepeatSizeMin = 3, + cHuffmanSmallRepeatSizeMax = 6, cHuffmanSmallRepeatExtraBits = 2, // Big non-zero run may range from 7-134 entries - cHuffmanBigRepeatSizeMin = 7, - cHuffmanBigRepeatSizeMax = 134, + cHuffmanBigRepeatSizeMin = 7, + cHuffmanBigRepeatSizeMax = 134, cHuffmanBigRepeatExtraBits = 7, // There are a maximum of 21 symbols in a compressed Huffman code length table. - cHuffmanTotalCodelengthCodes = 21, - + cHuffmanTotalCodelengthCodes = 21, + // Symbols [0,16] indicate code sizes. 
Other symbols indicate zero runs or repeats: - cHuffmanSmallZeroRunCode = 17, - cHuffmanBigZeroRunCode = 18, - cHuffmanSmallRepeatCode = 19, + cHuffmanSmallZeroRunCode = 17, + cHuffmanBigZeroRunCode = 18, + cHuffmanSmallRepeatCode = 19, cHuffmanBigRepeatCode = 20 }; @@ -261,41 +261,41 @@ Each table begins with a small fixed header: 14 bits: total_used_syms [1, cHuffmanMaxSyms] 5 bits: num_codelength_codes [1, cHuffmanTotalCodelengthCodes] - + Next, the code lengths for the small Huffman table which is used to send the compressed codelengths (and RLE/repeat codes) are sent uncompressed but in a reordered manner: - + 3*num_codelength_codes bits: Code size of each Huffman symbol for the compressed Huffman codelength table. - + These code lengths are sent in this order (to help reduce the number that must be sent): - - { - cHuffmanSmallZeroRunCode, cHuffmanBigZeroRunCode, cHuffmanSmallRepeatCode, cHuffmanBigRepeatCode, - 0, 8, 7, 9, 6, 0xA, 5, 0xB, 4, 0xC, 3, 0xD, 2, 0xE, 1, 0xF, 0x10 + + { + cHuffmanSmallZeroRunCode, cHuffmanBigZeroRunCode, cHuffmanSmallRepeatCode, cHuffmanBigRepeatCode, + 0, 8, 7, 9, 6, 0xA, 5, 0xB, 4, 0xC, 3, 0xD, 2, 0xE, 1, 0xF, 0x10 }; - + A canonical Huffman decoding table (of up to 21 symbols) should be built from these code lengths. Immediately following this data are the Huffman symbols (sometimes intermixed with raw bits) which describe how to unpack the codelengths of each symbol in the Huffman table: - Symbols [0,16] indicate a specific symbol code length in bits. - + - Symbol cHuffmanSmallZeroRunCode (17) indicates a short run of symbols with 0 bit code lengths. cHuffmanSmallZeroRunExtraBits (3) bits are sent after this symbol, which indicates the run's size after adding the minimum size (cHuffmanSmallZeroRunSizeMin). - - - Symbol cHuffmanBigZeroRunCode (18) indicates a long run of symbols with 0 bit code lengths. + + - Symbol cHuffmanBigZeroRunCode (18) indicates a long run of symbols with 0 bit code lengths. 
cHuffmanBigZeroRunExtraBits (7) bits are sent after this symbol, which indicates the run's size after adding the minimum size (cHuffmanBigZeroRunSizeMin) - Symbol cHuffmanSmallRepeatCode (19) indicates a short run of symbols that repeat the previous symbol's code length. - cHuffmanSmallRepeatExtraBits (2) bits are sent after this symbol, which indicates the number of times to repeat the previous symbol's code length, + cHuffmanSmallRepeatExtraBits (2) bits are sent after this symbol, which indicates the number of times to repeat the previous symbol's code length, after adding the minimum size (cHuffmanSmallRepeatSizeMin). Cannot be the first symbol, and the previous symbol cannot have a code length of 0. - + - Symbol cHuffmanBigRepeatCode (20) indicates a short run of symbols that repeat the previous symbol's code length. cHuffmanBigRepeatExtraBits (7) bits are sent after this symbol, which indicates the number of times to repeat the previous symbol's code length, after adding the minimum size (cHuffmanBigRepeatSizeMin). Cannot be the first symbol, and the previous symbol cannot have a code length of 0. - + There should be exactly total_used_syms code lengths stored in the compressed Huffman table. If not the stream is either corrupted or invalid. After all the symbol codelengths are uncompressed, the symbol codes can be computed and the canonical Huffman decoding tables can be built. @@ -318,9 +318,9 @@ appear in this order: 4. inten_delta_model Following the data for these Huffman tables is a single 1-bit code which -indicates if the color endpoint codebook is grayscale or not. +indicates if the color endpoint codebook is grayscale or not. -Immediately following this code is the compressed color endpoint codebook data. +Immediately following this code is the compressed color endpoint codebook data. A simple form of DPCM (Delta Pulse Code Modulation) coding is used to send the ETC1S intensity table indices and color values. 
Here is the procedure to decode the endpoint codebook: @@ -393,11 +393,11 @@ byte corresponds to four 2-bit ETC1S selectors. The first selector of each group of 4 selectors starts at the LSB (least significant bit) of each byte, and is 2-bits wide. -If the third bit is 0, the selectors have been DPCM coded with Huffman coding. +If the third bit is 0, the selectors have been DPCM coded with Huffman coding. The "delta_selector_pal_model" Huffman table will immediately follow the third bit, and is stored using the procedure outlined in section 6.0. -Immediately following the Huffman table is the compressed selector codebook. +Immediately following the Huffman table is the compressed selector codebook. Here is the DPCM decoding procedure: uint8_t prev_bytes[4] = { 0, 0, 0, 0 }; @@ -439,7 +439,7 @@ Any bytes in this section following the selector codebook bits can be safely ign Each ETC1S slice is compressed with four Huffman tables stored using the procedure outlined in section 6.0. These Huffman tables are stored at file -offset basis_file_header::m_tables_file_ofs. This section will be +offset basis_file_header::m_tables_file_ofs. This section will be basis_file_header::m_tables_file_size bytes long. The following four Huffman tables are sent, in this order: @@ -458,7 +458,7 @@ history buffer. Any remaining bits may be safely ignored. ETC1S slices consist of a compressed 2D array of ETC1S blocks, always compressed in top-down/left-right raster order. For texture video, the previous slice's already decoded contents may be referred to when blocks are encoded using -Conditional Replenishment (also known as "skip blocks"). +Conditional Replenishment (also known as "skip blocks"). Each ETC1S block is encoded by using references to the color endpoint codebook and the selector codebook. 
Sections 10.1 and 10.2 describe the helper procedures @@ -508,7 +508,7 @@ decoding: m_values[index] = x; } } - + private: std::vector m_values; uint32_t m_rover; @@ -526,7 +526,7 @@ sends raw bits using variable-size chunks. Here is the VLC decoding procedure: const uint32_t chunk_size = 1 << chunk_bits; const uint32_t chunk_mask = chunk_size - 1; - + uint32_t v = 0; uint32_t ofs = 0; @@ -538,7 +538,7 @@ sends raw bits using variable-size chunks. Here is the VLC decoding procedure: if ((s & chunk_size) == 0) break; - + if (ofs >= 32) { assert(0); @@ -583,11 +583,11 @@ selector codebook indices are decoded. const uint32_t NUM_ENDPOINT_PREDS = 3; const uint32_t CR_ENDPOINT_PRED_INDEX = NUM_ENDPOINT_PREDS - 1; const uint32_t NO_ENDPOINT_PRED_INDEX = 3; - + // Endpoint/selector codebooks - decoded previously. See sections 7.0 and 8.0. endpoint endpoints[endpoint_codebook_size]; - selector selectors[selector_codebook_size]; - + selector selectors[selector_codebook_size]; + // Array of per-block values used for endpoint index prediction (enough for 2 rows). struct block_preds { @@ -595,22 +595,22 @@ selector codebook indices are decoded. uint8_t m_pred_bits; }; block_preds block_endpoint_preds[2][num_blocks_x]; - + // Some constants and state used during block decoding const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = selector_codebook_size; const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = selector_history_buf_size + SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX; uint32_t cur_selector_rle_count = 0; - + uint32_t cur_pred_bits = 0; int prev_endpoint_pred_sym = 0; int endpoint_pred_repeat_count = 0; uint32_t prev_endpoint_index = 0; // This array is only used for texture video. It holds the previous frame's endpoint and selector indices (each 16-bits, for 32-bits total). - uint32_t prev_frame_indices[num_blocks_x][num_blocks_y]; - + uint32_t prev_frame_indices[num_blocks_x][num_blocks_y]; + // Selector history buffer - See section 10.1. 
- // For the selector history buffer's size, see section 9.0. + // For the selector history buffer's size, see section 9.0. approx_move_to_front selector_history_buf(selector_history_buf_size); // Loop over all slice blocks in raster order @@ -629,7 +629,7 @@ selector codebook indices are decoded. { // We're on an even row and column of blocks. Decode the combined endpoint index predictor symbols for 2x2 blocks. // This symbol tells the decoder how the endpoints are decoded for each block in a 2x2 group of blocks. - + // Are we in an RLE run? if (endpoint_pred_repeat_count) { @@ -655,7 +655,7 @@ selector codebook indices are decoded. } } - // The symbol has enough endpoint prediction information for 4 blocks (2 bits per block), so 8 bits total. + // The symbol has enough endpoint prediction information for 4 blocks (2 bits per block), so 8 bits total. // Remember the prediction information we should use for the next row of 2 blocks beneath the current block. block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x].m_pred_bits = (uint8_t)(cur_pred_bits >> 4); } @@ -665,7 +665,7 @@ selector codebook indices are decoded. cur_pred_bits = block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_pred_bits; } } - + // Decode the current block's endpoint and selector indices. uint32_t endpoint_index, selector_index = 0; @@ -673,8 +673,8 @@ selector codebook indices are decoded. const uint32_t pred = cur_pred_bits & 3; // Get the next block's endpoint prediction bits ready. - cur_pred_bits >>= 2; - + cur_pred_bits >>= 2; + // Now check to see if we should reuse a previously encoded block's endpoints. if (pred == 0) { @@ -712,7 +712,7 @@ selector codebook indices are decoded. 
const uint32_t delta_sym = decode_huffman(delta_endpoint_model); endpoint_index = delta_sym + prev_endpoint_index; - + // Wrap around if the index goes beyond the end of the endpoint codebook if (endpoint_index >= endpoints.size()) endpoint_index -= (int)endpoints.size(); @@ -723,12 +723,12 @@ selector codebook indices are decoded. // Remember the endpoint index used prev_endpoint_index = endpoint_index; - - // Now we have fully decoded the ETC1S endpoint codebook index, in endpoint_index. - + + // Now we have fully decoded the ETC1S endpoint codebook index, in endpoint_index. + // Now decode the selector index (see the next block of code, below). < selector decoding - see below > - + } // block_x } // block_y @@ -756,7 +756,7 @@ The code to decode the selector codebook index immediately follows the code abov const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); - // Decode selector index, unless it's texture video and the endpoint predictor indicated that the + // Decode selector index, unless it's texture video and the endpoint predictor indicated that the // block's endpoints were reused from the previous frame. if ((!is_video) || (pred != CR_ENDPOINT_PRED_INDEX)) { @@ -820,7 +820,7 @@ The code to decode the selector codebook index immediately follows the code abov selector_history_buf.add(selector_index); } } - + // For texture video, remember the endpoint and selector indices used by the block on this frame, for later reuse on the next frame. if (is_video) prev_frame_indices[block_x][block_y] = endpoint_index | (selector_index << 16); @@ -836,24 +836,24 @@ texture data, or it can immedately transcode the ETC1S data to another GPU textu 11.0 Alpha Channels in ETC1S Format Files ----------------------------------------- -ETC1S .basis files can have optional alpha channels, stored in odd slices. 
If any slice needs an alpha channel, -all slices must have alpha channels. basis_file_header::m_flags will be logically OR'd with -cBASISHeaderFlagHasAlphaSlices. Alpha channel ETC1S files will contain two slices for each mipmap level -(or face, or video frame, etc.). The basis_slice_desc::m_flags field will be logically OR'd with -cSliceDescFlagsHasAlpha for all odd alpha slices. +ETC1S .basis files can have optional alpha channels, stored in odd slices. If any slice needs an alpha channel, +all slices must have alpha channels. basis_file_header::m_flags will be logically OR'd with +cBASISHeaderFlagHasAlphaSlices. Alpha channel ETC1S files will contain two slices for each mipmap level +(or face, or video frame, etc.). The basis_slice_desc::m_flags field will be logically OR'd with +cSliceDescFlagsHasAlpha for all odd alpha slices. -The even slices will contain the RGB data, and the odd slices will contain the alpha data, both stored in ETC1S -format. Alpha channel ETC1S files must always have an even total number of slices. A decoder can first decode -the RGB data slice, then the next alpha channel slice, or it can decode them in parallel using multithreading. +The even slices will contain the RGB data, and the odd slices will contain the alpha data, both stored in ETC1S +format. Alpha channel ETC1S files must always have an even total number of slices. A decoder can first decode +the RGB data slice, then the next alpha channel slice, or it can decode them in parallel using multithreading. The ETC1S green channel (on the odd slices) contains the alpha values. 12.0 Texture Video ------------------ Both ETC1S and UASTC format files support texture video. Texture video files can be optionally mipmapped, and can -contain optional alpha channels (stored as separate slices in ETC1S format files). 
Currently, the first frame is -always an i-frame, and all subsequent frames are p-frames, but the file format and transcoder supports any -frame being an i-frame (and the encoder will be enhanced to support this feature). Decoders must track the previously +contain optional alpha channels (stored as separate slices in ETC1S format files). Currently, the first frame is +always an i-frame, and all subsequent frames are p-frames, but the file format and transcoder supports any +frame being an i-frame (and the encoder will be enhanced to support this feature). Decoders must track the previously decoded frame's endpoints/selectors for all mipmap levels (if any), not just the top level's. Skip blocks always refer to the previous frame. i-frames cannot use skip blocks (encoded as endpoint predictor index 2). diff --git a/transcoder/basisu.h b/transcoder/basisu.h index 1230b59e..d78e557e 100644 --- a/transcoder/basisu.h +++ b/transcoder/basisu.h @@ -120,7 +120,7 @@ namespace basisu void enable_debug_printf(bool enabled); void debug_printf(const char *pFmt, ...); - + template inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); } @@ -129,7 +129,7 @@ namespace basisu template inline S maximum(S a, S b) { return (a > b) ? a : b; } template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } - + template inline S minimum(S a, S b) { return (a < b) ? a : b; } template inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } template inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } @@ -153,7 +153,7 @@ namespace basisu inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } inline uint64_t iabs64(int64_t i) { return (i < 0) ? 
static_cast(-i) : static_cast(i); } - template inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } + template inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } template inline typename T::value_type *enlarge_vector(T &vec, size_t n) { size_t cs = vec.size(); vec.resize(cs + n); return &vec[cs]; } inline bool is_pow2(uint32_t x) { return x && ((x & (x - 1U)) == 0U); } @@ -166,8 +166,8 @@ namespace basisu template inline T saturate(T val) { return clamp(val, 0.0f, 1.0f); } - template inline void append_vector(T &vec, const R *pObjs, size_t n) - { + template inline void append_vector(T &vec, const R *pObjs, size_t n) + { if (n) { if (vec.size()) @@ -218,7 +218,7 @@ namespace basisu for (size_t i = 0; i < vec.size(); i++) vec[i] = obj; } - + inline uint64_t read_be64(const void *p) { uint64_t val = 0; @@ -279,7 +279,7 @@ namespace basisu pBytes[2] = (uint8_t)(val >> 16U); pBytes[3] = (uint8_t)(val >> 24U); } - + // Always little endian 1-8 byte unsigned int template struct packed_uint @@ -289,17 +289,17 @@ namespace basisu inline packed_uint() { static_assert(NumBytes <= sizeof(uint64_t), "Invalid NumBytes"); } inline packed_uint(uint64_t v) { *this = v; } inline packed_uint(const packed_uint& other) { *this = other; } - - inline packed_uint& operator= (uint64_t v) - { - for (uint32_t i = 0; i < NumBytes; i++) - m_bytes[i] = static_cast(v >> (i * 8)); - return *this; + + inline packed_uint& operator= (uint64_t v) + { + for (uint32_t i = 0; i < NumBytes; i++) + m_bytes[i] = static_cast(v >> (i * 8)); + return *this; } - inline packed_uint& operator= (const packed_uint& rhs) - { - memcpy(m_bytes, rhs.m_bytes, sizeof(m_bytes)); + inline packed_uint& operator= (const packed_uint& rhs) + { + memcpy(m_bytes, rhs.m_bytes, sizeof(m_bytes)); return *this; } @@ -307,19 +307,19 @@ namespace basisu { switch (NumBytes) { - case 1: + case 1: { return m_bytes[0]; } - case 2: + case 2: { return (m_bytes[1] << 8U) | m_bytes[0]; } - case 3: + 
case 3: { return (m_bytes[2] << 16U) | (m_bytes[1] << 8U) | m_bytes[0]; } - case 4: + case 4: { return read_le_dword(m_bytes); } @@ -341,13 +341,13 @@ namespace basisu uint32_t h = (m_bytes[6] << 16U) | (m_bytes[5] << 8U) | m_bytes[4]; return static_cast(l) | (static_cast(h) << 32U); } - case 8: + case 8: { uint32_t l = read_le_dword(m_bytes); uint32_t h = read_le_dword(m_bytes + 4); return static_cast(l) | (static_cast(h) << 32U); } - default: + default: { assert(0); return 0; @@ -358,14 +358,14 @@ namespace basisu enum eZero { cZero }; enum eNoClamp { cNoClamp }; - + // Rice/Huffman entropy coding - + // This is basically Deflate-style canonical Huffman, except we allow for a lot more symbols. enum { - cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, - cHuffmanFastLookupBits = 10, + cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, + cHuffmanFastLookupBits = 10, cHuffmanMaxSymsLog2 = 14, cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2, // Small zero runs @@ -391,13 +391,13 @@ namespace basisu enum class texture_format { cInvalidTextureFormat = -1, - + // Block-based formats cETC1, // ETC1 cETC1S, // ETC1 (subset: diff colors only, no subblocks) cETC2_RGB, // ETC2 color block (basisu doesn't support ETC2 planar/T/H modes - just basic ETC1) cETC2_RGBA, // ETC2 EAC alpha block followed by ETC2 color block - cETC2_ALPHA, // ETC2 EAC alpha block + cETC2_ALPHA, // ETC2 EAC alpha block cBC1, // DXT1 cBC3, // DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block) cBC4, // DXT5A @@ -412,10 +412,10 @@ namespace basisu cPVRTC2_4_RGBA, cETC2_R11_EAC, cETC2_RG11_EAC, - cUASTC4x4, + cUASTC4x4, cBC1_NV, cBC1_AMD, - + // Uncompressed/raw pixels cRGBA32, cRGB565, @@ -473,6 +473,6 @@ namespace basisu BASISU_NOTE_UNUSED(fmt); return 4; } - + } // namespace basisu diff --git a/transcoder/basisu_containers.h b/transcoder/basisu_containers.h index 67416ee7..0bc39d33 100644 --- a/transcoder/basisu_containers.h +++ 
b/transcoder/basisu_containers.h @@ -1,8 +1,10 @@ // basisu_containers.h #pragma once +#include #include #include #include +#include #include #include @@ -139,8 +141,8 @@ namespace basisu static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } static inline void construct(T** p, T* init) { *p = init; } static inline void construct_array(T** p, size_t n) { memset(p, 0, sizeof(T*) * n); } - static inline void destruct(T** p) { p; } - static inline void destruct_array(T** p, size_t n) { p, n; } + static inline void destruct(T** p) { (void)p; } + static inline void destruct_array(T** p, size_t n) { (void)p; (void)n; } }; #define BASISU_DEFINE_BUILT_IN_TYPE(X) \ @@ -149,8 +151,8 @@ namespace basisu static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ static inline void construct_array(X* p, size_t n) { memset(p, 0, sizeof(X) * n); } \ - static inline void destruct(X* p) { p; } \ - static inline void destruct_array(X* p, size_t n) { p, n; } }; + static inline void destruct(X* p) { (void)p; } \ + static inline void destruct_array(X* p, size_t n) { (void)p; (void)n; } }; BASISU_DEFINE_BUILT_IN_TYPE(bool) BASISU_DEFINE_BUILT_IN_TYPE(char) @@ -360,28 +362,28 @@ namespace basisu // operator[] will assert on out of range indices, but in final builds there is (and will never be) any range checking on this method. 
//BASISU_FORCE_INLINE const T& operator[] (uint32_t i) const { assert(i < m_size); return m_p[i]; } //BASISU_FORCE_INLINE T& operator[] (uint32_t i) { assert(i < m_size); return m_p[i]; } - + #if !BASISU_VECTOR_FORCE_CHECKING BASISU_FORCE_INLINE const T& operator[] (size_t i) const { assert(i < m_size); return m_p[i]; } BASISU_FORCE_INLINE T& operator[] (size_t i) { assert(i < m_size); return m_p[i]; } #else - BASISU_FORCE_INLINE const T& operator[] (size_t i) const - { + BASISU_FORCE_INLINE const T& operator[] (size_t i) const + { if (i >= m_size) { fprintf(stderr, "operator[] invalid index: %u, max entries %u, type size %u\n", (uint32_t)i, m_size, (uint32_t)sizeof(T)); abort(); } - return m_p[i]; + return m_p[i]; } - BASISU_FORCE_INLINE T& operator[] (size_t i) - { + BASISU_FORCE_INLINE T& operator[] (size_t i) + { if (i >= m_size) { fprintf(stderr, "operator[] invalid index: %u, max entries %u, type size %u\n", (uint32_t)i, m_size, (uint32_t)sizeof(T)); abort(); } - return m_p[i]; + return m_p[i]; } #endif @@ -389,7 +391,7 @@ namespace basisu // The first element is returned if the index is out of range. BASISU_FORCE_INLINE const T& at(size_t i) const { assert(i < m_size); return (i >= m_size) ? m_p[0] : m_p[i]; } BASISU_FORCE_INLINE T& at(size_t i) { assert(i < m_size); return (i >= m_size) ? 
m_p[0] : m_p[i]; } - + #if !BASISU_VECTOR_FORCE_CHECKING BASISU_FORCE_INLINE const T& front() const { assert(m_size); return m_p[0]; } BASISU_FORCE_INLINE T& front() { assert(m_size); return m_p[0]; } @@ -397,42 +399,42 @@ namespace basisu BASISU_FORCE_INLINE const T& back() const { assert(m_size); return m_p[m_size - 1]; } BASISU_FORCE_INLINE T& back() { assert(m_size); return m_p[m_size - 1]; } #else - BASISU_FORCE_INLINE const T& front() const - { + BASISU_FORCE_INLINE const T& front() const + { if (!m_size) { fprintf(stderr, "front: vector is empty, type size %u\n", (uint32_t)sizeof(T)); abort(); } - return m_p[0]; + return m_p[0]; } - BASISU_FORCE_INLINE T& front() - { + BASISU_FORCE_INLINE T& front() + { if (!m_size) { fprintf(stderr, "front: vector is empty, type size %u\n", (uint32_t)sizeof(T)); abort(); } - return m_p[0]; + return m_p[0]; } - BASISU_FORCE_INLINE const T& back() const - { + BASISU_FORCE_INLINE const T& back() const + { if(!m_size) { fprintf(stderr, "back: vector is empty, type size %u\n", (uint32_t)sizeof(T)); abort(); } - return m_p[m_size - 1]; + return m_p[m_size - 1]; } - BASISU_FORCE_INLINE T& back() - { + BASISU_FORCE_INLINE T& back() + { if (!m_size) { fprintf(stderr, "back: vector is empty, type size %u\n", (uint32_t)sizeof(T)); abort(); } - return m_p[m_size - 1]; + return m_p[m_size - 1]; } #endif @@ -723,7 +725,7 @@ namespace basisu insert(m_size, p, n); return *this; } - + inline void erase(uint32_t start, uint32_t n) { assert((start + n) <= m_size); @@ -754,7 +756,7 @@ namespace basisu } else { - // Type is not bitwise copyable or movable. + // Type is not bitwise copyable or movable. // Move them down one at a time by using the equals operator, and destroying anything that's left over at the end. 
T* pDst_end = pDst + num_to_move; while (pDst != pDst_end) @@ -974,7 +976,7 @@ namespace basisu if (!m) break; cmp = -cmp; i += (((m + 1) >> 1) ^ cmp) - cmp; - if (i < 0) + if (i < 0) break; } } @@ -1119,7 +1121,7 @@ namespace basisu public: class iterator; class const_iterator; - + private: friend class iterator; friend class const_iterator; @@ -1307,7 +1309,7 @@ namespace basisu if (new_hash_size > m_values.size()) rehash((uint32_t)new_hash_size); } - + class iterator { friend class hash_map; @@ -1742,7 +1744,7 @@ namespace basisu inline void grow() { uint64_t n = m_values.size() * 3ULL; // was * 2 - + if (!helpers::is_power_of_2(n)) n = helpers::next_pow2(n); @@ -1959,11 +1961,11 @@ namespace basisu template struct bitwise_movable< hash_map > { enum { cFlag = true }; }; - + #if BASISU_HASHMAP_TEST extern void hash_map_test(); #endif - + } // namespace basisu namespace std diff --git a/transcoder/basisu_containers_impl.h b/transcoder/basisu_containers_impl.h index d5cb6156..72b10692 100644 --- a/transcoder/basisu_containers_impl.h +++ b/transcoder/basisu_containers_impl.h @@ -34,7 +34,7 @@ namespace basisu abort(); } } - + const size_t desired_size = element_size * new_capacity; size_t actual_size = 0; if (!pMover) @@ -98,7 +98,7 @@ namespace basisu if (m_p) free(m_p); - + m_p = new_p; } diff --git a/transcoder/basisu_file_headers.h b/transcoder/basisu_file_headers.h index 4316d738..e99972d8 100644 --- a/transcoder/basisu_file_headers.h +++ b/transcoder/basisu_file_headers.h @@ -21,10 +21,10 @@ namespace basist enum basis_slice_desc_flags { cSliceDescFlagsHasAlpha = 1, - + // Video only: Frame doesn't refer to previous frame (no usage of conditional replenishment pred symbols) // Currently the first frame is always an I-Frame, all subsequent frames are P-Frames. This will eventually be changed to periodic I-Frames. 
- cSliceDescFlagsFrameIsIFrame = 2 + cSliceDescFlagsFrameIsIFrame = 2 }; #pragma pack(push) @@ -39,7 +39,7 @@ namespace basist basisu::packed_uint<2> m_orig_height; // The original image height (may not be a multiple of 4 pixels) basisu::packed_uint<2> m_num_blocks_x; // The slice's block X dimensions. Each block is 4x4 pixels. The slice's pixel resolution may or may not be a power of 2. - basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. + basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. basisu::packed_uint<4> m_file_ofs; // Offset from the start of the file to the start of the slice's data basisu::packed_uint<4> m_file_size; // The size of the compressed slice data in bytes @@ -51,24 +51,24 @@ namespace basist enum basis_header_flags { // Always set for ETC1S files. Not set for UASTC files. - cBASISHeaderFlagETC1S = 1, - + cBASISHeaderFlagETC1S = 1, + // Set if the texture had to be Y flipped before encoding. The actual interpretation of this (is Y up or down?) is up to the user. - cBASISHeaderFlagYFlipped = 2, - + cBASISHeaderFlagYFlipped = 2, + // Set if any slices contain alpha (for ETC1S, if the odd slices contain alpha data) - cBASISHeaderFlagHasAlphaSlices = 4, - - // For ETC1S files, this will be true if the file utilizes a codebook from another .basis file. - cBASISHeaderFlagUsesGlobalCodebook = 8, - - // Set if the texture data is sRGB, otherwise it's linear. + cBASISHeaderFlagHasAlphaSlices = 4, + + // For ETC1S files, this will be true if the file utilizes a codebook from another .basis file. + cBASISHeaderFlagUsesGlobalCodebook = 8, + + // Set if the texture data is sRGB, otherwise it's linear. // In reality, we have no idea if the texture data is actually linear or sRGB. This is the m_perceptual parameter passed to the compressor. - cBASISHeaderFlagSRGB = 16, + cBASISHeaderFlagSRGB = 16, }; // The image type field attempts to describe how to interpret the image data in a Basis file. 
- // The encoder library doesn't really do anything special or different with these texture types, this is mostly here for the benefit of the user. + // The encoder library doesn't really do anything special or different with these texture types, this is mostly here for the benefit of the user. // We do make sure the various constraints are followed (2DArray/cubemap/videoframes/volume implies that each image has the same resolution and # of mipmap levels, etc., cubemap implies that the # of image slices is a multiple of 6) enum basis_texture_type { @@ -111,7 +111,7 @@ namespace basist basisu::packed_uint<3> m_total_slices; // The total # of compressed slices (1 slice per image, or 2 for alpha .basis files) basisu::packed_uint<3> m_total_images; // The total # of images - + basisu::packed_uint<1> m_tex_format; // enum basis_tex_format basisu::packed_uint<2> m_flags; // enum basist::header_flags basisu::packed_uint<1> m_tex_type; // enum basist::basis_texture_type @@ -121,11 +121,11 @@ namespace basist basisu::packed_uint<4> m_userdata0; // For client use basisu::packed_uint<4> m_userdata1; // For client use - basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook + basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook basisu::packed_uint<4> m_endpoint_cb_file_ofs; // The compressed endpoint codebook's file offset relative to the start of the file basisu::packed_uint<3> m_endpoint_cb_file_size; // The compressed endpoint codebook's size in bytes - basisu::packed_uint<2> m_total_selectors; // The number of selectors in the endpoint codebook + basisu::packed_uint<2> m_total_selectors; // The number of selectors in the endpoint codebook basisu::packed_uint<4> m_selector_cb_file_ofs; // The compressed selectors codebook's file offset relative to the start of the file basisu::packed_uint<3> m_selector_cb_file_size; // The compressed selector codebook's size in bytes @@ -133,7 +133,7 @@ 
namespace basist basisu::packed_uint<4> m_tables_file_size; // The file size in bytes of the compressed huffman codelength tables basisu::packed_uint<4> m_slice_desc_file_ofs; // The file offset to the slice description array, usually follows the header - + basisu::packed_uint<4> m_extended_file_ofs; // The file offset of the "extended" header and compressed data, for future use basisu::packed_uint<4> m_extended_file_size; // The file size in bytes of the "extended" header and compressed data, for future use }; diff --git a/transcoder/basisu_transcoder.cpp b/transcoder/basisu_transcoder.cpp index 3aeba0ee..ab52d031 100644 --- a/transcoder/basisu_transcoder.cpp +++ b/transcoder/basisu_transcoder.cpp @@ -170,7 +170,7 @@ namespace basisu void debug_printf(const char* pFmt, ...) { -#if BASISU_FORCE_DEVEL_MESSAGES +#if BASISU_FORCE_DEVEL_MESSAGES g_debug_printf = true; #endif if (g_debug_printf) @@ -232,7 +232,7 @@ namespace basist return static_cast(~crc); } - + enum etc_constants { cETC1BytesPerBlock = 8U, @@ -305,14 +305,14 @@ namespace basist //const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; - + static const uint8_t g_etc_5_to_8[32] = { 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255 }; struct decoder_etc_block { // big endian uint64: // bit ofs: 56 48 40 32 24 16 8 0 - // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 union { uint64_t m_uint64; @@ -580,7 +580,7 @@ namespace basist { return (m_bytes[3] & 2) != 0; } - + inline uint32_t get_inten_table(uint32_t subblock_id) const { assert(subblock_id < 2); @@ -595,7 +595,7 @@ namespace basist const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); return static_cast(b | (g << 3U) | (r << 6U)); } - + void get_block_colors(color32* pBlock_colors, uint32_t subblock_index) 
const { color32 b; @@ -713,7 +713,7 @@ namespace basist g = c.g; b = c.b; } - + static void unpack_color5(color32& result, uint16_t packed_color5, bool scaled) { result = unpack_color5(packed_color5, scaled, 255); @@ -842,7 +842,7 @@ namespace basist static void get_block_color5_r(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t &r) { assert(index < 4); - + uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2); const int* pInten_table = g_etc1_inten_tables[inten_table]; @@ -1018,7 +1018,7 @@ namespace basist { 1, 2, 2, 2 }, { 1, 2, 3, 3 }, }; - + static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256]; static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256]; @@ -1410,9 +1410,9 @@ namespace basist return best_err; } #endif // BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES - + static -#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES +#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES const #endif etc1_g_to_eac_conversion s_etc1_g_to_etc2_a8[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] = @@ -1902,18 +1902,18 @@ namespace basist #endif static bool g_transcoder_initialized; - + // Library global initialization. Requires ~9 milliseconds when compiled and executed natively on a Core i7 2.2 GHz. // If this is too slow, these computed tables can easilky be moved to be compiled in. 
void basisu_transcoder_init() { if (g_transcoder_initialized) { - BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); return; } - - BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); + + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); #if BASISD_SUPPORT_UASTC uastc_init(); @@ -1922,7 +1922,7 @@ namespace basist #if BASISD_SUPPORT_ASTC transcoder_init_astc(); #endif - + #if BASISD_WRITE_NEW_ASTC_TABLES create_etc1_to_astc_conversion_table_0_47(); create_etc1_to_astc_conversion_table_0_255(); @@ -2178,7 +2178,7 @@ namespace basist std::swap(l, h); pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[best_mapping][0]; } - + pDst_block->set_low_color(static_cast(l)); pDst_block->set_high_color(static_cast(h)); @@ -2338,7 +2338,7 @@ namespace basist fxt1_block* pBlock = static_cast(pDst); // CC_MIXED is basically DXT1 with different encoding tricks. - // So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless. + // So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless. // (It's not completely lossless because FXT1 rounds in its color lerps while DXT1 doesn't, but it should be good enough.) 
dxt1_block blk; convert_etc1s_to_dxt1(&blk, pEndpoints, pSelectors, false); @@ -2351,7 +2351,7 @@ namespace basist uint32_t g0 = color0.g & 1; uint32_t g1 = color1.g & 1; - + color0.g >>= 1; color1.g >>= 1; @@ -2359,7 +2359,7 @@ namespace basist blk.m_selectors[1] = conv_dxt1_to_fxt1_sels(blk.m_selectors[1]); blk.m_selectors[2] = conv_dxt1_to_fxt1_sels(blk.m_selectors[2]); blk.m_selectors[3] = conv_dxt1_to_fxt1_sels(blk.m_selectors[3]); - + if ((blk.get_selector(0, 0) >> 1) != (g0 ^ g1)) { std::swap(color0, color1); @@ -2373,7 +2373,7 @@ namespace basist if (fxt1_subblock == 0) { - pBlock->m_hi.m_mode = 1; + pBlock->m_hi.m_mode = 1; pBlock->m_hi.m_alpha = 0; pBlock->m_hi.m_glsb = g1 | (g1 << 1); pBlock->m_hi.m_r0 = color0.r; @@ -2694,7 +2694,7 @@ namespace basist { uint32_t r; decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r); - + pDst_block->set_low_alpha(r); pDst_block->set_high_alpha(r); pDst_block->m_selectors[0] = 0; @@ -2777,7 +2777,7 @@ namespace basist static const uint8_t g_pvrtc_4[16] = { 0,16,33,49,66,82,99,115,140,156,173,189,206,222,239,255 }; static const uint8_t g_pvrtc_3[8] = { 0,33,74,107,148,181,222,255 }; static const uint8_t g_pvrtc_alpha[9] = { 0,34,68,102,136,170,204,238,255 }; - + static const uint8_t g_pvrtc_5_floor[256] = { 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3, @@ -2801,7 +2801,7 @@ namespace basist 24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28, 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31,31,31,31,31 }; - + static const uint8_t g_pvrtc_4_floor[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -2825,7 +2825,7 @@ namespace basist 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14, 14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15 }; - + static const uint8_t g_pvrtc_3_floor[256] = { 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -2849,7 +2849,7 @@ namespace basist 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; - + static const uint8_t g_pvrtc_alpha_floor[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -2956,10 +2956,10 @@ namespace basist } assert((r < 32) && (g < 32) && (b < 32) && (a < 16)); - + return color32(r, g, b, a); } - + inline color32 get_endpoint_8888(uint32_t endpoint_index) const { assert(endpoint_index < 2); @@ -3006,7 +3006,7 @@ namespace basist a = g_pvrtc_alpha[a]; } - + return color32(r, g, b, a); } @@ -3015,7 +3015,7 @@ namespace basist color32 c(get_endpoint_8888(endpoint_index)); return c.r + c.g + c.b + c.a; } - + inline uint32_t get_opaque_endpoint_l0() const { uint32_t packed = m_endpoints & 0xFFFE; @@ -3130,7 +3130,7 @@ namespace basist else m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; } - + // opaque endpoints: 554 or 555 // transparent endpoints: 3443 or 3444 inline void set_endpoint_raw(uint32_t endpoint_index, const color32& c, bool opaque_endpoint) @@ -3183,7 +3183,7 @@ namespace basist else m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; } - + inline void set_endpoint_floor(uint32_t endpoint_index, const color32& c) { assert(endpoint_index < 2); @@ -3408,7 +3408,7 @@ namespace basist for (int ey = 0; ey < 3; ey++) { - int by = y + ey - 1; + int by = y + ey - 1; const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; @@ -3416,7 +3416,7 @@ namespace basist for (int ex = 0; ex < 3; ex++) { - int bx = 0 + ex - 1; + int bx = 0 + ex - 1; const uint32_t e = pE[bx & x_mask]; @@ -3565,8 +3565,8 @@ namespace basist } static void fixup_pvrtc1_4_modulation_rgba( - const decoder_etc_block* pETC_Blocks, - const uint32_t* pPVRTC_endpoints, + const decoder_etc_block* pETC_Blocks, + const uint32_t* pPVRTC_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t 
num_blocks_y, void *pAlpha_blocks, const endpoint* pEndpoints, const selector* pSelectors) { @@ -3589,7 +3589,7 @@ namespace basist for (int ey = 0; ey < 3; ey++) { - int by = y + ey - 1; + int by = y + ey - 1; const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; @@ -3597,7 +3597,7 @@ namespace basist for (int ex = 0; ex < 3; ex++) { - int bx = 0 + ex - 1; + int bx = 0 + ex - 1; const uint32_t e = pE[bx & x_mask]; @@ -3611,13 +3611,13 @@ namespace basist for (int x = 0; x < static_cast(num_blocks_x); x++, block_index++) { const decoder_etc_block& src_block = pETC_Blocks[block_index]; - + const uint16_t* pSrc_alpha_block = reinterpret_cast(static_cast(pAlpha_blocks) + x + (y * num_blocks_x)); const endpoint* pAlpha_endpoints = &pEndpoints[pSrc_alpha_block[0]]; const selector* pAlpha_selectors = &pSelectors[pSrc_alpha_block[1]]; - + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); - + uint32_t swizzled = x_swizzle | y_swizzle; if (num_blocks_x != num_blocks_y) { @@ -3760,7 +3760,7 @@ namespace basist const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5_selector_ranges) / sizeof(g_etc1_to_bc7_m5_selector_ranges[0]); static uint32_t g_etc1_to_bc7_m5_selector_range_index[4][4]; - + const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS = 10; static const uint8_t g_etc1_to_bc7_m5_selector_mappings[NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS][4] = { @@ -3782,11 +3782,11 @@ namespace basist uint8_t m_hi; uint16_t m_err; }; - + static const etc1_to_bc7_m5_solution g_etc1_to_bc7_m5_color[32 * 8 * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS * NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES] = { #include "basisu_transcoder_tables_bc7_m5_color.inc" }; - + static dxt_selector_range g_etc1_to_bc7_m5a_selector_ranges[] = { { 0, 3 }, @@ -3811,7 +3811,7 @@ namespace basist { #include "basisu_transcoder_tables_bc7_m5_alpha.inc" }; - + static inline uint32_t set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t 
num_bits, uint32_t cur_ofs) { assert(num_bits < 32); @@ -3958,7 +3958,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1_to_bc7_m5_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -4037,7 +4037,7 @@ namespace basist int mapping_err = block_colors[s].g - colors[k]; mapping_err *= mapping_err; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. 
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) mapping_err *= 5; @@ -4048,7 +4048,7 @@ namespace basist best_k = k; } } // k - + total_err += best_mapping_err; output_selectors |= (best_k << (s * 2)); } // s @@ -4063,7 +4063,7 @@ namespace basist } // lo } // hi - + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, best_output_selectors); n++; if ((n & 31) == 31) @@ -4102,7 +4102,7 @@ namespace basist {127,104},{126,105},{126,106},{127,106},{127,107},{126,108},{125,109},{127,109},{126,110},{126,111},{127,111},{127,112},{126,113},{126,114},{127,114},{127,115}, {126,116},{126,117},{127,117},{127,118},{126,119},{126,120},{127,120},{127,121},{126,122},{126,123},{127,123},{127,124},{126,125},{126,126},{127,126},{127,127} }; - + static void transcoder_init_bc7_mode5() { #if 0 @@ -4130,9 +4130,9 @@ namespace basist } } // hi - + } // lo - + printf("{%u,%u},", g_bc7_m5_equals_1[i].m_hi, g_bc7_m5_equals_1[i].m_lo); if ((i & 15) == 15) printf("\n"); } @@ -4156,7 +4156,7 @@ namespace basist static void convert_etc1s_to_bc7_m5_color(void* pDst, const endpoint* pEndpoints, const selector* pSelector) { bc7_mode_5* pDst_block = static_cast(pDst); - + // First ensure the block is cleared to all 0's static_cast(pDst)[0] = 0; static_cast(pDst)[1] = 0; @@ -4282,7 +4282,7 @@ namespace basist pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_lo; pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_lo; pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_lo; - + s_inv = 3; } else @@ -4303,7 +4303,7 @@ namespace basist for (uint32_t x = 0; x < 4; x++) { const uint32_t s = pSelector->get_selector(x, y); - + const uint32_t os = pSelectors_xlat[s] ^ s_inv; output_bits |= (os << output_bit_ofs); @@ -4333,7 +4333,7 @@ namespace basist pDst_block->m_lo.m_a0 = r; pDst_block->m_lo.m_a1_0 = r & 63; pDst_block->m_hi.m_a1_1 = r >> 6; - + return; } else if (pSelector->m_num_unique_selectors == 2) @@ -4383,7 +4383,7 @@ namespace basist } const uint32_t 
selector_range_table = g_etc1_to_bc7_m5a_selector_range_index[low_selector][high_selector]; - + const etc1_g_to_bc7_m5a_conversion* pTable = &g_etc1_g_to_bc7_m5a[inten_table * (32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES) + base_color_r * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES + selector_range_table]; pDst_block->m_lo.m_a0 = pTable->m_lo; @@ -4853,7 +4853,7 @@ namespace basist // The best selector mapping to use given a base base+inten table and used selector range for converting grayscale data. static uint8_t g_etc1_to_astc_best_grayscale_mapping[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES]; - + #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY static const etc1_to_astc_solution g_etc1_to_astc_0_255[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] = { #include "basisu_transcoder_tables_astc_0_255.inc" @@ -4918,7 +4918,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 8; @@ -4939,7 +4939,7 @@ namespace basist mapping_best_high[m] = best_hi; mapping_best_err[m] = best_err; highest_best_err = basisu::maximum(highest_best_err, best_err); - + } // m for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++) @@ -5015,7 +5015,7 @@ namespace basist { int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]]; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. 
In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. int err_scale = 1; if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) @@ -5044,9 +5044,9 @@ namespace basist uint64_t err = mapping_best_err[m]; err = basisu::minimum(err, 0xFFFF); - + fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err); - + n++; if ((n & 31) == 31) fprintf(pFile, "\n"); @@ -5129,14 +5129,14 @@ namespace basist struct astc_block_params { // 2 groups of 5, but only a max of 8 are used (RRGGBBAA00) - uint8_t m_endpoints[10]; + uint8_t m_endpoints[10]; uint8_t m_weights[32]; }; - - // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2). + + // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2). // We're always going to output blocks containing alpha, even if the input doesn't have alpha, for simplicity. // Each block always has 4x4 weights, uses range 13 BISE encoding on the endpoints (0-47), and each weight ranges from 0-3. This encoding should be roughly equal in quality vs. BC1 for color. - // 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47. + // 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47. // Note the input [0,47] endpoint values are not linear - they are encoded as outlined in the ASTC spec: // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization // 32 total weights, stored as 16 CA CA, each ranging from 0-3. 
@@ -5158,7 +5158,7 @@ namespace basist astc_encode_trits(pOutput, pBlock->m_endpoints + 5, bit_pos, 4); // Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order. - + for (uint32_t i = 0; i < 32; i++) { static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 }; @@ -5167,7 +5167,7 @@ namespace basist } } - // CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights + // CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights // This ASTC mode is basically block truncation coding (BTC) using 1-bit weights and 8-bit/component endpoints - very convenient. static void astc_pack_block_cem_12_weight_range0(uint32_t* pOutput, const astc_block_params* pBlock) { @@ -5205,7 +5205,7 @@ namespace basist // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x00; pBytes[3] = 0x00; pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0; - + pOutput[2] = 0; pOutput[3] = 0; @@ -5231,7 +5231,7 @@ namespace basist // Write constant block mode, color component selector, number of partitions, color endpoint mode // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode pBytes[0] = 0x42; pBytes[1] = 0x00; pBytes[2] = 0x01; pBytes[3] = 0x00; - + pOutput[1] = 0; pOutput[2] = 0; pOutput[3] = 0; @@ -5259,7 +5259,7 @@ namespace basist { uint8_t m_lo, m_hi; } g_astc_single_color_encoding_1[256]; - + static void transcoder_init_astc() { for (uint32_t base_color = 0; base_color < 32; base_color++) @@ -5337,7 +5337,7 @@ namespace basist g_ise_to_unquant[bit | (trit << 4)] = unq; } } - + // Compute table used for optimal single color encoding. 
for (int i = 0; i < 256; i++) { @@ -5352,9 +5352,9 @@ namespace basist int l = lo_v | (lo_v << 8); int h = hi_v | (hi_v << 8); - + int v = ((l * (64 - 21) + (h * 21) + 32) / 64) >> 8; - + int e = abs(v - i); if (e < lowest_e) @@ -5376,7 +5376,7 @@ namespace basist for (int lo = 0; lo < 48; lo++) { const int lo_v = g_ise_to_unquant[lo]; - + int e = abs(lo_v - i); if (e < lowest_e) @@ -5391,7 +5391,7 @@ namespace basist // Converts opaque or color+alpha ETC1S block to ASTC 4x4. // This function tries to use the best ASTC mode given the block's actual contents. - static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector, + static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector, bool transcode_alpha, const endpoint *pEndpoint_codebook, const selector *pSelector_codebook) { astc_block_params blk; @@ -5435,7 +5435,7 @@ namespace basist // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-void-extent-blocks uint32_t r, g, b; decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); - + uint32_t* pOutput = static_cast(pDst_block); uint8_t* pBytes = reinterpret_cast(pDst_block); @@ -5455,7 +5455,7 @@ namespace basist } else if ((pSelector->m_num_unique_selectors <= 2) && (num_unique_alpha_selectors <= 2)) { - // Both color and alpha use <= 2 unique selectors each. + // Both color and alpha use <= 2 unique selectors each. // Use block truncation coding, which is lossless with ASTC (8-bit endpoints, 1-bit weights). color32 block_colors[4]; decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table); @@ -5502,7 +5502,7 @@ namespace basist { uint32_t s = alpha_selectors.get_selector(x, y); s = (s == alpha_high_selector) ? 
1 : 0; - + blk.m_weights[(x + y * 4) * 2 + 1] = static_cast(s); } // x } // y @@ -5535,12 +5535,12 @@ namespace basist return; } - + // Either alpha and/or color use > 2 unique selectors each, so we must do something more complex. - + #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY // The optional higher quality modes use 8-bits endpoints vs. [0,47] endpoints. - + // If the block's base color is grayscale, all pixels are grayscale, so encode the block as Luminance+Alpha. if ((base_color.r == base_color.g) && (base_color.r == base_color.b)) { @@ -5574,7 +5574,7 @@ namespace basist { // Convert ETC1S alpha const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; @@ -5582,7 +5582,7 @@ namespace basist blk.m_endpoints[2] = pTable_g[best_mapping].m_lo; blk.m_endpoints[3] = pTable_g[best_mapping].m_hi; - + const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0]; for (uint32_t y = 0; y < 4; y++) @@ -5626,10 +5626,10 @@ namespace basist { // Convert ETC1S alpha const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; - + const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[base_color.g][inten_table][selector_range_table]; blk.m_endpoints[0] = pTable_g[best_mapping].m_lo; @@ -5771,7 +5771,7 @@ namespace basist { // Convert ETC1S alpha const uint32_t alpha_selector_range_table = 
g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; @@ -5815,7 +5815,7 @@ namespace basist const uint32_t r = block_colors[low_selector].r; const uint32_t g = block_colors[low_selector].g; const uint32_t b = block_colors[low_selector].b; - + blk.m_endpoints[0] = g_astc_single_color_encoding_1[r].m_lo; blk.m_endpoints[1] = g_astc_single_color_encoding_1[r].m_hi; @@ -5917,7 +5917,7 @@ namespace basist blk.m_endpoints[4] = pTable_b[best_mapping].m_lo; blk.m_endpoints[5] = pTable_b[best_mapping].m_hi; - + int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]]; int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]]; bool invert = false; @@ -6082,8 +6082,8 @@ namespace basist static void transcoder_init_atc() { prepare_atc_single_color_table(g_pvrtc2_match45_equals_1, 16, 32, 1); - prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1); - prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1); + prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1); + prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1); prepare_atc_single_color_table(g_pvrtc2_match4, 1, 16, 3); prepare_atc_single_color_table(g_atc_match5, 1, 32, 3); @@ -6137,7 +6137,7 @@ namespace basist pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match56_equals_1[g].m_lo, g_atc_match55_equals_1[b].m_lo); pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match56_equals_1[g].m_hi, g_atc_match55_equals_1[b].m_hi); - + pBlock->m_sels[0] = 0x55; pBlock->m_sels[1] = 0x55; pBlock->m_sels[2] = 0x55; @@ -6272,7 +6272,7 @@ 
namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6346,7 +6346,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6376,7 +6376,7 @@ namespace basist } // inten fclose(pFile); - + // PVRTC2 45 fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_45.inc", "w"); @@ -6421,7 +6421,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. 
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6498,7 +6498,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6575,7 +6575,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6652,7 +6652,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. 
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6780,12 +6780,12 @@ namespace basist { uint8_t m_l, m_h; } g_pvrtc2_trans_match44[256]; - + static struct { uint8_t m_l, m_h; } g_pvrtc2_alpha_match33[256]; - + static struct { uint8_t m_l, m_h; @@ -6795,7 +6795,7 @@ namespace basist { uint8_t m_l, m_h; } g_pvrtc2_alpha_match33_3[256]; - + // PVRTC2 can be forced to look like a slightly weaker variant of ATC/BC1, so that's what we do here for simplicity. static void convert_etc1s_to_pvrtc2_rgb(void* pDst, const endpoint* pEndpoints, const selector* pSelector) { @@ -6909,7 +6909,7 @@ namespace basist } typedef struct { float c[4]; } vec4F; - + static inline vec4F* vec4F_set_scalar(vec4F* pV, float x) { pV->c[0] = x; pV->c[1] = x; pV->c[2] = x; pV->c[3] = x; return pV; } static inline vec4F* vec4F_set(vec4F* pV, float x, float y, float z, float w) { pV->c[0] = x; pV->c[1] = y; pV->c[2] = z; pV->c[3] = w; return pV; } static inline vec4F* vec4F_saturate_in_place(vec4F* pV) { pV->c[0] = saturate(pV->c[0]); pV->c[1] = saturate(pV->c[1]); pV->c[2] = saturate(pV->c[2]); pV->c[3] = saturate(pV->c[3]); return pV; } @@ -6927,9 +6927,9 @@ namespace basist } static inline int sq(int x) { return x * x; } - - // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0. - // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! + + // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0. + // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! // And there's nothing straightforward we can do because using the other modes is too expensive/complex. 
I can see why Apple didn't adopt it. static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook) { @@ -6984,13 +6984,13 @@ namespace basist const uint32_t high_selector = pSelector->m_hi_selector; const int num_unique_color_selectors = pSelector->m_num_unique_selectors; - + // We need to reencode the block at the pixel level, unfortunately, from two ETC1S planes. // Do 4D incremental PCA, project all pixels to this hyperline, then quantize to packed endpoints and compute the modulation values. const int br = (base_color.r << 3) | (base_color.r >> 2); const int bg = (base_color.g << 3) | (base_color.g >> 2); const int bb = (base_color.b << 3) | (base_color.b >> 2); - + color32 block_cols[4]; for (uint32_t i = 0; i < 4; i++) { @@ -7019,14 +7019,14 @@ namespace basist decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); // Mod 0 - uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255; + uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255; uint32_t la0 = g_pvrtc2_alpha_match33_0[constant_alpha_val].m_l; uint32_t cr0 = (lr0 << 1) | (lr0 >> 3); uint32_t cg0 = (lg0 << 1) | (lg0 >> 3); uint32_t cb0 = (lb0 << 2) | (lb0 >> 1); uint32_t ca0 = (la0 << 1); - + cr0 = (cr0 << 3) | (cr0 >> 2); cg0 = (cg0 << 3) | (cg0 >> 2); cb0 = (cb0 << 3) | (cb0 >> 2); @@ -7055,14 +7055,14 @@ namespace basist uint32_t cg3 = (lg3 << 1) | (lg3 >> 3); uint32_t cb3 = (lb3 << 1) | (lb3 >> 3); uint32_t ca3 = (la3 << 1) | 1; - + cr3 = (cr3 << 3) | (cr3 >> 2); cg3 = (cg3 << 3) | (cg3 >> 2); cb3 = (cb3 << 3) | (cb3 >> 2); ca3 = (ca3 << 4) | ca3; uint32_t err3 = sq(cr3 - r) + sq(cg3 - g) + sq(cb3 - b) + sq(ca3 - constant_alpha_val) * 2; - + // Mod 1 uint32_t lr1 = g_pvrtc2_trans_match44[r].m_l, lg1 = g_pvrtc2_trans_match44[g].m_l, lb1 = g_pvrtc2_trans_match34[b].m_l; uint32_t hr1 = 
g_pvrtc2_trans_match44[r].m_h, hg1 = g_pvrtc2_trans_match44[g].m_h, hb1 = g_pvrtc2_trans_match34[b].m_h; @@ -7137,7 +7137,7 @@ namespace basist // It's a solid color block. uint32_t low_a = block_cols[alpha_selectors.m_lo_selector].a; uint32_t high_a = block_cols[alpha_selectors.m_hi_selector].a; - + const float S = 1.0f / 255.0f; vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, low_a * S); vec4F_set(&maxColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, high_a * S); @@ -7149,7 +7149,7 @@ namespace basist vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, constant_alpha_val * S); vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, constant_alpha_val * S); } - // See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis). + // See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis). // To keep quality up we need to use full 4D PCA in this case. 
else if ((block_cols[low_selector].c[0] == 0) || (block_cols[high_selector].c[0] == 255) || (block_cols[low_selector].c[1] == 0) || (block_cols[high_selector].c[1] == 255) || @@ -7200,7 +7200,7 @@ namespace basist } vec4F_normalize_in_place(&axis); - + if (vec4F_dot(&axis, &axis) < .5f) vec4F_set_scalar(&axis, .5f); @@ -7300,10 +7300,10 @@ namespace basist // 4433 4443 color32 trialMinColor, trialMaxColor; - + trialMinColor.set_clamped((int)(minColor.c[0] * 15.0f + .5f), (int)(minColor.c[1] * 15.0f + .5f), (int)(minColor.c[2] * 7.0f + .5f), (int)(minColor.c[3] * 7.0f + .5f)); trialMaxColor.set_clamped((int)(maxColor.c[0] * 15.0f + .5f), (int)(maxColor.c[1] * 15.0f + .5f), (int)(maxColor.c[2] * 15.0f + .5f), (int)(maxColor.c[3] * 7.0f + .5f)); - + pBlock->set_trans_low_color(trialMinColor.r, trialMinColor.g, trialMinColor.b, trialMinColor.a); pBlock->set_trans_high_color(trialMaxColor.r, trialMaxColor.g, trialMaxColor.b, trialMaxColor.a); @@ -7376,7 +7376,7 @@ namespace basist } } } - + static void transcoder_init_pvrtc2() { for (uint32_t v = 0; v < 256; v++) @@ -7482,7 +7482,7 @@ namespace basist g_pvrtc2_trans_match34[v].m_l = (uint8_t)best_l; g_pvrtc2_trans_match34[v].m_h = (uint8_t)best_h; } - + for (uint32_t v = 0; v < 256; v++) { int best_l = 0, best_h = 0, lowest_err = INT_MAX; @@ -7610,7 +7610,7 @@ namespace basist sym_codec.stop(); m_local_selectors.resize(num_selectors); - + if (!sym_codec.init(pSelectors_data, selectors_data_size)) { BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 5\n"); @@ -7635,7 +7635,7 @@ namespace basist BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: hybrid global selector codebooks are unsupported\n"); return false; } - + const bool used_raw_encoding = (sym_codec.get_bits(1) == 1); if (used_raw_encoding) @@ -7816,7 +7816,7 @@ namespace basist if (!output_rows_in_pixels) output_rows_in_pixels = orig_height; } - + basisu::vector* pPrev_frame_indices = nullptr; if (is_video) { @@ 
-7844,12 +7844,12 @@ namespace basist } approx_move_to_front selector_history_buf(m_selector_history_buf_size); - + uint32_t cur_selector_rle_count = 0; decoder_etc_block block; memset(&block, 0, sizeof(block)); - + //block.set_flip_bit(true); // Setting the flip bit to false to be compatible with the Khronos KDFS. block.set_flip_bit(false); @@ -8097,7 +8097,7 @@ namespace basist case block_format::cETC1: { decoder_etc_block* pDst_block = reinterpret_cast(static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes); - + block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false)); block.set_inten_table(0, pEndpoints->m_inten5); block.set_inten_table(1, pEndpoints->m_inten5); @@ -8148,7 +8148,7 @@ namespace basist const uint32_t low_selector = pSelector->m_lo_selector; const uint32_t high_selector = pSelector->m_hi_selector; - // Get block's RGB bounding box + // Get block's RGB bounding box color32 block_colors[2]; decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector); @@ -8164,7 +8164,7 @@ namespace basist pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints; #else assert(0); -#endif +#endif break; } @@ -8172,7 +8172,7 @@ namespace basist { #if BASISD_SUPPORT_PVRTC1 assert(pAlpha_blocks); - + block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false)); block.set_inten_table(0, pEndpoints->m_inten5); block.set_inten_table(1, pEndpoints->m_inten5); @@ -8180,7 +8180,7 @@ namespace basist ((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block; - // Get block's RGBA bounding box + // Get block's RGBA bounding box const color32& base_color = pEndpoints->m_color5; const uint32_t inten_table = pEndpoints->m_inten5; const uint32_t low_selector = pSelector->m_lo_selector; @@ -8215,7 +8215,7 @@ namespace basist pPVRTC_endpoints[block_x + block_y * num_blocks_x] = 
temp.m_endpoints; #else assert(0); -#endif +#endif break; } @@ -8299,7 +8299,7 @@ namespace basist assert(transcode_alpha); void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; - + convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &endpoints[0], &selectors[0]); #endif break; @@ -8315,10 +8315,10 @@ namespace basist { assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); - + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); - + int colors[4]; decoder_etc_block::get_block_colors5_g(colors, pEndpoints->m_color5, pEndpoints->m_inten5); @@ -8332,7 +8332,7 @@ namespace basist pDst_pixels[3+4] = static_cast(colors[(s >> 2) & 3]); pDst_pixels[3+8] = static_cast(colors[(s >> 4) & 3]); pDst_pixels[3+12] = static_cast(colors[(s >> 6) & 3]); - + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t); } } @@ -8361,7 +8361,7 @@ namespace basist color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); - + for (uint32_t y = 0; y < max_y; y++) { const uint32_t s = pSelector->m_selectors[y]; @@ -8482,7 +8482,7 @@ namespace basist cur = byteswap_uint16(cur); cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3]; - + if (BASISD_IS_BIG_ENDIAN) cur = byteswap_uint16(cur); @@ -8614,7 +8614,7 @@ namespace basist if (!output_row_pitch_in_blocks_or_pixels) output_row_pitch_in_blocks_or_pixels = orig_width; - if (!output_rows_in_pixels) + if (!output_rows_in_pixels) output_rows_in_pixels = orig_height; // Now make sure the output buffer is large enough, or we'll overwrite memory. 
@@ -8694,7 +8694,7 @@ namespace basist // Switch to PVRTC1 RGB if the input doesn't have alpha. target_format = transcoder_texture_format::cTFPVRTC1_4_RGB; } - + const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; @@ -8725,7 +8725,7 @@ namespace basist { //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); - + if (!status) { BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); @@ -8850,7 +8850,7 @@ namespace basist if (basis_file_has_alpha_slices) { - // First decode the alpha data + // First decode the alpha data //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_A8, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); } @@ -8888,8 +8888,8 @@ namespace basist return false; #else assert(bytes_per_block_or_pixel == 16); - - // First decode the alpha data + + // First decode the alpha data if 
(basis_file_has_alpha_slices) { //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); @@ -9018,7 +9018,7 @@ namespace basist #else assert(bytes_per_block_or_pixel == 16); - // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) { //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); @@ -9078,7 +9078,7 @@ namespace basist } else { - // Now decode the color data and transcode to PVRTC2 RGBA. + // Now decode the color data and transcode to PVRTC2 RGBA. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels); } @@ -9099,7 +9099,7 @@ namespace basist { // Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. 
- // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cA32, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); @@ -9140,7 +9140,7 @@ namespace basist { // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. - // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); @@ -9242,7 +9242,7 @@ namespace basist return status; } - + basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder() { } @@ -9308,7 +9308,7 @@ namespace basist for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y) { void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; - + for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t *)pDst_block + output_block_or_pixel_stride_in_bytes) { 
switch (fmt) @@ -9344,7 +9344,7 @@ namespace basist } case block_format::cBC4: { - if (channel0 < 0) + if (channel0 < 0) channel0 = 0; status = transcode_uastc_to_bc4(*pSource_block, pDst_block, high_quality, channel0); break; @@ -9507,7 +9507,7 @@ namespace basist return false; #endif } - + bool basisu_lowlevel_uastc_transcoder::transcode_image( transcoder_texture_format target_format, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, @@ -9529,7 +9529,7 @@ namespace basist { BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: source data buffer too small\n"); return false; - } + } if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA)) { @@ -9556,7 +9556,7 @@ namespace basist BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: output buffer size too small\n"); return false; } - + bool status = false; // UASTC4x4 @@ -9567,7 +9567,7 @@ namespace basist //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC1, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1); - + if (!status) { BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); @@ -9784,7 +9784,7 @@ namespace basist return status; } - + basisu_transcoder::basisu_transcoder() : m_ready_to_transcode(false) { @@ -9812,7 +9812,7 @@ namespace basist return false; } } -#endif +#endif return true; } @@ -9899,7 +9899,7 @@ namespace basist return false; } } - + // This flag dates back to pre-Basis Universal, when .basis 
supported full ETC1 too. if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0) { @@ -9915,7 +9915,7 @@ namespace basist return false; } } - + if ((pHeader->m_slice_desc_file_ofs >= data_size) || ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices)) ) @@ -10031,12 +10031,12 @@ namespace basist image_info.m_image_index = image_index; image_info.m_total_levels = total_levels; - + image_info.m_alpha_flag = false; // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) - image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; else image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; @@ -10159,13 +10159,13 @@ namespace basist image_info.m_image_index = image_index; image_info.m_level_index = level_index; - + // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. 
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; else image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; - + image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0; image_info.m_width = slice_desc.m_num_blocks_x * 4; image_info.m_height = slice_desc.m_num_blocks_y * 4; @@ -10223,7 +10223,7 @@ namespace basist file_info.m_tex_format = static_cast(static_cast(pHeader->m_tex_format)); file_info.m_etc1s = (pHeader->m_tex_format == (int)basis_tex_format::cETC1S); - + file_info.m_y_flipped = (pHeader->m_flags & cBASISHeaderFlagYFlipped) != 0; file_info.m_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; @@ -10288,7 +10288,7 @@ namespace basist return true; } - + bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size) { if (!validate_header_quick(pData, data_size)) @@ -10396,7 +10396,7 @@ namespace basist m_lowlevel_etc1s_decoder.clear(); } } - + m_ready_to_transcode = true; return true; @@ -10407,7 +10407,7 @@ namespace basist m_lowlevel_etc1s_decoder.clear(); m_ready_to_transcode = false; - + return true; } @@ -10446,7 +10446,7 @@ namespace basist const basis_slice_desc& slice_desc = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_index]; uint32_t total_4x4_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; - + if (basis_block_format_is_uncompressed(fmt)) { // Assume the output buffer is orig_width by orig_height @@ -10509,7 +10509,7 @@ namespace basist BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_size, or passed in buffer too small\n"); return false; } - + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) { return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, @@ -10597,7 +10597,7 @@ namespace basist if 
(!output_row_pitch_in_blocks_or_pixels) output_row_pitch_in_blocks_or_pixels = num_blocks_x; - + if ((fmt == block_format::cETC2_EAC_A8) || (fmt == block_format::cETC2_EAC_R11)) { #if BASISD_SUPPORT_ETC2_EAC_A8 @@ -10683,7 +10683,7 @@ namespace basist if (slice_index < 0) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: failed finding slice index\n"); - // Unable to find the requested image/level + // Unable to find the requested image/level return false; } @@ -10692,7 +10692,7 @@ namespace basist // Switch to PVRTC1 RGB if the input doesn't have alpha. fmt = transcoder_texture_format::cTFPVRTC1_4_RGB; } - + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) { if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsHasAlpha) @@ -10729,7 +10729,7 @@ namespace basist } } } - + bool status = false; const uint32_t total_slice_blocks = pSlice_descs[slice_index].m_num_blocks_x * pSlice_descs[slice_index].m_num_blocks_y; @@ -10737,11 +10737,11 @@ namespace basist if (((fmt == transcoder_texture_format::cTFPVRTC1_4_RGB) || (fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA)) && (output_blocks_buf_size_in_blocks_or_pixels > total_slice_blocks)) { // The transcoder doesn't write beyond total_slice_blocks, so we need to clear the rest ourselves. - // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. + // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. // However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block_or_pixel. This is all the transcoder actually writes to memory. 
memset(static_cast(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel); } - + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) { const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; @@ -10753,7 +10753,7 @@ namespace basist pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); } - else + else { // ETC1S const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; @@ -10779,14 +10779,14 @@ namespace basist decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); } // if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) - + if (!status) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning false\n"); } else { - //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n"); + //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n"); } return status; @@ -11001,13 +11001,13 @@ namespace basist } return false; } - + uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt) { switch (fmt) { case transcoder_texture_format::cTFRGBA32: - return sizeof(uint32_t); + return sizeof(uint32_t); case transcoder_texture_format::cTFRGB565: case transcoder_texture_format::cTFBGR565: case transcoder_texture_format::cTFRGBA4444: @@ -11017,7 +11017,7 @@ namespace basist } return 0; } - + uint32_t basis_get_block_width(transcoder_texture_format tex_type) { switch (tex_type) @@ -11035,7 +11035,7 @@ namespace basist BASISU_NOTE_UNUSED(tex_type); return 4; } - + bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt) { if (fmt == basis_tex_format::cUASTC4x4) @@ 
-11093,7 +11093,7 @@ namespace basist case transcoder_texture_format::cTFETC2_RGBA: return true; #endif -#if BASISD_SUPPORT_ASTC +#if BASISD_SUPPORT_ASTC case transcoder_texture_format::cTFASTC_4x4_RGBA: return true; #endif @@ -11124,9 +11124,9 @@ namespace basist return false; } - // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ // UASTC - // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ #if BASISD_SUPPORT_UASTC const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2] = @@ -11851,7 +11851,7 @@ namespace basist if (group_size) { - // Range has trits or quints - pack each group of 5 or 3 values + // Range has trits or quints - pack each group of 5 or 3 values const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3); for (int group_index = 0; group_index < total_groups; group_index++) @@ -12141,7 +12141,7 @@ namespace basist bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints) { //memset(&unpacked, 0, sizeof(unpacked)); - + #if 0 uint8_t table[128]; memset(table, 0xFF, sizeof(table)); @@ -12196,7 +12196,7 @@ namespace basist return true; } - + if (read_hints) { if (g_uastc_mode_has_bc1_hint0[mode]) @@ -12229,7 +12229,7 @@ namespace basist } else bit_ofs += g_uastc_mode_total_hint_bits[mode]; - + uint32_t subsets = 1; switch (mode) { @@ -12442,7 +12442,7 @@ namespace basist { // All other modes have <= 64 weight bits. 
uint64_t bits; - + // Read the weight bits if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS)) bits = read_bits64(blk.m_bytes, bit_ofs, basisu::minimum(64, 128 - (int)bit_ofs)); @@ -12450,31 +12450,31 @@ namespace basist { bits = blk.m_dwords[2]; bits |= (((uint64_t)blk.m_dwords[3]) << 32U); - + if (bit_ofs >= 64U) bits >>= (bit_ofs - 64U); else { assert(bit_ofs >= 56U); - + uint32_t bits_needed = 64U - bit_ofs; bits <<= bits_needed; bits |= (blk.m_bytes[7] >> (8U - bits_needed)); } } - + bit_ofs = 0; const uint32_t mask = (1U << weight_bits) - 1U; const uint32_t anchor_mask = (1U << (weight_bits - 1U)) - 1U; - + if (total_planes == 2) { // Dual plane modes always have a single subset, and the first 2 weights are anchors. unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); bit_ofs += (weight_bits - 1); - + unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); bit_ofs += (weight_bits - 1); @@ -12492,7 +12492,7 @@ namespace basist if (weight_bits == 4) { assert(bit_ofs == 0); - + // Specialize the most common case: 4-bit weights. unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits) & 7); unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> 3) & 15); @@ -13038,7 +13038,7 @@ namespace basist } case 2: { - // 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1 + // 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1 dst_blk.m_mode = 1; dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; @@ -13977,7 +13977,7 @@ namespace basist bool flip = pack_etc1_y_estimate_flipped(&block_y[0][0], upper_avg, lower_avg, left_avg, right_avg); // non-flipped: | | - // vs. + // vs. 
// flipped: -- // -- @@ -14588,7 +14588,7 @@ namespace basist static const uint8_t s_uastc2_to_bc1[4] = { 0, 2, 3, 1 }; static const uint8_t s_uastc1_to_bc1[2] = { 0, 1 }; const uint8_t* s_uastc_to_bc1_weights[6] = { nullptr, s_uastc1_to_bc1, s_uastc2_to_bc1, s_uastc3_to_bc1, s_uastc4_to_bc1, s_uastc5_to_bc1 }; - + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride) { uint32_t min0_v, max0_v, min1_v, max1_v,min2_v, max2_v, min3_v, max3_v; @@ -14676,7 +14676,7 @@ namespace basist a2 |= (s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U); a3 |= (s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U); } - + { const int v0 = pPixels[8 * stride] * 14 + bias; const int v1 = pPixels[9 * stride] * 14 + bias; @@ -14700,7 +14700,7 @@ namespace basist } const uint64_t f = a0 | a1 | a2 | a3; - + pDst_bytes[2] = (uint8_t)f; pDst_bytes[3] = (uint8_t)(f >> 8U); pDst_bytes[4] = (uint8_t)(f >> 16U); @@ -14723,7 +14723,7 @@ namespace basist int dots[4]; for (uint32_t i = 0; i < 4; i++) dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; - + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; ar *= 2; ag *= 2; ab *= 2; @@ -14732,7 +14732,7 @@ namespace basist { const int d = pSrc_pixels[i].r * ar + pSrc_pixels[i].g * ag + pSrc_pixels[i].b * ab; static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; - + // Rounding matters here! // d <= t0: <=, not <, to the later LS step "sees" a wider range of selectors. It matters for quality. sels[i] = s_sels[(d <= t0) + (d < t1) + (d < t2)]; @@ -14775,11 +14775,11 @@ namespace basist } struct vec3F { float c[3]; }; - + static bool compute_least_squares_endpoints_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh) { // Derived from bc7enc16's LS function. 
- // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. uint32_t uq00_r = 0, uq10_r = 0, ut_r = 0, uq00_g = 0, uq10_g = 0, ut_g = 0, uq00_b = 0, uq10_b = 0, ut_b = 0; @@ -14853,7 +14853,7 @@ namespace basist return true; } - void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) { dxt1_block* pDst_block = static_cast(pDst); @@ -14905,19 +14905,19 @@ namespace basist { const color32* pSrc_pixels = (const color32*)pPixels; dxt1_block* pDst_block = static_cast(pDst); - + int avg_r = -1, avg_g = 0, avg_b = 0; int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0; uint8_t sels[16]; - + const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0; if (use_sels) { // Caller is jamming in their own selectors for us to try. const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24); - + static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 }; - + for (uint32_t i = 0; i < 16; i++) sels[i] = s_sel_tran[(s >> (i * 2)) & 3]; } @@ -14929,13 +14929,13 @@ namespace basist for (j = 1; j < 16; j++) if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) break; - + if (j == 16) { encode_bc1_solid_block(pDst, fr, fg, fb); return; } - + // Select 2 colors along the principle axis. (There must be a faster/simpler way.) int total_r = fr, total_g = fg, total_b = fb; int max_r = fr, max_g = fg, max_b = fb; @@ -14969,7 +14969,7 @@ namespace basist float cov[6]; for (uint32_t i = 0; i < 6; i++) cov[i] = static_cast(icov[i])* (1.0f / 255.0f); - + #if 0 // Seems silly to use full PCA to choose 2 colors. The diff in avg. 
PSNR between using PCA vs. not is small (~.025 difference). // TODO: Try 2 or 3 different normalized diagonal vectors, choose the one that results in the largest dot delta @@ -15001,7 +15001,7 @@ namespace basist saxis_b = (int)(xb * m); } #endif - + int low_dot = INT_MAX, high_dot = INT_MIN, low_c = 0, high_c = 0; for (uint32_t i = 0; i < 16; i++) { @@ -15025,7 +15025,7 @@ namespace basist hr = to_5(pSrc_pixels[high_c].r); hg = to_6(pSrc_pixels[high_c].g); hb = to_5(pSrc_pixels[high_c].b); - + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); } // if (use_sels) @@ -15072,13 +15072,13 @@ namespace basist hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); } - + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); } uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb); uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb); - + // Always forbid 3 color blocks if (lc16 == hc16) { @@ -15130,7 +15130,7 @@ namespace basist pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; } } - + void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags) { const color32* pSrc_pixels = (const color32*)pPixels; @@ -15179,8 +15179,8 @@ namespace basist min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); total_r += r; total_g += g; total_b += b; } - - if (grayscale_flag) + + if (grayscale_flag) { // Grayscale blocks are a common enough case to specialize. 
if ((max_r - min_r) < 2) @@ -15497,7 +15497,7 @@ namespace basist // Always forbid 3 color blocks uint16_t lc16 = (uint16_t)b.get_low_color(); uint16_t hc16 = (uint16_t)b.get_high_color(); - + uint8_t mask = 0; // Make l > h @@ -15727,7 +15727,7 @@ namespace basist blk.m_base = static_cast(a); blk.m_table = 13; blk.m_multiplier = 0; - + memcpy(blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); return; @@ -16417,7 +16417,7 @@ namespace basist if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) return false; - // Get block's RGB bounding box + // Get block's RGB bounding box color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); if (from_alpha) @@ -16476,7 +16476,7 @@ namespace basist if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) return false; - // Get block's RGBA bounding box + // Get block's RGBA bounding box color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); for (uint32_t i = 0; i < 16; i++) @@ -16592,9 +16592,9 @@ namespace basist #endif // #if BASISD_SUPPORT_UASTC -// ------------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------------ // KTX2 -// ------------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------------ #if BASISD_SUPPORT_KTX2 const uint8_t g_ktx2_file_identifier[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; @@ -16616,7 +16616,7 @@ namespace basist m_key_values.clear(); memset(&m_etc1s_header, 0, sizeof(m_etc1s_header)); m_etc1s_image_descs.clear(); - + m_format = basist::basis_tex_format::cETC1S; m_dfd_color_model = 0; @@ -16628,9 +16628,9 @@ namespace basist m_dfd_chan1 = KTX2_DF_CHANNEL_UASTC_RGB; m_etc1s_transcoder.clear(); - + 
m_def_transcoder_state.clear(); - + m_has_alpha = false; m_is_video = false; } @@ -16701,7 +16701,7 @@ namespace basist return false; } } - + // 3.7 levelCount: "levelCount=0 is allowed, except for block-compressed formats" if (m_header.m_level_count < 1) { @@ -16758,7 +16758,7 @@ namespace basist } memcpy(&m_levels[0], m_pData + sizeof(ktx2_header), level_index_size_in_bytes); - + // Sanity check the level offsets and byte sizes for (uint32_t i = 0; i < m_levels.size(); i++) { @@ -16778,9 +16778,9 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset and/or length\n"); return false; } - + const uint64_t MAX_SANE_LEVEL_UNCOMP_SIZE = 2048ULL * 1024ULL * 1024ULL; - + if (m_levels[i].m_uncompressed_byte_length >= MAX_SANE_LEVEL_UNCOMP_SIZE) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too large)\n"); @@ -16817,7 +16817,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD offset and/or length\n"); return false; } - + const uint8_t* pDFD = m_pData + m_header.m_dfd_byte_offset; if (!m_dfd.try_resize(m_header.m_dfd_byte_length)) @@ -16827,17 +16827,17 @@ namespace basist } memcpy(m_dfd.data(), pDFD, m_header.m_dfd_byte_length); - + // This is all hard coded for only ETC1S and UASTC. 
uint32_t dfd_total_size = basisu::read_le_dword(pDFD); - + // 3.10.3: Sanity check if (dfd_total_size != m_header.m_dfd_byte_length) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (1)\n"); return false; } - + // 3.10.3: More sanity checking if (m_header.m_kvd_byte_length) { @@ -16850,7 +16850,7 @@ namespace basist const uint32_t dfd_bits = basisu::read_le_dword(pDFD + 3 * sizeof(uint32_t)); const uint32_t sample_channel0 = basisu::read_le_dword(pDFD + 7 * sizeof(uint32_t)); - + m_dfd_color_model = dfd_bits & 255; m_dfd_color_prims = (ktx2_df_color_primaries)((dfd_bits >> 8) & 255); m_dfd_transfer_func = (dfd_bits >> 16) & 255; @@ -16866,11 +16866,11 @@ namespace basist if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ETC1S) { m_format = basist::basis_tex_format::cETC1S; - + // 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD's sample count." // If m_has_alpha is true it may be 2-channel RRRG or 4-channel RGBA, but we let the caller deal with that. m_has_alpha = (m_header.m_dfd_byte_length == 60); - + m_dfd_samples = m_has_alpha ? 2 : 1; m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); @@ -16886,7 +16886,7 @@ namespace basist m_dfd_samples = 1; m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); - + // We're assuming "DATA" means RGBA so it has alpha. 
m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG); } @@ -16896,7 +16896,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD color model\n"); return false; } - + if (!read_key_values()) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: read_key_values() failed\n"); @@ -16940,7 +16940,7 @@ namespace basist return nullptr; } - + bool ktx2_transcoder::start_transcoding() { if (!m_pData) @@ -16949,7 +16949,7 @@ namespace basist return false; } - if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) { // Check if we've already decompressed the ETC1S global data. If so don't unpack it again. if (!m_etc1s_transcoder.get_endpoints().empty()) @@ -16960,7 +16960,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: decompress_etc1s_global_data() failed\n"); return false; } - + if (!m_is_video) { // See if there are any P-frames. If so it must be a video, even if there wasn't a KTXanimData key. 
@@ -17016,7 +17016,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: layer_index >= maximum(m_header.m_layer_count, 1)\n"); return false; } - + const uint32_t level_width = basisu::maximum(m_header.m_pixel_width >> level_index, 1); const uint32_t level_height = basisu::maximum(m_header.m_pixel_height >> level_index, 1); const uint32_t num_blocks_x = (level_width + 3) >> 2; @@ -17046,9 +17046,9 @@ namespace basist return true; } - + bool ktx2_transcoder::transcode_image_level( - uint32_t level_index, uint32_t layer_index, uint32_t face_index, + uint32_t level_index, uint32_t layer_index, uint32_t face_index, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, basist::transcoder_texture_format fmt, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, int channel0, int channel1, @@ -17062,7 +17062,7 @@ namespace basist if (!pState) pState = &m_def_transcoder_state; - + if (level_index >= m_levels.size()) { BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: level_index >= m_levels.size()\n"); @@ -17091,7 +17091,7 @@ namespace basist const uint8_t* pComp_level_data = m_pData + m_levels[level_index].m_byte_offset; uint64_t comp_level_data_size = m_levels[level_index].m_byte_length; - + const uint8_t* pUncomp_level_data = pComp_level_data; uint64_t uncomp_level_data_size = comp_level_data_size; @@ -17100,7 +17100,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_level_data_size > UINT32_MAX\n"); return false; } - + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) { // Check if we've already decompressed this level's supercompressed data. 
@@ -17118,12 +17118,12 @@ namespace basist pUncomp_level_data = pState->m_level_uncomp_data.data(); uncomp_level_data_size = pState->m_level_uncomp_data.size(); } - + const uint32_t level_width = basisu::maximum(m_header.m_pixel_width >> level_index, 1); const uint32_t level_height = basisu::maximum(m_header.m_pixel_height >> level_index, 1); const uint32_t num_blocks_x = (level_width + 3) >> 2; const uint32_t num_blocks_y = (level_height + 3) >> 2; - + if (m_format == basist::basis_tex_format::cETC1S) { // Ensure start_transcoding() was called. @@ -17137,7 +17137,7 @@ namespace basist (level_index * basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count) + layer_index * m_header.m_face_count + face_index; - + // Sanity check if (etc1s_image_index >= m_etc1s_image_descs.size()) { @@ -17172,7 +17172,7 @@ namespace basist // Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices. assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length); const uint32_t total_2D_image_size = num_blocks_x * num_blocks_y * KTX2_UASTC_BLOCK_SIZE; - + const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size; // Sanity checks @@ -17208,12 +17208,12 @@ namespace basist return true; } - + bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data) { const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData; const uint64_t comp_size = m_levels[level_index].m_byte_length; - + const uint64_t uncomp_size = m_levels[level_index].m_uncompressed_byte_length; if (((size_t)comp_size) != comp_size) @@ -17232,7 +17232,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Out of memory\n"); return false; } - + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) { #if BASISD_SUPPORT_KTX2_ZSTD @@ -17255,7 +17255,7 @@ namespace basist return true; } - + bool 
ktx2_transcoder::decompress_etc1s_global_data() { // Note: we don't actually support 3D textures in here yet @@ -17294,13 +17294,13 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: SGD byte length is too small, file is invalid or corrupted\n"); return false; } - + if (!m_etc1s_image_descs.try_resize(image_count)) { BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Out of memory\n"); return false; } - + memcpy(m_etc1s_image_descs.data(), pSrc, sizeof(ktx2_etc1s_image_desc) * image_count); pSrc += sizeof(ktx2_etc1s_image_desc) * image_count; @@ -17334,7 +17334,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_tables() failed, file is invalid or corrupted\n"); return false; } - + if (!m_etc1s_transcoder.decode_palettes( m_etc1s_header.m_endpoint_count, pEndpoint_data, m_etc1s_header.m_endpoints_byte_length, m_etc1s_header.m_selector_count, pSelector_data, m_etc1s_header.m_selectors_byte_length)) @@ -17342,7 +17342,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_palettes() failed, file is likely corrupted\n"); return false; } - + return true; } @@ -17383,7 +17383,7 @@ namespace basist while (src_left > sizeof(uint32_t)) { uint32_t l = basisu::read_le_dword(pSrc); - + pSrc += sizeof(uint32_t); src_left -= sizeof(uint32_t); @@ -17404,7 +17404,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); return false; } - + basisu::uint8_vec& key_data = m_key_values.back().m_key; basisu::uint8_vec& value_data = m_key_values.back().m_value; @@ -17426,7 +17426,7 @@ namespace basist l--; } while (key_data.back()); - + if (!value_data.try_resize(l)) { BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); @@ -17455,7 +17455,7 @@ namespace basist return true; } - + #endif // BASISD_SUPPORT_KTX2 bool basisu_transcoder_supports_ktx2() diff --git a/transcoder/basisu_transcoder.h 
b/transcoder/basisu_transcoder.h index 3327e8dd..67736e67 100644 --- a/transcoder/basisu_transcoder.h +++ b/transcoder/basisu_transcoder.h @@ -22,7 +22,7 @@ #define BASISD_SUPPORT_KTX2 1 #endif -// Set BASISD_SUPPORT_KTX2_ZSTD to 0 to disable Zstd usage and KTX2 UASTC Zstd supercompression support +// Set BASISD_SUPPORT_KTX2_ZSTD to 0 to disable Zstd usage and KTX2 UASTC Zstd supercompression support #ifndef BASISD_SUPPORT_KTX2_ZSTD #define BASISD_SUPPORT_KTX2_ZSTD 1 #endif @@ -41,7 +41,7 @@ namespace basist // High-level composite texture formats supported by the transcoder. // Each of these texture formats directly correspond to OpenGL/D3D/Vulkan etc. texture formats. // Notes: - // - If you specify a texture format that supports alpha, but the .basis file doesn't have alpha, the transcoder will automatically output a + // - If you specify a texture format that supports alpha, but the .basis file doesn't have alpha, the transcoder will automatically output a // fully opaque (255) alpha channel. // - The PVRTC1 texture formats only support power of 2 dimension .basis files, but this may be relaxed in a future version. // - The PVRTC1 transcoders are real-time encoders, so don't expect the highest quality. We may add a slower encoder with improved quality. @@ -70,7 +70,7 @@ namespace basist // ATC (mobile, Adreno devices, this is a niche format) cTFATC_RGB = 11, // Opaque, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. ATI ATC (GL_ATC_RGB_AMD) - cTFATC_RGBA = 12, // Opaque+alpha, alpha channel will be opaque for opaque .basis files. ATI ATC (GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD) + cTFATC_RGBA = 12, // Opaque+alpha, alpha channel will be opaque for opaque .basis files. ATI ATC (GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD) // FXT1 (desktop, Intel devices, this is a super obscure format) cTFFXT1_RGB = 17, // Opaque only, uses exclusively CC_MIXED blocks. Notable for having a 8x4 block size. 
GL_3DFX_texture_compression_FXT1 is supported on Intel integrated GPU's (such as HD 630). @@ -169,7 +169,7 @@ namespace basist basisu::vector m_block_endpoint_preds[2]; enum { cMaxPrevFrameLevels = 16 }; - basisu::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] + basisu::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] void clear() { @@ -247,13 +247,13 @@ namespace basist typedef basisu::vector selector_vec; const selector_vec& get_selectors() const { return m_local_selectors; } - + private: const basisu_lowlevel_etc1s_transcoder* m_pGlobal_codebook; endpoint_vec m_local_endpoints; selector_vec m_local_selectors; - + huffman_decoding_table m_endpoint_pred_model, m_delta_endpoint_model, m_selector_model, m_selector_history_buf_rle_model; uint32_t m_selector_history_buf_size; @@ -274,7 +274,7 @@ namespace basist // This flag is used internally when decoding to BC3. cDecodeFlagsBC1ForbidThreeColorBlocks = 8, - // The output buffer contains alpha endpoint/selector indices. + // The output buffer contains alpha endpoint/selector indices. // Used internally when decoding formats like ASTC that require both color and alpha data to be available when transcoding to the output format. cDecodeFlagsOutputHasAlphaIndices = 16, @@ -481,11 +481,11 @@ namespace basist // transcode_image_level() decodes a single mipmap level from the .basis file to any of the supported output texture formats. // It'll first find the slice(s) to transcode, then call transcode_slice() one or two times to decode both the color and alpha texture data (or RG texture data from two slices for BC5). // If the .basis file doesn't have alpha slices, the output alpha blocks will be set to fully opaque (all 255's). - // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. 
+ // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. // output_blocks_buf_size_in_blocks_or_pixels should be at least the image level's total_blocks (num_blocks_x * num_blocks_y), or the total number of output pixels if fmt==cTFRGBA32. // output_row_pitch_in_blocks_or_pixels: Number of blocks or pixels per row. If 0, the transcoder uses the slice's num_blocks_x or orig_width (NOT num_blocks_x * 4). Ignored for PVRTC1 (due to texture swizzling). // output_rows_in_pixels: Ignored unless fmt is uncompressed (cRGBA32, etc.). The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). - // Notes: + // Notes: // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. // - This method assumes the output texture buffer is readable. In some cases to handle alpha, the transcoder will write temporary data to the output texture in // a first pass, which will be read in a second pass. @@ -540,7 +540,7 @@ namespace basist // basisu_transcoder_init() MUST be called before a .basis file can be transcoded. void basisu_transcoder_init(); - + enum debug_flags_t { cDebugFlagVisCRs = 1, @@ -550,10 +550,10 @@ namespace basist uint32_t get_debug_flags(); void set_debug_flags(uint32_t f); - // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ // Optional .KTX2 file format support // KTX2 reading optionally requires miniz or Zstd decompressors for supercompressed UASTC files. 
- // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ #if BASISD_SUPPORT_KTX2 #pragma pack(push) #pragma pack(1) @@ -696,12 +696,12 @@ namespace basist { case KTX2_DF_PRIMARIES_UNSPECIFIED: return "UNSPECIFIED"; case KTX2_DF_PRIMARIES_BT709: return "BT709"; - case KTX2_DF_PRIMARIES_BT601_EBU: return "EBU"; + case KTX2_DF_PRIMARIES_BT601_EBU: return "EBU"; case KTX2_DF_PRIMARIES_BT601_SMPTE: return "SMPTE"; case KTX2_DF_PRIMARIES_BT2020: return "BT2020"; case KTX2_DF_PRIMARIES_CIEXYZ: return "CIEXYZ"; case KTX2_DF_PRIMARIES_ACES: return "ACES"; - case KTX2_DF_PRIMARIES_ACESCC: return "ACESCC"; + case KTX2_DF_PRIMARIES_ACESCC: return "ACESCC"; case KTX2_DF_PRIMARIES_NTSC1953: return "NTSC1953"; case KTX2_DF_PRIMARIES_PAL525: return "PAL525"; case KTX2_DF_PRIMARIES_DISPLAYP3: return "DISPLAYP3"; @@ -709,7 +709,7 @@ namespace basist default: break; } return "?"; - } + } // Information about a single 2D texture "image" in a KTX2 file. struct ktx2_image_level_info @@ -740,7 +740,7 @@ namespace basist // true if the image is an I-Frame. Currently, for ETC1S textures, the first frame will always be an I-Frame, and subsequent frames will always be P-Frames. bool m_iframe_flag; }; - + // Thread-specific ETC1S/supercompressed UASTC transcoder state. (If you're not doing multithreading transcoding you can ignore this.) struct ktx2_transcoder_state { @@ -758,9 +758,9 @@ namespace basist // This class is quite similar to basisu_transcoder. It treats KTX2 files as a simple container for ETC1S/UASTC texture data. // It does not support 1D or 3D textures. - // It only supports 2D and cubemap textures, with or without mipmaps, texture arrays of 2D/cubemap textures, and texture video files. + // It only supports 2D and cubemap textures, with or without mipmaps, texture arrays of 2D/cubemap textures, and texture video files. 
// It only supports raw non-supercompressed UASTC, ETC1S, UASTC+Zstd, or UASTC+zlib compressed files. - // DFD (Data Format Descriptor) parsing is purposely as simple as possible. + // DFD (Data Format Descriptor) parsing is purposely as simple as possible. // If you need to know how to interpret the texture channels you'll need to parse the DFD yourself after calling get_dfd(). class ktx2_transcoder { @@ -801,7 +801,7 @@ namespace basist uint32_t get_layers() const { return m_header.m_layer_count; } // Returns cETC1S or cUASTC4x4. Valid after init(). - basist::basis_tex_format get_format() const { return m_format; } + basist::basis_tex_format get_format() const { return m_format; } bool is_etc1s() const { return get_format() == basist::basis_tex_format::cETC1S; } @@ -820,7 +820,7 @@ namespace basist // Returns the DFD color primary. // We do not validate the color primaries, so the returned value may not be in the ktx2_df_color_primaries enum. ktx2_df_color_primaries get_dfd_color_primaries() const { return m_dfd_color_prims; } - + // Returns KTX2_KHR_DF_TRANSFER_LINEAR or KTX2_KHR_DF_TRANSFER_SRGB. uint32_t get_dfd_transfer_func() const { return m_dfd_transfer_func; } @@ -828,9 +828,9 @@ namespace basist // Returns 1 (ETC1S/UASTC) or 2 (ETC1S with an internal alpha channel). uint32_t get_dfd_total_samples() const { return m_dfd_samples; } - - // Returns the channel mapping for each DFD "sample". UASTC always has 1 sample, ETC1S can have one or two. - // Note the returned value SHOULD be one of the ktx2_df_channel_id enums, but we don't validate that. + + // Returns the channel mapping for each DFD "sample". UASTC always has 1 sample, ETC1S can have one or two. + // Note the returned value SHOULD be one of the ktx2_df_channel_id enums, but we don't validate that. // It's up to the caller to decide what to do if the value isn't in the enum. 
ktx2_df_channel_id get_dfd_channel_id0() const { return m_dfd_chan0; } ktx2_df_channel_id get_dfd_channel_id1() const { return m_dfd_chan1; } @@ -868,18 +868,18 @@ namespace basist // is_video() is only valid after start_transcoding() is called. // For ETC1S data, if this returns true you must currently transcode the file from first to last frame, in order, without skipping any frames. bool is_video() const { return m_is_video; } - + // start_transcoding() MUST be called before calling transcode_image(). // This method decompresses the ETC1S global endpoint/selector codebooks, which is not free, so try to avoid calling it excessively. bool start_transcoding(); - + // get_image_level_info() be called after init(), but the m_iframe_flag's won't be valid until start_transcoding() is called. // You can call this method before calling transcode_image_level() to retrieve basic information about the mipmap level's dimensions, etc. bool get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const; // transcode_image_level() transcodes a single 2D texture or cubemap face from the KTX2 file. // Internally it uses the same low-level transcode API's as basisu_transcoder::transcode_image_level(). - // If the file is UASTC and is supercompressed with Zstandard, and the file is a texture array or cubemap, it's highly recommended that each mipmap level is + // If the file is UASTC and is supercompressed with Zstandard, and the file is a texture array or cubemap, it's highly recommended that each mipmap level is // completely transcoded before switching to another level. Every time the mipmap level is changed all supercompressed level data must be decompressed using Zstandard as a single unit. // Currently ETC1S videos must always be transcoded from first to last frame (or KTX2 "layer"), in order, with no skipping of frames. 
// By default this method is not thread safe unless you specify a pointer to a user allocated thread-specific transcoder_state struct. @@ -889,7 +889,7 @@ namespace basist basist::transcoder_texture_format fmt, uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, ktx2_transcoder_state *pState = nullptr); - + private: const uint8_t* m_pData; uint32_t m_data_size; @@ -898,22 +898,22 @@ namespace basist basisu::vector m_levels; basisu::uint8_vec m_dfd; key_value_vec m_key_values; - + ktx2_etc1s_global_data_header m_etc1s_header; basisu::vector m_etc1s_image_descs; basist::basis_tex_format m_format; - + uint32_t m_dfd_color_model; ktx2_df_color_primaries m_dfd_color_prims; uint32_t m_dfd_transfer_func; uint32_t m_dfd_flags; uint32_t m_dfd_samples; ktx2_df_channel_id m_dfd_chan0, m_dfd_chan1; - + basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder; basist::basisu_lowlevel_uastc_transcoder m_uastc_transcoder; - + ktx2_transcoder_state m_def_transcoder_state; bool m_has_alpha; diff --git a/transcoder/basisu_transcoder_internal.h b/transcoder/basisu_transcoder_internal.h index aa3bee35..92d2ad52 100644 --- a/transcoder/basisu_transcoder_internal.h +++ b/transcoder/basisu_transcoder_internal.h @@ -44,9 +44,9 @@ namespace basist // You probably don't care about these enum's unless you are going pretty low-level and calling the transcoder to decode individual slices. 
enum class block_format { - cETC1, // ETC1S RGB + cETC1, // ETC1S RGB cETC2_RGBA, // full ETC2 EAC RGBA8 block - cBC1, // DXT1 RGB + cBC1, // DXT1 RGB cBC3, // BC4 block followed by a four color BC1 block cBC4, // DXT5A (alpha block only) cBC5, // two BC4 blocks @@ -56,9 +56,9 @@ namespace basist cBC7_M5_COLOR, // RGB BC7 mode 5 color (writes an opaque mode 5 block) cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.) cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format) - cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC + cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC // data. If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking. 
- + cATC_RGB, cATC_RGBA_INTERPOLATED_ALPHA, cFXT1_RGB, // Opaque-only, has oddball 8x4 pixel block size @@ -68,23 +68,23 @@ namespace basist cETC2_EAC_R11, cETC2_EAC_RG11, - + cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits) cRGB32, // Writes RGB components to 32bpp output pixels cRGBA32, // Writes RGB255 components to 32bpp output pixels cA32, // Writes alpha component to 32bpp output pixels - + cRGB565, cBGR565, - + cRGBA4444_COLOR, cRGBA4444_ALPHA, cRGBA4444_COLOR_OPAQUE, cRGBA4444, cUASTC_4x4, - + cTotalBlockFormats }; @@ -105,9 +105,9 @@ namespace basist const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); - + uint16_t crc16(const void *r, size_t size, uint16_t crc); - + class huffman_decoding_table { friend class bitwise_decoder; @@ -225,7 +225,7 @@ namespace basist return false; else if (idx >= (int)m_tree.size()) m_tree.resize(idx + 1); - + if (!m_tree[idx]) { m_tree[idx] = (int16_t)tree_next; @@ -394,14 +394,14 @@ namespace basist for (;;) { uint32_t k = peek_bits(16); - + uint32_t l = 0; while (k & 1) { l++; k >>= 1; } - + q += l; remove_bits(l); @@ -419,7 +419,7 @@ namespace basist const uint32_t chunk_size = 1 << chunk_bits; const uint32_t chunk_mask = chunk_size - 1; - + uint32_t v = 0; uint32_t ofs = 0; @@ -431,7 +431,7 @@ namespace basist if ((s & chunk_size) == 0) break; - + if (ofs >= 32) { assert(0); @@ -447,7 +447,7 @@ namespace basist assert(ct.m_code_sizes.size()); const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; - + while (m_bit_buf_size < 16) { uint32_t c = 0; @@ -458,7 +458,7 @@ namespace basist m_bit_buf_size += 8; assert(m_bit_buf_size <= 32); } - + int code_len; int sym; @@ -643,7 +643,7 @@ namespace basist }; struct decoder_etc_block; - + inline uint8_t clamp255(int32_t i) { return 
(uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); @@ -667,7 +667,7 @@ namespace basist }; uint8_t c[4]; - + uint32_t m; }; @@ -789,7 +789,7 @@ namespace basist }; bool basis_block_format_is_uncompressed(block_format tex_type); - + } // namespace basist diff --git a/transcoder/basisu_transcoder_uastc.h b/transcoder/basisu_transcoder_uastc.h index f91314f4..9e5ead3d 100644 --- a/transcoder/basisu_transcoder_uastc.h +++ b/transcoder/basisu_transcoder_uastc.h @@ -5,8 +5,8 @@ namespace basist { struct color_quad_u8 - { - uint8_t m_c[4]; + { + uint8_t m_c[4]; }; const uint32_t TOTAL_UASTC_MODES = 19; @@ -101,9 +101,9 @@ namespace basist int m_ccs; // color component selector (dual plane only) bool m_dual_plane; // true if dual plane - // Weight and endpoint BISE values. + // Weight and endpoint BISE values. // Note these values are NOT linear, they must be BISE encoded. See Table 97 and Table 107. - uint8_t m_endpoints[18]; // endpoint values, in RR GG BB etc. order + uint8_t m_endpoints[18]; // endpoint values, in RR GG BB etc. order uint8_t m_weights[64]; // weight index values, raster order, in P0 P1, P0 P1, etc. or P0, P0, P0, P0, etc. order }; @@ -198,7 +198,7 @@ namespace basist #ifdef _DEBUG int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block); #endif - + struct uastc_block { union @@ -234,10 +234,10 @@ namespace basist }; color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock); - + struct decoder_etc_block; struct eac_block; - + bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb); bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb); @@ -259,7 +259,7 @@ namespace basist // Packs 16 scalar values to BC4. Same PSNR as stb_dxt's BC4 encoder, around 13% faster. 
void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride); - + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb); enum @@ -269,7 +269,7 @@ namespace basist cEncodeBC1UseSelectors = 4, }; void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags); - + // Alternate PCA-free encoder, around 15% faster, same (or slightly higher) avg. PSNR void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags); @@ -286,7 +286,7 @@ namespace basist bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha); bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality); - + // uastc_init() MUST be called before using this module. void uastc_init(); diff --git a/webgl/README.md b/webgl/README.md index 79b6cb90..bfb7131a 100644 --- a/webgl/README.md +++ b/webgl/README.md @@ -42,7 +42,7 @@ extension that is [currently in development](https://github.com/KhronosGroup/glT ## Compressor (encode_test) -This demo shows how to use the compressor from JavaScript. To use it, select a .PNG file then hit the "Encode!" button. The compressor will dynamically generate a .basis file in memory which will then be immediately transcoded and displayed. Hit the "Download!" button to locally download the generated .basis file. +This demo shows how to use the compressor from JavaScript. To use it, select a .PNG file then hit the "Encode!" button. The compressor will dynamically generate a .basis file in memory which will then be immediately transcoded and displayed. Hit the "Download!" button to locally download the generated .basis file. To view the compressor's textual debug output, open your browser's developer debug console (under Developer Tools in Chrome) and enable the Debug checkbox before hitting the "Encode!" button. 
Multithreading is not currently supported when the compressor is compiled to WebAssembly, so compression will be slower than using the stand-alone command line tool. diff --git a/webgl/encode_test/dxt-to-rgb565.js b/webgl/encode_test/dxt-to-rgb565.js index 8e35be4d..7827cf69 100644 --- a/webgl/encode_test/dxt-to-rgb565.js +++ b/webgl/encode_test/dxt-to-rgb565.js @@ -37,7 +37,7 @@ function dxtToRgb565(src, src16Offset, width, height) { i = src16Offset + 4 * (blockY * blockWidth + blockX); c[0] = src[i]; c[1] = src[i + 1]; - + r0 = c[0] & 0x1f; g0 = c[0] & 0x7e0; b0 = c[0] & 0xf800; @@ -50,7 +50,7 @@ function dxtToRgb565(src, src16Offset, width, height) { // decoder in many GPUs does :) // rg FIXME: This is most likely leading to wrong results vs. a GPU - + c[2] = ((5 * r0 + 3 * r1) >> 3) | (((5 * g0 + 3 * g1) >> 3) & 0x7e0) | (((5 * b0 + 3 * b1) >> 3) & 0xf800); diff --git a/webgl/encode_test/index.html b/webgl/encode_test/index.html index ad74da13..193ff789 100644 --- a/webgl/encode_test/index.html +++ b/webgl/encode_test/index.html @@ -124,8 +124,8 @@ cTFFXT1_RGB: 17, cTFPVRTC2_4_RGB: 18, cTFPVRTC2_4_RGBA: 19, - cTFETC2_EAC_R11: 20, - cTFETC2_EAC_RG11: 21 + cTFETC2_EAC_R11: 20, + cTFETC2_EAC_RG11: 21 }; BASIS_FORMAT_NAMES = {}; @@ -136,7 +136,7 @@ DXT_FORMAT_MAP = {}; DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC1] = COMPRESSED_RGB_S3TC_DXT1_EXT; DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC3] = COMPRESSED_RGBA_S3TC_DXT5_EXT; -DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC7] = COMPRESSED_RGBA_BPTC_UNORM; +DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC7] = COMPRESSED_RGBA_BPTC_UNORM; var astcSupported = false; var etcSupported = false; @@ -159,7 +159,7 @@ { var basisFileDesc = basisFile.getFileDesc(); - log('------'); + log('------'); log('getFileDesc():'); log('version: ' + basisFileDesc.version); log('us per frame: ' + basisFileDesc.usPerFrame); @@ -167,7 +167,7 @@ log('userdata0: ' + basisFileDesc.userdata0 + ' userdata1: ' + basisFileDesc.userdata1); log('texFormat: ' + basisFileDesc.texFormat); log('yFlipped: 
' + basisFileDesc.yFlipped + ' hasAlphaSlices: ' + basisFileDesc.hasAlphaSlices); - + if (basisFileDesc.texFormat == Module.basis_tex_format.cETC1S.value) { log('numEndpoints: ' + basisFileDesc.numEndpoints); @@ -185,9 +185,9 @@ for (image_index = 0; image_index < basisFileDesc.totalImages; image_index++) { log('image: ' + image_index); - + var basisImageDesc = basisFile.getImageDesc(image_index); - + log('origWidth: ' + basisImageDesc.origWidth + ' origWidth: ' + basisImageDesc.origHeight); log('numBlocksX: ' + basisImageDesc.numBlocksX + ' origWidth: ' + basisImageDesc.numBlocksY); log('numLevels: ' + basisImageDesc.numLevels); @@ -198,15 +198,15 @@ for (level_index = 0; level_index < basisImageDesc.numLevels; level_index++) { var basisImageLevelDesc = basisFile.getImageLevelDesc(image_index, level_index); - - log('level: ' + level_index + + + log('level: ' + level_index + ' rgb_file_offset: ' + basisImageLevelDesc.rgbFileOfs + ' rgb_file_len: ' + basisImageLevelDesc.rgbFileLen); - if (basisFileDesc.hasAlphaSlices) + if (basisFileDesc.hasAlphaSlices) log('alpha_file_offset: ' + basisImageLevelDesc.alphaFileOfs + ' alpha_file_len: ' + basisImageLevelDesc.alphaFileLen); } } - + log('------'); } @@ -226,7 +226,7 @@ images = basisFile.getNumImages(); levels = basisFile.getNumLevels(0); has_alpha = basisFile.getHasAlpha(); - + dumpBasisFileDesc(basisFile); if (!width || !height || !images || !levels) { @@ -235,9 +235,9 @@ basisFile.delete(); return; } - + // Note: If the file is UASTC, the preferred formats are ASTC/BC7. - // If the file is ETC1S and doesn't have alpha, the preferred formats are ETC1 and BC1. For alpha, the preferred formats are ETC2, BC3 or BC7. + // If the file is ETC1S and doesn't have alpha, the preferred formats are ETC1 and BC1. For alpha, the preferred formats are ETC2, BC3 or BC7. 
var formatString = 'UNKNOWN'; if (astcSupported) @@ -275,7 +275,7 @@ formatString = 'PVRTC1_RGB'; format = BASIS_FORMAT.cTFPVRTC1_4_RGB; } - + if ( ((width & (width - 1)) != 0) || ((height & (height - 1)) != 0) ) @@ -284,7 +284,7 @@ } if (width != height) { - log('ERROR: PVRTC1 requires square power of 2 textures'); + log('ERROR: PVRTC1 requires square power of 2 textures'); } } else if (etcSupported) @@ -311,7 +311,7 @@ const dstSize = basisFile.getImageTranscodedSizeInBytes(0, 0, format); const dst = new Uint8Array(dstSize); - + //log(dstSize); // if (!basisFile.transcodeImage(dst, 0, 0, format, 1, 0)) { @@ -320,7 +320,7 @@ console.warn('transcodeImage failed'); basisFile.close(); basisFile.delete(); - + return; } @@ -338,7 +338,7 @@ alignedWidth = (width + 3) & ~3; alignedHeight = (height + 3) & ~3; - + displayWidth = alignedWidth; displayHeight = alignedHeight; @@ -373,9 +373,9 @@ displayWidth = width; displayHeight = height; - // Create 565 texture. + // Create 565 texture. var dstTex = new Uint16Array(width * height); - + // Convert the array of bytes to an array of uint16's. var pix = 0; for (var y = 0; y < height; y++) @@ -388,16 +388,16 @@ redraw(); } -function download_file(filename, body) +function download_file(filename, body) { var element = document.createElement('a'); - + //element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(text)); - const blob = new Blob([body]); + const blob = new Blob([body]); const url = URL.createObjectURL(blob); element.setAttribute('href', url); - + element.setAttribute('download', filename); element.style.display = 'none'; @@ -413,14 +413,14 @@ function PNGDataLoaded(data) { const { BasisFile, BasisEncoder, initializeBasis, encodeBasisTexture } = Module; - + initializeBasis(); - + // Create a destination buffer to hold the compressed .basis file data. If this buffer isn't large enough compression will fail. 
var basisFileData = new Uint8Array(1024*1024*10); - + var num_output_bytes; - + // Compress using the BasisEncoder class. log('BasisEncoder::encode() started:'); @@ -437,22 +437,22 @@ basisEncoder.setQualityLevel(qualityLevel); basisEncoder.setUASTC(uastcFlag); basisEncoder.setMipGen(elem('Mipmaps').checked); - + if (!uastcFlag) log('Encoding at ETC1S quality level ' + qualityLevel); - + const startTime = performance.now(); - + num_output_bytes = basisEncoder.encode(basisFileData); - + const elapsed = performance.now() - startTime; - + logTime('encoding time', elapsed.toFixed(2)); - + var actualBasisFileData = new Uint8Array(basisFileData.buffer, 0, num_output_bytes); basisEncoder.delete(); - + if (num_output_bytes == 0) { log('encodeBasisTexture() failed!'); @@ -460,12 +460,12 @@ else { log('encodeBasisTexture() succeeded, output size ' + num_output_bytes); - + encodedBasisFile = actualBasisFileData; - + //download("test.basis", actualBasisFileData); } - + if (num_output_bytes != 0) { dataLoaded(actualBasisFileData); @@ -486,10 +486,10 @@ function viewRGB() { drawMode = 1; redraw(); } function viewAlpha() { drawMode = 2; redraw(); } -function downloadEncodedFile() +function downloadEncodedFile() { if (encodedBasisFile) - { + { if (encodedBasisFile.length) download_file("encoded_file.basis", encodedBasisFile); } @@ -511,7 +511,7 @@
- +
.png file: @@ -533,15 +533,15 @@
UASTC: - +
- + ETC1S Quality: - - + +
- +
@@ -558,17 +558,17 @@ - + diff --git a/webgl/ktx2_encode_test/dxt-to-rgb565.js b/webgl/ktx2_encode_test/dxt-to-rgb565.js index 8e35be4d..7827cf69 100644 --- a/webgl/ktx2_encode_test/dxt-to-rgb565.js +++ b/webgl/ktx2_encode_test/dxt-to-rgb565.js @@ -37,7 +37,7 @@ function dxtToRgb565(src, src16Offset, width, height) { i = src16Offset + 4 * (blockY * blockWidth + blockX); c[0] = src[i]; c[1] = src[i + 1]; - + r0 = c[0] & 0x1f; g0 = c[0] & 0x7e0; b0 = c[0] & 0xf800; @@ -50,7 +50,7 @@ function dxtToRgb565(src, src16Offset, width, height) { // decoder in many GPUs does :) // rg FIXME: This is most likely leading to wrong results vs. a GPU - + c[2] = ((5 * r0 + 3 * r1) >> 3) | (((5 * g0 + 3 * g1) >> 3) & 0x7e0) | (((5 * b0 + 3 * b1) >> 3) & 0xf800); diff --git a/webgl/ktx2_encode_test/index.html b/webgl/ktx2_encode_test/index.html index 1835ae75..81588176 100644 --- a/webgl/ktx2_encode_test/index.html +++ b/webgl/ktx2_encode_test/index.html @@ -125,8 +125,8 @@ cTFFXT1_RGB: 17, cTFPVRTC2_4_RGB: 18, cTFPVRTC2_4_RGBA: 19, - cTFETC2_EAC_R11: 20, - cTFETC2_EAC_RG11: 21 + cTFETC2_EAC_R11: 20, + cTFETC2_EAC_RG11: 21 }; BASIS_FORMAT_NAMES = {}; @@ -137,7 +137,7 @@ DXT_FORMAT_MAP = {}; DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC1] = COMPRESSED_RGB_S3TC_DXT1_EXT; DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC3] = COMPRESSED_RGBA_S3TC_DXT5_EXT; -DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC7] = COMPRESSED_RGBA_BPTC_UNORM; +DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC7] = COMPRESSED_RGBA_BPTC_UNORM; var astcSupported = false; var etcSupported = false; @@ -158,8 +158,8 @@ function dumpKTX2FileDesc(ktx2File) { - log('------'); - + log('------'); + log('Width: ' + ktx2File.getWidth()); log('Height: ' + ktx2File.getHeight()); log('Faces: ' + ktx2File.getFaces()); @@ -179,7 +179,7 @@ log('DFD Channel0: ' + ktx2File.getDFDChannelID0()); log('DFD Channel1: ' + ktx2File.getDFDChannelID1()); log('Is Video: ' + ktx2File.isVideo()); - + var dfdSize = ktx2File.getDFDSize(); var dvdData = new Uint8Array(dfdSize); ktx2File.getDFD(dvdData); 
@@ -188,17 +188,17 @@ log('--'); log('--'); - log('Key values:'); + log('Key values:'); var key_index; - for (key_index = 0; key_index < ktx2File.getTotalKeys(); key_index++) + for (key_index = 0; key_index < ktx2File.getTotalKeys(); key_index++) { var key_name = ktx2File.getKey(key_index); log('Key ' + key_index + ': "' + key_name + '"'); - + var valSize = ktx2File.getKeyValueSize(key_name); if (valSize != 0) - { + { var val_data = new Uint8Array(valSize); var status = ktx2File.getKeyValue(key_name, val_data); if (!status) @@ -207,21 +207,21 @@ { log('value size: ' + val_data.length); var i, str = ""; - + for (i = 0; i < val_data.length; i++) { var c = val_data[i]; str = str + String.fromCharCode(c); } - + log(str); } - + } else log(''); } - + log('--'); log('Image level information:'); var level_index; @@ -229,14 +229,14 @@ { var layer_index; for (layer_index = 0; layer_index < Math.max(1, ktx2File.getLayers()); layer_index++) - { + { var face_index; for (face_index = 0; face_index < ktx2File.getFaces(); face_index++) { var imageLevelInfo = ktx2File.getImageLevelInfo(level_index, layer_index, face_index); - + log('level: ' + level_index + ' layer: ' + layer_index + ' face: ' + face_index); - + log('orig_width: ' + imageLevelInfo.origWidth); log('orig_height: ' + imageLevelInfo.origHeight); log('width: ' + imageLevelInfo.width); @@ -248,7 +248,7 @@ log('iframeFlag: ' + imageLevelInfo.iframeFlag); if (ktx2File.isETC1S()) log('ETC1S image desc image flags: ' + ktx2File.getETC1SImageDescImageFlags(level_index, layer_index, face_index)); - + log('--'); } } @@ -256,7 +256,7 @@ log('--'); log('KTX2 header:'); var hdr = ktx2File.getHeader(); - + log('vkFormat: ' + hdr.vkFormat); log('typeSize: ' + hdr.typeSize); log('pixelWidth: ' + hdr.pixelWidth); @@ -272,7 +272,7 @@ log('kvdByteLength: ' + hdr.kvdByteLength); log('sgdByteOffset: ' + hdr.sgdByteOffset); log('sgdByteLength: ' + hdr.sgdByteLength); - + log('------'); } @@ -286,7 +286,7 @@ const startTime = 
performance.now(); const ktx2File = new KTX2File(new Uint8Array(data)); - + if (!ktx2File.isValid()) { console.warn('Invalid or unsupported .ktx2 file'); @@ -301,16 +301,16 @@ levels = ktx2File.getLevels(); faces = ktx2File.getFaces(); has_alpha = ktx2File.getHasAlpha(); - + if (!width || !height || !levels) { console.warn('Invalid .ktx2 file'); ktx2File.close(); ktx2File.delete(); return; } - + // Note: If the file is UASTC, the preferred formats are ASTC/BC7. - // If the file is ETC1S and doesn't have alpha, the preferred formats are ETC1 and BC1. For alpha, the preferred formats are ETC2, BC3 or BC7. + // If the file is ETC1S and doesn't have alpha, the preferred formats are ETC1 and BC1. For alpha, the preferred formats are ETC2, BC3 or BC7. var formatString = 'UNKNOWN'; if (astcSupported) @@ -348,7 +348,7 @@ formatString = 'PVRTC1_RGB'; format = BASIS_FORMAT.cTFPVRTC1_4_RGB; } - + if ( ((width & (width - 1)) != 0) || ((height & (height - 1)) != 0) ) @@ -357,7 +357,7 @@ } if (width != height) { - log('ERROR: PVRTC1 requires square power of 2 textures'); + log('ERROR: PVRTC1 requires square power of 2 textures'); } } else if (etcSupported) @@ -381,12 +381,12 @@ basisFile.delete(); return; } - + dumpKTX2FileDesc(ktx2File); const dstSize = ktx2File.getImageTranscodedSizeInBytes(0, 0, 0, format); const dst = new Uint8Array(dstSize); - + //log(dstSize); if (!ktx2File.transcodeImage(dst, 0, 0, 0, format, 0, -1, -1)) { @@ -394,7 +394,7 @@ console.warn('transcodeImage failed'); ktx2File.close(); ktx2File.delete(); - + return; } @@ -413,7 +413,7 @@ alignedWidth = (width + 3) & ~3; alignedHeight = (height + 3) & ~3; - + displayWidth = alignedWidth; displayHeight = alignedHeight; @@ -448,9 +448,9 @@ displayWidth = width; displayHeight = height; - // Create 565 texture. + // Create 565 texture. var dstTex = new Uint16Array(width * height); - + // Convert the array of bytes to an array of uint16's. 
var pix = 0; for (var y = 0; y < height; y++) @@ -463,16 +463,16 @@ redraw(); } -function download_file(filename, body) +function download_file(filename, body) { var element = document.createElement('a'); - + //element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(text)); - const blob = new Blob([body]); + const blob = new Blob([body]); const url = URL.createObjectURL(blob); element.setAttribute('href', url); - + element.setAttribute('download', filename); element.style.display = 'none'; @@ -488,14 +488,14 @@ function PNGDataLoaded(data) { const { BasisFile, BasisEncoder, initializeBasis, encodeBasisTexture } = Module; - + initializeBasis(); - + // Create a destination buffer to hold the compressed .basis file data. If this buffer isn't large enough compression will fail. var ktx2FileData = new Uint8Array(1024*1024*10); - + var num_output_bytes; - + // Compress using the BasisEncoder class. log('BasisEncoder::encode() started:'); @@ -507,7 +507,7 @@ basisEncoder.setCreateKTX2File(true); basisEncoder.setKTX2UASTCSupercompression(true); basisEncoder.setKTX2SRGBTransferFunc(true); - + basisEncoder.setSliceSourceImage(0, new Uint8Array(data), 0, 0, true); basisEncoder.setDebug(elem('Debug').checked); basisEncoder.setComputeStats(elem('ComputeStats').checked); @@ -516,22 +516,22 @@ basisEncoder.setQualityLevel(qualityLevel); basisEncoder.setUASTC(uastcFlag); basisEncoder.setMipGen(elem('Mipmaps').checked); - + if (!uastcFlag) log('Encoding at ETC1S quality level ' + qualityLevel); - + const startTime = performance.now(); - + num_output_bytes = basisEncoder.encode(ktx2FileData); - + const elapsed = performance.now() - startTime; - + logTime('encoding time', elapsed.toFixed(2)); - + var actualKTX2FileData = new Uint8Array(ktx2FileData.buffer, 0, num_output_bytes); basisEncoder.delete(); - + if (num_output_bytes == 0) { log('encodeBasisTexture() failed!'); @@ -539,12 +539,12 @@ else { log('encodeBasisTexture() succeeded, output size ' + 
num_output_bytes); - + encodedKTX2File = actualKTX2FileData; - + //download("test.ktx2", actualKTX2FileData); } - + if (num_output_bytes != 0) { dataLoaded(actualKTX2FileData); @@ -565,10 +565,10 @@ function viewRGB() { drawMode = 1; redraw(); } function viewAlpha() { drawMode = 2; redraw(); } -function downloadEncodedFile() +function downloadEncodedFile() { if (encodedKTX2File) - { + { if (encodedKTX2File.length) download_file("encoded_file.ktx2", encodedKTX2File); } @@ -591,7 +591,7 @@
- +
.png file: @@ -613,15 +613,15 @@
UASTC: - +
- + ETC1S Quality: - - + +
- +
@@ -638,17 +638,17 @@