From dc837074c4ca73583c3541ea54438d7fda84fdf9 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 11 Apr 2024 11:04:42 -0700 Subject: [PATCH] Add .npy support to debug_to_file() (#8177) * Add .npy support to halide_image_io The .npy format is NumPy's native format for storing multidimensional arrays (aka tensors/buffers). Being able to load/save in this format makes it (potentially) a lot easier to interchange data with the Python ecosystem, as well as providing a file format that supports floating-point data more robustly than any of the others that we currently support. This adds load/save support for a useful subset: - We support the int/uint/float types common in Halide (except for f16/bf16 for now) - We don't support reading or writing files that are in `fortran_order` - We don't support any object/struct/etc files, only numeric primitives - We only support loading files that are in the host's endianness (typically little-endian) Note that at present this doesn't support f16 / bf16 formats, but that could likely be added with minimal difficulty. The tricky bit of this is that the reading code has to parse a (limited) Python dict in text form. Please review that part carefully. TODO: we could probably add this as an option for `debug_to_file()` without too much pain in a followup PR. * clang-tidy * clang-tidy * Address review comments * Allow for "keys" as well as 'keys' * Add .npy support to debug_to_file() Built on top of https://github.com/halide/Halide/pull/8175, this adds .npy as an option. This is actually pretty great because it's easy to do something like ``` ss = numpy.load("my_file.npy") print(ss) ``` in Python and get nicely-formatted output, which can sometimes be a lot easier for debugging than inserting lots of print() statements (see https://github.com/halide/Halide/issues/8176) Did a drive-by change to the correctness test to use this format instead of .mat. 
* Add float16 support * Add support for Float16 images in npy * Assume little-endian * Remove redundant halide_error() * naming convention * naming convention * Test both mat and npy * Don't call halide_error() * Use old-school parser * clang-tidy --- src/DebugToFile.cpp | 4 + src/runtime/write_debug_image.cpp | 140 ++++++++++++++++++++++++--- test/correctness/debug_to_file.cpp | 147 +++++++++++++++-------------- 3 files changed, 207 insertions(+), 84 deletions(-) diff --git a/src/DebugToFile.cpp b/src/DebugToFile.cpp index 8147e4cfe7f1..8510b806a132 100644 --- a/src/DebugToFile.cpp +++ b/src/DebugToFile.cpp @@ -42,6 +42,8 @@ class DebugToFile : public IRMutator { num_elements *= bound.extent; } + // TODO: why do we bother with this? halide_debug_to_file() + // can infer the type-and-size it needs from the buffer's type field. int type_code = 0; Type t = op->types[0]; if (t == Float(32)) { @@ -64,6 +66,8 @@ class DebugToFile : public IRMutator { type_code = 8; } else if (t == Int(64)) { type_code = 9; + } else if (t == Float(16)) { + type_code = 10; } else { user_error << "Type " << t << " not supported for debug_to_file\n"; } diff --git a/src/runtime/write_debug_image.cpp b/src/runtime/write_debug_image.cpp index f51017c1fbb4..a5f8816db2c7 100644 --- a/src/runtime/write_debug_image.cpp +++ b/src/runtime/write_debug_image.cpp @@ -1,13 +1,16 @@ #include "HalideRuntime.h" -// We support three formats, tiff, mat, and tmp. +// We support four formats, npy, tiff, mat, and tmp. // // All formats support arbitrary types, and are easy to write in a // small amount of code. // +// npy: +// - Arbitrary dimensionality, type +// - Readable by NumPy and other Python tools // TIFF: // - 2/3-D only -// - Readable by the most tools +// - Readable by a lot of tools // mat: // - Arbitrary dimensionality, type // - Readable by matlab, ImageStack, and many other tools @@ -26,20 +29,22 @@ namespace Internal { // Mappings from the type_code passed in to the type codes of the // formats. 
See "type_code" in DebugToFile.cpp +constexpr int kNumTypeCodes = 11; + // TIFF sample type values are: // 1 => Unsigned int // 2 => Signed int // 3 => Floating-point -WEAK int16_t pixel_type_to_tiff_sample_type[] = { +WEAK int16_t pixel_type_to_tiff_sample_type[kNumTypeCodes] = { // float, double, uint8, int8, ... uint64, int64 - 3, 3, 1, 2, 1, 2, 1, 2, 1, 2}; + 3, 3, 1, 2, 1, 2, 1, 2, 1, 2, 0}; // See the .mat level 5 documentation for matlab class codes. -WEAK uint8_t pixel_type_to_matlab_class_code[] = { - 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; +WEAK uint8_t pixel_type_to_matlab_class_code[kNumTypeCodes] = { + 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0}; -WEAK uint8_t pixel_type_to_matlab_type_code[] = { - 7, 9, 2, 1, 4, 3, 6, 5, 13, 12}; +WEAK uint8_t pixel_type_to_matlab_type_code[kNumTypeCodes] = { + 7, 9, 2, 1, 4, 3, 6, 5, 13, 12, 0}; #pragma pack(push) #pragma pack(2) @@ -125,6 +130,39 @@ struct ScopedFile { } }; +// Halide runtime has lots of assumptions that we are always little-endian, +// so we'll hardcode this here; leaving in the logic to make it clear. +constexpr bool host_is_big_endian = false; +constexpr char little_endian_char = '<'; +constexpr char big_endian_char = '>'; +constexpr char no_endian_char = '|'; +constexpr char host_endian_char = (host_is_big_endian ? 
big_endian_char : little_endian_char); + +struct npy_dtype_info_t { + char byte_order; + char kind; + size_t item_size; +}; + +struct htype_to_dtype { + halide_type_t htype; + npy_dtype_info_t dtype; +}; + +WEAK htype_to_dtype npy_dtypes[] = { + {halide_type_t(halide_type_float, 16), {host_endian_char, 'f', 2}}, + {halide_type_of(), {host_endian_char, 'f', sizeof(float)}}, + {halide_type_of(), {host_endian_char, 'f', sizeof(double)}}, + {halide_type_of(), {no_endian_char, 'i', sizeof(int8_t)}}, + {halide_type_of(), {host_endian_char, 'i', sizeof(int16_t)}}, + {halide_type_of(), {host_endian_char, 'i', sizeof(int32_t)}}, + {halide_type_of(), {host_endian_char, 'i', sizeof(int64_t)}}, + {halide_type_of(), {no_endian_char, 'u', sizeof(uint8_t)}}, + {halide_type_of(), {host_endian_char, 'u', sizeof(uint16_t)}}, + {halide_type_of(), {host_endian_char, 'u', sizeof(uint32_t)}}, + {halide_type_of(), {host_endian_char, 'u', sizeof(uint64_t)}}, +}; + } // namespace Internal } // namespace Runtime } // namespace Halide @@ -142,11 +180,15 @@ WEAK extern "C" int halide_debug_to_file(void *user_context, const char *filenam return halide_error_code_bad_dimensions; } - if (auto result = halide_copy_to_host(user_context, buf); - result != halide_error_code_success) { + if (auto result = halide_copy_to_host(user_context, buf); result != halide_error_code_success) { + // halide_error() has already been called return result; } + // Note: all calls to this function are wrapped in an assert that identifies + // the function that failed, so calling halide_error() anywhere after this is redundant + // and actually unhelpful. 
+ ScopedFile f(filename, "wb"); if (!f.open()) { return halide_error_code_debug_to_file_failed; @@ -167,7 +209,73 @@ WEAK extern "C" int halide_debug_to_file(void *user_context, const char *filenam uint32_t final_padding_bytes = 0; - if (ends_with(filename, ".tiff") || ends_with(filename, ".tif")) { + if (ends_with(filename, ".npy")) { + npy_dtype_info_t di = {0, 0, 0}; + for (const auto &d : npy_dtypes) { + if (d.htype == buf->type) { + di = d.dtype; + break; + } + } + if (di.byte_order == 0) { + return halide_error_code_debug_to_file_failed; + } + + constexpr int max_dict_string_size = 1024; + char dict_string_buf[max_dict_string_size]; + char *dst = dict_string_buf; + char *end = dict_string_buf + max_dict_string_size - 1; + + dst = halide_string_to_string(dst, end, "{'descr': '"); + *dst++ = di.byte_order; + *dst++ = di.kind; + dst = halide_int64_to_string(dst, end, di.item_size, 1); + dst = halide_string_to_string(dst, end, "', 'fortran_order': False, 'shape': ("); + for (int d = 0; d < buf->dimensions; ++d) { + if (d > 0) { + dst = halide_string_to_string(dst, end, ","); + } + dst = halide_int64_to_string(dst, end, buf->dim[d].extent, 1); + if (buf->dimensions == 1) { + dst = halide_string_to_string(dst, end, ","); // special-case for single-element tuples + } + } + dst = halide_string_to_string(dst, end, ")}\n"); + if (dst >= end) { + // bloody unlikely, but just in case + return halide_error_code_debug_to_file_failed; + } + + const char *npy_magic_string_and_version = "\x93NUMPY\x01\x00"; + + const size_t unpadded_length = 8 + 2 + (dst - dict_string_buf); + const size_t padded_length = (unpadded_length + 64 - 1) & ~(64 - 1); + const size_t padding = padded_length - unpadded_length; + memset(dst, ' ', padding); + dst += padding; + + const size_t header_len = dst - dict_string_buf; + if (header_len > 65535) { + return halide_error_code_debug_to_file_failed; + } + const uint8_t header_len_le[2] = { + (uint8_t)((header_len >> 0) & 0xff), + (uint8_t)((header_len 
>> 8) & 0xff)}; + + if (!f.write(npy_magic_string_and_version, 8)) { + return halide_error_code_debug_to_file_failed; + } + if (!f.write(header_len_le, 2)) { + return halide_error_code_debug_to_file_failed; + } + if (!f.write(dict_string_buf, dst - dict_string_buf)) { + return halide_error_code_debug_to_file_failed; + } + } else if (ends_with(filename, ".tiff") || ends_with(filename, ".tif")) { + if (type_code == 10) { + return halide_error_code_debug_to_file_failed; + } + int32_t channels; int32_t width = shape[0].extent; int32_t height = shape[1].extent; @@ -243,6 +351,10 @@ WEAK extern "C" int halide_debug_to_file(void *user_context, const char *filenam } } } else if (ends_with(filename, ".mat")) { + if (type_code == 10) { + return halide_error_code_debug_to_file_failed; + } + // Construct a name for the array from the filename const char *end = filename; while (*end) { @@ -279,7 +391,6 @@ WEAK extern "C" int halide_debug_to_file(void *user_context, const char *filenam // level 5 .mat files have a size limit. (Padding itself should never cause the overflow. // Code written this way for safety.) 
if (((uint64_t)payload_bytes + final_padding_bytes) >> 32) { - halide_error(user_context, "Can't debug_to_file to a .mat file greater than 4GB\n"); return halide_error_code_debug_to_file_failed; } @@ -325,6 +436,10 @@ WEAK extern "C" int halide_debug_to_file(void *user_context, const char *filenam return halide_error_code_debug_to_file_failed; } } else { + if (type_code == 10) { + return halide_error_code_debug_to_file_failed; + } + int32_t header[] = {shape[0].extent, shape[1].extent, shape[2].extent, @@ -370,7 +485,6 @@ WEAK extern "C" int halide_debug_to_file(void *user_context, const char *filenam const uint64_t zero = 0; if (final_padding_bytes) { if (final_padding_bytes > sizeof(zero)) { - halide_error(user_context, "Unexpectedly large final_padding_bytes"); return halide_error_code_debug_to_file_failed; } if (!f.write(&zero, final_padding_bytes)) { diff --git a/test/correctness/debug_to_file.cpp b/test/correctness/debug_to_file.cpp index 2b0aee28e8c0..780428c3389f 100644 --- a/test/correctness/debug_to_file.cpp +++ b/test/correctness/debug_to_file.cpp @@ -15,88 +15,93 @@ int main(int argc, char **argv) { return 0; } - std::string f_mat = Internal::get_test_tmp_dir() + "f.mat"; - std::string g_mat = Internal::get_test_tmp_dir() + "g.mat"; - std::string h_mat = Internal::get_test_tmp_dir() + "h.mat"; - - Internal::ensure_no_file_exists(f_mat); - Internal::ensure_no_file_exists(g_mat); - Internal::ensure_no_file_exists(h_mat); - - { - Func f, g, h, j; - Var x, y, z; - f(x, y, z) = cast(x + y + z); - g(x, y) = cast(f(x, y, 0) + f(x + 1, y, 1)); - h(x, y) = cast(f(x, y, -1) + g(x, y)); - - Target target = get_jit_target_from_environment(); - if (target.has_gpu_feature()) { - Var xi, yi; - f.compute_root().gpu_tile(x, y, xi, yi, 1, 1).debug_to_file(f_mat); - g.compute_root().gpu_tile(x, y, xi, yi, 1, 1).debug_to_file(g_mat); - h.compute_root().gpu_tile(x, y, xi, yi, 1, 1).debug_to_file(h_mat); - } else { - f.compute_root().debug_to_file(f_mat); - 
g.compute_root().debug_to_file(g_mat); - h.compute_root().debug_to_file(h_mat); - } + std::vector formats = {"npy", "mat"}; + for (const auto &format : formats) { + std::cout << "Testing format " << format << "...\n"; + + std::string f_path = Internal::get_test_tmp_dir() + "f." + format; + std::string g_path = Internal::get_test_tmp_dir() + "g." + format; + std::string h_path = Internal::get_test_tmp_dir() + "h." + format; + + Internal::ensure_no_file_exists(f_path); + Internal::ensure_no_file_exists(g_path); + Internal::ensure_no_file_exists(h_path); + + { + Func f, g, h, j; + Var x, y, z; + f(x, y, z) = cast(x + y + z); + g(x, y) = cast(f(x, y, 0) + f(x + 1, y, 1)); + h(x, y) = cast(f(x, y, -1) + g(x, y)); + + Target target = get_jit_target_from_environment(); + if (target.has_gpu_feature()) { + Var xi, yi; + f.compute_root().gpu_tile(x, y, xi, yi, 1, 1).debug_to_file(f_path); + g.compute_root().gpu_tile(x, y, xi, yi, 1, 1).debug_to_file(g_path); + h.compute_root().gpu_tile(x, y, xi, yi, 1, 1).debug_to_file(h_path); + } else { + f.compute_root().debug_to_file(f_path); + g.compute_root().debug_to_file(g_path); + h.compute_root().debug_to_file(h_path); + } - Buffer im = h.realize({10, 10}, target); - } + Buffer im = h.realize({10, 10}, target); + } - { - Internal::assert_file_exists(f_mat); - Internal::assert_file_exists(g_mat); - Internal::assert_file_exists(h_mat); + { + Internal::assert_file_exists(f_path); + Internal::assert_file_exists(g_path); + Internal::assert_file_exists(h_path); + + Buffer f = Tools::load_image(f_path); + assert(f.dimensions() == 3 && + f.dim(0).extent() == 11 && + f.dim(1).extent() == 10 && + f.dim(2).extent() == 3); + + for (int z = 0; z < 3; z++) { + for (int y = 0; y < 10; y++) { + for (int x = 0; x < 11; x++) { + int32_t val = f(x, y, z); + // The min coord gets lost on debug_to_file, so f should be shifted up by one. 
+ if (val != x + y + z - 1) { + printf("f(%d, %d, %d) = %d instead of %d\n", x, y, z, val, x + y); + return 1; + } + } + } + } - Buffer f = Tools::load_image(f_mat); - assert(f.dimensions() == 3 && - f.dim(0).extent() == 11 && - f.dim(1).extent() == 10 && - f.dim(2).extent() == 3); + Buffer g = Tools::load_image(g_path); + assert(g.dimensions() == 2 && + g.dim(0).extent() == 10 && + g.dim(1).extent() == 10); - for (int z = 0; z < 3; z++) { for (int y = 0; y < 10; y++) { - for (int x = 0; x < 11; x++) { - int32_t val = f(x, y, z); - // The min coord gets lost on debug_to_file, so f should be shifted up by one. - if (val != x + y + z - 1) { - printf("f(%d, %d, %d) = %d instead of %d\n", x, y, z, val, x + y); + for (int x = 0; x < 10; x++) { + float val = g(x, y); + float correct = (float)(f(x, y, 1) + f(x + 1, y, 2)); + if (val != correct) { + printf("g(%d, %d) = %f instead of %f\n", x, y, val, correct); return 1; } } } - } - Buffer g = Tools::load_image(g_mat); - assert(g.dimensions() == 2 && - g.dim(0).extent() == 10 && - g.dim(1).extent() == 10); - - for (int y = 0; y < 10; y++) { - for (int x = 0; x < 10; x++) { - float val = g(x, y); - float correct = (float)(f(x, y, 1) + f(x + 1, y, 2)); - if (val != correct) { - printf("g(%d, %d) = %f instead of %f\n", x, y, val, correct); - return 1; - } - } - } + Buffer h = Tools::load_image(h_path); + assert(h.dimensions() == 2 && + h.dim(0).extent() == 10 && + h.dim(1).extent() == 10); - Buffer h = Tools::load_image(h_mat); - assert(h.dimensions() == 2 && - h.dim(0).extent() == 10 && - h.dim(1).extent() == 10); - - for (int y = 0; y < 10; y++) { - for (int x = 0; x < 10; x++) { - int32_t val = h(x, y); - int32_t correct = f(x, y, 0) + g(x, y); - if (val != correct) { - printf("h(%d, %d) = %d instead of %d\n", x, y, val, correct); - return 1; + for (int y = 0; y < 10; y++) { + for (int x = 0; x < 10; x++) { + int32_t val = h(x, y); + int32_t correct = f(x, y, 0) + g(x, y); + if (val != correct) { + printf("h(%d, %d) = %d 
instead of %d\n", x, y, val, correct); + return 1; + } } } }