From 508eb92e32aad7aacd49f4547511d7c41a0795dc Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 29 Mar 2024 07:03:28 +1100 Subject: [PATCH 01/37] Added DFS and ATK, but not fully tested. Need to add COC. Integrate with the rest of the code. --- src/core/codestream/ojph_codestream_local.cpp | 20 +- src/core/codestream/ojph_codestream_local.h | 27 +- src/core/codestream/ojph_params.cpp | 371 ++++++++++++++++-- src/core/codestream/ojph_params_local.h | 206 ++++++++-- src/core/codestream/ojph_subband.cpp | 9 +- src/core/common/ojph_version.h | 4 +- 6 files changed, 554 insertions(+), 83 deletions(-) diff --git a/src/core/codestream/ojph_codestream_local.cpp b/src/core/codestream/ojph_codestream_local.cpp index df2f18c4..c2154fa0 100644 --- a/src/core/codestream/ojph_codestream_local.cpp +++ b/src/core/codestream/ojph_codestream_local.cpp @@ -81,6 +81,8 @@ namespace ojph { used_qcc_fields = 0; qcc = qcc_store; + used_coc_fields = 0; + coc = coc_store; allocator = new mem_fixed_allocator; elastic_alloc = new mem_elastic_allocator(1048576); //1 megabyte @@ -717,15 +719,15 @@ namespace ojph { { if (msg_level == OJPH_MSG_LEVEL::INFO) { - OJPH_INFO(0x00030001, "%s\n", msg); + OJPH_INFO(0x00030001, "%s", msg); } else if (msg_level == OJPH_MSG_LEVEL::WARN) { - OJPH_WARN(0x00030001, "%s\n", msg); + OJPH_WARN(0x00030001, "%s", msg); } else if (msg_level == OJPH_MSG_LEVEL::ERROR) { - OJPH_ERROR(0x00030001, "%s\n", msg); + OJPH_ERROR(0x00030001, "%s", msg); } else assert(0); @@ -736,8 +738,8 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void codestream::read_headers(infile_base *file) { - ui16 marker_list[17] = { SOC, SIZ, CAP, PRF, CPF, COD, COC, QCD, QCC, - RGN, POC, PPM, TLM, PLM, CRG, COM, SOT }; + ui16 marker_list[19] = { SOC, SIZ, CAP, PRF, CPF, COD, COC, QCD, QCC, + RGN, POC, PPM, TLM, PLM, CRG, COM, DFS, ATK, SOT }; find_marker(file, marker_list, 1); //find SOC find_marker(file, marker_list + 1, 1); //find SIZ 
siz.read(file); @@ -745,7 +747,7 @@ namespace ojph { int received_markers = 0; //check that COD, & QCD received while (true) { - marker_idx = find_marker(file, marker_list + 2, 15); + marker_idx = find_marker(file, marker_list + 2, 17); if (marker_idx == 0) cap.read(file); else if (marker_idx == 1) @@ -805,11 +807,17 @@ namespace ojph { else if (marker_idx == 13) skip_marker(file, "COM", NULL, OJPH_MSG_LEVEL::NO_MSG, false); else if (marker_idx == 14) + dfs.read(file); + else if (marker_idx == 15) + atk.read(file); + else if (marker_idx == 16) break; else OJPH_ERROR(0x00030051, "File ended before finding a tile segment"); } + //qcd.update(&dfs); + if (received_markers != 3) OJPH_ERROR(0x00030052, "markers error, COD and QCD are required"); diff --git a/src/core/codestream/ojph_codestream_local.h b/src/core/codestream/ojph_codestream_local.h index 5e0bbfaf..035b534f 100644 --- a/src/core/codestream/ojph_codestream_local.h +++ b/src/core/codestream/ojph_codestream_local.h @@ -148,20 +148,27 @@ namespace ojph { bool employ_color_transform; int planar; int profile; - ui32 tilepart_div; // tilepart division value - bool need_tlm; // true if tlm markers are needed + ui32 tilepart_div; // tilepart division value + bool need_tlm; // true if tlm markers are needed private: - param_siz siz; - param_cod cod; - param_cap cap; - param_qcd qcd; - param_tlm tlm; + param_siz siz; // image and tile size + param_cod cod; // coding style default + param_cap cap; // extended capabilities + param_qcd qcd; // quantization default + param_tlm tlm; // tile-part lengths - private: // this is to handle qcc + private: // this is to handle qcc and coc int used_qcc_fields; - param_qcc qcc_store[4], *qcc; // we allocate 4, - // if not enough, we allocate more + param_qcc *qcc; // quantization component + param_qcc qcc_store[4]; // we allocate 4, we allocate more if needed + int used_coc_fields; + param_coc *coc; // coding style component + param_coc coc_store[4]; // we allocate 4, we allocate 
more if needed + + private: // these are from Part 2 of the standard + param_dfs dfs; // downsmapling factor styles + param_atk atk; // arbitrary transformation kernels private: mem_fixed_allocator *allocator; diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index fa194431..5243762f 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -417,6 +417,16 @@ namespace ojph { return u; } + ////////////////////////////////////////////////////////////////////////// + static inline + ui64 swap_byte(ui64 t) + { + ui64 u = swap_byte((ui32)(t & 0xFFFFFFFFu)); + u <<= 32; + u |= swap_byte((ui32)(t >> 32)); + return u; + } + ////////////////////////////////////////////////////////////////////////// // // @@ -790,7 +800,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - void param_qcd::set_rev_quant(ui32 bit_depth, + void param_qcd::set_rev_quant(int num_decomps, ui32 bit_depth, bool is_employing_color_transform) { int guard_bits = 1; @@ -815,7 +825,7 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - void param_qcd::set_irrev_quant() + void param_qcd::set_irrev_quant(int num_decomps) { int guard_bits = 1; Sqcd = (ui8)((guard_bits<<5)|0x2);//one guard bit, scalar quantization @@ -859,13 +869,17 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ui32 param_qcd::get_MAGBp() const { //this can be written better, but it is only executed once + + // this assumes a bi-directional wavelet (conventional DWT) + ui32 num_decomps = (num_subbands - 1) / 3; + ui32 B = 0; int irrev = Sqcd & 0x1F; if (irrev == 0) //reversible - for (ui32 i = 0; i < 3 * num_decomps + 1; ++i) + for (ui32 i = 0; i < num_subbands; ++i) B = ojph_max(B, (u8_SPqcd[i] >> 3) + get_num_guard_bits() - 1u); else if (irrev == 2) 
//scalar expounded - for (ui32 i = 0; i < 3 * num_decomps + 1; ++i) + for (ui32 i = 0; i < num_subbands; ++i) { ui32 nb = num_decomps - (i ? (i - 1) / 3 : 0); //decompsition level B = ojph_max(B, (u16_SPqcd[i] >> 11) + get_num_guard_bits() - nb); @@ -877,14 +891,24 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - float param_qcd::irrev_get_delta(ui32 resolution, ui32 subband) const + float param_qcd::irrev_get_delta(const param_dfs* dfs, + ui32 num_decompositions, + ui32 resolution, ui32 subband) const { - assert((resolution == 0 && subband == 0) || - (resolution <= num_decomps && subband > 0 && subband<4)); - assert((Sqcd & 0x1F) == 2); float arr[] = { 1.0f, 2.0f, 2.0f, 4.0f }; + assert((Sqcd & 0x1F) == 2); - ui32 idx = resolution == 0 ? 0 : (resolution - 1) * 3 + subband; + ui32 idx = + dfs->get_subband_idx(num_decompositions, resolution, subband); + if (idx >= num_subbands) { + OJPH_INFO(0x00050101, "Trying to access quantization step size for " + "subband %d when the QCD/QCC marker segment specifies " + "quantization step sizes for %d subbands only. To continue " + "decoding, we are using the step size for subband %d, which can " + "produce incorrect results", + idx + 1, num_subbands, num_subbands - 1); + idx = num_subbands - 1; + } int eps = u16_SPqcd[idx] >> 11; float mantissa; mantissa = (float)((u16_SPqcd[idx] & 0x7FF) | 0x800) * arr[subband]; @@ -900,12 +924,22 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - ui32 param_qcd::get_Kmax(ui32 resolution, ui32 subband) const + ui32 param_qcd::get_Kmax(const param_dfs* dfs, ui32 num_decompositions, + ui32 resolution, ui32 subband) const { - assert((resolution == 0 && subband == 0) || - (resolution <= num_decomps && subband > 0 && subband<4)); ui32 num_bits = get_num_guard_bits(); - ui32 idx = resolution == 0 ? 
0 : (resolution - 1) * 3 + subband; + ui32 idx = + dfs->get_subband_idx(num_decompositions, resolution, subband); + if (idx >= num_subbands) { + OJPH_INFO(0x00050111, "Trying to access quantization step size for " + "subband %d when the QCD/QCC marker segment specifies " + "quantization step sizes for %d subbands only. To continue " + "decoding, we are using the step size for subband %d, which can " + "produce incorrect results", + idx + 1, num_subbands, num_subbands - 1); + idx = num_subbands - 1; + } + int irrev = Sqcd & 0x1F; if (irrev == 0) //reversible; this is (10.22) from the J2K book { @@ -926,7 +960,6 @@ namespace ojph { bool param_qcd::write(outfile_base *file) { int irrev = Sqcd & 0x1F; - ui32 num_subbands = 1 + 3 * num_decomps; //marker size excluding header Lqcd = 3; @@ -976,16 +1009,16 @@ namespace ojph { OJPH_ERROR(0x00050082, "error reading QCD marker"); if ((Sqcd & 0x1F) == 0) { - num_decomps = (Lqcd - 4) / 3; - if (Lqcd != 4 + 3 * num_decomps) + num_subbands = (Lqcd - 3); + if (Lqcd != 3 + num_subbands) OJPH_ERROR(0x00050083, "wrong Lqcd value in QCD marker"); - for (ui32 i = 0; i < 1 + 3 * num_decomps; ++i) + for (ui32 i = 0; i < num_subbands; ++i) if (file->read(&u8_SPqcd[i], 1) != 1) OJPH_ERROR(0x00050084, "error reading QCD marker"); } else if ((Sqcd & 0x1F) == 1) { - num_decomps = 0; + num_subbands = 0; OJPH_ERROR(0x00050089, "Scalar derived quantization is not supported yet in QCD marker"); if (Lqcd != 5) @@ -993,10 +1026,10 @@ namespace ojph { } else if ((Sqcd & 0x1F) == 2) { - num_decomps = (Lqcd - 5) / 6; - if (Lqcd != 5 + 6 * num_decomps) + num_subbands = (Lqcd - 3) / 2; + if (Lqcd != 3 + 2 * num_subbands) OJPH_ERROR(0x00050086, "wrong Lqcd value in QCD marker"); - for (ui32 i = 0; i < 1 + 3 * num_decomps; ++i) + for (ui32 i = 0; i < num_subbands; ++i) { if (file->read(&u16_SPqcd[i], 2) != 2) OJPH_ERROR(0x00050087, "error reading QCD marker"); @@ -1036,20 +1069,19 @@ namespace ojph { } if (file->read(&Sqcd, 1) != 1) 
OJPH_ERROR(0x000500A4, "error reading QCC marker"); + ui32 offset = num_comps < 257 ? 4 : 5; if ((Sqcd & 0x1F) == 0) { - ui32 offset = num_comps < 257 ? 5 : 6; - num_decomps = (Lqcd - offset) / 3; - if (Lqcd != offset + 3 * num_decomps) + num_subbands = (Lqcd - offset); + if (Lqcd != offset + num_subbands) OJPH_ERROR(0x000500A5, "wrong Lqcd value in QCC marker"); - for (ui32 i = 0; i < 1 + 3 * num_decomps; ++i) + for (ui32 i = 0; i < num_subbands; ++i) if (file->read(&u8_SPqcd[i], 1) != 1) OJPH_ERROR(0x000500A6, "error reading QCC marker"); } else if ((Sqcd & 0x1F) == 1) { - ui32 offset = num_comps < 257 ? 6 : 7; - num_decomps = 0; + num_subbands = 0; OJPH_ERROR(0x000500AB, "Scalar derived quantization is not supported yet in QCC marker"); if (Lqcd != offset) @@ -1057,11 +1089,10 @@ namespace ojph { } else if ((Sqcd & 0x1F) == 2) { - ui32 offset = num_comps < 257 ? 6 : 7; - num_decomps = (Lqcd - offset) / 6; - if (Lqcd != offset + 6 * num_decomps) + num_subbands = (Lqcd - offset) / 2; + if (Lqcd != offset + 2 * num_subbands) OJPH_ERROR(0x000500A8, "wrong Lqcc value in QCC marker"); - for (ui32 i = 0; i < 1 + 3 * num_decomps; ++i) + for (ui32 i = 0; i < num_subbands; ++i) { if (file->read(&u16_SPqcd[i], 2) != 2) OJPH_ERROR(0x000500A9, "error reading QCC marker"); @@ -1260,6 +1291,280 @@ namespace ojph { return result; } - } + ////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + ////////////////////////////////////////////////////////////////////////// -} + ////////////////////////////////////////////////////////////////////////// + const param_dfs* param_dfs::get_dfs(int index) const + { + const param_dfs* p = this; + while (p && p->Sdfs != index) + p = p->next; + return p; + } + + ////////////////////////////////////////////////////////////////////////// + param_dfs::dfs_dwt_type param_dfs::get_dwt_type(ui32 decomp_level) const + { + assert(decomp_level > 0 && decomp_level <= Ids); + + decomp_level = 
ojph_min(decomp_level, Ids); + ui8 d = decomp_level - 1; // decomp_level starts from 1 + ui8 idx = d >> 2; // complete bytes + ui8 bits = d & 0x3; // bit within the bytes + ui8 val = (Ddfs[idx] >> (6 - 2 * bits)) & 0x3; + return (dfs_dwt_type)val; + } + + ////////////////////////////////////////////////////////////////////////// + int param_dfs::get_subband_idx(ui32 num_decompositions, ui32 resolution, + ui32 subband) const + { + int idx; + if (this != NULL) + { + assert((resolution == 0 && subband == 0) || + (resolution > 0 && resolution <= Ids && + subband > 0 && subband < 4)); + + ui32 ns[4] = { 0, 3, 2, 2 }; + ui32 off[4] = {}; + + idx = 0; + if (resolution > 0) + { + idx = 0; + ui32 i = 1; + for (; i < resolution; ++i) + idx += ns[get_dwt_type(num_decompositions - i + 1)]; + dfs_dwt_type t = get_dwt_type(num_decompositions - i + 1); + idx += subband; + if (t == VERT_DWT && subband == 2) + --idx; + } + } + else + { + assert(subband >= 0 && subband < 4); + idx = resolution ? (resolution - 1) * 3 + subband : 0; + } + + return idx; + } + + ////////////////////////////////////////////////////////////////////////// + bool param_dfs::read(infile_base *file) + { + if (Ldfs != 0) { // this param_dfs is used + param_dfs* p = this; + while (p->next != NULL) + p = p->next; + p->next = new param_dfs; + p = p->next; + return p->read(file); + } + + if (file->read(&Ldfs, 2) != 2) + OJPH_ERROR(0x000500D1, "error reading DFS-Ldfs parameter"); + Ldfs = swap_byte(Ldfs); + if (file->read(&Sdfs, 2) != 2) + OJPH_ERROR(0x000500D2, "error reading DFS-Sdfs parameter"); + Sdfs = swap_byte(Sdfs); + if (Sdfs > 15) + OJPH_ERROR(0x000500D3, "The DFS-Sdfs parameter is %d, which is " + "larger than the permissible 15", Sdfs); + ui8 t, l_Ids = 0; + if (file->read(&l_Ids, 1) != 1) + OJPH_ERROR(0x000500D4, "error reading DFS-Ids parameter"); + constexpr int max_Ddfs = sizeof(Ddfs) * 4; + if (l_Ids > max_Ddfs) + OJPH_INFO(0x000500D5, "The DFS-Ids parameter is %d; while this is " + "valid, the 
number is unnessarily large -- you do not need more " + "than %d. Please contact me regarding this issue.", + l_Ids, max_Ddfs); + Ids = l_Ids < max_Ddfs ? l_Ids : max_Ddfs; + for (int i = 0; i < Ids; i += 4) + if (file->read(&Ddfs[i / 4], 1) != 1) + OJPH_ERROR(0x000500D6, "error reading DFS-Ddfs parameters"); + for (int i = Ids; i < l_Ids; i += 4) + if (file->read(&t, 1) != 1) + OJPH_ERROR(0x000500D7, "error reading DFS-Ddfs parameters"); + return true; + } + + ////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + ////////////////////////////////////////////////////////////////////////// + + ////////////////////////////////////////////////////////////////////////// + const param_atk* param_atk::get_atk(int index) const + { + const param_atk* p = this; + while (p && p->get_index() != index) + p = p->next; + return p; + } + + ////////////////////////////////////////////////////////////////////////// + bool param_atk::read_coefficient(infile_base *file, float &K) + { + int coeff_type = get_coeff_type(); + if (coeff_type == 0) { // 8bit + ui8 v; + if (file->read(&v, 1) != 1) return false; + K = v; + } + else if (coeff_type == 1) { // 16bit + ui16 v; + if (file->read(&v, 2) != 2) return false; + K = swap_byte(v); + } + else if (coeff_type == 2) { // float + if (file->read(&K, 4) != 4) return false; + ui32 t = swap_byte(*(ui32*)&K); + K = *(float*)&t; + } + else if (coeff_type == 3) { // double + double v; + if (file->read(&v, 8) != 8) return false; + ui64 t = swap_byte(*(ui64*)&v); + double u = *(float*)&t; + K = (float)u; + } + else if (coeff_type == 4) { // 128 bit float + ui64 v, v1; + if (file->read(&v, 8) != 8) return false; + if (file->read(&v1, 8) != 8) return false; // not needed + v = swap_byte(v); + + // convert the MSB of 128b float to 32b float + // 32b float has 1 sign bit, 8 exponent (offset 127), 23 mantissa + // 128b float has 1 sign bit, 15 exponent (offset 16383), 112 mantissa + si32 t1 = (si32)((v >> 48) 
& 0x7FFF); // exponent + t1 -= 16383; + t1 += 127; + t1 = t1 & 0xFF; // removes MSBs if negative + t1 <<= 23; // move bits to their location + ui32 t = 0; + t |= ((ui32)(v >> 32) & 0x80000000); // copy sign bit + t |= t1; // copy exponent + t |= (ui32)((v >> 25) & 0x007FFFFF); // copy 23 mantissa + K = *(float*)&t; + } + return true; + } + + + ////////////////////////////////////////////////////////////////////////// + bool param_atk::read_coefficient(infile_base *file, si16 &K) + { + int coeff_type = get_coeff_type(); + if (coeff_type == 0) { + ui8 v; + if (file->read(&v, 1) != 1) return false; + K = v; + } + else if (coeff_type == 1) { + ui16 v; + if (file->read(&v, 2) != 2) return false; + v = swap_byte(v); + K = v; + } + else + return false; + return true; + } + + ////////////////////////////////////////////////////////////////////////// + bool param_atk::read(infile_base *file) + { + if (Latk != 0) { // this param_atk is used + param_atk *p = this; + while (p->next != NULL) + p = p->next; + p->next = new param_atk; + p = p->next; + return p->read(file); + } + + if (file->read(&Latk, 2) != 2) + OJPH_ERROR(0x000500E1, "error reading ATK-Latk parameter"); + Latk = swap_byte(Latk); + if (file->read(&Satk, 2) != 2) + OJPH_ERROR(0x000500E2, "error reading ATK-Satk parameter"); + Satk = swap_byte(Satk); + if (is_m_init0() == false) // only even-indexed is supported + OJPH_ERROR(0x000500E3, "ATK-Satk parameter sets m_init to 1, " + "requiring odd-indexed subsequence in first reconstruction step, " + "which is not supported yet."); + if (is_whole_sample() == false) // ARB filter not supported + OJPH_ERROR(0x000500E4, "ATK-Satk parameter specified ARB filter, " + "which is not supported yet."); + if (is_reversible() && get_coeff_type() >= 2) // reversible & float + OJPH_ERROR(0x000500E5, "ATK-Satk parameter does not make sense. 
" + "It employs floats with reversible filtering."); + if (is_reversible() == false) + if (read_coefficient(file, Katk) == false) + OJPH_ERROR(0x000500E6, "error reading ATK-Katk parameter"); + if (file->read(&Natk, 1) != 1) + OJPH_ERROR(0x000500E7, "error reading ATK-Natk parameter"); + if (Natk > max_steps) { + if (d != d_store) // was this allocated -- very unlikely + delete[] d; + d = new data[Natk]; + max_steps = Natk; + } + + if (is_reversible()) + { + for (int s = 0; s < Natk; ++s) + { + if (file->read(&d[s].rev.Eatk, 1) != 1) + OJPH_ERROR(0x000500E8, "error reading ATK-Eatk parameter"); + if (file->read(&d[s].rev.Batk, 2) != 2) + OJPH_ERROR(0x000500E9, "error reading ATK-Batk parameter"); + d[s].rev.Batk = (si16)swap_byte((ui16)d[s].rev.Batk); + ui8 LCatk; + if (file->read(&LCatk, 1) != 1) + OJPH_ERROR(0x000500EA, "error reading ATK-LCatk parameter"); + if (LCatk == 0) + OJPH_ERROR(0x000500EB, "Encountered a ATK-LCatk value of zero; " + "something is wrong."); + if (LCatk > 1) + OJPH_ERROR(0x000500EC, "ATK-LCatk value greater than 1; " + "that is, a multitap filter is not supported"); + if (read_coefficient(file, d[s].rev.Aatk) == false) + OJPH_ERROR(0x000500ED, "Error reding ATK-Aatk parameter"); + } + } + else + { + for (int s = 0; s < Natk; ++s) + { + ui8 LCatk; + if (file->read(&LCatk, 1) != 1) + OJPH_ERROR(0x000500EE, "error reading ATK-LCatk parameter"); + if (LCatk == 0) + OJPH_ERROR(0x000500EF, "Encountered a ATK-LCatk value of zero; " + "something is wrong."); + if (LCatk > 1) + OJPH_ERROR(0x000500F0, "ATK-LCatk value greater than 1; " + "that is, a multitap filter is not supported."); + if (read_coefficient(file, d[s].irv.Aatk) == false) + OJPH_ERROR(0x000500F1, "Error reding ATK-Aatk parameter"); + } + } + + return true; + } + } // !local namespace +} // !ojph namespace diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index bac0c359..acfd0347 100644 --- a/src/core/codestream/ojph_params_local.h +++ 
b/src/core/codestream/ojph_params_local.h @@ -96,14 +96,26 @@ namespace ojph { //////////////////////////////////////////////////////////////////////////// enum OJPH_TILEPART_DIVISIONS: ui32 { - OJPH_TILEPART_NO_DIVISIONS = 0x0, // no divisions to tile parts - OJPH_TILEPART_RESOLUTIONS = 0x1, - OJPH_TILEPART_COMPONENTS = 0x2, - OJPH_TILEPART_LAYERS = 0x4, // these are meaningless with HTJ2K + OJPH_TILEPART_NO_DIVISIONS = 0x0, // no divisions to tile parts + OJPH_TILEPART_RESOLUTIONS = 0x1, + OJPH_TILEPART_COMPONENTS = 0x2, + OJPH_TILEPART_LAYERS = 0x4, // these are meaningless with HTJ2K }; namespace local { + //defined here + struct param_siz; + struct param_cod; + struct param_qcd; + struct param_qcc; + struct param_cap; + struct param_sot; + struct param_tlm; + struct param_coc; + struct param_dfs; + struct param_atk; + ////////////////////////////////////////////////////////////////////////// enum JP2K_MARKER : ui16 { @@ -111,6 +123,7 @@ namespace ojph { CAP = 0xFF50, //extended capability SIZ = 0xFF51, //image and tile size (required) COD = 0xFF52, //coding style default (required) + COC = 0xFF53, //coding style component TLM = 0xFF55, //tile-part lengths PRF = 0xFF56, //profile PLM = 0xFF57, //packet length, main header @@ -118,19 +131,20 @@ namespace ojph { CPF = 0xFF59, //corresponding profile values QCD = 0xFF5C, //qunatization default (required) QCC = 0xFF5D, //quantization component + RGN = 0xFF5E, //region of interest + POC = 0xFF5F, //progression order change + PPM = 0xFF60, //packed packet headers, main header + PPT = 0xFF61, //packed packet headers, tile-part header + CRG = 0xFF63, //component registration COM = 0xFF64, //comment + DFS = 0xFF72, //downsampling factor styles + ADS = 0xFF73, //arbitrary decomposition styles + ATK = 0xFF79, //arbitrary transformation kernels SOT = 0xFF90, //start of tile-part SOP = 0xFF91, //start of packet EPH = 0xFF92, //end of packet SOD = 0xFF93, //start of data EOC = 0xFFD9, //end of codestream (required) - - COC = 
0xFF53, //coding style component - RGN = 0xFF5E, //region of interest - POC = 0xFF5F, //progression order change - PPM = 0xFF60, //packed packet headers, main header - PPT = 0xFF61, //packed packet headers, tile-part header - CRG = 0xFF63, //component registration }; ////////////////////////////////////////////////////////////////////////// @@ -442,23 +456,23 @@ namespace ojph { Sqcd = 0; for (int i = 0; i < 97; ++i) u16_SPqcd[i] = 0; - num_decomps = 0; - base_delta = -1.0f; + num_subbands = 0; + base_delta = -1.0f; } void set_delta(float delta) { base_delta = delta; } - void set_rev_quant(ui32 bit_depth, bool is_employing_color_transform); - void set_irrev_quant(); void check_validity(const param_siz& siz, const param_cod& cod) { - num_decomps = cod.get_num_decompositions(); + int num_decomps = cod.get_num_decompositions(); + num_subbands = 1 + 3 * num_decomps; if (cod.is_reversible()) { ui32 bit_depth = 0; for (ui32 i = 0; i < siz.get_num_components(); ++i) bit_depth = ojph_max(bit_depth, siz.get_bit_depth(i)); - set_rev_quant(bit_depth, cod.is_employing_color_transform()); + set_rev_quant(num_decomps, bit_depth, + cod.is_employing_color_transform()); } else { @@ -466,21 +480,28 @@ namespace ojph { ui32 bit_depth = 0; for (ui32 i = 0; i < siz.get_num_components(); ++i) bit_depth = - ojph_max(bit_depth, siz.get_bit_depth(i) + siz.is_signed(i)); + ojph_max(bit_depth, siz.get_bit_depth(i) + siz.is_signed(i)); base_delta = 1.0f / (float)(1 << bit_depth); } - set_irrev_quant(); - } + set_irrev_quant(num_decomps); + } } - ui32 get_num_guard_bits() const; ui32 get_MAGBp() const; - ui32 get_Kmax(ui32 resolution, ui32 subband) const; - float irrev_get_delta(ui32 resolution, ui32 subband) const; + ui32 get_Kmax(const param_dfs* dfs, ui32 num_decompositions, + ui32 resolution, ui32 subband) const; + float irrev_get_delta(const param_dfs* dfs, + ui32 num_decompositions, + ui32 resolution, ui32 subband) const; bool write(outfile_base *file); void read(infile_base *file); + 
protected: + void set_rev_quant(int num_decomps, ui32 bit_depth, + bool is_employing_color_transform); + void set_irrev_quant(int num_decomps); + protected: ui16 Lqcd; ui8 Sqcd; @@ -489,8 +510,9 @@ namespace ojph { ui8 u8_SPqcd[97]; ui16 u16_SPqcd[97]; }; - ui32 num_decomps; - float base_delta; + ui32 num_subbands; // number of subbands + float base_delta; // base quantization step size -- all other + // step sizes are derived from it. }; /////////////////////////////////////////////////////////////////////////// @@ -502,7 +524,6 @@ namespace ojph { /////////////////////////////////////////////////////////////////////////// struct param_qcc : public param_qcd { - //friend ::ojph::param_qcc; public: param_qcc() : param_qcd() { comp_idx = 0; } @@ -627,9 +648,136 @@ namespace ojph { Ttlm_Ptlm_pair* pairs; ui32 num_pairs; ui32 next_pair_index; - }; - } -} + + /////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + /////////////////////////////////////////////////////////////////////////// + struct param_coc : public param_cod + { + + }; + + /////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + /////////////////////////////////////////////////////////////////////////// + struct param_dfs + { + public: + enum dfs_dwt_type : ui8 { + NO_DWT = 0, // no wavelet transform + BIDIR_DWT = 1, // bidirectional DWT (this the conventional DWT) + HORZ_DWT = 2, // horizontal only DWT transform + VERT_DWT = 3, // vertical only DWT transform + }; + + public: // member functions + param_dfs() { memset(this, 0, sizeof(param_dfs)); } + ~param_dfs() { if (next) delete next; } + void init() { memset(this, 0, sizeof(param_dfs)); } + bool read(infile_base *file); + bool exists() const { return Ldfs != 0; } + + // get_dfs return a dfs structure Sdfs == index, or NULL if not found + const param_dfs* get_dfs(int index) const; + // decomp_level is the decomposition level, starting from 1 for highest + 
// resolution to num_decomps for the coarsest resolution + dfs_dwt_type get_dwt_type(ui32 decomp_level) const; + int get_subband_idx(ui32 num_decompositions, ui32 resolution, + ui32 subband) const; + + private: // member variables + ui16 Ldfs; // length of the segment marker + ui16 Sdfs; // index of this DFS marker segment + ui8 Ids; // number of elements in Ddfs, 2 bits per sub-level + ui8 Ddfs[8]; // a string defining number of decomposition sub-levels + // 8 bytes should be enough for 32 levels + param_dfs* next; // used for linking other dfs segments + }; + + /////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + /////////////////////////////////////////////////////////////////////////// + struct param_atk + { + // Limitations: + // Arbitrary filters (ARB) are not supported + // Up to 6 steps are supported -- more than 6 are not supported + // Only one coefficient per step -- first order filter + // Only even-indexed subsequence in first reconstruction step, + // m_init = 0 is supported + + public: // data structures used by this object + struct irv_data { + // si8 Oatk; // only for arbitrary filter + // ui8 LCatk; // number of lifting coefficients in a step + float Aatk; // lifting coefficient + }; + + struct rev_data { + // si8 Oatk; // only for arbitrary filter, offset of filter + ui8 Eatk; // only for reversible, epsilon, the power of 2 + si16 Batk; // only for reversible, beta, the additive residue + // ui8 LCatk; // number of lifting coefficients in a step + si16 Aatk; // lifting coefficient + }; + + union data { + irv_data irv; + rev_data rev; + }; + + public: // member functions + param_atk() { init(); } + ~param_atk() { + if (next) delete next; + if (d != NULL && d != d_store) { + delete[] d; + init(false); + } + } + bool read(infile_base *file); + bool read_coefficient(infile_base *file, float &K); + bool read_coefficient(infile_base *file, si16 &K); + void init(bool clear_all = true) { + if (clear_all) 
+ memset(this, 0, sizeof(param_atk)); + d = d_store; max_steps = sizeof(d_store) / sizeof(data); + } + + ui8 get_index() const { return (ui8)(Satk & 0xFF); } + int get_coeff_type() const { return (Satk >> 8) & 0x7; } + bool is_whole_sample() const { return (Satk & 0x800) != 0; } + bool is_reversible() const { return (Satk & 0x1000) != 0; } + bool is_m_init0() const { return (Satk & 0x2000) == 0; } + bool is_using_ws_extension() const { return (Satk & 0x4000) != 0x4000; } + const param_atk* get_atk(int index) const; + const data* get_step(ui32 s) const { assert(s < Natk); return d + s; } + + private: // member variables + ui16 Latk; // structure length + ui16 Satk; // carries a variety of information + float Katk; // only for irreversible scaling factor K + ui8 Natk; // number of lifting steps + data* d; // pointer to data, initialized to d_store + int max_steps; // maximum number of steps without memory allocation + data d_store[6]; // step coefficient + param_atk* next; // used for chaining if more than one atk segment + // exist in the codestream + }; + } // !local namespace +} // !ojph namespace #endif // !OJPH_PARAMS_LOCAL_H diff --git a/src/core/codestream/ojph_subband.cpp b/src/core/codestream/ojph_subband.cpp index fc83bf2b..eb958bfb 100644 --- a/src/core/codestream/ojph_subband.cpp +++ b/src/core/codestream/ojph_subband.cpp @@ -124,11 +124,14 @@ namespace ojph { cur_cb_row = 0; cur_line = 0; cur_cb_height = 0; - param_qcd *qcd = codestream->access_qcd(parent->get_comp_num()); - this->K_max = qcd->get_Kmax(this->res_num, band_num); + param_qcd* qcd = codestream->access_qcd(parent->get_comp_num()); + const param_cod* cod = codestream->get_cod(); + int num_decomps = cod->get_num_decompositions(); + this->K_max = qcd->get_Kmax(NULL, num_decomps, this->res_num, band_num); if (!reversible) { - float d = qcd->irrev_get_delta(res_num, subband_num); + float d = + qcd->irrev_get_delta(NULL, num_decomps, res_num, subband_num); d /= (float)(1u << (31 - this->K_max)); 
delta = d; delta_inv = (1.0f/d); diff --git a/src/core/common/ojph_version.h b/src/core/common/ojph_version.h index fdf28bc2..ff62f0aa 100644 --- a/src/core/common/ojph_version.h +++ b/src/core/common/ojph_version.h @@ -34,5 +34,5 @@ //***************************************************************************/ #define OPENJPH_VERSION_MAJOR 0 -#define OPENJPH_VERSION_MINOR 10 -#define OPENJPH_VERSION_PATCH 5 +#define OPENJPH_VERSION_MINOR 11 +#define OPENJPH_VERSION_PATCH 0 From 4648f913599bde67b2c4763ddfd357adc68b1124 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 29 Mar 2024 08:04:15 +1100 Subject: [PATCH 02/37] This fixes an issue with the previous commit --- src/core/codestream/ojph_params.cpp | 60 ++++++++++++++--------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 5243762f..b2b1980e 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -898,8 +898,11 @@ namespace ojph { float arr[] = { 1.0f, 2.0f, 2.0f, 4.0f }; assert((Sqcd & 0x1F) == 2); - ui32 idx = - dfs->get_subband_idx(num_decompositions, resolution, subband); + ui32 idx; + if (dfs != NULL && dfs->exists()) + idx = dfs->get_subband_idx(num_decompositions, resolution, subband); + else + idx = resolution ? (resolution - 1) * 3 + subband : 0; if (idx >= num_subbands) { OJPH_INFO(0x00050101, "Trying to access quantization step size for " "subband %d when the QCD/QCC marker segment specifies " @@ -928,8 +931,11 @@ namespace ojph { ui32 resolution, ui32 subband) const { ui32 num_bits = get_num_guard_bits(); - ui32 idx = - dfs->get_subband_idx(num_decompositions, resolution, subband); + ui32 idx; + if (dfs != NULL && dfs->exists()) + idx = dfs->get_subband_idx(num_decompositions, resolution, subband); + else + idx = resolution ? 
(resolution - 1) * 3 + subband : 0; if (idx >= num_subbands) { OJPH_INFO(0x00050111, "Trying to access quantization step size for " "subband %d when the QCD/QCC marker segment specifies " @@ -1314,10 +1320,10 @@ namespace ojph { assert(decomp_level > 0 && decomp_level <= Ids); decomp_level = ojph_min(decomp_level, Ids); - ui8 d = decomp_level - 1; // decomp_level starts from 1 - ui8 idx = d >> 2; // complete bytes - ui8 bits = d & 0x3; // bit within the bytes - ui8 val = (Ddfs[idx] >> (6 - 2 * bits)) & 0x3; + ui32 d = decomp_level - 1; // decomp_level starts from 1 + ui32 idx = d >> 2; // complete bytes + ui32 bits = d & 0x3; // bit within the bytes + ui32 val = (Ddfs[idx] >> (6 - 2 * bits)) & 0x3; return (dfs_dwt_type)val; } @@ -1325,33 +1331,23 @@ namespace ojph { int param_dfs::get_subband_idx(ui32 num_decompositions, ui32 resolution, ui32 subband) const { - int idx; - if (this != NULL) - { - assert((resolution == 0 && subband == 0) || - (resolution > 0 && resolution <= Ids && - subband > 0 && subband < 4)); + assert((resolution == 0 && subband == 0) || + (resolution > 0 && resolution <= Ids && + subband > 0 && subband < 4)); - ui32 ns[4] = { 0, 3, 2, 2 }; - ui32 off[4] = {}; + ui32 ns[4] = { 0, 3, 2, 2 }; - idx = 0; - if (resolution > 0) - { - idx = 0; - ui32 i = 1; - for (; i < resolution; ++i) - idx += ns[get_dwt_type(num_decompositions - i + 1)]; - dfs_dwt_type t = get_dwt_type(num_decompositions - i + 1); - idx += subband; - if (t == VERT_DWT && subband == 2) - --idx; - } - } - else + int idx = 0; + if (resolution > 0) { - assert(subband >= 0 && subband < 4); - idx = resolution ? 
(resolution - 1) * 3 + subband : 0; + idx = 0; + ui32 i = 1; + for (; i < resolution; ++i) + idx += ns[get_dwt_type(num_decompositions - i + 1)]; + dfs_dwt_type t = get_dwt_type(num_decompositions - i + 1); + idx += subband; + if (t == VERT_DWT && subband == 2) + --idx; } return idx; From 1a5925f44c8a4f43d4205885d5bb67ba36d4fdef Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 29 Mar 2024 08:17:51 +1100 Subject: [PATCH 03/37] More fixes --- src/core/codestream/ojph_params.cpp | 48 +++++++++++++++++------------ 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index b2b1980e..8c2169c3 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -1425,36 +1425,46 @@ namespace ojph { K = swap_byte(v); } else if (coeff_type == 2) { // float - if (file->read(&K, 4) != 4) return false; - ui32 t = swap_byte(*(ui32*)&K); - K = *(float*)&t; + union { + float f; + ui32 i; + } v; + if (file->read(&v.i, 4) != 4) return false; + v.i = swap_byte(v.i); + K = v.f; } else if (coeff_type == 3) { // double - double v; - if (file->read(&v, 8) != 8) return false; - ui64 t = swap_byte(*(ui64*)&v); - double u = *(float*)&t; - K = (float)u; + union { + double d; + ui64 i; + } v; + if (file->read(&v.i, 8) != 8) return false; + v.i = swap_byte(v.i); + K = (float)v.d; } else if (coeff_type == 4) { // 128 bit float ui64 v, v1; if (file->read(&v, 8) != 8) return false; - if (file->read(&v1, 8) != 8) return false; // not needed + if (file->read(&v1, 8) != 8) return false; // v1 not needed v = swap_byte(v); + union { + float f; + ui32 i; + } s; // convert the MSB of 128b float to 32b float // 32b float has 1 sign bit, 8 exponent (offset 127), 23 mantissa // 128b float has 1 sign bit, 15 exponent (offset 16383), 112 mantissa - si32 t1 = (si32)((v >> 48) & 0x7FFF); // exponent - t1 -= 16383; - t1 += 127; - t1 = t1 & 0xFF; // removes MSBs if negative - t1 <<= 23; // move bits 
to their location - ui32 t = 0; - t |= ((ui32)(v >> 32) & 0x80000000); // copy sign bit - t |= t1; // copy exponent - t |= (ui32)((v >> 25) & 0x007FFFFF); // copy 23 mantissa - K = *(float*)&t; + si32 e = (si32)((v >> 48) & 0x7FFF); // exponent + e -= 16383; + e += 127; + e = e & 0xFF; // removes MSBs if negative + e <<= 23; // move bits to their location + s.i = 0; + s.i |= ((ui32)(v >> 32) & 0x80000000); // copy sign bit + s.i |= e; // copy exponent + s.i |= (ui32)((v >> 25) & 0x007FFFFF); // copy 23 mantissa + K = s.f; } return true; } From 2b38785cd1111072f9e7f43a1caa69670bd677a4 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 30 Mar 2024 18:19:02 +1100 Subject: [PATCH 04/37] Implemented COC. Linked ATK to COD/COC. --- src/core/codestream/ojph_codestream_local.cpp | 40 ++-- src/core/codestream/ojph_codestream_local.h | 4 +- src/core/codestream/ojph_params.cpp | 112 +++++++++-- src/core/codestream/ojph_params_local.h | 178 ++++++++++++------ src/core/codestream/ojph_resolution.cpp | 4 +- src/core/codestream/ojph_subband.cpp | 2 +- src/core/codestream/ojph_tile.cpp | 2 +- 7 files changed, 254 insertions(+), 88 deletions(-) diff --git a/src/core/codestream/ojph_codestream_local.cpp b/src/core/codestream/ojph_codestream_local.cpp index c2154fa0..d4d20a38 100644 --- a/src/core/codestream/ojph_codestream_local.cpp +++ b/src/core/codestream/ojph_codestream_local.cpp @@ -758,7 +758,8 @@ namespace ojph { skip_marker(file, "CPF", NULL, OJPH_MSG_LEVEL::NO_MSG, false); else if (marker_idx == 3) { - cod.read(file); received_markers |= 1; + cod.read(file, param_cod::COD_MAIN); + received_markers |= 1; ojph::param_cod c(&cod); int num_qlayers = c.get_num_layers(); if (num_qlayers != 1) @@ -766,21 +767,32 @@ namespace ojph { "1 quality layer only. 
This codestream has %d quality layers", num_qlayers); } - else if (marker_idx == 4) - skip_marker(file, "COC", "COC is not supported yet", - OJPH_MSG_LEVEL::WARN, false); + else if (marker_idx == 4) + { + ui32 num_comps = siz.get_num_components(); + if (coc == coc_store && + num_comps * sizeof(param_cod) > sizeof(coc_store)) + { + coc = new param_cod[num_comps]; + } + coc[used_coc_fields++].read( + file, param_cod::COC_MAIN, num_comps, &cod); + } else if (marker_idx == 5) - { qcd.read(file); received_markers |= 2; } + { + qcd.read(file); + received_markers |= 2; + } else if (marker_idx == 6) + { + ui32 num_comps = siz.get_num_components(); + if (qcc == qcc_store && + num_comps * sizeof(param_qcc) > sizeof(qcc_store)) { - ui32 num_comps = siz.get_num_components(); - if (qcc == qcc_store && - num_comps * sizeof(param_qcc) > sizeof(qcc_store)) - { - qcc = new param_qcc[num_comps]; - } - qcc[used_qcc_fields++].read(file, num_comps); + qcc = new param_qcc[num_comps]; } + qcc[used_qcc_fields++].read(file, num_comps); + } else if (marker_idx == 7) skip_marker(file, "RGN", "RGN is not supported yet", OJPH_MSG_LEVEL::WARN, false); @@ -816,7 +828,9 @@ namespace ojph { OJPH_ERROR(0x00030051, "File ended before finding a tile segment"); } - //qcd.update(&dfs); + cod.update_atk(&atk); + for (int i = 0; i < used_coc_fields; ++i) + coc[i].update_atk(&atk); if (received_markers != 3) OJPH_ERROR(0x00030052, "markers error, COD and QCD are required"); diff --git a/src/core/codestream/ojph_codestream_local.h b/src/core/codestream/ojph_codestream_local.h index 035b534f..34ffc355 100644 --- a/src/core/codestream/ojph_codestream_local.h +++ b/src/core/codestream/ojph_codestream_local.h @@ -163,8 +163,8 @@ namespace ojph { param_qcc *qcc; // quantization component param_qcc qcc_store[4]; // we allocate 4, we allocate more if needed int used_coc_fields; - param_coc *coc; // coding style component - param_coc coc_store[4]; // we allocate 4, we allocate more if needed + param_cod *coc; // 
coding style component + param_cod coc_store[4]; // we allocate 4, we allocate more if needed private: // these are from Part 2 of the standard param_dfs dfs; // downsmapling factor styles diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 8c2169c3..07446c0f 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -275,7 +275,12 @@ namespace ojph { //////////////////////////////////////////////////////////////////////////// bool param_cod::is_reversible() const { - return state->is_reversible(); + if (state->SPcod.wavelet_trans <= 1) + return state->get_wavelet_kern() == local::param_cod::DWT_REV53; + else { + assert(state->atk != NULL); + return state->atk->is_reversible(); + } } //////////////////////////////////////////////////////////////////////////// @@ -604,8 +609,9 @@ namespace ojph { OJPH_ERROR(0x00050043, "error reading SIZ marker"); Rsiz = swap_byte(Rsiz); if ((Rsiz & 0x4000) == 0) - OJPH_ERROR(0x00050044, "Rsiz bit 14 not set (this is not a JPH file)"); - if (Rsiz & 0xBFFF) + OJPH_ERROR(0x00050044, + "Rsiz bit 14 is not set (this is not a JPH file)"); + if ((Rsiz & 0x8000) != 0 && (Rsiz & 0xF5F) != 0) OJPH_WARN(0x00050001, "Rsiz in SIZ has unimplemented fields"); if (file->read(&Xsiz, 4) != 4) OJPH_ERROR(0x00050045, "error reading SIZ marker"); @@ -652,6 +658,9 @@ namespace ojph { if (file->read(&cptr[c].YRsiz, 1) != 1) OJPH_ERROR(0x00050053, "error reading SIZ marker"); } + + ws_kern_support_needed = (Rsiz & 0x20) != 0; + dfs_support_needed = (Rsiz & 0x80) != 0; } ////////////////////////////////////////////////////////////////////////// @@ -720,6 +729,8 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// bool param_cod::write(outfile_base *file) { + assert(type == COD_MAIN); + //marker size excluding header Lcod = 12; Lcod = (ui16)(Lcod + (Scod & 1 ? 
1 + SPcod.num_decomp : 0)); @@ -758,37 +769,106 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - void param_cod::read(infile_base *file) + void param_cod::read(infile_base *file, param_cod::cod_type type) { + assert(this->type == UNDEFINED); + assert(type == COD_MAIN); + + this->type = type; if (file->read(&Lcod, 2) != 2) - OJPH_ERROR(0x00050071, "error reading COD marker"); + OJPH_ERROR(0x00050071, "error reading COD segment"); Lcod = swap_byte(Lcod); if (file->read(&Scod, 1) != 1) - OJPH_ERROR(0x00050072, "error reading COD marker"); + OJPH_ERROR(0x00050072, "error reading COD segment"); if (file->read(&SGCod.prog_order, 1) != 1) - OJPH_ERROR(0x00050073, "error reading COD marker"); + OJPH_ERROR(0x00050073, "error reading COD segment"); if (file->read(&SGCod.num_layers, 2) != 2) - { OJPH_ERROR(0x00050074, "error reading COD marker"); } + { OJPH_ERROR(0x00050074, "error reading COD segment"); } else SGCod.num_layers = swap_byte(SGCod.num_layers); if (file->read(&SGCod.mc_trans, 1) != 1) - OJPH_ERROR(0x00050075, "error reading COD marker"); + OJPH_ERROR(0x00050075, "error reading COD segment"); if (file->read(&SPcod.num_decomp, 1) != 1) - OJPH_ERROR(0x00050076, "error reading COD marker"); + OJPH_ERROR(0x00050076, "error reading COD segment"); if (file->read(&SPcod.block_width, 1) != 1) - OJPH_ERROR(0x00050077, "error reading COD marker"); + OJPH_ERROR(0x00050077, "error reading COD segment"); if (file->read(&SPcod.block_height, 1) != 1) - OJPH_ERROR(0x00050078, "error reading COD marker"); + OJPH_ERROR(0x00050078, "error reading COD segment"); if (file->read(&SPcod.block_style, 1) != 1) - OJPH_ERROR(0x00050079, "error reading COD marker"); + OJPH_ERROR(0x00050079, "error reading COD segment"); if (file->read(&SPcod.wavelet_trans, 1) != 1) - OJPH_ERROR(0x0005007A, "error reading COD marker"); + OJPH_ERROR(0x0005007A, "error reading COD segment"); if (Scod & 1) for (int i = 0; i <= SPcod.num_decomp; ++i) if 
(file->read(&SPcod.precinct_size[i], 1) != 1) - OJPH_ERROR(0x0005007B, "error reading COD marker"); + OJPH_ERROR(0x0005007B, "error reading COD segment"); if (Lcod != 12 + ((Scod & 1) ? 1 + SPcod.num_decomp : 0)) - OJPH_ERROR(0x0005007C, "error in COD marker length"); + OJPH_ERROR(0x0005007C, "error in COD segment length"); + } + + ////////////////////////////////////////////////////////////////////////// + void param_cod::read(infile_base* file, param_cod::cod_type type, + ui32 num_comps, param_cod *cod) + { + assert(this->type == UNDEFINED); + assert(type == COC_MAIN); + assert(cod != NULL); + + this->type = type; + this->SGCod = cod->SGCod; + this->parent = cod; + if (file->read(&Lcod, 2) != 2) + OJPH_ERROR(0x00050121, "error reading COC segment"); + Lcod = swap_byte(Lcod); + if (num_comps < 257) { + ui8 t; + if (file->read(&t, 1) != 1) + OJPH_ERROR(0x00050122, "error reading COC segment"); + comp_idx = t; + } + else { + if (file->read(&comp_idx, 2) != 2) + OJPH_ERROR(0x00050123, "error reading COC segment"); + comp_idx = swap_byte(comp_idx); + } + if (file->read(&Scod, 1) != 1) + OJPH_ERROR(0x00050124, "error reading COC segment"); + if (Scod & 0xF8) + OJPH_WARN(0x00050011, + "Unsupported options in Scoc field of the COC segment"); + if (file->read(&SPcod.num_decomp, 1) != 1) + OJPH_ERROR(0x00050125, "error reading COC segment"); + if (file->read(&SPcod.block_width, 1) != 1) + OJPH_ERROR(0x00050126, "error reading COC segment"); + if (file->read(&SPcod.block_height, 1) != 1) + OJPH_ERROR(0x00050127, "error reading COC segment"); + if (file->read(&SPcod.block_style, 1) != 1) + OJPH_ERROR(0x00050128, "error reading COC segment"); + if (file->read(&SPcod.wavelet_trans, 1) != 1) + OJPH_ERROR(0x00050129, "error reading COC segment"); + if (Scod & 1) + for (int i = 0; i <= get_num_decompositions(); ++i) + if (file->read(&SPcod.precinct_size[i], 1) != 1) + OJPH_ERROR(0x0005012A, "error reading COC segment"); + ui16 t = 9; + t += num_comps < 257 ? 
0 : 1; + t += (Scod & 1) ? 1 + get_num_decompositions() : 0; + if (Lcod != t) + OJPH_ERROR(0x0005012B, "error in COC segment length"); + } + + ////////////////////////////////////////////////////////////////////////// + void param_cod::update_atk(const param_atk* atk) + { + if (SPcod.wavelet_trans > 1) { + this->atk = atk->get_atk(SPcod.wavelet_trans); + if (this->atk == NULL) + OJPH_ERROR(0x00050131, "A COD/COC segment employs the DWT kernel " + "atk=%d, but a corresponding ATK segment cannot be found", + SPcod.wavelet_trans); + } + else + this->atk = NULL; } ////////////////////////////////////////////////////////////////////////// diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index acfd0347..91447f15 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -112,7 +112,6 @@ namespace ojph { struct param_cap; struct param_sot; struct param_tlm; - struct param_coc; struct param_dfs; struct param_atk; @@ -173,6 +172,7 @@ namespace ojph { cptr = store; old_Csiz = 4; Rsiz = 0x4000; //for jph, bit 14 of Rsiz is 1 + ws_kern_support_needed = dfs_support_needed = false; } ~param_siz() @@ -270,6 +270,8 @@ namespace ojph { ui32 t = ojph_div_ceil(Ysiz, ds) - ojph_div_ceil(YOsiz, ds); return t; } + bool is_ws_kern_support_needed() { return ws_kern_support_needed; } + bool is_dfs_support_needed() { return dfs_support_needed; } private: ui16 Lsiz; @@ -289,6 +291,8 @@ namespace ojph { ui32 skipped_resolutions; int old_Csiz; siz_comp_info store[4]; + bool ws_kern_support_needed; + bool dfs_support_needed; param_siz(const param_siz&) = delete; //prevent copy constructor param_siz& operator=(const param_siz&) = delete; //prevent copy }; @@ -308,10 +312,18 @@ namespace ojph { ui8 block_style; ui8 wavelet_trans; ui8 precinct_size[33]; //num_decomp is in [0,32] - }; - /////////////////////////////////////////////////////////////////////////// - typedef cod_SPcod cod_SPcoc; + size 
get_log_block_dims() const + { return size(block_width + 2, block_height + 2); } + size get_block_dims() const + { size t = get_log_block_dims(); return size(1 << t.w, 1 << t.h); } + size get_log_precinct_size(ui32 res_num) const + { + assert(res_num <= num_decomp); + size ps(precinct_size[res_num] & 0xF, precinct_size[res_num] >> 4); + return ps; + } + }; /////////////////////////////////////////////////////////////////////////// struct cod_SGcod @@ -324,38 +336,65 @@ namespace ojph { /////////////////////////////////////////////////////////////////////////// struct param_cod { + // serves for both COD and COC markers + friend ::ojph::param_cod; + //////////////////////////////////////// enum BLOCK_CODING_STYLES { VERT_CAUSAL_MODE = 0x8, HT_MODE = 0x40 }; - public: + //////////////////////////////////////// + enum cod_type : ui8 { + UNDEFINED = 0, + COD_MAIN = 1, + COC_MAIN = 2, + COD_TILE = 3, + COC_TILE = 4 + }; + //////////////////////////////////////// + enum dwt_type : ui8 { + DWT_IRV97 = 0, + DWT_REV53 = 1, + }; + + public: // COD_MAIN and COC_MAIN common functions + //////////////////////////////////////// param_cod() { memset(this, 0, sizeof(param_cod)); SPcod.block_style = HT_MODE; - SGCod.prog_order = 2; + SGCod.prog_order = OJPH_PO_RPCL; SGCod.num_layers = 1; SGCod.mc_trans = 0; SPcod.num_decomp = 5; SPcod.block_width = 4; //64 SPcod.block_height = 4; //64 - set_reversible(false); } + //////////////////////////////////////// void set_reversible(bool reversible) { - SPcod.wavelet_trans = reversible ? 1 : 0; + assert(type == UNDEFINED || type == COD_MAIN); + type = COD_MAIN; + SPcod.wavelet_trans = reversible ? 
DWT_REV53 : DWT_IRV97; } + //////////////////////////////////////// void employ_color_transform(ui8 val) { assert(val == 0 || val == 1); + assert(type == UNDEFINED || type == COD_MAIN); + type = COD_MAIN; SGCod.mc_trans = val; } + //////////////////////////////////////// void check_validity(const param_siz& siz) { + assert(type == UNDEFINED || type == COD_MAIN); + type = COD_MAIN; + //check that colour transform and match number of components and // downsampling int num_comps = siz.get_num_components(); @@ -393,50 +432,97 @@ namespace ojph { } } + //////////////////////////////////////// ui8 get_num_decompositions() const - { return SPcod.num_decomp; } - size get_block_dims() const { - return size(1 << (SPcod.block_width + 2), - 1 << (SPcod.block_height + 2)); + if (type == COD_MAIN) + return SPcod.num_decomp; + else if (type == COC_MAIN) + { + if (is_dfs_defined()) + return parent->get_num_decompositions(); + else + return SPcod.num_decomp; + } + else { + assert(0); + return 0; // just in case + } } - bool is_reversible() const - { return (SPcod.wavelet_trans == 1); } + + //////////////////////////////////////// + size get_block_dims() const + { return SPcod.get_block_dims(); } + + //////////////////////////////////////// + size get_log_block_dims() const + { return SPcod.get_log_block_dims(); } + + //////////////////////////////////////// + ui8 get_wavelet_kern() const + { return SPcod.wavelet_trans; } + + //////////////////////////////////////// bool is_employing_color_transform() const { return (SGCod.mc_trans == 1); } - size get_log_block_dims() const - { return size(SPcod.block_width + 2, SPcod.block_height + 2); } + + //////////////////////////////////////// size get_precinct_size(ui32 res_num) const { size t = get_log_precinct_size(res_num); - t.w = 1 << t.w; - t.h = 1 << t.h; - return t; + return size(1 << t.w, 1 << t.h); } + + //////////////////////////////////////// size get_log_precinct_size(ui32 res_num) const - { - assert(res_num <= SPcod.num_decomp); 
- size ps(15, 15); + { if (Scod & 1) - { - ps.w = SPcod.precinct_size[res_num] & 0xF; - ps.h = SPcod.precinct_size[res_num] >> 4; - } - return ps; + return SPcod.get_log_precinct_size(res_num); + else + return size(15, 15); } + + //////////////////////////////////////// bool packets_may_use_sop() const { return (Scod & 2) == 2; } + + //////////////////////////////////////// bool packets_use_eph() const { return (Scod & 4) == 4; } + //////////////////////////////////////// bool write(outfile_base *file); - void read(infile_base *file); - private: - ui16 Lcod; - ui8 Scod; - cod_SGcod SGCod; - cod_SPcod SPcod; + //////////////////////////////////////// + void read(infile_base *file, cod_type type); + + //////////////////////////////////////// + void read(infile_base* file, cod_type type, ui32 num_comps, + param_cod* cod); + + //////////////////////////////////////// + void update_atk(const param_atk* atk); + + public: // COC_MAIN only functions + //////////////////////////////////////// + bool is_dfs_defined() const + { return (SPcod.num_decomp & 0x80) != 0; } + + //////////////////////////////////////// + ui16 get_dfs_index() const // cannot be more than 15 + { return SPcod.num_decomp & 0xF; } + + private: // Common variables + cod_type type; // The type of this cod structure + ui16 Lcod; // serves as Lcod and Scod + ui8 Scod; // serves as Scod and Scoc + cod_SGcod SGCod; // Used in COD and copied to COC + cod_SPcod SPcod; // serves as SPcod and SPcoc + + private: // COC only variables + param_cod* parent; // parent COD structure + ui16 comp_idx; // component index of this COC structure + const param_atk* atk; // useful when SPcod.wavelet_trans > 1 }; /////////////////////////////////////////////////////////////////////////// @@ -452,11 +538,7 @@ namespace ojph { public: param_qcd() { - Lqcd = 0; - Sqcd = 0; - for (int i = 0; i < 97; ++i) - u16_SPqcd[i] = 0; - num_subbands = 0; + memset(this, 0, sizeof(param_qcd)); base_delta = -1.0f; } @@ -466,7 +548,7 @@ namespace 
ojph { { int num_decomps = cod.get_num_decompositions(); num_subbands = 1 + 3 * num_decomps; - if (cod.is_reversible()) + if (cod.get_wavelet_kern() == param_cod::DWT_REV53) { ui32 bit_depth = 0; for (ui32 i = 0; i < siz.get_num_components(); ++i) @@ -474,7 +556,7 @@ namespace ojph { set_rev_quant(num_decomps, bit_depth, cod.is_employing_color_transform()); } - else + else if (cod.get_wavelet_kern() == param_cod::DWT_IRV97) { if (base_delta == -1.0f) { ui32 bit_depth = 0; @@ -485,6 +567,8 @@ namespace ojph { } set_irrev_quant(num_decomps); } + else + assert(0); } ui32 get_num_guard_bits() const; ui32 get_MAGBp() const; @@ -554,7 +638,7 @@ namespace ojph { void check_validity(const param_cod& cod, const param_qcd& qcd) { - if (cod.is_reversible()) + if (cod.get_wavelet_kern() == param_cod::DWT_REV53) Ccap[0] &= 0xFFDF; else Ccap[0] |= 0x0020; @@ -650,18 +734,6 @@ namespace ojph { ui32 next_pair_index; }; - /////////////////////////////////////////////////////////////////////////// - // - // - // - // - // - /////////////////////////////////////////////////////////////////////////// - struct param_coc : public param_cod - { - - }; - /////////////////////////////////////////////////////////////////////////// // // diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 82371bd7..0cc7e3b9 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -166,7 +166,7 @@ namespace ojph { //allocate lines if (skipped_res_for_recon == false) { - bool reversible = cdp->is_reversible(); + bool reversible = (cdp->get_wavelet_kern() == param_cod::DWT_REV53); ui32 num_lines = reversible ? 4 : 6; allocator->pre_alloc_obj(num_lines); @@ -321,7 +321,7 @@ namespace ojph { //allocate lines if (skipped_res_for_recon == false) { - this->reversible = cdp->is_reversible(); + this->reversible = cdp->get_wavelet_kern() == param_cod::DWT_REV53; this->num_lines = this->reversible ? 
4 : 6; lines = allocator->post_alloc_obj(num_lines); diff --git a/src/core/codestream/ojph_subband.cpp b/src/core/codestream/ojph_subband.cpp index eb958bfb..ba6c5b96 100644 --- a/src/core/codestream/ojph_subband.cpp +++ b/src/core/codestream/ojph_subband.cpp @@ -112,7 +112,7 @@ namespace ojph { this->parent = res; const param_cod* cdp = codestream->get_cod(); - this->reversible = cdp->is_reversible(); + this->reversible = cdp->get_wavelet_kern() == param_cod::DWT_REV53; size log_cb = cdp->get_log_block_dims(); log_PP = cdp->get_log_precinct_size(res_num); diff --git a/src/core/codestream/ojph_tile.cpp b/src/core/codestream/ojph_tile.cpp index 0ad4acd3..38bcd686 100644 --- a/src/core/codestream/ojph_tile.cpp +++ b/src/core/codestream/ojph_tile.cpp @@ -214,7 +214,7 @@ namespace ojph { //allocate lines const param_cod* cdp = codestream->get_cod(); - this->reversible = cdp->is_reversible(); + this->reversible = cdp->get_wavelet_kern() == param_cod::DWT_REV53; this->employ_color_transform = cdp->is_employing_color_transform(); if (this->employ_color_transform) { From 0363e40896ed45f8ce826d50053ae2fe17e448d5 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 30 Mar 2024 18:24:20 +1100 Subject: [PATCH 05/37] Warning fix. --- src/core/codestream/ojph_params.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 07446c0f..5a76f24c 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -850,7 +850,7 @@ namespace ojph { for (int i = 0; i <= get_num_decompositions(); ++i) if (file->read(&SPcod.precinct_size[i], 1) != 1) OJPH_ERROR(0x0005012A, "error reading COC segment"); - ui16 t = 9; + ui32 t = 9; t += num_comps < 257 ? 0 : 1; t += (Scod & 1) ? 
1 + get_num_decompositions() : 0; if (Lcod != t) From 02f6967ecbb9472227f0154ef93b6ce65e3266a5 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sun, 31 Mar 2024 13:38:21 +1100 Subject: [PATCH 06/37] More changes to param_atk. --- src/apps/ojph_compress/ojph_compress.cpp | 98 +++++++++---------- src/core/codestream/ojph_codestream_local.cpp | 15 ++- src/core/codestream/ojph_codestream_local.h | 5 +- src/core/codestream/ojph_params.cpp | 58 ++++++++--- src/core/codestream/ojph_params_local.h | 16 ++- src/core/codestream/ojph_resolution.cpp | 4 +- src/core/codestream/ojph_subband.cpp | 2 +- src/core/codestream/ojph_tile.cpp | 2 +- 8 files changed, 127 insertions(+), 73 deletions(-) diff --git a/src/apps/ojph_compress/ojph_compress.cpp b/src/apps/ojph_compress/ojph_compress.cpp index 42befaff..0ee86f7f 100644 --- a/src/apps/ojph_compress/ojph_compress.cpp +++ b/src/apps/ojph_compress/ojph_compress.cpp @@ -917,55 +917,55 @@ int main(int argc, char * argv[]) { } else if (is_matching(".dpx", v)) { - dpx.open(input_filename); - ojph::param_siz siz = codestream.access_siz(); - siz.set_image_extent(ojph::point(image_offset.x + dpx.get_size().w, - image_offset.y + dpx.get_size().h)); - ojph::ui32 num_comps = dpx.get_num_components(); - siz.set_num_components(num_comps); - //if (num_bit_depths > 0) - // dpx.set_bit_depth(num_bit_depths, bit_depth); - for (ojph::ui32 c = 0; c < num_comps; ++c) - siz.set_component(c, dpx.get_comp_subsampling(c), - dpx.get_bit_depth(c), dpx.get_is_signed(c)); - siz.set_image_offset(image_offset); - siz.set_tile_size(tile_size); - siz.set_tile_offset(tile_offset); - - ojph::param_cod cod = codestream.access_cod(); - cod.set_num_decomposition(num_decompositions); - cod.set_block_dims(block_size.w, block_size.h); - if (num_precincts != -1) - cod.set_precinct_size(num_precincts, precinct_size); - cod.set_progression_order(prog_order); - if (employ_color_transform == -1 && num_comps >= 3) - cod.set_color_transform(true); - else - 
cod.set_color_transform(employ_color_transform == 1); - cod.set_reversible(reversible); - if (!reversible && quantization_step != -1) - codestream.access_qcd().set_irrev_quant(quantization_step); - codestream.set_planar(false); - if (profile_string[0] != '\0') - codestream.set_profile(profile_string); - codestream.set_tilepart_divisions(tileparts_at_resolutions, - tileparts_at_components); - codestream.request_tlm_marker(tlm_marker); - - if (dims.w != 0 || dims.h != 0) - OJPH_WARN(0x01000071, - "-dims option is not needed and was not used\n"); - if (num_components != 0) - OJPH_WARN(0x01000072, - "-num_comps is not needed and was not used\n"); - if (is_signed[0] != -1) - OJPH_WARN(0x01000073, - "-signed is not needed and was not used\n"); - if (comp_downsampling[0].x != 0 || comp_downsampling[0].y != 0) - OJPH_WARN(0x01000075, - "-downsamp is not needed and was not used\n"); - - base = &dpx; + dpx.open(input_filename); + ojph::param_siz siz = codestream.access_siz(); + siz.set_image_extent(ojph::point(image_offset.x + dpx.get_size().w, + image_offset.y + dpx.get_size().h)); + ojph::ui32 num_comps = dpx.get_num_components(); + siz.set_num_components(num_comps); + //if (num_bit_depths > 0) + // dpx.set_bit_depth(num_bit_depths, bit_depth); + for (ojph::ui32 c = 0; c < num_comps; ++c) + siz.set_component(c, dpx.get_comp_subsampling(c), + dpx.get_bit_depth(c), dpx.get_is_signed(c)); + siz.set_image_offset(image_offset); + siz.set_tile_size(tile_size); + siz.set_tile_offset(tile_offset); + + ojph::param_cod cod = codestream.access_cod(); + cod.set_num_decomposition(num_decompositions); + cod.set_block_dims(block_size.w, block_size.h); + if (num_precincts != -1) + cod.set_precinct_size(num_precincts, precinct_size); + cod.set_progression_order(prog_order); + if (employ_color_transform == -1 && num_comps >= 3) + cod.set_color_transform(true); + else + cod.set_color_transform(employ_color_transform == 1); + cod.set_reversible(reversible); + if (!reversible && 
quantization_step != -1) + codestream.access_qcd().set_irrev_quant(quantization_step); + codestream.set_planar(false); + if (profile_string[0] != '\0') + codestream.set_profile(profile_string); + codestream.set_tilepart_divisions(tileparts_at_resolutions, + tileparts_at_components); + codestream.request_tlm_marker(tlm_marker); + + if (dims.w != 0 || dims.h != 0) + OJPH_WARN(0x01000071, + "-dims option is not needed and was not used\n"); + if (num_components != 0) + OJPH_WARN(0x01000072, + "-num_comps is not needed and was not used\n"); + if (is_signed[0] != -1) + OJPH_WARN(0x01000073, + "-signed is not needed and was not used\n"); + if (comp_downsampling[0].x != 0 || comp_downsampling[0].y != 0) + OJPH_WARN(0x01000075, + "-downsamp is not needed and was not used\n"); + + base = &dpx; } else #if defined( OJPH_ENABLE_TIFF_SUPPORT) diff --git a/src/core/codestream/ojph_codestream_local.cpp b/src/core/codestream/ojph_codestream_local.cpp index d4d20a38..737daffb 100644 --- a/src/core/codestream/ojph_codestream_local.cpp +++ b/src/core/codestream/ojph_codestream_local.cpp @@ -84,6 +84,12 @@ namespace ojph { used_coc_fields = 0; coc = coc_store; + atk = atk_store; + atk[0].init_irv97(); + atk[0].link(atk_store + 1); + atk[1].init_rev53(); + atk[1].link(atk_store + 2); + allocator = new mem_fixed_allocator; elastic_alloc = new mem_elastic_allocator(1048576); //1 megabyte @@ -557,7 +563,8 @@ namespace ojph { { //finalize siz.check_validity(); - cod.check_validity(siz); + cod.check_validity(siz); + cod.update_atk(atk); qcd.check_validity(siz, cod); cap.check_validity(cod, qcd); if (profile == OJPH_PN_IMF) @@ -821,16 +828,16 @@ namespace ojph { else if (marker_idx == 14) dfs.read(file); else if (marker_idx == 15) - atk.read(file); + atk[2].read(file); else if (marker_idx == 16) break; else OJPH_ERROR(0x00030051, "File ended before finding a tile segment"); } - cod.update_atk(&atk); + cod.update_atk(atk); for (int i = 0; i < used_coc_fields; ++i) - coc[i].update_atk(&atk); + 
coc[i].update_atk(atk); if (received_markers != 3) OJPH_ERROR(0x00030052, "markers error, COD and QCD are required"); diff --git a/src/core/codestream/ojph_codestream_local.h b/src/core/codestream/ojph_codestream_local.h index 34ffc355..5bfa09d4 100644 --- a/src/core/codestream/ojph_codestream_local.h +++ b/src/core/codestream/ojph_codestream_local.h @@ -168,7 +168,10 @@ namespace ojph { private: // these are from Part 2 of the standard param_dfs dfs; // downsmapling factor styles - param_atk atk; // arbitrary transformation kernels + param_atk* atk; // a pointer to atk + param_atk atk_store[3];// 0 and 1 are for DWT from Part 1, 2 onward are + // for arbitrary transformation kernels + private: mem_fixed_allocator *allocator; diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 5a76f24c..1735c819 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -279,7 +279,7 @@ namespace ojph { return state->get_wavelet_kern() == local::param_cod::DWT_REV53; else { assert(state->atk != NULL); - return state->atk->is_reversible(); + return state->access_atk()->is_reversible(); } } @@ -1578,6 +1578,7 @@ namespace ojph { while (p->next != NULL) p = p->next; p->next = new param_atk; + p->alloced_next = true; p = p->next; return p->read(file); } @@ -1598,11 +1599,14 @@ namespace ojph { if (is_reversible() && get_coeff_type() >= 2) // reversible & float OJPH_ERROR(0x000500E5, "ATK-Satk parameter does not make sense. " "It employs floats with reversible filtering."); + if (is_using_ws_extension() == false) // only sym. 
ext is supported + OJPH_ERROR(0x000500E6, "ATK-Satk parameter requires constant " + "boundary extension, which is not supported yet."); if (is_reversible() == false) if (read_coefficient(file, Katk) == false) - OJPH_ERROR(0x000500E6, "error reading ATK-Katk parameter"); + OJPH_ERROR(0x000500E7, "error reading ATK-Katk parameter"); if (file->read(&Natk, 1) != 1) - OJPH_ERROR(0x000500E7, "error reading ATK-Natk parameter"); + OJPH_ERROR(0x000500E8, "error reading ATK-Natk parameter"); if (Natk > max_steps) { if (d != d_store) // was this allocated -- very unlikely delete[] d; @@ -1615,21 +1619,21 @@ namespace ojph { for (int s = 0; s < Natk; ++s) { if (file->read(&d[s].rev.Eatk, 1) != 1) - OJPH_ERROR(0x000500E8, "error reading ATK-Eatk parameter"); + OJPH_ERROR(0x000500E9, "error reading ATK-Eatk parameter"); if (file->read(&d[s].rev.Batk, 2) != 2) - OJPH_ERROR(0x000500E9, "error reading ATK-Batk parameter"); + OJPH_ERROR(0x000500EA, "error reading ATK-Batk parameter"); d[s].rev.Batk = (si16)swap_byte((ui16)d[s].rev.Batk); ui8 LCatk; if (file->read(&LCatk, 1) != 1) - OJPH_ERROR(0x000500EA, "error reading ATK-LCatk parameter"); + OJPH_ERROR(0x000500EB, "error reading ATK-LCatk parameter"); if (LCatk == 0) - OJPH_ERROR(0x000500EB, "Encountered a ATK-LCatk value of zero; " + OJPH_ERROR(0x000500EC, "Encountered a ATK-LCatk value of zero; " "something is wrong."); if (LCatk > 1) - OJPH_ERROR(0x000500EC, "ATK-LCatk value greater than 1; " + OJPH_ERROR(0x000500ED, "ATK-LCatk value greater than 1; " "that is, a multitap filter is not supported"); if (read_coefficient(file, d[s].rev.Aatk) == false) - OJPH_ERROR(0x000500ED, "Error reding ATK-Aatk parameter"); + OJPH_ERROR(0x000500EE, "Error reding ATK-Aatk parameter"); } } else @@ -1638,19 +1642,47 @@ namespace ojph { { ui8 LCatk; if (file->read(&LCatk, 1) != 1) - OJPH_ERROR(0x000500EE, "error reading ATK-LCatk parameter"); + OJPH_ERROR(0x000500EF, "error reading ATK-LCatk parameter"); if (LCatk == 0) - OJPH_ERROR(0x000500EF, 
"Encountered a ATK-LCatk value of zero; " + OJPH_ERROR(0x000500F0, "Encountered a ATK-LCatk value of zero; " "something is wrong."); if (LCatk > 1) - OJPH_ERROR(0x000500F0, "ATK-LCatk value greater than 1; " + OJPH_ERROR(0x000500F1, "ATK-LCatk value greater than 1; " "that is, a multitap filter is not supported."); if (read_coefficient(file, d[s].irv.Aatk) == false) - OJPH_ERROR(0x000500F1, "Error reding ATK-Aatk parameter"); + OJPH_ERROR(0x000500F2, "Error reding ATK-Aatk parameter"); } } return true; } + + ////////////////////////////////////////////////////////////////////////// + void param_atk::init_irv97() + { + Satk = 0x4a00; // illegal because ATK = 0 + Katk = (float)1.230174104914001; + Natk = 4; + Latk = 5 + Natk + sizeof(float) * (1 + Natk); // (A-4) in T.801 + d[0].irv.Aatk = (float)-1.586134342059924; + d[1].irv.Aatk = (float)-0.052980118572961; + d[2].irv.Aatk = (float)0.882911075530934; + d[3].irv.Aatk = (float)0.443506852043971; + } + + ////////////////////////////////////////////////////////////////////////// + void param_atk::init_rev53() + { + Satk = 0x5801; // illegal because ATK = 1 + Natk = 2; + Latk = 5 + 2 * Natk + sizeof(ui8) * (Natk + Natk); // (A-4) in T.801 + d[0].rev.Aatk = -1; + d[0].rev.Batk = 0; + d[0].rev.Eatk = 1; + d[1].rev.Aatk = 1; + d[1].rev.Batk = 2; + d[1].rev.Eatk = 2; + } + } // !local namespace } // !ojph namespace diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index 91447f15..59425da3 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -503,6 +503,9 @@ namespace ojph { //////////////////////////////////////// void update_atk(const param_atk* atk); + //////////////////////////////////////// + const param_atk* access_atk() const { return atk; } + public: // COC_MAIN only functions //////////////////////////////////////// bool is_dfs_defined() const @@ -814,7 +817,10 @@ namespace ojph { public: // member functions param_atk() { 
init(); } ~param_atk() { - if (next) delete next; + if (next && alloced_next) { + delete next; + next = NULL; + } if (d != NULL && d != d_store) { delete[] d; init(false); @@ -828,13 +834,17 @@ namespace ojph { memset(this, 0, sizeof(param_atk)); d = d_store; max_steps = sizeof(d_store) / sizeof(data); } + void init_irv97(); + void init_rev53(); + void link(param_atk* next) + { assert(this->next == NULL); this->next = next; alloced_next = false; } ui8 get_index() const { return (ui8)(Satk & 0xFF); } int get_coeff_type() const { return (Satk >> 8) & 0x7; } bool is_whole_sample() const { return (Satk & 0x800) != 0; } bool is_reversible() const { return (Satk & 0x1000) != 0; } bool is_m_init0() const { return (Satk & 0x2000) == 0; } - bool is_using_ws_extension() const { return (Satk & 0x4000) != 0x4000; } + bool is_using_ws_extension() const { return (Satk & 0x4000) != 0; } const param_atk* get_atk(int index) const; const data* get_step(ui32 s) const { assert(s < Natk); return d + s; } @@ -848,6 +858,8 @@ namespace ojph { data d_store[6]; // step coefficient param_atk* next; // used for chaining if more than one atk segment // exist in the codestream + bool alloced_next; // true if next was allocated, not just set to an + // existing object }; } // !local namespace } // !ojph namespace diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 0cc7e3b9..03d1278d 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -166,7 +166,7 @@ namespace ojph { //allocate lines if (skipped_res_for_recon == false) { - bool reversible = (cdp->get_wavelet_kern() == param_cod::DWT_REV53); + bool reversible = cdp->access_atk()->is_reversible(); ui32 num_lines = reversible ? 
4 : 6; allocator->pre_alloc_obj(num_lines); @@ -321,7 +321,7 @@ namespace ojph { //allocate lines if (skipped_res_for_recon == false) { - this->reversible = cdp->get_wavelet_kern() == param_cod::DWT_REV53; + this->reversible = cdp->access_atk()->is_reversible(); this->num_lines = this->reversible ? 4 : 6; lines = allocator->post_alloc_obj(num_lines); diff --git a/src/core/codestream/ojph_subband.cpp b/src/core/codestream/ojph_subband.cpp index ba6c5b96..c65a2ebb 100644 --- a/src/core/codestream/ojph_subband.cpp +++ b/src/core/codestream/ojph_subband.cpp @@ -112,7 +112,7 @@ namespace ojph { this->parent = res; const param_cod* cdp = codestream->get_cod(); - this->reversible = cdp->get_wavelet_kern() == param_cod::DWT_REV53; + this->reversible = cdp->access_atk()->is_reversible(); size log_cb = cdp->get_log_block_dims(); log_PP = cdp->get_log_precinct_size(res_num); diff --git a/src/core/codestream/ojph_tile.cpp b/src/core/codestream/ojph_tile.cpp index 38bcd686..b7cb52cd 100644 --- a/src/core/codestream/ojph_tile.cpp +++ b/src/core/codestream/ojph_tile.cpp @@ -214,7 +214,7 @@ namespace ojph { //allocate lines const param_cod* cdp = codestream->get_cod(); - this->reversible = cdp->get_wavelet_kern() == param_cod::DWT_REV53; + this->reversible = cdp->access_atk()->is_reversible(); this->employ_color_transform = cdp->is_employing_color_transform(); if (this->employ_color_transform) { From 32f17b5d4d9a95f202b52061c899d2b25fa926a6 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sun, 31 Mar 2024 13:55:11 +1100 Subject: [PATCH 07/37] A bug fix. 
--- src/core/codestream/ojph_params.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 1735c819..67c8fad3 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -860,15 +860,11 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void param_cod::update_atk(const param_atk* atk) { - if (SPcod.wavelet_trans > 1) { - this->atk = atk->get_atk(SPcod.wavelet_trans); - if (this->atk == NULL) - OJPH_ERROR(0x00050131, "A COD/COC segment employs the DWT kernel " - "atk=%d, but a corresponding ATK segment cannot be found", - SPcod.wavelet_trans); - } - else - this->atk = NULL; + this->atk = atk->get_atk(SPcod.wavelet_trans); + if (this->atk == NULL) + OJPH_ERROR(0x00050131, "A COD/COC segment employs the DWT kernel " + "atk=%d, but a corresponding ATK segment cannot be found", + SPcod.wavelet_trans); } ////////////////////////////////////////////////////////////////////////// @@ -1663,7 +1659,8 @@ namespace ojph { Satk = 0x4a00; // illegal because ATK = 0 Katk = (float)1.230174104914001; Natk = 4; - Latk = 5 + Natk + sizeof(float) * (1 + Natk); // (A-4) in T.801 + // next is (A-4) in T.801 second line + Latk = (ui16)(5 + Natk + sizeof(float) * (1 + Natk)); d[0].irv.Aatk = (float)-1.586134342059924; d[1].irv.Aatk = (float)-0.052980118572961; d[2].irv.Aatk = (float)0.882911075530934; @@ -1675,7 +1672,8 @@ namespace ojph { { Satk = 0x5801; // illegal because ATK = 1 Natk = 2; - Latk = 5 + 2 * Natk + sizeof(ui8) * (Natk + Natk); // (A-4) in T.801 + // next is (A-4) in T.801 fourth line + Latk = (ui16)(5 + 2 * Natk + sizeof(ui8) * (Natk + Natk)); d[0].rev.Aatk = -1; d[0].rev.Batk = 0; d[0].rev.Eatk = 1; From abe4ccf67b05eb1356261fed35965300cde13c47 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sun, 31 Mar 2024 14:06:58 +1100 Subject: [PATCH 08/37] Remove warnings. 
--- src/core/codestream/ojph_params.cpp | 12 ++++++------ src/core/codestream/ojph_params_local.h | 15 +++++++++------ src/core/codestream/ojph_subband.cpp | 2 +- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 67c8fad3..affa222a 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -876,7 +876,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - void param_qcd::set_rev_quant(int num_decomps, ui32 bit_depth, + void param_qcd::set_rev_quant(ui32 num_decomps, ui32 bit_depth, bool is_employing_color_transform) { int guard_bits = 1; @@ -901,7 +901,7 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - void param_qcd::set_irrev_quant(int num_decomps) + void param_qcd::set_irrev_quant(ui32 num_decomps) { int guard_bits = 1; Sqcd = (ui8)((guard_bits<<5)|0x2);//one guard bit, scalar quantization @@ -1404,8 +1404,8 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - int param_dfs::get_subband_idx(ui32 num_decompositions, ui32 resolution, - ui32 subband) const + ui32 param_dfs::get_subband_idx(ui32 num_decompositions, ui32 resolution, + ui32 subband) const { assert((resolution == 0 && subband == 0) || (resolution > 0 && resolution <= Ids && @@ -1413,7 +1413,7 @@ namespace ojph { ui32 ns[4] = { 0, 3, 2, 2 }; - int idx = 0; + ui32 idx = 0; if (resolution > 0) { idx = 0; @@ -1538,7 +1538,7 @@ namespace ojph { e <<= 23; // move bits to their location s.i = 0; s.i |= ((ui32)(v >> 32) & 0x80000000); // copy sign bit - s.i |= e; // copy exponent + s.i |= (ui32)e; // copy exponent s.i |= (ui32)((v >> 25) & 0x007FFFFF); // copy 23 mantissa K = s.f; } diff --git a/src/core/codestream/ojph_params_local.h 
b/src/core/codestream/ojph_params_local.h index 59425da3..e8e43f8f 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -541,7 +541,10 @@ namespace ojph { public: param_qcd() { - memset(this, 0, sizeof(param_qcd)); + Lqcd = 0; + Sqcd = 0; + memset(u16_SPqcd, 0, sizeof(u16_SPqcd)); + num_subbands = 0; base_delta = -1.0f; } @@ -549,7 +552,7 @@ namespace ojph { void check_validity(const param_siz& siz, const param_cod& cod) { - int num_decomps = cod.get_num_decompositions(); + ui32 num_decomps = cod.get_num_decompositions(); num_subbands = 1 + 3 * num_decomps; if (cod.get_wavelet_kern() == param_cod::DWT_REV53) { @@ -585,9 +588,9 @@ namespace ojph { void read(infile_base *file); protected: - void set_rev_quant(int num_decomps, ui32 bit_depth, + void set_rev_quant(ui32 num_decomps, ui32 bit_depth, bool is_employing_color_transform); - void set_irrev_quant(int num_decomps); + void set_irrev_quant(ui32 num_decomps); protected: ui16 Lqcd; @@ -766,8 +769,8 @@ namespace ojph { // decomp_level is the decomposition level, starting from 1 for highest // resolution to num_decomps for the coarsest resolution dfs_dwt_type get_dwt_type(ui32 decomp_level) const; - int get_subband_idx(ui32 num_decompositions, ui32 resolution, - ui32 subband) const; + ui32 get_subband_idx(ui32 num_decompositions, ui32 resolution, + ui32 subband) const; private: // member variables ui16 Ldfs; // length of the segment marker diff --git a/src/core/codestream/ojph_subband.cpp b/src/core/codestream/ojph_subband.cpp index c65a2ebb..6348e98b 100644 --- a/src/core/codestream/ojph_subband.cpp +++ b/src/core/codestream/ojph_subband.cpp @@ -126,7 +126,7 @@ namespace ojph { cur_cb_height = 0; param_qcd* qcd = codestream->access_qcd(parent->get_comp_num()); const param_cod* cod = codestream->get_cod(); - int num_decomps = cod->get_num_decompositions(); + ui32 num_decomps = cod->get_num_decompositions(); this->K_max = qcd->get_Kmax(NULL, num_decomps, this->res_num, 
band_num); if (!reversible) { From fc377de1de6eabca5b193b6da8b36c5f189a2800 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sun, 31 Mar 2024 14:19:09 +1100 Subject: [PATCH 09/37] Warning/bug fix. --- src/core/codestream/ojph_params.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index affa222a..3795d4b8 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -1551,15 +1551,14 @@ namespace ojph { { int coeff_type = get_coeff_type(); if (coeff_type == 0) { - ui8 v; + si8 v; if (file->read(&v, 1) != 1) return false; K = v; } else if (coeff_type == 1) { - ui16 v; + si16 v; if (file->read(&v, 2) != 2) return false; - v = swap_byte(v); - K = v; + K = (si16)swap_byte((ui16)v); } else return false; From 1c08cf3b7a3842611a18d633c76b6c5932f1b600 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Tue, 2 Apr 2024 21:07:54 +1100 Subject: [PATCH 10/37] Added COC. ATK is used for wavelet transform. Modified wavelet synthesis (no acceleration yet). Wavelet analysis is broken. Close to enabling DFS in synthesis. 
--- src/core/codestream/ojph_codestream_local.h | 15 +- src/core/codestream/ojph_params.cpp | 10 +- src/core/codestream/ojph_params_local.h | 44 +- src/core/codestream/ojph_resolution.cpp | 811 +++++++++++--------- src/core/codestream/ojph_resolution.h | 15 +- src/core/codestream/ojph_subband.cpp | 9 +- src/core/codestream/ojph_subband.h | 2 +- src/core/codestream/ojph_tile.cpp | 2 +- src/core/codestream/ojph_tile_comp.cpp | 7 +- src/core/codestream/ojph_tile_comp.h | 3 +- src/core/common/ojph_mem.h | 10 + src/core/transform/ojph_transform.cpp | 235 ++++++ src/core/transform/ojph_transform.h | 42 + src/core/transform/ojph_transform_local.h | 38 + 14 files changed, 839 insertions(+), 404 deletions(-) diff --git a/src/core/codestream/ojph_codestream_local.h b/src/core/codestream/ojph_codestream_local.h index 5bfa09d4..8e77eb17 100644 --- a/src/core/codestream/ojph_codestream_local.h +++ b/src/core/codestream/ojph_codestream_local.h @@ -82,8 +82,19 @@ namespace ojph { { return &siz; } ojph::param_cod access_cod() //return externally wrapped cod { return ojph::param_cod(&cod); } - const param_cod* get_cod() //return internal code + const param_cod* get_cod() //return internal code { return &cod; } + const param_cod* get_cod(ui32 comp_num) //return internal code + { + if (used_coc_fields == 0) + return &cod; + else { + for (int i = 0; i < used_coc_fields; ++i) + if (coc[i].get_comp_num() == comp_num) + return coc + i; + return &cod; + } + } param_qcd* access_qcd(ui32 comp_num) { if (used_qcc_fields > 0) @@ -92,6 +103,8 @@ namespace ojph { return qcc + v; return &qcd; } + const param_dfs* access_dfs() + { if (dfs.exists()) return &dfs; else return NULL; } mem_fixed_allocator* get_allocator() { return allocator; } mem_elastic_allocator* get_elastic_alloc() { return elastic_alloc; } outfile_base* get_file() { return outfile; } diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 3795d4b8..ef652651 100644 --- 
a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -824,12 +824,12 @@ namespace ojph { ui8 t; if (file->read(&t, 1) != 1) OJPH_ERROR(0x00050122, "error reading COC segment"); - comp_idx = t; + comp_num = t; } else { - if (file->read(&comp_idx, 2) != 2) + if (file->read(&comp_num, 2) != 2) OJPH_ERROR(0x00050123, "error reading COC segment"); - comp_idx = swap_byte(comp_idx); + comp_num = swap_byte(comp_num); } if (file->read(&Scod, 1) != 1) OJPH_ERROR(0x00050124, "error reading COC segment"); @@ -1393,8 +1393,6 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// param_dfs::dfs_dwt_type param_dfs::get_dwt_type(ui32 decomp_level) const { - assert(decomp_level > 0 && decomp_level <= Ids); - decomp_level = ojph_min(decomp_level, Ids); ui32 d = decomp_level - 1; // decomp_level starts from 1 ui32 idx = d >> 2; // complete bytes @@ -1605,7 +1603,7 @@ namespace ojph { if (Natk > max_steps) { if (d != d_store) // was this allocated -- very unlikely delete[] d; - d = new data[Natk]; + d = new lifting_step[Natk]; max_steps = Natk; } diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index e8e43f8f..c08d750e 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -515,6 +515,10 @@ namespace ojph { ui16 get_dfs_index() const // cannot be more than 15 { return SPcod.num_decomp & 0xF; } + //////////////////////////////////////// + ui32 get_comp_num() + { assert(type == COC_MAIN); return comp_num; } + private: // Common variables cod_type type; // The type of this cod structure ui16 Lcod; // serves as Lcod and Scod @@ -524,7 +528,7 @@ namespace ojph { private: // COC only variables param_cod* parent; // parent COD structure - ui16 comp_idx; // component index of this COC structure + ui16 comp_num; // component index of this COC structure const param_atk* atk; // useful when SPcod.wavelet_trans > 1 }; @@ -788,16 +792,9 
@@ namespace ojph { // // /////////////////////////////////////////////////////////////////////////// - struct param_atk - { - // Limitations: - // Arbitrary filters (ARB) are not supported - // Up to 6 steps are supported -- more than 6 are not supported - // Only one coefficient per step -- first order filter - // Only even-indexed subsequence in first reconstruction step, - // m_init = 0 is supported + // data structures used by param_atk - public: // data structures used by this object + union lifting_step { struct irv_data { // si8 Oatk; // only for arbitrary filter // ui8 LCatk; // number of lifting coefficients in a step @@ -812,10 +809,18 @@ namespace ojph { si16 Aatk; // lifting coefficient }; - union data { - irv_data irv; - rev_data rev; - }; + irv_data irv; + rev_data rev; + }; + + struct param_atk + { + // Limitations: + // Arbitrary filters (ARB) are not supported + // Up to 6 steps are supported -- more than 6 are not supported + // Only one coefficient per step -- first order filter + // Only even-indexed subsequence in first reconstruction step, + // m_init = 0 is supported public: // member functions param_atk() { init(); } @@ -835,7 +840,7 @@ namespace ojph { void init(bool clear_all = true) { if (clear_all) memset(this, 0, sizeof(param_atk)); - d = d_store; max_steps = sizeof(d_store) / sizeof(data); + d = d_store; max_steps = sizeof(d_store) / sizeof(lifting_step); } void init_irv97(); void init_rev53(); @@ -849,16 +854,19 @@ namespace ojph { bool is_m_init0() const { return (Satk & 0x2000) == 0; } bool is_using_ws_extension() const { return (Satk & 0x4000) != 0; } const param_atk* get_atk(int index) const; - const data* get_step(ui32 s) const { assert(s < Natk); return d + s; } + const lifting_step* get_step(ui32 s) const + { assert(s < Natk); return d + s; } + const ui32 get_num_steps() const { return Natk; } + const float get_K() const { return Katk; } private: // member variables ui16 Latk; // structure length ui16 Satk; // carries a 
variety of information float Katk; // only for irreversible scaling factor K ui8 Natk; // number of lifting steps - data* d; // pointer to data, initialized to d_store + lifting_step* d; // pointer to data, initialized to d_store int max_steps; // maximum number of steps without memory allocation - data d_store[6]; // step coefficient + lifting_step d_store[6]; // lifting step coefficient param_atk* next; // used for chaining if more than one atk segment // exist in the codestream bool alloced_next; // true if next was allocated, not just set to an diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 03d1278d..105c57de 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -98,14 +98,39 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void resolution::pre_alloc(codestream* codestream, const rect& res_rect, - const rect& recon_res_rect, ui32 res_num) + const rect& recon_res_rect, + ui32 comp_num, ui32 res_num) { mem_fixed_allocator* allocator = codestream->get_allocator(); - const param_cod* cdp = codestream->get_cod(); - ui32 t = codestream->get_cod()->get_num_decompositions() + const param_cod* cdp = codestream->get_cod(comp_num); + ui32 t = cdp->get_num_decompositions() - codestream->get_skipped_res_for_recon(); bool skipped_res_for_recon = res_num > t; + const param_atk* atk = cdp->access_atk(); + param_dfs::dfs_dwt_type downsampling_style = param_dfs::BIDIR_DWT; + if (cdp->is_dfs_defined()) { + const param_dfs* dfs = codestream->access_dfs(); + if (dfs == NULL) { + OJPH_ERROR(0x00070001, "There is a problem with codestream " + "marker segments. 
COD/COC specifies the use of a DFS marker " + "but there are no DFS markers within the main codestream " + "headers"); + } + else { + ui16 dfs_idx = cdp->get_dfs_index(); + dfs = dfs->get_dfs(dfs_idx); + if (dfs == NULL) { + OJPH_ERROR(0x00070002, "There is a problem with codestream " + "marker segments. COD/COC specifies the use of a DFS marker " + "with index %d, but there are no such marker within the " + "main codestream headers", dfs_idx); + } + ui32 num_decomps = cdp->get_num_decompositions(); + downsampling_style = dfs->get_dwt_type(num_decomps - res_num + 1); + } + } + //create next resolution if (res_num > 0) { @@ -122,7 +147,8 @@ namespace ojph { next_res_rect.siz.h = try1 - try0; resolution::pre_alloc(codestream, next_res_rect, - skipped_res_for_recon ? recon_res_rect : next_res_rect, res_num - 1); + skipped_res_for_recon ? recon_res_rect : next_res_rect, + comp_num, res_num - 1); } //allocate subbands @@ -145,11 +171,11 @@ namespace ojph { band_rect.org.y = tby0; band_rect.siz.w = tbx1 - tbx0; band_rect.siz.h = tby1 - tby0; - subband::pre_alloc(codestream, band_rect, res_num); + subband::pre_alloc(codestream, band_rect, comp_num, res_num); } } else - subband::pre_alloc(codestream, res_rect, res_num); + subband::pre_alloc(codestream, res_rect, comp_num, res_num); //prealloc precincts size log_PP = cdp->get_log_precinct_size(res_num); @@ -166,13 +192,15 @@ namespace ojph { //allocate lines if (skipped_res_for_recon == false) { - bool reversible = cdp->access_atk()->is_reversible(); - ui32 num_lines = reversible ? 
4 : 6; - allocator->pre_alloc_obj(num_lines); + ui32 num_steps = atk->get_num_steps(); + allocator->pre_alloc_obj(num_steps + 2); + allocator->pre_alloc_obj(num_steps + 2); ui32 width = res_rect.siz.w + 1; - for (ui32 i = 0; i < num_lines; ++i) + for (ui32 i = 0; i < num_steps; ++i) allocator->pre_alloc_data(width, 1); + allocator->pre_alloc_data(width, 1); + allocator->pre_alloc_data(width, 1); } } @@ -187,12 +215,12 @@ namespace ojph { { mem_fixed_allocator* allocator = codestream->get_allocator(); elastic = codestream->get_elastic_alloc(); - ui32 t, num_decomps = codestream->get_cod()->get_num_decompositions(); + const param_cod* cdp = codestream->get_cod(comp_num); + ui32 t, num_decomps = cdp->get_num_decompositions(); t = num_decomps - codestream->get_skipped_res_for_recon(); skipped_res_for_recon = res_num > t; t = num_decomps - codestream->get_skipped_res_for_read(); skipped_res_for_read = res_num > t; - const param_cod* cdp = codestream->get_cod(); this->comp_downsamp = comp_downsamp; this->parent_comp = parent_tile_comp; @@ -201,6 +229,31 @@ namespace ojph { this->comp_num = comp_num; this->res_num = res_num; this->num_bytes = 0; + this->atk = cdp->access_atk(); + this->downsampling_style = param_dfs::BIDIR_DWT; + if (cdp->is_dfs_defined()) { + const param_dfs* dfs = codestream->access_dfs(); + if (dfs == NULL) { + OJPH_ERROR(0x00070011, "There is a problem with codestream " + "marker segments. COD/COC specifies the use of a DFS marker " + "but there are no DFS markers within the main codestream " + "headers"); + } + else { + ui16 dfs_idx = cdp->get_dfs_index(); + dfs = dfs->get_dfs(dfs_idx); + if (dfs == NULL) { + OJPH_ERROR(0x00070012, "There is a problem with codestream " + "marker segments. 
COD/COC specifies the use of a DFS marker " + "with index %d, but there are no such marker within the " + "main codestream headers", dfs_idx); + } + ui32 num_decomps = cdp->get_num_decompositions(); + this->downsampling_style = + dfs->get_dwt_type(num_decomps - res_num + 1); + } + } + //finalize next resolution if (res_num > 0) { @@ -321,13 +374,33 @@ namespace ojph { //allocate lines if (skipped_res_for_recon == false) { - this->reversible = cdp->access_atk()->is_reversible(); - this->num_lines = this->reversible ? 4 : 6; - lines = allocator->post_alloc_obj(num_lines); - + this->atk = cdp->access_atk(); + this->reversible = atk->is_reversible(); + this->num_steps = atk->get_num_steps(); + // create line buffers and lifting_bufs + lines = allocator->post_alloc_obj(num_steps + 2); + ssp = allocator->post_alloc_obj(num_steps + 2); + sig = ssp + num_steps; + aug = ssp + num_steps + 1; + + // initiate lifting_bufs + for (ui32 i = 0; i < num_steps; ++i) { + new (ssp + i) lifting_buf; + ssp[i].line = lines + i; + }; + new (sig) lifting_buf; + sig->line = lines + num_steps; + new (aug) lifting_buf; + aug->line = lines + num_steps + 1; + + // initiate storage of line_buf ui32 width = res_rect.siz.w + 1; - for (ui32 i = 0; i < num_lines; ++i) - lines[i].wrap(allocator->post_alloc_data(width, 1), width, 1); + for (ui32 i = 0; i < num_steps; ++i) + ssp[i].line->wrap( + allocator->post_alloc_data(width, 1), width, 1); + sig->line->wrap(allocator->post_alloc_data(width, 1), width, 1); + aug->line->wrap(allocator->post_alloc_data(width, 1), width, 1); + cur_line = 0; vert_even = (res_rect.org.y & 1) == 0; horz_even = (res_rect.org.x & 1) == 0; @@ -340,271 +413,271 @@ namespace ojph { if (res_num == 0) { assert(num_bands == 1 && child_res == NULL); - bands[0].exchange_buf(lines + 0);//line at location 0 + bands[0].exchange_buf(ssp[0].line);//line at location 0 bands[0].push_line(); return; } - ui32 width = res_rect.siz.w; - if (width == 0) - return; - if (reversible) - { - 
//vertical transform - assert(num_lines >= 4); - if (vert_even) - { - rev_vert_wvlt_fwd_predict(lines, - cur_line > 1 ? lines + 2 : lines, - lines + 1, width); - rev_vert_wvlt_fwd_update(lines + 1, - cur_line > 2 ? lines + 3 : lines + 1, - lines + 2, width); - - // push to horizontal transform lines[2](L) and lines[1] (H) - if (cur_line >= 1) - { - rev_horz_wvlt_fwd_tx(lines + 1, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - } - if (cur_line >= 2) - { - rev_horz_wvlt_fwd_tx(lines + 2, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - } - - if (cur_line >= res_rect.siz.h - 1) - { //finished, so we need to process any lines left - if (cur_line) - { - if (vert_even) - { - rev_vert_wvlt_fwd_update(lines + 1, lines + 1, - lines, width); - //push lines[0] to L - rev_horz_wvlt_fwd_tx(lines, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - else - { - rev_vert_wvlt_fwd_predict(lines + 1, lines + 1, - lines, width); - rev_vert_wvlt_fwd_update(lines, - cur_line > 1 ? 
lines + 2 : lines, - lines + 1, width); - - // push to horizontal transform lines[1](L) and line[0] (H) - //line[0] to H - rev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - //line[1] to L - rev_horz_wvlt_fwd_tx(lines + 1, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - } - else - { //only one line - if (vert_even) - { - //push to L - rev_horz_wvlt_fwd_tx(lines, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - else - { - si32* sp = lines[0].i32; - for (ui32 i = width; i > 0; --i) - *sp++ <<= 1; - //push to H - rev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - } - } - } - - rotate_buffers(lines, lines + 1, lines + 2, lines + 3); - - ++cur_line; - vert_even = !vert_even; - } - else - { - //vertical transform - assert(num_lines >= 6); - if (vert_even) - { - irrev_vert_wvlt_step(lines + 0, - cur_line > 1 ? lines + 2 : lines, - lines + 1, 0, width); - irrev_vert_wvlt_step(lines + 1, - cur_line > 2 ? lines + 3 : lines + 1, - lines + 2, 1, width); - irrev_vert_wvlt_step(lines + 2, - cur_line > 3 ? lines + 4 : lines + 2, - lines + 3, 2, width); - irrev_vert_wvlt_step(lines + 3, - cur_line > 4 ? 
lines + 5 : lines + 3, - lines + 4, 3, width); - - // push to horizontal transform lines[4](L) and lines[3] (H) - if (cur_line >= 3) - { - irrev_vert_wvlt_K(lines + 3, lines + 5, - false, width); - irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - } - if (cur_line >= 4) - { - irrev_vert_wvlt_K(lines + 4, lines + 5, - true, width); - irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - } - - if (cur_line >= res_rect.siz.h - 1) - { //finished, so we need to process any left line - if (cur_line) - { - if (vert_even) - { - irrev_vert_wvlt_step(lines + 1, lines + 1, - lines, 1, width); - irrev_vert_wvlt_step(lines, - cur_line > 1 ? lines + 2 : lines, - lines + 1, 2, width); - irrev_vert_wvlt_step(lines + 1, - cur_line > 2 ? lines + 3 : lines + 1, - lines + 2, 3, width); - irrev_vert_wvlt_step(lines + 1, lines + 1, - lines, 3, width); - //push lines[2] to L, lines[1] to H, and lines[0] to L - if (cur_line >= 2) - { - irrev_vert_wvlt_K(lines + 2, lines + 5, - true, width); - irrev_horz_wvlt_fwd_tx(lines + 5, - child_res->get_line(), bands[1].get_line(), - width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - irrev_vert_wvlt_K(lines + 1, lines + 5, - false, width); - irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - irrev_vert_wvlt_K(lines, lines + 5, - true, width); - irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - else - { - irrev_vert_wvlt_step(lines + 1, lines + 1, - lines, 0, width); - irrev_vert_wvlt_step(lines, - cur_line > 1 ? lines + 2 : lines, - lines + 1, 1, width); - irrev_vert_wvlt_step(lines + 1, - cur_line > 2 ? 
lines + 3 : lines + 1, - lines + 2, 2, width); - irrev_vert_wvlt_step(lines + 2, - cur_line > 3 ? lines + 4 : lines + 2, - lines + 3, 3, width); - - irrev_vert_wvlt_step(lines + 1, lines + 1, - lines, 2, width); - irrev_vert_wvlt_step(lines, - cur_line > 1 ? lines + 2 : lines, - lines + 1, 3, width); - - //push lines[3] L, lines[2] H, lines[1] L, and lines[0] H - if (cur_line >= 3) - { - irrev_vert_wvlt_K(lines + 3, lines + 5, - true, width); - irrev_horz_wvlt_fwd_tx(lines + 5, - child_res->get_line(), bands[1].get_line(), - width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - if (cur_line >= 2) - irrev_vert_wvlt_K(lines + 2, lines + 5, false, width); - else - irrev_vert_wvlt_K(lines, lines + 5, false, width); - irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - irrev_vert_wvlt_K(lines + 1, lines + 5, - true, width); - irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - irrev_vert_wvlt_K(lines, lines + 5, - false, width); - irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - } - } - else - { //only one line - if (vert_even) - { - //push to L - irrev_horz_wvlt_fwd_tx(lines, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - else - { - float* sp = lines[0].f32; - for (ui32 i = width; i > 0; --i) - *sp++ *= 2.0f; - //push to H - irrev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - } - } - } - - rotate_buffers(lines, lines + 1, lines + 2, lines + 3, lines + 4, - lines + 5); - - ++cur_line; - vert_even = !vert_even; - } + //ui32 width = res_rect.siz.w; + //if (width == 0) + // return; + //if (reversible) + 
//{ + // //vertical transform + // assert(num_lines >= 4); + // if (vert_even) + // { + // rev_vert_wvlt_fwd_predict(lines, + // cur_line > 1 ? lines + 2 : lines, + // lines + 1, width); + // rev_vert_wvlt_fwd_update(lines + 1, + // cur_line > 2 ? lines + 3 : lines + 1, + // lines + 2, width); + + // // push to horizontal transform lines[2](L) and lines[1] (H) + // if (cur_line >= 1) + // { + // rev_horz_wvlt_fwd_tx(lines + 1, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // } + // if (cur_line >= 2) + // { + // rev_horz_wvlt_fwd_tx(lines + 2, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // } + + // if (cur_line >= res_rect.siz.h - 1) + // { //finished, so we need to process any lines left + // if (cur_line) + // { + // if (vert_even) + // { + // rev_vert_wvlt_fwd_update(lines + 1, lines + 1, + // lines, width); + // //push lines[0] to L + // rev_horz_wvlt_fwd_tx(lines, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // else + // { + // rev_vert_wvlt_fwd_predict(lines + 1, lines + 1, + // lines, width); + // rev_vert_wvlt_fwd_update(lines, + // cur_line > 1 ? 
lines + 2 : lines, + // lines + 1, width); + + // // push to horizontal transform lines[1](L) and line[0] (H) + // //line[0] to H + // rev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // //line[1] to L + // rev_horz_wvlt_fwd_tx(lines + 1, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // } + // else + // { //only one line + // if (vert_even) + // { + // //push to L + // rev_horz_wvlt_fwd_tx(lines, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // else + // { + // si32* sp = lines[0].i32; + // for (ui32 i = width; i > 0; --i) + // *sp++ <<= 1; + // //push to H + // rev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // } + // } + // } + + // rotate_buffers(lines, lines + 1, lines + 2, lines + 3); + + // ++cur_line; + // vert_even = !vert_even; + //} + //else + //{ + // //vertical transform + // assert(num_lines >= 6); + // if (vert_even) + // { + // irrev_vert_wvlt_step(lines + 0, + // cur_line > 1 ? lines + 2 : lines, + // lines + 1, 0, width); + // irrev_vert_wvlt_step(lines + 1, + // cur_line > 2 ? lines + 3 : lines + 1, + // lines + 2, 1, width); + // irrev_vert_wvlt_step(lines + 2, + // cur_line > 3 ? lines + 4 : lines + 2, + // lines + 3, 2, width); + // irrev_vert_wvlt_step(lines + 3, + // cur_line > 4 ? 
lines + 5 : lines + 3, + // lines + 4, 3, width); + + // // push to horizontal transform lines[4](L) and lines[3] (H) + // if (cur_line >= 3) + // { + // irrev_vert_wvlt_K(lines + 3, lines + 5, + // false, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // } + // if (cur_line >= 4) + // { + // irrev_vert_wvlt_K(lines + 4, lines + 5, + // true, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // } + + // if (cur_line >= res_rect.siz.h - 1) + // { //finished, so we need to process any left line + // if (cur_line) + // { + // if (vert_even) + // { + // irrev_vert_wvlt_step(lines + 1, lines + 1, + // lines, 1, width); + // irrev_vert_wvlt_step(lines, + // cur_line > 1 ? lines + 2 : lines, + // lines + 1, 2, width); + // irrev_vert_wvlt_step(lines + 1, + // cur_line > 2 ? 
lines + 3 : lines + 1, + // lines + 2, 3, width); + // irrev_vert_wvlt_step(lines + 1, lines + 1, + // lines, 3, width); + // //push lines[2] to L, lines[1] to H, and lines[0] to L + // if (cur_line >= 2) + // { + // irrev_vert_wvlt_K(lines + 2, lines + 5, + // true, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, + // child_res->get_line(), bands[1].get_line(), + // width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // irrev_vert_wvlt_K(lines + 1, lines + 5, + // false, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // irrev_vert_wvlt_K(lines, lines + 5, + // true, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // else + // { + // irrev_vert_wvlt_step(lines + 1, lines + 1, + // lines, 0, width); + // irrev_vert_wvlt_step(lines, + // cur_line > 1 ? lines + 2 : lines, + // lines + 1, 1, width); + // irrev_vert_wvlt_step(lines + 1, + // cur_line > 2 ? lines + 3 : lines + 1, + // lines + 2, 2, width); + // irrev_vert_wvlt_step(lines + 2, + // cur_line > 3 ? lines + 4 : lines + 2, + // lines + 3, 3, width); + + // irrev_vert_wvlt_step(lines + 1, lines + 1, + // lines, 2, width); + // irrev_vert_wvlt_step(lines, + // cur_line > 1 ? 
lines + 2 : lines, + // lines + 1, 3, width); + + // //push lines[3] L, lines[2] H, lines[1] L, and lines[0] H + // if (cur_line >= 3) + // { + // irrev_vert_wvlt_K(lines + 3, lines + 5, + // true, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, + // child_res->get_line(), bands[1].get_line(), + // width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // if (cur_line >= 2) + // irrev_vert_wvlt_K(lines + 2, lines + 5, false, width); + // else + // irrev_vert_wvlt_K(lines, lines + 5, false, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // irrev_vert_wvlt_K(lines + 1, lines + 5, + // true, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // irrev_vert_wvlt_K(lines, lines + 5, + // false, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // } + // } + // else + // { //only one line + // if (vert_even) + // { + // //push to L + // irrev_horz_wvlt_fwd_tx(lines, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // else + // { + // float* sp = lines[0].f32; + // for (ui32 i = width; i > 0; --i) + // *sp++ *= 2.0f; + // //push to H + // irrev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // } + // } + // } + + // rotate_buffers(lines, lines + 1, lines + 2, lines + 3, lines + 4, + // lines + 5); + + // ++cur_line; + // vert_even = !vert_even; + //} } ////////////////////////////////////////////////////////////////////////// @@ -621,147 +694,159 @@ namespace ojph { ui32 width = res_rect.siz.w; if (width == 0) 
- return lines; + return NULL; if (reversible) { - assert(num_lines >= 4); if (res_rect.siz.h > 1) { - do + if (sig->active) { + sig->active = false; + return sig->line; + }; + for (;;) { //horizontal transform if (cur_line < res_rect.siz.h) { - if (vert_even) - rev_horz_wvlt_bwd_tx(lines, + if (vert_even) { // even + rev_horz_syn(atk, aug->line, child_res->pull_line(), bands[1].pull_line(), width, horz_even); - else - rev_horz_wvlt_bwd_tx(lines, + aug->active = true; + vert_even = !vert_even; + ++cur_line; + continue; + } + else { + rev_horz_syn(atk, sig->line, bands[2].pull_line(), bands[3].pull_line(), width, horz_even); + sig->active = true; + vert_even = !vert_even; + ++cur_line; + } } //vertical transform - if (!vert_even) + for (ui32 i = 0; i < num_steps; ++i) { - rev_vert_wvlt_bwd_update( - cur_line > 1 ? lines + 2 : lines, - cur_line < res_rect.siz.h ? lines : lines + 2, - lines + 1, width); - rev_vert_wvlt_bwd_predict( - cur_line > 2 ? lines + 3 : lines + 1, - cur_line < res_rect.siz.h + 1 ? lines + 1 : lines + 3, - lines + 2, width); + if (aug->active && + (sig->active == true || ssp[i].active == true)) + { + line_buf* dp = aug->line; + line_buf* sp1 = sig->active ? sig->line : ssp[i].line; + line_buf* sp2 = ssp[i].active ? 
ssp[i].line : sig->line; + const lifting_step* s = atk->get_step(num_steps - i - 1); + rev_vert_syn_step(s, dp, sp1, sp2, width); + } + lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } - vert_even = !vert_even; - rotate_buffers(lines, lines + 1, lines + 2, lines + 3); - ++cur_line; - } while (cur_line < 3); - memcpy(lines[0].i32, lines[3].i32, res_rect.siz.w * sizeof(si32)); - return lines; + if (aug->active) { + aug->active = false; + return aug->line; + } + if (sig->active) { + sig->active = false; + return sig->line; + }; + } } - else if (res_rect.siz.h == 1) + else { if (vert_even) - { - rev_horz_wvlt_bwd_tx(lines, child_res->pull_line(), + rev_horz_syn(atk, aug->line, child_res->pull_line(), bands[1].pull_line(), width, horz_even); - } else { - rev_horz_wvlt_bwd_tx(lines, bands[2].pull_line(), + rev_horz_syn(atk, aug->line, bands[2].pull_line(), bands[3].pull_line(), width, horz_even); - if (width) - { - si32* sp = lines[0].i32; - for (ui32 i = width; i > 0; --i) - *sp++ >>= 1; - } + si32* sp = aug->line->i32; + for (ui32 i = width; i > 0; --i) + *sp++ >>= 1; } - return lines; + return aug->line; } - else - return lines; } else { - assert(num_lines >= 6); if (res_rect.siz.h > 1) { - do + if (sig->active) { + sig->active = false; + return sig->line; + }; + for (;;) { //horizontal transform if (cur_line < res_rect.siz.h) { - if (vert_even) - { - irrev_horz_wvlt_bwd_tx(lines, + if (vert_even) { // even + irv_horz_syn(atk, aug->line, child_res->pull_line(), bands[1].pull_line(), width, horz_even); - irrev_vert_wvlt_K(lines, lines, false, width); + aug->active = true; + vert_even = !vert_even; + ++cur_line; + + const float K = atk->get_K(); + irv_vert_syn_K(K, aug->line, width); + + continue; } - else - { - irrev_horz_wvlt_bwd_tx(lines, + else { + irv_horz_syn(atk, sig->line, bands[2].pull_line(), bands[3].pull_line(), width, horz_even); - irrev_vert_wvlt_K(lines, lines, true, width); + sig->active = true; + vert_even = !vert_even; + ++cur_line; + + 
const float K_inv = 1.0f / atk->get_K(); + irv_vert_syn_K(K_inv, sig->line, width); } } //vertical transform - if (!vert_even) + for (ui32 i = 0; i < num_steps; ++i) { - irrev_vert_wvlt_step( - cur_line > 1 ? lines + 2 : lines, - cur_line < res_rect.siz.h ? lines : lines + 2, - lines + 1, 7, width); - irrev_vert_wvlt_step( - cur_line > 2 ? lines + 3 : lines + 1, - cur_line < res_rect.siz.h + 1 ? lines + 1 : lines + 3, - lines + 2, 6, width); - irrev_vert_wvlt_step( - cur_line > 3 ? lines + 4 : lines + 2, - cur_line < res_rect.siz.h + 2 ? lines + 2 : lines + 4, - lines + 3, 5, width); - irrev_vert_wvlt_step( - cur_line > 4 ? lines + 5 : lines + 3, - cur_line < res_rect.siz.h + 3 ? lines + 3 : lines + 5, - lines + 4, 4, width); + if (aug->active && + (sig->active == true || ssp[i].active == true)) + { + line_buf* dp = aug->line; + line_buf* sp1 = sig->active ? sig->line : ssp[i].line; + line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; + const lifting_step* s = atk->get_step(num_steps - i - 1); + irv_vert_syn_step(s, dp, sp1, sp2, width); + } + lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } - vert_even = !vert_even; - rotate_buffers(lines, lines + 1, lines + 2, lines + 3, lines + 4, - lines + 5); - ++cur_line; - } while (cur_line < 5); - memcpy(lines[0].f32, lines[5].f32, res_rect.siz.w * sizeof(float)); - return lines; + if (aug->active) { + aug->active = false; + return aug->line; + } + if (sig->active) { + sig->active = false; + return sig->line; + }; + } } - else if (res_rect.siz.h == 1) + else { if (vert_even) - { - irrev_horz_wvlt_bwd_tx(lines, child_res->pull_line(), + irv_horz_syn(atk, aug->line, child_res->pull_line(), bands[1].pull_line(), width, horz_even); - } else { - irrev_horz_wvlt_bwd_tx(lines, bands[2].pull_line(), + irv_horz_syn(atk, aug->line, bands[2].pull_line(), bands[3].pull_line(), width, horz_even); - if (width) - { - float* sp = lines[0].f32; - for (ui32 i = width; i > 0; --i) - *sp++ *= 0.5f; - } + float *sp = 
aug->line->f32; + for (ui32 i = width; i > 0; --i) + *sp++ *= 0.5f; } - return lines; + return aug->line; } - else - return lines; } } diff --git a/src/core/codestream/ojph_resolution.h b/src/core/codestream/ojph_resolution.h index e110811b..7a7d43d5 100644 --- a/src/core/codestream/ojph_resolution.h +++ b/src/core/codestream/ojph_resolution.h @@ -64,14 +64,15 @@ namespace ojph { public: static void pre_alloc(codestream *codestream, const rect& res_rect, - const rect& recon_res_rect, ui32 res_num); + const rect& recon_res_rect, + ui32 comp_num, ui32 res_num); void finalize_alloc(codestream *codestream, const rect& res_rect, const rect& recon_res_rect, ui32 comp_num, ui32 res_num, point comp_downsamp, tile_comp *parent_tile_comp, resolution *parent_res); - line_buf* get_line() { return lines + 0; } + line_buf* get_line() { return ssp[0].line; } void push_line(); line_buf* pull_line(); rect get_rect() { return res_rect; } @@ -90,14 +91,16 @@ namespace ojph { private: bool reversible, skipped_res_for_read, skipped_res_for_recon; - ui32 num_lines; + ui32 num_steps; ui32 num_bands, res_num; ui32 comp_num; ui32 num_bytes; // number of bytes in this resolution // used for tilepart length point comp_downsamp; - rect res_rect; - line_buf *lines; + rect res_rect; // resolution rectangle + line_buf* lines; // used to store lines + lifting_buf *ssp; // step state pointer + lifting_buf *aug, *sig; subband *bands; tile_comp *parent_comp; resolution *parent_res, *child_res; @@ -109,6 +112,8 @@ namespace ojph { int tag_tree_size; ui32 level_index[20]; //more than enough point cur_precinct_loc; //used for progressing spatial modes (2, 3, 4) + const param_atk* atk; + param_dfs::dfs_dwt_type downsampling_style; //wavelet machinery ui32 cur_line; bool vert_even, horz_even; diff --git a/src/core/codestream/ojph_subband.cpp b/src/core/codestream/ojph_subband.cpp index 6348e98b..dbef3b75 100644 --- a/src/core/codestream/ojph_subband.cpp +++ b/src/core/codestream/ojph_subband.cpp @@ -55,7 
+55,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void subband::pre_alloc(codestream *codestream, const rect &band_rect, - ui32 res_num) + ui32 comp_num, ui32 res_num) { mem_fixed_allocator* allocator = codestream->get_allocator(); @@ -63,7 +63,7 @@ namespace ojph { if (empty) return; - const param_cod* cdp = codestream->get_cod(); + const param_cod* cdp = codestream->get_cod(comp_num); size log_cb = cdp->get_log_block_dims(); size log_PP = cdp->get_log_precinct_size(res_num); @@ -111,7 +111,7 @@ namespace ojph { this->band_rect = band_rect; this->parent = res; - const param_cod* cdp = codestream->get_cod(); + const param_cod* cdp = codestream->get_cod(parent->get_comp_num()); this->reversible = cdp->access_atk()->is_reversible(); size log_cb = cdp->get_log_block_dims(); log_PP = cdp->get_log_precinct_size(res_num); @@ -125,8 +125,7 @@ namespace ojph { cur_line = 0; cur_cb_height = 0; param_qcd* qcd = codestream->access_qcd(parent->get_comp_num()); - const param_cod* cod = codestream->get_cod(); - ui32 num_decomps = cod->get_num_decompositions(); + ui32 num_decomps = cdp->get_num_decompositions(); this->K_max = qcd->get_Kmax(NULL, num_decomps, this->res_num, band_num); if (!reversible) { diff --git a/src/core/codestream/ojph_subband.h b/src/core/codestream/ojph_subband.h index 3bcc6edb..9928c5ef 100644 --- a/src/core/codestream/ojph_subband.h +++ b/src/core/codestream/ojph_subband.h @@ -64,7 +64,7 @@ namespace ojph { friend struct precinct; public: static void pre_alloc(codestream *codestream, const rect& band_rect, - ui32 res_num); + ui32 comp_num, ui32 res_num); void finalize_alloc(codestream *codestream, const rect& band_rect, resolution* res, ui32 res_num, ui32 subband_num); diff --git a/src/core/codestream/ojph_tile.cpp b/src/core/codestream/ojph_tile.cpp index b7cb52cd..48f8bb56 100644 --- a/src/core/codestream/ojph_tile.cpp +++ b/src/core/codestream/ojph_tile.cpp @@ -116,7 +116,7 @@ namespace ojph { 
recon_comp_rect.siz.w = recon_tcx1 - recon_tcx0; recon_comp_rect.siz.h = recon_tcy1 - recon_tcy0; - tile_comp::pre_alloc(codestream, comp_rect, recon_comp_rect); + tile_comp::pre_alloc(codestream, i, comp_rect, recon_comp_rect); width = ojph_max(width, recon_comp_rect.siz.w); } diff --git a/src/core/codestream/ojph_tile_comp.cpp b/src/core/codestream/ojph_tile_comp.cpp index a2124e8b..69ed0bcb 100644 --- a/src/core/codestream/ojph_tile_comp.cpp +++ b/src/core/codestream/ojph_tile_comp.cpp @@ -51,7 +51,8 @@ namespace ojph { { ////////////////////////////////////////////////////////////////////////// - void tile_comp::pre_alloc(codestream *codestream, const rect& comp_rect, + void tile_comp::pre_alloc(codestream *codestream, ui32 comp_num, + const rect& comp_rect, const rect& recon_comp_rect) { mem_fixed_allocator* allocator = codestream->get_allocator(); @@ -60,7 +61,7 @@ namespace ojph { ui32 num_decomps = codestream->access_cod().get_num_decompositions(); allocator->pre_alloc_obj(1); - resolution::pre_alloc(codestream, comp_rect, recon_comp_rect, + resolution::pre_alloc(codestream, comp_rect, recon_comp_rect, comp_num, num_decomps); } @@ -72,7 +73,7 @@ namespace ojph { mem_fixed_allocator* allocator = codestream->get_allocator(); //allocate a resolution - num_decomps = codestream->get_cod()->get_num_decompositions(); + num_decomps = codestream->get_cod(comp_num)->get_num_decompositions(); comp_downsamp = codestream->get_siz()->get_downsampling(comp_num); this->comp_rect = comp_rect; diff --git a/src/core/codestream/ojph_tile_comp.h b/src/core/codestream/ojph_tile_comp.h index d7304d96..def39e55 100644 --- a/src/core/codestream/ojph_tile_comp.h +++ b/src/core/codestream/ojph_tile_comp.h @@ -62,7 +62,8 @@ namespace ojph { class tile_comp { public: - static void pre_alloc(codestream *codestream, const rect& comp_rect, + static void pre_alloc(codestream *codestream, ui32 comp_num, + const rect& comp_rect, const rect& recon_comp_rect); void finalize_alloc(codestream 
*codestream, tile *parent, ui32 comp_num, const rect& comp_rect, diff --git a/src/core/common/ojph_mem.h b/src/core/common/ojph_mem.h index 712727c0..d7497cdb 100644 --- a/src/core/common/ojph_mem.h +++ b/src/core/common/ojph_mem.h @@ -134,6 +134,8 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////////// struct line_buf { + line_buf() : size(0), pre_size(0), i32(0) {} + template void pre_alloc(mem_fixed_allocator *p, size_t num_ele, ui32 pre_size) { @@ -157,6 +159,14 @@ namespace ojph { }; }; + ///////////////////////////////////////////////////////////////////////////// + struct lifting_buf + { + lifting_buf() { line = NULL; active = false; } + line_buf *line; + bool active; + }; + ///////////////////////////////////////////////////////////////////////////// struct coded_lists { diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index b6919032..46231d63 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -41,6 +41,8 @@ #include "ojph_mem.h" #include "ojph_transform.h" #include "ojph_transform_local.h" +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" namespace ojph { struct line_buf; @@ -81,6 +83,24 @@ namespace ojph { (line_buf* dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even) = NULL; + + + + + ///////////////////////////////////////////////////////////////////////// + void (*rev_vert_syn_step) + (const lifting_step* s, line_buf* aug, const line_buf* sig, + line_buf* other, ui32 repeat) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*rev_horz_syn) + (const param_atk* atk, line_buf* dst, line_buf* lsrc, + line_buf* hsrc, ui32 width, bool even) = NULL; + + + + + ///////////////////////////////////////////////////////////////////////// // Irreversible functions ///////////////////////////////////////////////////////////////////////// @@ -105,6 +125,27 @@ 
namespace ojph { (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even) = NULL; + + + + + ///////////////////////////////////////////////////////////////////////// + void (*irv_vert_syn_step) + (const lifting_step* s, line_buf* aug, const line_buf* sig, + line_buf* other, ui32 repeat) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*irv_vert_syn_K)(const float K, line_buf* aug, ui32 repeat) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*irv_horz_syn) + (const param_atk* atk, line_buf* dst, line_buf* lsrc, + line_buf* hsrc, ui32 width, bool even) = NULL; + + + + + //////////////////////////////////////////////////////////////////////////// static bool wavelet_transform_functions_initialized = false; @@ -122,11 +163,19 @@ namespace ojph { rev_vert_wvlt_bwd_predict = gen_rev_vert_wvlt_bwd_predict; rev_vert_wvlt_bwd_update = gen_rev_vert_wvlt_bwd_update; rev_horz_wvlt_bwd_tx = gen_rev_horz_wvlt_bwd_tx; + + rev_vert_syn_step = gen_rev_vert_syn_step; + rev_horz_syn = gen_rev_horz_syn; + irrev_vert_wvlt_step = gen_irrev_vert_wvlt_step; irrev_vert_wvlt_K = gen_irrev_vert_wvlt_K; irrev_horz_wvlt_fwd_tx = gen_irrev_horz_wvlt_fwd_tx; irrev_horz_wvlt_bwd_tx = gen_irrev_horz_wvlt_bwd_tx; + irv_vert_syn_step = gen_irv_vert_syn_step; + irv_vert_syn_K = gen_irv_vert_syn_K; + irv_horz_syn = gen_irv_horz_syn; + #ifndef OJPH_DISABLE_INTEL_SIMD int level = get_cpu_ext_level(); @@ -326,6 +375,96 @@ namespace ojph { } + + + + ////////////////////////////////////////////////////////////////////////// + void gen_rev_vert_syn_step(const lifting_step* s, line_buf* aug, + const line_buf* sig, line_buf* other, + ui32 repeat) + { + si32 a = s->rev.Aatk; + si32 b = s->rev.Batk; + ui32 e = s->rev.Eatk; + + si32* dst = aug->i32; + const si32* src1 = sig->i32, * src2 = other->i32; + if (a >= 0) + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b + a * (*src1++ + *src2++)) >> e; + else + for 
(ui32 i = repeat; i > 0; --i) + *dst++ += (b - a * (*src1++ + *src2++)) >> e; + } + + ////////////////////////////////////////////////////////////////////////// + void gen_rev_horz_syn(const param_atk *atk, line_buf* dst, line_buf *lsrc, + line_buf *hsrc, ui32 width, bool even) + { + if (width > 1) + { + bool ev = even; + si32* oth = hsrc->i32, * aug = lsrc->i32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) + { + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + si32 a = s->rev.Aatk; + si32 b = s->rev.Batk; + ui32 e = s->rev.Eatk; + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const si32* sp = oth + (ev ? 0 : 1); + si32* dp = aug; + if (a >= 0) + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= (b + a * (sp[-1] + sp[0])) >> e; + else + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp += (b - a * (sp[-1] + sp[0])) >> e; + + // swap buffers + si32* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; + } + + // combine both lsrc and hsrc into dst + si32* sph = hsrc->i32; + si32* spl = lsrc->i32; + si32* dp = dst->i32; + ui32 w = width; + if (!even) + { + *dp++ = *sph++; --w; + } + for (; w > 1; w -= 2) + { + *dp++ = *spl++; *dp++ = *sph++; + } + if (w) + { + *dp++ = *spl++; --w; + } + } + else { + if (even) + dst->i32[0] = lsrc->i32[0]; + else + dst->i32[0] = hsrc->i32[0] >> 1; + } + } + + + + + ////////////////////////////////////////////////////////////////////////// void gen_irrev_vert_wvlt_step(const line_buf* line_src1, const line_buf* line_src2, @@ -499,6 +638,102 @@ namespace ojph { } } + + + + + ////////////////////////////////////////////////////////////////////////// + void gen_irv_vert_syn_step(const lifting_step* s, line_buf* aug, + const line_buf* sig, line_buf* 
other, + ui32 repeat) + { + float a = s->irv.Aatk; + + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + for (ui32 i = repeat; i > 0; --i) + *dst++ -= a * (*src1++ + *src2++); + } + + ////////////////////////////////////////////////////////////////////////// + void gen_irv_vert_syn_K(const float K, line_buf* aug, ui32 repeat) + { + float* dst = aug->f32; + for (ui32 i = repeat; i > 0; --i) + *dst++ *= K; + } + + ////////////////////////////////////////////////////////////////////////// + void gen_irv_horz_syn(const param_atk* atk, line_buf* dst, line_buf* lsrc, + line_buf* hsrc, ui32 width, bool even) + { + if (width > 1) + { + bool ev = even; + float* oth = hsrc->f32, * aug = lsrc->f32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass + + { + float K = atk->get_K(); + float K_inv = 1.0f / K; + float* dp; + + dp = aug; + for (ui32 i = aug_width; i > 0; --i) + *dp++ *= K; + + dp = oth; + for (ui32 i = oth_width; i > 0; --i) + *dp++ *= K_inv; + } + + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) + { + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + float a = s->irv.Aatk; + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const float* sp = oth + (ev ? 
0 : 1); + float* dp = aug; + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= a * (sp[-1] + sp[0]); + + // swap buffers + float* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; + } + + // combine both lsrc and hsrc into dst + float* sph = hsrc->f32; + float* spl = lsrc->f32; + float* dp = dst->f32; + ui32 w = width; + if (!even) + { *dp++ = *sph++; --w; } + for (; w > 1; w -= 2) + { *dp++ = *spl++; *dp++ = *sph++; } + if (w) + { *dp++ = *spl++; --w; } + } + else { + if (even) + dst->f32[0] = lsrc->f32[0]; + else + dst->f32[0] = hsrc->f32[0] * 0.5f; + } + } + + + + + #endif // !OJPH_ENABLE_WASM_SIMD } diff --git a/src/core/transform/ojph_transform.h b/src/core/transform/ojph_transform.h index 002235d3..77ede96f 100644 --- a/src/core/transform/ojph_transform.h +++ b/src/core/transform/ojph_transform.h @@ -44,6 +44,8 @@ namespace ojph { struct line_buf; namespace local { + union lifting_step; + struct param_atk; ////////////////////////////////////////////////////////////////////////// void init_wavelet_transform_functions(); @@ -80,6 +82,24 @@ namespace ojph { extern void (*rev_horz_wvlt_bwd_tx) (line_buf* dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even); + + + + + ///////////////////////////////////////////////////////////////////////// + extern void (*rev_vert_syn_step) + (const lifting_step* s, line_buf* aug, const line_buf* sig, + line_buf* other, ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + extern void (*rev_horz_syn) + (const param_atk* atk, line_buf* dst, line_buf* lsrc, + line_buf* hsrc, ui32 width, bool even); + + + + + ///////////////////////////////////////////////////////////////////////// // Irreversible functions ///////////////////////////////////////////////////////////////////////// @@ -102,6 +122,28 @@ namespace ojph { extern void (*irrev_horz_wvlt_bwd_tx) (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even); + 
+ + + + ///////////////////////////////////////////////////////////////////////// + extern void (*irv_vert_syn_step) + (const lifting_step* s, line_buf* aug, const line_buf* sig, + line_buf* other, ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + extern void (*irv_vert_syn_K) + (const float K, line_buf* aug, ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + extern void (*irv_horz_syn) + (const param_atk* atk, line_buf* dst, line_buf* lsrc, + line_buf* hsrc, ui32 width, bool even); + + + + + } } diff --git a/src/core/transform/ojph_transform_local.h b/src/core/transform/ojph_transform_local.h index 2bf041c8..42cec378 100644 --- a/src/core/transform/ojph_transform_local.h +++ b/src/core/transform/ojph_transform_local.h @@ -44,6 +44,7 @@ namespace ojph { struct line_buf; namespace local { + struct param_atk; ////////////////////////////////////////////////////////////////////////// struct LIFTING_FACTORS @@ -93,6 +94,23 @@ namespace ojph { void gen_rev_horz_wvlt_bwd_tx(line_buf* dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even); + + + + + ///////////////////////////////////////////////////////////////////////// + void gen_rev_vert_syn_step(const lifting_step* s, line_buf* aug, + const line_buf* sig, line_buf* other, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void gen_rev_horz_syn(const param_atk *atk, line_buf* dst, line_buf *lsrc, + line_buf *hsrc, ui32 width, bool even); + + + + + ////////////////////////////////////////////////////////////////////////// // Irreversible functions ////////////////////////////////////////////////////////////////////////// @@ -113,6 +131,26 @@ namespace ojph { void gen_irrev_horz_wvlt_bwd_tx(line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even); + + + + + ///////////////////////////////////////////////////////////////////////// + void gen_irv_vert_syn_step(const 
lifting_step* s, line_buf* aug, + const line_buf* sig, line_buf* other, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void gen_irv_vert_syn_K(const float K, line_buf* aug, ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void gen_irv_horz_syn(const param_atk *atk, line_buf* dst, line_buf *lsrc, + line_buf *hsrc, ui32 width, bool even); + + + + + ////////////////////////////////////////////////////////////////////////// // // From 2c74db3ab52487b2d74c63e58bce5c89f0547e02 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Tue, 2 Apr 2024 21:16:57 +1100 Subject: [PATCH 11/37] Syntax error fix. --- src/core/codestream/ojph_params_local.h | 4 +-- src/core/codestream/ojph_resolution.cpp | 43 +------------------------ 2 files changed, 3 insertions(+), 44 deletions(-) diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index c08d750e..43c1181d 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -856,8 +856,8 @@ namespace ojph { const param_atk* get_atk(int index) const; const lifting_step* get_step(ui32 s) const { assert(s < Natk); return d + s; } - const ui32 get_num_steps() const { return Natk; } - const float get_K() const { return Katk; } + ui32 get_num_steps() const { return Natk; } + float get_K() const { return Katk; } private: // member variables ui16 Latk; // structure length diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 105c57de..7f226445 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -38,6 +38,7 @@ #include #include +#include #include "ojph_mem.h" #include "ojph_params.h" @@ -54,48 +55,6 @@ namespace ojph { namespace local { - - ////////////////////////////////////////////////////////////////////////// - static void rotate_buffers(line_buf* line1, line_buf* line2, - 
line_buf* line3, line_buf* line4) - { - assert(line1->size == line2->size && - line1->pre_size == line2->pre_size && - line1->size == line3->size && - line1->pre_size == line3->pre_size && - line1->size == line4->size && - line1->pre_size == line4->pre_size); - si32* p = line4->i32; - line4->i32 = line3->i32; - line3->i32 = line2->i32; - line2->i32 = line1->i32; - line1->i32 = p; - } - - ////////////////////////////////////////////////////////////////////////// - static void rotate_buffers(line_buf* line1, line_buf* line2, - line_buf* line3, line_buf* line4, - line_buf* line5, line_buf* line6) - { - assert(line1->size == line2->size && - line1->pre_size == line2->pre_size && - line1->size == line3->size && - line1->pre_size == line3->pre_size && - line1->size == line4->size && - line1->pre_size == line4->pre_size && - line1->size == line5->size && - line1->pre_size == line5->pre_size && - line1->size == line6->size && - line1->pre_size == line6->pre_size); - si32* p = line6->i32; - line6->i32 = line5->i32; - line5->i32 = line4->i32; - line4->i32 = line3->i32; - line3->i32 = line2->i32; - line2->i32 = line1->i32; - line1->i32 = p; - } - ////////////////////////////////////////////////////////////////////////// void resolution::pre_alloc(codestream* codestream, const rect& res_rect, const rect& recon_res_rect, From be39386e13e426e5868fce6563f2520d6b4cd10a Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 4 Apr 2024 06:49:08 +1100 Subject: [PATCH 12/37] reversible analysis is working; irreversible not. More testing is needed. 
--- src/core/codestream/ojph_resolution.cpp | 430 ++++++++-------------- src/core/codestream/ojph_resolution.h | 2 +- src/core/transform/ojph_transform.cpp | 242 ++++++++++-- src/core/transform/ojph_transform.h | 41 ++- src/core/transform/ojph_transform_local.h | 39 +- 5 files changed, 442 insertions(+), 312 deletions(-) diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 105c57de..f28cfd5d 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -407,277 +407,175 @@ namespace ojph { } } + ////////////////////////////////////////////////////////////////////////// + line_buf* resolution::get_line() + { + if (vert_even) + { + ++cur_line; + sig->active = true; + return sig->line; + } + else + { + ++cur_line; + aug->active = true; + return aug->line; + } + } + ////////////////////////////////////////////////////////////////////////// void resolution::push_line() { if (res_num == 0) { assert(num_bands == 1 && child_res == NULL); - bands[0].exchange_buf(ssp[0].line);//line at location 0 + bands[0].exchange_buf(vert_even ? sig->line : aug->line); bands[0].push_line(); return; } - //ui32 width = res_rect.siz.w; - //if (width == 0) - // return; - //if (reversible) - //{ - // //vertical transform - // assert(num_lines >= 4); - // if (vert_even) - // { - // rev_vert_wvlt_fwd_predict(lines, - // cur_line > 1 ? lines + 2 : lines, - // lines + 1, width); - // rev_vert_wvlt_fwd_update(lines + 1, - // cur_line > 2 ? 
lines + 3 : lines + 1, - // lines + 2, width); - - // // push to horizontal transform lines[2](L) and lines[1] (H) - // if (cur_line >= 1) - // { - // rev_horz_wvlt_fwd_tx(lines + 1, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // } - // if (cur_line >= 2) - // { - // rev_horz_wvlt_fwd_tx(lines + 2, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // } - - // if (cur_line >= res_rect.siz.h - 1) - // { //finished, so we need to process any lines left - // if (cur_line) - // { - // if (vert_even) - // { - // rev_vert_wvlt_fwd_update(lines + 1, lines + 1, - // lines, width); - // //push lines[0] to L - // rev_horz_wvlt_fwd_tx(lines, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // else - // { - // rev_vert_wvlt_fwd_predict(lines + 1, lines + 1, - // lines, width); - // rev_vert_wvlt_fwd_update(lines, - // cur_line > 1 ? 
lines + 2 : lines, - // lines + 1, width); - - // // push to horizontal transform lines[1](L) and line[0] (H) - // //line[0] to H - // rev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // //line[1] to L - // rev_horz_wvlt_fwd_tx(lines + 1, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // } - // else - // { //only one line - // if (vert_even) - // { - // //push to L - // rev_horz_wvlt_fwd_tx(lines, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // else - // { - // si32* sp = lines[0].i32; - // for (ui32 i = width; i > 0; --i) - // *sp++ <<= 1; - // //push to H - // rev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // } - // } - // } - - // rotate_buffers(lines, lines + 1, lines + 2, lines + 3); - - // ++cur_line; - // vert_even = !vert_even; - //} - //else - //{ - // //vertical transform - // assert(num_lines >= 6); - // if (vert_even) - // { - // irrev_vert_wvlt_step(lines + 0, - // cur_line > 1 ? lines + 2 : lines, - // lines + 1, 0, width); - // irrev_vert_wvlt_step(lines + 1, - // cur_line > 2 ? lines + 3 : lines + 1, - // lines + 2, 1, width); - // irrev_vert_wvlt_step(lines + 2, - // cur_line > 3 ? lines + 4 : lines + 2, - // lines + 3, 2, width); - // irrev_vert_wvlt_step(lines + 3, - // cur_line > 4 ? 
lines + 5 : lines + 3, - // lines + 4, 3, width); - - // // push to horizontal transform lines[4](L) and lines[3] (H) - // if (cur_line >= 3) - // { - // irrev_vert_wvlt_K(lines + 3, lines + 5, - // false, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // } - // if (cur_line >= 4) - // { - // irrev_vert_wvlt_K(lines + 4, lines + 5, - // true, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // } - - // if (cur_line >= res_rect.siz.h - 1) - // { //finished, so we need to process any left line - // if (cur_line) - // { - // if (vert_even) - // { - // irrev_vert_wvlt_step(lines + 1, lines + 1, - // lines, 1, width); - // irrev_vert_wvlt_step(lines, - // cur_line > 1 ? lines + 2 : lines, - // lines + 1, 2, width); - // irrev_vert_wvlt_step(lines + 1, - // cur_line > 2 ? 
lines + 3 : lines + 1, - // lines + 2, 3, width); - // irrev_vert_wvlt_step(lines + 1, lines + 1, - // lines, 3, width); - // //push lines[2] to L, lines[1] to H, and lines[0] to L - // if (cur_line >= 2) - // { - // irrev_vert_wvlt_K(lines + 2, lines + 5, - // true, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, - // child_res->get_line(), bands[1].get_line(), - // width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // irrev_vert_wvlt_K(lines + 1, lines + 5, - // false, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // irrev_vert_wvlt_K(lines, lines + 5, - // true, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // else - // { - // irrev_vert_wvlt_step(lines + 1, lines + 1, - // lines, 0, width); - // irrev_vert_wvlt_step(lines, - // cur_line > 1 ? lines + 2 : lines, - // lines + 1, 1, width); - // irrev_vert_wvlt_step(lines + 1, - // cur_line > 2 ? lines + 3 : lines + 1, - // lines + 2, 2, width); - // irrev_vert_wvlt_step(lines + 2, - // cur_line > 3 ? lines + 4 : lines + 2, - // lines + 3, 3, width); - - // irrev_vert_wvlt_step(lines + 1, lines + 1, - // lines, 2, width); - // irrev_vert_wvlt_step(lines, - // cur_line > 1 ? 
lines + 2 : lines, - // lines + 1, 3, width); - - // //push lines[3] L, lines[2] H, lines[1] L, and lines[0] H - // if (cur_line >= 3) - // { - // irrev_vert_wvlt_K(lines + 3, lines + 5, - // true, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, - // child_res->get_line(), bands[1].get_line(), - // width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // if (cur_line >= 2) - // irrev_vert_wvlt_K(lines + 2, lines + 5, false, width); - // else - // irrev_vert_wvlt_K(lines, lines + 5, false, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // irrev_vert_wvlt_K(lines + 1, lines + 5, - // true, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // irrev_vert_wvlt_K(lines, lines + 5, - // false, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // } - // } - // else - // { //only one line - // if (vert_even) - // { - // //push to L - // irrev_horz_wvlt_fwd_tx(lines, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // else - // { - // float* sp = lines[0].f32; - // for (ui32 i = width; i > 0; --i) - // *sp++ *= 2.0f; - // //push to H - // irrev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // } - // } - // } - - // rotate_buffers(lines, lines + 1, lines + 2, lines + 3, lines + 4, - // lines + 5); - - // ++cur_line; - // vert_even = !vert_even; - //} + ui32 width = res_rect.siz.w; + if (width == 0) + return; + if (reversible) + { + if (res_rect.siz.h > 1) + { + if (!vert_even && cur_line < res_rect.siz.h) { + 
vert_even = !vert_even; + return; + } + + bool finished; + do + { + //vertical transform + for (ui32 i = 0; i < num_steps; ++i) + { + if (aug->active && (sig->active || ssp[i].active)) + { + line_buf* dp = aug->line; + line_buf* sp1 = sig->active ? sig->line : ssp[i].line; + line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; + const lifting_step* s = atk->get_step(i); + rev_vert_ana_step(s, sp1, sp2, dp, width); + } + lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; + } + + finished = true; + if (aug->active) { + rev_horz_ana(atk, bands[2].get_line(), + bands[3].get_line(), aug->line, width, horz_even); + bands[2].push_line(); + bands[3].push_line(); + aug->active = false; + finished = false; + } + if (sig->active) { + rev_horz_ana(atk, child_res->get_line(), + bands[1].get_line(), sig->line, width, horz_even); + bands[1].push_line(); + child_res->push_line(); + sig->active = false; + finished = false; + }; + vert_even = !vert_even; + } while (cur_line >= res_rect.siz.h && !finished); + } + else + { + if (vert_even) { + rev_horz_ana(atk, child_res->get_line(), + bands[1].get_line(), sig->line, width, horz_even); + bands[1].push_line(); + child_res->push_line(); + } + else + { + si32* sp = aug->line->i32; + for (ui32 i = width; i > 0; --i) + *sp++ <<= 1; + rev_horz_ana(atk, bands[2].get_line(), + bands[3].get_line(), aug->line, width, horz_even); + bands[2].push_line(); + bands[3].push_line(); + } + } + } + else + { + if (res_rect.siz.h > 1) + { + if (!vert_even && cur_line < res_rect.siz.h) { + vert_even = !vert_even; + return; + } + + bool finished; + do + { + //vertical transform + for (ui32 i = 0; i < num_steps; ++i) + { + if (aug->active && (sig->active || ssp[i].active)) + { + line_buf* dp = aug->line; + line_buf* sp1 = sig->active ? sig->line : ssp[i].line; + line_buf* sp2 = ssp[i].active ? 
ssp[i].line : sig->line; + const lifting_step* s = atk->get_step(i); + irv_vert_ana_step(s, sp1, sp2, dp, width); + } + lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; + } + + finished = true; + if (aug->active) { + const float K = atk->get_K(); + irv_vert_times_K(K, aug->line, width); + + irv_horz_ana(atk, bands[2].get_line(), + bands[3].get_line(), aug->line, width, horz_even); + bands[2].push_line(); + bands[3].push_line(); + aug->active = false; + finished = false; + } + if (sig->active) { + const float K_inv = 1.0f / atk->get_K(); + irv_vert_times_K(K_inv, sig->line, width); + + irv_horz_ana(atk, child_res->get_line(), + bands[1].get_line(), sig->line, width, horz_even); + bands[1].push_line(); + child_res->push_line(); + sig->active = false; + finished = false; + }; + vert_even = !vert_even; + } while (cur_line >= res_rect.siz.h && !finished); + } + else + { + if (vert_even) { + irv_horz_ana(atk, child_res->get_line(), + bands[1].get_line(), sig->line, width, horz_even); + bands[1].push_line(); + child_res->push_line(); + } + else + { + float* sp = aug->line->f32; + for (ui32 i = width; i > 0; --i) + *sp++ *= 2.0f; + irv_horz_ana(atk, bands[2].get_line(), + bands[3].get_line(), aug->line, width, horz_even); + bands[2].push_line(); + bands[3].push_line(); + } + } + } } ////////////////////////////////////////////////////////////////////////// @@ -730,8 +628,7 @@ namespace ojph { //vertical transform for (ui32 i = 0; i < num_steps; ++i) { - if (aug->active && - (sig->active == true || ssp[i].active == true)) + if (aug->active && (sig->active || ssp[i].active)) { line_buf* dp = aug->line; line_buf* sp1 = sig->active ? 
sig->line : ssp[i].line; @@ -790,7 +687,7 @@ namespace ojph { ++cur_line; const float K = atk->get_K(); - irv_vert_syn_K(K, aug->line, width); + irv_vert_times_K(K, aug->line, width); continue; } @@ -803,15 +700,14 @@ namespace ojph { ++cur_line; const float K_inv = 1.0f / atk->get_K(); - irv_vert_syn_K(K_inv, sig->line, width); + irv_vert_times_K(K_inv, sig->line, width); } } //vertical transform for (ui32 i = 0; i < num_steps; ++i) { - if (aug->active && - (sig->active == true || ssp[i].active == true)) + if (aug->active && (sig->active || ssp[i].active)) { line_buf* dp = aug->line; line_buf* sp1 = sig->active ? sig->line : ssp[i].line; diff --git a/src/core/codestream/ojph_resolution.h b/src/core/codestream/ojph_resolution.h index 7a7d43d5..36ae5d00 100644 --- a/src/core/codestream/ojph_resolution.h +++ b/src/core/codestream/ojph_resolution.h @@ -72,7 +72,7 @@ namespace ojph { tile_comp *parent_tile_comp, resolution *parent_res); - line_buf* get_line() { return ssp[0].line; } + line_buf* get_line(); void push_line(); line_buf* pull_line(); rect get_rect() { return res_rect; } diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index 46231d63..4f7f8cc1 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -87,15 +87,25 @@ namespace ojph { + ///////////////////////////////////////////////////////////////////////// + void (*rev_vert_ana_step) + (const lifting_step* s, const line_buf* sig, const line_buf* other, + const line_buf* aug, ui32 repeat) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*rev_horz_ana) + (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, + const line_buf* src, ui32 width, bool even) = NULL; + ///////////////////////////////////////////////////////////////////////// void (*rev_vert_syn_step) - (const lifting_step* s, line_buf* aug, const line_buf* sig, - line_buf* other, ui32 repeat) = NULL; + (const 
lifting_step* s, const line_buf* aug, const line_buf* sig, + const line_buf* other, ui32 repeat) = NULL; ///////////////////////////////////////////////////////////////////////// void (*rev_horz_syn) - (const param_atk* atk, line_buf* dst, line_buf* lsrc, - line_buf* hsrc, ui32 width, bool even) = NULL; + (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, + const line_buf* hsrc, ui32 width, bool even) = NULL; @@ -130,17 +140,28 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// - void (*irv_vert_syn_step) - (const lifting_step* s, line_buf* aug, const line_buf* sig, - line_buf* other, ui32 repeat) = NULL; + void (*irv_vert_ana_step) + (const lifting_step* s, const line_buf* sig, const line_buf* other, + const line_buf* aug, ui32 repeat) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*irv_horz_ana) + (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, + const line_buf* src, ui32 width, bool even) = NULL; ///////////////////////////////////////////////////////////////////////// - void (*irv_vert_syn_K)(const float K, line_buf* aug, ui32 repeat) = NULL; + void (*irv_vert_syn_step) + (const lifting_step* s, const line_buf* aug, const line_buf* sig, + const line_buf* other, ui32 repeat) = NULL; ///////////////////////////////////////////////////////////////////////// void (*irv_horz_syn) - (const param_atk* atk, line_buf* dst, line_buf* lsrc, - line_buf* hsrc, ui32 width, bool even) = NULL; + (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, + const line_buf* hsrc, ui32 width, bool even) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*irv_vert_times_K) + (float K, const line_buf* aug, ui32 repeat) = NULL; @@ -164,6 +185,8 @@ namespace ojph { rev_vert_wvlt_bwd_update = gen_rev_vert_wvlt_bwd_update; rev_horz_wvlt_bwd_tx = gen_rev_horz_wvlt_bwd_tx; + rev_vert_ana_step = gen_rev_vert_ana_step; 
+ rev_horz_ana = gen_rev_horz_ana; rev_vert_syn_step = gen_rev_vert_syn_step; rev_horz_syn = gen_rev_horz_syn; @@ -172,9 +195,11 @@ namespace ojph { irrev_horz_wvlt_fwd_tx = gen_irrev_horz_wvlt_fwd_tx; irrev_horz_wvlt_bwd_tx = gen_irrev_horz_wvlt_bwd_tx; + irv_vert_ana_step = gen_irv_vert_ana_step; + irv_horz_ana = gen_irv_horz_ana; irv_vert_syn_step = gen_irv_vert_syn_step; - irv_vert_syn_K = gen_irv_vert_syn_K; irv_horz_syn = gen_irv_horz_syn; + irv_vert_times_K = gen_irv_vert_times_K; #ifndef OJPH_DISABLE_INTEL_SIMD int level = get_cpu_ext_level(); @@ -378,9 +403,92 @@ namespace ojph { + ///////////////////////////////////////////////////////////////////////// + void gen_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat) + { + si32 a = s->rev.Aatk; + si32 b = s->rev.Batk; + ui32 e = s->rev.Eatk; + + si32* dst = aug->i32; + const si32* src1 = sig->i32, * src2 = other->i32; + if (a >= 0) + for (ui32 i = repeat; i > 0; --i) + *dst++ += (b + a * (*src1++ + *src2++)) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b - a * (*src1++ + *src2++)) >> e; + } + + ///////////////////////////////////////////////////////////////////////// + void gen_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) + { + if (width > 1) + { + // split src into ldst and hdst + si32* dph = hdst->i32; + si32* dpl = ldst->i32; + si32* sp = src->i32; + ui32 w = width; + if (!even) + { + *dph++ = *sp++; --w; + } + for (; w > 1; w -= 2) + { + *dpl++ = *sp++; *dph++ = *sp++; + } + if (w) + { + *dpl++ = *sp++; --w; + } + + si32* hp = hdst->i32, * lp = ldst->i32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ?
0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + // fetch lifting step j + const lifting_step* s = atk->get_step(j); + si32 a = s->rev.Aatk; + si32 b = s->rev.Batk; + ui32 e = s->rev.Eatk; + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const si32* sp = lp + (even ? 1 : 0); + si32* dp = hp; + if (a >= 0) + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += (b + a * (sp[-1] + sp[0])) >> e; + else + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp -= (b - a * (sp[-1] + sp[0])) >> e; + + // swap buffers + si32* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; + } + } + else { + if (even) + ldst->i32[0] = src->i32[0]; + else + hdst->i32[0] = src->i32[0] << 1; + } + } + ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_syn_step(const lifting_step* s, line_buf* aug, - const line_buf* sig, line_buf* other, + void gen_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, ui32 repeat) { si32 a = s->rev.Aatk; @@ -398,8 +506,9 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - void gen_rev_horz_syn(const param_atk *atk, line_buf* dst, line_buf *lsrc, - line_buf *hsrc, ui32 width, bool even) + void gen_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { @@ -643,8 +752,8 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// - void gen_irv_vert_syn_step(const lifting_step* s, line_buf* aug, - const line_buf* sig, line_buf* other, + void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, ui32 repeat) { float a = s->irv.Aatk; @@ -652,20 +761,100 @@ namespace ojph { float* dst = 
aug->f32;
const float* src1 = sig->f32, * src2 = other->f32; for (ui32 i = repeat; i > 0; --i) - *dst++ -= a * (*src1++ + *src2++); + *dst++ += a * (*src1++ + *src2++); } + + ///////////////////////////////////////////////////////////////////////// + void gen_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) + { + if (width > 1) + { + // split src into ldst and hdst + float* dph = hdst->f32; + float* dpl = ldst->f32; + float* sp = src->f32; + ui32 w = width; + if (!even) + { + *dph++ = *sp++; --w; + } + for (; w > 1; w -= 2) + { + *dpl++ = *sp++; *dph++ = *sp++; + } + if (w) + { + *dpl++ = *sp++; --w; + } + + float* hp = hdst->f32, * lp = ldst->f32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + // fetch lifting step j + const lifting_step* s = atk->get_step(j); + float a = s->irv.Aatk; + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const float* sp = lp + (even ?
1 : 0); + float* dp = hp; + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += a * (sp[-1] + sp[0]); + + // swap buffers + float* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; + } + + { + float K = atk->get_K(); + float K_inv = 1.0f / K; + float* dp; + + dp = lp; + for (ui32 i = l_width; i > 0; --i) + *dp++ *= K_inv; + dp = hp; + for (ui32 i = h_width; i > 0; --i) + *dp++ *= K; + } + } + else { + if (even) + ldst->f32[0] = src->f32[0]; + else + hdst->f32[0] = src->f32[0] * 2.0f; + } + + + } + ////////////////////////////////////////////////////////////////////////// - void gen_irv_vert_syn_K(const float K, line_buf* aug, ui32 repeat) + void gen_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat) { + float a = s->irv.Aatk; + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; for (ui32 i = repeat; i > 0; --i) - *dst++ *= K; + *dst++ -= a * (*src1++ + *src2++); } ////////////////////////////////////////////////////////////////////////// - void gen_irv_horz_syn(const param_atk* atk, line_buf* dst, line_buf* lsrc, - line_buf* hsrc, ui32 width, bool even) + void gen_irv_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { @@ -691,7 +880,6 @@ namespace ojph { ui32 num_steps = atk->get_num_steps(); for (ui32 j = num_steps; j > 0; --j) { - // first lifting step const lifting_step* s = atk->get_step(j - 1); float a = s->irv.Aatk; @@ -730,7 +918,13 @@ namespace ojph { } } - + ////////////////////////////////////////////////////////////////////////// + void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + float* dst = aug->f32; + for (ui32 i = repeat; i > 0; --i) + *dst++ *= K; + } diff --git a/src/core/transform/ojph_transform.h b/src/core/transform/ojph_transform.h index 77ede96f..b31df0ef 100644 ---
a/src/core/transform/ojph_transform.h +++ b/src/core/transform/ojph_transform.h @@ -85,16 +85,25 @@ namespace ojph { + ///////////////////////////////////////////////////////////////////////// + extern void (*rev_vert_ana_step) + (const lifting_step* s, const line_buf* sig, const line_buf* other, + const line_buf* aug, ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + extern void (*rev_horz_ana) + (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, + const line_buf* src, ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// extern void (*rev_vert_syn_step) - (const lifting_step* s, line_buf* aug, const line_buf* sig, - line_buf* other, ui32 repeat); + (const lifting_step* s, const line_buf* aug, const line_buf* sig, + const line_buf* other, ui32 repeat); ///////////////////////////////////////////////////////////////////////// extern void (*rev_horz_syn) - (const param_atk* atk, line_buf* dst, line_buf* lsrc, - line_buf* hsrc, ui32 width, bool even); + (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, + const line_buf* hsrc, ui32 width, bool even); @@ -126,20 +135,30 @@ namespace ojph { + ///////////////////////////////////////////////////////////////////////// - extern void (*irv_vert_syn_step) - (const lifting_step* s, line_buf* aug, const line_buf* sig, - line_buf* other, ui32 repeat); + extern void (*irv_vert_ana_step) + (const lifting_step* s, const line_buf* sig, const line_buf* other, + const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// - extern void (*irv_vert_syn_K) - (const float K, line_buf* aug, ui32 repeat); + extern void (*irv_horz_ana) + (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, + const line_buf* src, ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + extern void (*irv_vert_syn_step) + (const 
lifting_step* s, const line_buf* aug, const line_buf* sig, + const line_buf* other, ui32 repeat); ///////////////////////////////////////////////////////////////////////// extern void (*irv_horz_syn) - (const param_atk* atk, line_buf* dst, line_buf* lsrc, - line_buf* hsrc, ui32 width, bool even); + (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, + const line_buf* hsrc, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + extern void (*irv_vert_times_K) + (float K, const line_buf* aug, ui32 repeat); diff --git a/src/core/transform/ojph_transform_local.h b/src/core/transform/ojph_transform_local.h index 42cec378..c484d279 100644 --- a/src/core/transform/ojph_transform_local.h +++ b/src/core/transform/ojph_transform_local.h @@ -99,13 +99,24 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// - void gen_rev_vert_syn_step(const lifting_step* s, line_buf* aug, - const line_buf* sig, line_buf* other, + void gen_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// - void gen_rev_horz_syn(const param_atk *atk, line_buf* dst, line_buf *lsrc, - line_buf *hsrc, ui32 width, bool even); + void gen_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void gen_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void gen_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even); @@ -134,18 +145,28 @@ namespace ojph { + 
///////////////////////////////////////////////////////////////////////// + void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void gen_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// - void gen_irv_vert_syn_step(const lifting_step* s, line_buf* aug, - const line_buf* sig, line_buf* other, + void gen_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, ui32 repeat); ///////////////////////////////////////////////////////////////////////// - void gen_irv_vert_syn_K(const float K, line_buf* aug, ui32 repeat); + void gen_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, + ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// - void gen_irv_horz_syn(const param_atk *atk, line_buf* dst, line_buf *lsrc, - line_buf *hsrc, ui32 width, bool even); + void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); From 12c3bf57624704daf3493e95eac40ae2327c3137 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 4 Apr 2024 16:04:52 +1100 Subject: [PATCH 13/37] Fixed 97 analysis. 
--- src/core/transform/ojph_transform.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index 4f7f8cc1..028ac013 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -752,8 +752,8 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// - void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, + void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, ui32 repeat) { float a = s->irv.Aatk; From 4dc10b6abb0e0379ef24e5d79a2e3d598a2fe2b3 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 5 Apr 2024 06:54:32 +1100 Subject: [PATCH 14/37] A bug fix. Still buggy. --- src/core/codestream/ojph_codeblock.cpp | 12 +++-- src/core/codestream/ojph_resolution.cpp | 23 ++++---- src/core/codestream/ojph_resolution.h | 1 + tests/test_executables.cpp | 70 ++++++++++++++++++++++++- tests/test_helpers/ht_cmdlines.txt | 48 +++++++++-------- 5 files changed, 116 insertions(+), 38 deletions(-) diff --git a/src/core/codestream/ojph_codeblock.cpp b/src/core/codestream/ojph_codeblock.cpp index a95cbef5..25bdc2ae 100644 --- a/src/core/codestream/ojph_codeblock.cpp +++ b/src/core/codestream/ojph_codeblock.cpp @@ -150,12 +150,14 @@ namespace ojph { cb_size.w, cb_size.h, stride, stripe_causal); if (result == false) - { - if (resilient == true) - zero_block = true; - else - OJPH_ERROR(0x000300A1, "Error decoding a codeblock\n"); + { + if (resilient == true) { + OJPH_INFO(0x000300A1, "Error decoding a codeblock\n"); + zero_block = true; } + else + OJPH_ERROR(0x000300A1, "Error decoding a codeblock\n"); + } } else zero_block = true; diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index ff148400..c4507707 100644 --- 
a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -361,6 +361,7 @@ namespace ojph { aug->line->wrap(allocator->post_alloc_data(width, 1), width, 1); cur_line = 0; + rows_to_produce = res_rect.siz.h; vert_even = (res_rect.org.y & 1) == 0; horz_even = (res_rect.org.x & 1) == 0; } @@ -406,7 +407,6 @@ namespace ojph { return; } - bool finished; do { //vertical transform @@ -423,14 +423,13 @@ namespace ojph { lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } - finished = true; if (aug->active) { rev_horz_ana(atk, bands[2].get_line(), bands[3].get_line(), aug->line, width, horz_even); bands[2].push_line(); bands[3].push_line(); aug->active = false; - finished = false; + --rows_to_produce; } if (sig->active) { rev_horz_ana(atk, child_res->get_line(), @@ -438,14 +437,15 @@ namespace ojph { bands[1].push_line(); child_res->push_line(); sig->active = false; - finished = false; + --rows_to_produce; }; vert_even = !vert_even; - } while (cur_line >= res_rect.siz.h && !finished); + } while (cur_line >= res_rect.siz.h && rows_to_produce > 0); } else { if (vert_even) { + // horizontal transform rev_horz_ana(atk, child_res->get_line(), bands[1].get_line(), sig->line, width, horz_even); bands[1].push_line(); @@ -453,9 +453,11 @@ namespace ojph { } else { + // vertical transform si32* sp = aug->line->i32; for (ui32 i = width; i > 0; --i) *sp++ <<= 1; + // horizontal transform rev_horz_ana(atk, bands[2].get_line(), bands[3].get_line(), aug->line, width, horz_even); bands[2].push_line(); @@ -472,7 +474,6 @@ namespace ojph { return; } - bool finished; do { //vertical transform @@ -489,7 +490,6 @@ namespace ojph { lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } - finished = true; if (aug->active) { const float K = atk->get_K(); irv_vert_times_K(K, aug->line, width); @@ -499,7 +499,7 @@ namespace ojph { bands[2].push_line(); bands[3].push_line(); aug->active = false; - finished = false; + --rows_to_produce; } if 
(sig->active) { const float K_inv = 1.0f / atk->get_K(); @@ -510,14 +510,15 @@ namespace ojph { bands[1].push_line(); child_res->push_line(); sig->active = false; - finished = false; + --rows_to_produce; }; vert_even = !vert_even; - } while (cur_line >= res_rect.siz.h && !finished); + } while (cur_line >= res_rect.siz.h && rows_to_produce > 0); } else { if (vert_even) { + // horizontal transform irv_horz_ana(atk, child_res->get_line(), bands[1].get_line(), sig->line, width, horz_even); bands[1].push_line(); @@ -525,9 +526,11 @@ namespace ojph { } else { + // vertical transform float* sp = aug->line->f32; for (ui32 i = width; i > 0; --i) *sp++ *= 2.0f; + // horizontal transform irv_horz_ana(atk, bands[2].get_line(), bands[3].get_line(), aug->line, width, horz_even); bands[2].push_line(); diff --git a/src/core/codestream/ojph_resolution.h b/src/core/codestream/ojph_resolution.h index 36ae5d00..72e0b91a 100644 --- a/src/core/codestream/ojph_resolution.h +++ b/src/core/codestream/ojph_resolution.h @@ -116,6 +116,7 @@ namespace ojph { param_dfs::dfs_dwt_type downsampling_style; //wavelet machinery ui32 cur_line; + ui32 rows_to_produce; bool vert_even, horz_even; mem_elastic_allocator *elastic; }; diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index 7e6a00cb..4c3a12b9 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -1015,6 +1015,40 @@ TEST(TestExecutables, SimpleEncIrv9732x128) { "Malamute.ppm", "", 3, mse, pae); } +/////////////////////////////////////////////////////////////////////////////// +// Test ojph_compress with codeblocks when the irv97 wavelet is used. +// We test by comparing MSE and PAE of decoded images. 
+// The compressed file is obtained using these command-line options: +// -o simple_enc_irv97_64x64_tiles_33x33_d5.j2c -qstep 0.01 -tile_size {33,33} +// -num_decomps 5 +TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D5) { + double mse[3] = { 46.2004, 43.622, 56.7452}; + int pae[3] = { 48, 46, 52}; + run_ojph_compress("Malamute.ppm", + "simple_enc_irv97_64x64_tiles_33x33_d5", "", "j2c", + "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 5"); + run_ojph_compress_expand("simple_enc_irv97_64x64_tiles_33x33_d5", "j2c", "ppm"); + run_mse_pae("simple_enc_irv97_64x64_tiles_33x33_d5", "ppm", + "Malamute.ppm", "", 3, mse, pae); +} + +/////////////////////////////////////////////////////////////////////////////// +// Test ojph_compress with codeblocks when the irv97 wavelet is used. +// We test by comparing MSE and PAE of decoded images. +// The compressed file is obtained using these command-line options: +// -o simple_enc_irv97_64x64_tiles_33x33_d6.j2c -qstep 0.01 -tile_size {33,33} +// -num_decomps 6 +TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D6) { + double mse[3] = { 46.2004, 43.622, 56.7452}; + int pae[3] = { 48, 46, 52}; + run_ojph_compress("Malamute.ppm", + "simple_enc_irv97_64x64_tiles_33x33_d6", "", "j2c", + "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 6"); + run_ojph_compress_expand("simple_enc_irv97_64x64_tiles_33x33_d6", "j2c", "ppm"); + run_mse_pae("simple_enc_irv97_64x64_tiles_33x33_d6", "ppm", + "Malamute.ppm", "", 3, mse, pae); +} + /////////////////////////////////////////////////////////////////////////////// // Test ojph_compress with codeblocks when the irv97 wavelet is used. // We test by comparing MSE and PAE of decoded images. @@ -1159,6 +1193,40 @@ TEST(TestExecutables, SimpleEncRev534x1024) { "Malamute.ppm", "", 3, mse, pae); } +/////////////////////////////////////////////////////////////////////////////// +// Test ojph_compress with codeblocks when the rev53 wavelet is used. +// We test by comparing MSE and PAE of decoded images. 
+// The compressed file is obtained using these command-line options: +// -o simple_enc_rev53_64x64_tiles_33x33_d5.j2c -reversible true -tile_size +// {32,32} -num_decomps 5 +TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D5) { + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; + run_ojph_compress("Malamute.ppm", + "simple_enc_rev53_64x64_tiles_33x33_d5", "", "j2c", + "-reversible true -tile_size \"{32,32}\" -num_decomps 5"); + run_ojph_compress_expand("simple_enc_rev53_64x64_tiles_33x33_d5", "j2c", "ppm"); + run_mse_pae("simple_enc_rev53_64x64_tiles_33x33_d5", "ppm", + "Malamute.ppm", "", 3, mse, pae); +} + +/////////////////////////////////////////////////////////////////////////////// +// Test ojph_compress with codeblocks when the rev53 wavelet is used. +// We test by comparing MSE and PAE of decoded images. +// The compressed file is obtained using these command-line options: +// -o simple_enc_rev53_64x64_tiles_33x33_d6.j2c -reversible true -tile_size +// {32,32} -num_decomps 6 +TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D6) { + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; + run_ojph_compress("Malamute.ppm", + "simple_enc_rev53_64x64_tiles_33x33_d6", "", "j2c", + "-reversible true -tile_size \"{32,32}\" -num_decomps 6"); + run_ojph_compress_expand("simple_enc_rev53_64x64_tiles_33x33_d6", "j2c", "ppm"); + run_mse_pae("simple_enc_rev53_64x64_tiles_33x33_d6", "ppm", + "Malamute.ppm", "", 3, mse, pae); +} + /////////////////////////////////////////////////////////////////////////////// // Test ojph_compress with codeblocks when the irv97 wavelet is used. // We test by comparing MSE and PAE of decoded images.
@@ -1220,7 +1288,7 @@ TEST(TestExecutables, SimpleEncIrv97TallNarrow) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_tall_narrow1.j2c -image_offset {1,0} -qstep 0.1 TEST(TestExecutables, SimpleEncIrv97TallNarrow1) { - double mse[3] = { 100.905762, 76.113037, 72.834717}; + double mse[3] = { 100.906, 76.113, 72.8347}; int pae[3] = { 39, 35, 34}; run_ojph_compress("tall_narrow.ppm", "simple_enc_irv97_tall_narrow1", "", "j2c", diff --git a/tests/test_helpers/ht_cmdlines.txt b/tests/test_helpers/ht_cmdlines.txt index 55b8e865..c8590611 100644 --- a/tests/test_helpers/ht_cmdlines.txt +++ b/tests/test_helpers/ht_cmdlines.txt @@ -57,28 +57,32 @@ add_test(NAME simple_dec_rev53_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_D # Encoding ############################################################# -add_test(NAME simple_enc_irv97_64x64 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_64x64.j2c -qstep 0.1" "-i simple_enc_irv97_64x64.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_32x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_32x32.j2c -qstep 0.01 -block_size \{32,32\}" "-i simple_enc_irv97_32x32.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_32x32.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_16x16 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_16x16.j2c -qstep 0.01 -block_size \{16,16\}" "-i simple_enc_irv97_16x16.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_16x16.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_4x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_4x4.j2c 
-qstep 0.01 -block_size \{4,4\}" "-i simple_enc_irv97_4x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_4x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_1024x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_1024x4.j2c -qstep 0.01 -block_size \{4,1024\}" "-i simple_enc_irv97_1024x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_1024x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_4x1024 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_4x1024.j2c -qstep 0.01 -block_size \{1024,4\}" "-i simple_enc_irv97_4x1024.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_4x1024.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_512x8 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_512x8.j2c -qstep 0.01 -block_size \{8,512\}" "-i simple_enc_irv97_512x8.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_512x8.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_8x512 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_8x512.j2c -qstep 0.01 -block_size \{512,8\}" "-i simple_enc_irv97_8x512.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_8x512.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_256x16 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_256x16.j2c -qstep 0.01 -block_size \{16,256\}" "-i simple_enc_irv97_256x16.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_256x16.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_16x256 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh 
-enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_16x256.j2c -qstep 0.01 -block_size \{256,16\}" "-i simple_enc_irv97_16x256.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_16x256.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_128x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_128x32.j2c -qstep 0.01 -block_size \{32,128\}" "-i simple_enc_irv97_128x32.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_128x32.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_32x128 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_32x128.j2c -qstep 0.01 -block_size \{128,32\}" "-i simple_enc_irv97_32x128.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_32x128.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_64x64_16bit COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_64x64_16bit.j2c -qstep 0.01" "-i simple_enc_irv97_64x64_16bit.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_64x64_16bit.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.pgm -o simple_enc_irv97_64x64_16bit_gray.j2c -qstep 0.01" "-i simple_enc_irv97_64x64_16bit_gray.j2c -o test1.pgm -precise -quiet" "-i simple_enc_irv97_64x64_16bit_gray.j2c -o test2.pgm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") -add_test(NAME simple_enc_rev53_64x64_16bit COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_16bit.j2c -reversible true" "-i simple_enc_rev53_64x64_16bit.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64_16bit.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" 
"test2.ppm") -add_test(NAME simple_enc_rev53_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.pgm -o simple_enc_rev53_64x64_16bit_gray.j2c -reversible true" "-i simple_enc_rev53_64x64_16bit_gray.j2c -o test1.pgm -precise -quiet" "-i simple_enc_rev53_64x64_16bit_gray.j2c -o test2.pgm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") - -add_test(NAME simple_enc_rev53_64x64 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64.j2c -reversible true" "-i simple_enc_rev53_64x64.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_rev53_32x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_32x32.j2c -reversible true -block_size \{32,32\}" "-i simple_enc_rev53_32x32.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_32x32.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_rev53_4x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_4x4.j2c -reversible true -block_size \{4,4\}" "-i simple_enc_rev53_4x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_4x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_rev53_1024x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_1024x4.j2c -reversible true -block_size \{4,1024\}" "-i simple_enc_rev53_1024x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_1024x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_rev53_4x1024 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_4x1024.j2c -reversible true -block_size \{1024,4\}" "-i simple_enc_rev53_4x1024.j2c -o test1.ppm -precise 
-quiet" "-i simple_enc_rev53_4x1024.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_64x64 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_64x64.j2c -qstep 0.1" "-i simple_enc_irv97_64x64.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_32x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_32x32.j2c -qstep 0.01 -block_size \{32,32\}" "-i simple_enc_irv97_32x32.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_32x32.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_16x16 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_16x16.j2c -qstep 0.01 -block_size \{16,16\}" "-i simple_enc_irv97_16x16.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_16x16.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_4x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_4x4.j2c -qstep 0.01 -block_size \{4,4\}" "-i simple_enc_irv97_4x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_4x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_1024x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_1024x4.j2c -qstep 0.01 -block_size \{4,1024\}" "-i simple_enc_irv97_1024x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_1024x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_4x1024 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_4x1024.j2c -qstep 0.01 -block_size \{1024,4\}" "-i 
simple_enc_irv97_4x1024.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_4x1024.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_512x8 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_512x8.j2c -qstep 0.01 -block_size \{8,512\}" "-i simple_enc_irv97_512x8.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_512x8.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_8x512 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_8x512.j2c -qstep 0.01 -block_size \{512,8\}" "-i simple_enc_irv97_8x512.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_8x512.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_256x16 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_256x16.j2c -qstep 0.01 -block_size \{16,256\}" "-i simple_enc_irv97_256x16.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_256x16.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_16x256 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_16x256.j2c -qstep 0.01 -block_size \{256,16\}" "-i simple_enc_irv97_16x256.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_16x256.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_128x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_128x32.j2c -qstep 0.01 -block_size \{32,128\}" "-i simple_enc_irv97_128x32.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_128x32.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_32x128 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i 
${images_folder}/mm.ppm -o simple_enc_irv97_32x128.j2c -qstep 0.01 -block_size \{128,32\}" "-i simple_enc_irv97_32x128.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_32x128.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_64x64_tiles_33x33_d5 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_64x64_tiles_33x33_d5.j2c -qstep 0.01 -tile_size \{33,33\} -num_decomps 5" "-i simple_enc_irv97_64x64_tiles_33x33_d5.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_64x64_tiles_33x33_d5.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_64x64_tiles_33x33_d6 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_64x64_tiles_33x33_d6.j2c -qstep 0.01 -tile_size \{33,33\} -num_decomps 6" "-i simple_enc_irv97_64x64_tiles_33x33_d6.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_64x64_tiles_33x33_d6.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_64x64_16bit COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_64x64_16bit.j2c -qstep 0.01" "-i simple_enc_irv97_64x64_16bit.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_64x64_16bit.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.pgm -o simple_enc_irv97_64x64_16bit_gray.j2c -qstep 0.01" "-i simple_enc_irv97_64x64_16bit_gray.j2c -o test1.pgm -precise -quiet" "-i simple_enc_irv97_64x64_16bit_gray.j2c -o test2.pgm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") +add_test(NAME simple_enc_rev53_64x64_16bit COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_16bit.j2c -reversible true" "-i 
simple_enc_rev53_64x64_16bit.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64_16bit.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.pgm -o simple_enc_rev53_64x64_16bit_gray.j2c -reversible true" "-i simple_enc_rev53_64x64_16bit_gray.j2c -o test1.pgm -precise -quiet" "-i simple_enc_rev53_64x64_16bit_gray.j2c -o test2.pgm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") + +add_test(NAME simple_enc_rev53_64x64 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64.j2c -reversible true" "-i simple_enc_rev53_64x64.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_32x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_32x32.j2c -reversible true -block_size \{32,32\}" "-i simple_enc_rev53_32x32.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_32x32.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_4x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_4x4.j2c -reversible true -block_size \{4,4\}" "-i simple_enc_rev53_4x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_4x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_1024x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_1024x4.j2c -reversible true -block_size \{4,1024\}" "-i simple_enc_rev53_1024x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_1024x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_4x1024 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i 
${images_folder}/mm.ppm -o simple_enc_rev53_4x1024.j2c -reversible true -block_size \{1024,4\}" "-i simple_enc_rev53_4x1024.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_4x1024.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_64x64_tiles_33x33_d5 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size \{32,32\} -num_decomps 5" "-i simple_enc_rev53_64x64_tiles_33x33.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_64x64_tiles_33x33_d6 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size \{32,32\} -num_decomps 6" "-i simple_enc_rev53_64x64_tiles_33x33.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") add_test(NAME simple_enc_irv97_64x64_yuv COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom_yuv.sh -enc "-i ${images_folder}/foreman_420.yuv -o simple_enc_irv97_64x64_yuv.j2c -qstep 0.1 -dims \{352,288\} -num_comps 3 -downsamp \{1,1\},\{2,2\},\{2,2\} -bit_depth 8,8,8 -signed false,false,false" "-i simple_enc_irv97_64x64_yuv.j2c -o test1y.rawl,test1u.rawl,test1v.rawl -precise -quiet" "-i simple_enc_irv97_64x64_yuv.j2c -o test2.yuv" "${images_folder}/foreman_420.yuv:352x288x8x420" "test1.yuv:352x288x8x420" "test2.yuv:352x288x8x420") add_test(NAME simple_enc_rev53_64x64_yuv COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom_yuv.sh -renc "-i ${images_folder}/foreman_420.yuv -o simple_enc_rev53_64x64_yuv.j2c -reversible true -qstep 0.1 -dims \{352,288\} -num_comps 3 -downsamp \{1,1\},\{2,2\},\{2,2\} -bit_depth 8,8,8 -signed false,false,false" "-i simple_enc_rev53_64x64_yuv.j2c -o test1y.rawl,test1u.rawl,test1v.rawl -precise -quiet" "-i 
simple_enc_rev53_64x64_yuv.j2c -o test2.yuv" "${images_folder}/foreman_420.yuv:352x288x8x420" "test1.yuv:352x288x8x420" "test2.yuv:352x288x8x420") From 9846f01b5d40a38116cd871754678a110d9d837b Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 5 Apr 2024 14:07:32 +1100 Subject: [PATCH 15/37] Small touchup for error messages --- src/core/codestream/ojph_codeblock.cpp | 4 ++-- src/core/coding/ojph_block_decoder.cpp | 12 ++++++------ src/core/coding/ojph_block_decoder_ssse3.cpp | 12 ++++++------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/core/codestream/ojph_codeblock.cpp b/src/core/codestream/ojph_codeblock.cpp index 25bdc2ae..9a63ca19 100644 --- a/src/core/codestream/ojph_codeblock.cpp +++ b/src/core/codestream/ojph_codeblock.cpp @@ -152,11 +152,11 @@ namespace ojph { if (result == false) { if (resilient == true) { - OJPH_INFO(0x000300A1, "Error decoding a codeblock\n"); + OJPH_INFO(0x000300A1, "Error decoding a codeblock"); zero_block = true; } else - OJPH_ERROR(0x000300A1, "Error decoding a codeblock\n"); + OJPH_ERROR(0x000300A1, "Error decoding a codeblock"); } } else diff --git a/src/core/coding/ojph_block_decoder.cpp b/src/core/coding/ojph_block_decoder.cpp index 9a121876..5be5430a 100644 --- a/src/core/coding/ojph_block_decoder.cpp +++ b/src/core/coding/ojph_block_decoder.cpp @@ -753,14 +753,14 @@ namespace ojph { { OJPH_WARN(0x00010001, "A malformed codeblock that has more than " "one coding pass, but zero length for " - "2nd and potential 3rd pass.\n"); + "2nd and potential 3rd pass"); num_passes = 1; } if (num_passes > 3) { OJPH_WARN(0x00010002, "We do not support more than 3 coding passes; " - "This codeblocks has %d passes.\n", + "This codeblocks has %d passes", num_passes); return false; } @@ -772,7 +772,7 @@ namespace ojph { insufficient_precision = true; OJPH_WARN(0x00010003, "32 bits are not enough to decode this " "codeblock. 
This message will not be " - "displayed again.\n"); + "displayed again"); } return false; } @@ -783,7 +783,7 @@ namespace ojph { OJPH_WARN(0x00010004, "Not enough precision to decode the cleanup " "pass. The code can be modified to support " "this case. This message will not be " - "displayed again.\n"); + "displayed again"); } return false; // 32 bits are not enough to decode this } @@ -796,7 +796,7 @@ namespace ojph { OJPH_WARN(0x00010005, "Not enough precision to decode the SgnProp " "nor MagRef passes; both will be skipped. " "This message will not be displayed " - "again.\n"); + "again"); } } } @@ -806,7 +806,7 @@ namespace ojph { if (lengths1 < 2) { - OJPH_WARN(0x00010006, "Wrong codeblock length.\n"); + OJPH_WARN(0x00010006, "Wrong codeblock length"); return false; } diff --git a/src/core/coding/ojph_block_decoder_ssse3.cpp b/src/core/coding/ojph_block_decoder_ssse3.cpp index a8f89138..99ae38cb 100644 --- a/src/core/coding/ojph_block_decoder_ssse3.cpp +++ b/src/core/coding/ojph_block_decoder_ssse3.cpp @@ -1033,14 +1033,14 @@ namespace ojph { { OJPH_WARN(0x00010001, "A malformed codeblock that has more than " "one coding pass, but zero length for " - "2nd and potential 3rd pass.\n"); + "2nd and potential 3rd pass"); num_passes = 1; } if (num_passes > 3) { OJPH_WARN(0x00010002, "We do not support more than 3 coding passes; " - "This codeblocks has %d passes.\n", + "This codeblocks has %d passes", num_passes); return false; } @@ -1052,7 +1052,7 @@ namespace ojph { insufficient_precision = true; OJPH_WARN(0x00010003, "32 bits are not enough to decode this " "codeblock. This message will not be " - "displayed again.\n"); + "displayed again"); } return false; } @@ -1063,7 +1063,7 @@ namespace ojph { OJPH_WARN(0x00010004, "Not enough precision to decode the cleanup " "pass. The code can be modified to support " "this case. 
This message will not be " - "displayed again.\n"); + "displayed again"); } return false; // 32 bits are not enough to decode this } @@ -1076,7 +1076,7 @@ namespace ojph { OJPH_WARN(0x00010005, "Not enough precision to decode the SgnProp " "nor MagRef passes; both will be skipped. " "This message will not be displayed " - "again.\n"); + "again"); } } } @@ -1086,7 +1086,7 @@ namespace ojph { if (lengths1 < 2) { - OJPH_WARN(0x00010006, "Wrong codeblock length.\n"); + OJPH_WARN(0x00010006, "Wrong codeblock length"); return false; } From 86b139d62f6246ca686801a7da43d198b82e02db Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 6 Apr 2024 09:13:39 +1100 Subject: [PATCH 16/37] This is a very important bug fix -- Empty subbands/precincts. --- src/core/codestream/ojph_precinct.cpp | 24 ++++++++++++++++++++++++ src/core/codestream/ojph_precinct.h | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/core/codestream/ojph_precinct.cpp b/src/core/codestream/ojph_precinct.cpp index 71b61de8..f8d69fbe 100644 --- a/src/core/codestream/ojph_precinct.cpp +++ b/src/core/codestream/ojph_precinct.cpp @@ -332,6 +332,30 @@ namespace ojph { if (may_use_sop) bb_skip_sop(&bb); + if (num_bands == 3) + { + if (bands[1].empty && bands[2].empty && bands[3].empty) + { + ui32 bit = 0; + bb_read_bit(&bb, bit); + bb_terminate(&bb, uses_eph); + assert(bit == 0); + return; + } + } + else + { + if (bands[0].empty) + { + ui32 bit = 0; + bb_read_bit(&bb, bit); + bb_terminate(&bb, uses_eph); + assert(bit == 0); + return; + } + } + + int sst = num_bands == 3 ? 1 : 0; int send = num_bands == 3 ? 
4 : 1; bool empty_packet = true; diff --git a/src/core/codestream/ojph_precinct.h b/src/core/codestream/ojph_precinct.h index 4641ed68..d8e880a9 100644 --- a/src/core/codestream/ojph_precinct.h +++ b/src/core/codestream/ojph_precinct.h @@ -69,7 +69,7 @@ namespace ojph { ui32& data_left, infile_base *file, bool skipped); ui8 *scratch; - point img_point; //the precinct projected to full resolution + point img_point; //the precinct projected to full resolution rect cb_idxs[4]; //indices of codeblocks subband *bands; //the subbands coded_lists* coded; From 55993264b15ee2efba172d40d6e626e5c6f2ff06 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 6 Apr 2024 14:33:56 +1100 Subject: [PATCH 17/37] A small improvement. --- src/core/codestream/ojph_precinct.cpp | 27 ++++++------------------- src/core/codestream/ojph_resolution.cpp | 2 ++ src/core/codestream/ojph_subband.h | 2 ++ 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/src/core/codestream/ojph_precinct.cpp b/src/core/codestream/ojph_precinct.cpp index f8d69fbe..b7e25aa0 100644 --- a/src/core/codestream/ojph_precinct.cpp +++ b/src/core/codestream/ojph_precinct.cpp @@ -332,30 +332,15 @@ namespace ojph { if (may_use_sop) bb_skip_sop(&bb); - if (num_bands == 3) + if (bands[0].empty && bands[1].empty && bands[2].empty && bands[3].empty) { - if (bands[1].empty && bands[2].empty && bands[3].empty) - { - ui32 bit = 0; - bb_read_bit(&bb, bit); - bb_terminate(&bb, uses_eph); - assert(bit == 0); - return; - } - } - else - { - if (bands[0].empty) - { - ui32 bit = 0; - bb_read_bit(&bb, bit); - bb_terminate(&bb, uses_eph); - assert(bit == 0); - return; - } + ui32 bit = 0; + bb_read_bit(&bb, bit); + bb_terminate(&bb, uses_eph); + assert(bit == 0); + return; } - int sst = num_bands == 3 ? 1 : 0; int send = num_bands == 3 ? 
4 : 1; bool empty_packet = true; diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index c4507707..a0413b76 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -241,6 +241,8 @@ namespace ojph { ui32 trx1 = res_rect.org.x + res_rect.siz.w; ui32 try1 = res_rect.org.y + res_rect.siz.h; bands = allocator->post_alloc_obj(4); + for (int i = 0; i < 4; ++i) + new (bands + i) subband; if (res_num > 0) { this->num_bands = 3; diff --git a/src/core/codestream/ojph_subband.h b/src/core/codestream/ojph_subband.h index 9928c5ef..34cc7396 100644 --- a/src/core/codestream/ojph_subband.h +++ b/src/core/codestream/ojph_subband.h @@ -63,6 +63,8 @@ namespace ojph { { friend struct precinct; public: + subband() { memset(this, 0, sizeof(subband)); empty = true; } + static void pre_alloc(codestream *codestream, const rect& band_rect, ui32 comp_num, ui32 res_num); void finalize_alloc(codestream *codestream, const rect& band_rect, From 0e0d41ddd4d3770df81fd3e71f1091af4d7ae9bb Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 6 Apr 2024 15:42:00 +1100 Subject: [PATCH 18/37] Corrected Tests. --- src/core/codestream/ojph_precinct.cpp | 11 +- src/core/codestream/ojph_subband.h | 20 +- tests/test_executables.cpp | 744 +++++++++++++------------- tests/test_helpers/ht_cmdlines.txt | 4 +- 4 files changed, 399 insertions(+), 380 deletions(-) diff --git a/src/core/codestream/ojph_precinct.cpp b/src/core/codestream/ojph_precinct.cpp index b7e25aa0..c20c8589 100644 --- a/src/core/codestream/ojph_precinct.cpp +++ b/src/core/codestream/ojph_precinct.cpp @@ -341,11 +341,12 @@ namespace ojph { return; } - int sst = num_bands == 3 ? 1 : 0; - int send = num_bands == 3 ? 
4 : 1; bool empty_packet = true; - for (int s = sst; s < send; ++s) + for (int s = 0; s < 4; ++s) { + if (bands[s].empty) + continue; + if (cb_idxs[s].siz.w == 0 || cb_idxs[s].siz.h == 0) continue; @@ -505,8 +506,10 @@ namespace ojph { } bb_terminate(&bb, uses_eph); //read codeblock data - for (int s = sst; s < send; ++s) + for (int s = 0; s < 4; ++s) { + if (bands[s].empty) + continue; ui32 band_width = bands[s].num_blocks.w; ui32 width = cb_idxs[s].siz.w; ui32 height = cb_idxs[s].siz.h; diff --git a/src/core/codestream/ojph_subband.h b/src/core/codestream/ojph_subband.h index 34cc7396..5dd145e6 100644 --- a/src/core/codestream/ojph_subband.h +++ b/src/core/codestream/ojph_subband.h @@ -63,7 +63,22 @@ namespace ojph { { friend struct precinct; public: - subband() { memset(this, 0, sizeof(subband)); empty = true; } + subband() { + res_num = band_num = 0; + reversible = false; + empty = true; // <---- true + lines = NULL; + parent = NULL; + blocks = NULL; + xcb_prime = ycb_prime = 0; + cur_cb_row = 0; + cur_line = 0; + cur_cb_height = 0; + delta = delta_inv = 0.0f; + K_max = 0; + coded_cbs = NULL; + elastic = NULL; + } static void pre_alloc(codestream *codestream, const rect& band_rect, ui32 comp_num, ui32 res_num); @@ -80,9 +95,10 @@ namespace ojph { line_buf* pull_line(); private: + bool empty; // true if the subband has no pixels or + // the subband is NOT USED ui32 res_num, band_num; bool reversible; - bool empty; rect band_rect; line_buf *lines; resolution* parent; diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index 4c3a12b9..f42174f6 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -44,7 +44,7 @@ // STATIC ojph_popen //////////////////////////////////////////////////////////////////////////////// static inline -FILE *ojph_popen(const char *command, const char *modes) +FILE* ojph_popen(const char* command, const char* modes) { #ifdef OJPH_COMPILER_MSVC return _popen(command, modes); @@ -57,7 +57,7 @@ FILE 
*ojph_popen(const char *command, const char *modes) // STATIC ojph_pclose //////////////////////////////////////////////////////////////////////////////// static inline -int ojph_pclose(FILE *stream) +int ojph_pclose(FILE* stream) { #ifdef OJPH_COMPILER_MSVC return _pclose(stream); @@ -69,16 +69,16 @@ int ojph_pclose(FILE *stream) //////////////////////////////////////////////////////////////////////////////// // STATIC execute //////////////////////////////////////////////////////////////////////////////// -static -int execute(const std::string& cmd, std::string& result) +static +int execute(const std::string& cmd, std::string& result) { std::array buffer; result.clear(); FILE* pipe = ojph_popen(cmd.c_str(), "r"); - if (!pipe) + if (!pipe) throw std::runtime_error("ojph_popen() failed!"); - + while (!feof(pipe)) if (fgets(buffer.data(), 128, pipe) != nullptr) result += buffer.data(); @@ -94,21 +94,21 @@ int execute(const std::string& cmd, std::string& result) //////////////////////////////////////////////////////////////////////////////// #ifdef OJPH_OS_WINDOWS - #define SRC_FILE_DIR ".\\jp2k_test_codestreams\\openjph\\" - #define OUT_FILE_DIR ".\\" - #define REF_FILE_DIR ".\\jp2k_test_codestreams\\openjph\\references\\" - #define MSE_PAE_PATH ".\\mse_pae" - #define COMPARE_FILES_PATH ".\\compare_files" - #define EXPAND_EXECUTABLE ".\\ojph_expand.exe" - #define COMPRESS_EXECUTABLE ".\\ojph_compress.exe" +#define SRC_FILE_DIR ".\\jp2k_test_codestreams\\openjph\\" +#define OUT_FILE_DIR ".\\" +#define REF_FILE_DIR ".\\jp2k_test_codestreams\\openjph\\references\\" +#define MSE_PAE_PATH ".\\mse_pae" +#define COMPARE_FILES_PATH ".\\compare_files" +#define EXPAND_EXECUTABLE ".\\ojph_expand.exe" +#define COMPRESS_EXECUTABLE ".\\ojph_compress.exe" #else - #define SRC_FILE_DIR "./jp2k_test_codestreams/openjph/" - #define OUT_FILE_DIR "./" - #define REF_FILE_DIR "./jp2k_test_codestreams/openjph/references/" - #define MSE_PAE_PATH "./mse_pae" - #define COMPARE_FILES_PATH 
"./compare_files" - #define EXPAND_EXECUTABLE "./ojph_expand" - #define COMPRESS_EXECUTABLE "./ojph_compress" +#define SRC_FILE_DIR "./jp2k_test_codestreams/openjph/" +#define OUT_FILE_DIR "./" +#define REF_FILE_DIR "./jp2k_test_codestreams/openjph/references/" +#define MSE_PAE_PATH "./mse_pae" +#define COMPARE_FILES_PATH "./compare_files" +#define EXPAND_EXECUTABLE "./ojph_expand" +#define COMPRESS_EXECUTABLE "./ojph_compress" #endif #define TOL_DOUBLE 0.01 #define TOL_INTEGER 1 @@ -116,22 +116,22 @@ int execute(const std::string& cmd, std::string& result) //////////////////////////////////////////////////////////////////////////////// // run_ojph_compress //////////////////////////////////////////////////////////////////////////////// -void run_ojph_compress(const std::string& ref_filename, - const std::string& base_filename, - const std::string& extended_base_fname, - const std::string& out_ext, - const std::string& extra_options) +void run_ojph_compress(const std::string& ref_filename, + const std::string& base_filename, + const std::string& extended_base_fname, + const std::string& out_ext, + const std::string& extra_options) { try { std::string result, command; - command = std::string(COMPRESS_EXECUTABLE) + command = std::string(COMPRESS_EXECUTABLE) + " -i " + REF_FILE_DIR + ref_filename - + " -o " + OUT_FILE_DIR + base_filename + extended_base_fname + + + " -o " + OUT_FILE_DIR + base_filename + extended_base_fname + "." 
+ out_ext + " " + extra_options; std::cerr << command << std::endl; EXPECT_EQ(execute(command, result), 0); } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); } } @@ -139,18 +139,18 @@ void run_ojph_compress(const std::string& ref_filename, //////////////////////////////////////////////////////////////////////////////// // run_ojph_expand //////////////////////////////////////////////////////////////////////////////// -void run_ojph_expand(const std::string& base_filename, - const std::string& src_ext, - const std::string& out_ext) +void run_ojph_expand(const std::string& base_filename, + const std::string& src_ext, + const std::string& out_ext) { try { std::string result, command; - command = std::string(EXPAND_EXECUTABLE) + command = std::string(EXPAND_EXECUTABLE) + " -i " + SRC_FILE_DIR + base_filename + "." + src_ext + " -o " + OUT_FILE_DIR + base_filename + "." + out_ext; EXPECT_EQ(execute(command, result), 0); } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); } } @@ -158,34 +158,34 @@ void run_ojph_expand(const std::string& base_filename, //////////////////////////////////////////////////////////////////////////////// // run_ojph_compress //////////////////////////////////////////////////////////////////////////////// -void run_ojph_compress_expand(const std::string& base_filename, - const std::string& out_ext, - const std::string& decode_ext) +void run_ojph_compress_expand(const std::string& base_filename, + const std::string& out_ext, + const std::string& decode_ext) { try { std::string result, command; - command = std::string(EXPAND_EXECUTABLE) + command = std::string(EXPAND_EXECUTABLE) + " -i " + OUT_FILE_DIR + base_filename + "." + out_ext + " -o " + OUT_FILE_DIR + base_filename + "." 
+ decode_ext; EXPECT_EQ(execute(command, result), 0); } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); - } + } } //////////////////////////////////////////////////////////////////////////////// // run_mse_pae //////////////////////////////////////////////////////////////////////////////// -void run_mse_pae(const std::string& base_filename, - const std::string& out_ext, - const std::string& ref_filename, - const std::string& yuv_specs, - int num_components, double* mse, int* pae) +void run_mse_pae(const std::string& base_filename, + const std::string& out_ext, + const std::string& ref_filename, + const std::string& yuv_specs, + int num_components, double* mse, int* pae) { try { std::string result, command; - command = std::string(MSE_PAE_PATH) + command = std::string(MSE_PAE_PATH) + " " + OUT_FILE_DIR + base_filename + "." + out_ext + yuv_specs + " " + REF_FILE_DIR + ref_filename + yuv_specs; EXPECT_EQ(execute(command, result), 0); @@ -214,7 +214,7 @@ void run_mse_pae(const std::string& base_filename, ++pos; } } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); } } @@ -222,20 +222,20 @@ void run_mse_pae(const std::string& base_filename, //////////////////////////////////////////////////////////////////////////////// // compare_files //////////////////////////////////////////////////////////////////////////////// -void compare_files(const std::string& base_filename, - const std::string& extended_base_fname, - const std::string& ext) +void compare_files(const std::string& base_filename, + const std::string& extended_base_fname, + const std::string& ext) { try { std::string result, command; - command = std::string(COMPARE_FILES_PATH) + command = std::string(COMPARE_FILES_PATH) + " " + OUT_FILE_DIR + base_filename + extended_base_fname + "." + ext + " " + SRC_FILE_DIR + base_filename + "." 
+ ext; EXPECT_EQ(execute(command, result), 0); } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); - } + } } //////////////////////////////////////////////////////////////////////////////// @@ -249,7 +249,7 @@ TEST(TestExecutables, OpenJPHCompressNoArguments) { std::string result; EXPECT_EQ(execute(COMPRESS_EXECUTABLE, result), 1); } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); } } @@ -261,7 +261,7 @@ TEST(TestExecutables, OpenJPHExpandNoArguments) { std::string result; EXPECT_EQ(execute(EXPAND_EXECUTABLE, result), 1); } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); } } @@ -275,11 +275,11 @@ TEST(TestExecutables, OpenJPHExpandNoArguments) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_64x64.jph -precise -quiet -rate 0.5 -full TEST(TestExecutables, SimpleDecIrv9764x64) { - double mse[3] = { 39.2812, 36.3819, 47.642}; - int pae[3] = { 74, 77, 73}; + double mse[3] = { 39.2812, 36.3819, 47.642 }; + int pae[3] = { 74, 77, 73 }; run_ojph_expand("simple_dec_irv97_64x64", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -287,11 +287,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_32x32.jph -precise -quiet -rate 1 Cblk={32,32} -full TEST(TestExecutables, SimpleDecIrv9732x32) { - double mse[3] = { 18.6979, 17.1208, 22.7539}; - int pae[3] = { 51, 48, 46}; + double mse[3] = { 18.6979, 17.1208, 22.7539 }; + int pae[3] = { 51, 48, 46 }; run_ojph_expand("simple_dec_irv97_32x32", "jph", "ppm"); run_mse_pae("simple_dec_irv97_32x32", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -299,11 +299,11 @@ TEST(TestExecutables, SimpleDecIrv9732x32) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_16x16.jph -precise -quiet -rate 1 Cblk={16,16} -full TEST(TestExecutables, SimpleDecIrv9716x16) { - double mse[3] = { 20.1706, 18.5427, 24.6146}; - int pae[3] = { 53, 51, 47}; + double mse[3] = { 20.1706, 18.5427, 24.6146 }; + int pae[3] = { 53, 51, 47 }; run_ojph_expand("simple_dec_irv97_16x16", "jph", "ppm"); run_mse_pae("simple_dec_irv97_16x16", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -311,11 +311,11 @@ TEST(TestExecutables, SimpleDecIrv9716x16) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_4x4.jph -precise -quiet -rate 1 Cblk={4,4} -full TEST(TestExecutables, SimpleDecIrv974x4) { - double mse[3] = { 40.8623, 37.9308, 49.7276}; - int pae[3] = { 75, 77, 80}; + double mse[3] = { 40.8623, 37.9308, 49.7276 }; + int pae[3] = { 75, 77, 80 }; run_ojph_expand("simple_dec_irv97_4x4", "jph", "ppm"); run_mse_pae("simple_dec_irv97_4x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -323,11 +323,11 @@ TEST(TestExecutables, SimpleDecIrv974x4) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_1024x4.jph -precise -quiet -rate 1 Cblk={1024,4} -full TEST(TestExecutables, SimpleDecIrv971024x4) { - double mse[3] = { 19.8275, 18.2511, 24.2832}; - int pae[3] = { 53, 52, 50}; + double mse[3] = { 19.8275, 18.2511, 24.2832 }; + int pae[3] = { 53, 52, 50 }; run_ojph_expand("simple_dec_irv97_1024x4", "jph", "ppm"); run_mse_pae("simple_dec_irv97_1024x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ 
-335,11 +335,11 @@ TEST(TestExecutables, SimpleDecIrv971024x4) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_4x1024.jph -precise -quiet -rate 1 Cblk={4,1024} -full TEST(TestExecutables, SimpleDecIrv974x1024) { - double mse[3] = { 19.9635, 18.4063, 24.1719}; - int pae[3] = { 51, 48, 51}; + double mse[3] = { 19.9635, 18.4063, 24.1719 }; + int pae[3] = { 51, 48, 51 }; run_ojph_expand("simple_dec_irv97_4x1024", "jph", "ppm"); run_mse_pae("simple_dec_irv97_4x1024", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -347,11 +347,11 @@ TEST(TestExecutables, SimpleDecIrv974x1024) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_512x8.jph -precise -quiet -rate 1 Cblk={512,8} -full TEST(TestExecutables, SimpleDecIrv97512x8) { - double mse[3] = { 18.7929, 17.2026, 22.9922}; - int pae[3] = { 53, 52, 50}; + double mse[3] = { 18.7929, 17.2026, 22.9922 }; + int pae[3] = { 53, 52, 50 }; run_ojph_expand("simple_dec_irv97_512x8", "jph", "ppm"); run_mse_pae("simple_dec_irv97_512x8", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -359,11 +359,11 @@ TEST(TestExecutables, SimpleDecIrv97512x8) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_8x512.jph -precise -quiet -rate 1 Cblk={8,512} -full TEST(TestExecutables, SimpleDecIrv978x512) { - double mse[3] = { 19.3661, 17.8067, 23.4574}; - int pae[3] = { 51, 48, 52}; + double mse[3] = { 19.3661, 17.8067, 23.4574 }; + int pae[3] = { 51, 48, 52 }; run_ojph_expand("simple_dec_irv97_8x512", "jph", "ppm"); run_mse_pae("simple_dec_irv97_8x512", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -371,11 +371,11 @@ TEST(TestExecutables, SimpleDecIrv978x512) { // 
Command-line options used to obtain this file is: // -o simple_dec_irv97_256x16.jph -precise -quiet -rate 1 Cblk={256,16} -full TEST(TestExecutables, SimpleDecIrv97256x16) { - double mse[3] = { 18.6355, 17.0963, 22.6076}; - int pae[3] = { 54, 51, 48}; + double mse[3] = { 18.6355, 17.0963, 22.6076 }; + int pae[3] = { 54, 51, 48 }; run_ojph_expand("simple_dec_irv97_256x16", "jph", "ppm"); run_mse_pae("simple_dec_irv97_256x16", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -383,11 +383,11 @@ TEST(TestExecutables, SimpleDecIrv97256x16) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_16x256.jph -precise -quiet -rate 1 Cblk={16,256} -full TEST(TestExecutables, SimpleDecIrv9716x256) { - double mse[3] = { 18.5933, 17.0208, 22.5709}; - int pae[3] = { 51, 48, 47}; + double mse[3] = { 18.5933, 17.0208, 22.5709 }; + int pae[3] = { 51, 48, 47 }; run_ojph_expand("simple_dec_irv97_16x256", "jph", "ppm"); run_mse_pae("simple_dec_irv97_16x256", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -395,11 +395,11 @@ TEST(TestExecutables, SimpleDecIrv9716x256) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_128x32.jph -precise -quiet -rate 1 Cblk={128,32} -full TEST(TestExecutables, SimpleDecIrv97128x32) { - double mse[3] = { 18.4443, 16.9133, 22.4193}; - int pae[3] = { 52, 50, 46}; + double mse[3] = { 18.4443, 16.9133, 22.4193 }; + int pae[3] = { 52, 50, 46 }; run_ojph_expand("simple_dec_irv97_128x32", "jph", "ppm"); run_mse_pae("simple_dec_irv97_128x32", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -407,11 +407,11 @@ TEST(TestExecutables, SimpleDecIrv97128x32) { // Command-line options used to obtain this file is: // -o 
simple_dec_irv97_32x128.jph -precise -quiet -rate 1 Cblk={32,128} -full TEST(TestExecutables, SimpleDecIrv9732x128) { - double mse[3] = { 18.4874, 16.9379, 22.4855}; - int pae[3] = { 51, 48, 45}; + double mse[3] = { 18.4874, 16.9379, 22.4855 }; + int pae[3] = { 51, 48, 45 }; run_ojph_expand("simple_dec_irv97_32x128", "jph", "ppm"); run_mse_pae("simple_dec_irv97_32x128", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -419,11 +419,11 @@ TEST(TestExecutables, SimpleDecIrv9732x128) { // Command-line options used to obtain this file is: // -o simple_dec_rev53_64x64.jph -precise -quiet Creversible=yes -full TEST(TestExecutables, SimpleDecRev5364x64) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_64x64", "jph", "ppm"); run_mse_pae("simple_dec_rev53_64x64", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -432,11 +432,11 @@ TEST(TestExecutables, SimpleDecRev5364x64) { // -o simple_dec_rev53_32x32.jph -precise -quiet Creversible=yes Cblk={32,32} // -full TEST(TestExecutables, SimpleDecRev5332x32) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_32x32", "jph", "ppm"); run_mse_pae("simple_dec_rev53_32x32", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -445,11 +445,11 @@ TEST(TestExecutables, SimpleDecRev5332x32) { // -o simple_dec_rev53_4x4.jph -precise -quiet Creversible=yes Cblk={4,4} // -full TEST(TestExecutables, SimpleDecRev534x4) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; 
run_ojph_expand("simple_dec_rev53_4x4", "jph", "ppm"); run_mse_pae("simple_dec_rev53_4x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -458,11 +458,11 @@ TEST(TestExecutables, SimpleDecRev534x4) { // -o simple_dec_rev53_1024x4.jph -precise -quiet Creversible=yes // Cblk={1024,4} -full TEST(TestExecutables, SimpleDecRev531024x4) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_1024x4", "jph", "ppm"); run_mse_pae("simple_dec_rev53_1024x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -471,11 +471,11 @@ TEST(TestExecutables, SimpleDecRev531024x4) { // -o simple_dec_rev53_4x1024.jph -precise -quiet Creversible=yes // Cblk={4,1024} -full TEST(TestExecutables, SimpleDecRev534x1024) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_4x1024", "jph", "ppm"); run_mse_pae("simple_dec_rev53_4x1024", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -486,11 +486,11 @@ TEST(TestExecutables, SimpleDecRev534x1024) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} -full TEST(TestExecutables, SimpleDecIrv9764x64Yuv) { - double mse[3] = { 20.2778, 6.27912, 4.15937}; - int pae[3] = { 52, 22, 31}; + double mse[3] = { 20.2778, 6.27912, 4.15937 }; + int pae[3] = { 52, 22, 31 }; run_ojph_expand("simple_dec_irv97_64x64_yuv", "jph", "yuv"); run_mse_pae("simple_dec_irv97_64x64_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -501,11 +501,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64Yuv) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} -full TEST(TestExecutables, SimpleDecRev5364x64Yuv) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_64x64_yuv", "jph", "yuv"); run_mse_pae("simple_dec_rev53_64x64_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -516,11 +516,11 @@ TEST(TestExecutables, SimpleDecRev5364x64Yuv) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} Stiles={33,257} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesYuv) { - double mse[3] = { 34.4972, 10.1112, 7.96331}; - int pae[3] = { 67, 30, 39}; + double mse[3] = { 34.4972, 10.1112, 7.96331 }; + int pae[3] = { 67, 30, 39 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_yuv", "jph", "yuv"); run_mse_pae("simple_dec_irv97_64x64_tiles_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -531,11 +531,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesYuv) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} Stiles={33,257} -full TEST(TestExecutables, SimpleDecRev5364x64TilesYuv) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_64x64_tiles_yuv", "jph", "yuv"); run_mse_pae("simple_dec_rev53_64x64_tiles_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -545,11 +545,11 @@ TEST(TestExecutables, SimpleDecRev5364x64TilesYuv) { // Clevels=5 Corder=LRCP Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP) { - double mse[3] = { 71.8149, 68.7115, 89.4001}; - int pae[3] = { 78, 78, 83}; + double mse[3] = { 71.8149, 68.7115, 89.4001 }; + int pae[3] = { 78, 78, 83 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_LRCP", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_LRCP", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -559,11 +559,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP) { // Clevels=5 Corder=RLCP Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP) { - double mse[3] = { 71.8149, 68.7115, 89.4001}; - int pae[3] = { 78, 78, 83}; + double mse[3] = { 71.8149, 68.7115, 89.4001 }; + int pae[3] = { 78, 78, 83 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_RLCP", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RLCP", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -573,11 +573,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP) { // Clevels=5 Corder=RPCL Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL) { - double mse[3] = { 71.8149, 68.7115, 89.4001}; - int pae[3] = { 78, 78, 83}; + double mse[3] = { 71.8149, 68.7115, 89.4001 }; + int pae[3] = { 78, 78, 83 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_RPCL", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RPCL", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -587,11 +587,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL) { // Clevels=5 Corder=PCRL Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL) { - double mse[3] = { 71.8149, 68.7115, 89.4001}; - int pae[3] = { 78, 78, 83}; + double mse[3] = { 71.8149, 68.7115, 89.4001 }; + int pae[3] = { 78, 78, 83 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_PCRL", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_PCRL", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -601,11 +601,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL) { // Clevels=5 Corder=CPRL Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL) { - double mse[3] = { 71.8149, 68.7115, 89.4001}; - int pae[3] = { 78, 78, 83}; + double mse[3] = { 71.8149, 68.7115, 89.4001 }; + int pae[3] = { 78, 78, 83 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_CPRL", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_CPRL", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -615,11 +615,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL) { // Clevels=5 Corder=LRCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33) { - double mse[3] = { 56.2139, 51.4121, 69.0107}; - int pae[3] = { 80, 81, 98}; + double mse[3] = { 56.2139, 51.4121, 69.0107 }; + int pae[3] = { 80, 81, 98 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_LRCP33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_LRCP33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -629,11 +629,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33) { // Clevels=5 Corder=RLCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33) { - double mse[3] = { 56.2139, 51.4121, 69.0107}; - int pae[3] = { 80, 81, 98}; + double mse[3] = { 56.2139, 51.4121, 69.0107 }; + int pae[3] = { 80, 81, 98 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_RLCP33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RLCP33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -643,11 +643,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33) { // Clevels=5 Corder=RPCL Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33) { - double mse[3] = { 56.2139, 51.4121, 69.0107}; - int pae[3] = { 80, 81, 98}; + double mse[3] = { 56.2139, 51.4121, 69.0107 }; + int pae[3] = { 80, 81, 98 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_RPCL33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RPCL33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -657,11 +657,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33) { // Clevels=5 Corder=PCRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33) { - double mse[3] = { 56.2139, 51.4121, 69.0107}; - int pae[3] = { 80, 81, 98}; + double mse[3] = { 56.2139, 51.4121, 69.0107 }; + int pae[3] = { 80, 81, 98 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_PCRL33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_PCRL33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -671,11 +671,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33) { // Clevels=5 Corder=CPRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33) { - double mse[3] = { 56.2139, 51.4121, 69.0107}; - int pae[3] = { 80, 81, 98}; + double mse[3] = { 56.2139, 51.4121, 69.0107 }; + int pae[3] = { 80, 81, 98 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_CPRL33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_CPRL33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -685,11 +685,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33) { // Clevels=5 Corder=LRCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33x33) { - double mse[3] = { 210.283, 210.214, 257.276}; - int pae[3] = { 165, 161, 166}; + double mse[3] = { 210.283, 210.214, 257.276 }; + int pae[3] = { 165, 161, 166 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_LRCP33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_LRCP33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -699,11 +699,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33x33) { // Clevels=5 Corder=RLCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33x33) { - double mse[3] = { 210.283, 210.214, 257.276}; - int pae[3] = { 165, 161, 166}; + double mse[3] = { 210.283, 210.214, 257.276 }; + int pae[3] = { 165, 161, 166 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_RLCP33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RLCP33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -713,11 +713,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33x33) { // Clevels=5 Corder=RPCL Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33x33) { - double mse[3] = { 210.283, 210.214, 257.276}; - int pae[3] = { 165, 161, 166}; + double mse[3] = { 210.283, 210.214, 257.276 }; + int pae[3] = { 165, 161, 166 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_RPCL33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RPCL33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -727,11 +727,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33x33) { // Clevels=5 Corder=PCRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33x33) { - double mse[3] = { 210.283, 210.214, 257.276}; - int pae[3] = { 165, 161, 166}; + double mse[3] = { 210.283, 210.214, 257.276 }; + int pae[3] = { 165, 161, 166 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_PCRL33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_PCRL33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -741,11 +741,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33x33) { // Clevels=5 Corder=CPRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33x33) { - double mse[3] = { 210.283, 210.214, 257.276}; - int pae[3] = { 165, 161, 166}; + double mse[3] = { 210.283, 210.214, 257.276 }; + int pae[3] = { 165, 161, 166 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_CPRL33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_CPRL33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -754,11 +754,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33x33) { // -o simple_dec_rev53_64x64_gray_tiles.jph -precise -quiet Creversible=yes // Clevels=5 Stiles={33,257} -full TEST(TestExecutables, SimpleDecRev5364x64GrayTiles) { - double mse[1] = { 0}; - int pae[1] = { 0}; + double mse[1] = { 0 }; + int pae[1] = { 0 }; run_ojph_expand("simple_dec_rev53_64x64_gray_tiles", "jph", "pgm"); run_mse_pae("simple_dec_rev53_64x64_gray_tiles", "pgm", "monarch.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -767,11 +767,11 @@ TEST(TestExecutables, SimpleDecRev5364x64GrayTiles) { // -o simple_dec_irv97_64x64_gray_tiles.jph -precise -quiet -rate 0.5 // Clevels=5 Stiles={33,257} -full TEST(TestExecutables, SimpleDecIrv9764x64GrayTiles) { - double mse[1] = { 18.9601}; - int pae[1] = { 56}; + double mse[1] = { 18.9601 }; + int pae[1] = { 56 }; run_ojph_expand("simple_dec_irv97_64x64_gray_tiles", "jph", "pgm"); run_mse_pae("simple_dec_irv97_64x64_gray_tiles", "pgm", "monarch.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -779,11 +779,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64GrayTiles) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_64x64_16bit.jph -precise -quiet -rate 0.5 -full TEST(TestExecutables, SimpleDecIrv9764x6416bit) { - double mse[3] = { 60507.2, 36672.5, 64809.8}; - int pae[3] = { 2547, 1974, 1922}; + double mse[3] = { 60507.2, 36672.5, 64809.8 }; + int pae[3] = { 2547, 1974, 1922 }; run_ojph_expand("simple_dec_irv97_64x64_16bit", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_16bit", "ppm", "mm.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -791,11 +791,11 @@ TEST(TestExecutables, 
SimpleDecIrv9764x6416bit) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_64x64_16bit_gray.jph -precise -quiet -rate 0.5 -full TEST(TestExecutables, SimpleDecIrv9764x6416bitGray) { - double mse[1] = { 19382.9}; - int pae[1] = { 1618}; + double mse[1] = { 19382.9 }; + int pae[1] = { 1618 }; run_ojph_expand("simple_dec_irv97_64x64_16bit_gray", "jph", "pgm"); run_mse_pae("simple_dec_irv97_64x64_16bit_gray", "pgm", "mm.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -803,11 +803,11 @@ TEST(TestExecutables, SimpleDecIrv9764x6416bitGray) { // Command-line options used to obtain this file is: // -o simple_dec_rev53_64x64_16bit.jph -precise -quiet Creversible=yes -full TEST(TestExecutables, SimpleDecRev5364x6416bit) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_64x64_16bit", "jph", "ppm"); run_mse_pae("simple_dec_rev53_64x64_16bit", "ppm", "mm.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -816,11 +816,11 @@ TEST(TestExecutables, SimpleDecRev5364x6416bit) { // -o simple_dec_rev53_64x64_16bit_gray.jph -precise -quiet Creversible=yes // -full TEST(TestExecutables, SimpleDecRev5364x6416bitGray) { - double mse[1] = { 0}; - int pae[1] = { 0}; + double mse[1] = { 0 }; + int pae[1] = { 0 }; run_ojph_expand("simple_dec_rev53_64x64_16bit_gray", "jph", "pgm"); run_mse_pae("simple_dec_rev53_64x64_16bit_gray", "pgm", "mm.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -829,14 +829,14 @@ TEST(TestExecutables, SimpleDecRev5364x6416bitGray) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_64x64.j2c -qstep 0.1 TEST(TestExecutables, SimpleEncIrv9764x64) { - double 
mse[3] = { 46.2004, 43.622, 56.7452}; - int pae[3] = { 48, 46, 52}; + double mse[3] = { 46.2004, 43.622, 56.7452 }; + int pae[3] = { 48, 46, 52 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_64x64", "", "j2c", - "-qstep 0.1"); + "simple_enc_irv97_64x64", "", "j2c", + "-qstep 0.1"); run_ojph_compress_expand("simple_enc_irv97_64x64", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -845,14 +845,14 @@ TEST(TestExecutables, SimpleEncIrv9764x64) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_32x32.j2c -qstep 0.01 -block_size {32,32} TEST(TestExecutables, SimpleEncIrv9732x32) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_32x32", "", "j2c", - "-qstep 0.01 -block_size \"{32,32}\""); + "simple_enc_irv97_32x32", "", "j2c", + "-qstep 0.01 -block_size \"{32,32}\""); run_ojph_compress_expand("simple_enc_irv97_32x32", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_32x32", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -861,14 +861,14 @@ TEST(TestExecutables, SimpleEncIrv9732x32) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_16x16.j2c -qstep 0.01 -block_size {16,16} TEST(TestExecutables, SimpleEncIrv9716x16) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_16x16", "", "j2c", - "-qstep 0.01 -block_size \"{16,16}\""); + "simple_enc_irv97_16x16", "", "j2c", + "-qstep 0.01 -block_size 
\"{16,16}\""); run_ojph_compress_expand("simple_enc_irv97_16x16", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_16x16", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -877,14 +877,14 @@ TEST(TestExecutables, SimpleEncIrv9716x16) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_4x4.j2c -qstep 0.01 -block_size {4,4} TEST(TestExecutables, SimpleEncIrv974x4) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_4x4", "", "j2c", - "-qstep 0.01 -block_size \"{4,4}\""); + "simple_enc_irv97_4x4", "", "j2c", + "-qstep 0.01 -block_size \"{4,4}\""); run_ojph_compress_expand("simple_enc_irv97_4x4", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_4x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -893,14 +893,14 @@ TEST(TestExecutables, SimpleEncIrv974x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_1024x4.j2c -qstep 0.01 -block_size {4,1024} TEST(TestExecutables, SimpleEncIrv971024x4) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_1024x4", "", "j2c", - "-qstep 0.01 -block_size \"{4,1024}\""); + "simple_enc_irv97_1024x4", "", "j2c", + "-qstep 0.01 -block_size \"{4,1024}\""); run_ojph_compress_expand("simple_enc_irv97_1024x4", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_1024x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -909,14 
+909,14 @@ TEST(TestExecutables, SimpleEncIrv971024x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_4x1024.j2c -qstep 0.01 -block_size {1024,4} TEST(TestExecutables, SimpleEncIrv974x1024) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_4x1024", "", "j2c", - "-qstep 0.01 -block_size \"{1024,4}\""); + "simple_enc_irv97_4x1024", "", "j2c", + "-qstep 0.01 -block_size \"{1024,4}\""); run_ojph_compress_expand("simple_enc_irv97_4x1024", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_4x1024", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -925,14 +925,14 @@ TEST(TestExecutables, SimpleEncIrv974x1024) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_512x8.j2c -qstep 0.01 -block_size {8,512} TEST(TestExecutables, SimpleEncIrv97512x8) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_512x8", "", "j2c", - "-qstep 0.01 -block_size \"{8,512}\""); + "simple_enc_irv97_512x8", "", "j2c", + "-qstep 0.01 -block_size \"{8,512}\""); run_ojph_compress_expand("simple_enc_irv97_512x8", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_512x8", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -941,14 +941,14 @@ TEST(TestExecutables, SimpleEncIrv97512x8) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_8x512.j2c -qstep 0.01 -block_size {512,8} TEST(TestExecutables, SimpleEncIrv978x512) { - double mse[3] = { 1.78779, 
1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_8x512", "", "j2c", - "-qstep 0.01 -block_size \"{512,8}\""); + "simple_enc_irv97_8x512", "", "j2c", + "-qstep 0.01 -block_size \"{512,8}\""); run_ojph_compress_expand("simple_enc_irv97_8x512", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_8x512", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -957,14 +957,14 @@ TEST(TestExecutables, SimpleEncIrv978x512) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_256x16.j2c -qstep 0.01 -block_size {16,256} TEST(TestExecutables, SimpleEncIrv97256x16) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_256x16", "", "j2c", - "-qstep 0.01 -block_size \"{16,256}\""); + "simple_enc_irv97_256x16", "", "j2c", + "-qstep 0.01 -block_size \"{16,256}\""); run_ojph_compress_expand("simple_enc_irv97_256x16", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_256x16", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -973,14 +973,14 @@ TEST(TestExecutables, SimpleEncIrv97256x16) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_16x256.j2c -qstep 0.01 -block_size {256,16} TEST(TestExecutables, SimpleEncIrv9716x256) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_16x256", "", "j2c", - "-qstep 0.01 -block_size \"{256,16}\""); + "simple_enc_irv97_16x256", 
"", "j2c", + "-qstep 0.01 -block_size \"{256,16}\""); run_ojph_compress_expand("simple_enc_irv97_16x256", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_16x256", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -989,14 +989,14 @@ TEST(TestExecutables, SimpleEncIrv9716x256) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_128x32.j2c -qstep 0.01 -block_size {32,128} TEST(TestExecutables, SimpleEncIrv97128x32) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_128x32", "", "j2c", - "-qstep 0.01 -block_size \"{32,128}\""); + "simple_enc_irv97_128x32", "", "j2c", + "-qstep 0.01 -block_size \"{32,128}\""); run_ojph_compress_expand("simple_enc_irv97_128x32", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_128x32", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1005,14 +1005,14 @@ TEST(TestExecutables, SimpleEncIrv97128x32) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_32x128.j2c -qstep 0.01 -block_size {128,32} TEST(TestExecutables, SimpleEncIrv9732x128) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_32x128", "", "j2c", - "-qstep 0.01 -block_size \"{128,32}\""); + "simple_enc_irv97_32x128", "", "j2c", + "-qstep 0.01 -block_size \"{128,32}\""); run_ojph_compress_expand("simple_enc_irv97_32x128", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_32x128", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -1022,14 +1022,14 @@ TEST(TestExecutables, SimpleEncIrv9732x128) { // -o simple_enc_irv97_64x64_tiles_33x33_d5.j2c -qstep 0.01 -tile_size {33,33} // -num_decomps 5 TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D5) { - double mse[3] = { 46.2004, 43.622, 56.7452}; - int pae[3] = { 48, 46, 52}; + double mse[3] = { 1.88906, 1.30757, 2.5347 }; + int pae[3] = { 9, 6, 10 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_64x64_tiles_33x33_d5", "", "j2c", - "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 5"); + "simple_enc_irv97_64x64_tiles_33x33_d5", "", "j2c", + "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 5"); run_ojph_compress_expand("simple_enc_irv97_64x64_tiles_33x33_d5", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64_tiles_33x33_d5", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1039,14 +1039,14 @@ TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D5) { // -o simple_enc_irv97_64x64_tiles_33x33_d6.j2c -qstep 0.01 -tile_size {33,33} // -num_decomps 6 TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D6) { - double mse[3] = { 46.2004, 43.622, 56.7452}; - int pae[3] = { 48, 46, 52}; + double mse[3] = { 1.88751, 1.30673, 2.53378 }; + int pae[3] = { 8, 6, 10 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_64x64_tiles_33x33_d6", "", "j2c", - "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 6"); + "simple_enc_irv97_64x64_tiles_33x33_d6", "", "j2c", + "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 6"); run_ojph_compress_expand("simple_enc_irv97_64x64_tiles_33x33_d6", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64_tiles_33x33_d6", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1055,14 +1055,14 @@ TEST(TestExecutables, 
SimpleEncIrv9764x64Tiles33x33D6) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_64x64_16bit.j2c -qstep 0.01 TEST(TestExecutables, SimpleEncIrv9764x6416bit) { - double mse[3] = { 51727.3, 32596.4, 45897.8}; - int pae[3] = { 1512, 1481, 1778}; + double mse[3] = { 51727.3, 32596.4, 45897.8 }; + int pae[3] = { 1512, 1481, 1778 }; run_ojph_compress("mm.ppm", - "simple_enc_irv97_64x64_16bit", "", "j2c", - "-qstep 0.01"); + "simple_enc_irv97_64x64_16bit", "", "j2c", + "-qstep 0.01"); run_ojph_compress_expand("simple_enc_irv97_64x64_16bit", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64_16bit", "ppm", - "mm.ppm", "", 3, mse, pae); + "mm.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1071,14 +1071,14 @@ TEST(TestExecutables, SimpleEncIrv9764x6416bit) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_64x64_16bit_gray.j2c -qstep 0.01 TEST(TestExecutables, SimpleEncIrv9764x6416bitGray) { - double mse[1] = { 25150.6}; - int pae[1] = { 1081}; + double mse[1] = { 25150.6 }; + int pae[1] = { 1081 }; run_ojph_compress("mm.pgm", - "simple_enc_irv97_64x64_16bit_gray", "", "j2c", - "-qstep 0.01"); + "simple_enc_irv97_64x64_16bit_gray", "", "j2c", + "-qstep 0.01"); run_ojph_compress_expand("simple_enc_irv97_64x64_16bit_gray", "j2c", "pgm"); run_mse_pae("simple_enc_irv97_64x64_16bit_gray", "pgm", - "mm.pgm", "", 1, mse, pae); + "mm.pgm", "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1087,14 +1087,14 @@ TEST(TestExecutables, SimpleEncIrv9764x6416bitGray) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_64x64_16bit.j2c -reversible true TEST(TestExecutables, SimpleEncRev5364x6416bit) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("mm.ppm", - 
"simple_enc_rev53_64x64_16bit", "", "j2c", - "-reversible true"); + "simple_enc_rev53_64x64_16bit", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_64x64_16bit", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64_16bit", "ppm", - "mm.ppm", "", 3, mse, pae); + "mm.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1103,14 +1103,14 @@ TEST(TestExecutables, SimpleEncRev5364x6416bit) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_64x64_16bit_gray.j2c -reversible true TEST(TestExecutables, SimpleEncRev5364x6416bitGray) { - double mse[1] = { 0}; - int pae[1] = { 0}; + double mse[1] = { 0 }; + int pae[1] = { 0 }; run_ojph_compress("mm.pgm", - "simple_enc_rev53_64x64_16bit_gray", "", "j2c", - "-reversible true"); + "simple_enc_rev53_64x64_16bit_gray", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_64x64_16bit_gray", "j2c", "pgm"); run_mse_pae("simple_enc_rev53_64x64_16bit_gray", "pgm", - "mm.pgm", "", 1, mse, pae); + "mm.pgm", "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1119,14 +1119,14 @@ TEST(TestExecutables, SimpleEncRev5364x6416bitGray) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_64x64_16bit.j2c -reversible true TEST(TestExecutables, SimpleEncRev5364x64) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_64x64", "", "j2c", - "-reversible true"); + "simple_enc_rev53_64x64", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_64x64", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1135,14 
+1135,14 @@ TEST(TestExecutables, SimpleEncRev5364x64) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_32x32.j2c -reversible true -block_size {32,32} TEST(TestExecutables, SimpleEncRev5332x32) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_32x32", "", "j2c", - "-reversible true -block_size \"{32,32}\""); + "simple_enc_rev53_32x32", "", "j2c", + "-reversible true -block_size \"{32,32}\""); run_ojph_compress_expand("simple_enc_rev53_32x32", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_32x32", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1151,14 +1151,14 @@ TEST(TestExecutables, SimpleEncRev5332x32) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_4x4.j2c -reversible true -block_size {4,4} TEST(TestExecutables, SimpleEncRev534x4) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_4x4", "", "j2c", - "-reversible true -block_size \"{4,4}\""); + "simple_enc_rev53_4x4", "", "j2c", + "-reversible true -block_size \"{4,4}\""); run_ojph_compress_expand("simple_enc_rev53_4x4", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_4x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1167,14 +1167,14 @@ TEST(TestExecutables, SimpleEncRev534x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_1024x4.j2c -reversible true -block_size {4,1024} TEST(TestExecutables, SimpleEncRev531024x4) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int 
pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_1024x4", "", "j2c", - "-reversible true -block_size \"{4,1024}\""); + "simple_enc_rev53_1024x4", "", "j2c", + "-reversible true -block_size \"{4,1024}\""); run_ojph_compress_expand("simple_enc_rev53_1024x4", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_1024x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1183,48 +1183,48 @@ TEST(TestExecutables, SimpleEncRev531024x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_4x1024.j2c -reversible true -block_size {1024,4} TEST(TestExecutables, SimpleEncRev534x1024) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_4x1024", "", "j2c", - "-reversible true -block_size \"{1024,4}\""); + "simple_enc_rev53_4x1024", "", "j2c", + "-reversible true -block_size \"{1024,4}\""); run_ojph_compress_expand("simple_enc_rev53_4x1024", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_4x1024", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// // Test ojph_compress with codeblocks when the rev53 wavelet is used. // We test by comparing MSE and PAE of decoded images. 
// The compressed file is obtained using these command-line options: -// -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size +// -o simple_enc_rev53_64x64_tiles_33x33_d5.j2c -reversible true -tile_size // {32,32} -num_decomps 5 TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D5) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_64x64_tiles_33x33_d5", "", "j2c", - "-reversible true -tile_size \"{32,32}\" -num_decomps 5"); + "simple_enc_rev53_64x64_tiles_33x33_d5", "", "j2c", + "-reversible true -tile_size \"{32,32}\" -num_decomps 5"); run_ojph_compress_expand("simple_enc_rev53_64x64_tiles_33x33_d5", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64_tiles_33x33_d5", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// // Test ojph_compress with codeblocks when the rev53 wavelet is used. // We test by comparing MSE and PAE of decoded images. 
// The compressed file is obtained using these command-line options: -// -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size +// -o simple_enc_rev53_64x64_tiles_33x33_d6.j2c -reversible true -tile_size // {32,32} -num_decomps 6 TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D6) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_64x64_tiles_33x33_d6", "", "j2c", - "-reversible true -tile_size \"{32,32}\" -num_decomps 6"); + "simple_enc_rev53_64x64_tiles_33x33_d6", "", "j2c", + "-reversible true -tile_size \"{32,32}\" -num_decomps 6"); run_ojph_compress_expand("simple_enc_rev53_64x64_tiles_33x33_d6", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64_tiles_33x33_d6", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1234,16 +1234,16 @@ TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D6) { // -o simple_enc_irv97_64x64_yuv.j2c -qstep 0.1 -dims {352,288} -num_comps 3 // -downsamp {1,1},{2,2},{2,2} -bit_depth 8,8,8 -signed false,false,false TEST(TestExecutables, SimpleEncIrv9764x64Yuv) { - double mse[3] = { 30.3548, 7.69602, 5.22246}; - int pae[3] = { 49, 27, 26}; + double mse[3] = { 30.3548, 7.69602, 5.22246 }; + int pae[3] = { 49, 27, 26 }; run_ojph_compress("foreman_420.yuv", - "simple_enc_irv97_64x64_yuv", "", "j2c", - "-qstep 0.1 -dims \"{352,288}\" -num_comps 3 -downsamp" - " \"{1,1}\",\"{2,2}\",\"{2,2}\" -bit_depth 8,8,8" - " -signed false,false,false"); + "simple_enc_irv97_64x64_yuv", "", "j2c", + "-qstep 0.1 -dims \"{352,288}\" -num_comps 3 -downsamp" + " \"{1,1}\",\"{2,2}\",\"{2,2}\" -bit_depth 8,8,8" + " -signed false,false,false"); run_ojph_compress_expand("simple_enc_irv97_64x64_yuv", "j2c", "yuv"); run_mse_pae("simple_enc_irv97_64x64_yuv", "yuv", - "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); + 
"foreman_420.yuv", ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1254,16 +1254,16 @@ TEST(TestExecutables, SimpleEncIrv9764x64Yuv) { // {352,288} -num_comps 3 -downsamp {1,1},{2,2},{2,2} -bit_depth 8,8,8 -signed // false,false,false TEST(TestExecutables, SimpleEncRev5364x64Yuv) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("foreman_420.yuv", - "simple_enc_rev53_64x64_yuv", "", "j2c", - "-reversible true -qstep 0.1 -dims \"{352,288}\"" - " -num_comps 3 -downsamp \"{1,1}\",\"{2,2}\",\"{2,2}\"" - " -bit_depth 8,8,8 -signed false,false,false"); + "simple_enc_rev53_64x64_yuv", "", "j2c", + "-reversible true -qstep 0.1 -dims \"{352,288}\"" + " -num_comps 3 -downsamp \"{1,1}\",\"{2,2}\",\"{2,2}\"" + " -bit_depth 8,8,8 -signed false,false,false"); run_ojph_compress_expand("simple_enc_rev53_64x64_yuv", "j2c", "yuv"); run_mse_pae("simple_enc_rev53_64x64_yuv", "yuv", - "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); + "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1272,14 +1272,14 @@ TEST(TestExecutables, SimpleEncRev5364x64Yuv) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_tall_narrow.j2c -qstep 0.1 TEST(TestExecutables, SimpleEncIrv97TallNarrow) { - double mse[3] = { 112.097, 79.2214, 71.1367}; - int pae[3] = { 56, 41, 32}; + double mse[3] = { 112.097, 79.2214, 71.1367 }; + int pae[3] = { 56, 41, 32 }; run_ojph_compress("tall_narrow.ppm", - "simple_enc_irv97_tall_narrow", "", "j2c", - "-qstep 0.1"); + "simple_enc_irv97_tall_narrow", "", "j2c", + "-qstep 0.1"); run_ojph_compress_expand("simple_enc_irv97_tall_narrow", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_tall_narrow", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -1288,14 +1288,14 @@ TEST(TestExecutables, SimpleEncIrv97TallNarrow) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_tall_narrow1.j2c -image_offset {1,0} -qstep 0.1 TEST(TestExecutables, SimpleEncIrv97TallNarrow1) { - double mse[3] = { 100.906, 76.113, 72.8347}; - int pae[3] = { 39, 35, 34}; + double mse[3] = { 100.906, 76.113, 72.8347 }; + int pae[3] = { 39, 35, 34 }; run_ojph_compress("tall_narrow.ppm", - "simple_enc_irv97_tall_narrow1", "", "j2c", - "-image_offset \"{1,0}\" -qstep 0.1"); + "simple_enc_irv97_tall_narrow1", "", "j2c", + "-image_offset \"{1,0}\" -qstep 0.1"); run_ojph_compress_expand("simple_enc_irv97_tall_narrow1", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_tall_narrow1", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1304,14 +1304,14 @@ TEST(TestExecutables, SimpleEncIrv97TallNarrow1) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_tall_narrow.j2c -reversible true TEST(TestExecutables, SimpleEncRev53TallNarrow) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("tall_narrow.ppm", - "simple_enc_rev53_tall_narrow", "", "j2c", - "-reversible true"); + "simple_enc_rev53_tall_narrow", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_tall_narrow", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_tall_narrow", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1320,14 +1320,14 @@ TEST(TestExecutables, SimpleEncRev53TallNarrow) { // The compressed file is obtained using these command-line options: // -o 
simple_enc_rev53_tall_narrow1.j2c -image_offset {1,0} -reversible true TEST(TestExecutables, SimpleEncRev53TallNarrow1) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("tall_narrow.ppm", - "simple_enc_rev53_tall_narrow1", "", "j2c", - "-image_offset \"{1,0}\" -reversible true"); + "simple_enc_rev53_tall_narrow1", "", "j2c", + "-image_offset \"{1,0}\" -reversible true"); run_ojph_compress_expand("simple_enc_rev53_tall_narrow1", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_tall_narrow1", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1336,14 +1336,14 @@ TEST(TestExecutables, SimpleEncRev53TallNarrow1) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_10bit_le_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72010bitLeNuke11) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("dpx_1280x720_10bit.ppm", - "dpx_enc_1280x720_10bit_le_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_10bit_le_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_10bit_le_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_10bit_le_nuke11", "ppm", - "dpx_1280x720_10bit.ppm", "", 3, mse, pae); + "dpx_1280x720_10bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1352,14 +1352,14 @@ TEST(TestExecutables, DpxEnc1280x72010bitLeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_10bit_be_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72010bitBeNuke11) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; 
run_ojph_compress("dpx_1280x720_10bit.ppm", - "dpx_enc_1280x720_10bit_be_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_10bit_be_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_10bit_be_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_10bit_be_nuke11", "ppm", - "dpx_1280x720_10bit.ppm", "", 3, mse, pae); + "dpx_1280x720_10bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1368,14 +1368,14 @@ TEST(TestExecutables, DpxEnc1280x72010bitBeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_16bit_le_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72016bitLeNuke11) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("dpx_1280x720_16bit.ppm", - "dpx_enc_1280x720_16bit_le_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_16bit_le_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_16bit_le_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_16bit_le_nuke11", "ppm", - "dpx_1280x720_16bit.ppm", "", 3, mse, pae); + "dpx_1280x720_16bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1384,14 +1384,14 @@ TEST(TestExecutables, DpxEnc1280x72016bitLeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_16bit_be_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72016bitBeNuke11) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("dpx_1280x720_16bit.ppm", - "dpx_enc_1280x720_16bit_be_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_16bit_be_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_16bit_be_nuke11", "j2c", "ppm"); 
run_mse_pae("dpx_enc_1280x720_16bit_be_nuke11", "ppm", - "dpx_1280x720_16bit.ppm", "", 3, mse, pae); + "dpx_1280x720_16bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1400,14 +1400,14 @@ TEST(TestExecutables, DpxEnc1280x72016bitBeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_10bit_resolve18.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72010bitResolve18) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("dpx_1280x720_10bit.ppm", - "dpx_enc_1280x720_10bit_resolve18", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_10bit_resolve18", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_10bit_resolve18", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_10bit_resolve18", "ppm", - "dpx_1280x720_10bit.ppm", "", 3, mse, pae); + "dpx_1280x720_10bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1416,20 +1416,20 @@ TEST(TestExecutables, DpxEnc1280x72010bitResolve18) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_16bit_resolve18.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72016bitResolve18) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("dpx_1280x720_16bit.ppm", - "dpx_enc_1280x720_16bit_resolve18", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_16bit_resolve18", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_16bit_resolve18", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_16bit_resolve18", "ppm", - "dpx_1280x720_16bit.ppm", "", 3, mse, pae); + "dpx_1280x720_16bit.ppm", "", 3, mse, pae); } //////////////////////////////////////////////////////////////////////////////// // main 
//////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) { +int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } diff --git a/tests/test_helpers/ht_cmdlines.txt b/tests/test_helpers/ht_cmdlines.txt index c8590611..a8c0987d 100644 --- a/tests/test_helpers/ht_cmdlines.txt +++ b/tests/test_helpers/ht_cmdlines.txt @@ -81,8 +81,8 @@ add_test(NAME simple_enc_rev53_32x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_de add_test(NAME simple_enc_rev53_4x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_4x4.j2c -reversible true -block_size \{4,4\}" "-i simple_enc_rev53_4x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_4x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") add_test(NAME simple_enc_rev53_1024x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_1024x4.j2c -reversible true -block_size \{4,1024\}" "-i simple_enc_rev53_1024x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_1024x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") add_test(NAME simple_enc_rev53_4x1024 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_4x1024.j2c -reversible true -block_size \{1024,4\}" "-i simple_enc_rev53_4x1024.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_4x1024.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_rev53_64x64_tiles_33x33_d5 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size \{32,32\} -num_decomps 5" "-i simple_enc_rev53_64x64_tiles_33x33.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME 
simple_enc_rev53_64x64_tiles_33x33_d6 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size \{32,32\} -num_decomps 6" "-i simple_enc_rev53_64x64_tiles_33x33.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_64x64_tiles_33x33_d5 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_tiles_33x33_d5.j2c -reversible true -tile_size \{32,32\} -num_decomps 5" "-i simple_enc_rev53_64x64_tiles_33x33_d5.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64_tiles_33x33_d5.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_64x64_tiles_33x33_d6 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_tiles_33x33_d6.j2c -reversible true -tile_size \{32,32\} -num_decomps 6" "-i simple_enc_rev53_64x64_tiles_33x33_d6.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64_tiles_33x33_d6.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") add_test(NAME simple_enc_irv97_64x64_yuv COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom_yuv.sh -enc "-i ${images_folder}/foreman_420.yuv -o simple_enc_irv97_64x64_yuv.j2c -qstep 0.1 -dims \{352,288\} -num_comps 3 -downsamp \{1,1\},\{2,2\},\{2,2\} -bit_depth 8,8,8 -signed false,false,false" "-i simple_enc_irv97_64x64_yuv.j2c -o test1y.rawl,test1u.rawl,test1v.rawl -precise -quiet" "-i simple_enc_irv97_64x64_yuv.j2c -o test2.yuv" "${images_folder}/foreman_420.yuv:352x288x8x420" "test1.yuv:352x288x8x420" "test2.yuv:352x288x8x420") add_test(NAME simple_enc_rev53_64x64_yuv COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom_yuv.sh -renc "-i ${images_folder}/foreman_420.yuv -o simple_enc_rev53_64x64_yuv.j2c -reversible true -qstep 0.1 -dims \{352,288\} -num_comps 3 
-downsamp \{1,1\},\{2,2\},\{2,2\} -bit_depth 8,8,8 -signed false,false,false" "-i simple_enc_rev53_64x64_yuv.j2c -o test1y.rawl,test1u.rawl,test1v.rawl -precise -quiet" "-i simple_enc_rev53_64x64_yuv.j2c -o test2.yuv" "${images_folder}/foreman_420.yuv:352x288x8x420" "test1.yuv:352x288x8x420" "test2.yuv:352x288x8x420") From 6e9cfdc60d7d8da0ae1ef2e7cdf707623f1ef136 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Mon, 8 Apr 2024 23:50:35 +1000 Subject: [PATCH 19/37] All changes needed for DFS and ATK are done. Still some bugs. --- src/core/codestream/ojph_codestream_local.cpp | 31 +- src/core/codestream/ojph_codestream_local.h | 15 +- src/core/codestream/ojph_params.cpp | 75 ++- src/core/codestream/ojph_params_local.h | 53 +- src/core/codestream/ojph_precinct.cpp | 14 +- src/core/codestream/ojph_precinct.h | 3 +- src/core/codestream/ojph_resolution.cpp | 569 +++++++++++------- src/core/codestream/ojph_resolution.h | 14 +- src/core/codestream/ojph_subband.cpp | 38 +- src/core/codestream/ojph_subband.h | 3 +- src/core/codestream/ojph_tile.cpp | 22 +- src/core/codestream/ojph_tile.h | 5 +- src/core/codestream/ojph_tile_comp.cpp | 3 +- src/core/transform/ojph_transform.cpp | 53 +- 14 files changed, 566 insertions(+), 332 deletions(-) diff --git a/src/core/codestream/ojph_codestream_local.cpp b/src/core/codestream/ojph_codestream_local.cpp index 737daffb..5f72d3e8 100644 --- a/src/core/codestream/ojph_codestream_local.cpp +++ b/src/core/codestream/ojph_codestream_local.cpp @@ -186,8 +186,6 @@ namespace ojph { for (ui32 r = 0; r <= num_decomps; ++r) { size log_PP = cod.get_log_precinct_size(r); - log_PP.w -= (r ? 1 : 0); - log_PP.h -= (r ? 1 : 0); ratio.w = ojph_max(ratio.w, log_PP.w - ojph_min(log_cb.w, log_PP.w)); ratio.h = ojph_max(ratio.h, log_PP.h - ojph_min(log_cb.h, log_PP.h)); } @@ -200,7 +198,7 @@ namespace ojph { // We need 4 such tables. These tables store // 1. missing msbs and 2. their flags, // 3. number of layers and 4. 
their flags - precinct_scratch_needed_bytes = + precinct_scratch_needed_bytes = 4 * ((max_ratio * max_ratio * 4 + 2) / 3); allocator->pre_alloc_obj(precinct_scratch_needed_bytes); @@ -220,7 +218,7 @@ namespace ojph { ui32 num_tileparts = 0; point index; - rect tile_rect, recon_tile_rect; + rect tile_rect; ojph::param_siz sz = access_siz(); ui32 ds = 1 << skipped_res_for_recon; for (index.y = 0; index.y < num_tiles.h; ++index.y) @@ -233,12 +231,6 @@ namespace ojph { tile_rect.siz.h = ojph_min(y1, sz.get_image_extent().y) - tile_rect.org.y; - recon_tile_rect.org.y = ojph_max(ojph_div_ceil(y0, ds), - ojph_div_ceil(sz.get_image_offset().y, ds)); - recon_tile_rect.siz.h = ojph_min(ojph_div_ceil(y1, ds), - ojph_div_ceil(sz.get_image_extent().y, ds)) - - recon_tile_rect.org.y; - ui32 offset = 0; for (index.x = 0; index.x < num_tiles.w; ++index.x) { @@ -250,17 +242,9 @@ namespace ojph { tile_rect.siz.w = ojph_min(x1, sz.get_image_extent().x) - tile_rect.org.x; - recon_tile_rect.org.x = ojph_max(ojph_div_ceil(x0, ds), - ojph_div_ceil(sz.get_image_offset().x, ds)); - recon_tile_rect.siz.w = ojph_min(ojph_div_ceil(x1, ds), - ojph_div_ceil(sz.get_image_extent().x, ds)) - - recon_tile_rect.org.x; - ui32 tps = 0; // number of tileparts for this tile ui32 idx = index.y * num_tiles.w + index.x; - tiles[idx].finalize_alloc(this, tile_rect, recon_tile_rect, - idx, offset, tps); - offset += recon_tile_rect.siz.w; + tiles[idx].finalize_alloc(this, tile_rect, idx, offset, tps); num_tileparts += tps; } } @@ -836,8 +820,15 @@ namespace ojph { } cod.update_atk(atk); - for (int i = 0; i < used_coc_fields; ++i) + for (int i = 0; i < used_coc_fields; ++i) + { + if (i == 0) cod.link_cod(coc); + else coc[i - 1].link_cod(coc + i); coc[i].update_atk(atk); + } + siz.link(&cod); + if (dfs.exists()) + siz.link(&dfs); if (received_markers != 3) OJPH_ERROR(0x00030052, "markers error, COD and QCD are required"); diff --git a/src/core/codestream/ojph_codestream_local.h 
b/src/core/codestream/ojph_codestream_local.h index 8e77eb17..8ca8c717 100644 --- a/src/core/codestream/ojph_codestream_local.h +++ b/src/core/codestream/ojph_codestream_local.h @@ -82,19 +82,10 @@ namespace ojph { { return &siz; } ojph::param_cod access_cod() //return externally wrapped cod { return ojph::param_cod(&cod); } - const param_cod* get_cod() //return internal code + const param_cod* get_cod() //return internal cod { return &cod; } - const param_cod* get_cod(ui32 comp_num) //return internal code - { - if (used_coc_fields == 0) - return &cod; - else { - for (int i = 0; i < used_coc_fields; ++i) - if (coc[i].get_comp_num() == comp_num) - return coc + i; - return &cod; - } - } + const param_cod* get_cod(ui32 comp_num) //return internal cod + { return cod.get_cod(comp_num); } param_qcd* access_qcd(ui32 comp_num) { if (used_qcc_fields > 0) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index ef652651..268135c4 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -663,6 +663,35 @@ namespace ojph { dfs_support_needed = (Rsiz & 0x80) != 0; } + ////////////////////////////////////////////////////////////////////////// + point param_siz::get_recon_downsampling(ui32 comp_num) const + { + assert(comp_num < get_num_components()); + + point factor(1u << skipped_resolutions, 1u << skipped_resolutions); + const param_cod* cdp = cod->get_cod(comp_num); + if (dfs && cdp && cdp->is_dfs_defined()) { + const param_dfs* d = dfs->get_dfs(cdp->get_dfs_index()); + factor = d->get_res_downsamp(skipped_resolutions); + } + factor.x *= (ui32)cptr[comp_num].XRsiz; + factor.y *= (ui32)cptr[comp_num].YRsiz; + return factor; + } + + ////////////////////////////////////////////////////////////////////////// + point param_siz::get_recon_size(ui32 comp_num) const + { + assert(comp_num < get_num_components()); + + point factor = get_recon_downsampling(comp_num); + point r; + r.x = ojph_div_ceil(Xsiz, factor.x) 
- ojph_div_ceil(XOsiz, factor.x); + r.y = ojph_div_ceil(Ysiz, factor.y) - ojph_div_ceil(YOsiz, factor.y); + return r; + } + + ////////////////////////////////////////////////////////////////////////// // // @@ -1406,10 +1435,9 @@ namespace ojph { ui32 subband) const { assert((resolution == 0 && subband == 0) || - (resolution > 0 && resolution <= Ids && - subband > 0 && subband < 4)); + (resolution > 0 && subband > 0 && subband < 4)); - ui32 ns[4] = { 0, 3, 2, 2 }; + ui32 ns[4] = { 0, 3, 1, 1 }; ui32 idx = 0; if (resolution > 0) @@ -1427,6 +1455,27 @@ namespace ojph { return idx; } + ////////////////////////////////////////////////////////////////////////// + point param_dfs::get_res_downsamp(ui32 skipped_resolutions) const + { + point factor(1, 1); + ui32 decomp_level = 1; + while (skipped_resolutions > 0) + { + param_dfs::dfs_dwt_type type = get_dwt_type(decomp_level); + if (type == BIDIR_DWT) + { factor.x *= 2; factor.y *= 2; } + else if (type == HORZ_DWT) + factor.x *= 2; + else if (type == VERT_DWT) + factor.y *= 2; + + ++decomp_level; + --skipped_resolutions; + } + return factor; + } + ////////////////////////////////////////////////////////////////////////// bool param_dfs::read(infile_base *file) { @@ -1658,10 +1707,10 @@ namespace ojph { Natk = 4; // next is (A-4) in T.801 second line Latk = (ui16)(5 + Natk + sizeof(float) * (1 + Natk)); - d[0].irv.Aatk = (float)-1.586134342059924; - d[1].irv.Aatk = (float)-0.052980118572961; - d[2].irv.Aatk = (float)0.882911075530934; - d[3].irv.Aatk = (float)0.443506852043971; + d[0].irv.Aatk = (float)0.443506852043971; + d[1].irv.Aatk = (float)0.882911075530934; + d[2].irv.Aatk = (float)-0.052980118572961; + d[3].irv.Aatk = (float)-1.586134342059924; } ////////////////////////////////////////////////////////////////////////// @@ -1671,12 +1720,12 @@ namespace ojph { Natk = 2; // next is (A-4) in T.801 fourth line Latk = (ui16)(5 + 2 * Natk + sizeof(ui8) * (Natk + Natk)); - d[0].rev.Aatk = -1; - d[0].rev.Batk = 0; - 
d[0].rev.Eatk = 1; - d[1].rev.Aatk = 1; - d[1].rev.Batk = 2; - d[1].rev.Eatk = 2; + d[0].rev.Aatk = 1; + d[0].rev.Batk = 2; + d[0].rev.Eatk = 2; + d[1].rev.Aatk = -1; + d[1].rev.Batk = 0; + d[1].rev.Eatk = 1; } } // !local namespace diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index 43c1181d..1ee508dc 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -172,7 +172,6 @@ namespace ojph { cptr = store; old_Csiz = 4; Rsiz = 0x4000; //for jph, bit 14 of Rsiz is 1 - ws_kern_support_needed = dfs_support_needed = false; } ~param_siz() @@ -238,10 +237,15 @@ namespace ojph { bool write(outfile_base *file); void read(infile_base *file); + void link(const param_cod* cod) + { this->cod = cod; } + + void link(const param_dfs* dfs) + { this->dfs = dfs; } + void set_skipped_resolutions(ui32 skipped_resolutions) - { - this->skipped_resolutions = skipped_resolutions; - } + { this->skipped_resolutions = skipped_resolutions; } + ui32 get_width(ui32 comp_num) const { assert(comp_num < get_num_components()); @@ -256,20 +260,14 @@ namespace ojph { ui32 t = ojph_div_ceil(Ysiz, ds) - ojph_div_ceil(YOsiz, ds); return t; } + + point get_recon_downsampling(ui32 comp_num) const; + point get_recon_size(ui32 comp_num) const; ui32 get_recon_width(ui32 comp_num) const - { - assert(comp_num < get_num_components()); - ui32 ds = (ui32)cptr[comp_num].XRsiz * (1u << skipped_resolutions); - ui32 t = ojph_div_ceil(Xsiz, ds) - ojph_div_ceil(XOsiz, ds); - return t; - } + { return get_recon_size(comp_num).x; } ui32 get_recon_height(ui32 comp_num) const - { - assert(comp_num < get_num_components()); - ui32 ds = (ui32)cptr[comp_num].YRsiz * (1u << skipped_resolutions); - ui32 t = ojph_div_ceil(Ysiz, ds) - ojph_div_ceil(YOsiz, ds); - return t; - } + { return get_recon_size(comp_num).y; } + bool is_ws_kern_support_needed() { return ws_kern_support_needed; } bool is_dfs_support_needed() { return 
dfs_support_needed; } @@ -293,6 +291,8 @@ namespace ojph { siz_comp_info store[4]; bool ws_kern_support_needed; bool dfs_support_needed; + const param_cod* cod; + const param_dfs* dfs; param_siz(const param_siz&) = delete; //prevent copy constructor param_siz& operator=(const param_siz&) = delete; //prevent copy }; @@ -370,6 +370,7 @@ namespace ojph { SPcod.num_decomp = 5; SPcod.block_width = 4; //64 SPcod.block_height = 4; //64 + next = NULL; } //////////////////////////////////////// @@ -503,6 +504,22 @@ namespace ojph { //////////////////////////////////////// void update_atk(const param_atk* atk); + //////////////////////////////////////// + void link_cod(const param_cod* cod) + { this->next = cod; } + + //////////////////////////////////////// + const param_cod* get_cod(ui32 comp_num) const + { + const param_cod* result = this->next; + while (result != NULL && result->get_comp_num() != comp_num) + result = result->next; + if (result) + return result; + else + return this; + } + //////////////////////////////////////// const param_atk* access_atk() const { return atk; } @@ -516,7 +533,7 @@ namespace ojph { { return SPcod.num_decomp & 0xF; } //////////////////////////////////////// - ui32 get_comp_num() + ui32 get_comp_num() const { assert(type == COC_MAIN); return comp_num; } private: // Common variables @@ -525,6 +542,7 @@ namespace ojph { ui8 Scod; // serves as Scod and Scoc cod_SGcod SGCod; // Used in COD and copied to COC cod_SPcod SPcod; // serves as SPcod and SPcoc + const param_cod* next;// to link cod parameters private: // COC only variables param_cod* parent; // parent COD structure @@ -775,6 +793,7 @@ namespace ojph { dfs_dwt_type get_dwt_type(ui32 decomp_level) const; ui32 get_subband_idx(ui32 num_decompositions, ui32 resolution, ui32 subband) const; + point get_res_downsamp(ui32 skipped_resolutions) const; private: // member variables ui16 Ldfs; // length of the segment marker diff --git a/src/core/codestream/ojph_precinct.cpp 
b/src/core/codestream/ojph_precinct.cpp index c20c8589..813e33b8 100644 --- a/src/core/codestream/ojph_precinct.cpp +++ b/src/core/codestream/ojph_precinct.cpp @@ -98,11 +98,12 @@ namespace ojph { coded_lists *cur_coded_list = NULL; ui32 cb_bytes = 0; //cb_bytes; ui32 ph_bytes = 0; //precinct header size - int sst = num_bands == 3 ? 1 : 0; - int send = num_bands == 3 ? 4 : 1; int num_skipped_subbands = 0; - for (int s = sst; s < send; ++s) + for (int s = 0; s < 4; ++s) { + if (bands[s].empty) + continue; + if (cb_idxs[s].siz.w == 0 || cb_idxs[s].siz.h == 0) continue; @@ -288,10 +289,11 @@ namespace ojph { } //write codeblocks - int sst = num_bands == 3 ? 1 : 0; - int send = num_bands == 3 ? 4 : 1; - for (int s = sst; s < send; ++s) + for (int s = 0; s < 4; ++s) { + if (bands[s].empty) + continue; + ui32 band_width = bands[s].num_blocks.w; ui32 width = cb_idxs[s].siz.w; ui32 height = cb_idxs[s].siz.h; diff --git a/src/core/codestream/ojph_precinct.h b/src/core/codestream/ojph_precinct.h index d8e880a9..47ec4736 100644 --- a/src/core/codestream/ojph_precinct.h +++ b/src/core/codestream/ojph_precinct.h @@ -59,7 +59,7 @@ namespace ojph { { precinct() { scratch = NULL; bands = NULL; coded = NULL; - num_bands = 0; may_use_sop = uses_eph = false; + may_use_sop = uses_eph = false; } ui32 prepare_precinct(int tag_tree_size, ui32* lev_idx, mem_elastic_allocator *elastic); @@ -73,7 +73,6 @@ namespace ojph { rect cb_idxs[4]; //indices of codeblocks subband *bands; //the subbands coded_lists* coded; - ui32 num_bands; bool may_use_sop, uses_eph; }; diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index a0413b76..14743249 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -67,7 +67,7 @@ namespace ojph { bool skipped_res_for_recon = res_num > t; const param_atk* atk = cdp->access_atk(); - param_dfs::dfs_dwt_type downsampling_style = param_dfs::BIDIR_DWT; + param_dfs::dfs_dwt_type ds = 
param_dfs::BIDIR_DWT; if (cdp->is_dfs_defined()) { const param_dfs* dfs = codestream->access_dfs(); if (dfs == NULL) { @@ -86,31 +86,22 @@ namespace ojph { "main codestream headers", dfs_idx); } ui32 num_decomps = cdp->get_num_decompositions(); - downsampling_style = dfs->get_dwt_type(num_decomps - res_num + 1); + ds = dfs->get_dwt_type(num_decomps - res_num + 1); } } - //create next resolution + ui32 transform_flags = 0; if (res_num > 0) { - //allocate a resolution - allocator->pre_alloc_obj(1); - ui32 trx0 = ojph_div_ceil(res_rect.org.x, 2); - ui32 try0 = ojph_div_ceil(res_rect.org.y, 2); - ui32 trx1 = ojph_div_ceil(res_rect.org.x + res_rect.siz.w, 2); - ui32 try1 = ojph_div_ceil(res_rect.org.y + res_rect.siz.h, 2); - rect next_res_rect; - next_res_rect.org.x = trx0; - next_res_rect.org.y = try0; - next_res_rect.siz.w = trx1 - trx0; - next_res_rect.siz.h = try1 - try0; - - resolution::pre_alloc(codestream, next_res_rect, - skipped_res_for_recon ? recon_res_rect : next_res_rect, - comp_num, res_num - 1); + if (ds == param_dfs::BIDIR_DWT) + transform_flags = HORZ_TRX | VERT_TRX; + else if (ds == param_dfs::HORZ_DWT) + transform_flags = HORZ_TRX; + else if (ds == param_dfs::VERT_DWT) + transform_flags = VERT_TRX; } - //allocate subbands + //allocate resolution/subbands ui32 trx0 = res_rect.org.x; ui32 try0 = res_rect.org.y; ui32 trx1 = res_rect.org.x + res_rect.siz.w; @@ -118,23 +109,83 @@ namespace ojph { allocator->pre_alloc_obj(4); if (res_num > 0) { - for (ui32 i = 1; i < 4; ++i) + if (ds == param_dfs::BIDIR_DWT) { - ui32 tbx0 = (trx0 - (i & 1) + 1) >> 1; - ui32 tbx1 = (trx1 - (i & 1) + 1) >> 1; - ui32 tby0 = (try0 - (i >> 1) + 1) >> 1; - ui32 tby1 = (try1 - (i >> 1) + 1) >> 1; - - rect band_rect; - band_rect.org.x = tbx0; - band_rect.org.y = tby0; - band_rect.siz.w = tbx1 - tbx0; - band_rect.siz.h = tby1 - tby0; - subband::pre_alloc(codestream, band_rect, comp_num, res_num); + for (ui32 i = 0; i < 4; ++i) + { + ui32 tbx0 = (trx0 - (i & 1) + 1) >> 1; + ui32 tbx1 
= (trx1 - (i & 1) + 1) >> 1; + ui32 tby0 = (try0 - (i >> 1) + 1) >> 1; + ui32 tby1 = (try1 - (i >> 1) + 1) >> 1; + + rect re; + re.org.x = tbx0; + re.org.y = tby0; + re.siz.w = tbx1 - tbx0; + re.siz.h = tby1 - tby0; + if (i == 0) { + allocator->pre_alloc_obj(1); + resolution::pre_alloc(codestream, re, + skipped_res_for_recon ? recon_res_rect : re, + comp_num, res_num - 1); + } + else + subband::pre_alloc(codestream, re, comp_num, res_num, + transform_flags); + } + } + else if (ds == param_dfs::VERT_DWT) + { + ui32 tby0, tby1; + rect re = res_rect; + tby0 = (try0 + 1) >> 1; + tby1 = (try1 + 1) >> 1; + re.org.y = tby0; + re.siz.h = tby1 - tby0; + allocator->pre_alloc_obj(1); + resolution::pre_alloc(codestream, re, + skipped_res_for_recon ? recon_res_rect : re, + comp_num, res_num - 1); + + tby0 = try0 >> 1; + tby1 = try1 >> 1; + re.org.y = tby0; + re.siz.h = tby1 - tby0; + subband::pre_alloc(codestream, re, comp_num, res_num, + transform_flags); + } + else if (ds == param_dfs::HORZ_DWT) + { + ui32 tbx0, tbx1; + rect re = res_rect; + tbx0 = (trx0 + 1) >> 1; + tbx1 = (trx1 + 1) >> 1; + re.org.x = tbx0; + re.siz.w = tbx1 - tbx0; + allocator->pre_alloc_obj(1); + resolution::pre_alloc(codestream, re, + skipped_res_for_recon ? recon_res_rect : re, + comp_num, res_num - 1); + + tbx0 = trx0 >> 1; + tbx1 = trx1 >> 1; + re.org.x = tbx0; + re.siz.w = tbx1 - tbx0; + subband::pre_alloc(codestream, re, comp_num, res_num, + transform_flags); + } + else + { + assert(ds == param_dfs::NO_DWT); + allocator->pre_alloc_obj(1); + resolution::pre_alloc(codestream, res_rect, + skipped_res_for_recon ? 
recon_res_rect : res_rect, + comp_num, res_num - 1); } } else - subband::pre_alloc(codestream, res_rect, comp_num, res_num); + subband::pre_alloc(codestream, res_rect, comp_num, res_num, + transform_flags); //prealloc precincts size log_PP = cdp->get_log_precinct_size(res_num); @@ -168,7 +219,7 @@ namespace ojph { const rect& res_rect, const rect& recon_res_rect, ui32 comp_num, ui32 res_num, - point comp_downsamp, + point comp_downsamp, point res_downsamp, tile_comp* parent_tile_comp, resolution* parent_res) { @@ -189,7 +240,7 @@ namespace ojph { this->res_num = res_num; this->num_bytes = 0; this->atk = cdp->access_atk(); - this->downsampling_style = param_dfs::BIDIR_DWT; + param_dfs::dfs_dwt_type ds = param_dfs::BIDIR_DWT; if (cdp->is_dfs_defined()) { const param_dfs* dfs = codestream->access_dfs(); if (dfs == NULL) { @@ -208,34 +259,22 @@ namespace ojph { "main codestream headers", dfs_idx); } ui32 num_decomps = cdp->get_num_decompositions(); - this->downsampling_style = - dfs->get_dwt_type(num_decomps - res_num + 1); + ds = dfs->get_dwt_type(num_decomps - res_num + 1); } } - //finalize next resolution + transform_flags = 0; if (res_num > 0) { - //allocate a resolution - child_res = allocator->post_alloc_obj(1); - ui32 trx0 = ojph_div_ceil(res_rect.org.x, 2); - ui32 try0 = ojph_div_ceil(res_rect.org.y, 2); - ui32 trx1 = ojph_div_ceil(res_rect.org.x + res_rect.siz.w, 2); - ui32 try1 = ojph_div_ceil(res_rect.org.y + res_rect.siz.h, 2); - rect next_res_rect; - next_res_rect.org.x = trx0; - next_res_rect.org.y = try0; - next_res_rect.siz.w = trx1 - trx0; - next_res_rect.siz.h = try1 - try0; - - child_res->finalize_alloc(codestream, next_res_rect, - skipped_res_for_recon ? 
recon_res_rect : next_res_rect, comp_num, - res_num - 1, comp_downsamp, parent_tile_comp, this); + if (ds == param_dfs::BIDIR_DWT) + transform_flags = HORZ_TRX | VERT_TRX; + else if (ds == param_dfs::HORZ_DWT) + transform_flags = HORZ_TRX; + else if (ds == param_dfs::VERT_DWT) + transform_flags = VERT_TRX; } - else - child_res = NULL; - //allocate subbands + //allocate resolution/subbands ui32 trx0 = res_rect.org.x; ui32 try0 = res_rect.org.y; ui32 trx1 = res_rect.org.x + res_rect.siz.w; @@ -245,24 +284,94 @@ namespace ojph { new (bands + i) subband; if (res_num > 0) { - this->num_bands = 3; - for (ui32 i = 1; i < 4; ++i) + if (ds == param_dfs::BIDIR_DWT) + { + for (ui32 i = 0; i < 4; ++i) + { + ui32 tbx0 = (trx0 - (i & 1) + 1) >> 1; + ui32 tbx1 = (trx1 - (i & 1) + 1) >> 1; + ui32 tby0 = (try0 - (i >> 1) + 1) >> 1; + ui32 tby1 = (try1 - (i >> 1) + 1) >> 1; + + rect re; + re.org.x = tbx0; + re.org.y = tby0; + re.siz.w = tbx1 - tbx0; + re.siz.h = tby1 - tby0; + if (i == 0) { + point next_res_downsamp; + next_res_downsamp.x = res_downsamp.x * 2; + next_res_downsamp.y = res_downsamp.y * 2; + + child_res = allocator->post_alloc_obj(1); + child_res->finalize_alloc(codestream, re, + skipped_res_for_recon ? recon_res_rect : re, comp_num, + res_num - 1, comp_downsamp, next_res_downsamp, + parent_tile_comp, this); + } + else + bands[i].finalize_alloc(codestream, re, this, res_num, i); + } + } + else if (ds == param_dfs::VERT_DWT) + { + ui32 tby0, tby1; + rect re = res_rect; + tby0 = (try0 + 1) >> 1; + tby1 = (try1 + 1) >> 1; + re.org.y = tby0; + re.siz.h = tby1 - tby0; + + point next_res_downsamp; + next_res_downsamp.x = res_downsamp.x; + next_res_downsamp.y = res_downsamp.y * 2; + child_res = allocator->post_alloc_obj(1); + child_res->finalize_alloc(codestream, re, + skipped_res_for_recon ? 
recon_res_rect : re, comp_num, + res_num - 1, comp_downsamp, next_res_downsamp, + parent_tile_comp, this); + + tby0 = try0 >> 1; + tby1 = try1 >> 1; + re.org.y = tby0; + re.siz.h = tby1 - tby0; + bands[2].finalize_alloc(codestream, re, this, res_num, 2); + } + else if (ds == param_dfs::HORZ_DWT) + { + ui32 tbx0, tbx1; + rect re = res_rect; + tbx0 = (trx0 + 1) >> 1; + tbx1 = (trx1 + 1) >> 1; + re.org.x = tbx0; + re.siz.w = tbx1 - tbx0; + + point next_res_downsamp; + next_res_downsamp.x = res_downsamp.x * 2; + next_res_downsamp.y = res_downsamp.y; + child_res = allocator->post_alloc_obj(1); + child_res->finalize_alloc(codestream, re, + skipped_res_for_recon ? recon_res_rect : re, comp_num, + res_num - 1, comp_downsamp, next_res_downsamp, + parent_tile_comp, this); + + tbx0 = trx0 >> 1; + tbx1 = trx1 >> 1; + re.org.x = tbx0; + re.siz.w = tbx1 - tbx0; + bands[1].finalize_alloc(codestream, re, this, res_num, 1); + } + else { - ui32 tbx0 = (trx0 - (i & 1) + 1) >> 1; - ui32 tbx1 = (trx1 - (i & 1) + 1) >> 1; - ui32 tby0 = (try0 - (i >> 1) + 1) >> 1; - ui32 tby1 = (try1 - (i >> 1) + 1) >> 1; - - rect band_rect; - band_rect.org.x = tbx0; - band_rect.org.y = tby0; - band_rect.siz.w = tbx1 - tbx0; - band_rect.siz.h = tby1 - tby0; - bands[i].finalize_alloc(codestream, band_rect, this, res_num, i); + assert(ds == param_dfs::NO_DWT); + child_res = allocator->post_alloc_obj(1); + child_res->finalize_alloc(codestream, res_rect, + skipped_res_for_recon ? 
recon_res_rect : res_rect, comp_num, + res_num - 1, comp_downsamp, res_downsamp, parent_tile_comp, this); } } else { - this->num_bands = 1; + child_res = NULL; bands[0].finalize_alloc(codestream, res_rect, this, res_num, 0); } @@ -287,11 +396,7 @@ namespace ojph { ui32 x_lower_bound = (trx0 >> log_PP.w) << log_PP.w; ui32 y_lower_bound = (try0 >> log_PP.h) << log_PP.h; - point proj_factor; - proj_factor.x = comp_downsamp.x * (1 << (num_decomps - res_num)); - proj_factor.y = comp_downsamp.y * (1 << (num_decomps - res_num)); precinct* pp = precincts; - point tile_top_left = parent_tile_comp->get_tile()->get_tile_rect().org; for (ui32 y = 0; y < num_precincts.h; ++y) { @@ -299,11 +404,10 @@ namespace ojph { for (ui32 x = 0; x < num_precincts.w; ++x, ++pp) { ui32 ppx0 = x_lower_bound + (x << log_PP.w); - point t(proj_factor.x * ppx0, proj_factor.y * ppy0); + point t(res_downsamp.x * ppx0, res_downsamp.y * ppy0); t.x = t.x > tile_top_left.x ? t.x : tile_top_left.x; t.y = t.y > tile_top_left.y ? t.y : tile_top_left.y; pp->img_point = t; - pp->num_bands = num_bands; pp->bands = bands; pp->may_use_sop = cdp->packets_may_use_sop(); pp->uses_eph = cdp->packets_use_eph(); @@ -311,15 +415,15 @@ namespace ojph { pp->coded = NULL; } } - if (num_bands == 1) - bands[0].get_cb_indices(num_precincts, precincts); - else - for (int i = 1; i < 4; ++i) + for (int i = 0; i < 4; ++i) + if (bands[i].exists()) bands[i].get_cb_indices(num_precincts, precincts); + // determine how to divide scratch into multiple levels of + // tag trees size log_cb = cdp->get_log_block_dims(); - log_PP.w -= (res_num ? 1 : 0); - log_PP.h -= (res_num ? 1 : 0); + log_PP.w -= (transform_flags & HORZ_TRX) ? 1 : 0; + log_PP.h -= (transform_flags & VERT_TRX) ? 
1 : 0; size ratio; ratio.w = log_PP.w - ojph_min(log_cb.w, log_PP.w); ratio.h = log_PP.h - ojph_min(log_cb.h, log_PP.h); @@ -391,7 +495,9 @@ namespace ojph { { if (res_num == 0) { - assert(num_bands == 1 && child_res == NULL); + assert(child_res == NULL); + assert(bands[0].exists() && !bands[1].exists() + && !bands[2].exists() && !bands[3].exists()); bands[0].exchange_buf(vert_even ? sig->line : aug->line); bands[0].push_line(); return; @@ -419,7 +525,7 @@ namespace ojph { line_buf* dp = aug->line; line_buf* sp1 = sig->active ? sig->line : ssp[i].line; line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; - const lifting_step* s = atk->get_step(i); + const lifting_step* s = atk->get_step(num_steps - i - 1); rev_vert_ana_step(s, sp1, sp2, dp, width); } lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; @@ -486,7 +592,7 @@ namespace ojph { line_buf* dp = aug->line; line_buf* sp1 = sig->active ? sig->line : ssp[i].line; line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; - const lifting_step* s = atk->get_step(i); + const lifting_step* s = atk->get_step(num_steps - i - 1); irv_vert_ana_step(s, sp1, sp2, dp, width); } lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; @@ -547,7 +653,9 @@ namespace ojph { { if (res_num == 0) { - assert(num_bands == 1 && child_res == NULL); + assert(child_res == NULL); + assert(bands[0].exists() && !bands[1].exists() + && !bands[2].exists() && !bands[3].exists()); return bands[0].pull_line(); } @@ -557,154 +665,211 @@ namespace ojph { ui32 width = res_rect.siz.w; if (width == 0) return NULL; - if (reversible) + + if (transform_flags & VERT_TRX) { - if (res_rect.siz.h > 1) + if (reversible) { - if (sig->active) { - sig->active = false; - return sig->line; - }; - for (;;) + if (res_rect.siz.h > 1) { - //horizontal transform - if (cur_line < res_rect.siz.h) + if (sig->active) { + sig->active = false; + return sig->line; + }; + for (;;) { - if (vert_even) { // even - rev_horz_syn(atk, aug->line, - 
child_res->pull_line(), bands[1].pull_line(), - width, horz_even); - aug->active = true; - vert_even = !vert_even; - ++cur_line; - continue; - } - else { - rev_horz_syn(atk, sig->line, - bands[2].pull_line(), bands[3].pull_line(), - width, horz_even); - sig->active = true; - vert_even = !vert_even; - ++cur_line; + //horizontal transform + if (cur_line < res_rect.siz.h) + { + if (vert_even) { // even + if (transform_flags & HORZ_TRX) + rev_horz_syn(atk, aug->line, child_res->pull_line(), + bands[1].pull_line(), width, horz_even); + else + memcpy(aug->line->i32, child_res->pull_line()->i32, + width * sizeof(si32)); + aug->active = true; + vert_even = !vert_even; + ++cur_line; + continue; + } + else { + if (transform_flags & HORZ_TRX) + rev_horz_syn(atk, sig->line, bands[2].pull_line(), + bands[3].pull_line(), width, horz_even); + else + memcpy(sig->line->i32, bands[2].pull_line()->i32, + width * sizeof(si32)); + sig->active = true; + vert_even = !vert_even; + ++cur_line; + } } - } - //vertical transform - for (ui32 i = 0; i < num_steps; ++i) - { - if (aug->active && (sig->active || ssp[i].active)) + //vertical transform + for (ui32 i = 0; i < num_steps; ++i) { - line_buf* dp = aug->line; - line_buf* sp1 = sig->active ? sig->line : ssp[i].line; - line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; - const lifting_step* s = atk->get_step(num_steps - i - 1); - rev_vert_syn_step(s, dp, sp1, sp2, width); + if (aug->active && (sig->active || ssp[i].active)) + { + line_buf* dp = aug->line; + line_buf* sp1 = sig->active ? sig->line : ssp[i].line; + line_buf* sp2 = ssp[i].active ? 
ssp[i].line : sig->line; + const lifting_step* s = atk->get_step(i); + rev_vert_syn_step(s, dp, sp1, sp2, width); + } + lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } - lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; - } - if (aug->active) { - aug->active = false; - return aug->line; + if (aug->active) { + aug->active = false; + return aug->line; + } + if (sig->active) { + sig->active = false; + return sig->line; + }; } - if (sig->active) { - sig->active = false; - return sig->line; - }; } - } - else - { - if (vert_even) - rev_horz_syn(atk, aug->line, child_res->pull_line(), - bands[1].pull_line(), width, horz_even); else { - rev_horz_syn(atk, aug->line, bands[2].pull_line(), - bands[3].pull_line(), width, horz_even); - si32* sp = aug->line->i32; - for (ui32 i = width; i > 0; --i) - *sp++ >>= 1; + if (vert_even) { + if (transform_flags & HORZ_TRX) + rev_horz_syn(atk, aug->line, child_res->pull_line(), + bands[1].pull_line(), width, horz_even); + else + memcpy(aug->line->i32, child_res->pull_line()->i32, + width * sizeof(si32)); + } + else + { + if (transform_flags & HORZ_TRX) + rev_horz_syn(atk, aug->line, bands[2].pull_line(), + bands[3].pull_line(), width, horz_even); + else + memcpy(aug->line->i32, bands[2].pull_line()->i32, + width * sizeof(si32)); + si32* sp = aug->line->i32; + for (ui32 i = width; i > 0; --i) + *sp++ >>= 1; + } + return aug->line; } - return aug->line; } - } - else - { - if (res_rect.siz.h > 1) + else { - if (sig->active) { - sig->active = false; - return sig->line; - }; - for (;;) + if (res_rect.siz.h > 1) { - //horizontal transform - if (cur_line < res_rect.siz.h) + if (sig->active) { + sig->active = false; + return sig->line; + }; + for (;;) { - if (vert_even) { // even - irv_horz_syn(atk, aug->line, - child_res->pull_line(), bands[1].pull_line(), - width, horz_even); - aug->active = true; - vert_even = !vert_even; - ++cur_line; - - const float K = atk->get_K(); - irv_vert_times_K(K, aug->line, width); - - 
continue; - } - else { - irv_horz_syn(atk, sig->line, - bands[2].pull_line(), bands[3].pull_line(), - width, horz_even); - sig->active = true; - vert_even = !vert_even; - ++cur_line; - - const float K_inv = 1.0f / atk->get_K(); - irv_vert_times_K(K_inv, sig->line, width); + //horizontal transform + if (cur_line < res_rect.siz.h) + { + if (vert_even) { // even + if (transform_flags & HORZ_TRX) + irv_horz_syn(atk, aug->line, child_res->pull_line(), + bands[1].pull_line(), width, horz_even); + else + memcpy(aug->line->f32, child_res->pull_line()->f32, + width * sizeof(float)); + aug->active = true; + vert_even = !vert_even; + ++cur_line; + + const float K = atk->get_K(); + irv_vert_times_K(K, aug->line, width); + + continue; + } + else { + if (transform_flags & HORZ_TRX) + irv_horz_syn(atk, sig->line, bands[2].pull_line(), + bands[3].pull_line(), width, horz_even); + else + memcpy(sig->line->f32, bands[2].pull_line()->f32, + width * sizeof(float)); + sig->active = true; + vert_even = !vert_even; + ++cur_line; + + const float K_inv = 1.0f / atk->get_K(); + irv_vert_times_K(K_inv, sig->line, width); + } } - } - //vertical transform - for (ui32 i = 0; i < num_steps; ++i) - { - if (aug->active && (sig->active || ssp[i].active)) + //vertical transform + for (ui32 i = 0; i < num_steps; ++i) { - line_buf* dp = aug->line; - line_buf* sp1 = sig->active ? sig->line : ssp[i].line; - line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; - const lifting_step* s = atk->get_step(num_steps - i - 1); - irv_vert_syn_step(s, dp, sp1, sp2, width); + if (aug->active && (sig->active || ssp[i].active)) + { + line_buf* dp = aug->line; + line_buf* sp1 = sig->active ? sig->line : ssp[i].line; + line_buf* sp2 = ssp[i].active ? 
ssp[i].line : sig->line; + const lifting_step* s = atk->get_step(i); + irv_vert_syn_step(s, dp, sp1, sp2, width); + } + lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } - lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; - } - if (aug->active) { - aug->active = false; - return aug->line; + if (aug->active) { + aug->active = false; + return aug->line; + } + if (sig->active) { + sig->active = false; + return sig->line; + }; } - if (sig->active) { - sig->active = false; - return sig->line; - }; } + else + { + if (vert_even) { + if (transform_flags & HORZ_TRX) + irv_horz_syn(atk, aug->line, child_res->pull_line(), + bands[1].pull_line(), width, horz_even); + else + memcpy(aug->line->f32, child_res->pull_line()->f32, + width * sizeof(float)); + } + else + { + if (transform_flags & HORZ_TRX) + irv_horz_syn(atk, aug->line, bands[2].pull_line(), + bands[3].pull_line(), width, horz_even); + else + memcpy(aug->line->f32, bands[2].pull_line()->f32, + width * sizeof(float)); + float* sp = aug->line->f32; + for (ui32 i = width; i > 0; --i) + *sp++ *= 0.5f; + } + return aug->line; + } + } + } + else + { + if (reversible) + { + if (transform_flags & HORZ_TRX) + rev_horz_syn(atk, aug->line, child_res->pull_line(), + bands[1].pull_line(), width, horz_even); + else + memcpy(aug->line->i32, child_res->pull_line()->i32, + width * sizeof(si32)); + return aug->line; } else { - if (vert_even) + if (transform_flags & HORZ_TRX) irv_horz_syn(atk, aug->line, child_res->pull_line(), bands[1].pull_line(), width, horz_even); else - { - irv_horz_syn(atk, aug->line, bands[2].pull_line(), - bands[3].pull_line(), width, horz_even); - float *sp = aug->line->f32; - for (ui32 i = width; i > 0; --i) - *sp++ *= 0.5f; - } + memcpy(aug->line->f32, child_res->pull_line()->f32, + width * sizeof(float)); return aug->line; } } diff --git a/src/core/codestream/ojph_resolution.h b/src/core/codestream/ojph_resolution.h index 72e0b91a..635a4ced 100644 --- 
a/src/core/codestream/ojph_resolution.h +++ b/src/core/codestream/ojph_resolution.h @@ -61,6 +61,10 @@ namespace ojph { class resolution { public: + enum : ui32 { + HORZ_TRX = 0x01, // horizontal transform + VERT_TRX = 0x02, // vertical transform + }; public: static void pre_alloc(codestream *codestream, const rect& res_rect, @@ -68,8 +72,8 @@ namespace ojph { ui32 comp_num, ui32 res_num); void finalize_alloc(codestream *codestream, const rect& res_rect, const rect& recon_res_rect, ui32 comp_num, - ui32 res_num, point comp_downsamp, - tile_comp *parent_tile_comp, + ui32 res_num, point comp_downsamp, + point res_downsamp, tile_comp *parent_tile_comp, resolution *parent_res); line_buf* get_line(); @@ -77,6 +81,8 @@ namespace ojph { line_buf* pull_line(); rect get_rect() { return res_rect; } ui32 get_comp_num() { return comp_num; } + bool has_horz_transform() { return (transform_flags & HORZ_TRX) != 0; } + bool has_vert_transform() { return (transform_flags & VERT_TRX) != 0; } ui32 prepare_precinct(); void write_precincts(outfile_base *file); @@ -92,7 +98,7 @@ namespace ojph { private: bool reversible, skipped_res_for_read, skipped_res_for_recon; ui32 num_steps; - ui32 num_bands, res_num; + ui32 res_num; ui32 comp_num; ui32 num_bytes; // number of bytes in this resolution // used for tilepart length @@ -113,7 +119,7 @@ namespace ojph { ui32 level_index[20]; //more than enough point cur_precinct_loc; //used for progressing spatial modes (2, 3, 4) const param_atk* atk; - param_dfs::dfs_dwt_type downsampling_style; + ui32 transform_flags; //wavelet machinery ui32 cur_line; ui32 rows_to_produce; diff --git a/src/core/codestream/ojph_subband.cpp b/src/core/codestream/ojph_subband.cpp index dbef3b75..cf007fc9 100644 --- a/src/core/codestream/ojph_subband.cpp +++ b/src/core/codestream/ojph_subband.cpp @@ -55,7 +55,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void subband::pre_alloc(codestream *codestream, const rect 
&band_rect, - ui32 comp_num, ui32 res_num) + ui32 comp_num, ui32 res_num, ui32 transform_flags) { mem_fixed_allocator* allocator = codestream->get_allocator(); @@ -67,8 +67,11 @@ namespace ojph { size log_cb = cdp->get_log_block_dims(); size log_PP = cdp->get_log_precinct_size(res_num); - ui32 xcb_prime = ojph_min(log_cb.w, log_PP.w - (res_num?1:0)); - ui32 ycb_prime = ojph_min(log_cb.h, log_PP.h - (res_num?1:0)); + ui32 x_off = ((transform_flags & resolution::HORZ_TRX) ? 1 : 0); + ui32 y_off = ((transform_flags & resolution::VERT_TRX) ? 1 : 0); + + ui32 xcb_prime = ojph_min(log_cb.w, log_PP.w - x_off); + ui32 ycb_prime = ojph_min(log_cb.h, log_PP.h - y_off); size nominal(1 << xcb_prime, 1 << ycb_prime); @@ -116,21 +119,30 @@ namespace ojph { size log_cb = cdp->get_log_block_dims(); log_PP = cdp->get_log_precinct_size(res_num); - xcb_prime = ojph_min(log_cb.w, log_PP.w - (res_num?1:0)); - ycb_prime = ojph_min(log_cb.h, log_PP.h - (res_num?1:0)); + ui32 x_off = ((parent->has_horz_transform()) ? 1 : 0); + ui32 y_off = ((parent->has_vert_transform()) ? 
1 : 0); + + xcb_prime = ojph_min(log_cb.w, log_PP.w - x_off); + ycb_prime = ojph_min(log_cb.h, log_PP.h - y_off); size nominal(1 << xcb_prime, 1 << ycb_prime); cur_cb_row = 0; cur_line = 0; cur_cb_height = 0; + const param_dfs* dfs = NULL; + if (cdp->is_dfs_defined()) { + dfs = codestream->access_dfs(); + if (dfs != NULL) + dfs = dfs->get_dfs(cdp->get_dfs_index()); + } param_qcd* qcd = codestream->access_qcd(parent->get_comp_num()); ui32 num_decomps = cdp->get_num_decompositions(); - this->K_max = qcd->get_Kmax(NULL, num_decomps, this->res_num, band_num); + this->K_max = qcd->get_Kmax(dfs, num_decomps, this->res_num, band_num); if (!reversible) { float d = - qcd->irrev_get_delta(NULL, num_decomps, res_num, subband_num); + qcd->irrev_get_delta(dfs, num_decomps, res_num, subband_num); d /= (float)(1u << (31 - this->K_max)); delta = d; delta_inv = (1.0f/d); @@ -199,14 +211,16 @@ namespace ojph { ui32 pc_lft = (res_rect.org.x >> log_PP.w) << log_PP.w; ui32 pc_top = (res_rect.org.y >> log_PP.h) << log_PP.h; - ui32 pcx0, pcx1, pcy0, pcy1, shift = (band_num != 0 ? 1 : 0); + ui32 pcx0, pcx1, pcy0, pcy1; + ui32 x_shift = parent->has_horz_transform() ? 1 : 0; + ui32 y_shift = parent->has_vert_transform() ? 
1 : 0; ui32 yb, xb, coly = 0, colx = 0; for (ui32 y = 0; y < num_precincts.h; ++y) { pcy0 = ojph_max(try0, pc_top + (y << log_PP.h)); pcy1 = ojph_min(try1, pc_top + ((y + 1) << log_PP.h)); - pcy0 = (pcy0 - (band_num >> 1) + (1<> shift; - pcy1 = (pcy1 - (band_num >> 1) + (1<> shift; + pcy0 = (pcy0 - (band_num >> 1) + (1 << y_shift) - 1) >> y_shift; + pcy1 = (pcy1 - (band_num >> 1) + (1 << y_shift) - 1) >> y_shift; precinct *p = precincts + y * num_precincts.w; yb = ((pcy1 + (1<> ycb_prime); @@ -217,8 +231,8 @@ namespace ojph { { pcx0 = ojph_max(trx0, pc_lft + (x << log_PP.w)); pcx1 = ojph_min(trx1, pc_lft + ((x + 1) << log_PP.w)); - pcx0 = (pcx0 - (band_num & 1) + (1<> shift; - pcx1 = (pcx1 - (band_num & 1) + (1<> shift; + pcx0 = (pcx0 - (band_num & 1) + (1 << x_shift) - 1) >> x_shift; + pcx1 = (pcx1 - (band_num & 1) + (1 << x_shift) - 1) >> x_shift; rect *bp = p->cb_idxs + band_num; xb = ((pcx1 + (1<> xcb_prime); diff --git a/src/core/codestream/ojph_subband.h b/src/core/codestream/ojph_subband.h index 5dd145e6..8cadae07 100644 --- a/src/core/codestream/ojph_subband.h +++ b/src/core/codestream/ojph_subband.h @@ -81,7 +81,7 @@ namespace ojph { } static void pre_alloc(codestream *codestream, const rect& band_rect, - ui32 comp_num, ui32 res_num); + ui32 comp_num, ui32 res_num, ui32 transform_flags); void finalize_alloc(codestream *codestream, const rect& band_rect, resolution* res, ui32 res_num, ui32 subband_num); @@ -91,6 +91,7 @@ namespace ojph { void get_cb_indices(const size& num_precincts, precinct *precincts); float get_delta() { return delta; } + bool exists() { return !empty; } line_buf* pull_line(); diff --git a/src/core/codestream/ojph_tile.cpp b/src/core/codestream/ojph_tile.cpp index 48f8bb56..3be907d4 100644 --- a/src/core/codestream/ojph_tile.cpp +++ b/src/core/codestream/ojph_tile.cpp @@ -131,8 +131,8 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void tile::finalize_alloc(codestream *codestream, const 
rect& tile_rect, - const rect& recon_tile_rect, ui32 tile_idx, - ui32 offset, ui32 &num_tileparts) + ui32 tile_idx, ui32& offset, + ui32 &num_tileparts) { //this->parent = codestream; mem_fixed_allocator* allocator = codestream->get_allocator(); @@ -167,33 +167,29 @@ namespace ojph { this->resilient = codestream->is_resilient(); this->tile_rect = tile_rect; - this->recon_tile_rect = recon_tile_rect; ui32 tx0 = tile_rect.org.x; ui32 ty0 = tile_rect.org.y; ui32 tx1 = tile_rect.org.x + tile_rect.siz.w; ui32 ty1 = tile_rect.org.y + tile_rect.siz.h; - ui32 recon_tx0 = recon_tile_rect.org.x; - ui32 recon_ty0 = recon_tile_rect.org.y; - ui32 recon_tx1 = recon_tile_rect.org.x + recon_tile_rect.siz.w; - ui32 recon_ty1 = recon_tile_rect.org.y + recon_tile_rect.siz.h; ui32 width = 0; for (ui32 i = 0; i < num_comps; ++i) { point downsamp = szp->get_downsampling(i); + point recon_downsamp = szp->get_recon_downsampling(i); ui32 tcx0 = ojph_div_ceil(tx0, downsamp.x); ui32 tcy0 = ojph_div_ceil(ty0, downsamp.y); ui32 tcx1 = ojph_div_ceil(tx1, downsamp.x); ui32 tcy1 = ojph_div_ceil(ty1, downsamp.y); - ui32 recon_tcx0 = ojph_div_ceil(recon_tx0, downsamp.x); - ui32 recon_tcy0 = ojph_div_ceil(recon_ty0, downsamp.y); - ui32 recon_tcx1 = ojph_div_ceil(recon_tx1, downsamp.x); - ui32 recon_tcy1 = ojph_div_ceil(recon_ty1, downsamp.y); + ui32 recon_tcx0 = ojph_div_ceil(tx0, recon_downsamp.x); + ui32 recon_tcy0 = ojph_div_ceil(ty0, recon_downsamp.y); + ui32 recon_tcx1 = ojph_div_ceil(tx1, recon_downsamp.x); + ui32 recon_tcy1 = ojph_div_ceil(ty1, recon_downsamp.y); line_offsets[i] = - recon_tcx0 - ojph_div_ceil(recon_tx0 - offset, downsamp.x); + recon_tcx0 - ojph_div_ceil(tx0 - offset, recon_downsamp.x); comp_rects[i].org.x = tcx0; comp_rects[i].org.y = tcy0; comp_rects[i].siz.w = tcx1 - tcx0; @@ -212,6 +208,8 @@ namespace ojph { cur_line[i] = 0; } + offset += tile_rect.siz.w; + //allocate lines const param_cod* cdp = codestream->get_cod(); this->reversible = cdp->access_atk()->is_reversible(); 
diff --git a/src/core/codestream/ojph_tile.h b/src/core/codestream/ojph_tile.h index b00c8181..056c7c94 100644 --- a/src/core/codestream/ojph_tile.h +++ b/src/core/codestream/ojph_tile.h @@ -63,8 +63,7 @@ namespace ojph { static void pre_alloc(codestream *codestream, const rect& tile_rect, const rect& recon_tile_rect, ui32 &num_tileparts); void finalize_alloc(codestream *codestream, const rect& tile_rect, - const rect& recon_tile_rect, ui32 tile_idx, - ui32 offset, ui32 &num_tileparts); + ui32 tile_idx, ui32& offset, ui32 &num_tileparts); bool push(line_buf *line, ui32 comp_num); void prepare_for_flush(); @@ -77,7 +76,7 @@ namespace ojph { private: //codestream *parent; - rect tile_rect, recon_tile_rect; + rect tile_rect; ui32 num_comps; tile_comp *comps; ui32 num_lines; diff --git a/src/core/codestream/ojph_tile_comp.cpp b/src/core/codestream/ojph_tile_comp.cpp index 69ed0bcb..83d1b624 100644 --- a/src/core/codestream/ojph_tile_comp.cpp +++ b/src/core/codestream/ojph_tile_comp.cpp @@ -83,7 +83,8 @@ namespace ojph { this->num_bytes = 0; res = allocator->post_alloc_obj(1); res->finalize_alloc(codestream, comp_rect, recon_comp_rect, comp_num, - num_decomps, comp_downsamp, this, NULL); + num_decomps, comp_downsamp, comp_downsamp, this, + NULL); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index 028ac013..b031860e 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -408,9 +408,9 @@ namespace ojph { const line_buf* other, const line_buf* aug, ui32 repeat) { - si32 a = s->rev.Aatk; - si32 b = s->rev.Batk; - ui32 e = s->rev.Eatk; + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; si32* dst = aug->i32; const si32* src1 = sig->i32, * src2 = other->i32; @@ -419,7 +419,7 @@ namespace ojph { *dst++ += (b + a * (*src1++ + *src2++)) >> e; else for (ui32 i = repeat; i > 0; 
--i) - *dst++ -= (b - a * (*src1++ + *src2++)) >> e; + *dst++ -= (- b - a * (*src1++ + *src2++)) >> e; } ///////////////////////////////////////////////////////////////////////// @@ -451,13 +451,13 @@ namespace ojph { ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass ui32 num_steps = atk->get_num_steps(); - for (ui32 j = 0; j < num_steps; ++j) + for (ui32 j = num_steps; j > 0; --j) { // first lifting step - const lifting_step* s = atk->get_step(j); - si32 a = s->rev.Aatk; - si32 b = s->rev.Batk; - ui32 e = s->rev.Eatk; + const lifting_step* s = atk->get_step(j - 1); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; // extension lp[-1] = lp[0]; @@ -470,7 +470,7 @@ namespace ojph { *dp += (b + a * (sp[-1] + sp[0])) >> e; else for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp -= (b - a * (sp[-1] + sp[0])) >> e; + *dp -= (- b - a * (sp[-1] + sp[0])) >> e; // swap buffers si32* t = lp; lp = hp; hp = t; @@ -491,9 +491,9 @@ namespace ojph { const line_buf* sig, const line_buf* other, ui32 repeat) { - si32 a = s->rev.Aatk; - si32 b = s->rev.Batk; - ui32 e = s->rev.Eatk; + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; si32* dst = aug->i32; const si32* src1 = sig->i32, * src2 = other->i32; @@ -502,7 +502,7 @@ namespace ojph { *dst++ -= (b + a * (*src1++ + *src2++)) >> e; else for (ui32 i = repeat; i > 0; --i) - *dst++ += (b - a * (*src1++ + *src2++)) >> e; + *dst++ += (- b - a * (*src1++ + *src2++)) >> e; } ////////////////////////////////////////////////////////////////////////// @@ -517,13 +517,12 @@ namespace ojph { ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass ui32 oth_width = (width + (even ? 
0 : 1)) >> 1; // high pass ui32 num_steps = atk->get_num_steps(); - for (ui32 j = num_steps; j > 0; --j) + for (ui32 j = 0; j < num_steps; ++j) { - // first lifting step - const lifting_step* s = atk->get_step(j - 1); - si32 a = s->rev.Aatk; - si32 b = s->rev.Batk; - ui32 e = s->rev.Eatk; + const lifting_step* s = atk->get_step(j); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; // extension oth[-1] = oth[0]; @@ -536,7 +535,7 @@ namespace ojph { *dp -= (b + a * (sp[-1] + sp[0])) >> e; else for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp += (b - a * (sp[-1] + sp[0])) >> e; + *dp += (- b - a * (sp[-1] + sp[0])) >> e; // swap buffers si32* t = aug; aug = oth; oth = t; @@ -793,11 +792,11 @@ namespace ojph { ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass ui32 num_steps = atk->get_num_steps(); - for (ui32 j = 0; j < num_steps; ++j) + for (ui32 j = num_steps; j > 0; --j) { // first lifting step - const lifting_step* s = atk->get_step(j); - float a = s->irv.Aatk; + const lifting_step* s = atk->get_step(j - 1); + const float a = s->irv.Aatk; // extension lp[-1] = lp[0]; @@ -878,10 +877,10 @@ namespace ojph { } ui32 num_steps = atk->get_num_steps(); - for (ui32 j = num_steps; j > 0; --j) + for (ui32 j = 0; j < num_steps; ++j) { - const lifting_step* s = atk->get_step(j - 1); - float a = s->irv.Aatk; + const lifting_step* s = atk->get_step(j); + const float a = s->irv.Aatk; // extension oth[-1] = oth[0]; From c87d3e402b262687f29c898526c035f0f57f0024 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Tue, 9 Apr 2024 00:01:28 +1000 Subject: [PATCH 20/37] Small bug fix for previous commit. 
--- src/core/codestream/ojph_codestream_local.cpp | 3 +-- src/core/codestream/ojph_params_local.h | 4 +++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/codestream/ojph_codestream_local.cpp b/src/core/codestream/ojph_codestream_local.cpp index 5f72d3e8..e9f56d04 100644 --- a/src/core/codestream/ojph_codestream_local.cpp +++ b/src/core/codestream/ojph_codestream_local.cpp @@ -220,7 +220,6 @@ namespace ojph { point index; rect tile_rect; ojph::param_siz sz = access_siz(); - ui32 ds = 1 << skipped_res_for_recon; for (index.y = 0; index.y < num_tiles.h; ++index.y) { ui32 y0 = sz.get_tile_offset().y @@ -546,7 +545,7 @@ namespace ojph { ui32 num_comments) { //finalize - siz.check_validity(); + siz.check_validity(cod); cod.check_validity(siz); cod.update_atk(atk); qcd.check_validity(siz, cod); diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index 1ee508dc..f4f2c9f4 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -202,8 +202,10 @@ namespace ojph { cptr[comp_num].YRsiz = (ui8)downsampling.y; } - void check_validity() + void check_validity(const param_cod& cod) { + this->cod = &cod; + if (XTsiz == 0 && YTsiz == 0) { XTsiz = Xsiz + XOsiz; YTsiz = Ysiz + YOsiz; } if (Xsiz == 0 || Ysiz == 0 || XTsiz == 0 || YTsiz == 0) From b1c71574406078c24faa38e4a9c2c71a9ed8f1b3 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Tue, 9 Apr 2024 14:23:32 +1000 Subject: [PATCH 21/37] A small bug fix --- src/core/codestream/ojph_params_local.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index f4f2c9f4..1958b8e8 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -487,11 +487,21 @@ namespace ojph { //////////////////////////////////////// bool packets_may_use_sop() const - { return (Scod & 2) == 2; } + 
{ + if (parent) + return (parent->Scod & 2) == 2; + else + return (Scod & 2) == 2; + } //////////////////////////////////////// bool packets_use_eph() const - { return (Scod & 4) == 4; } + { + if (parent) + return (parent->Scod & 4) == 4; + else + return (Scod & 4) == 4; + } //////////////////////////////////////// bool write(outfile_base *file); @@ -507,8 +517,8 @@ namespace ojph { void update_atk(const param_atk* atk); //////////////////////////////////////// - void link_cod(const param_cod* cod) - { this->next = cod; } + void link_cod(const param_cod* coc) + { this->next = coc; } //////////////////////////////////////// const param_cod* get_cod(ui32 comp_num) const @@ -544,7 +554,7 @@ namespace ojph { ui8 Scod; // serves as Scod and Scoc cod_SGcod SGCod; // Used in COD and copied to COC cod_SPcod SPcod; // serves as SPcod and SPcoc - const param_cod* next;// to link cod parameters + const param_cod* next;// to chain coc parameters to cod private: // COC only variables param_cod* parent; // parent COD structure From a18e7fb47ae972ebcdcb3627b25da3f72f4b9268 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Wed, 10 Apr 2024 22:56:02 +1000 Subject: [PATCH 22/37] Working on SIMD. SSE and AVX is largely done, except the core horizontal transform. 
--- src/core/transform/ojph_colour_sse.cpp | 2 +- src/core/transform/ojph_colour_sse2.cpp | 2 +- src/core/transform/ojph_transform.cpp | 490 +++------------------ src/core/transform/ojph_transform.h | 58 --- src/core/transform/ojph_transform_avx.cpp | 460 +++++++++---------- src/core/transform/ojph_transform_local.h | 334 +++++++------- src/core/transform/ojph_transform_sse.cpp | 421 +++++++++--------- src/core/transform/ojph_transform_sse2.cpp | 2 +- 8 files changed, 650 insertions(+), 1119 deletions(-) diff --git a/src/core/transform/ojph_colour_sse.cpp b/src/core/transform/ojph_colour_sse.cpp index 89cc86c2..edd1eaf2 100644 --- a/src/core/transform/ojph_colour_sse.cpp +++ b/src/core/transform/ojph_colour_sse.cpp @@ -42,7 +42,7 @@ #include "ojph_colour.h" #include "ojph_colour_local.h" -#include +#include namespace ojph { namespace local { diff --git a/src/core/transform/ojph_colour_sse2.cpp b/src/core/transform/ojph_colour_sse2.cpp index 4bb56f29..4a3cb145 100644 --- a/src/core/transform/ojph_colour_sse2.cpp +++ b/src/core/transform/ojph_colour_sse2.cpp @@ -41,7 +41,7 @@ #include "ojph_arch.h" #include "ojph_colour.h" -#include +#include namespace ojph { namespace local { diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index b031860e..eba4f006 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -53,40 +53,6 @@ namespace ojph { // Reversible functions ///////////////////////////////////////////////////////////////////////// - ///////////////////////////////////////////////////////////////////////// - void (*rev_vert_wvlt_fwd_predict) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat) = NULL; - - ///////////////////////////////////////////////////////////////////////// - void (*rev_vert_wvlt_fwd_update) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat) = NULL; - - 
///////////////////////////////////////////////////////////////////////// - void (*rev_horz_wvlt_fwd_tx) - (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even) - = NULL; - - ///////////////////////////////////////////////////////////////////////// - void (*rev_vert_wvlt_bwd_predict) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat) = NULL; - - ///////////////////////////////////////////////////////////////////////// - void (*rev_vert_wvlt_bwd_update) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat) = NULL; - - ///////////////////////////////////////////////////////////////////////// - void (*rev_horz_wvlt_bwd_tx) - (line_buf* dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even) - = NULL; - - - - - ///////////////////////////////////////////////////////////////////////// void (*rev_vert_ana_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, @@ -106,39 +72,11 @@ namespace ojph { void (*rev_horz_syn) (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even) = NULL; - - - - ///////////////////////////////////////////////////////////////////////// // Irreversible functions ///////////////////////////////////////////////////////////////////////// - ///////////////////////////////////////////////////////////////////////// - void (*irrev_vert_wvlt_step) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - int step_num, ui32 repeat) = NULL; - - ///////////////////////////////////////////////////////////////////////// - void (*irrev_vert_wvlt_K) - (const line_buf *src, line_buf *dst, bool L_analysis_or_H_synthesis, - ui32 repeat) = NULL; - - ///////////////////////////////////////////////////////////////////////// - void (*irrev_horz_wvlt_fwd_tx) - (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even) - = NULL; - - ///////////////////////////////////////////////////////////////////////// - void 
(*irrev_horz_wvlt_bwd_tx) - (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even) - = NULL; - - - - - ///////////////////////////////////////////////////////////////////////// void (*irv_vert_ana_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, @@ -163,10 +101,6 @@ namespace ojph { void (*irv_vert_times_K) (float K, const line_buf* aug, ui32 repeat) = NULL; - - - - //////////////////////////////////////////////////////////////////////////// static bool wavelet_transform_functions_initialized = false; @@ -178,23 +112,11 @@ namespace ojph { #if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN) - rev_vert_wvlt_fwd_predict = gen_rev_vert_wvlt_fwd_predict; - rev_vert_wvlt_fwd_update = gen_rev_vert_wvlt_fwd_update; - rev_horz_wvlt_fwd_tx = gen_rev_horz_wvlt_fwd_tx; - rev_vert_wvlt_bwd_predict = gen_rev_vert_wvlt_bwd_predict; - rev_vert_wvlt_bwd_update = gen_rev_vert_wvlt_bwd_update; - rev_horz_wvlt_bwd_tx = gen_rev_horz_wvlt_bwd_tx; - rev_vert_ana_step = gen_rev_vert_ana_step; rev_horz_ana = gen_rev_horz_ana; rev_vert_syn_step = gen_rev_vert_syn_step; rev_horz_syn = gen_rev_horz_syn; - irrev_vert_wvlt_step = gen_irrev_vert_wvlt_step; - irrev_vert_wvlt_K = gen_irrev_vert_wvlt_K; - irrev_horz_wvlt_fwd_tx = gen_irrev_horz_wvlt_fwd_tx; - irrev_horz_wvlt_bwd_tx = gen_irrev_horz_wvlt_bwd_tx; - irv_vert_ana_step = gen_irv_vert_ana_step; irv_horz_ana = gen_irv_horz_ana; irv_vert_syn_step = gen_irv_vert_syn_step; @@ -206,203 +128,74 @@ namespace ojph { if (level >= X86_CPU_EXT_LEVEL_SSE) { - irrev_vert_wvlt_step = sse_irrev_vert_wvlt_step; - irrev_vert_wvlt_K = sse_irrev_vert_wvlt_K; - irrev_horz_wvlt_fwd_tx = sse_irrev_horz_wvlt_fwd_tx; - irrev_horz_wvlt_bwd_tx = sse_irrev_horz_wvlt_bwd_tx; + irv_vert_ana_step = sse_irv_vert_ana_step; + irv_horz_ana = sse_irv_horz_ana; + irv_vert_syn_step = sse_irv_vert_syn_step; + irv_horz_syn = sse_irv_horz_syn; + irv_vert_times_K = sse_irv_vert_times_K; } - if (level >= 
X86_CPU_EXT_LEVEL_SSE2) - { - rev_vert_wvlt_fwd_predict = sse2_rev_vert_wvlt_fwd_predict; - rev_vert_wvlt_fwd_update = sse2_rev_vert_wvlt_fwd_update; - rev_horz_wvlt_fwd_tx = sse2_rev_horz_wvlt_fwd_tx; - rev_vert_wvlt_bwd_predict = sse2_rev_vert_wvlt_bwd_predict; - rev_vert_wvlt_bwd_update = sse2_rev_vert_wvlt_bwd_update; - rev_horz_wvlt_bwd_tx = sse2_rev_horz_wvlt_bwd_tx; - } + //if (level >= X86_CPU_EXT_LEVEL_SSE2) + //{ + // rev_vert_ana_step = sse2_rev_vert_ana_step; + // rev_horz_ana = sse2_rev_horz_ana; + // rev_vert_syn_step = sse2_rev_vert_syn_step; + // rev_horz_syn = sse2_rev_horz_syn; + //} if (level >= X86_CPU_EXT_LEVEL_AVX) { - irrev_vert_wvlt_step = avx_irrev_vert_wvlt_step; - irrev_vert_wvlt_K = avx_irrev_vert_wvlt_K; - irrev_horz_wvlt_fwd_tx = avx_irrev_horz_wvlt_fwd_tx; - irrev_horz_wvlt_bwd_tx = avx_irrev_horz_wvlt_bwd_tx; + irv_vert_ana_step = avx_irv_vert_ana_step; + irv_horz_ana = avx_irv_horz_ana; + irv_vert_syn_step = avx_irv_vert_syn_step; + irv_horz_syn = avx_irv_horz_syn; + irv_vert_times_K = avx_irv_vert_times_K; } - if (level >= X86_CPU_EXT_LEVEL_AVX2) - { - rev_vert_wvlt_fwd_predict = avx2_rev_vert_wvlt_fwd_predict; - rev_vert_wvlt_fwd_update = avx2_rev_vert_wvlt_fwd_update; - rev_horz_wvlt_fwd_tx = avx2_rev_horz_wvlt_fwd_tx; - rev_vert_wvlt_bwd_predict = avx2_rev_vert_wvlt_bwd_predict; - rev_vert_wvlt_bwd_update = avx2_rev_vert_wvlt_bwd_update; - rev_horz_wvlt_bwd_tx = avx2_rev_horz_wvlt_bwd_tx; - } + //if (level >= X86_CPU_EXT_LEVEL_AVX2) + //{ + // rev_vert_ana_step = avx2_rev_vert_ana_step; + // rev_horz_ana = avx2_rev_horz_ana; + // rev_vert_syn_step = avx2_rev_vert_syn_step; + // rev_horz_syn = avx2_rev_horz_syn; + //} + + //if (level >= X86_CPU_EXT_LEVEL_AVX512) + //{ + // rev_vert_ana_step = avx512_rev_vert_ana_step; + // rev_horz_ana = avx512_rev_horz_ana; + // rev_vert_syn_step = avx512_rev_vert_syn_step; + // rev_horz_syn = avx512_rev_horz_syn; + + // irv_vert_ana_step = avx512_irv_vert_ana_step; + // irv_horz_ana = 
avx512_irv_horz_ana; + // irv_vert_syn_step = avx512_irv_vert_syn_step; + // irv_horz_syn = avx512_irv_horz_syn; + // irv_vert_times_K = avx512_irv_vert_times_K; + //} + #endif // !OJPH_DISABLE_INTEL_SIMD #else // OJPH_ENABLE_WASM_SIMD - rev_vert_wvlt_fwd_predict = wasm_rev_vert_wvlt_fwd_predict; - rev_vert_wvlt_fwd_update = wasm_rev_vert_wvlt_fwd_update; - rev_horz_wvlt_fwd_tx = wasm_rev_horz_wvlt_fwd_tx; - rev_vert_wvlt_bwd_predict = wasm_rev_vert_wvlt_bwd_predict; - rev_vert_wvlt_bwd_update = wasm_rev_vert_wvlt_bwd_update; - rev_horz_wvlt_bwd_tx = wasm_rev_horz_wvlt_bwd_tx; - irrev_vert_wvlt_step = wasm_irrev_vert_wvlt_step; - irrev_vert_wvlt_K = wasm_irrev_vert_wvlt_K; - irrev_horz_wvlt_fwd_tx = wasm_irrev_horz_wvlt_fwd_tx; - irrev_horz_wvlt_bwd_tx = wasm_irrev_horz_wvlt_bwd_tx; + rev_vert_ana_step = wasm_rev_vert_ana_step; + rev_horz_ana = wasm_rev_horz_ana; + rev_vert_syn_step = wasm_rev_vert_syn_step; + rev_horz_syn = wasm_rev_horz_syn; + + irv_vert_ana_step = wasm_irv_vert_ana_step; + irv_horz_ana = wasm_irv_horz_ana; + irv_vert_syn_step = wasm_irv_vert_syn_step; + irv_horz_syn = wasm_irv_horz_syn; + irv_vert_times_K = wasm_irv_vert_times_K; #endif // !OJPH_ENABLE_WASM_SIMD wavelet_transform_functions_initialized = true; } ////////////////////////////////////////////////////////////////////////// - const float LIFTING_FACTORS::steps[8] = - { - -1.586134342059924f, -0.052980118572961f, +0.882911075530934f, - +0.443506852043971f, - +1.586134342059924f, +0.052980118572961f, -0.882911075530934f, - -0.443506852043971f - }; - const float LIFTING_FACTORS::K = 1.230174104914001f; - const float LIFTING_FACTORS::K_inv = (float)(1.0 / 1.230174104914001); - - ////////////////////////////////////////////////////////////////////////// #if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN) - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_fwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - 
line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - for (ui32 i = repeat; i > 0; --i) - *dst++ -= (*src1++ + *src2++) >> 1; - } - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_fwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - for (ui32 i = repeat; i > 0; --i) - *dst++ += (*src1++ + *src2++ + 2) >> 2; - } - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, bool even) - { - if (width > 1) - { - si32 *src = line_src->i32; - si32 *ldst = line_ldst->i32, *hdst = line_hdst->i32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - // extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const si32* sp = src + (even ? 1 : 0); - si32 *dph = hdst; - for (ui32 i = H_width; i > 0; --i, sp+=2) - *dph++ = sp[0] - ((sp[-1] + sp[1]) >> 1); - - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - sp = src + (even ? 0 : 1); - const si32* sph = hdst + (even ? 
0 : 1); - si32 *dpl = ldst; - for (ui32 i = L_width; i > 0; --i, sp+=2, sph++) - *dpl++ = *sp + ((2 + sph[-1] + sph[0]) >> 2); - } - else - { - if (even) - line_ldst->i32[0] = line_src->i32[0]; - else - line_hdst->i32[0] = line_src->i32[0] << 1; - } - } - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_bwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - for (ui32 i = repeat; i > 0; --i) - *dst++ += (*src1++ + *src2++) >> 1; - } - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_bwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - for (ui32 i = repeat; i > 0; --i) - *dst++ -= (2 + *src1++ + *src2++) >> 2; - } - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, bool even) - { - if (width > 1) - { - si32 *lsrc = line_lsrc->i32, *hsrc = line_hsrc->i32; - si32 *dst = line_dst->i32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - // extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - const si32 *sph = hsrc + (even ? 0 : 1); - si32 *spl = lsrc; - for (ui32 i = L_width; i > 0; --i, sph++, spl++) - *spl -= ((2 + sph[-1] + sph[0]) >> 2); - - // extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width - 1]; - // inverse predict and combine - si32 *dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - for (ui32 i = L_width + (even ? 
0 : 1); i > 0; --i, spl++, sph++) - { - *dp++ = *spl; - *dp++ = *sph + ((spl[0] + spl[1]) >> 1); - } - } - else - { - if (even) - line_dst->i32[0] = line_lsrc->i32[0]; - else - line_dst->i32[0] = line_hsrc->i32[0] >> 1; - } - } - - - - - ///////////////////////////////////////////////////////////////////////// void gen_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, const line_buf* other, const line_buf* aug, @@ -569,187 +362,6 @@ namespace ojph { } } - - - - - ////////////////////////////////////////////////////////////////////////// - void gen_irrev_vert_wvlt_step(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, - int step_num, ui32 repeat) - { - float *dst = line_dst->f32; - const float *src1 = line_src1->f32, *src2 = line_src2->f32; - float factor = LIFTING_FACTORS::steps[step_num]; - for (ui32 i = repeat; i > 0; --i) - *dst++ += factor * (*src1++ + *src2++); - } - - ///////////////////////////////////////////////////////////////////////// - void gen_irrev_vert_wvlt_K(const line_buf* line_src, - line_buf* line_dst, - bool L_analysis_or_H_synthesis, ui32 repeat) - { - float *dst = line_dst->f32; - const float *src = line_src->f32; - float factor = LIFTING_FACTORS::K_inv; - factor = L_analysis_or_H_synthesis ? factor : LIFTING_FACTORS::K; - for (ui32 i = repeat; i > 0; --i) - *dst++ = *src++ * factor; - } - - - ///////////////////////////////////////////////////////////////////////// - void gen_irrev_horz_wvlt_fwd_tx(line_buf* line_src, - line_buf *line_ldst, - line_buf *line_hdst, - ui32 width, bool even) - { - if (width > 1) - { - float *src = line_src->f32; - float *ldst = line_ldst->f32, *hdst = line_hdst->f32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - //extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - float factor = LIFTING_FACTORS::steps[0]; - const float* sp = src + (even ? 
1 : 0); - float *dph = hdst; - for (ui32 i = H_width; i > 0; --i, sp+=2) - *dph++ = sp[0] + factor * (sp[-1] + sp[1]); - - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = LIFTING_FACTORS::steps[1]; - sp = src + (even ? 0 : 1); - const float* sph = hdst + (even ? 0 : 1); - float *dpl = ldst; - for (ui32 i = L_width; i > 0; --i, sp+=2, sph++) - *dpl++ = sp[0] + factor * (sph[-1] + sph[0]); - - //extension - ldst[-1] = ldst[0]; - ldst[L_width] = ldst[L_width-1]; - //predict - factor = LIFTING_FACTORS::steps[2]; - const float* spl = ldst + (even ? 1 : 0); - dph = hdst; - for (ui32 i = H_width; i > 0; --i, spl++) - *dph++ += factor * (spl[-1] + spl[0]); - - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = LIFTING_FACTORS::steps[3]; - sph = hdst + (even ? 0 : 1); - dpl = ldst; - for (ui32 i = L_width; i > 0; --i, sph++) - *dpl++ += factor * (sph[-1] + sph[0]); - - //multipliers - float *dp = ldst; - for (ui32 i = L_width; i > 0; --i, dp++) - *dp *= LIFTING_FACTORS::K_inv; - dp = hdst; - for (ui32 i = H_width; i > 0; --i, dp++) - *dp *= LIFTING_FACTORS::K; - } - else - { - if (even) - line_ldst->f32[0] = line_src->f32[0]; - else - line_hdst->f32[0] = line_src->f32[0] + line_src->f32[0]; - } - } - - ///////////////////////////////////////////////////////////////////////// - void gen_irrev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, - bool even) - { - if (width > 1) - { - float *lsrc = line_lsrc->f32, *hsrc = line_hsrc->f32; - float *dst = line_dst->f32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 
0 : 1)) >> 1; - - //multipliers - float *dp = lsrc; - for (ui32 i = L_width; i > 0; --i, dp++) - *dp *= LIFTING_FACTORS::K; - dp = hsrc; - for (ui32 i = H_width; i > 0; --i, dp++) - *dp *= LIFTING_FACTORS::K_inv; - - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - float factor = LIFTING_FACTORS::steps[7]; - const float *sph = hsrc + (even ? 0 : 1); - float *dpl = lsrc; - for (ui32 i = L_width; i > 0; --i, dpl++, sph++) - *dpl += factor * (sph[-1] + sph[0]); - - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict - factor = LIFTING_FACTORS::steps[6]; - const float *spl = lsrc + (even ? 0 : -1); - float *dph = hsrc; - for (ui32 i = H_width; i > 0; --i, dph++, spl++) - *dph += factor * (spl[0] + spl[1]); - - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = LIFTING_FACTORS::steps[5]; - sph = hsrc + (even ? 0 : 1); - dpl = lsrc; - for (ui32 i = L_width; i > 0; --i, dpl++, sph++) - *dpl += factor * (sph[-1] + sph[0]); - - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict and combine - factor = LIFTING_FACTORS::steps[4]; - dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 
0 : -1); - sph = hsrc; - for (ui32 i = L_width+(even?0:1); i > 0; --i, spl++, sph++) - { - *dp++ = *spl; - *dp++ = *sph + factor * (spl[0] + spl[1]); - } - } - else - { - if (even) - line_dst->f32[0] = line_lsrc->f32[0]; - else - line_dst->f32[0] = line_hsrc->f32[0] * 0.5f; - } - } - - - - - ////////////////////////////////////////////////////////////////////////// void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, const line_buf* other, const line_buf* aug, @@ -833,8 +445,6 @@ namespace ojph { else hdst->f32[0] = src->f32[0] * 2.0f; } - - } ////////////////////////////////////////////////////////////////////////// @@ -925,8 +535,6 @@ namespace ojph { *dst++ *= K; } - - #endif // !OJPH_ENABLE_WASM_SIMD } diff --git a/src/core/transform/ojph_transform.h b/src/core/transform/ojph_transform.h index b31df0ef..1aae8b82 100644 --- a/src/core/transform/ojph_transform.h +++ b/src/core/transform/ojph_transform.h @@ -54,37 +54,6 @@ namespace ojph { // Reversible functions ///////////////////////////////////////////////////////////////////////// - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_vert_wvlt_fwd_predict) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_vert_wvlt_fwd_update) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_horz_wvlt_fwd_tx) - (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even); - - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_vert_wvlt_bwd_predict) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_vert_wvlt_bwd_update) - (const line_buf* src1, 
const line_buf* src2, line_buf *dst, - ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_horz_wvlt_bwd_tx) - (line_buf* dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even); - - - - ///////////////////////////////////////////////////////////////////////// extern void (*rev_vert_ana_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, @@ -107,35 +76,10 @@ namespace ojph { - - ///////////////////////////////////////////////////////////////////////// // Irreversible functions ///////////////////////////////////////////////////////////////////////// - ///////////////////////////////////////////////////////////////////////// - extern void (*irrev_vert_wvlt_step) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - int step_num, ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - extern void (*irrev_vert_wvlt_K) - (const line_buf *src, line_buf *dst, bool L_analysis_or_H_synthesis, - ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - extern void (*irrev_horz_wvlt_fwd_tx) - (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even); - - ///////////////////////////////////////////////////////////////////////// - extern void (*irrev_horz_wvlt_bwd_tx) - (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even); - - - - - - ///////////////////////////////////////////////////////////////////////// extern void (*irv_vert_ana_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, @@ -161,8 +105,6 @@ namespace ojph { (float K, const line_buf* aug, ui32 repeat); - - } } diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index 725d7ce8..743ceee6 100644 --- a/src/core/transform/ojph_transform_avx.cpp +++ b/src/core/transform/ojph_transform_avx.cpp @@ -36,6 +36,7 @@ 
//***************************************************************************/ #include +#include #include "ojph_defs.h" #include "ojph_arch.h" @@ -43,22 +44,23 @@ #include "ojph_transform.h" #include "ojph_transform_local.h" -#include +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" namespace ojph { namespace local { ////////////////////////////////////////////////////////////////////////// - void avx_irrev_vert_wvlt_step(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, int step_num, - ui32 repeat) + void avx_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat) { - float *dst = line_dst->f32; - const float *src1 = line_src1->f32, *src2 = line_src2->f32; - - __m256 factor = _mm256_set1_ps(LIFTING_FACTORS::steps[step_num]); - for (ui32 i = (repeat + 7) >> 3; i > 0; --i, dst+=8, src1+=8, src2+=8) + __m256 factor = _mm256_set1_ps(s->irv.Aatk); + + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + repeat = (repeat + 7) >> 3; + for (ui32 i = repeat; i > 0; --i, dst += 8, src1 += 8, src2 += 8) { __m256 s1 = _mm256_load_ps(src1); __m256 s2 = _mm256_load_ps(src2); @@ -69,261 +71,261 @@ namespace ojph { } ///////////////////////////////////////////////////////////////////////// - void avx_irrev_vert_wvlt_K(const line_buf* line_src, line_buf* line_dst, - bool L_analysis_or_H_synthesis, ui32 repeat) - { - float *dst = line_dst->f32; - const float *src = line_src->f32; - - float f = LIFTING_FACTORS::K_inv; - f = L_analysis_or_H_synthesis ? 
f : LIFTING_FACTORS::K; - __m256 factor = _mm256_set1_ps(f); - for (ui32 i = (repeat + 7) >> 3; i > 0; --i, dst+=8, src+=8) - { - __m256 s = _mm256_load_ps(src); - _mm256_store_ps(dst, _mm256_mul_ps(factor, s)); - } - } - - - ///////////////////////////////////////////////////////////////////////// - void avx_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, - bool even) + void avx_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) { if (width > 1) { - float *src = line_src->f32; - float *ldst = line_ldst->f32, *hdst = line_hdst->f32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; + // split src into ldst and hdst + if (even) + { + float* dph = hdst->f32; + float* dpl = ldst->f32; + float* sp = src->f32; - //extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const float* sp = src + (even ? 
1 : 0); - float *dph = hdst; - __m256 factor = _mm256_set1_ps(LIFTING_FACTORS::steps[0]); - for (ui32 i = (H_width + 3) >> 2; i > 0; --i) - { //this is doing twice the work it needs to do - //it can be definitely written better - __m256 s1 = _mm256_loadu_ps(sp - 1); - __m256 s2 = _mm256_loadu_ps(sp + 1); - __m256 d = _mm256_loadu_ps(sp); - s1 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - __m256 d1 = _mm256_add_ps(d, s1); - sp += 8; - __m128 t1 = _mm256_extractf128_ps(d1, 0); - __m128 t2 = _mm256_extractf128_ps(d1, 1); - __m128 t = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(2, 0, 2, 0)); - _mm_store_ps(dph, t); - dph += 4; + for (int i = width; i > 0; i -= 16, sp += 16, dpl += 8, dph += 8) + { + __m256 a = _mm256_load_ps(sp); + __m256 b = _mm256_load_ps(sp + 8); + __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); + __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); + __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); + __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); + _mm256_store_ps(dpl, e); + _mm256_store_ps(dph, f); + } } - - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - __m128 factor128 = _mm_set1_ps(LIFTING_FACTORS::steps[1]); - sp = src + (even ? 0 : 1); - const float* sph = hdst + (even ? 0 : 1); - float *dpl = ldst; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sp+=8, sph+=4, dpl+=4) + else { - __m256 d1 = _mm256_loadu_ps(sp); //is there an advantage here? 
- __m128 t1 = _mm256_extractf128_ps(d1, 0); - __m128 t2 = _mm256_extractf128_ps(d1, 1); - __m128 d = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(2, 0, 2, 0)); + float* dph = hdst->f32; + float* dpl = ldst->f32; + float* sp = src->f32; - __m128 s1 = _mm_loadu_ps(sph - 1); - __m128 s2 = _mm_loadu_ps(sph); - s1 = _mm_mul_ps(factor128, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dpl, d); + for (int i = width; i > 0; i -= 16, sp += 16, dpl += 8, dph += 8) + { + __m256 a = _mm256_load_ps(sp); + __m256 b = _mm256_load_ps(sp + 8); + __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); + __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); + __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); + __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); + _mm256_store_ps(dpl, f); + _mm256_store_ps(dph, e); + } } - //extension - ldst[-1] = ldst[0]; - ldst[L_width] = ldst[L_width-1]; - //predict - factor = _mm256_set1_ps(LIFTING_FACTORS::steps[2]); - const float* spl = ldst + (even ? 1 : 0); - dph = hdst; - for (ui32 i = (H_width + 7) >> 3; i > 0; --i, spl+=8, dph+=8) + // the actual horizontal transform + float* hp = hdst->f32, * lp = ldst->f32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) { - __m256 s1 = _mm256_loadu_ps(spl - 1); - __m256 s2 = _mm256_loadu_ps(spl); - __m256 d = _mm256_loadu_ps(dph); - s1 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - d = _mm256_add_ps(d, s1); - _mm256_store_ps(dph, d); - } + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + const float a = s->irv.Aatk; - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = _mm256_set1_ps(LIFTING_FACTORS::steps[3]); - sph = hdst + (even ? 
0 : 1); - dpl = ldst; - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, sph+=8, dpl+=8) - { - __m256 s1 = _mm256_loadu_ps(sph - 1); - __m256 s2 = _mm256_loadu_ps(sph); - __m256 d = _mm256_loadu_ps(dpl); - s1 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - d = _mm256_add_ps(d, s1); - _mm256_store_ps(dpl, d); - } + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const float* sp = lp + (even ? 1 : 0); + float* dp = hp; + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += a * (sp[-1] + sp[0]); - //multipliers - float *dp = ldst; - factor = _mm256_set1_ps(LIFTING_FACTORS::K_inv); - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, dp+=8) - { - __m256 d = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, d)); + // swap buffers + float* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; } - dp = hdst; - factor = _mm256_set1_ps(LIFTING_FACTORS::K); - for (ui32 i = (H_width + 7) >> 3; i > 0; --i, dp+=8) - { - __m256 d = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, d)); + + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + float* dp; + __m256 factor; + + factor = _mm256_set1_ps(K_inv); + dp = lp; + for (ui32 i = (l_width + 7) >> 3; i > 0; --i, dp += 8) + { + __m256 s = _mm256_load_ps(dp); + _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); + } + + factor = _mm256_set1_ps(K); + dp = hp; + for (ui32 i = (h_width + 7) >> 3; i > 0; --i, dp += 8) + { + __m256 s = _mm256_load_ps(dp); + _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); + } } } - else - { + else { if (even) - line_ldst->f32[0] = line_src->f32[0]; + ldst->f32[0] = src->f32[0]; else - line_hdst->f32[0] = line_src->f32[0] + line_src->f32[0]; + hdst->f32[0] = src->f32[0] * 2.0f; } } + + ////////////////////////////////////////////////////////////////////////// + void avx_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 
repeat) + { + __m256 factor = _mm256_set1_ps(s->irv.Aatk); - ///////////////////////////////////////////////////////////////////////// - void avx_irrev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, - bool even) + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + repeat = (repeat + 7) >> 3; + for (ui32 i = repeat; i > 0; --i, dst += 8, src1 += 8, src2 += 8) + { + __m256 s1 = _mm256_load_ps(src1); + __m256 s2 = _mm256_load_ps(src2); + __m256 d = _mm256_load_ps(dst); + d = _mm256_sub_ps(d, _mm256_mul_ps(factor, _mm256_add_ps(s1, s2))); + _mm256_store_ps(dst, d); + } + } + + ////////////////////////////////////////////////////////////////////////// + void avx_irv_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { - float *lsrc = line_lsrc->f32, *hsrc = line_hsrc->f32; - float *dst = line_dst->f32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; + bool ev = even; + float* oth = hsrc->f32, * aug = lsrc->f32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 
0 : 1)) >> 1; // high pass - //multipliers - float *dp = lsrc; - __m256 factor = _mm256_set1_ps(LIFTING_FACTORS::K); - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, dp+=8) - { - __m256 d = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, d)); - } - dp = hsrc; - factor = _mm256_set1_ps(LIFTING_FACTORS::K_inv); - for (ui32 i = (H_width + 7) >> 3; i > 0; --i, dp+=8) - { - __m256 d = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, d)); - } + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + float* dp; + __m256 factor; - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = _mm256_set1_ps(LIFTING_FACTORS::steps[7]); - const float *sph = hsrc + (even ? 0 : 1); - float *dpl = lsrc; - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, sph+=8, dpl+=8) - { - __m256 s1 = _mm256_loadu_ps(sph - 1); - __m256 s2 = _mm256_loadu_ps(sph); - __m256 d = _mm256_loadu_ps(dpl); - s1 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - d = _mm256_add_ps(d, s1); - _mm256_store_ps(dpl, d); + factor = _mm256_set1_ps(K); + dp = aug; + for (ui32 i = (aug_width + 7) >> 3; i > 0; --i, dp += 8) + { + __m256 s = _mm256_load_ps(dp); + _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); + } + + factor = _mm256_set1_ps(K_inv); + dp = oth; + for (ui32 i = (oth_width + 7) >> 3; i > 0; --i, dp += 8) + { + __m256 s = _mm256_load_ps(dp); + _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); + } } - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict - factor = _mm256_set1_ps(LIFTING_FACTORS::steps[6]); - const float *spl = lsrc + (even ? 
0 : -1); - float *dph = hsrc; - for (ui32 i = (H_width + 7) >> 3; i > 0; --i, dph+=8, spl+=8) + // the actual horizontal transform + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) { - __m256 s1 = _mm256_loadu_ps(spl); - __m256 s2 = _mm256_loadu_ps(spl + 1); - __m256 d = _mm256_loadu_ps(dph); - s1 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - d = _mm256_add_ps(d, s1); - _mm256_store_ps(dph, d); + const lifting_step* s = atk->get_step(j); + const float a = s->irv.Aatk; + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const float* sp = oth + (ev ? 0 : 1); + float* dp = aug; + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= a * (sp[-1] + sp[0]); + + // swap buffers + float* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; } - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = _mm256_set1_ps(LIFTING_FACTORS::steps[5]); - sph = hsrc + (even ? 
0 : 1); - dpl = lsrc; - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, dpl+=8, sph+=8) + // combine both lsrc and hsrc into dst + if (even) { - __m256 s1 = _mm256_loadu_ps(sph - 1); - __m256 s2 = _mm256_loadu_ps(sph); - __m256 d = _mm256_loadu_ps(dpl); - s1 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - d = _mm256_add_ps(d, s1); - _mm256_store_ps(dpl, d); + float* sph = hsrc->f32; + float* spl = lsrc->f32; + float* dp = dst->f32; + int i = width; + for ( ; i >= 8; i -= 16, dp += 16, spl += 8, sph += 8) + { + __m256 a = _mm256_load_ps(spl); + __m256 b = _mm256_load_ps(sph); + __m256 c = _mm256_unpacklo_ps(a, b); + __m256 d = _mm256_unpackhi_ps(a, b); + __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); + __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); + _mm256_store_ps(dp, e); + _mm256_store_ps(dp + 8, f); + } + for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) + { + __m128 a = _mm_load_ps(spl); + __m128 b = _mm_load_ps(sph); + __m128 c = _mm_unpacklo_ps(a, b); + __m128 d = _mm_unpackhi_ps(a, b); + _mm_store_ps(dp, c); + _mm_store_ps(dp + 4, d); + } } - - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict and combine - factor = _mm256_set1_ps(LIFTING_FACTORS::steps[4]); - dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - ui32 width = L_width + (even ? 
0 : 1); - for (ui32 i = (width + 7) >> 3; i > 0; --i, spl+=8, sph+=8) + else { - __m256 s1 = _mm256_loadu_ps(spl); - __m256 s2 = _mm256_loadu_ps(spl + 1); - __m256 d = _mm256_load_ps(sph); - s2 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - d = _mm256_add_ps(d, s2); - - __m128 a0 = _mm256_extractf128_ps(s1, 0); - __m128 a1 = _mm256_extractf128_ps(s1, 1); - __m128 a2 = _mm256_extractf128_ps(d, 0); - __m128 a3 = _mm256_extractf128_ps(d, 1); - _mm_storeu_ps(dp, _mm_unpacklo_ps(a0, a2)); dp += 4; - _mm_storeu_ps(dp, _mm_unpackhi_ps(a0, a2)); dp += 4; - _mm_storeu_ps(dp, _mm_unpacklo_ps(a1, a3)); dp += 4; - _mm_storeu_ps(dp, _mm_unpackhi_ps(a1, a3)); dp += 4; - -// s2 = _mm256_unpackhi_ps(s1, d); -// s1 = _mm256_unpacklo_ps(s1, d); -// d = _mm256_permute2f128_ps(s1, s2, (2 << 4) | 0); -// _mm256_storeu_ps(dp, d); -// d = _mm256_permute2f128_ps(s1, s2, (3 << 4) | 1); -// _mm256_storeu_ps(dp + 1, d); + float* sph = hsrc->f32; + float* spl = lsrc->f32; + float* dp = dst->f32; + int i = width; + for (; i >= 8; i -= 16, dp += 16, spl += 8, sph += 8) + { // i>=8 because we can exceed the aligned buffer by up to 7 + __m256 a = _mm256_load_ps(spl); + __m256 b = _mm256_load_ps(sph); + __m256 c = _mm256_unpacklo_ps(b, a); + __m256 d = _mm256_unpackhi_ps(b, a); + __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); + __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); + _mm256_store_ps(dp, e); + _mm256_store_ps(dp + 8, f); + } + for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) + { + __m128 a = _mm_load_ps(spl); + __m128 b = _mm_load_ps(sph); + __m128 c = _mm_unpacklo_ps(b, a); + __m128 d = _mm_unpackhi_ps(b, a); + _mm_store_ps(dp, c); + _mm_store_ps(dp + 4, d); + } } } - else - { + else { if (even) - line_dst->f32[0] = line_lsrc->f32[0]; + dst->f32[0] = lsrc->f32[0]; else - line_dst->f32[0] = line_hsrc->f32[0] * 0.5f; + dst->f32[0] = hsrc->f32[0] * 0.5f; } } - } -} + + ////////////////////////////////////////////////////////////////////////// + void 
avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + __m256 factor = _mm256_set1_ps(K); + float* dst = aug->f32; + repeat = (repeat + 7) >> 3; + for (ui32 i = repeat; i > 0; --i, dst += 8 ) + { + __m256 s = _mm256_load_ps(dst); + _mm256_store_ps(dst, _mm256_mul_ps(factor, s)); + } + } + + + } // !local +} // !ojph diff --git a/src/core/transform/ojph_transform_local.h b/src/core/transform/ojph_transform_local.h index c484d279..816e9e8b 100644 --- a/src/core/transform/ojph_transform_local.h +++ b/src/core/transform/ojph_transform_local.h @@ -46,14 +46,6 @@ namespace ojph { namespace local { struct param_atk; - ////////////////////////////////////////////////////////////////////////// - struct LIFTING_FACTORS - { - static const float steps[8]; - static const float K; - static const float K_inv; - }; - ////////////////////////////////////////////////////////////////////////// // // @@ -66,38 +58,6 @@ namespace ojph { // Reversible functions ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_fwd_predict(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_fwd_update(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_horz_wvlt_fwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_bwd_predict(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_bwd_update(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); - - 
////////////////////////////////////////////////////////////////////////// - void gen_rev_horz_wvlt_bwd_tx(line_buf* dst, line_buf *lsrc, - line_buf *hsrc, ui32 width, bool even); - - - - - ///////////////////////////////////////////////////////////////////////// void gen_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, const line_buf* other, const line_buf* aug, @@ -118,33 +78,10 @@ namespace ojph { const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even); - - - - ////////////////////////////////////////////////////////////////////////// // Irreversible functions ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void gen_irrev_vert_wvlt_step(const line_buf* src1, const line_buf* src2, - line_buf *dst, int step_num, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void gen_irrev_vert_wvlt_K(const line_buf *src, line_buf *dst, - bool L_analysis_or_H_synthesis, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void gen_irrev_horz_wvlt_fwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); - - ////////////////////////////////////////////////////////////////////////// - void gen_irrev_horz_wvlt_bwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); - - - - ///////////////////////////////////////////////////////////////////////// void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, const line_buf* other, const line_buf* aug, @@ -168,10 +105,6 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); - - - - ////////////////////////////////////////////////////////////////////////// // // @@ -184,21 +117,28 @@ namespace ojph { // Irreversible functions 
////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void sse_irrev_vert_wvlt_step(const line_buf* src1, const line_buf* src2, - line_buf *dst, int step_num, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void sse_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void sse_irrev_vert_wvlt_K(const line_buf *src, line_buf *dst, - bool L_analysis_or_H_synthesis, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void sse_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); - ////////////////////////////////////////////////////////////////////////// - void sse_irrev_horz_wvlt_fwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void sse_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void sse_irrev_horz_wvlt_bwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void sse_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, + ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ////////////////////////////////////////////////////////////////////////// // @@ -212,33 +152,25 @@ namespace ojph { // Reversible functions 
////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_fwd_predict(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_fwd_update(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_horz_wvlt_fwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void sse2_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_bwd_predict(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void sse2_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_bwd_update(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void sse2_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_horz_wvlt_bwd_tx(line_buf* dst, line_buf *lsrc, - line_buf *hsrc, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void sse2_rev_horz_syn(const param_atk* atk, const line_buf* dst, + 
const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even); ////////////////////////////////////////////////////////////////////////// @@ -253,21 +185,28 @@ namespace ojph { // Irreversible functions ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void avx_irrev_vert_wvlt_step(const line_buf* src1, const line_buf* src2, - line_buf *dst, int step_num, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void avx_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void avx_irrev_vert_wvlt_K(const line_buf *src, line_buf *dst, - bool L_analysis_or_H_synthesis, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void avx_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); - ////////////////////////////////////////////////////////////////////////// - void avx_irrev_horz_wvlt_fwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void avx_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void avx_irrev_horz_wvlt_bwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void avx_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, + ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void 
avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ////////////////////////////////////////////////////////////////////////// // @@ -281,33 +220,85 @@ namespace ojph { // Reversible functions ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_fwd_predict(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void avx2_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_fwd_update(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void avx2_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_horz_wvlt_fwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void avx2_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void avx2_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even); ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_bwd_predict(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); + // + // + // AVX512 Functions + // + // + 
////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_bwd_update(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); + // Irreversible functions + ////////////////////////////////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, + ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); + + + ////////////////////////////////////////////////////////////////////////// + // Reversible functions ////////////////////////////////////////////////////////////////////////// - void avx2_rev_horz_wvlt_bwd_tx(line_buf* dst, line_buf *lsrc, - line_buf *hsrc, ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void avx512_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void 
avx512_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void avx512_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void avx512_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even); ////////////////////////////////////////////////////////////////////////// // @@ -321,57 +312,52 @@ namespace ojph { // Reversible functions ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_fwd_predict(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_fwd_update(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void wasm_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_bwd_predict(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void wasm_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const 
line_buf* src, + ui32 width, bool even); - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_bwd_update(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void wasm_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void wasm_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even); ////////////////////////////////////////////////////////////////////////// // Irreversible functions ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void wasm_irrev_vert_wvlt_step(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, int step_num, - ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void wasm_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void wasm_irrev_vert_wvlt_K(const line_buf *line_src, line_buf *line_dst, - bool L_analysis_or_H_synthesis, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void wasm_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); - ////////////////////////////////////////////////////////////////////////// - void 
wasm_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, - bool even); + ///////////////////////////////////////////////////////////////////////// + void wasm_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void wasm_irrev_horz_wvlt_bwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, - bool even); + ///////////////////////////////////////////////////////////////////////// + void wasm_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, + ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void wasm_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); } } diff --git a/src/core/transform/ojph_transform_sse.cpp b/src/core/transform/ojph_transform_sse.cpp index c299bc8d..281ff4a6 100644 --- a/src/core/transform/ojph_transform_sse.cpp +++ b/src/core/transform/ojph_transform_sse.cpp @@ -36,6 +36,7 @@ //***************************************************************************/ #include +#include #include "ojph_defs.h" #include "ojph_arch.h" @@ -43,273 +44,265 @@ #include "ojph_transform.h" #include "ojph_transform_local.h" -#include +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" namespace ojph { namespace local { ////////////////////////////////////////////////////////////////////////// - void sse_irrev_vert_wvlt_step(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, - int step_num, ui32 repeat) + void sse_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat) { - float *dst = line_dst->f32; - const float *src1 = line_src1->f32, *src2 = line_src2->f32; + __m128 factor = _mm_set1_ps(s->irv.Aatk); - __m128 factor = 
_mm_set1_ps(LIFTING_FACTORS::steps[step_num]); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + repeat = (repeat + 3) >> 2; + for (ui32 i = repeat; i > 0; --i, dst += 4, src1 += 4, src2 += 4) { __m128 s1 = _mm_load_ps(src1); __m128 s2 = _mm_load_ps(src2); - __m128 d = _mm_load_ps(dst); + __m128 d = _mm_load_ps(dst); d = _mm_add_ps(d, _mm_mul_ps(factor, _mm_add_ps(s1, s2))); _mm_store_ps(dst, d); } } ///////////////////////////////////////////////////////////////////////// - void sse_irrev_vert_wvlt_K(const line_buf* line_src, line_buf* line_dst, - bool L_analysis_or_H_synthesis, ui32 repeat) - { - float *dst = line_dst->f32; - const float *src = line_src->f32; - - float f = LIFTING_FACTORS::K_inv; - f = L_analysis_or_H_synthesis ? f : LIFTING_FACTORS::K; - __m128 factor = _mm_set1_ps(f); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src+=4) - { - __m128 s = _mm_load_ps(src); - _mm_store_ps(dst, _mm_mul_ps(factor, s)); - } - } - - ///////////////////////////////////////////////////////////////////////// - void sse_irrev_horz_wvlt_fwd_tx(line_buf* line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, - bool even) + void sse_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) { if (width > 1) { - float *src = line_src->f32; - float *ldst = line_ldst->f32, *hdst = line_hdst->f32; + // split src into ldst and hdst + if (even) + { + float* dph = hdst->f32; + float* dpl = ldst->f32; + float* sp = src->f32; - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; + for (int i = width; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) + { + __m128 a = _mm_load_ps(sp); + __m128 b = _mm_load_ps(sp + 4); - //extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const float* sp = src + (even ? 
1 : 0); - float *dph = hdst; - __m128 factor = _mm_set1_ps(LIFTING_FACTORS::steps[0]); - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4) - { //this is doing twice the work it needs to do - //it can be definitely written better - __m128 s1 = _mm_loadu_ps(sp - 1); - __m128 s2 = _mm_loadu_ps(sp + 1); - __m128 d = _mm_loadu_ps(sp); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - __m128 d1 = _mm_add_ps(d, s1); - sp += 4; - s1 = _mm_loadu_ps(sp - 1); - s2 = _mm_loadu_ps(sp + 1); - d = _mm_loadu_ps(sp); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - __m128 d2 = _mm_add_ps(d, s1); - sp += 4; - d = _mm_shuffle_ps(d1, d2, _MM_SHUFFLE(2, 0, 2, 0)); - _mm_store_ps(dph, d); - } + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = _mm_set1_ps(LIFTING_FACTORS::steps[1]); - sp = src + (even ? 0 : 1); - const float* sph = hdst + (even ? 0 : 1); - float *dpl = ldst; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sp+=8, sph+=4, dpl+=4) - { - __m128 s1 = _mm_loadu_ps(sph - 1); - __m128 s2 = _mm_loadu_ps(sph); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - __m128 d1 = _mm_loadu_ps(sp); - __m128 d2 = _mm_loadu_ps(sp + 4); - __m128 d = _mm_shuffle_ps(d1, d2, _MM_SHUFFLE(2, 0, 2, 0)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dpl, d); + _mm_store_ps(dpl, c); + _mm_store_ps(dph, d); + } } - - //extension - ldst[-1] = ldst[0]; - ldst[L_width] = ldst[L_width-1]; - //predict - factor = _mm_set1_ps(LIFTING_FACTORS::steps[2]); - const float* spl = ldst + (even ? 
1 : 0); - dph = hdst; - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, spl+=4, dph+=4) + else { - __m128 s1 = _mm_loadu_ps(spl - 1); - __m128 s2 = _mm_loadu_ps(spl); - __m128 d = _mm_loadu_ps(dph); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dph, d); - } + float* dph = hdst->f32; + float* dpl = ldst->f32; + float* sp = src->f32; - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = _mm_set1_ps(LIFTING_FACTORS::steps[3]); - sph = hdst + (even ? 0 : 1); - dpl = ldst; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sph+=4, dpl+=4) - { - __m128 s1 = _mm_loadu_ps(sph - 1); - __m128 s2 = _mm_loadu_ps(sph); - __m128 d = _mm_loadu_ps(dpl); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dpl, d); + for (int i = width; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) + { + __m128 a = _mm_load_ps(sp); + __m128 b = _mm_load_ps(sp + 4); + + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); + + _mm_store_ps(dpl, d); + _mm_store_ps(dph, c); + } } - //multipliers - float *dp = ldst; - factor = _mm_set1_ps(LIFTING_FACTORS::K_inv); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dp+=4) + // the actual horizontal transform + float* hp = hdst->f32, * lp = ldst->f32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) { - __m128 d = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, d)); + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + const float a = s->irv.Aatk; + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const float* sp = lp + (even ? 
1 : 0); + float* dp = hp; + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += a * (sp[-1] + sp[0]); + + // swap buffers + float* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; } - dp = hdst; - factor = _mm_set1_ps(LIFTING_FACTORS::K); - for (int i = (H_width + 3) >> 2; i > 0; --i, dp+=4) - { - __m128 d = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, d)); + + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + float* dp; + __m128 factor; + + factor = _mm_set1_ps(K_inv); + dp = lp; + for (ui32 i = (l_width + 3) >> 2; i > 0; --i, dp += 4) + { + __m128 s = _mm_load_ps(dp); + _mm_store_ps(dp, _mm_mul_ps(factor, s)); + } + + factor = _mm_set1_ps(K); + dp = hp; + for (ui32 i = (h_width + 3) >> 2; i > 0; --i, dp += 4) + { + __m128 s = _mm_load_ps(dp); + _mm_store_ps(dp, _mm_mul_ps(factor, s)); + } } } - else - { + else { if (even) - line_ldst->f32[0] = line_src->f32[0]; + ldst->f32[0] = src->f32[0]; else - line_hdst->f32[0] = line_src->f32[0] + line_src->f32[0]; + hdst->f32[0] = src->f32[0] * 2.0f; } } + + ////////////////////////////////////////////////////////////////////////// + void sse_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat) + { + __m128 factor = _mm_set1_ps(s->irv.Aatk); - ///////////////////////////////////////////////////////////////////////// - void sse_irrev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, - bool even) + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + repeat = (repeat + 3) >> 2; + for (ui32 i = repeat; i > 0; --i, dst += 4, src1 += 4, src2 += 4) + { + __m128 s1 = _mm_load_ps(src1); + __m128 s2 = _mm_load_ps(src2); + __m128 d = _mm_load_ps(dst); + d = _mm_sub_ps(d, _mm_mul_ps(factor, _mm_add_ps(s1, s2))); + _mm_store_ps(dst, d); + } + } + + 
////////////////////////////////////////////////////////////////////////// + void sse_irv_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { - float *lsrc = line_lsrc->f32, *hsrc = line_hsrc->f32; - float *dst = line_dst->f32; + bool ev = even; + float* oth = hsrc->f32, * aug = lsrc->f32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + float* dp; + __m128 factor; - //multipliers - float *dp = lsrc; - __m128 factor = _mm_set1_ps(LIFTING_FACTORS::K); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dp+=4) - { - __m128 d = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, d)); - } - dp = hsrc; - factor = _mm_set1_ps(LIFTING_FACTORS::K_inv); - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dp+=4) - { - __m128 d = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, d)); - } + factor = _mm_set1_ps(K); + dp = aug; + for (ui32 i = (aug_width + 3) >> 2; i > 0; --i, dp += 4) + { + __m128 s = _mm_load_ps(dp); + _mm_store_ps(dp, _mm_mul_ps(factor, s)); + } - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = _mm_set1_ps(LIFTING_FACTORS::steps[7]); - const float *sph = hsrc + (even ? 
0 : 1); - float *dpl = lsrc; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dpl+=4, sph+=4) - { - __m128 s1 = _mm_loadu_ps(sph - 1); - __m128 s2 = _mm_loadu_ps(sph); - __m128 d = _mm_loadu_ps(dpl); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dpl, d); + factor = _mm_set1_ps(K_inv); + dp = oth; + for (ui32 i = (oth_width + 3) >> 2; i > 0; --i, dp += 4) + { + __m128 s = _mm_load_ps(dp); + _mm_store_ps(dp, _mm_mul_ps(factor, s)); + } } - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict - factor = _mm_set1_ps(LIFTING_FACTORS::steps[6]); - const float *spl = lsrc + (even ? 0 : -1); - float *dph = hsrc; - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4, spl+=4) + // the actual horizontal transform + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) { - __m128 s1 = _mm_loadu_ps(spl); - __m128 s2 = _mm_loadu_ps(spl + 1); - __m128 d = _mm_loadu_ps(dph); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dph, d); + const lifting_step* s = atk->get_step(j); + const float a = s->irv.Aatk; + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const float* sp = oth + (ev ? 0 : 1); + float* dp = aug; + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= a * (sp[-1] + sp[0]); + + // swap buffers + float* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; } - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = _mm_set1_ps(LIFTING_FACTORS::steps[5]); - sph = hsrc + (even ? 
0 : 1); - dpl = lsrc; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dpl+=4, sph+=4) + // combine both lsrc and hsrc into dst + if (even) { - __m128 s1 = _mm_loadu_ps(sph - 1); - __m128 s2 = _mm_loadu_ps(sph); - __m128 d = _mm_loadu_ps(dpl); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dpl, d); + float* sph = hsrc->f32; + float* spl = lsrc->f32; + float* dp = dst->f32; + int i = width; + for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) + { + __m128 a = _mm_load_ps(spl); + __m128 b = _mm_load_ps(sph); + __m128 c = _mm_unpacklo_ps(a, b); + __m128 d = _mm_unpackhi_ps(a, b); + _mm_store_ps(dp, c); + _mm_store_ps(dp + 4, d); + } } - - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict and combine - factor = _mm_set1_ps(LIFTING_FACTORS::steps[4]); - dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - ui32 width = L_width + (even ? 0 : 1); - for (ui32 i = (width + 3) >> 2; i > 0; --i, spl+=4, sph+=4, dp+=8) + else { - __m128 s1 = _mm_loadu_ps(spl); - __m128 s2 = _mm_loadu_ps(spl + 1); - __m128 d = _mm_load_ps(sph); - s2 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s2); - _mm_storeu_ps(dp, _mm_unpacklo_ps(s1, d)); - _mm_storeu_ps(dp + 4, _mm_unpackhi_ps(s1, d)); + float* sph = hsrc->f32; + float* spl = lsrc->f32; + float* dp = dst->f32; + int i = width; + for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) + { + __m128 a = _mm_load_ps(spl); + __m128 b = _mm_load_ps(sph); + __m128 c = _mm_unpacklo_ps(b, a); + __m128 d = _mm_unpackhi_ps(b, a); + _mm_store_ps(dp, c); + _mm_store_ps(dp + 4, d); + } } } - else - { + else { if (even) - line_dst->f32[0] = line_lsrc->f32[0]; + dst->f32[0] = lsrc->f32[0]; else - line_dst->f32[0] = line_hsrc->f32[0] * 0.5f; + dst->f32[0] = hsrc->f32[0] * 0.5f; } } - } -} + + ////////////////////////////////////////////////////////////////////////// + void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + 
__m128 factor = _mm_set1_ps(K); + float* dst = aug->f32; + repeat = (repeat + 3) >> 2; + for (ui32 i = repeat; i > 0; --i, dst += 4) + { + __m128 s = _mm_load_ps(dst); + _mm_store_ps(dst, _mm_mul_ps(factor, s)); + } + } + + } // !local +} // !ojph diff --git a/src/core/transform/ojph_transform_sse2.cpp b/src/core/transform/ojph_transform_sse2.cpp index a607441a..5f3de49d 100644 --- a/src/core/transform/ojph_transform_sse2.cpp +++ b/src/core/transform/ojph_transform_sse2.cpp @@ -43,7 +43,7 @@ #include "ojph_transform.h" #include "ojph_transform_local.h" -#include +#include namespace ojph { namespace local { From fe24e552cbec80c1fe0b990134fa7f87bda97579 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 11 Apr 2024 10:34:50 +1000 Subject: [PATCH 23/37] Editorial + compilation fix + a potential bug fix --- src/core/transform/ojph_transform_avx.cpp | 58 ++++++++++++++++------- src/core/transform/ojph_transform_sse.cpp | 42 ++++++++-------- 2 files changed, 62 insertions(+), 38 deletions(-) diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index 743ceee6..81fc6c43 100644 --- a/src/core/transform/ojph_transform_avx.cpp +++ b/src/core/transform/ojph_transform_avx.cpp @@ -59,8 +59,8 @@ namespace ojph { float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; - repeat = (repeat + 7) >> 3; - for (ui32 i = repeat; i > 0; --i, dst += 8, src1 += 8, src2 += 8) + int i = (int)repeat; + for ( ; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) { __m256 s1 = _mm256_load_ps(src1); __m256 s2 = _mm256_load_ps(src2); @@ -83,8 +83,8 @@ namespace ojph { float* dph = hdst->f32; float* dpl = ldst->f32; float* sp = src->f32; - - for (int i = width; i > 0; i -= 16, sp += 16, dpl += 8, dph += 8) + int i = (int)width; + for ( ; i > 8; i -= 16, sp += 16, dpl += 8, dph += 8) { __m256 a = _mm256_load_ps(sp); __m256 b = _mm256_load_ps(sp + 8); @@ -95,14 +95,23 @@ namespace ojph { _mm256_store_ps(dpl, e); _mm256_store_ps(dph, f); } + 
for (; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) + { + __m128 a = _mm_load_ps(sp); + __m128 b = _mm_load_ps(sp + 4); + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); + _mm_store_ps(dpl, c); + _mm_store_ps(dph, d); + } } else { float* dph = hdst->f32; float* dpl = ldst->f32; float* sp = src->f32; - - for (int i = width; i > 0; i -= 16, sp += 16, dpl += 8, dph += 8) + int i = (int)width; + for ( ; i > 8; i -= 16, sp += 16, dpl += 8, dph += 8) { __m256 a = _mm256_load_ps(sp); __m256 b = _mm256_load_ps(sp + 8); @@ -113,6 +122,15 @@ namespace ojph { _mm256_store_ps(dpl, f); _mm256_store_ps(dph, e); } + for (; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) + { + __m128 a = _mm_load_ps(sp); + __m128 b = _mm_load_ps(sp + 4); + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); + _mm_store_ps(dpl, d); + _mm_store_ps(dph, c); + } } // the actual horizontal transform @@ -149,7 +167,8 @@ namespace ojph { factor = _mm256_set1_ps(K_inv); dp = lp; - for (ui32 i = (l_width + 7) >> 3; i > 0; --i, dp += 8) + int i = (int)l_width; + for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); @@ -157,7 +176,8 @@ namespace ojph { factor = _mm256_set1_ps(K); dp = hp; - for (ui32 i = (h_width + 7) >> 3; i > 0; --i, dp += 8) + int i = (int)h_width; + for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); @@ -181,8 +201,8 @@ namespace ojph { float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; - repeat = (repeat + 7) >> 3; - for (ui32 i = repeat; i > 0; --i, dst += 8, src1 += 8, src2 += 8) + int i = (int)repeat; + for ( ; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) { __m256 s1 = _mm256_load_ps(src1); __m256 s2 = _mm256_load_ps(src2); @@ -212,7 +232,8 @@ namespace ojph { factor = _mm256_set1_ps(K); dp = aug; - for (ui32 i = 
(aug_width + 7) >> 3; i > 0; --i, dp += 8) + int i = (int)aug_width; + for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); @@ -220,7 +241,8 @@ namespace ojph { factor = _mm256_set1_ps(K_inv); dp = oth; - for (ui32 i = (oth_width + 7) >> 3; i > 0; --i, dp += 8) + int i = (int)oth_width; + for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); @@ -255,8 +277,8 @@ namespace ojph { float* sph = hsrc->f32; float* spl = lsrc->f32; float* dp = dst->f32; - int i = width; - for ( ; i >= 8; i -= 16, dp += 16, spl += 8, sph += 8) + int i = (int)width; + for ( ; i > 8; i -= 16, dp += 16, spl += 8, sph += 8) { __m256 a = _mm256_load_ps(spl); __m256 b = _mm256_load_ps(sph); @@ -282,8 +304,8 @@ namespace ojph { float* sph = hsrc->f32; float* spl = lsrc->f32; float* dp = dst->f32; - int i = width; - for (; i >= 8; i -= 16, dp += 16, spl += 8, sph += 8) + int i = (int)width; + for (; i > 8; i -= 16, dp += 16, spl += 8, sph += 8) { // i>=8 because we can exceed the aligned buffer by up to 7 __m256 a = _mm256_load_ps(spl); __m256 b = _mm256_load_ps(sph); @@ -318,8 +340,8 @@ namespace ojph { { __m256 factor = _mm256_set1_ps(K); float* dst = aug->f32; - repeat = (repeat + 7) >> 3; - for (ui32 i = repeat; i > 0; --i, dst += 8 ) + int i = (int)repeat; + for ( ; i > 0; i -= 8, dst += 8 ) { __m256 s = _mm256_load_ps(dst); _mm256_store_ps(dst, _mm256_mul_ps(factor, s)); diff --git a/src/core/transform/ojph_transform_sse.cpp b/src/core/transform/ojph_transform_sse.cpp index 281ff4a6..3a4d39c8 100644 --- a/src/core/transform/ojph_transform_sse.cpp +++ b/src/core/transform/ojph_transform_sse.cpp @@ -59,8 +59,8 @@ namespace ojph { float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; - repeat = (repeat + 3) >> 2; - for (ui32 i = repeat; i > 0; --i, dst += 4, src1 += 4, src2 += 4) + int i = (int)repeat; + for ( ; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) { 
__m128 s1 = _mm_load_ps(src1); __m128 s2 = _mm_load_ps(src2); @@ -84,14 +84,13 @@ namespace ojph { float* dpl = ldst->f32; float* sp = src->f32; - for (int i = width; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) + int i = (int)width; + for ( ; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) { __m128 a = _mm_load_ps(sp); __m128 b = _mm_load_ps(sp + 4); - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - _mm_store_ps(dpl, c); _mm_store_ps(dph, d); } @@ -102,14 +101,13 @@ namespace ojph { float* dpl = ldst->f32; float* sp = src->f32; - for (int i = width; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) + int i = (int)width; + for ( ; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) { __m128 a = _mm_load_ps(sp); __m128 b = _mm_load_ps(sp + 4); - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - _mm_store_ps(dpl, d); _mm_store_ps(dph, c); } @@ -149,7 +147,8 @@ namespace ojph { factor = _mm_set1_ps(K_inv); dp = lp; - for (ui32 i = (l_width + 3) >> 2; i > 0; --i, dp += 4) + int i = (int)l_width; + for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); _mm_store_ps(dp, _mm_mul_ps(factor, s)); @@ -157,7 +156,8 @@ namespace ojph { factor = _mm_set1_ps(K); dp = hp; - for (ui32 i = (h_width + 3) >> 2; i > 0; --i, dp += 4) + int i = (int)h_width; + for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); _mm_store_ps(dp, _mm_mul_ps(factor, s)); @@ -181,8 +181,8 @@ namespace ojph { float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; - repeat = (repeat + 3) >> 2; - for (ui32 i = repeat; i > 0; --i, dst += 4, src1 += 4, src2 += 4) + int i = (int)repeat; + for ( ; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) { __m128 s1 = _mm_load_ps(src1); __m128 s2 = _mm_load_ps(src2); @@ -212,7 +212,8 @@ namespace ojph { factor = _mm_set1_ps(K); dp = aug; - for (ui32 i = (aug_width + 3) >> 2; i > 0; --i, dp += 4) + int i = (int)aug_width; + 
for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); _mm_store_ps(dp, _mm_mul_ps(factor, s)); @@ -220,7 +221,8 @@ namespace ojph { factor = _mm_set1_ps(K_inv); dp = oth; - for (ui32 i = (oth_width + 3) >> 2; i > 0; --i, dp += 4) + int i = (int)oth_width; + for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); _mm_store_ps(dp, _mm_mul_ps(factor, s)); @@ -255,8 +257,8 @@ namespace ojph { float* sph = hsrc->f32; float* spl = lsrc->f32; float* dp = dst->f32; - int i = width; - for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) + int i = (int)width; + for ( ; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) { __m128 a = _mm_load_ps(spl); __m128 b = _mm_load_ps(sph); @@ -271,8 +273,8 @@ namespace ojph { float* sph = hsrc->f32; float* spl = lsrc->f32; float* dp = dst->f32; - int i = width; - for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) + int i = (int)width; + for ( ; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) { __m128 a = _mm_load_ps(spl); __m128 b = _mm_load_ps(sph); @@ -296,8 +298,8 @@ namespace ojph { { __m128 factor = _mm_set1_ps(K); float* dst = aug->f32; - repeat = (repeat + 3) >> 2; - for (ui32 i = repeat; i > 0; --i, dst += 4) + int i = (int)repeat; + for ( ; i > 0; i -= 4, dst += 4) { __m128 s = _mm_load_ps(dst); _mm_store_ps(dst, _mm_mul_ps(factor, s)); From 5e4b627771abd338caecdf9d3088401633b118e7 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 11 Apr 2024 10:39:32 +1000 Subject: [PATCH 24/37] Syntax fix. 
--- src/core/transform/ojph_transform_avx.cpp | 10 ++++++---- src/core/transform/ojph_transform_sse.cpp | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index 81fc6c43..66e3ec81 100644 --- a/src/core/transform/ojph_transform_avx.cpp +++ b/src/core/transform/ojph_transform_avx.cpp @@ -163,11 +163,12 @@ namespace ojph { float K = atk->get_K(); float K_inv = 1.0f / K; float* dp; + int i; __m256 factor; factor = _mm256_set1_ps(K_inv); dp = lp; - int i = (int)l_width; + i = (int)l_width; for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); @@ -176,7 +177,7 @@ namespace ojph { factor = _mm256_set1_ps(K); dp = hp; - int i = (int)h_width; + i = (int)h_width; for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); @@ -228,11 +229,12 @@ namespace ojph { float K = atk->get_K(); float K_inv = 1.0f / K; float* dp; + int i; __m256 factor; factor = _mm256_set1_ps(K); dp = aug; - int i = (int)aug_width; + i = (int)aug_width; for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); @@ -241,7 +243,7 @@ namespace ojph { factor = _mm256_set1_ps(K_inv); dp = oth; - int i = (int)oth_width; + i = (int)oth_width; for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); diff --git a/src/core/transform/ojph_transform_sse.cpp b/src/core/transform/ojph_transform_sse.cpp index 3a4d39c8..39776717 100644 --- a/src/core/transform/ojph_transform_sse.cpp +++ b/src/core/transform/ojph_transform_sse.cpp @@ -143,11 +143,12 @@ namespace ojph { float K = atk->get_K(); float K_inv = 1.0f / K; float* dp; + int i; __m128 factor; factor = _mm_set1_ps(K_inv); dp = lp; - int i = (int)l_width; + i = (int)l_width; for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); @@ -156,7 +157,7 @@ namespace ojph { factor = _mm_set1_ps(K); dp = hp; - int i = (int)h_width; + i = (int)h_width; for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); @@ -208,11 
+209,12 @@ namespace ojph { float K = atk->get_K(); float K_inv = 1.0f / K; float* dp; + int i; __m128 factor; factor = _mm_set1_ps(K); dp = aug; - int i = (int)aug_width; + i = (int)aug_width; for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); @@ -221,7 +223,7 @@ namespace ojph { factor = _mm_set1_ps(K_inv); dp = oth; - int i = (int)oth_width; + i = (int)oth_width; for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); From 4b72faa72d1a4192115f3f77006c1eee5b036c7d Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 11 Apr 2024 12:52:21 +1000 Subject: [PATCH 25/37] A bug fix. --- src/core/codestream/ojph_resolution.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 14743249..3b25009f 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -496,8 +496,6 @@ namespace ojph { if (res_num == 0) { assert(child_res == NULL); - assert(bands[0].exists() && !bands[1].exists() - && !bands[2].exists() && !bands[3].exists()); bands[0].exchange_buf(vert_even ? 
sig->line : aug->line); bands[0].push_line(); return; From 1e9bc418b707d5dba6717d4ad92caee54967e5bf Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 11 Apr 2024 13:03:50 +1000 Subject: [PATCH 26/37] A bug fix --- src/core/codestream/ojph_resolution.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 3b25009f..6d6c500f 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -652,8 +652,6 @@ namespace ojph { if (res_num == 0) { assert(child_res == NULL); - assert(bands[0].exists() && !bands[1].exists() - && !bands[2].exists() && !bands[3].exists()); return bands[0].pull_line(); } From 7faf4576a7940b286917865e1002ae0201665ec9 Mon Sep 17 00:00:00 2001 From: aous72 Date: Thu, 11 Apr 2024 15:50:58 +1000 Subject: [PATCH 27/37] completed sse and avx. --- src/core/transform/ojph_transform_avx.cpp | 56 ++++++++++++++++++++--- src/core/transform/ojph_transform_sse.cpp | 56 ++++++++++++++++++++--- 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index 66e3ec81..8499bf19 100644 --- a/src/core/transform/ojph_transform_avx.cpp +++ b/src/core/transform/ojph_transform_avx.cpp @@ -148,10 +148,32 @@ namespace ojph { lp[-1] = lp[0]; lp[l_width] = lp[l_width - 1]; // lifting step - const float* sp = lp + (even ? 
1 : 0); + const float* sp = lp; float* dp = hp; - for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp += a * (sp[-1] + sp[0]); + int i = (int)h_width; + __m256 f = _mm256_set1_ps(a); + if (even) + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256 m = _mm256_load_ps(sp); + __m256 n = _mm256_loadu_ps(sp + 1); + __m256 p = _mm256_load_ps(dp); + p = _mm256_add_ps(p, _mm256_mul_ps(f, _mm256_add_ps(m, n))); + _mm256_store_ps(dp, p); + } + } + else + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256 m = _mm256_load_ps(sp); + __m256 n = _mm256_loadu_ps(sp - 1); + __m256 p = _mm256_load_ps(dp); + p = _mm256_add_ps(p, _mm256_mul_ps(f, _mm256_add_ps(m, n))); + _mm256_store_ps(dp, p); + } + } // swap buffers float* t = lp; lp = hp; hp = t; @@ -262,10 +284,32 @@ namespace ojph { oth[-1] = oth[0]; oth[oth_width] = oth[oth_width - 1]; // lifting step - const float* sp = oth + (ev ? 0 : 1); + const float* sp = oth; float* dp = aug; - for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp -= a * (sp[-1] + sp[0]); + int i = (int)aug_width; + __m256 f = _mm256_set1_ps(a); + if (ev) + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256 m = _mm256_load_ps(sp); + __m256 n = _mm256_loadu_ps(sp - 1); + __m256 p = _mm256_load_ps(dp); + p = _mm256_sub_ps(p, _mm256_mul_ps(f, _mm256_add_ps(m, n))); + _mm256_store_ps(dp, p); + } + } + else + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256 m = _mm256_load_ps(sp); + __m256 n = _mm256_loadu_ps(sp + 1); + __m256 p = _mm256_load_ps(dp); + p = _mm256_sub_ps(p, _mm256_mul_ps(f, _mm256_add_ps(m, n))); + _mm256_store_ps(dp, p); + } + } // swap buffers float* t = aug; aug = oth; oth = t; diff --git a/src/core/transform/ojph_transform_sse.cpp b/src/core/transform/ojph_transform_sse.cpp index 39776717..69907841 100644 --- a/src/core/transform/ojph_transform_sse.cpp +++ b/src/core/transform/ojph_transform_sse.cpp @@ -128,10 +128,32 @@ namespace ojph { lp[-1] = lp[0]; lp[l_width] = lp[l_width - 1]; // lifting step - const float* 
sp = lp + (even ? 1 : 0); + const float* sp = lp; float* dp = hp; - for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp += a * (sp[-1] + sp[0]); + int i = (int)h_width; + __m128 f = _mm_set1_ps(a); + if (even) + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128 m = _mm_load_ps(sp); + __m128 n = _mm_loadu_ps(sp + 1); + __m128 p = _mm_load_ps(dp); + p = _mm_add_ps(p, _mm_mul_ps(f, _mm_add_ps(m, n))); + _mm_store_ps(dp, p); + } + } + else + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128 m = _mm_load_ps(sp); + __m128 n = _mm_loadu_ps(sp - 1); + __m128 p = _mm_load_ps(dp); + p = _mm_add_ps(p, _mm_mul_ps(f, _mm_add_ps(m, n))); + _mm_store_ps(dp, p); + } + } // swap buffers float* t = lp; lp = hp; hp = t; @@ -242,10 +264,32 @@ namespace ojph { oth[-1] = oth[0]; oth[oth_width] = oth[oth_width - 1]; // lifting step - const float* sp = oth + (ev ? 0 : 1); + const float* sp = oth; float* dp = aug; - for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp -= a * (sp[-1] + sp[0]); + int i = (int)aug_width; + __m128 f = _mm_set1_ps(a); + if (ev) + { + for ( ; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128 m = _mm_load_ps(sp); + __m128 n = _mm_loadu_ps(sp - 1); + __m128 p = _mm_load_ps(dp); + p = _mm_sub_ps(p, _mm_mul_ps(f, _mm_add_ps(m, n))); + _mm_store_ps(dp, p); + } + } + else + { + for ( ; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128 m = _mm_load_ps(sp); + __m128 n = _mm_loadu_ps(sp + 1); + __m128 p = _mm_load_ps(dp); + p = _mm_sub_ps(p, _mm_mul_ps(f, _mm_add_ps(m, n))); + _mm_store_ps(dp, p); + } + } // swap buffers float* t = aug; aug = oth; oth = t; From 2a7ff07f00f2313ec3c7b2956817e13c2ee92958 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 12 Apr 2024 13:32:25 +1000 Subject: [PATCH 28/37] Corrected code for reversible path. Simplified DWT code. 
--- src/core/codestream/ojph_params.cpp | 2 +- src/core/codestream/ojph_resolution.cpp | 8 +- src/core/transform/ojph_transform.cpp | 168 ++++++++++----------- src/core/transform/ojph_transform.h | 29 +--- src/core/transform/ojph_transform_avx.cpp | 59 +++----- src/core/transform/ojph_transform_local.h | 172 ++++++++-------------- src/core/transform/ojph_transform_sse.cpp | 58 +++----- 7 files changed, 193 insertions(+), 303 deletions(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 268135c4..b6ada178 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -1724,7 +1724,7 @@ namespace ojph { d[0].rev.Batk = 2; d[0].rev.Eatk = 2; d[1].rev.Aatk = -1; - d[1].rev.Batk = 0; + d[1].rev.Batk = 1; d[1].rev.Eatk = 1; } diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 6d6c500f..b82a810a 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -524,7 +524,7 @@ namespace ojph { line_buf* sp1 = sig->active ? sig->line : ssp[i].line; line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; const lifting_step* s = atk->get_step(num_steps - i - 1); - rev_vert_ana_step(s, sp1, sp2, dp, width); + rev_vert_step(s, sp1, sp2, dp, width, false); } lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } @@ -591,7 +591,7 @@ namespace ojph { line_buf* sp1 = sig->active ? sig->line : ssp[i].line; line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; const lifting_step* s = atk->get_step(num_steps - i - 1); - irv_vert_ana_step(s, sp1, sp2, dp, width); + irv_vert_step(s, sp1, sp2, dp, width, false); } lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } @@ -711,7 +711,7 @@ namespace ojph { line_buf* sp1 = sig->active ? sig->line : ssp[i].line; line_buf* sp2 = ssp[i].active ? 
ssp[i].line : sig->line; const lifting_step* s = atk->get_step(i); - rev_vert_syn_step(s, dp, sp1, sp2, width); + rev_vert_step(s, sp1, sp2, dp, width, true); } lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } @@ -805,7 +805,7 @@ namespace ojph { line_buf* sp1 = sig->active ? sig->line : ssp[i].line; line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; const lifting_step* s = atk->get_step(i); - irv_vert_syn_step(s, dp, sp1, sp2, width); + irv_vert_step(s, sp1, sp2, dp, width, true); } lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index eba4f006..2a219bca 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -54,20 +54,15 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void (*rev_vert_ana_step) + void (*rev_vert_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, - const line_buf* aug, ui32 repeat) = NULL; + const line_buf* aug, ui32 repeat, bool synthesis) = NULL; ///////////////////////////////////////////////////////////////////////// void (*rev_horz_ana) (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even) = NULL; - ///////////////////////////////////////////////////////////////////////// - void (*rev_vert_syn_step) - (const lifting_step* s, const line_buf* aug, const line_buf* sig, - const line_buf* other, ui32 repeat) = NULL; - ///////////////////////////////////////////////////////////////////////// void (*rev_horz_syn) (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, @@ -78,29 +73,24 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void 
(*irv_vert_ana_step) + void (*irv_vert_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, - const line_buf* aug, ui32 repeat) = NULL; + const line_buf* aug, ui32 repeat, bool synthesis) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*irv_vert_times_K) + (float K, const line_buf* aug, ui32 repeat) = NULL; ///////////////////////////////////////////////////////////////////////// void (*irv_horz_ana) (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even) = NULL; - ///////////////////////////////////////////////////////////////////////// - void (*irv_vert_syn_step) - (const lifting_step* s, const line_buf* aug, const line_buf* sig, - const line_buf* other, ui32 repeat) = NULL; - ///////////////////////////////////////////////////////////////////////// void (*irv_horz_syn) (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even) = NULL; - ///////////////////////////////////////////////////////////////////////// - void (*irv_vert_times_K) - (float K, const line_buf* aug, ui32 repeat) = NULL; - //////////////////////////////////////////////////////////////////////////// static bool wavelet_transform_functions_initialized = false; @@ -112,27 +102,24 @@ namespace ojph { #if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN) - rev_vert_ana_step = gen_rev_vert_ana_step; + rev_vert_step = gen_rev_vert_step; rev_horz_ana = gen_rev_horz_ana; - rev_vert_syn_step = gen_rev_vert_syn_step; rev_horz_syn = gen_rev_horz_syn; - irv_vert_ana_step = gen_irv_vert_ana_step; - irv_horz_ana = gen_irv_horz_ana; - irv_vert_syn_step = gen_irv_vert_syn_step; - irv_horz_syn = gen_irv_horz_syn; + irv_vert_step = gen_irv_vert_step; irv_vert_times_K = gen_irv_vert_times_K; + irv_horz_ana = gen_irv_horz_ana; + irv_horz_syn = gen_irv_horz_syn; #ifndef OJPH_DISABLE_INTEL_SIMD int level = get_cpu_ext_level(); if 
(level >= X86_CPU_EXT_LEVEL_SSE) { - irv_vert_ana_step = sse_irv_vert_ana_step; + irv_vert_step = sse_irv_vert_step; + irv_vert_times_K = sse_irv_vert_times_K; irv_horz_ana = sse_irv_horz_ana; - irv_vert_syn_step = sse_irv_vert_syn_step; irv_horz_syn = sse_irv_horz_syn; - irv_vert_times_K = sse_irv_vert_times_K; } //if (level >= X86_CPU_EXT_LEVEL_SSE2) @@ -145,11 +132,10 @@ namespace ojph { if (level >= X86_CPU_EXT_LEVEL_AVX) { - irv_vert_ana_step = avx_irv_vert_ana_step; + irv_vert_step = avx_irv_vert_step; + irv_vert_times_K = avx_irv_vert_times_K; irv_horz_ana = avx_irv_horz_ana; - irv_vert_syn_step = avx_irv_vert_syn_step; irv_horz_syn = avx_irv_horz_syn; - irv_vert_times_K = avx_irv_vert_times_K; } //if (level >= X86_CPU_EXT_LEVEL_AVX2) @@ -197,9 +183,9 @@ namespace ojph { #if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN) ///////////////////////////////////////////////////////////////////////// - void gen_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat) + void gen_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; @@ -207,12 +193,35 @@ namespace ojph { si32* dst = aug->i32; const si32* src1 = sig->i32, * src2 = other->i32; - if (a >= 0) - for (ui32 i = repeat; i > 0; --i) - *dst++ += (b + a * (*src1++ + *src2++)) >> e; - else - for (ui32 i = repeat; i > 0; --i) - *dst++ -= (- b - a * (*src1++ + *src2++)) >> e; + // The general definition of the wavelet in Part 2 is slightly + // different to Part 1, although they are mathematically equivalent; + // here, we identify the simpler forms from Part 1 and employ them + if (a == 1 && b == 2 && e == 2) + { // normal update + if (synthesis) + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b + (*src1++ + *src2++)) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ += (b + (*src1++ + 
*src2++)) >> e; + } + else if (a == -1 && b == 1 && e == 1) + { // normal predict + if (synthesis) + for (ui32 i = repeat; i > 0; --i) + *dst++ += (*src1++ + *src2++) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (*src1++ + *src2++) >> e; + } + else { // general case + if (synthesis) + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b + a * (*src1++ + *src2++)) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ += (b + a * (*src1++ + *src2++)) >> e; + } } ///////////////////////////////////////////////////////////////////////// @@ -258,12 +267,15 @@ namespace ojph { // lifting step const si32* sp = lp + (even ? 1 : 0); si32* dp = hp; - if (a >= 0) + if (a == 1 && b == 2 && e == 2) // normal update for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp += (b + a * (sp[-1] + sp[0])) >> e; - else + *dp += (b + (sp[-1] + sp[0])) >> e; + else if (a == -1 && b == 1 && e == 1) // normal predict + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp -= (sp[-1] + sp[0]) >> e; + else // general case for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp -= (- b - a * (sp[-1] + sp[0])) >> e; + *dp += (b + a * (sp[-1] + sp[0])) >> e; // swap buffers si32* t = lp; lp = hp; hp = t; @@ -279,25 +291,6 @@ namespace ojph { } } - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat) - { - const si32 a = s->rev.Aatk; - const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; - - si32* dst = aug->i32; - const si32* src1 = sig->i32, * src2 = other->i32; - if (a >= 0) - for (ui32 i = repeat; i > 0; --i) - *dst++ -= (b + a * (*src1++ + *src2++)) >> e; - else - for (ui32 i = repeat; i > 0; --i) - *dst++ += (- b - a * (*src1++ + *src2++)) >> e; - } - ////////////////////////////////////////////////////////////////////////// void gen_rev_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const 
line_buf* hsrc, @@ -323,12 +316,15 @@ namespace ojph { // lifting step const si32* sp = oth + (ev ? 0 : 1); si32* dp = aug; - if (a >= 0) + if (a == 1 && b == 2 && e == 2) // normal update for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp -= (b + a * (sp[-1] + sp[0])) >> e; - else + *dp -= (b + (sp[-1] + sp[0])) >> e; + else if (a == -1 && b == 1 && e == 1) // normal predict + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp += (sp[-1] + sp[0]) >> e; + else // general case for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp += (- b - a * (sp[-1] + sp[0])) >> e; + *dp -= (b + a * (sp[-1] + sp[0])) >> e; // swap buffers si32* t = aug; aug = oth; oth = t; @@ -363,18 +359,29 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat) + void gen_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { float a = s->irv.Aatk; + if (synthesis) + a = -a; + float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; for (ui32 i = repeat; i > 0; --i) *dst++ += a * (*src1++ + *src2++); } - + + ////////////////////////////////////////////////////////////////////////// + void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + float* dst = aug->f32; + for (ui32 i = repeat; i > 0; --i) + *dst++ *= K; + } + ///////////////////////////////////////////////////////////////////////// void gen_irv_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, @@ -447,19 +454,6 @@ namespace ojph { } } - ////////////////////////////////////////////////////////////////////////// - void gen_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat) - { - float a = s->irv.Aatk; - - float* dst = aug->f32; 
- const float* src1 = sig->f32, * src2 = other->f32; - for (ui32 i = repeat; i > 0; --i) - *dst++ -= a * (*src1++ + *src2++); - } - ////////////////////////////////////////////////////////////////////////// void gen_irv_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, @@ -527,14 +521,6 @@ namespace ojph { } } - ////////////////////////////////////////////////////////////////////////// - void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) - { - float* dst = aug->f32; - for (ui32 i = repeat; i > 0; --i) - *dst++ *= K; - } - #endif // !OJPH_ENABLE_WASM_SIMD } diff --git a/src/core/transform/ojph_transform.h b/src/core/transform/ojph_transform.h index 1aae8b82..0e59632e 100644 --- a/src/core/transform/ojph_transform.h +++ b/src/core/transform/ojph_transform.h @@ -55,56 +55,43 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - extern void (*rev_vert_ana_step) + extern void (*rev_vert_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, - const line_buf* aug, ui32 repeat); + const line_buf* aug, ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// extern void (*rev_horz_ana) (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_vert_syn_step) - (const lifting_step* s, const line_buf* aug, const line_buf* sig, - const line_buf* other, ui32 repeat); - ///////////////////////////////////////////////////////////////////////// extern void (*rev_horz_syn) (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even); - - ///////////////////////////////////////////////////////////////////////// // Irreversible functions 
///////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - extern void (*irv_vert_ana_step) + extern void (*irv_vert_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, - const line_buf* aug, ui32 repeat); + const line_buf* aug, ui32 repeat, bool synthesis); + + ///////////////////////////////////////////////////////////////////////// + extern void (*irv_vert_times_K) + (float K, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// extern void (*irv_horz_ana) (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - extern void (*irv_vert_syn_step) - (const lifting_step* s, const line_buf* aug, const line_buf* sig, - const line_buf* other, ui32 repeat); - ///////////////////////////////////////////////////////////////////////// extern void (*irv_horz_syn) (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - extern void (*irv_vert_times_K) - (float K, const line_buf* aug, ui32 repeat); - - } } diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index 8499bf19..74f361ad 100644 --- a/src/core/transform/ojph_transform_avx.cpp +++ b/src/core/transform/ojph_transform_avx.cpp @@ -51,11 +51,15 @@ namespace ojph { namespace local { ////////////////////////////////////////////////////////////////////////// - void avx_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat) + void avx_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { - __m256 factor 
= _mm256_set1_ps(s->irv.Aatk); + float a = s->irv.Aatk; + if (synthesis) + a = -a; + + __m256 factor = _mm256_set1_ps(a); float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; @@ -70,6 +74,19 @@ namespace ojph { } } + ////////////////////////////////////////////////////////////////////////// + void avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + __m256 factor = _mm256_set1_ps(K); + float* dst = aug->f32; + int i = (int)repeat; + for (; i > 0; i -= 8, dst += 8) + { + __m256 s = _mm256_load_ps(dst); + _mm256_store_ps(dst, _mm256_mul_ps(factor, s)); + } + } + ///////////////////////////////////////////////////////////////////////// void avx_irv_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, @@ -215,26 +232,6 @@ namespace ojph { } } - ////////////////////////////////////////////////////////////////////////// - void avx_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat) - { - __m256 factor = _mm256_set1_ps(s->irv.Aatk); - - float* dst = aug->f32; - const float* src1 = sig->f32, * src2 = other->f32; - int i = (int)repeat; - for ( ; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) - { - __m256 s1 = _mm256_load_ps(src1); - __m256 s2 = _mm256_load_ps(src2); - __m256 d = _mm256_load_ps(dst); - d = _mm256_sub_ps(d, _mm256_mul_ps(factor, _mm256_add_ps(s1, s2))); - _mm256_store_ps(dst, d); - } - } - ////////////////////////////////////////////////////////////////////////// void avx_irv_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, @@ -381,19 +378,5 @@ namespace ojph { } } - ////////////////////////////////////////////////////////////////////////// - void avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) - { - __m256 factor = _mm256_set1_ps(K); - float* dst = aug->f32; - int i = (int)repeat; - for ( ; i > 0; i -= 8, dst += 8 ) - { - __m256 s = 
_mm256_load_ps(dst); - _mm256_store_ps(dst, _mm256_mul_ps(factor, s)); - } - } - - } // !local } // !ojph diff --git a/src/core/transform/ojph_transform_local.h b/src/core/transform/ojph_transform_local.h index 816e9e8b..fe7d1f27 100644 --- a/src/core/transform/ojph_transform_local.h +++ b/src/core/transform/ojph_transform_local.h @@ -55,56 +55,46 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - // Reversible functions + // Irreversible functions ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void gen_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void gen_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// - void gen_rev_horz_ana(const param_atk* atk, const line_buf* ldst, - const line_buf* hdst, const line_buf* src, - ui32 width, bool even); + void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// - void gen_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); + void gen_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// - void gen_rev_horz_syn(const param_atk* atk, const line_buf* dst, - const line_buf* lsrc, const line_buf* hsrc, + void gen_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even); 
////////////////////////////////////////////////////////////////////////// - // Irreversible functions + // Reversible functions ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void gen_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// - void gen_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + void gen_rev_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// - void gen_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - void gen_irv_horz_syn(const param_atk *atk, const line_buf* dst, - const line_buf *lsrc, const line_buf *hsrc, + void gen_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); - ////////////////////////////////////////////////////////////////////////// // // @@ -118,28 +108,23 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void sse_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void sse_irv_vert_step(const lifting_step* s, const line_buf* sig, + 
const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); + + ///////////////////////////////////////////////////////////////////////// + void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// void sse_irv_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void sse_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - ///////////////////////////////////////////////////////////////////////// void sse_irv_horz_syn(const param_atk *atk, const line_buf* dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); - ////////////////////////////////////////////////////////////////////////// // // @@ -153,20 +138,15 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void sse2_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// void sse2_rev_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - 
///////////////////////////////////////////////////////////////////////// void sse2_rev_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, @@ -186,28 +166,23 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void avx_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void avx_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); + + ///////////////////////////////////////////////////////////////////////// + void avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// void avx_irv_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void avx_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - ///////////////////////////////////////////////////////////////////////// void avx_irv_horz_syn(const param_atk *atk, const line_buf* dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); - ////////////////////////////////////////////////////////////////////////// // // @@ -221,20 +196,15 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void 
avx2_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// void avx2_rev_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - ///////////////////////////////////////////////////////////////////////// void avx2_rev_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, @@ -253,48 +223,38 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void avx512_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void avx512_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); + + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// void avx512_irv_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void avx512_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - ///////////////////////////////////////////////////////////////////////// void avx512_irv_horz_syn(const param_atk *atk, const line_buf* dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, 
bool even); - ///////////////////////////////////////////////////////////////////////// - void avx512_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); - ////////////////////////////////////////////////////////////////////////// // Reversible functions ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void avx512_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void avx512_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// void avx512_rev_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void avx512_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - ///////////////////////////////////////////////////////////////////////// void avx512_rev_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, @@ -309,55 +269,45 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - // Reversible functions + // Irreversible functions ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void wasm_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); 
///////////////////////////////////////////////////////////////////////// - void wasm_rev_horz_ana(const param_atk* atk, const line_buf* ldst, - const line_buf* hdst, const line_buf* src, - ui32 width, bool even); + void wasm_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); + void wasm_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// - void wasm_rev_horz_syn(const param_atk* atk, const line_buf* dst, - const line_buf* lsrc, const line_buf* hsrc, + void wasm_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even); ////////////////////////////////////////////////////////////////////////// - // Irreversible functions + // Reversible functions ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void wasm_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void wasm_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// - void wasm_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + void wasm_rev_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// - void wasm_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - 
ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - void wasm_irv_horz_syn(const param_atk *atk, const line_buf* dst, - const line_buf *lsrc, const line_buf *hsrc, + void wasm_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even); - - ///////////////////////////////////////////////////////////////////////// - void wasm_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); } } diff --git a/src/core/transform/ojph_transform_sse.cpp b/src/core/transform/ojph_transform_sse.cpp index 69907841..b61ea5e9 100644 --- a/src/core/transform/ojph_transform_sse.cpp +++ b/src/core/transform/ojph_transform_sse.cpp @@ -51,11 +51,15 @@ namespace ojph { namespace local { ////////////////////////////////////////////////////////////////////////// - void sse_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat) + void sse_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { - __m128 factor = _mm_set1_ps(s->irv.Aatk); + float a = s->irv.Aatk; + if (synthesis) + a = -a; + + __m128 factor = _mm_set1_ps(a); float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; @@ -70,6 +74,19 @@ namespace ojph { } } + ////////////////////////////////////////////////////////////////////////// + void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + __m128 factor = _mm_set1_ps(K); + float* dst = aug->f32; + int i = (int)repeat; + for (; i > 0; i -= 4, dst += 4) + { + __m128 s = _mm_load_ps(dst); + _mm_store_ps(dst, _mm_mul_ps(factor, s)); + } + } + ///////////////////////////////////////////////////////////////////////// void sse_irv_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, @@ -195,26 +212,6 @@ namespace ojph { } } - 
////////////////////////////////////////////////////////////////////////// - void sse_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat) - { - __m128 factor = _mm_set1_ps(s->irv.Aatk); - - float* dst = aug->f32; - const float* src1 = sig->f32, * src2 = other->f32; - int i = (int)repeat; - for ( ; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) - { - __m128 s1 = _mm_load_ps(src1); - __m128 s2 = _mm_load_ps(src2); - __m128 d = _mm_load_ps(dst); - d = _mm_sub_ps(d, _mm_mul_ps(factor, _mm_add_ps(s1, s2))); - _mm_store_ps(dst, d); - } - } - ////////////////////////////////////////////////////////////////////////// void sse_irv_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, @@ -339,18 +336,5 @@ namespace ojph { } } - ////////////////////////////////////////////////////////////////////////// - void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) - { - __m128 factor = _mm_set1_ps(K); - float* dst = aug->f32; - int i = (int)repeat; - for ( ; i > 0; i -= 4, dst += 4) - { - __m128 s = _mm_load_ps(dst); - _mm_store_ps(dst, _mm_mul_ps(factor, s)); - } - } - } // !local } // !ojph From 03ef77acbcc04da174b03d9312987b45b4c92e8c Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 12 Apr 2024 21:31:14 +1000 Subject: [PATCH 29/37] Completed sse, sse2, avx, avx2. Still wasm and avx512. 
--- src/core/transform/ojph_transform.cpp | 89 +-- src/core/transform/ojph_transform_avx.cpp | 168 +----- src/core/transform/ojph_transform_avx2.cpp | 617 +++++++++++++++------ src/core/transform/ojph_transform_local.h | 165 ++++++ src/core/transform/ojph_transform_sse.cpp | 135 +---- src/core/transform/ojph_transform_sse2.cpp | 554 ++++++++++++------ 6 files changed, 1086 insertions(+), 642 deletions(-) diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index 2a219bca..95ab686c 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -112,6 +112,7 @@ namespace ojph { irv_horz_syn = gen_irv_horz_syn; #ifndef OJPH_DISABLE_INTEL_SIMD + int level = get_cpu_ext_level(); if (level >= X86_CPU_EXT_LEVEL_SSE) @@ -122,13 +123,12 @@ namespace ojph { irv_horz_syn = sse_irv_horz_syn; } - //if (level >= X86_CPU_EXT_LEVEL_SSE2) - //{ - // rev_vert_ana_step = sse2_rev_vert_ana_step; - // rev_horz_ana = sse2_rev_horz_ana; - // rev_vert_syn_step = sse2_rev_vert_syn_step; - // rev_horz_syn = sse2_rev_horz_syn; - //} + if (level >= X86_CPU_EXT_LEVEL_SSE2) + { + rev_vert_step = sse2_rev_vert_step; + rev_horz_ana = sse2_rev_horz_ana; + rev_horz_syn = sse2_rev_horz_syn; + } if (level >= X86_CPU_EXT_LEVEL_AVX) { @@ -138,26 +138,23 @@ namespace ojph { irv_horz_syn = avx_irv_horz_syn; } - //if (level >= X86_CPU_EXT_LEVEL_AVX2) - //{ - // rev_vert_ana_step = avx2_rev_vert_ana_step; - // rev_horz_ana = avx2_rev_horz_ana; - // rev_vert_syn_step = avx2_rev_vert_syn_step; - // rev_horz_syn = avx2_rev_horz_syn; - //} + if (level >= X86_CPU_EXT_LEVEL_AVX2) + { + rev_vert_step = avx2_rev_vert_step; + rev_horz_ana = avx2_rev_horz_ana; + rev_horz_syn = avx2_rev_horz_syn; + } //if (level >= X86_CPU_EXT_LEVEL_AVX512) //{ - // rev_vert_ana_step = avx512_rev_vert_ana_step; + // rev_vert_step = avx512_rev_vert_ana_step; // rev_horz_ana = avx512_rev_horz_ana; - // rev_vert_syn_step = avx512_rev_vert_syn_step; // rev_horz_syn 
= avx512_rev_horz_syn; - // irv_vert_ana_step = avx512_irv_vert_ana_step; - // irv_horz_ana = avx512_irv_horz_ana; + // irv_vert_step = avx512_irv_vert_step; + // irv_vert_times_K = avx512_irv_vert_times_K; // irv_vert_syn_step = avx512_irv_vert_syn_step; // irv_horz_syn = avx512_irv_horz_syn; - // irv_vert_times_K = avx512_irv_vert_times_K; //} #endif // !OJPH_DISABLE_INTEL_SIMD @@ -196,17 +193,17 @@ namespace ojph { // The general definition of the wavelet in Part 2 is slightly // different to part 2, although they are mathematically equivalent // here, we identify the simpler form from Part 1 and employ them - if (a == 1 && b == 2 && e == 2) - { // normal update + if (a == 1) + { // 5/3 update and any case with a == 1 if (synthesis) for (ui32 i = repeat; i > 0; --i) - *dst++ -= (b + (*src1++ + *src2++)) >> e; + *dst++ -= (b + *src1++ + *src2++) >> e; else for (ui32 i = repeat; i > 0; --i) - *dst++ += (b + (*src1++ + *src2++)) >> e; + *dst++ += (b + *src1++ + *src2++) >> e; } else if (a == -1 && b == 1 && e == 1) - { // normal predict + { // 5/3 predict if (synthesis) for (ui32 i = repeat; i > 0; --i) *dst++ += (*src1++ + *src2++) >> e; @@ -214,6 +211,15 @@ namespace ojph { for (ui32 i = repeat; i > 0; --i) *dst++ -= (*src1++ + *src2++) >> e; } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + if (synthesis) + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b - (*src1++ + *src2++)) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ += (b - (*src1++ + *src2++)) >> e; + } else { // general case if (synthesis) for (ui32 i = repeat; i > 0; --i) @@ -267,15 +273,26 @@ namespace ojph { // lifting step const si32* sp = lp + (even ? 
1 : 0); si32* dp = hp; - if (a == 1 && b == 2 && e == 2) // normal update + if (a == 1) + { // 5/3 update and any case with a == 1 for (ui32 i = h_width; i > 0; --i, sp++, dp++) *dp += (b + (sp[-1] + sp[0])) >> e; - else if (a == -1 && b == 1 && e == 1) // normal predict + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict for (ui32 i = h_width; i > 0; --i, sp++, dp++) *dp -= (sp[-1] + sp[0]) >> e; - else // general case + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += (b - (sp[-1] + sp[0])) >> e; + } + else { + // general case for (ui32 i = h_width; i > 0; --i, sp++, dp++) *dp += (b + a * (sp[-1] + sp[0])) >> e; + } // swap buffers si32* t = lp; lp = hp; hp = t; @@ -316,15 +333,26 @@ namespace ojph { // lifting step const si32* sp = oth + (ev ? 0 : 1); si32* dp = aug; - if (a == 1 && b == 2 && e == 2) // normal update + if (a == 1) + { // 5/3 update and any case with a == 1 for (ui32 i = aug_width; i > 0; --i, sp++, dp++) *dp -= (b + (sp[-1] + sp[0])) >> e; - else if (a == -1 && b == 1 && e == 1) // normal predict + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict for (ui32 i = aug_width; i > 0; --i, sp++, dp++) *dp += (sp[-1] + sp[0]) >> e; - else // general case + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= (b - (sp[-1] + sp[0])) >> e; + } + else { + // general case for (ui32 i = aug_width; i > 0; --i, sp++, dp++) *dp -= (b + a * (sp[-1] + sp[0])) >> e; + } // swap buffers si32* t = aug; aug = oth; oth = t; @@ -413,7 +441,6 @@ namespace ojph { ui32 num_steps = atk->get_num_steps(); for (ui32 j = num_steps; j > 0; --j) { - // first lifting step const lifting_step* s = atk->get_step(j - 1); const float a = s->irv.Aatk; diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index 74f361ad..e7933ff1 100644 --- 
a/src/core/transform/ojph_transform_avx.cpp +++ b/src/core/transform/ojph_transform_avx.cpp @@ -41,15 +41,26 @@ #include "ojph_defs.h" #include "ojph_arch.h" #include "ojph_mem.h" -#include "ojph_transform.h" -#include "ojph_transform_local.h" - #include "ojph_params.h" #include "../codestream/ojph_params_local.h" +#include "ojph_transform.h" +#include "ojph_transform_local.h" + namespace ojph { namespace local { + ////////////////////////////////////////////////////////////////////////// + static inline void avx_multiply_const(float* p, float f, int width) + { + __m256 factor = _mm256_set1_ps(f); + for (; width > 0; width -= 8, p += 8) + { + __m256 s = _mm256_load_ps(p); + _mm256_store_ps(p, _mm256_mul_ps(factor, s)); + } + } + ////////////////////////////////////////////////////////////////////////// void avx_irv_vert_step(const lifting_step* s, const line_buf* sig, const line_buf* other, const line_buf* aug, @@ -95,59 +106,12 @@ namespace ojph { if (width > 1) { // split src into ldst and hdst - if (even) { - float* dph = hdst->f32; float* dpl = ldst->f32; - float* sp = src->f32; - int i = (int)width; - for ( ; i > 8; i -= 16, sp += 16, dpl += 8, dph += 8) - { - __m256 a = _mm256_load_ps(sp); - __m256 b = _mm256_load_ps(sp + 8); - __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); - __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); - __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); - __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); - _mm256_store_ps(dpl, e); - _mm256_store_ps(dph, f); - } - for (; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) - { - __m128 a = _mm_load_ps(sp); - __m128 b = _mm_load_ps(sp + 4); - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); - __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - _mm_store_ps(dpl, c); - _mm_store_ps(dph, d); - } - } - else - { float* dph = hdst->f32; - float* dpl = ldst->f32; float* sp = src->f32; - int i = (int)width; - for ( ; i > 8; i -= 16, sp += 16, dpl += 
8, dph += 8) - { - __m256 a = _mm256_load_ps(sp); - __m256 b = _mm256_load_ps(sp + 8); - __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); - __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); - __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); - __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); - _mm256_store_ps(dpl, f); - _mm256_store_ps(dph, e); - } - for (; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) - { - __m128 a = _mm_load_ps(sp); - __m128 b = _mm_load_ps(sp + 4); - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); - __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - _mm_store_ps(dpl, d); - _mm_store_ps(dph, c); - } + int w = (int)width; + AVX_DEINTERLEAVE(dpl, dph, sp, w, even); } // the actual horizontal transform @@ -157,7 +121,6 @@ namespace ojph { ui32 num_steps = atk->get_num_steps(); for (ui32 j = num_steps; j > 0; --j) { - // first lifting step const lifting_step* s = atk->get_step(j - 1); const float a = s->irv.Aatk; @@ -201,27 +164,8 @@ namespace ojph { { // multiply by K or 1/K float K = atk->get_K(); float K_inv = 1.0f / K; - float* dp; - int i; - __m256 factor; - - factor = _mm256_set1_ps(K_inv); - dp = lp; - i = (int)l_width; - for ( ; i > 0; i -= 8, dp += 8) - { - __m256 s = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); - } - - factor = _mm256_set1_ps(K); - dp = hp; - i = (int)h_width; - for ( ; i > 0; i -= 8, dp += 8) - { - __m256 s = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); - } + avx_multiply_const(lp, K_inv, (int)l_width); + avx_multiply_const(hp, K, (int)h_width); } } else { @@ -247,27 +191,8 @@ namespace ojph { { // multiply by K or 1/K float K = atk->get_K(); float K_inv = 1.0f / K; - float* dp; - int i; - __m256 factor; - - factor = _mm256_set1_ps(K); - dp = aug; - i = (int)aug_width; - for ( ; i > 0; i -= 8, dp += 8) - { - __m256 s = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); - } - - factor = 
_mm256_set1_ps(K_inv); - dp = oth; - i = (int)oth_width; - for ( ; i > 0; i -= 8, dp += 8) - { - __m256 s = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); - } + avx_multiply_const(aug, K, (int)aug_width); + avx_multiply_const(oth, K_inv, (int)oth_width); } // the actual horizontal transform @@ -315,59 +240,12 @@ namespace ojph { } // combine both lsrc and hsrc into dst - if (even) { - float* sph = hsrc->f32; - float* spl = lsrc->f32; float* dp = dst->f32; - int i = (int)width; - for ( ; i > 8; i -= 16, dp += 16, spl += 8, sph += 8) - { - __m256 a = _mm256_load_ps(spl); - __m256 b = _mm256_load_ps(sph); - __m256 c = _mm256_unpacklo_ps(a, b); - __m256 d = _mm256_unpackhi_ps(a, b); - __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); - __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); - _mm256_store_ps(dp, e); - _mm256_store_ps(dp + 8, f); - } - for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) - { - __m128 a = _mm_load_ps(spl); - __m128 b = _mm_load_ps(sph); - __m128 c = _mm_unpacklo_ps(a, b); - __m128 d = _mm_unpackhi_ps(a, b); - _mm_store_ps(dp, c); - _mm_store_ps(dp + 4, d); - } - } - else - { - float* sph = hsrc->f32; float* spl = lsrc->f32; - float* dp = dst->f32; - int i = (int)width; - for (; i > 8; i -= 16, dp += 16, spl += 8, sph += 8) - { // i>=8 because we can exceed the aligned buffer by up to 7 - __m256 a = _mm256_load_ps(spl); - __m256 b = _mm256_load_ps(sph); - __m256 c = _mm256_unpacklo_ps(b, a); - __m256 d = _mm256_unpackhi_ps(b, a); - __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); - __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); - _mm256_store_ps(dp, e); - _mm256_store_ps(dp + 8, f); - } - for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) - { - __m128 a = _mm_load_ps(spl); - __m128 b = _mm_load_ps(sph); - __m128 c = _mm_unpacklo_ps(b, a); - __m128 d = _mm_unpackhi_ps(b, a); - _mm_store_ps(dp, c); - _mm_store_ps(dp + 4, d); - } + float* sph = hsrc->f32; + int w = (int)width; + 
AVX_INTERLEAVE(dp, spl, sph, w, even); } } else { diff --git a/src/core/transform/ojph_transform_avx2.cpp b/src/core/transform/ojph_transform_avx2.cpp index 915e246c..a7b16ddb 100644 --- a/src/core/transform/ojph_transform_avx2.cpp +++ b/src/core/transform/ojph_transform_avx2.cpp @@ -40,6 +40,9 @@ #include "ojph_defs.h" #include "ojph_arch.h" #include "ojph_mem.h" +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" + #include "ojph_transform.h" #include "ojph_transform_local.h" @@ -48,218 +51,470 @@ namespace ojph { namespace local { - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_fwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) + ///////////////////////////////////////////////////////////////////////// + void avx2_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + __m256i va = _mm256_set1_epi32(a); + __m256i vb = _mm256_set1_epi32(b); - for (ui32 i = (repeat + 7) >> 3; i > 0; --i, dst+=8, src1+=8, src2+=8) - { - __m256i s1 = _mm256_load_si256((__m256i*)src1); - __m256i s2 = _mm256_load_si256((__m256i*)src2); - __m256i d = _mm256_load_si256((__m256i*)dst); - s1 = _mm256_srai_epi32(_mm256_add_epi32(s1, s2), 1); - d = _mm256_sub_epi32(d, s1); - _mm256_store_si256((__m256i*)dst, d); + si32* dst = aug->i32; + const si32* src1 = sig->i32, * src2 = other->i32; + // The general definition of the wavelet in Part 2 is slightly + // different to Part 1, although they are mathematically equivalent; + // here, we identify the simpler forms from Part 1 and employ them + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 8, dst
+= 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_add_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } + else + for (; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_add_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } } - } - - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_fwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - __m256i offset = _mm256_set1_epi32(2); - for (ui32 i = (repeat + 7) >> 3; i > 0; --i, dst+=8, src1+=8, src2+=8) - { - __m256i s1 = _mm256_load_si256((__m256i*)src1); - s1 = _mm256_add_epi32(s1, offset); - __m256i s2 = _mm256_load_si256((__m256i*)src2); - s2 = _mm256_add_epi32(s2, s1); - __m256i d = _mm256_load_si256((__m256i*)dst); - d = _mm256_add_epi32(d, _mm256_srai_epi32(s2, 2)); - _mm256_store_si256((__m256i*)dst, d); + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i w = _mm256_srai_epi32(t, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } + else + for (; i > 0; 
i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i w = _mm256_srai_epi32(t, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_sub_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } + else + for (; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_sub_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } + } + else { // general case + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i u = _mm256_mullo_epi32(va, t); + __m256i v = _mm256_add_epi32(vb, u); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } + else + for (; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + 
__m256i t = _mm256_add_epi32(s1, s2); + __m256i u = _mm256_mullo_epi32(va, t); + __m256i v = _mm256_add_epi32(vb, u); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } } } - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_horz_wvlt_fwd_tx(line_buf* line_src, line_buf *line_ldst, - line_buf *line_hdst,ui32 width, bool even) + ///////////////////////////////////////////////////////////////////////// + void avx2_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) { if (width > 1) { - si32 *src = line_src->i32; - si32 *ldst = line_ldst->i32, *hdst = line_hdst->i32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - // extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const si32* sp = src + (even ? 1 : 0); - si32 *dph = hdst; - const __m256i mask = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7); - for (ui32 i = (H_width + 7) >> 3; i > 0; --i, dph+=8) - { //this is doing twice the work it needs to do - //it can be definitely written better - __m256i s1 = _mm256_loadu_si256((__m256i*)(sp-1)); - __m256i s2 = _mm256_loadu_si256((__m256i*)(sp+1)); - __m256i d = _mm256_loadu_si256((__m256i*)sp); - s1 = _mm256_srai_epi32(_mm256_add_epi32(s1, s2), 1); - __m256i d1 = _mm256_sub_epi32(d, s1); - sp += 8; - s1 = _mm256_loadu_si256((__m256i*)(sp-1)); - s2 = _mm256_loadu_si256((__m256i*)(sp+1)); - d = _mm256_loadu_si256((__m256i*)sp); - s1 = _mm256_srai_epi32(_mm256_add_epi32(s1, s2), 1); - __m256i d2 = _mm256_sub_epi32(d, s1); - sp += 8; - d1 = _mm256_permutevar8x32_epi32(d1, mask); - d2 = _mm256_permutevar8x32_epi32(d2, mask); - d = _mm256_permute2x128_si256(d1, d2, (2 << 4) | 0); - _mm256_store_si256((__m256i*)dph, d); + // split src into ldst and hdst + { + float* dpl = ldst->f32; + float* dph = hdst->f32; + float*
sp = src->f32; + int w = (int)width; + AVX_DEINTERLEAVE(dpl, dph, sp, w, even); } - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - sp = src + (even ? 0 : 1); - const si32* sph = hdst + (even ? 0 : 1); - si32 *dpl = ldst; - __m256i offset = _mm256_set1_epi32(2); - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, sp+=16, sph+=8, dpl+=8) + si32* hp = hdst->i32, * lp = ldst->i32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) { - __m256i s1 = _mm256_loadu_si256((__m256i*)(sph-1)); - s1 = _mm256_add_epi32(s1, offset); - __m256i s2 = _mm256_loadu_si256((__m256i*)sph); - s2 = _mm256_add_epi32(s2, s1); - __m256i d1 = _mm256_loadu_si256((__m256i*)sp); - __m256i d2 = _mm256_loadu_si256((__m256i*)sp + 1); - d1 = _mm256_permutevar8x32_epi32(d1, mask); - d2 = _mm256_permutevar8x32_epi32(d2, mask); - __m256i d = _mm256_permute2x128_si256(d1, d2, (2 << 4) | 0); - d = _mm256_add_epi32(d, _mm256_srai_epi32(s2, 2)); - _mm256_store_si256((__m256i*)dpl, d); + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + __m256i va = _mm256_set1_epi32(a); + __m256i vb = _mm256_set1_epi32(b); + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const si32* sp = lp; + si32* dp = hp; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)h_width; + if (even) + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_add_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + else + 
{ + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_add_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i w = _mm256_srai_epi32(t, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + else + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i w = _mm256_srai_epi32(t, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_sub_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + else + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_sub_epi32(vb, t); + __m256i w = 
_mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + else { + // general case + int i = (int)h_width; + if (even) + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i u = _mm256_mullo_epi32(va, t); + __m256i v = _mm256_add_epi32(vb, u); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + else + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i u = _mm256_mullo_epi32(va, t); + __m256i v = _mm256_add_epi32(vb, u); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + + // swap buffers + si32* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; } } - else - { + else { if (even) - line_ldst->i32[0] = line_src->i32[0]; + ldst->i32[0] = src->i32[0]; else - line_hdst->i32[0] = line_src->i32[0] << 1; + hdst->i32[0] = src->i32[0] << 1; } } - - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_bwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - for (ui32 i = (repeat + 7) >> 3; i > 0; --i, dst+=8, src1+=8, src2+=8) - { - __m256i s1 = _mm256_load_si256((__m256i*)src1); - __m256i s2 = _mm256_load_si256((__m256i*)src2); - __m256i d = _mm256_load_si256((__m256i*)dst); - s1 = _mm256_srai_epi32(_mm256_add_epi32(s1, s2), 1); - d = _mm256_add_epi32(d, s1); - _mm256_store_si256((__m256i*)dst, d); 
- } - } - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_bwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - __m256i offset = _mm256_set1_epi32(2); - for (ui32 i = (repeat + 7) >> 3; i > 0; --i, dst+=8, src1+=8, src2+=8) - { - __m256i s1 = _mm256_load_si256((__m256i*)src1); - s1 = _mm256_add_epi32(s1, offset); - __m256i s2 = _mm256_load_si256((__m256i*)src2); - s2 = _mm256_add_epi32(s2, s1); - __m256i d = _mm256_load_si256((__m256i*)dst); - d = _mm256_sub_epi32(d, _mm256_srai_epi32(s2, 2)); - _mm256_store_si256((__m256i*)dst, d); - } - } - - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, bool even) + void avx2_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { - si32 *lsrc = line_lsrc->i32, *hsrc = line_hsrc->i32; - si32 *dst = line_dst->i32; + bool ev = even; + si32* oth = hsrc->i32, * aug = lsrc->i32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + const lifting_step* s = atk->get_step(j); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + __m256i va = _mm256_set1_epi32(a); + __m256i vb = _mm256_set1_epi32(b); - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 
0 : 1)) >> 1; + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const si32* sp = oth; + si32* dp = aug; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)aug_width; + if (ev) + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_add_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + else + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_add_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i w = _mm256_srai_epi32(t, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + else + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i w = _mm256_srai_epi32(t, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 8, sp += 8, dp += 8) + 
{ + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_sub_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + else + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_sub_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + else { + // general case + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i u = _mm256_mullo_epi32(va, t); + __m256i v = _mm256_add_epi32(vb, u); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + else + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i u = _mm256_mullo_epi32(va, t); + __m256i v = _mm256_add_epi32(vb, u); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } - // extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - const si32 *sph = hsrc + (even ? 
0 : 1); - si32 *spl = lsrc; - __m256i offset = _mm256_set1_epi32(2); - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, sph+=8, spl+=8) - { - __m256i s1 = _mm256_loadu_si256((__m256i*)(sph-1)); - s1 = _mm256_add_epi32(s1, offset); - __m256i s2 = _mm256_loadu_si256((__m256i*)sph); - s2 = _mm256_add_epi32(s2, s1); - __m256i d = _mm256_load_si256((__m256i*)spl); - d = _mm256_sub_epi32(d, _mm256_srai_epi32(s2, 2)); - _mm256_store_si256((__m256i*)spl, d); + // swap buffers + si32* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; } - // extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width - 1]; - // inverse predict and combine - si32 *dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - ui32 width = L_width + (even ? 0 : 1); - for (ui32 i = (width + 7) >> 3; i > 0; --i, sph+=8, spl+=8, dp+=16) + // combine both lsrc and hsrc into dst { - __m256i s1 = _mm256_loadu_si256((__m256i*)spl); - __m256i s2 = _mm256_loadu_si256((__m256i*)(spl+1)); - __m256i d = _mm256_load_si256((__m256i*)sph); - s2 = _mm256_srai_epi32(_mm256_add_epi32(s1, s2), 1); - d = _mm256_add_epi32(d, s2); - s2 = _mm256_unpackhi_epi32(s1, d); - s1 = _mm256_unpacklo_epi32(s1, d); - d = _mm256_permute2x128_si256(s1, s2, (2 << 4) | 0); - _mm256_storeu_si256((__m256i*)dp, d); - d = _mm256_permute2x128_si256(s1, s2, (3 << 4) | 1); - _mm256_storeu_si256((__m256i*)dp + 1, d); + float* dp = dst->f32; + float* spl = lsrc->f32; + float* sph = hsrc->f32; + int w = (int)width; + AVX_INTERLEAVE(dp, spl, sph, w, even); } } - else - { + else { if (even) - line_dst->i32[0] = line_lsrc->i32[0]; + dst->i32[0] = lsrc->i32[0]; else - line_dst->i32[0] = line_hsrc->i32[0] >> 1; + dst->i32[0] = hsrc->i32[0] >> 1; } } - } -} + + + + } // !local +} // !ojph diff --git a/src/core/transform/ojph_transform_local.h b/src/core/transform/ojph_transform_local.h index fe7d1f27..3ba9e6d0 100644 --- a/src/core/transform/ojph_transform_local.h +++ 
b/src/core/transform/ojph_transform_local.h @@ -45,6 +45,7 @@ namespace ojph { struct line_buf; namespace local { struct param_atk; + union lifting_step; ////////////////////////////////////////////////////////////////////////// // @@ -103,6 +104,60 @@ namespace ojph { // ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + // Supporting macros + ////////////////////////////////////////////////////////////////////////// + + ////////////////////////////////////////////////////////////////////////// + #define SSE_DEINTERLEAVE(dpl, dph, sp, width, even) \ + { \ + if (even) \ + for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) \ + { \ + __m128 a = _mm_load_ps(sp); \ + __m128 b = _mm_load_ps(sp + 4); \ + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); \ + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); \ + _mm_store_ps(dpl, c); \ + _mm_store_ps(dph, d); \ + } \ + else \ + for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) \ + { \ + __m128 a = _mm_load_ps(sp); \ + __m128 b = _mm_load_ps(sp + 4); \ + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); \ + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); \ + _mm_store_ps(dpl, d); \ + _mm_store_ps(dph, c); \ + } \ + } + + ////////////////////////////////////////////////////////////////////////// + #define SSE_INTERLEAVE(dp, spl, sph, width, even) \ + { \ + if (even) \ + for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) \ + { \ + __m128 a = _mm_load_ps(spl); \ + __m128 b = _mm_load_ps(sph); \ + __m128 c = _mm_unpacklo_ps(a, b); \ + __m128 d = _mm_unpackhi_ps(a, b); \ + _mm_store_ps(dp, c); \ + _mm_store_ps(dp + 4, d); \ + } \ + else \ + for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) \ + { \ + __m128 a = _mm_load_ps(spl); \ + __m128 b = _mm_load_ps(sph); \ + __m128 c = _mm_unpacklo_ps(b, a); \ + __m128 d = _mm_unpackhi_ps(b, a); \ + _mm_store_ps(dp, c); \ 
+ _mm_store_ps(dp + 4, d); \ + } \ + } + ////////////////////////////////////////////////////////////////////////// // Irreversible functions ////////////////////////////////////////////////////////////////////////// @@ -161,6 +216,116 @@ namespace ojph { // ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + // Supporting macros + ////////////////////////////////////////////////////////////////////////// + + ////////////////////////////////////////////////////////////////////////// + // We split multiples of 16 followed by multiples of 8, because + // we assume byte_alignment == 32 + #define AVX_DEINTERLEAVE(dpl, dph, sp, width, even) \ + { \ + if (even) \ + { \ + for (; width > 8; width -= 16, sp += 16, dpl += 8, dph += 8) \ + { \ + __m256 a = _mm256_load_ps(sp); \ + __m256 b = _mm256_load_ps(sp + 8); \ + __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); \ + __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); \ + __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); \ + __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); \ + _mm256_store_ps(dpl, e); \ + _mm256_store_ps(dph, f); \ + } \ + for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) \ + { \ + __m128 a = _mm_load_ps(sp); \ + __m128 b = _mm_load_ps(sp + 4); \ + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); \ + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); \ + _mm_store_ps(dpl, c); \ + _mm_store_ps(dph, d); \ + } \ + } \ + else \ + { \ + for (; width > 8; width -= 16, sp += 16, dpl += 8, dph += 8) \ + { \ + __m256 a = _mm256_load_ps(sp); \ + __m256 b = _mm256_load_ps(sp + 8); \ + __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); \ + __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); \ + __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); \ + __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); \ + _mm256_store_ps(dpl, f); 
\ + _mm256_store_ps(dph, e); \ + } \ + for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) \ + { \ + __m128 a = _mm_load_ps(sp); \ + __m128 b = _mm_load_ps(sp + 4); \ + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); \ + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); \ + _mm_store_ps(dpl, d); \ + _mm_store_ps(dph, c); \ + } \ + } \ + } + + ////////////////////////////////////////////////////////////////////////// + // We split multiples of 16 followed by multiples of 8, because + // we assume byte_alignment == 32 + #define AVX_INTERLEAVE(dp, spl, sph, width, even) \ + { \ + if (even) \ + { \ + for (; width > 8; width -= 16, dp += 16, spl += 8, sph += 8) \ + { \ + __m256 a = _mm256_load_ps(spl); \ + __m256 b = _mm256_load_ps(sph); \ + __m256 c = _mm256_unpacklo_ps(a, b); \ + __m256 d = _mm256_unpackhi_ps(a, b); \ + __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); \ + __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); \ + _mm256_store_ps(dp, e); \ + _mm256_store_ps(dp + 8, f); \ + } \ + for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) \ + { \ + __m128 a = _mm_load_ps(spl); \ + __m128 b = _mm_load_ps(sph); \ + __m128 c = _mm_unpacklo_ps(a, b); \ + __m128 d = _mm_unpackhi_ps(a, b); \ + _mm_store_ps(dp, c); \ + _mm_store_ps(dp + 4, d); \ + } \ + } \ + else \ + { \ + for (; width > 8; width -= 16, dp += 16, spl += 8, sph += 8) \ + { \ + __m256 a = _mm256_load_ps(spl); \ + __m256 b = _mm256_load_ps(sph); \ + __m256 c = _mm256_unpacklo_ps(b, a); \ + __m256 d = _mm256_unpackhi_ps(b, a); \ + __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); \ + __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); \ + _mm256_store_ps(dp, e); \ + _mm256_store_ps(dp + 8, f); \ + } \ + for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) \ + { \ + __m128 a = _mm_load_ps(spl); \ + __m128 b = _mm_load_ps(sph); \ + __m128 c = _mm_unpacklo_ps(b, a); \ + __m128 d = _mm_unpackhi_ps(b, a); \ + _mm_store_ps(dp, c); \ + _mm_store_ps(dp 
+ 4, d); \ + } \ + } \ + } + ////////////////////////////////////////////////////////////////////////// // Irreversible functions ////////////////////////////////////////////////////////////////////////// diff --git a/src/core/transform/ojph_transform_sse.cpp b/src/core/transform/ojph_transform_sse.cpp index b61ea5e9..897a1939 100644 --- a/src/core/transform/ojph_transform_sse.cpp +++ b/src/core/transform/ojph_transform_sse.cpp @@ -41,15 +41,26 @@ #include "ojph_defs.h" #include "ojph_arch.h" #include "ojph_mem.h" -#include "ojph_transform.h" -#include "ojph_transform_local.h" - #include "ojph_params.h" #include "../codestream/ojph_params_local.h" +#include "ojph_transform.h" +#include "ojph_transform_local.h" + namespace ojph { namespace local { + ////////////////////////////////////////////////////////////////////////// + static inline void sse_multiply_const(float* p, float f, int width) + { + __m128 factor = _mm_set1_ps(f); + for (; width > 0; width -= 4, p += 4) + { + __m128 s = _mm_load_ps(p); + _mm_store_ps(p, _mm_mul_ps(factor, s)); + } + } + ////////////////////////////////////////////////////////////////////////// void sse_irv_vert_step(const lifting_step* s, const line_buf* sig, const line_buf* other, const line_buf* aug, @@ -77,14 +88,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) { - __m128 factor = _mm_set1_ps(K); - float* dst = aug->f32; - int i = (int)repeat; - for (; i > 0; i -= 4, dst += 4) - { - __m128 s = _mm_load_ps(dst); - _mm_store_ps(dst, _mm_mul_ps(factor, s)); - } + sse_multiply_const(aug->f32, K, (int)repeat); } ///////////////////////////////////////////////////////////////////////// @@ -95,39 +99,12 @@ namespace ojph { if (width > 1) { // split src into ldst and hdst - if (even) { - float* dph = hdst->f32; float* dpl = ldst->f32; - float* sp = src->f32; - - int i = (int)width; - for ( ; i > 0; i -= 8, sp += 8, dpl += 4, 
dph += 4) - { - __m128 a = _mm_load_ps(sp); - __m128 b = _mm_load_ps(sp + 4); - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); - __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - _mm_store_ps(dpl, c); - _mm_store_ps(dph, d); - } - } - else - { float* dph = hdst->f32; - float* dpl = ldst->f32; float* sp = src->f32; - - int i = (int)width; - for ( ; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) - { - __m128 a = _mm_load_ps(sp); - __m128 b = _mm_load_ps(sp + 4); - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); - __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - _mm_store_ps(dpl, d); - _mm_store_ps(dph, c); - } + int w = (int)width; + SSE_DEINTERLEAVE(dpl, dph, sp, w, even); } // the actual horizontal transform @@ -137,7 +114,6 @@ namespace ojph { ui32 num_steps = atk->get_num_steps(); for (ui32 j = num_steps; j > 0; --j) { - // first lifting step const lifting_step* s = atk->get_step(j - 1); const float a = s->irv.Aatk; @@ -181,27 +157,8 @@ namespace ojph { { // multiply by K or 1/K float K = atk->get_K(); float K_inv = 1.0f / K; - float* dp; - int i; - __m128 factor; - - factor = _mm_set1_ps(K_inv); - dp = lp; - i = (int)l_width; - for ( ; i > 0; i -= 4, dp += 4) - { - __m128 s = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, s)); - } - - factor = _mm_set1_ps(K); - dp = hp; - i = (int)h_width; - for ( ; i > 0; i -= 4, dp += 4) - { - __m128 s = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, s)); - } + sse_multiply_const(lp, K_inv, (int)l_width); + sse_multiply_const(hp, K, (int)h_width); } } else { @@ -227,27 +184,8 @@ namespace ojph { { // multiply by K or 1/K float K = atk->get_K(); float K_inv = 1.0f / K; - float* dp; - int i; - __m128 factor; - - factor = _mm_set1_ps(K); - dp = aug; - i = (int)aug_width; - for ( ; i > 0; i -= 4, dp += 4) - { - __m128 s = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, s)); - } - - factor = _mm_set1_ps(K_inv); - dp = oth; - i = (int)oth_width; - for ( ; i > 0; i -= 4, 
dp += 4) - { - __m128 s = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, s)); - } + sse_multiply_const(aug, K, (int)aug_width); + sse_multiply_const(oth, K_inv, (int)oth_width); } // the actual horizontal transform @@ -295,37 +233,12 @@ namespace ojph { } // combine both lsrc and hsrc into dst - if (even) { - float* sph = hsrc->f32; - float* spl = lsrc->f32; float* dp = dst->f32; - int i = (int)width; - for ( ; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) - { - __m128 a = _mm_load_ps(spl); - __m128 b = _mm_load_ps(sph); - __m128 c = _mm_unpacklo_ps(a, b); - __m128 d = _mm_unpackhi_ps(a, b); - _mm_store_ps(dp, c); - _mm_store_ps(dp + 4, d); - } - } - else - { - float* sph = hsrc->f32; float* spl = lsrc->f32; - float* dp = dst->f32; - int i = (int)width; - for ( ; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) - { - __m128 a = _mm_load_ps(spl); - __m128 b = _mm_load_ps(sph); - __m128 c = _mm_unpacklo_ps(b, a); - __m128 d = _mm_unpackhi_ps(b, a); - _mm_store_ps(dp, c); - _mm_store_ps(dp + 4, d); - } + float* sph = hsrc->f32; + int w = (int)width; + SSE_INTERLEAVE(dp, spl, sph, w, even); } } else { diff --git a/src/core/transform/ojph_transform_sse2.cpp b/src/core/transform/ojph_transform_sse2.cpp index 5f3de49d..4939a219 100644 --- a/src/core/transform/ojph_transform_sse2.cpp +++ b/src/core/transform/ojph_transform_sse2.cpp @@ -40,6 +40,9 @@ #include "ojph_defs.h" #include "ojph_arch.h" #include "ojph_mem.h" +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" + #include "ojph_transform.h" #include "ojph_transform_local.h" @@ -48,211 +51,414 @@ namespace ojph { namespace local { - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_fwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) + ///////////////////////////////////////////////////////////////////////// + void sse2_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* 
other, const line_buf* aug, + ui32 repeat, bool synthesis) { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + __m128i va = _mm_set1_epi32(a); + __m128i vb = _mm_set1_epi32(b); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) - { - __m128i s1 = _mm_load_si128((__m128i*)src1); - __m128i s2 = _mm_load_si128((__m128i*)src2); - __m128i d = _mm_load_si128((__m128i*)dst); - s1 = _mm_srai_epi32(_mm_add_epi32(s1, s2), 1); - d = _mm_sub_epi32(d, s1); - _mm_store_si128((__m128i*)dst, d); + si32* dst = aug->i32; + const si32* src1 = sig->i32, * src2 = other->i32; + // The general definition of the wavelet in Part 2 is slightly + // different to Part 1, although they are mathematically equivalent; + // here, we identify the simpler form from Part 1 and employ them + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)src1); + __m128i s2 = _mm_load_si128((__m128i*)src2); + __m128i d = _mm_load_si128((__m128i*)dst); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_add_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dst, d); + } + else + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)src1); + __m128i s2 = _mm_load_si128((__m128i*)src2); + __m128i d = _mm_load_si128((__m128i*)dst); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_add_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dst, d); + } } - } - - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_fwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 
*dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - __m128i offset = _mm_set1_epi32(2); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) - { - __m128i s1 = _mm_load_si128((__m128i*)src1); - s1 = _mm_add_epi32(s1, offset); - __m128i s2 = _mm_load_si128((__m128i*)src2); - s2 = _mm_add_epi32(s2, s1); - __m128i d = _mm_load_si128((__m128i*)dst); - d = _mm_add_epi32(d, _mm_srai_epi32(s2, 2)); - _mm_store_si128((__m128i*)dst, d); + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)src1); + __m128i s2 = _mm_load_si128((__m128i*)src2); + __m128i d = _mm_load_si128((__m128i*)dst); + __m128i t = _mm_add_epi32(s1, s2); + __m128i w = _mm_srai_epi32(t, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dst, d); + } + else + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)src1); + __m128i s2 = _mm_load_si128((__m128i*)src2); + __m128i d = _mm_load_si128((__m128i*)dst); + __m128i t = _mm_add_epi32(s1, s2); + __m128i w = _mm_srai_epi32(t, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dst, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)src1); + __m128i s2 = _mm_load_si128((__m128i*)src2); + __m128i d = _mm_load_si128((__m128i*)dst); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_sub_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dst, d); + } + else + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)src1); + __m128i s2 = _mm_load_si128((__m128i*)src2); + __m128i d = _mm_load_si128((__m128i*)dst); + __m128i t = 
_mm_add_epi32(s1, s2); + __m128i v = _mm_sub_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dst, d); + } + } + else { // general case + // 32bit multiplication is not supported in sse2; we need sse4.1, + // where we can use _mm_mullo_epi32, which multiplies 32bit x 32bit, + // keeping the LSBs + if (synthesis) + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b + a * (*src1++ + *src2++)) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ += (b + a * (*src1++ + *src2++)) >> e; } } - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, bool even) + ///////////////////////////////////////////////////////////////////////// + void sse2_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) { if (width > 1) { - si32 *src = line_src->i32; - si32 *ldst = line_ldst->i32, *hdst = line_hdst->i32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - // extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const si32* sp = src + (even ? 
1 : 0); - si32 *dph = hdst; - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4) - { //this is doing twice the work it needs to do - //it can be definitely written better - __m128i s1 = _mm_loadu_si128((__m128i*)(sp-1)); - __m128i s2 = _mm_loadu_si128((__m128i*)(sp+1)); - __m128i d = _mm_loadu_si128((__m128i*)sp); - s1 = _mm_srai_epi32(_mm_add_epi32(s1, s2), 1); - __m128i d1 = _mm_sub_epi32(d, s1); - sp += 4; - s1 = _mm_loadu_si128((__m128i*)(sp-1)); - s2 = _mm_loadu_si128((__m128i*)(sp+1)); - d = _mm_loadu_si128((__m128i*)sp); - s1 = _mm_srai_epi32(_mm_add_epi32(s1, s2), 1); - __m128i d2 = _mm_sub_epi32(d, s1); - sp += 4; - d = _mm_castps_si128(_mm_shuffle_ps( - _mm_castsi128_ps(d1), _mm_castsi128_ps(d2), 0x88)); - _mm_store_si128((__m128i*)dph, d); + // split src into ldst and hdst + { + float* dpl = ldst->f32; + float* dph = hdst->f32; + float* sp = src->f32; + int w = (int)width; + SSE_DEINTERLEAVE(dpl, dph, sp, w, even); } - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - sp = src + (even ? 0 : 1); - const si32* sph = hdst + (even ? 0 : 1); - si32 *dpl = ldst; - __m128i offset = _mm_set1_epi32(2); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sp+=8, sph+=4, dpl+=4) + si32* hp = hdst->i32, * lp = ldst->i32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 
0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) { - __m128i s1 = _mm_loadu_si128((__m128i*)(sph-1)); - s1 = _mm_add_epi32(s1, offset); - __m128i s2 = _mm_loadu_si128((__m128i*)sph); - s2 = _mm_add_epi32(s2, s1); - __m128i d1 = _mm_loadu_si128((__m128i*)sp); - __m128i d2 = _mm_loadu_si128((__m128i*)sp + 1); - __m128i d = _mm_castps_si128(_mm_shuffle_ps( - _mm_castsi128_ps(d1), _mm_castsi128_ps(d2), 0x88)); - d = _mm_add_epi32(d, _mm_srai_epi32(s2, 2)); - _mm_store_si128((__m128i*)dpl, d); + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + __m128i va = _mm_set1_epi32(a); + __m128i vb = _mm_set1_epi32(b); + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const si32* sp = lp; + si32* dp = hp; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)h_width; + if (even) + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp + 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_add_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + else + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp - 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_add_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp + 1)); + 
__m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i w = _mm_srai_epi32(t, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp - 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i w = _mm_srai_epi32(t, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp + 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_sub_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp - 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_sub_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + else { + // general case + // 32bit multiplication is not supported in sse2; we need sse4.1, + // where we can use _mm_mullo_epi32, which multiplies + // 32bit x 32bit, keeping the LSBs + if (even) + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += (b + a * (sp[0] + sp[1])) >> e; + else + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += (b + a * (sp[-1] + sp[0])) >> e; + } + + // swap buffers + si32* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; } } - else - { + else { if (even) - line_ldst->i32[0] = line_src->i32[0]; + ldst->i32[0] = src->i32[0]; else - 
line_hdst->i32[0] = line_src->i32[0] << 1; - } - } - - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_bwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) - { - __m128i s1 = _mm_load_si128((__m128i*)src1); - __m128i s2 = _mm_load_si128((__m128i*)src2); - __m128i d = _mm_load_si128((__m128i*)dst); - s1 = _mm_srai_epi32(_mm_add_epi32(s1, s2), 1); - d = _mm_add_epi32(d, s1); - _mm_store_si128((__m128i*)dst, d); + hdst->i32[0] = src->i32[0] << 1; } } - - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_bwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - __m128i offset = _mm_set1_epi32(2); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) - { - __m128i s1 = _mm_load_si128((__m128i*)src1); - s1 = _mm_add_epi32(s1, offset); - __m128i s2 = _mm_load_si128((__m128i*)src2); - s2 = _mm_add_epi32(s2, s1); - __m128i d = _mm_load_si128((__m128i*)dst); - d = _mm_sub_epi32(d, _mm_srai_epi32(s2, 2)); - _mm_store_si128((__m128i*)dst, d); - } - } - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, bool even) + void sse2_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { - si32 *lsrc = line_lsrc->i32, *hsrc = line_hsrc->i32; - si32 *dst = line_dst->i32; + bool ev = even; + si32* oth = hsrc->i32, * aug = lsrc->i32; + ui32 aug_width = (width + (even ? 
1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + const lifting_step* s = atk->get_step(j); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + __m128i va = _mm_set1_epi32(a); + __m128i vb = _mm_set1_epi32(b); - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const si32* sp = oth; + si32* dp = aug; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)aug_width; + if (ev) + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp - 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_add_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + else + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp + 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_add_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp - 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i w = _mm_srai_epi32(t, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = 
_mm_loadu_si128((__m128i*)(sp + 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i w = _mm_srai_epi32(t, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp - 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_sub_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp + 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_sub_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + else { + // general case + // 32bit multiplication is not supported in sse2; we need sse4.1, + // where we can use _mm_mullo_epi32, which multiplies + // 32bit x 32bit, keeping the LSBs + if (ev) + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= (b + a * (sp[-1] + sp[0])) >> e; + else + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= (b + a * (sp[0] + sp[1])) >> e; + } - // extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - const si32 *sph = hsrc + (even ? 
0 : 1); - si32 *spl = lsrc; - __m128i offset = _mm_set1_epi32(2); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sph+=4, spl+=4) - { - __m128i s1 = _mm_loadu_si128((__m128i*)(sph-1)); - s1 = _mm_add_epi32(s1, offset); - __m128i s2 = _mm_loadu_si128((__m128i*)sph); - s2 = _mm_add_epi32(s2, s1); - __m128i d = _mm_load_si128((__m128i*)spl); - d = _mm_sub_epi32(d, _mm_srai_epi32(s2, 2)); - _mm_store_si128((__m128i*)spl, d); + // swap buffers + si32* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; } - // extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width - 1]; - // inverse predict and combine - si32 *dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - ui32 width = L_width + (even ? 0 : 1); - for (ui32 i = (width + 3) >> 2; i > 0; --i, sph+=4, spl+=4, dp+=8) + // combine both lsrc and hsrc into dst { - __m128i s1 = _mm_loadu_si128((__m128i*)spl); - __m128i s2 = _mm_loadu_si128((__m128i*)(spl+1)); - __m128i d = _mm_load_si128((__m128i*)sph); - s2 = _mm_srai_epi32(_mm_add_epi32(s1, s2), 1); - d = _mm_add_epi32(d, s2); - _mm_storeu_si128((__m128i*)dp, _mm_unpacklo_epi32(s1, d)); - _mm_storeu_si128((__m128i*)dp + 1, _mm_unpackhi_epi32(s1, d)); + float* dp = dst->f32; + float* spl = lsrc->f32; + float* sph = hsrc->f32; + int w = (int)width; + SSE_INTERLEAVE(dp, spl, sph, w, even); } } - else - { + else { if (even) - line_dst->i32[0] = line_lsrc->i32[0]; + dst->i32[0] = lsrc->i32[0]; else - line_dst->i32[0] = line_hsrc->i32[0] >> 1; + dst->i32[0] = hsrc->i32[0] >> 1; } } - } -} + + } // !local +} // !ojph From d1f505f2869c600c31532a0ae48aacb377336296 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 12 Apr 2024 21:42:34 +1000 Subject: [PATCH 30/37] Addresses compilation warnings. 
--- src/core/transform/ojph_transform_avx2.cpp | 6 +++--- src/core/transform/ojph_transform_sse2.cpp | 9 +++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/core/transform/ojph_transform_avx2.cpp b/src/core/transform/ojph_transform_avx2.cpp index a7b16ddb..243fe87f 100644 --- a/src/core/transform/ojph_transform_avx2.cpp +++ b/src/core/transform/ojph_transform_avx2.cpp @@ -58,7 +58,7 @@ namespace ojph { { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; + const si32 e = s->rev.Eatk; __m256i va = _mm256_set1_epi32(a); __m256i vb = _mm256_set1_epi32(b); @@ -206,7 +206,7 @@ namespace ojph { const lifting_step* s = atk->get_step(j - 1); const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; + const si32 e = s->rev.Eatk; __m256i va = _mm256_set1_epi32(a); __m256i vb = _mm256_set1_epi32(b); @@ -364,7 +364,7 @@ namespace ojph { const lifting_step* s = atk->get_step(j); const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; + const si32 e = s->rev.Eatk; __m256i va = _mm256_set1_epi32(a); __m256i vb = _mm256_set1_epi32(b); diff --git a/src/core/transform/ojph_transform_sse2.cpp b/src/core/transform/ojph_transform_sse2.cpp index 4939a219..8328842a 100644 --- a/src/core/transform/ojph_transform_sse2.cpp +++ b/src/core/transform/ojph_transform_sse2.cpp @@ -58,8 +58,7 @@ namespace ojph { { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; - __m128i va = _mm_set1_epi32(a); + const si32 e = s->rev.Eatk; __m128i vb = _mm_set1_epi32(b); si32* dst = aug->i32; @@ -188,8 +187,7 @@ namespace ojph { const lifting_step* s = atk->get_step(j - 1); const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; - __m128i va = _mm_set1_epi32(a); + const si32 e = s->rev.Eatk; __m128i vb = _mm_set1_epi32(b); // extension @@ -328,8 +326,7 @@ namespace ojph { const lifting_step* s = atk->get_step(j); const si32 a = 
s->rev.Aatk; const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; - __m128i va = _mm_set1_epi32(a); + const si32 e = s->rev.Eatk; __m128i vb = _mm_set1_epi32(b); // extension From 1c4a14ce94a3fcd2073318eb86027106033a396b Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 09:45:20 +1000 Subject: [PATCH 31/37] avx512 dwt implemented --- src/core/CMakeLists.txt | 9 +- src/core/common/ojph_arch.h | 6 +- src/core/transform/ojph_transform.cpp | 24 +- src/core/transform/ojph_transform_avx.cpp | 9 +- src/core/transform/ojph_transform_avx2.cpp | 2 - src/core/transform/ojph_transform_avx512.cpp | 830 +++++++++++++++++++ src/core/transform/ojph_transform_local.h | 48 +- 7 files changed, 855 insertions(+), 73 deletions(-) create mode 100644 src/core/transform/ojph_transform_avx512.cpp diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 40b9649b..19123a2e 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -18,11 +18,12 @@ file(GLOB TRANSFORM_SSE "transform/*_sse.cpp") file(GLOB TRANSFORM_SSE2 "transform/*_sse2.cpp") file(GLOB TRANSFORM_AVX "transform/*_avx.cpp") file(GLOB TRANSFORM_AVX2 "transform/*_avx2.cpp") +file(GLOB TRANSFORM_AVX512 "transform/*_avx512.cpp") file(GLOB TRANSFORM_WASM "transform/*_wasm.cpp") list(REMOVE_ITEM CODESTREAM ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2} ${CODESTREAM_WASM}) list(REMOVE_ITEM CODING ${CODING_SSSE3} ${CODING_WASM} ${CODING_AVX512}) -list(REMOVE_ITEM TRANSFORM ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2} ${TRANSFORM_WASM}) +list(REMOVE_ITEM TRANSFORM ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2} ${TRANSFORM_AVX512} ${TRANSFORM_WASM}) list(APPEND SOURCES ${CODESTREAM} ${CODING} ${COMMON} ${OTHERS} ${TRANSFORM}) source_group("codestream" FILES ${CODESTREAM}) @@ -42,10 +43,10 @@ if(EMSCRIPTEN) source_group("coding" FILES ${CODING_WASM}) source_group("transform" FILES ${TRANSFORM_WASM}) elseif(NOT 
OJPH_DISABLE_INTEL_SIMD) - add_library(openjph ${SOURCES} ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2} ${CODING_SSSE3} ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2}) + add_library(openjph ${SOURCES} ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2} ${CODING_SSSE3} ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2} ${TRANSFORM_AVX512}) source_group("codestream" FILES ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2}) source_group("coding" FILES ${CODING_SSSE3}) - source_group("transform" FILES ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2}) + source_group("transform" FILES ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2} ${TRANSFORM_AVX512}) if (OJPH_ENABLE_INTEL_AVX512) target_sources(openjph PRIVATE ${CODING_AVX512}) source_group("coding" FILES ${CODING_AVX512}) @@ -71,6 +72,7 @@ if (MSVC) set_source_files_properties(transform/ojph_colour_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") set_source_files_properties(transform/ojph_transform_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") set_source_files_properties(transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") + set_source_files_properties(transform/ojph_transform_avx512.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX512") else() set_source_files_properties(codestream/ojph_codestream_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) set_source_files_properties(codestream/ojph_codestream_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) @@ -80,6 +82,7 @@ else() set_source_files_properties(transform/ojph_colour_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) set_source_files_properties(transform/ojph_transform_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) set_source_files_properties(transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) + set_source_files_properties(transform/ojph_transform_avx512.cpp PROPERTIES COMPILE_FLAGS -mavx512f) endif() if (MSVC) diff 
--git a/src/core/common/ojph_arch.h b/src/core/common/ojph_arch.h index 62b630bb..fa9d077d 100644 --- a/src/core/common/ojph_arch.h +++ b/src/core/common/ojph_arch.h @@ -194,11 +194,7 @@ namespace ojph { //////////////////////////////////////////////////////////////////////////// // constants //////////////////////////////////////////////////////////////////////////// -#ifdef OJPH_ENABLE_INTEL_AVX512 - const ui32 byte_alignment = 64; //64 bytes == 512 bits -#else - const ui32 byte_alignment = 32; //32 bytes == 256 bits -#endif + const ui32 byte_alignment = 64; // 64 bytes == 512 bits const ui32 log_byte_alignment = 31 - count_leading_zeros(byte_alignment); const ui32 object_alignment = 8; diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index 95ab686c..83eed644 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -145,17 +145,19 @@ namespace ojph { rev_horz_syn = avx2_rev_horz_syn; } - //if (level >= X86_CPU_EXT_LEVEL_AVX512) - //{ - // rev_vert_step = avx512_rev_vert_ana_step; - // rev_horz_ana = avx512_rev_horz_ana; - // rev_horz_syn = avx512_rev_horz_syn; - - // irv_vert_step = avx512_irv_vert_step; - // irv_vert_times_K = avx512_irv_vert_times_K; - // irv_vert_syn_step = avx512_irv_vert_syn_step; - // irv_horz_syn = avx512_irv_horz_syn; - //} +#ifdef OJPH_ENABLE_INTEL_AVX512 + if (level >= X86_CPU_EXT_LEVEL_AVX512) + { + rev_vert_step = avx512_rev_vert_step; + rev_horz_ana = avx512_rev_horz_ana; + rev_horz_syn = avx512_rev_horz_syn; + + irv_vert_step = avx512_irv_vert_step; + irv_vert_times_K = avx512_irv_vert_times_K; + irv_horz_ana = avx512_irv_horz_ana; + irv_horz_syn = avx512_irv_horz_syn; + } +#endif // !OJPH_ENABLE_INTEL_AVX512 #endif // !OJPH_DISABLE_INTEL_SIMD diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index e7933ff1..08566624 100644 --- a/src/core/transform/ojph_transform_avx.cpp +++ 
b/src/core/transform/ojph_transform_avx.cpp @@ -88,14 +88,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) { - __m256 factor = _mm256_set1_ps(K); - float* dst = aug->f32; - int i = (int)repeat; - for (; i > 0; i -= 8, dst += 8) - { - __m256 s = _mm256_load_ps(dst); - _mm256_store_ps(dst, _mm256_mul_ps(factor, s)); - } + avx_multiply_const(aug->f32, K, (int)repeat); } ///////////////////////////////////////////////////////////////////////// diff --git a/src/core/transform/ojph_transform_avx2.cpp b/src/core/transform/ojph_transform_avx2.cpp index 243fe87f..847cd4c4 100644 --- a/src/core/transform/ojph_transform_avx2.cpp +++ b/src/core/transform/ojph_transform_avx2.cpp @@ -514,7 +514,5 @@ namespace ojph { } } - - } // !local } // !ojph diff --git a/src/core/transform/ojph_transform_avx512.cpp b/src/core/transform/ojph_transform_avx512.cpp new file mode 100644 index 00000000..efb7655a --- /dev/null +++ b/src/core/transform/ojph_transform_avx512.cpp @@ -0,0 +1,830 @@ +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. +// +// Copyright (c) 2019, Aous Naman +// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2019, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. +// File: ojph_transform_avx512.cpp +// Author: Aous Naman +// Date: 28 August 2019 +//***************************************************************************/ + +#include + +#include "ojph_defs.h" +#include "ojph_arch.h" +#include "ojph_mem.h" +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" + +#include "ojph_transform.h" +#include "ojph_transform_local.h" + +#include + +namespace ojph { + namespace local { + + ////////////////////////////////////////////////////////////////////////// + // We split multiples of 32 followed by multiples of 16, because + // we assume byte_alignment == 64 + static void avx512_deinterleave(float* dpl, float* dph, float* sp, + int width, bool even) + { + __m512i idx1 = _mm512_set_epi32( + 0x1E, 0x1C, 0x1A, 0x18, 0x16, 0x14, 0x12, 0x10, + 0x0E, 0x0C, 0x0A, 0x08, 0x06, 0x04, 0x02, 0x00 + ); + __m512i idx2 = _mm512_set_epi32( + 0x1F, 0x1D, 0x1B, 0x19, 0x17, 0x15, 0x13, 0x11, + 0x0F, 0x0D, 0x0B, 0x09, 0x07, 0x05, 0x03, 0x01 + ); + if (even) + { + for (; width > 16; width -= 32, sp += 32, 
dpl += 16, dph += 16) + { + __m512 a = _mm512_load_ps(sp); + __m512 b = _mm512_load_ps(sp + 16); + __m512 c = _mm512_permutex2var_ps(a, idx1, b); + __m512 d = _mm512_permutex2var_ps(a, idx2, b); + _mm512_store_ps(dpl, c); + _mm512_store_ps(dph, d); + } + for (; width > 0; width -= 16, sp += 16, dpl += 8, dph += 8) + { + __m256 a = _mm256_load_ps(sp); + __m256 b = _mm256_load_ps(sp + 8); + __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); + __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); + __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); + __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); + _mm256_store_ps(dpl, e); + _mm256_store_ps(dph, f); + } + } + else + { + for (; width > 16; width -= 32, sp += 32, dpl += 16, dph += 16) + { + __m512 a = _mm512_load_ps(sp); + __m512 b = _mm512_load_ps(sp + 16); + __m512 c = _mm512_permutex2var_ps(a, idx2, b); + __m512 d = _mm512_permutex2var_ps(a, idx1, b); + _mm512_store_ps(dpl, c); + _mm512_store_ps(dph, d); + } + for (; width > 0; width -= 16, sp += 16, dpl += 8, dph += 8) + { + __m256 a = _mm256_load_ps(sp); + __m256 b = _mm256_load_ps(sp + 8); + __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); + __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); + __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); + __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); + _mm256_store_ps(dpl, f); + _mm256_store_ps(dph, e); + } + } + } + + ////////////////////////////////////////////////////////////////////////// + // We split multiples of 32 followed by multiples of 16, because + // we assume byte_alignment == 64 + static void avx512_interleave(float* dp, float* spl, float* sph, + int width, bool even) + { + __m512i idx1 = _mm512_set_epi32( + 0x17, 0x7, 0x16, 0x6, 0x15, 0x5, 0x14, 0x4, + 0x13, 0x3, 0x12, 0x2, 0x11, 0x1, 0x10, 0x0 + ); + __m512i idx2 = _mm512_set_epi32( + 0x1F, 0xF, 0x1E, 0xE, 0x1D, 0xD, 0x1C, 0xC, + 0x1B, 0xB, 0x1A, 0xA, 0x19, 0x9, 0x18, 0x8 + ); + if 
(even) + { + for (; width > 16; width -= 32, dp += 32, spl += 16, sph += 16) + { + __m512 a = _mm512_load_ps(spl); + __m512 b = _mm512_load_ps(sph); + __m512 c = _mm512_permutex2var_ps(a, idx1, b); + __m512 d = _mm512_permutex2var_ps(a, idx2, b); + _mm512_store_ps(dp, c); + _mm512_store_ps(dp + 16, d); + } + for (; width > 0; width -= 16, dp += 16, spl += 8, sph += 8) + { + __m256 a = _mm256_load_ps(spl); + __m256 b = _mm256_load_ps(sph); + __m256 c = _mm256_unpacklo_ps(a, b); + __m256 d = _mm256_unpackhi_ps(a, b); + __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); + __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); + _mm256_store_ps(dp, e); + _mm256_store_ps(dp + 8, f); + } + } + else + { + for (; width > 16; width -= 32, dp += 32, spl += 16, sph += 16) + { + __m512 a = _mm512_load_ps(spl); + __m512 b = _mm512_load_ps(sph); + __m512 c = _mm512_permutex2var_ps(b, idx1, a); + __m512 d = _mm512_permutex2var_ps(b, idx2, a); + _mm512_store_ps(dp, c); + _mm512_store_ps(dp + 16, d); + } + for (; width > 0; width -= 16, dp += 16, spl += 8, sph += 8) + { + __m256 a = _mm256_load_ps(spl); + __m256 b = _mm256_load_ps(sph); + __m256 c = _mm256_unpacklo_ps(b, a); + __m256 d = _mm256_unpackhi_ps(b, a); + __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); + __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); + _mm256_store_ps(dp, e); + _mm256_store_ps(dp + 8, f); + } + } + } + + ////////////////////////////////////////////////////////////////////////// + static inline void avx512_multiply_const(float* p, float f, int width) + { + __m512 factor = _mm512_set1_ps(f); + for (; width > 0; width -= 16, p += 16) + { + __m512 s = _mm512_load_ps(p); + _mm512_store_ps(p, _mm512_mul_ps(factor, s)); + } + } + + ////////////////////////////////////////////////////////////////////////// + void avx512_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) + { + float a = s->irv.Aatk; + if 
(synthesis) + a = -a; + + __m512 factor = _mm512_set1_ps(a); + + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + int i = (int)repeat; + for ( ; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512 s1 = _mm512_load_ps(src1); + __m512 s2 = _mm512_load_ps(src2); + __m512 d = _mm512_load_ps(dst); + d = _mm512_add_ps(d, _mm512_mul_ps(factor, _mm512_add_ps(s1, s2))); + _mm512_store_ps(dst, d); + } + } + + ////////////////////////////////////////////////////////////////////////// + void avx512_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + avx512_multiply_const(aug->f32, K, (int)repeat); + } + + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) + { + if (width > 1) + { + // split src into ldst and hdst + { + float* dpl = ldst->f32; + float* dph = hdst->f32; + float* sp = src->f32; + int w = (int)width; + AVX_DEINTERLEAVE(dpl, dph, sp, w, even); + } + + // the actual horizontal transform + float* hp = hdst->f32, * lp = ldst->f32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 
0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) + { + const lifting_step* s = atk->get_step(j - 1); + const float a = s->irv.Aatk; + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const float* sp = lp; + float* dp = hp; + int i = (int)h_width; + __m512 f = _mm512_set1_ps(a); + if (even) + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512 m = _mm512_load_ps(sp); + __m512 n = _mm512_loadu_ps(sp + 1); + __m512 p = _mm512_load_ps(dp); + p = _mm512_add_ps(p, _mm512_mul_ps(f, _mm512_add_ps(m, n))); + _mm512_store_ps(dp, p); + } + } + else + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512 m = _mm512_load_ps(sp); + __m512 n = _mm512_loadu_ps(sp - 1); + __m512 p = _mm512_load_ps(dp); + p = _mm512_add_ps(p, _mm512_mul_ps(f, _mm512_add_ps(m, n))); + _mm512_store_ps(dp, p); + } + } + + // swap buffers + float* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; + } + + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + avx512_multiply_const(lp, K_inv, (int)l_width); + avx512_multiply_const(hp, K, (int)h_width); + } + } + else { + if (even) + ldst->f32[0] = src->f32[0]; + else + hdst->f32[0] = src->f32[0] * 2.0f; + } + } + + ////////////////////////////////////////////////////////////////////////// + void avx512_irv_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) + { + if (width > 1) + { + bool ev = even; + float* oth = hsrc->f32, * aug = lsrc->f32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 
0 : 1)) >> 1; // high pass + + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + avx512_multiply_const(aug, K, (int)aug_width); + avx512_multiply_const(oth, K_inv, (int)oth_width); + } + + // the actual horizontal transform + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + const lifting_step* s = atk->get_step(j); + const float a = s->irv.Aatk; + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const float* sp = oth; + float* dp = aug; + int i = (int)aug_width; + __m512 f = _mm512_set1_ps(a); + if (ev) + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512 m = _mm512_load_ps(sp); + __m512 n = _mm512_loadu_ps(sp - 1); + __m512 p = _mm512_load_ps(dp); + p = _mm512_sub_ps(p, _mm512_mul_ps(f, _mm512_add_ps(m, n))); + _mm512_store_ps(dp, p); + } + } + else + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512 m = _mm512_load_ps(sp); + __m512 n = _mm512_loadu_ps(sp + 1); + __m512 p = _mm512_load_ps(dp); + p = _mm512_sub_ps(p, _mm512_mul_ps(f, _mm512_add_ps(m, n))); + _mm512_store_ps(dp, p); + } + } + + // swap buffers + float* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; + } + + // combine both lsrc and hsrc into dst + avx512_interleave(dst->f32, lsrc->f32, hsrc->f32, (int)width, even); + } + else { + if (even) + dst->f32[0] = lsrc->f32[0]; + else + dst->f32[0] = hsrc->f32[0] * 0.5f; + } + } + + + ///////////////////////////////////////////////////////////////////////// + void avx512_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) + { + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const si32 e = s->rev.Eatk; + __m512i va = _mm512_set1_epi32(a); + __m512i vb = _mm512_set1_epi32(b); + + si32* dst = aug->i32; + const si32* src1 = sig->i32, * src2 = other->i32; + // The general definition of the 
wavelet in Part 2 is slightly + // different to part 2, although they are mathematically equivalent + // here, we identify the simpler form from Part 1 and employ them + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_add_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + else + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_add_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i w = _mm512_srai_epi32(t, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + else + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i w = _mm512_srai_epi32(t, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict 
+ int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_sub_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + else + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_sub_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + } + else { // general case + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i u = _mm512_mullo_epi32(va, t); + __m512i v = _mm512_add_epi32(vb, u); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + else + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i u = _mm512_mullo_epi32(va, t); + __m512i v = _mm512_add_epi32(vb, u); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + } + } + + ///////////////////////////////////////////////////////////////////////// + void avx512_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* 
hdst, const line_buf* src, + ui32 width, bool even) + { + if (width > 1) + { + // combine both lsrc and hsrc into dst + { + float* dpl = ldst->f32; + float* dph = hdst->f32; + float* sp = src->f32; + int w = (int)width; + AVX_DEINTERLEAVE(dpl, dph, sp, w, even); + } + + si32* hp = hdst->i32, * lp = ldst->i32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) + { + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const si32 e = s->rev.Eatk; + __m512i va = _mm512_set1_epi32(a); + __m512i vb = _mm512_set1_epi32(b); + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const si32* sp = lp; + si32* dp = hp; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)h_width; + if (even) + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_add_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + else + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_add_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = 
_mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i w = _mm512_srai_epi32(t, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + else + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i w = _mm512_srai_epi32(t, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_sub_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + else + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_sub_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + else { + // general case + int i = (int)h_width; + if (even) + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i u = _mm512_mullo_epi32(va, t); + __m512i v = _mm512_add_epi32(vb, u); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + 
else + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i u = _mm512_mullo_epi32(va, t); + __m512i v = _mm512_add_epi32(vb, u); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + + // swap buffers + si32* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; + } + } + else { + if (even) + ldst->i32[0] = src->i32[0]; + else + hdst->i32[0] = src->i32[0] << 1; + } + } + + ////////////////////////////////////////////////////////////////////////// + void avx512_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) + { + if (width > 1) + { + bool ev = even; + si32* oth = hsrc->i32, * aug = lsrc->i32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 
0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + const lifting_step* s = atk->get_step(j); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const si32 e = s->rev.Eatk; + __m512i va = _mm512_set1_epi32(a); + __m512i vb = _mm512_set1_epi32(b); + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const si32* sp = oth; + si32* dp = aug; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)aug_width; + if (ev) + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_add_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + else + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_add_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i w = _mm512_srai_epi32(t, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + else + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = 
_mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i w = _mm512_srai_epi32(t, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_sub_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + else + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_sub_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + else { + // general case + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i u = _mm512_mullo_epi32(va, t); + __m512i v = _mm512_add_epi32(vb, u); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + else + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i u = _mm512_mullo_epi32(va, t); + __m512i v = _mm512_add_epi32(vb, u); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + 
_mm512_store_si512((__m512i*)dp, d); + } + } + + // swap buffers + si32* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; + } + + // combine both lsrc and hsrc into dst + avx512_interleave(dst->f32, lsrc->f32, hsrc->f32, (int)width, even); + } + else { + if (even) + dst->i32[0] = lsrc->i32[0]; + else + dst->i32[0] = hsrc->i32[0] >> 1; + } + } + + } // !local +} // !ojph diff --git a/src/core/transform/ojph_transform_local.h b/src/core/transform/ojph_transform_local.h index 3ba9e6d0..ec2a2e12 100644 --- a/src/core/transform/ojph_transform_local.h +++ b/src/core/transform/ojph_transform_local.h @@ -221,13 +221,11 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - // We split multiples of 16 followed by multiples of 8, because - // we assume byte_alignment == 32 #define AVX_DEINTERLEAVE(dpl, dph, sp, width, even) \ { \ if (even) \ { \ - for (; width > 8; width -= 16, sp += 16, dpl += 8, dph += 8) \ + for (; width > 0; width -= 16, sp += 16, dpl += 8, dph += 8) \ { \ __m256 a = _mm256_load_ps(sp); \ __m256 b = _mm256_load_ps(sp + 8); \ @@ -238,19 +236,10 @@ namespace ojph { _mm256_store_ps(dpl, e); \ _mm256_store_ps(dph, f); \ } \ - for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) \ - { \ - __m128 a = _mm_load_ps(sp); \ - __m128 b = _mm_load_ps(sp + 4); \ - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); \ - __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); \ - _mm_store_ps(dpl, c); \ - _mm_store_ps(dph, d); \ - } \ } \ else \ { \ - for (; width > 8; width -= 16, sp += 16, dpl += 8, dph += 8) \ + for (; width > 0; width -= 16, sp += 16, dpl += 8, dph += 8) \ { \ __m256 a = _mm256_load_ps(sp); \ __m256 b = _mm256_load_ps(sp + 8); \ @@ -261,26 +250,15 @@ namespace ojph { _mm256_store_ps(dpl, f); \ _mm256_store_ps(dph, e); \ } \ - for (; width > 0; width -= 8, sp += 8, dpl 
+= 4, dph += 4) \ - { \ - __m128 a = _mm_load_ps(sp); \ - __m128 b = _mm_load_ps(sp + 4); \ - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); \ - __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); \ - _mm_store_ps(dpl, d); \ - _mm_store_ps(dph, c); \ - } \ } \ } ////////////////////////////////////////////////////////////////////////// - // We split multiples of 16 followed by multiples of 8, because - // we assume byte_alignment == 32 #define AVX_INTERLEAVE(dp, spl, sph, width, even) \ { \ if (even) \ { \ - for (; width > 8; width -= 16, dp += 16, spl += 8, sph += 8) \ + for (; width > 0; width -= 16, dp += 16, spl += 8, sph += 8) \ { \ __m256 a = _mm256_load_ps(spl); \ __m256 b = _mm256_load_ps(sph); \ @@ -291,19 +269,10 @@ namespace ojph { _mm256_store_ps(dp, e); \ _mm256_store_ps(dp + 8, f); \ } \ - for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) \ - { \ - __m128 a = _mm_load_ps(spl); \ - __m128 b = _mm_load_ps(sph); \ - __m128 c = _mm_unpacklo_ps(a, b); \ - __m128 d = _mm_unpackhi_ps(a, b); \ - _mm_store_ps(dp, c); \ - _mm_store_ps(dp + 4, d); \ - } \ } \ else \ { \ - for (; width > 8; width -= 16, dp += 16, spl += 8, sph += 8) \ + for (; width > 0; width -= 16, dp += 16, spl += 8, sph += 8) \ { \ __m256 a = _mm256_load_ps(spl); \ __m256 b = _mm256_load_ps(sph); \ @@ -314,15 +283,6 @@ namespace ojph { _mm256_store_ps(dp, e); \ _mm256_store_ps(dp + 8, f); \ } \ - for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) \ - { \ - __m128 a = _mm_load_ps(spl); \ - __m128 b = _mm_load_ps(sph); \ - __m128 c = _mm_unpacklo_ps(b, a); \ - __m128 d = _mm_unpackhi_ps(b, a); \ - _mm_store_ps(dp, c); \ - _mm_store_ps(dp + 4, d); \ - } \ } \ } From 30b32cc67f61f3aa63d9fb99b3d87cbd04c72bfa Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 09:51:18 +1000 Subject: [PATCH 32/37] Fix compilation, and a missing optimization. 
--- src/core/transform/ojph_transform_avx512.cpp | 22 +++++--------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/src/core/transform/ojph_transform_avx512.cpp b/src/core/transform/ojph_transform_avx512.cpp index efb7655a..02edca60 100644 --- a/src/core/transform/ojph_transform_avx512.cpp +++ b/src/core/transform/ojph_transform_avx512.cpp @@ -224,13 +224,7 @@ namespace ojph { if (width > 1) { // split src into ldst and hdst - { - float* dpl = ldst->f32; - float* dph = hdst->f32; - float* sp = src->f32; - int w = (int)width; - AVX_DEINTERLEAVE(dpl, dph, sp, w, even); - } + avx512_deinterleave(ldst->f32, hdst->f32, src->f32, (int)width, even); // the actual horizontal transform float* hp = hdst->f32, * lp = ldst->f32; @@ -376,7 +370,7 @@ namespace ojph { { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const si32 e = s->rev.Eatk; + const ui32 e = s->rev.Eatk; __m512i va = _mm512_set1_epi32(a); __m512i vb = _mm512_set1_epi32(b); @@ -506,13 +500,7 @@ namespace ojph { if (width > 1) { // combine both lsrc and hsrc into dst - { - float* dpl = ldst->f32; - float* dph = hdst->f32; - float* sp = src->f32; - int w = (int)width; - AVX_DEINTERLEAVE(dpl, dph, sp, w, even); - } + avx512_deinterleave(ldst->f32, hdst->f32, src->f32, (int)width, even); si32* hp = hdst->i32, * lp = ldst->i32; ui32 l_width = (width + (even ? 
1 : 0)) >> 1; // low pass @@ -524,7 +512,7 @@ namespace ojph { const lifting_step* s = atk->get_step(j - 1); const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const si32 e = s->rev.Eatk; + const ui32 e = s->rev.Eatk; __m512i va = _mm512_set1_epi32(a); __m512i vb = _mm512_set1_epi32(b); @@ -682,7 +670,7 @@ namespace ojph { const lifting_step* s = atk->get_step(j); const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const si32 e = s->rev.Eatk; + const ui32 e = s->rev.Eatk; __m512i va = _mm512_set1_epi32(a); __m512i vb = _mm512_set1_epi32(b); From f28a90fce49edf94d44865add3299650058ebe6d Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 17:57:45 +1000 Subject: [PATCH 33/37] Wasm completed -- not tested yet. --- src/apps/ojph_compress/CMakeLists.txt | 2 +- src/apps/ojph_expand/CMakeLists.txt | 2 +- src/core/CMakeLists.txt | 2 +- src/core/transform/ojph_transform.cpp | 18 +- src/core/transform/ojph_transform_avx512.cpp | 10 +- src/core/transform/ojph_transform_wasm.cpp | 957 +++++++++++-------- tests/CMakeLists.txt | 16 +- 7 files changed, 596 insertions(+), 411 deletions(-) diff --git a/src/apps/ojph_compress/CMakeLists.txt b/src/apps/ojph_compress/CMakeLists.txt index bbb77abc..dadcca9b 100644 --- a/src/apps/ojph_compress/CMakeLists.txt +++ b/src/apps/ojph_compress/CMakeLists.txt @@ -17,7 +17,7 @@ source_group("others" FILES ${OJPH_IMG_IO}) source_group("common" FILES ${OJPH_IMG_IO_H}) if(EMSCRIPTEN) - add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) + add_compile_options(-std=c++11 -O3 -fexceptions) add_executable(ojph_compress ${SOURCES}) add_executable(ojph_compress_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) target_compile_options(ojph_compress_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) diff --git a/src/apps/ojph_expand/CMakeLists.txt b/src/apps/ojph_expand/CMakeLists.txt index c0ac185e..d4b65523 100644 --- a/src/apps/ojph_expand/CMakeLists.txt +++ b/src/apps/ojph_expand/CMakeLists.txt @@ -17,7 +17,7 @@ 
source_group("others" FILES ${OJPH_IMG_IO}) source_group("common" FILES ${OJPH_IMG_IO_H}) if(EMSCRIPTEN) - add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) + add_compile_options(-std=c++11 -O3 -fexceptions) add_executable(ojph_expand ${SOURCES}) add_executable(ojph_expand_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) target_compile_options(ojph_expand_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 19123a2e..40fffa48 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -33,7 +33,7 @@ source_group("others" FILES ${OTHERS}) source_group("transform" FILES ${TRANSFORM}) if(EMSCRIPTEN) - add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) + add_compile_options(-std=c++11 -O3 -fexceptions) add_library(openjph ${SOURCES}) add_library(openjphsimd ${SOURCES} ${CODESTREAM_WASM} ${CODING_WASM} ${TRANSFORM_WASM}) target_include_directories(openjph PUBLIC common) diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index 83eed644..0dc5f95c 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -162,16 +162,14 @@ namespace ojph { #endif // !OJPH_DISABLE_INTEL_SIMD #else // OJPH_ENABLE_WASM_SIMD - rev_vert_ana_step = wasm_rev_vert_ana_step; - rev_horz_ana = wasm_rev_horz_ana; - rev_vert_syn_step = wasm_rev_vert_syn_step; - rev_horz_syn = wasm_rev_horz_syn; - - irv_vert_ana_step = wasm_irv_vert_ana_step; - irv_horz_ana = wasm_irv_horz_ana; - irv_vert_syn_step = wasm_irv_vert_syn_step; - irv_horz_syn = wasm_irv_horz_syn; - irv_vert_times_K = wasm_irv_vert_times_K; + rev_vert_step = wasm_rev_vert_step; + rev_horz_ana = wasm_rev_horz_ana; + rev_horz_syn = wasm_rev_horz_syn; + + irv_vert_step = wasm_irv_vert_step; + irv_vert_times_K = wasm_irv_vert_times_K; + irv_horz_ana = wasm_irv_horz_ana; + irv_horz_syn = wasm_irv_horz_syn; #endif // !OJPH_ENABLE_WASM_SIMD 
wavelet_transform_functions_initialized = true; diff --git a/src/core/transform/ojph_transform_avx512.cpp b/src/core/transform/ojph_transform_avx512.cpp index 02edca60..504aa870 100644 --- a/src/core/transform/ojph_transform_avx512.cpp +++ b/src/core/transform/ojph_transform_avx512.cpp @@ -2,9 +2,9 @@ // This software is released under the 2-Clause BSD license, included // below. // -// Copyright (c) 2019, Aous Naman -// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia -// Copyright (c) 2019, The University of New South Wales, Australia +// Copyright (c) 2019-2024, Aous Naman +// Copyright (c) 2019-2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2019-2024, The University of New South Wales, Australia // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -30,9 +30,9 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************/ // This file is part of the OpenJPH software implementation. 
-// File: ojph_transform_avx2.cpp +// File: ojph_transform_avx512.cpp // Author: Aous Naman -// Date: 28 August 2019 +// Date: 13 April 2024 //***************************************************************************/ #include diff --git a/src/core/transform/ojph_transform_wasm.cpp b/src/core/transform/ojph_transform_wasm.cpp index 8f48e352..7b9ffb10 100644 --- a/src/core/transform/ojph_transform_wasm.cpp +++ b/src/core/transform/ojph_transform_wasm.cpp @@ -41,6 +41,9 @@ #include "ojph_defs.h" #include "ojph_arch.h" #include "ojph_mem.h" +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" + #include "ojph_transform.h" #include "ojph_transform_local.h" @@ -48,473 +51,645 @@ namespace ojph { namespace local { ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_fwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) - { - v128_t s1 = wasm_v128_load(src1); - v128_t s2 = wasm_v128_load(src2); - v128_t d = wasm_v128_load(dst); - s1 = wasm_i32x4_shr(wasm_i32x4_add(s1, s2), 1); - d = wasm_i32x4_sub(d, s1); - wasm_v128_store(dst, d); - } - } - - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_fwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) + void wasm_deinterleave(float* dpl, float* dph, float* sp, + int width, bool even) { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - v128_t offset = wasm_i32x4_splat(2); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) - { - v128_t s1 = wasm_v128_load(src1); - s1 = wasm_i32x4_add(s1, offset); - v128_t s2 = wasm_v128_load(src2); - s2 = wasm_i32x4_add(s2, s1); - v128_t d = 
wasm_v128_load(dst); - d = wasm_i32x4_add(d, wasm_i32x4_shr(s2, 2)); - wasm_v128_store(dst, d); - } + if (even) + for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) + { + v128_t a = wasm_v128_load(sp); + v128_t b = wasm_v128_load(sp + 4); + v128_t c = wasm_i32x4_shuffle(a, b, 0, 2, 4 + 0, 4 + 2); + v128_t d = wasm_i32x4_shuffle(a, b, 1, 3, 4 + 1, 4 + 3); + // v128_t c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); + // v128_t d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); + wasm_v128_store(dpl, c); + wasm_v128_store(dph, d); + } + else + for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) + { + v128_t a = wasm_v128_load(sp); + v128_t b = wasm_v128_load(sp + 4); + v128_t c = wasm_i32x4_shuffle(a, b, 0, 2, 4 + 0, 4 + 2); + v128_t d = wasm_i32x4_shuffle(a, b, 1, 3, 4 + 1, 4 + 3); + // v128_t c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); + // v128_t d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); + wasm_v128_store(dpl, d); + wasm_v128_store(dph, c); + } } ////////////////////////////////////////////////////////////////////////// - void wasm_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, bool even) + void wasm_interleave(float* dp, float* spl, float* sph, + int width, bool even) { - if (width > 1) - { - si32 *src = line_src->i32; - si32 *ldst = line_ldst->i32, *hdst = line_hdst->i32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - // extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const si32* sp = src + (even ? 
1 : 0); - si32 *dph = hdst; - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4) - { //this is doing twice the work it needs to do - //it can be definitely written better - v128_t s1 = wasm_v128_load(sp - 1); - v128_t s2 = wasm_v128_load(sp + 1); - v128_t d = wasm_v128_load(sp); - s1 = wasm_i32x4_shr(wasm_i32x4_add(s1, s2), 1); - v128_t d1 = wasm_i32x4_sub(d, s1); - sp += 4; - s1 = wasm_v128_load(sp - 1); - s2 = wasm_v128_load(sp + 1); - d = wasm_v128_load(sp); - s1 = wasm_i32x4_shr(wasm_i32x4_add(s1, s2), 1); - v128_t d2 = wasm_i32x4_sub(d, s1); - sp += 4; - d = wasm_i32x4_shuffle(d1, d2, 0, 2, 4, 6); - wasm_v128_store(dph, d); - } - - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - sp = src + (even ? 0 : 1); - const si32* sph = hdst + (even ? 0 : 1); - si32 *dpl = ldst; - v128_t offset = wasm_i32x4_splat(2); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sp+=8, sph+=4, dpl+=4) + if (even) + for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) { - v128_t s1 = wasm_v128_load(sph - 1); - s1 = wasm_i32x4_add(s1, offset); - v128_t s2 = wasm_v128_load(sph); - s2 = wasm_i32x4_add(s2, s1); - v128_t d1 = wasm_v128_load(sp); - v128_t d2 = wasm_v128_load(sp + 4); - v128_t d = wasm_i32x4_shuffle(d1, d2, 0, 2, 4, 6); - d = wasm_i32x4_add(d, wasm_i32x4_shr(s2, 2)); - wasm_v128_store(dpl, d); + v128_t a = wasm_v128_load(spl); + v128_t b = wasm_v128_load(sph); + v128_t c = wasm_i32x4_shuffle(a, b, 0, 4 + 0, 1, 4 + 1); + v128_t d = wasm_i32x4_shuffle(a, b, 2, 4 + 2, 3, 4 + 3); + // v128_t c = _mm_unpacklo_ps(a, b); + // v128_t d = _mm_unpackhi_ps(a, b); + wasm_v128_store(dp, c); + wasm_v128_store(dp + 4, d); } - } else - { - if (even) - line_ldst->i32[0] = line_src->i32[0]; - else - line_hdst->i32[0] = line_src->i32[0] << 1; - } + for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) + { + v128_t a = wasm_v128_load(spl); + v128_t b = wasm_v128_load(sph); + v128_t c = wasm_i32x4_shuffle(b, a, 0, 4 + 0, 1, 4 + 1); + v128_t d = 
wasm_i32x4_shuffle(b, a, 2, 4 + 2, 3, 4 + 3); + // v128_t c = _mm_unpacklo_ps(b, a); + // v128_t d = _mm_unpackhi_ps(b, a); + wasm_v128_store(dp, c); + wasm_v128_store(dp + 4, d); + } } ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_bwd_predict(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, ui32 repeat) + static inline void wasm_multiply_const(float* p, float f, int width) { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) + v128_t factor = wasm_f32x4_splat(f); + for (; width > 0; width -= 4, p += 4) { - v128_t s1 = wasm_v128_load(src1); - v128_t s2 = wasm_v128_load(src2); - v128_t d = wasm_v128_load(dst); - s1 = wasm_i32x4_shr(wasm_i32x4_add(s1, s2), 1); - d = wasm_i32x4_add(d, s1); - wasm_v128_store(dst, d); + v128_t s = wasm_v128_load(p); + wasm_v128_store(p, wasm_f32x4_mul(factor, s)); } } ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_bwd_update(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, ui32 repeat) + void wasm_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - v128_t offset = wasm_i32x4_splat(2); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) + float a = s->irv.Aatk; + if (synthesis) + a = -a; + + v128_t factor = wasm_f32x4_splat(a); + + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + int i = (int)repeat; + for ( ; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) { v128_t s1 = wasm_v128_load(src1); - s1 = wasm_i32x4_add(s1, offset); v128_t s2 = wasm_v128_load(src2); - s2 = wasm_i32x4_add(s2, s1); - v128_t d = wasm_v128_load(dst); - d = 
wasm_i32x4_sub(d, wasm_i32x4_shr(s2, 2)); + v128_t d = wasm_v128_load(dst); + d = wasm_f32x4_add(d, wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2))); wasm_v128_store(dst, d); } } ////////////////////////////////////////////////////////////////////////// - void wasm_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, bool even) + void wasm_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + wasm_multiply_const(aug->f32, K, (int)repeat); + } + + ///////////////////////////////////////////////////////////////////////// + void wasm_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) { if (width > 1) { - si32 *lsrc = line_lsrc->i32, *hsrc = line_hsrc->i32; - si32 *dst = line_dst->i32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; + // split src into ldst and hdst + wasm_deinterleave(ldst->f32, hdst->f32, src->f32, (int)width, even); - // extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - const si32 *sph = hsrc + (even ? 0 : 1); - si32 *spl = lsrc; - v128_t offset = wasm_i32x4_splat(2); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sph+=4, spl+=4) + // the actual horizontal transform + float* hp = hdst->f32, * lp = ldst->f32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 
0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) { - v128_t s1 = wasm_v128_load(sph - 1); - s1 = wasm_i32x4_add(s1, offset); - v128_t s2 = wasm_v128_load(sph); - s2 = wasm_i32x4_add(s2, s1); - v128_t d = wasm_v128_load(spl); - d = wasm_i32x4_sub(d, wasm_i32x4_shr(s2, 2)); - wasm_v128_store(spl, d); + const lifting_step* s = atk->get_step(j - 1); + const float a = s->irv.Aatk; + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const float* sp = lp; + float* dp = hp; + int i = (int)h_width; + v128_t f = wasm_f32x4_splat(a); + if (even) + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t m = wasm_v128_load(sp); + v128_t n = wasm_v128_load(sp + 1); + v128_t p = wasm_v128_load(dp); + p = wasm_f32x4_add(p, wasm_f32x4_mul(f, wasm_f32x4_add(m, n))); + wasm_v128_store(dp, p); + } + } + else + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t m = wasm_v128_load(sp); + v128_t n = wasm_v128_load(sp - 1); + v128_t p = wasm_v128_load(dp); + p = wasm_f32x4_add(p, wasm_f32x4_mul(f, wasm_f32x4_add(m, n))); + wasm_v128_store(dp, p); + } + } + + // swap buffers + float* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; } - // extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width - 1]; - // inverse predict and combine - si32 *dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - ui32 width = L_width + (even ? 
0 : 1); - for (ui32 i = (width + 3) >> 2; i > 0; --i, sph+=4, spl+=4, dp+=8) - { - v128_t s1 = wasm_v128_load(spl); - v128_t s2 = wasm_v128_load(spl + 1); - v128_t d = wasm_v128_load(sph); - s2 = wasm_i32x4_shr(wasm_i32x4_add(s1, s2), 1); - d = wasm_i32x4_add(d, s2); - wasm_v128_store(dp, wasm_i32x4_shuffle(s1, d, 0, 4, 1, 5)); - wasm_v128_store(dp + 4, wasm_i32x4_shuffle(s1, d, 2, 6, 3, 7)); + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + wasm_multiply_const(lp, K_inv, (int)l_width); + wasm_multiply_const(hp, K, (int)h_width); } } - else - { + else { if (even) - line_dst->i32[0] = line_lsrc->i32[0]; + ldst->f32[0] = src->f32[0]; else - line_dst->i32[0] = line_hsrc->i32[0] >> 1; + hdst->f32[0] = src->f32[0] * 2.0f; } } ////////////////////////////////////////////////////////////////////////// - void wasm_irrev_vert_wvlt_step(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, int step_num, - ui32 repeat) + void wasm_irv_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { - float *dst = line_dst->f32; - const float *src1 = line_src1->f32, *src2 = line_src2->f32; - - v128_t factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[step_num]); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) + if (width > 1) { - v128_t s1 = wasm_v128_load(src1); - v128_t s2 = wasm_v128_load(src2); - v128_t d = wasm_v128_load(dst); - d = wasm_f32x4_add(d, wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2))); - wasm_v128_store(dst, d); + bool ev = even; + float* oth = hsrc->f32, * aug = lsrc->f32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 
0 : 1)) >> 1; // high pass + + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + wasm_multiply_const(aug, K, (int)aug_width); + wasm_multiply_const(oth, K_inv, (int)oth_width); + } + + // the actual horizontal transform + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + const lifting_step* s = atk->get_step(j); + const float a = s->irv.Aatk; + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const float* sp = oth; + float* dp = aug; + int i = (int)aug_width; + v128_t f = wasm_f32x4_splat(a); + if (ev) + { + for ( ; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t m = wasm_v128_load(sp); + v128_t n = wasm_v128_load(sp - 1); + v128_t p = wasm_v128_load(dp); + p = wasm_f32x4_sub(p, wasm_f32x4_mul(f, wasm_f32x4_add(m, n))); + wasm_v128_store(dp, p); + } + } + else + { + for ( ; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t m = wasm_v128_load(sp); + v128_t n = wasm_v128_load(sp + 1); + v128_t p = wasm_v128_load(dp); + p = wasm_f32x4_sub(p, wasm_f32x4_mul(f, wasm_f32x4_add(m, n))); + wasm_v128_store(dp, p); + } + } + + // swap buffers + float* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; + } + + // combine both lsrc and hsrc into dst + wasm_interleave(dst->f32, lsrc->f32, hsrc->f32, (int)width, even); + } + else { + if (even) + dst->f32[0] = lsrc->f32[0]; + else + dst->f32[0] = hsrc->f32[0] * 0.5f; } } ///////////////////////////////////////////////////////////////////////// - void wasm_irrev_vert_wvlt_K(const line_buf *line_src, line_buf *line_dst, - bool L_analysis_or_H_synthesis, ui32 repeat) + void wasm_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { - float *dst = line_dst->f32; - const float *src = line_src->f32; + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + v128_t vb = 
wasm_i32x4_splat(b); - float f = LIFTING_FACTORS::K_inv; - f = L_analysis_or_H_synthesis ? f : LIFTING_FACTORS::K; - v128_t factor = wasm_f32x4_splat(f); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src+=4) - { - v128_t s = wasm_v128_load(src); - wasm_v128_store(dst, wasm_f32x4_mul(factor, s)); + si32* dst = aug->i32; + const si32* src1 = sig->i32, * src2 = other->i32; + // The general definition of the wavelet in Part 2 is slightly + // different to part 2, although they are mathematically equivalent + // here, we identify the simpler form from Part 1 and employ them + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_add(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dst, d); + } + else + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_add(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dst, d); + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t w = wasm_i32x4_shr(t, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dst, d); + } + else + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = 
wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t w = wasm_i32x4_shr(t, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dst, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_sub(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dst, d); + } + else + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_sub(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dst, d); + } + } + else { // general case + // 32bit multiplication is not supported in sse2; we need sse4.1, + // where we can use _mm_mullo_epi32, which multiplies 32bit x 32bit, + // keeping the LSBs + if (synthesis) + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b + a * (*src1++ + *src2++)) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ += (b + a * (*src1++ + *src2++)) >> e; } } ///////////////////////////////////////////////////////////////////////// - void wasm_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, - bool even) + void wasm_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) { if (width > 1) { - float *src = line_src->f32; - float *ldst = line_ldst->f32, *hdst = line_hdst->f32; - - const ui32 L_width = (width + (even ? 
1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - //extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const float* sp = src + (even ? 1 : 0); - float *dph = hdst; - v128_t factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[0]); - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4) - { //this is doing twice the work it needs to do - //it can be definitely written better - v128_t s1 = wasm_v128_load(sp - 1); - v128_t s2 = wasm_v128_load(sp + 1); - v128_t d = wasm_v128_load(sp); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - v128_t d1 = wasm_f32x4_add(d, s1); - sp += 4; - s1 = wasm_v128_load(sp - 1); - s2 = wasm_v128_load(sp + 1); - d = wasm_v128_load(sp); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - v128_t d2 = wasm_f32x4_add(d, s1); - sp += 4; - d = wasm_i32x4_shuffle(d1, d2, 0, 2, 4, 6); - wasm_v128_store(dph, d); - } + // combine both lsrc and hsrc into dst + wasm_deinterleave(ldst->f32, hdst->f32, src->f32, (int)width, even); - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[1]); - sp = src + (even ? 0 : 1); - const float* sph = hdst + (even ? 0 : 1); - float *dpl = ldst; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sp+=8, sph+=4, dpl+=4) + si32* hp = hdst->i32, * lp = ldst->i32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 
0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) { - v128_t s1 = wasm_v128_load(sph - 1); - v128_t s2 = wasm_v128_load(sph); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - v128_t d1 = wasm_v128_load(sp); - v128_t d2 = wasm_v128_load(sp + 4); - v128_t d = wasm_i32x4_shuffle(d1, d2, 0, 2, 4, 6); - d = wasm_f32x4_add(d, s1); - wasm_v128_store(dpl, d); - } + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + v128_t vb = wasm_i32x4_splat(b); - //extension - ldst[-1] = ldst[0]; - ldst[L_width] = ldst[L_width-1]; - //predict - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[2]); - const float* spl = ldst + (even ? 1 : 0); - dph = hdst; - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, spl+=4, dph+=4) - { - v128_t s1 = wasm_v128_load(spl - 1); - v128_t s2 = wasm_v128_load(spl); - v128_t d = wasm_v128_load(dph); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - d = wasm_f32x4_add(d, s1); - wasm_v128_store(dph, d); - } + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const si32* sp = lp; + si32* dp = hp; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)h_width; + if (even) + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_add(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + else + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_add(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = 
wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t w = wasm_i32x4_shr(t, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t w = wasm_i32x4_shr(t, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_sub(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_sub(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + else { + // general case + // 32bit multiplication is not supported in sse2; we need sse4.1, + // where we can use _mm_mullo_epi32, which multiplies + // 32bit x 32bit, keeping the LSBs + if (even) + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += (b + a * (sp[0] + sp[1])) >> e; + else + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += 
(b + a * (sp[-1] + sp[0])) >> e; + } - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[3]); - sph = hdst + (even ? 0 : 1); - dpl = ldst; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sph+=4, dpl+=4) - { - v128_t s1 = wasm_v128_load(sph - 1); - v128_t s2 = wasm_v128_load(sph); - v128_t d = wasm_v128_load(dpl); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - d = wasm_f32x4_add(d, s1); - wasm_v128_store(dpl, d); - } - - //multipliers - float *dp = ldst; - factor = wasm_f32x4_splat(LIFTING_FACTORS::K_inv); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dp+=4) - { - v128_t d = wasm_v128_load(dp); - wasm_v128_store(dp, wasm_f32x4_mul(factor, d)); - } - dp = hdst; - factor = wasm_f32x4_splat(LIFTING_FACTORS::K); - for (int i = (H_width + 3) >> 2; i > 0; --i, dp+=4) - { - v128_t d = wasm_v128_load(dp); - wasm_v128_store(dp, wasm_f32x4_mul(factor, d)); + // swap buffers + si32* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; } } - else - { + else { if (even) - line_ldst->f32[0] = line_src->f32[0]; + ldst->i32[0] = src->i32[0]; else - line_hdst->f32[0] = line_src->f32[0] + line_src->f32[0]; + hdst->i32[0] = src->i32[0] << 1; } } - - ///////////////////////////////////////////////////////////////////////// - void wasm_irrev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, - bool even) + + ////////////////////////////////////////////////////////////////////////// + void wasm_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { - float *lsrc = line_lsrc->f32, *hsrc = line_hsrc->f32; - float *dst = line_dst->f32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 
0 : 1)) >> 1; - - //multipliers - float *dp = lsrc; - v128_t factor = wasm_f32x4_splat(LIFTING_FACTORS::K); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dp+=4) - { - v128_t d = wasm_v128_load(dp); - wasm_v128_store(dp, wasm_f32x4_mul(factor, d)); - } - dp = hsrc; - factor = wasm_f32x4_splat(LIFTING_FACTORS::K_inv); - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dp+=4) - { - v128_t d = wasm_v128_load(dp); - wasm_v128_store(dp, wasm_f32x4_mul(factor, d)); - } - - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[7]); - const float *sph = hsrc + (even ? 0 : 1); - float *dpl = lsrc; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dpl+=4, sph+=4) + bool ev = even; + si32* oth = hsrc->i32, * aug = lsrc->i32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) { - v128_t s1 = wasm_v128_load(sph - 1); - v128_t s2 = wasm_v128_load(sph); - v128_t d = wasm_v128_load(dpl); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - d = wasm_f32x4_add(d, s1); - wasm_v128_store(dpl, d); - } + const lifting_step* s = atk->get_step(j); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + v128_t vb = wasm_i32x4_splat(b); - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[6]); - const float *spl = lsrc + (even ? 
0 : -1); - float *dph = hsrc; - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4, spl+=4) - { - v128_t s1 = wasm_v128_load(spl); - v128_t s2 = wasm_v128_load(spl + 1); - v128_t d = wasm_v128_load(dph); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - d = wasm_f32x4_add(d, s1); - wasm_v128_store(dph, d); - } + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const si32* sp = oth; + si32* dp = aug; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)aug_width; + if (ev) + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_add(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + else + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_add(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t w = wasm_i32x4_shr(t, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t w = wasm_i32x4_shr(t, e); + d = wasm_i32x4_add(d, w); + 
wasm_v128_store((v128_t*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_sub(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_sub(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + else { + // general case + // 32bit multiplication is not supported in sse2; we need sse4.1, + // where we can use _mm_mullo_epi32, which multiplies + // 32bit x 32bit, keeping the LSBs + if (ev) + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= (b + a * (sp[-1] + sp[0])) >> e; + else + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= (b + a * (sp[0] + sp[1])) >> e; + } - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[5]); - sph = hsrc + (even ? 
0 : 1); - dpl = lsrc; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dpl+=4, sph+=4) - { - v128_t s1 = wasm_v128_load(sph - 1); - v128_t s2 = wasm_v128_load(sph); - v128_t d = wasm_v128_load(dpl); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - d = wasm_f32x4_add(d, s1); - wasm_v128_store(dpl, d); + // swap buffers + si32* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; } - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict and combine - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[4]); - dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - ui32 width = L_width + (even ? 0 : 1); - for (ui32 i = (width + 3) >> 2; i > 0; --i, spl+=4, sph+=4, dp+=8) - { - v128_t s1 = wasm_v128_load(spl); - v128_t s2 = wasm_v128_load(spl + 1); - v128_t d = wasm_v128_load(sph); - s2 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - d = wasm_f32x4_add(d, s2); - wasm_v128_store(dp, wasm_i32x4_shuffle(s1, d, 0, 4, 1, 5)); - wasm_v128_store(dp + 4, wasm_i32x4_shuffle(s1, d, 2, 6, 3, 7)); - } + // combine both lsrc and hsrc into dst + wasm_interleave(dst->f32, lsrc->f32, hsrc->f32, (int)width, even); } - else - { + else { if (even) - line_dst->f32[0] = line_lsrc->f32[0]; + dst->i32[0] = lsrc->i32[0]; else - line_dst->f32[0] = line_hsrc->f32[0] * 0.5f; + dst->i32[0] = hsrc->i32[0] >> 1; } } - - } -} + + } // !local +} // !ojph diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 48c8f67d..000409ff 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -69,10 +69,22 @@ else() COMMAND ${CMAKE_COMMAND} -E copy "$" "./" COMMAND ${CMAKE_COMMAND} -E copy "$" "./" ) + if(EMSCRIPTEN) + add_custom_command(TARGET test_executables POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "$" "./" + COMMAND ${CMAKE_COMMAND} -E copy "$" "./" + ) + add_custom_command(TARGET test_executables POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "$/ojph_expand.wasm" "./" + COMMAND 
${CMAKE_COMMAND} -E copy "$/ojph_compress.wasm" "./" + COMMAND ${CMAKE_COMMAND} -E copy "$/ojph_expand_simd.wasm" "./" + COMMAND ${CMAKE_COMMAND} -E copy "$/ojph_compress_simd.wasm" "./" + ) + endif(EMSCRIPTEN) if(MSYS) add_custom_command(TARGET test_executables POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy "../bin/msys-gtest.dll" "./" COMMAND ${CMAKE_COMMAND} -E copy "../bin/msys-gtest_main.dll" "./" ) - endif() -endif() + endif(MSYS) +endif(MSVC) From 21bc405c991dbcad3aae7876f157245ca41cbe3d Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 20:08:27 +1000 Subject: [PATCH 34/37] Wasm simd is buggy. --- src/apps/ojph_compress/CMakeLists.txt | 1 + src/apps/ojph_expand/CMakeLists.txt | 1 + src/core/transform/ojph_transform_wasm.cpp | 110 ++++++++++++++++----- 3 files changed, 86 insertions(+), 26 deletions(-) diff --git a/src/apps/ojph_compress/CMakeLists.txt b/src/apps/ojph_compress/CMakeLists.txt index dadcca9b..27723789 100644 --- a/src/apps/ojph_compress/CMakeLists.txt +++ b/src/apps/ojph_compress/CMakeLists.txt @@ -18,6 +18,7 @@ source_group("common" FILES ${OJPH_IMG_IO_H}) if(EMSCRIPTEN) add_compile_options(-std=c++11 -O3 -fexceptions) + add_link_options(-sWASM=1 -sASSERTIONS=1 -sALLOW_MEMORY_GROWTH=1 -sNODERAWFS=1 -sENVIRONMENT=node -sEXIT_RUNTIME=1 -sEXCEPTION_CATCHING_ALLOWED=['fake']) add_executable(ojph_compress ${SOURCES}) add_executable(ojph_compress_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) target_compile_options(ojph_compress_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) diff --git a/src/apps/ojph_expand/CMakeLists.txt b/src/apps/ojph_expand/CMakeLists.txt index d4b65523..ac650c38 100644 --- a/src/apps/ojph_expand/CMakeLists.txt +++ b/src/apps/ojph_expand/CMakeLists.txt @@ -18,6 +18,7 @@ source_group("common" FILES ${OJPH_IMG_IO_H}) if(EMSCRIPTEN) add_compile_options(-std=c++11 -O3 -fexceptions) + add_link_options(-sWASM=1 -sASSERTIONS=1 -sALLOW_MEMORY_GROWTH=1 -sNODERAWFS=1 -sENVIRONMENT=node -sEXIT_RUNTIME=1 
-sEXCEPTION_CATCHING_ALLOWED=['fake']) add_executable(ojph_expand ${SOURCES}) add_executable(ojph_expand_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) target_compile_options(ojph_expand_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) diff --git a/src/core/transform/ojph_transform_wasm.cpp b/src/core/transform/ojph_transform_wasm.cpp index 7b9ffb10..83cee30c 100644 --- a/src/core/transform/ojph_transform_wasm.cpp +++ b/src/core/transform/ojph_transform_wasm.cpp @@ -305,6 +305,7 @@ namespace ojph { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; const ui32 e = s->rev.Eatk; + v128_t va = wasm_i32x4_splat(a); v128_t vb = wasm_i32x4_splat(b); si32* dst = aug->i32; @@ -394,16 +395,35 @@ namespace ojph { wasm_v128_store((v128_t*)dst, d); } } - else { // general case - // 32bit multiplication is not supported in sse2; we need sse4.1, - // where we can use _mm_mullo_epi32, which multiplies 32bit x 32bit, - // keeping the LSBs + else + { // general case + int i = (int)repeat; if (synthesis) - for (ui32 i = repeat; i > 0; --i) - *dst++ -= (b + a * (*src1++ + *src2++)) >> e; + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t u = wasm_i32x4_mul(va, t); + v128_t v = wasm_i32x4_add(vb, u); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dst, d); + } else - for (ui32 i = repeat; i > 0; --i) - *dst++ += (b + a * (*src1++ + *src2++)) >> e; + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t u = wasm_i32x4_mul(va, t); + v128_t v = wasm_i32x4_add(vb, u); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dst, d); + } } } @@ 
-428,6 +448,7 @@ namespace ojph { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; const ui32 e = s->rev.Eatk; + v128_t va = wasm_i32x4_splat(a); v128_t vb = wasm_i32x4_splat(b); // extension @@ -522,17 +543,35 @@ namespace ojph { wasm_v128_store((v128_t*)dp, d); } } - else { - // general case - // 32bit multiplication is not supported in sse2; we need sse4.1, - // where we can use _mm_mullo_epi32, which multiplies - // 32bit x 32bit, keeping the LSBs + else + { // general case + int i = (int)h_width; if (even) - for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp += (b + a * (sp[0] + sp[1])) >> e; + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t u = wasm_i32x4_mul(va, t); + v128_t v = wasm_i32x4_add(vb, u); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } else - for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp += (b + a * (sp[-1] + sp[0])) >> e; + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t u = wasm_i32x4_mul(va, t); + v128_t v = wasm_i32x4_add(vb, u); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } } // swap buffers @@ -567,6 +606,7 @@ namespace ojph { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; const ui32 e = s->rev.Eatk; + v128_t va = wasm_i32x4_splat(a); v128_t vb = wasm_i32x4_splat(b); // extension @@ -661,17 +701,35 @@ namespace ojph { wasm_v128_store((v128_t*)dp, d); } } - else { - // general case - // 32bit multiplication is not supported in sse2; we need sse4.1, - // where we can use _mm_mullo_epi32, which multiplies - // 32bit x 32bit, keeping the LSBs + else + { // general 
case + int i = (int)aug_width; if (ev) - for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp -= (b + a * (sp[-1] + sp[0])) >> e; + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t u = wasm_i32x4_mul(va, t); + v128_t v = wasm_i32x4_add(vb, u); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } else - for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp -= (b + a * (sp[0] + sp[1])) >> e; + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t u = wasm_i32x4_mul(va, t); + v128_t v = wasm_i32x4_add(vb, u); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } } // swap buffers From e40fa17ccbd44e49251a880813a1b513fe184d6b Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 20:12:30 +1000 Subject: [PATCH 35/37] A small bug fix. 
--- src/core/transform/ojph_transform_wasm.cpp | 4 ++-- tests/test_executables.cpp | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/core/transform/ojph_transform_wasm.cpp b/src/core/transform/ojph_transform_wasm.cpp index 83cee30c..bd652dfa 100644 --- a/src/core/transform/ojph_transform_wasm.cpp +++ b/src/core/transform/ojph_transform_wasm.cpp @@ -550,7 +550,7 @@ namespace ojph { for (; i > 0; i -= 4, sp += 4, dp += 4) { v128_t s1 = wasm_v128_load((v128_t*)sp); - v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); v128_t d = wasm_v128_load((v128_t*)dp); v128_t t = wasm_i32x4_add(s1, s2); v128_t u = wasm_i32x4_mul(va, t); @@ -563,7 +563,7 @@ namespace ojph { for (; i > 0; i -= 4, sp += 4, dp += 4) { v128_t s1 = wasm_v128_load((v128_t*)sp); - v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); v128_t d = wasm_v128_load((v128_t*)dp); v128_t t = wasm_i32x4_add(s1, s2); v128_t u = wasm_i32x4_mul(va, t); diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index f42174f6..99b4f8c0 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -128,7 +128,6 @@ void run_ojph_compress(const std::string& ref_filename, + " -i " + REF_FILE_DIR + ref_filename + " -o " + OUT_FILE_DIR + base_filename + extended_base_fname + "." + out_ext + " " + extra_options; - std::cerr << command << std::endl; EXPECT_EQ(execute(command, result), 0); } catch (const std::runtime_error& error) { From a92f9216bd81e482d62e8995be405cd32d3b8c77 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 21:52:16 +1000 Subject: [PATCH 36/37] Added one test. 
--- tests/test_executables.cpp | 646 +++++++++++++++-------------- tests/test_helpers/ht_cmdlines.txt | 1 + 2 files changed, 332 insertions(+), 315 deletions(-) diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index 99b4f8c0..8660f9d1 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -274,11 +274,11 @@ TEST(TestExecutables, OpenJPHExpandNoArguments) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_64x64.jph -precise -quiet -rate 0.5 -full TEST(TestExecutables, SimpleDecIrv9764x64) { - double mse[3] = { 39.2812, 36.3819, 47.642 }; - int pae[3] = { 74, 77, 73 }; + double mse[3] = { 39.2812, 36.3819, 47.642}; + int pae[3] = { 74, 77, 73}; run_ojph_expand("simple_dec_irv97_64x64", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -286,11 +286,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_32x32.jph -precise -quiet -rate 1 Cblk={32,32} -full TEST(TestExecutables, SimpleDecIrv9732x32) { - double mse[3] = { 18.6979, 17.1208, 22.7539 }; - int pae[3] = { 51, 48, 46 }; + double mse[3] = { 18.6979, 17.1208, 22.7539}; + int pae[3] = { 51, 48, 46}; run_ojph_expand("simple_dec_irv97_32x32", "jph", "ppm"); run_mse_pae("simple_dec_irv97_32x32", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -298,11 +298,11 @@ TEST(TestExecutables, SimpleDecIrv9732x32) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_16x16.jph -precise -quiet -rate 1 Cblk={16,16} -full TEST(TestExecutables, SimpleDecIrv9716x16) { - double mse[3] = { 20.1706, 18.5427, 24.6146 }; - int pae[3] = { 53, 51, 47 }; + double mse[3] = { 20.1706, 18.5427, 24.6146}; + int pae[3] = { 53, 51, 47}; 
run_ojph_expand("simple_dec_irv97_16x16", "jph", "ppm"); run_mse_pae("simple_dec_irv97_16x16", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -310,11 +310,11 @@ TEST(TestExecutables, SimpleDecIrv9716x16) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_4x4.jph -precise -quiet -rate 1 Cblk={4,4} -full TEST(TestExecutables, SimpleDecIrv974x4) { - double mse[3] = { 40.8623, 37.9308, 49.7276 }; - int pae[3] = { 75, 77, 80 }; + double mse[3] = { 40.8623, 37.9308, 49.7276}; + int pae[3] = { 75, 77, 80}; run_ojph_expand("simple_dec_irv97_4x4", "jph", "ppm"); run_mse_pae("simple_dec_irv97_4x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -322,11 +322,11 @@ TEST(TestExecutables, SimpleDecIrv974x4) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_1024x4.jph -precise -quiet -rate 1 Cblk={1024,4} -full TEST(TestExecutables, SimpleDecIrv971024x4) { - double mse[3] = { 19.8275, 18.2511, 24.2832 }; - int pae[3] = { 53, 52, 50 }; + double mse[3] = { 19.8275, 18.2511, 24.2832}; + int pae[3] = { 53, 52, 50}; run_ojph_expand("simple_dec_irv97_1024x4", "jph", "ppm"); run_mse_pae("simple_dec_irv97_1024x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -334,11 +334,11 @@ TEST(TestExecutables, SimpleDecIrv971024x4) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_4x1024.jph -precise -quiet -rate 1 Cblk={4,1024} -full TEST(TestExecutables, SimpleDecIrv974x1024) { - double mse[3] = { 19.9635, 18.4063, 24.1719 }; - int pae[3] = { 51, 48, 51 }; + double mse[3] = { 19.9635, 18.4063, 24.1719}; + int pae[3] = { 51, 48, 51}; run_ojph_expand("simple_dec_irv97_4x1024", "jph", "ppm"); 
run_mse_pae("simple_dec_irv97_4x1024", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -346,11 +346,11 @@ TEST(TestExecutables, SimpleDecIrv974x1024) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_512x8.jph -precise -quiet -rate 1 Cblk={512,8} -full TEST(TestExecutables, SimpleDecIrv97512x8) { - double mse[3] = { 18.7929, 17.2026, 22.9922 }; - int pae[3] = { 53, 52, 50 }; + double mse[3] = { 18.7929, 17.2026, 22.9922}; + int pae[3] = { 53, 52, 50}; run_ojph_expand("simple_dec_irv97_512x8", "jph", "ppm"); run_mse_pae("simple_dec_irv97_512x8", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -358,11 +358,11 @@ TEST(TestExecutables, SimpleDecIrv97512x8) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_8x512.jph -precise -quiet -rate 1 Cblk={8,512} -full TEST(TestExecutables, SimpleDecIrv978x512) { - double mse[3] = { 19.3661, 17.8067, 23.4574 }; - int pae[3] = { 51, 48, 52 }; + double mse[3] = { 19.3661, 17.8067, 23.4574}; + int pae[3] = { 51, 48, 52}; run_ojph_expand("simple_dec_irv97_8x512", "jph", "ppm"); run_mse_pae("simple_dec_irv97_8x512", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -370,11 +370,11 @@ TEST(TestExecutables, SimpleDecIrv978x512) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_256x16.jph -precise -quiet -rate 1 Cblk={256,16} -full TEST(TestExecutables, SimpleDecIrv97256x16) { - double mse[3] = { 18.6355, 17.0963, 22.6076 }; - int pae[3] = { 54, 51, 48 }; + double mse[3] = { 18.6355, 17.0963, 22.6076}; + int pae[3] = { 54, 51, 48}; run_ojph_expand("simple_dec_irv97_256x16", "jph", "ppm"); run_mse_pae("simple_dec_irv97_256x16", "ppm", "Malamute.ppm", - "", 3, mse, 
pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -382,11 +382,11 @@ TEST(TestExecutables, SimpleDecIrv97256x16) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_16x256.jph -precise -quiet -rate 1 Cblk={16,256} -full TEST(TestExecutables, SimpleDecIrv9716x256) { - double mse[3] = { 18.5933, 17.0208, 22.5709 }; - int pae[3] = { 51, 48, 47 }; + double mse[3] = { 18.5933, 17.0208, 22.5709}; + int pae[3] = { 51, 48, 47}; run_ojph_expand("simple_dec_irv97_16x256", "jph", "ppm"); run_mse_pae("simple_dec_irv97_16x256", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -394,11 +394,11 @@ TEST(TestExecutables, SimpleDecIrv9716x256) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_128x32.jph -precise -quiet -rate 1 Cblk={128,32} -full TEST(TestExecutables, SimpleDecIrv97128x32) { - double mse[3] = { 18.4443, 16.9133, 22.4193 }; - int pae[3] = { 52, 50, 46 }; + double mse[3] = { 18.4443, 16.9133, 22.4193}; + int pae[3] = { 52, 50, 46}; run_ojph_expand("simple_dec_irv97_128x32", "jph", "ppm"); run_mse_pae("simple_dec_irv97_128x32", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -406,11 +406,11 @@ TEST(TestExecutables, SimpleDecIrv97128x32) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_32x128.jph -precise -quiet -rate 1 Cblk={32,128} -full TEST(TestExecutables, SimpleDecIrv9732x128) { - double mse[3] = { 18.4874, 16.9379, 22.4855 }; - int pae[3] = { 51, 48, 45 }; + double mse[3] = { 18.4874, 16.9379, 22.4855}; + int pae[3] = { 51, 48, 45}; run_ojph_expand("simple_dec_irv97_32x128", "jph", "ppm"); run_mse_pae("simple_dec_irv97_32x128", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -418,11 +418,11 @@ TEST(TestExecutables, SimpleDecIrv9732x128) { // Command-line options used to obtain this file is: // -o simple_dec_rev53_64x64.jph -precise -quiet Creversible=yes -full TEST(TestExecutables, SimpleDecRev5364x64) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_64x64", "jph", "ppm"); run_mse_pae("simple_dec_rev53_64x64", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -431,11 +431,11 @@ TEST(TestExecutables, SimpleDecRev5364x64) { // -o simple_dec_rev53_32x32.jph -precise -quiet Creversible=yes Cblk={32,32} // -full TEST(TestExecutables, SimpleDecRev5332x32) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_32x32", "jph", "ppm"); run_mse_pae("simple_dec_rev53_32x32", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -444,11 +444,11 @@ TEST(TestExecutables, SimpleDecRev5332x32) { // -o simple_dec_rev53_4x4.jph -precise -quiet Creversible=yes Cblk={4,4} // -full TEST(TestExecutables, SimpleDecRev534x4) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_4x4", "jph", "ppm"); run_mse_pae("simple_dec_rev53_4x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -457,11 +457,11 @@ TEST(TestExecutables, SimpleDecRev534x4) { // -o simple_dec_rev53_1024x4.jph -precise -quiet Creversible=yes // Cblk={1024,4} -full TEST(TestExecutables, SimpleDecRev531024x4) { - double mse[3] = { 0, 0, 
0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_1024x4", "jph", "ppm"); run_mse_pae("simple_dec_rev53_1024x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -470,11 +470,11 @@ TEST(TestExecutables, SimpleDecRev531024x4) { // -o simple_dec_rev53_4x1024.jph -precise -quiet Creversible=yes // Cblk={4,1024} -full TEST(TestExecutables, SimpleDecRev534x1024) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_4x1024", "jph", "ppm"); run_mse_pae("simple_dec_rev53_4x1024", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -485,11 +485,11 @@ TEST(TestExecutables, SimpleDecRev534x1024) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} -full TEST(TestExecutables, SimpleDecIrv9764x64Yuv) { - double mse[3] = { 20.2778, 6.27912, 4.15937 }; - int pae[3] = { 52, 22, 31 }; + double mse[3] = { 20.2778, 6.27912, 4.15937}; + int pae[3] = { 52, 22, 31}; run_ojph_expand("simple_dec_irv97_64x64_yuv", "jph", "yuv"); run_mse_pae("simple_dec_irv97_64x64_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -500,11 +500,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64Yuv) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} -full TEST(TestExecutables, SimpleDecRev5364x64Yuv) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_64x64_yuv", "jph", "yuv"); run_mse_pae("simple_dec_rev53_64x64_yuv", "yuv", 
"foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -515,11 +515,11 @@ TEST(TestExecutables, SimpleDecRev5364x64Yuv) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} Stiles={33,257} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesYuv) { - double mse[3] = { 34.4972, 10.1112, 7.96331 }; - int pae[3] = { 67, 30, 39 }; + double mse[3] = { 34.4972, 10.1112, 7.96331}; + int pae[3] = { 67, 30, 39}; run_ojph_expand("simple_dec_irv97_64x64_tiles_yuv", "jph", "yuv"); run_mse_pae("simple_dec_irv97_64x64_tiles_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -530,11 +530,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesYuv) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} Stiles={33,257} -full TEST(TestExecutables, SimpleDecRev5364x64TilesYuv) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_64x64_tiles_yuv", "jph", "yuv"); run_mse_pae("simple_dec_rev53_64x64_tiles_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -544,11 +544,11 @@ TEST(TestExecutables, SimpleDecRev5364x64TilesYuv) { // Clevels=5 Corder=LRCP Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP) { - double mse[3] = { 71.8149, 68.7115, 89.4001 }; - int pae[3] = { 78, 78, 83 }; + double mse[3] = { 71.8149, 68.7115, 89.4001}; + int pae[3] = { 78, 78, 83}; run_ojph_expand("simple_dec_irv97_64x64_tiles_LRCP", "jph", "ppm"); 
run_mse_pae("simple_dec_irv97_64x64_tiles_LRCP", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -558,11 +558,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP) { // Clevels=5 Corder=RLCP Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP) { - double mse[3] = { 71.8149, 68.7115, 89.4001 }; - int pae[3] = { 78, 78, 83 }; + double mse[3] = { 71.8149, 68.7115, 89.4001}; + int pae[3] = { 78, 78, 83}; run_ojph_expand("simple_dec_irv97_64x64_tiles_RLCP", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RLCP", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -572,11 +572,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP) { // Clevels=5 Corder=RPCL Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL) { - double mse[3] = { 71.8149, 68.7115, 89.4001 }; - int pae[3] = { 78, 78, 83 }; + double mse[3] = { 71.8149, 68.7115, 89.4001}; + int pae[3] = { 78, 78, 83}; run_ojph_expand("simple_dec_irv97_64x64_tiles_RPCL", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RPCL", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -586,11 +586,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL) { // Clevels=5 Corder=PCRL Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL) { - double mse[3] = { 71.8149, 68.7115, 89.4001 }; - int pae[3] = { 78, 78, 83 }; + double mse[3] = { 71.8149, 68.7115, 89.4001}; + int pae[3] = { 78, 78, 83}; run_ojph_expand("simple_dec_irv97_64x64_tiles_PCRL", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_PCRL", "ppm", 
"Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -600,11 +600,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL) { // Clevels=5 Corder=CPRL Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL) { - double mse[3] = { 71.8149, 68.7115, 89.4001 }; - int pae[3] = { 78, 78, 83 }; + double mse[3] = { 71.8149, 68.7115, 89.4001}; + int pae[3] = { 78, 78, 83}; run_ojph_expand("simple_dec_irv97_64x64_tiles_CPRL", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_CPRL", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -614,11 +614,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL) { // Clevels=5 Corder=LRCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33) { - double mse[3] = { 56.2139, 51.4121, 69.0107 }; - int pae[3] = { 80, 81, 98 }; + double mse[3] = { 56.2139, 51.4121, 69.0107}; + int pae[3] = { 80, 81, 98}; run_ojph_expand("simple_dec_irv97_64x64_tiles_LRCP33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_LRCP33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -628,11 +628,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33) { // Clevels=5 Corder=RLCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33) { - double mse[3] = { 56.2139, 51.4121, 69.0107 }; - int pae[3] = { 80, 81, 98 }; + double mse[3] = { 56.2139, 51.4121, 69.0107}; + int pae[3] = { 80, 81, 98}; run_ojph_expand("simple_dec_irv97_64x64_tiles_RLCP33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RLCP33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -642,11 +642,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33) { // Clevels=5 Corder=RPCL Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33) { - double mse[3] = { 56.2139, 51.4121, 69.0107 }; - int pae[3] = { 80, 81, 98 }; + double mse[3] = { 56.2139, 51.4121, 69.0107}; + int pae[3] = { 80, 81, 98}; run_ojph_expand("simple_dec_irv97_64x64_tiles_RPCL33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RPCL33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -656,11 +656,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33) { // Clevels=5 Corder=PCRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33) { - double mse[3] = { 56.2139, 51.4121, 69.0107 }; - int pae[3] = { 80, 81, 98 }; + double mse[3] = { 56.2139, 51.4121, 69.0107}; + int pae[3] = { 80, 81, 98}; run_ojph_expand("simple_dec_irv97_64x64_tiles_PCRL33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_PCRL33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -670,11 +670,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33) { // Clevels=5 Corder=CPRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33) { - double mse[3] = { 56.2139, 51.4121, 69.0107 }; - int pae[3] = { 80, 81, 98 }; + double mse[3] = { 56.2139, 51.4121, 69.0107}; + int pae[3] = { 80, 81, 98}; run_ojph_expand("simple_dec_irv97_64x64_tiles_CPRL33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_CPRL33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -684,11 +684,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33) { // Clevels=5 Corder=LRCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33x33) { - double mse[3] = { 210.283, 210.214, 257.276 }; - int pae[3] = { 165, 161, 166 }; + double mse[3] = { 210.283, 210.214, 257.276}; + int pae[3] = { 165, 161, 166}; run_ojph_expand("simple_dec_irv97_64x64_tiles_LRCP33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_LRCP33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -698,11 +698,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33x33) { // Clevels=5 Corder=RLCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33x33) { - double mse[3] = { 210.283, 210.214, 257.276 }; - int pae[3] = { 165, 161, 166 }; + double mse[3] = { 210.283, 210.214, 257.276}; + int pae[3] = { 165, 161, 166}; run_ojph_expand("simple_dec_irv97_64x64_tiles_RLCP33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RLCP33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -712,11 +712,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33x33) { // Clevels=5 Corder=RPCL Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33x33) { - double mse[3] = { 210.283, 210.214, 257.276 }; - int pae[3] = { 165, 161, 166 }; + double mse[3] = { 210.283, 210.214, 257.276}; + int pae[3] = { 165, 161, 166}; run_ojph_expand("simple_dec_irv97_64x64_tiles_RPCL33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RPCL33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -726,11 +726,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33x33) { // Clevels=5 Corder=PCRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33x33) { - double mse[3] = { 210.283, 210.214, 257.276 }; - int pae[3] = { 165, 161, 166 }; + double mse[3] = { 210.283, 210.214, 257.276}; + int pae[3] = { 165, 161, 166}; run_ojph_expand("simple_dec_irv97_64x64_tiles_PCRL33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_PCRL33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -740,11 +740,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33x33) { // Clevels=5 Corder=CPRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33x33) { - double mse[3] = { 210.283, 210.214, 257.276 }; - int pae[3] = { 165, 161, 166 }; + double mse[3] = { 210.283, 210.214, 257.276}; + int pae[3] = { 165, 161, 166}; run_ojph_expand("simple_dec_irv97_64x64_tiles_CPRL33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_CPRL33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -753,11 +753,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33x33) { // -o simple_dec_rev53_64x64_gray_tiles.jph -precise -quiet Creversible=yes // Clevels=5 Stiles={33,257} -full TEST(TestExecutables, SimpleDecRev5364x64GrayTiles) { - double mse[1] = { 0 }; - int pae[1] = { 0 }; + double mse[1] = { 0}; + int pae[1] = { 0}; run_ojph_expand("simple_dec_rev53_64x64_gray_tiles", "jph", "pgm"); run_mse_pae("simple_dec_rev53_64x64_gray_tiles", "pgm", "monarch.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// 
@@ -766,11 +766,11 @@ TEST(TestExecutables, SimpleDecRev5364x64GrayTiles) { // -o simple_dec_irv97_64x64_gray_tiles.jph -precise -quiet -rate 0.5 // Clevels=5 Stiles={33,257} -full TEST(TestExecutables, SimpleDecIrv9764x64GrayTiles) { - double mse[1] = { 18.9601 }; - int pae[1] = { 56 }; + double mse[1] = { 18.9601}; + int pae[1] = { 56}; run_ojph_expand("simple_dec_irv97_64x64_gray_tiles", "jph", "pgm"); run_mse_pae("simple_dec_irv97_64x64_gray_tiles", "pgm", "monarch.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -778,11 +778,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64GrayTiles) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_64x64_16bit.jph -precise -quiet -rate 0.5 -full TEST(TestExecutables, SimpleDecIrv9764x6416bit) { - double mse[3] = { 60507.2, 36672.5, 64809.8 }; - int pae[3] = { 2547, 1974, 1922 }; + double mse[3] = { 60507.2, 36672.5, 64809.8}; + int pae[3] = { 2547, 1974, 1922}; run_ojph_expand("simple_dec_irv97_64x64_16bit", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_16bit", "ppm", "mm.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -790,11 +790,11 @@ TEST(TestExecutables, SimpleDecIrv9764x6416bit) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_64x64_16bit_gray.jph -precise -quiet -rate 0.5 -full TEST(TestExecutables, SimpleDecIrv9764x6416bitGray) { - double mse[1] = { 19382.9 }; - int pae[1] = { 1618 }; + double mse[1] = { 19382.9}; + int pae[1] = { 1618}; run_ojph_expand("simple_dec_irv97_64x64_16bit_gray", "jph", "pgm"); run_mse_pae("simple_dec_irv97_64x64_16bit_gray", "pgm", "mm.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -802,11 +802,11 @@ TEST(TestExecutables, SimpleDecIrv9764x6416bitGray) { // Command-line options used to 
obtain this file is: // -o simple_dec_rev53_64x64_16bit.jph -precise -quiet Creversible=yes -full TEST(TestExecutables, SimpleDecRev5364x6416bit) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_64x64_16bit", "jph", "ppm"); run_mse_pae("simple_dec_rev53_64x64_16bit", "ppm", "mm.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -815,11 +815,27 @@ TEST(TestExecutables, SimpleDecRev5364x6416bit) { // -o simple_dec_rev53_64x64_16bit_gray.jph -precise -quiet Creversible=yes // -full TEST(TestExecutables, SimpleDecRev5364x6416bitGray) { - double mse[1] = { 0 }; - int pae[1] = { 0 }; + double mse[1] = { 0}; + int pae[1] = { 0}; run_ojph_expand("simple_dec_rev53_64x64_16bit_gray", "jph", "pgm"); run_mse_pae("simple_dec_rev53_64x64_16bit_gray", "pgm", "mm.pgm", - "", 1, mse, pae); + "", 1, mse, pae); +} + +/////////////////////////////////////////////////////////////////////////////// +// Test ojph_expand with codeblocks when the rev53 wavelet is used. 
+// Command-line options used to obtain this file is: +// -o simple_dec_irv53_bhvhb_low_latency.jph -quiet Corder=PCRL Clevels=5 +// "Cmodes=HT|CAUSAL" -rate 2 -o simple_dec_irv53_bhvhb_low_latency.jph Catk=2 +// Kkernels:I2=I5X3 Cprecincts="{16,8192},{8,8192},{4,8192}" Cblk="{8,256}" +// Cdecomp="B(-:-:-),H(-),V(-),H(-),B(-:-:-)" Qstep=0.0001 -precise -no_weights +// -tolerance 0 +TEST(TestExecutables, SimpleDecIrv53BhvhbLowLatency) { + double mse[3] = { 5.52392, 4.01405, 6.8166}; + int pae[3] = { 16, 17, 23}; + run_ojph_expand("simple_dec_irv53_bhvhb_low_latency", "jph", "ppm"); + run_mse_pae("simple_dec_irv53_bhvhb_low_latency", "ppm", "Malamute.ppm", + ":I2=I5X3 Cprecincts=", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -828,14 +844,14 @@ TEST(TestExecutables, SimpleDecRev5364x6416bitGray) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_64x64.j2c -qstep 0.1 TEST(TestExecutables, SimpleEncIrv9764x64) { - double mse[3] = { 46.2004, 43.622, 56.7452 }; - int pae[3] = { 48, 46, 52 }; + double mse[3] = { 46.2004, 43.622, 56.7452}; + int pae[3] = { 48, 46, 52}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_64x64", "", "j2c", - "-qstep 0.1"); + "simple_enc_irv97_64x64", "", "j2c", + "-qstep 0.1"); run_ojph_compress_expand("simple_enc_irv97_64x64", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -844,14 +860,14 @@ TEST(TestExecutables, SimpleEncIrv9764x64) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_32x32.j2c -qstep 0.01 -block_size {32,32} TEST(TestExecutables, SimpleEncIrv9732x32) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; 
run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_32x32", "", "j2c", - "-qstep 0.01 -block_size \"{32,32}\""); + "simple_enc_irv97_32x32", "", "j2c", + "-qstep 0.01 -block_size \"{32,32}\""); run_ojph_compress_expand("simple_enc_irv97_32x32", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_32x32", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -860,14 +876,14 @@ TEST(TestExecutables, SimpleEncIrv9732x32) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_16x16.j2c -qstep 0.01 -block_size {16,16} TEST(TestExecutables, SimpleEncIrv9716x16) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_16x16", "", "j2c", - "-qstep 0.01 -block_size \"{16,16}\""); + "simple_enc_irv97_16x16", "", "j2c", + "-qstep 0.01 -block_size \"{16,16}\""); run_ojph_compress_expand("simple_enc_irv97_16x16", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_16x16", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -876,14 +892,14 @@ TEST(TestExecutables, SimpleEncIrv9716x16) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_4x4.j2c -qstep 0.01 -block_size {4,4} TEST(TestExecutables, SimpleEncIrv974x4) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_4x4", "", "j2c", - "-qstep 0.01 -block_size \"{4,4}\""); + "simple_enc_irv97_4x4", "", "j2c", + "-qstep 0.01 -block_size \"{4,4}\""); run_ojph_compress_expand("simple_enc_irv97_4x4", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_4x4", 
"ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -892,14 +908,14 @@ TEST(TestExecutables, SimpleEncIrv974x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_1024x4.j2c -qstep 0.01 -block_size {4,1024} TEST(TestExecutables, SimpleEncIrv971024x4) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_1024x4", "", "j2c", - "-qstep 0.01 -block_size \"{4,1024}\""); + "simple_enc_irv97_1024x4", "", "j2c", + "-qstep 0.01 -block_size \"{4,1024}\""); run_ojph_compress_expand("simple_enc_irv97_1024x4", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_1024x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -908,14 +924,14 @@ TEST(TestExecutables, SimpleEncIrv971024x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_4x1024.j2c -qstep 0.01 -block_size {1024,4} TEST(TestExecutables, SimpleEncIrv974x1024) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_4x1024", "", "j2c", - "-qstep 0.01 -block_size \"{1024,4}\""); + "simple_enc_irv97_4x1024", "", "j2c", + "-qstep 0.01 -block_size \"{1024,4}\""); run_ojph_compress_expand("simple_enc_irv97_4x1024", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_4x1024", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -924,14 +940,14 @@ TEST(TestExecutables, SimpleEncIrv974x1024) { // The compressed file is obtained 
using these command-line options: // -o simple_enc_irv97_512x8.j2c -qstep 0.01 -block_size {8,512} TEST(TestExecutables, SimpleEncIrv97512x8) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_512x8", "", "j2c", - "-qstep 0.01 -block_size \"{8,512}\""); + "simple_enc_irv97_512x8", "", "j2c", + "-qstep 0.01 -block_size \"{8,512}\""); run_ojph_compress_expand("simple_enc_irv97_512x8", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_512x8", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -940,14 +956,14 @@ TEST(TestExecutables, SimpleEncIrv97512x8) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_8x512.j2c -qstep 0.01 -block_size {512,8} TEST(TestExecutables, SimpleEncIrv978x512) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_8x512", "", "j2c", - "-qstep 0.01 -block_size \"{512,8}\""); + "simple_enc_irv97_8x512", "", "j2c", + "-qstep 0.01 -block_size \"{512,8}\""); run_ojph_compress_expand("simple_enc_irv97_8x512", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_8x512", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -956,14 +972,14 @@ TEST(TestExecutables, SimpleEncIrv978x512) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_256x16.j2c -qstep 0.01 -block_size {16,256} TEST(TestExecutables, SimpleEncIrv97256x16) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int 
pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_256x16", "", "j2c", - "-qstep 0.01 -block_size \"{16,256}\""); + "simple_enc_irv97_256x16", "", "j2c", + "-qstep 0.01 -block_size \"{16,256}\""); run_ojph_compress_expand("simple_enc_irv97_256x16", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_256x16", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -972,14 +988,14 @@ TEST(TestExecutables, SimpleEncIrv97256x16) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_16x256.j2c -qstep 0.01 -block_size {256,16} TEST(TestExecutables, SimpleEncIrv9716x256) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_16x256", "", "j2c", - "-qstep 0.01 -block_size \"{256,16}\""); + "simple_enc_irv97_16x256", "", "j2c", + "-qstep 0.01 -block_size \"{256,16}\""); run_ojph_compress_expand("simple_enc_irv97_16x256", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_16x256", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -988,14 +1004,14 @@ TEST(TestExecutables, SimpleEncIrv9716x256) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_128x32.j2c -qstep 0.01 -block_size {32,128} TEST(TestExecutables, SimpleEncIrv97128x32) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_128x32", "", "j2c", - "-qstep 0.01 -block_size \"{32,128}\""); + "simple_enc_irv97_128x32", "", "j2c", + "-qstep 0.01 -block_size \"{32,128}\""); 
run_ojph_compress_expand("simple_enc_irv97_128x32", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_128x32", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1004,14 +1020,14 @@ TEST(TestExecutables, SimpleEncIrv97128x32) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_32x128.j2c -qstep 0.01 -block_size {128,32} TEST(TestExecutables, SimpleEncIrv9732x128) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_32x128", "", "j2c", - "-qstep 0.01 -block_size \"{128,32}\""); + "simple_enc_irv97_32x128", "", "j2c", + "-qstep 0.01 -block_size \"{128,32}\""); run_ojph_compress_expand("simple_enc_irv97_32x128", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_32x128", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1021,14 +1037,14 @@ TEST(TestExecutables, SimpleEncIrv9732x128) { // -o simple_enc_irv97_64x64_tiles_33x33_d5.j2c -qstep 0.01 -tile_size {33,33} // -num_decomps 5 TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D5) { - double mse[3] = { 1.88906, 1.30757, 2.5347 }; - int pae[3] = { 9, 6, 10 }; + double mse[3] = { 1.88906, 1.30757, 2.5347}; + int pae[3] = { 9, 6, 10}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_64x64_tiles_33x33_d5", "", "j2c", - "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 5"); + "simple_enc_irv97_64x64_tiles_33x33_d5", "", "j2c", + "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 5"); run_ojph_compress_expand("simple_enc_irv97_64x64_tiles_33x33_d5", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64_tiles_33x33_d5", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -1038,14 +1054,14 @@ TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D5) { // -o simple_enc_irv97_64x64_tiles_33x33_d6.j2c -qstep 0.01 -tile_size {33,33} // -num_decomps 6 TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D6) { - double mse[3] = { 1.88751, 1.30673, 2.53378 }; - int pae[3] = { 8, 6, 10 }; + double mse[3] = { 1.88751, 1.30673, 2.53378}; + int pae[3] = { 8, 6, 10}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_64x64_tiles_33x33_d6", "", "j2c", - "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 6"); + "simple_enc_irv97_64x64_tiles_33x33_d6", "", "j2c", + "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 6"); run_ojph_compress_expand("simple_enc_irv97_64x64_tiles_33x33_d6", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64_tiles_33x33_d6", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1054,14 +1070,14 @@ TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D6) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_64x64_16bit.j2c -qstep 0.01 TEST(TestExecutables, SimpleEncIrv9764x6416bit) { - double mse[3] = { 51727.3, 32596.4, 45897.8 }; - int pae[3] = { 1512, 1481, 1778 }; + double mse[3] = { 51727.3, 32596.4, 45897.8}; + int pae[3] = { 1512, 1481, 1778}; run_ojph_compress("mm.ppm", - "simple_enc_irv97_64x64_16bit", "", "j2c", - "-qstep 0.01"); + "simple_enc_irv97_64x64_16bit", "", "j2c", + "-qstep 0.01"); run_ojph_compress_expand("simple_enc_irv97_64x64_16bit", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64_16bit", "ppm", - "mm.ppm", "", 3, mse, pae); + "mm.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1070,14 +1086,14 @@ TEST(TestExecutables, SimpleEncIrv9764x6416bit) { // The compressed file is obtained using these command-line 
options: // -o simple_enc_irv97_64x64_16bit_gray.j2c -qstep 0.01 TEST(TestExecutables, SimpleEncIrv9764x6416bitGray) { - double mse[1] = { 25150.6 }; - int pae[1] = { 1081 }; + double mse[1] = { 25150.6}; + int pae[1] = { 1081}; run_ojph_compress("mm.pgm", - "simple_enc_irv97_64x64_16bit_gray", "", "j2c", - "-qstep 0.01"); + "simple_enc_irv97_64x64_16bit_gray", "", "j2c", + "-qstep 0.01"); run_ojph_compress_expand("simple_enc_irv97_64x64_16bit_gray", "j2c", "pgm"); run_mse_pae("simple_enc_irv97_64x64_16bit_gray", "pgm", - "mm.pgm", "", 1, mse, pae); + "mm.pgm", "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1086,14 +1102,14 @@ TEST(TestExecutables, SimpleEncIrv9764x6416bitGray) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_64x64_16bit.j2c -reversible true TEST(TestExecutables, SimpleEncRev5364x6416bit) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("mm.ppm", - "simple_enc_rev53_64x64_16bit", "", "j2c", - "-reversible true"); + "simple_enc_rev53_64x64_16bit", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_64x64_16bit", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64_16bit", "ppm", - "mm.ppm", "", 3, mse, pae); + "mm.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1102,14 +1118,14 @@ TEST(TestExecutables, SimpleEncRev5364x6416bit) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_64x64_16bit_gray.j2c -reversible true TEST(TestExecutables, SimpleEncRev5364x6416bitGray) { - double mse[1] = { 0 }; - int pae[1] = { 0 }; + double mse[1] = { 0}; + int pae[1] = { 0}; run_ojph_compress("mm.pgm", - "simple_enc_rev53_64x64_16bit_gray", "", "j2c", - "-reversible true"); + "simple_enc_rev53_64x64_16bit_gray", "", "j2c", + "-reversible true"); 
run_ojph_compress_expand("simple_enc_rev53_64x64_16bit_gray", "j2c", "pgm"); run_mse_pae("simple_enc_rev53_64x64_16bit_gray", "pgm", - "mm.pgm", "", 1, mse, pae); + "mm.pgm", "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1118,14 +1134,14 @@ TEST(TestExecutables, SimpleEncRev5364x6416bitGray) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_64x64_16bit.j2c -reversible true TEST(TestExecutables, SimpleEncRev5364x64) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_64x64", "", "j2c", - "-reversible true"); + "simple_enc_rev53_64x64", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_64x64", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1134,14 +1150,14 @@ TEST(TestExecutables, SimpleEncRev5364x64) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_32x32.j2c -reversible true -block_size {32,32} TEST(TestExecutables, SimpleEncRev5332x32) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_32x32", "", "j2c", - "-reversible true -block_size \"{32,32}\""); + "simple_enc_rev53_32x32", "", "j2c", + "-reversible true -block_size \"{32,32}\""); run_ojph_compress_expand("simple_enc_rev53_32x32", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_32x32", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1150,14 +1166,14 @@ TEST(TestExecutables, SimpleEncRev5332x32) { // The compressed file is 
obtained using these command-line options: // -o simple_enc_rev53_4x4.j2c -reversible true -block_size {4,4} TEST(TestExecutables, SimpleEncRev534x4) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_4x4", "", "j2c", - "-reversible true -block_size \"{4,4}\""); + "simple_enc_rev53_4x4", "", "j2c", + "-reversible true -block_size \"{4,4}\""); run_ojph_compress_expand("simple_enc_rev53_4x4", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_4x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1166,14 +1182,14 @@ TEST(TestExecutables, SimpleEncRev534x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_1024x4.j2c -reversible true -block_size {4,1024} TEST(TestExecutables, SimpleEncRev531024x4) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_1024x4", "", "j2c", - "-reversible true -block_size \"{4,1024}\""); + "simple_enc_rev53_1024x4", "", "j2c", + "-reversible true -block_size \"{4,1024}\""); run_ojph_compress_expand("simple_enc_rev53_1024x4", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_1024x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1182,14 +1198,14 @@ TEST(TestExecutables, SimpleEncRev531024x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_4x1024.j2c -reversible true -block_size {1024,4} TEST(TestExecutables, SimpleEncRev534x1024) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - 
"simple_enc_rev53_4x1024", "", "j2c", - "-reversible true -block_size \"{1024,4}\""); + "simple_enc_rev53_4x1024", "", "j2c", + "-reversible true -block_size \"{1024,4}\""); run_ojph_compress_expand("simple_enc_rev53_4x1024", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_4x1024", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1199,14 +1215,14 @@ TEST(TestExecutables, SimpleEncRev534x1024) { // -o simple_enc_rev53_64x64_tiles_33x33_d5.j2c -reversible true -tile_size // {32,32} -num_decomps 5 TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D5) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_64x64_tiles_33x33_d5", "", "j2c", - "-reversible true -tile_size \"{32,32}\" -num_decomps 5"); + "simple_enc_rev53_64x64_tiles_33x33_d5", "", "j2c", + "-reversible true -tile_size \"{32,32}\" -num_decomps 5"); run_ojph_compress_expand("simple_enc_rev53_64x64_tiles_33x33_d5", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64_tiles_33x33_d5", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1216,14 +1232,14 @@ TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D5) { // -o simple_enc_rev53_64x64_tiles_33x33_d6.j2c -reversible true -tile_size // {32,32} -num_decomps 6 TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D6) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_64x64_tiles_33x33_d6", "", "j2c", - "-reversible true -tile_size \"{32,32}\" -num_decomps 6"); + "simple_enc_rev53_64x64_tiles_33x33_d6", "", "j2c", + "-reversible true -tile_size \"{32,32}\" -num_decomps 6"); 
run_ojph_compress_expand("simple_enc_rev53_64x64_tiles_33x33_d6", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64_tiles_33x33_d6", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1233,16 +1249,16 @@ TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D6) { // -o simple_enc_irv97_64x64_yuv.j2c -qstep 0.1 -dims {352,288} -num_comps 3 // -downsamp {1,1},{2,2},{2,2} -bit_depth 8,8,8 -signed false,false,false TEST(TestExecutables, SimpleEncIrv9764x64Yuv) { - double mse[3] = { 30.3548, 7.69602, 5.22246 }; - int pae[3] = { 49, 27, 26 }; + double mse[3] = { 30.3548, 7.69602, 5.22246}; + int pae[3] = { 49, 27, 26}; run_ojph_compress("foreman_420.yuv", - "simple_enc_irv97_64x64_yuv", "", "j2c", - "-qstep 0.1 -dims \"{352,288}\" -num_comps 3 -downsamp" - " \"{1,1}\",\"{2,2}\",\"{2,2}\" -bit_depth 8,8,8" - " -signed false,false,false"); + "simple_enc_irv97_64x64_yuv", "", "j2c", + "-qstep 0.1 -dims \"{352,288}\" -num_comps 3 -downsamp" + " \"{1,1}\",\"{2,2}\",\"{2,2}\" -bit_depth 8,8,8" + " -signed false,false,false"); run_ojph_compress_expand("simple_enc_irv97_64x64_yuv", "j2c", "yuv"); run_mse_pae("simple_enc_irv97_64x64_yuv", "yuv", - "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); + "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1253,16 +1269,16 @@ TEST(TestExecutables, SimpleEncIrv9764x64Yuv) { // {352,288} -num_comps 3 -downsamp {1,1},{2,2},{2,2} -bit_depth 8,8,8 -signed // false,false,false TEST(TestExecutables, SimpleEncRev5364x64Yuv) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("foreman_420.yuv", - "simple_enc_rev53_64x64_yuv", "", "j2c", - "-reversible true -qstep 0.1 -dims \"{352,288}\"" - " -num_comps 3 -downsamp \"{1,1}\",\"{2,2}\",\"{2,2}\"" - " -bit_depth 8,8,8 
-signed false,false,false"); + "simple_enc_rev53_64x64_yuv", "", "j2c", + "-reversible true -qstep 0.1 -dims \"{352,288}\"" + " -num_comps 3 -downsamp \"{1,1}\",\"{2,2}\",\"{2,2}\"" + " -bit_depth 8,8,8 -signed false,false,false"); run_ojph_compress_expand("simple_enc_rev53_64x64_yuv", "j2c", "yuv"); run_mse_pae("simple_enc_rev53_64x64_yuv", "yuv", - "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); + "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1271,14 +1287,14 @@ TEST(TestExecutables, SimpleEncRev5364x64Yuv) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_tall_narrow.j2c -qstep 0.1 TEST(TestExecutables, SimpleEncIrv97TallNarrow) { - double mse[3] = { 112.097, 79.2214, 71.1367 }; - int pae[3] = { 56, 41, 32 }; + double mse[3] = { 112.097, 79.2214, 71.1367}; + int pae[3] = { 56, 41, 32}; run_ojph_compress("tall_narrow.ppm", - "simple_enc_irv97_tall_narrow", "", "j2c", - "-qstep 0.1"); + "simple_enc_irv97_tall_narrow", "", "j2c", + "-qstep 0.1"); run_ojph_compress_expand("simple_enc_irv97_tall_narrow", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_tall_narrow", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1287,14 +1303,14 @@ TEST(TestExecutables, SimpleEncIrv97TallNarrow) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_tall_narrow1.j2c -image_offset {1,0} -qstep 0.1 TEST(TestExecutables, SimpleEncIrv97TallNarrow1) { - double mse[3] = { 100.906, 76.113, 72.8347 }; - int pae[3] = { 39, 35, 34 }; + double mse[3] = { 100.906, 76.113, 72.8347}; + int pae[3] = { 39, 35, 34}; run_ojph_compress("tall_narrow.ppm", - "simple_enc_irv97_tall_narrow1", "", "j2c", - "-image_offset \"{1,0}\" -qstep 0.1"); + "simple_enc_irv97_tall_narrow1", "", "j2c", + "-image_offset \"{1,0}\" 
-qstep 0.1"); run_ojph_compress_expand("simple_enc_irv97_tall_narrow1", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_tall_narrow1", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1303,14 +1319,14 @@ TEST(TestExecutables, SimpleEncIrv97TallNarrow1) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_tall_narrow.j2c -reversible true TEST(TestExecutables, SimpleEncRev53TallNarrow) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("tall_narrow.ppm", - "simple_enc_rev53_tall_narrow", "", "j2c", - "-reversible true"); + "simple_enc_rev53_tall_narrow", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_tall_narrow", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_tall_narrow", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1319,14 +1335,14 @@ TEST(TestExecutables, SimpleEncRev53TallNarrow) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_tall_narrow1.j2c -image_offset {1,0} -reversible true TEST(TestExecutables, SimpleEncRev53TallNarrow1) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("tall_narrow.ppm", - "simple_enc_rev53_tall_narrow1", "", "j2c", - "-image_offset \"{1,0}\" -reversible true"); + "simple_enc_rev53_tall_narrow1", "", "j2c", + "-image_offset \"{1,0}\" -reversible true"); run_ojph_compress_expand("simple_enc_rev53_tall_narrow1", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_tall_narrow1", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -1335,14 +1351,14 @@ TEST(TestExecutables, SimpleEncRev53TallNarrow1) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_10bit_le_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72010bitLeNuke11) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("dpx_1280x720_10bit.ppm", - "dpx_enc_1280x720_10bit_le_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_10bit_le_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_10bit_le_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_10bit_le_nuke11", "ppm", - "dpx_1280x720_10bit.ppm", "", 3, mse, pae); + "dpx_1280x720_10bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1351,14 +1367,14 @@ TEST(TestExecutables, DpxEnc1280x72010bitLeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_10bit_be_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72010bitBeNuke11) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("dpx_1280x720_10bit.ppm", - "dpx_enc_1280x720_10bit_be_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_10bit_be_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_10bit_be_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_10bit_be_nuke11", "ppm", - "dpx_1280x720_10bit.ppm", "", 3, mse, pae); + "dpx_1280x720_10bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1367,14 +1383,14 @@ TEST(TestExecutables, DpxEnc1280x72010bitBeNuke11) { // The compressed file is obtained using these command-line options: // -o 
dpx_enc_1280x720_16bit_le_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72016bitLeNuke11) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("dpx_1280x720_16bit.ppm", - "dpx_enc_1280x720_16bit_le_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_16bit_le_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_16bit_le_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_16bit_le_nuke11", "ppm", - "dpx_1280x720_16bit.ppm", "", 3, mse, pae); + "dpx_1280x720_16bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1383,14 +1399,14 @@ TEST(TestExecutables, DpxEnc1280x72016bitLeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_16bit_be_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72016bitBeNuke11) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("dpx_1280x720_16bit.ppm", - "dpx_enc_1280x720_16bit_be_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_16bit_be_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_16bit_be_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_16bit_be_nuke11", "ppm", - "dpx_1280x720_16bit.ppm", "", 3, mse, pae); + "dpx_1280x720_16bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1399,14 +1415,14 @@ TEST(TestExecutables, DpxEnc1280x72016bitBeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_10bit_resolve18.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72010bitResolve18) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; 
run_ojph_compress("dpx_1280x720_10bit.ppm", - "dpx_enc_1280x720_10bit_resolve18", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_10bit_resolve18", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_10bit_resolve18", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_10bit_resolve18", "ppm", - "dpx_1280x720_10bit.ppm", "", 3, mse, pae); + "dpx_1280x720_10bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1415,14 +1431,14 @@ TEST(TestExecutables, DpxEnc1280x72010bitResolve18) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_16bit_resolve18.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72016bitResolve18) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("dpx_1280x720_16bit.ppm", - "dpx_enc_1280x720_16bit_resolve18", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_16bit_resolve18", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_16bit_resolve18", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_16bit_resolve18", "ppm", - "dpx_1280x720_16bit.ppm", "", 3, mse, pae); + "dpx_1280x720_16bit.ppm", "", 3, mse, pae); } //////////////////////////////////////////////////////////////////////////////// diff --git a/tests/test_helpers/ht_cmdlines.txt b/tests/test_helpers/ht_cmdlines.txt index a8c0987d..0542a2d6 100644 --- a/tests/test_helpers/ht_cmdlines.txt +++ b/tests/test_helpers/ht_cmdlines.txt @@ -52,6 +52,7 @@ add_test(NAME simple_dec_irv97_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_D add_test(NAME simple_dec_rev53_64x64_16bit COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -rdec "-i ${images_folder}/mm.ppm -o simple_dec_rev53_64x64_16bit.jph -precise -quiet Creversible=yes -full" "-i simple_dec_rev53_64x64_16bit.jph -o test1.ppm -precise -quiet" "-i simple_dec_rev53_64x64_16bit.jph -o test2.ppm" 
"${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") add_test(NAME simple_dec_rev53_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -rdec "-i ${images_folder}/mm.pgm -o simple_dec_rev53_64x64_16bit_gray.jph -precise -quiet Creversible=yes -full" "-i simple_dec_rev53_64x64_16bit_gray.jph -o test1.pgm -precise -quiet" "-i simple_dec_rev53_64x64_16bit_gray.jph -o test2.pgm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") +add_test(NAME simple_dec_irv53_bhvhb_low_latency COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -dec "-i ${images_folder}/mm.ppm -o simple_dec_irv53_bhvhb_low_latency.jph -quiet Corder=PCRL Clevels=5 "Cmodes=HT|CAUSAL" -rate 2 -o simple_dec_irv53_bhvhb_low_latency.jph Catk=2 Kkernels:I2=I5X3 Cprecincts="{16,8192},{8,8192},{4,8192}" Cblk="{8,256}" Cdecomp="B(-:-:-),H(-),V(-),H(-),B(-:-:-)" Qstep=0.0001 -precise -no_weights -tolerance 0" "-i simple_dec_irv53_bhvhb_low_latency.jph -o test1.ppm -precise -quiet" "-i simple_dec_irv53_bhvhb_low_latency.jph -o test2.ppm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") ############################################################# # Encoding From 9345152e05e654b795b389ecfa0a3045efa45a5b Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 22:18:15 +1000 Subject: [PATCH 37/37] Fixing tests. --- tests/test_executables.cpp | 10 +++++----- tests/test_helpers/convert_mse_pae_to_tests.cpp | 7 +++++-- tests/test_helpers/ht_cmdlines.txt | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index 8660f9d1..9f77f75e 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -825,17 +825,17 @@ TEST(TestExecutables, SimpleDecRev5364x6416bitGray) { /////////////////////////////////////////////////////////////////////////////// // Test ojph_expand with codeblocks when the rev53 wavelet is used. 
// Command-line options used to obtain this file is: -// -o simple_dec_irv53_bhvhb_low_latency.jph -quiet Corder=PCRL Clevels=5 -// "Cmodes=HT|CAUSAL" -rate 2 -o simple_dec_irv53_bhvhb_low_latency.jph Catk=2 -// Kkernels:I2=I5X3 Cprecincts="{16,8192},{8,8192},{4,8192}" Cblk="{8,256}" -// Cdecomp="B(-:-:-),H(-),V(-),H(-),B(-:-:-)" Qstep=0.0001 -precise -no_weights +// -o simple_dec_irv53_bhvhb_low_latency.jph -quiet Corder=PCRL Clevels=5 +// Cmodes=HT|CAUSAL -rate 2 Catk=2 Kkernels:I2=I5X3 +// Cprecincts={16,8192},{8,8192},{4,8192} Cblk={8,256} +// Cdecomp=B(-:-:-),H(-),V(-),H(-),B(-:-:-) Qstep=0.0001 -precise -no_weights // -tolerance 0 TEST(TestExecutables, SimpleDecIrv53BhvhbLowLatency) { double mse[3] = { 5.52392, 4.01405, 6.8166}; int pae[3] = { 16, 17, 23}; run_ojph_expand("simple_dec_irv53_bhvhb_low_latency", "jph", "ppm"); run_mse_pae("simple_dec_irv53_bhvhb_low_latency", "ppm", "Malamute.ppm", - ":I2=I5X3 Cprecincts=", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// diff --git a/tests/test_helpers/convert_mse_pae_to_tests.cpp b/tests/test_helpers/convert_mse_pae_to_tests.cpp index 25bf084c..630b6230 100644 --- a/tests/test_helpers/convert_mse_pae_to_tests.cpp +++ b/tests/test_helpers/convert_mse_pae_to_tests.cpp @@ -200,8 +200,11 @@ void process_cmdlines(std::ifstream& file, start_pos = line.find(":"); if (start_pos != std::string::npos) { - size_t end_pos = line.find("\"", start_pos); - yuv_specs = line.substr(start_pos, end_pos - start_pos); + if (std::isdigit(line.at(start_pos + 1))) + { + size_t end_pos = line.find("\"", start_pos); + yuv_specs = line.substr(start_pos, end_pos - start_pos); + } } break; } diff --git a/tests/test_helpers/ht_cmdlines.txt b/tests/test_helpers/ht_cmdlines.txt index 0542a2d6..3b94c887 100644 --- a/tests/test_helpers/ht_cmdlines.txt +++ b/tests/test_helpers/ht_cmdlines.txt @@ -52,7 +52,7 @@ add_test(NAME simple_dec_irv97_64x64_16bit_gray COMMAND 
${CMAKE_CURRENT_SOURCE_D add_test(NAME simple_dec_rev53_64x64_16bit COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -rdec "-i ${images_folder}/mm.ppm -o simple_dec_rev53_64x64_16bit.jph -precise -quiet Creversible=yes -full" "-i simple_dec_rev53_64x64_16bit.jph -o test1.ppm -precise -quiet" "-i simple_dec_rev53_64x64_16bit.jph -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") add_test(NAME simple_dec_rev53_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -rdec "-i ${images_folder}/mm.pgm -o simple_dec_rev53_64x64_16bit_gray.jph -precise -quiet Creversible=yes -full" "-i simple_dec_rev53_64x64_16bit_gray.jph -o test1.pgm -precise -quiet" "-i simple_dec_rev53_64x64_16bit_gray.jph -o test2.pgm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") -add_test(NAME simple_dec_irv53_bhvhb_low_latency COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -dec "-i ${images_folder}/mm.ppm -o simple_dec_irv53_bhvhb_low_latency.jph -quiet Corder=PCRL Clevels=5 "Cmodes=HT|CAUSAL" -rate 2 -o simple_dec_irv53_bhvhb_low_latency.jph Catk=2 Kkernels:I2=I5X3 Cprecincts="{16,8192},{8,8192},{4,8192}" Cblk="{8,256}" Cdecomp="B(-:-:-),H(-),V(-),H(-),B(-:-:-)" Qstep=0.0001 -precise -no_weights -tolerance 0" "-i simple_dec_irv53_bhvhb_low_latency.jph -o test1.ppm -precise -quiet" "-i simple_dec_irv53_bhvhb_low_latency.jph -o test2.ppm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") +add_test(NAME simple_dec_irv53_bhvhb_low_latency COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -dec "-i ${images_folder}/mm.ppm -o simple_dec_irv53_bhvhb_low_latency.jph -quiet Corder=PCRL Clevels=5 Cmodes=HT|CAUSAL -rate 2 Catk=2 Kkernels:I2=I5X3 Cprecincts=\{16,8192\},\{8,8192\},\{4,8192\} Cblk=\{8,256\} Cdecomp=B(-:-:-),H(-),V(-),H(-),B(-:-:-) Qstep=0.0001 -precise -no_weights -tolerance 0" "-i simple_dec_irv53_bhvhb_low_latency.jph -o test1.ppm -precise -quiet" "-i simple_dec_irv53_bhvhb_low_latency.jph -o test2.ppm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") 
############################################################# # Encoding