From 12d4e80f8e97b783af0683aa37ba42d1a4245b65 Mon Sep 17 00:00:00 2001
From: Aous Naman <aous72@yahoo.com>
Date: Thu, 2 Sep 2021 18:51:38 +1000
Subject: [PATCH 01/10] Added support for high throughput (HT) decoding.

---
 src/lib/openjp2/CMakeLists.txt        |    5 +-
 src/lib/openjp2/fbc_dec.c             | 2361 +++++++++++++++++++++++++
 src/lib/openjp2/j2k.c                 |    5 +-
 src/lib/openjp2/j2k.h                 |    1 +
 src/lib/openjp2/t1.c                  |   62 +-
 src/lib/openjp2/t1_generate_luts.c    |   12 +
 src/lib/openjp2/t1_ht_generate_luts.c |  967 ++++++++++
 src/lib/openjp2/t1_ht_luts.h          |  261 +++
 src/lib/openjp2/t2.c                  |   86 +-
 src/lib/openjp2/tcd.h                 |    7 +-
 10 files changed, 3723 insertions(+), 44 deletions(-)
 create mode 100644 src/lib/openjp2/fbc_dec.c
 create mode 100644 src/lib/openjp2/t1_ht_generate_luts.c
 create mode 100644 src/lib/openjp2/t1_ht_luts.h

diff --git a/src/lib/openjp2/CMakeLists.txt b/src/lib/openjp2/CMakeLists.txt
index 48259044a..86c9452ee 100644
--- a/src/lib/openjp2/CMakeLists.txt
+++ b/src/lib/openjp2/CMakeLists.txt
@@ -15,6 +15,7 @@ set(OPENJPEG_SRCS
   ${CMAKE_CURRENT_SOURCE_DIR}/bio.h
   ${CMAKE_CURRENT_SOURCE_DIR}/cio.c
   ${CMAKE_CURRENT_SOURCE_DIR}/cio.h
+  ${CMAKE_CURRENT_SOURCE_DIR}/fbc_dec.c
   ${CMAKE_CURRENT_SOURCE_DIR}/dwt.c
   ${CMAKE_CURRENT_SOURCE_DIR}/dwt.h
   ${CMAKE_CURRENT_SOURCE_DIR}/event.c
@@ -134,9 +135,9 @@ install(
 endif()
 
 if(BUILD_LUTS_GENERATOR)
-# internal utility to generate t1_luts.h (part of the jp2 lib)
+# internal utility to generate t1_luts.h and t1_ht_luts.h (part of the jp2 lib)
 # no need to install:
-  add_executable(t1_generate_luts t1_generate_luts.c)
+  add_executable(t1_generate_luts t1_generate_luts.c t1_ht_generate_luts.c)
   if(UNIX)
     target_link_libraries(t1_generate_luts m)
   endif()
diff --git a/src/lib/openjp2/fbc_dec.c b/src/lib/openjp2/fbc_dec.c
new file mode 100644
index 000000000..30211c24f
--- /dev/null
+++ b/src/lib/openjp2/fbc_dec.c
@@ -0,0 +1,2361 @@
+//***************************************************************************/
+// This software is released under the 2-Clause BSD license, included
+// below.
+//
+// Copyright (c) 2021, Aous Naman 
+// Copyright (c) 2021, Kakadu Software Pty Ltd, Australia
+// Copyright (c) 2021, The University of New South Wales, Australia
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//***************************************************************************/
+// This file is part of the OpenJpeg software implementation.
+// File: fbc_dec.c
+// Author: Aous Naman
+// Date: 01 September 2021
+//***************************************************************************/
+
+//***************************************************************************/
+/** @file fbc_dec.c
+ *  @brief implements HTJ2K block decoder
+ */
+
+#include <assert.h>
+#include <string.h>
+#include "opj_includes.h"
+
+#include "t1_ht_luts.h"
+
+/////////////////////////////////////////////////////////////////////////////
+// compiler detection
+/////////////////////////////////////////////////////////////////////////////
+#ifdef _MSC_VER
+  #define OPJ_COMPILER_MSVC
+#elif (defined __GNUC__)
+  #define OPJ_COMPILER_GNUC
+#endif
+
+//************************************************************************/
+/** @brief Flag for displaying only once the error message about disabling
+  *        the decoding of CUP pass due to insufficient precision
+  */
+static OPJ_BOOL cannot_decode_due_to_insufficient_precision = OPJ_FALSE;
+
+//************************************************************************/
+/** @brief Flag for displaying only once the error message about disabling
+  *        the decoding of SPP and MRP passes
+  */
+static OPJ_BOOL cannot_decode_spp_mrp_msg = OPJ_FALSE;
+
+//************************************************************************/
+/** @brief Returns the number of bits set to 1 in a 32-bit value
+  *
+  *   @param [in]  val is the value whose set bits are counted
+  */
+static inline OPJ_UINT32 population_count(OPJ_UINT32 val)
+{
+#if defined(OPJ_COMPILER_MSVC)
+  return (OPJ_UINT32)__popcnt(val);
+#elif defined(OPJ_COMPILER_GNUC)
+  return (OPJ_UINT32)__builtin_popcount(val);
+#else
+  val = val - ((val >> 1) & 0x55555555U);                 // 2-bit sums
+  val = (val & 0x33333333U) + ((val >> 2) & 0x33333333U); // 4-bit sums
+  val = (val + (val >> 4)) & 0x0F0F0F0FU;                 // 8-bit sums
+  val = val + (val >> 8);                                 // fold bytes
+  val = val + (val >> 16);                                // total in LSBs
+  return (OPJ_UINT32)(val & 0x3FU);
+#endif
+}
+
+//************************************************************************/
+/** @brief Counts the number of leading zeros of a 32-bit value
+  *
+  *   @param [in]  val is the value examined; callers should pass non-zero
+  */ 
+#ifdef OPJ_COMPILER_MSVC
+  #pragma intrinsic(_BitScanReverse)
+#endif
+static inline OPJ_UINT32 count_leading_zeros(OPJ_UINT32 val)
+{
+#ifdef OPJ_COMPILER_MSVC
+  unsigned long result = 0;
+  _BitScanReverse(&result, val);   // index of the highest set bit
+  return 31U ^ (OPJ_UINT32)result; // equals 31 - result for result in 0..31
+#elif (defined OPJ_COMPILER_GNUC)
+  return (OPJ_UINT32)__builtin_clz(val); // result is undefined for val == 0
+#else
+  val |= (val >> 1);  // smear the highest set bit into every lower bit
+  val |= (val >> 2);
+  val |= (val >> 4);
+  val |= (val >> 8);
+  val |= (val >> 16);
+  return 32U - population_count(val); // yields 32 when val == 0
+#endif
+}
+
+//************************************************************************/
+/** @brief MEL state structure for reading and decoding the MEL bitstream
+  *
+  *  A number of events is decoded from the MEL bitstream ahead of time
+  *  and stored in runs/num_runs.
+  *  Each run represents the number of zero events before a one event.
+  */ 
+typedef struct dec_mel {
+  // data decoding machinery
+  OPJ_UINT8* data;  //!<the address of data (or bitstream)
+  OPJ_UINT64 tmp;   //!<temporary buffer for read data (consumed from MSB)
+  int bits;         //!<number of bits stored in tmp
+  int size;         //!<number of bytes in MEL code
+  OPJ_BOOL unstuff; //!<true if the next bit needs to be unstuffed
+  int k;            //!<state of MEL decoder
+
+  // queue of decoded runs
+  int num_runs;    //!<number of decoded runs left in runs (maximum 8)
+  OPJ_UINT64 runs; //!<runs of decoded MEL codewords (7 bits/run)
+} dec_mel_t;
+
+//************************************************************************/
+/** @brief Reads and unstuffs the MEL bitstream
+  * 
+  *  This design needs more bytes in the codeblock buffer than the length
+  *  of the cleanup pass by up to 2 bytes.
+  *
+  *  Unstuffing removes the MSB of the byte following a byte whose
+  *  value is 0xFF; this prevents sequences larger than 0xFF7F in value
+  *  from appearing in the bitstream.
+  *
+  *  @param [in]  melp is a pointer to dec_mel_t structure
+  */
+static inline
+void mel_read(dec_mel_t *melp)
+{
+  if (melp->bits > 32)  //there are enough bits in the tmp variable
+    return;             // return without reading new data
+  OPJ_UINT32 val = 0xFFFFFFFF;
+  //a guard on (melp->size > 0) is left disabled here pending testing, so
+  // the load is unconditional; using val & 0xFF first assumes little-endian
+    val = *(OPJ_UINT32*)melp->data;  // read 32 bits from MEL data
+
+  // next we unstuff them before adding them to the buffer
+  int bits = 32 - melp->unstuff; // number of bits in val, subtract 1 if
+                                  // the previously read byte requires 
+                                  // unstuffing
+
+  // data is unstuffed and accumulated in t
+  // bits has the number of bits in t
+  OPJ_UINT32 t = (melp->size > 0) ? (val & 0xFF) : 0xFF; // feed 0xFF if the 
+                                  // MEL bitstream has been exhausted
+  if (melp->size == 1) t |= 0xF;  // if this is 1 byte before the last
+                                  // in MEL+VLC segments (remember they
+                                  // can overlap)
+  melp->data += melp->size-- > 0; // advance data by 1 byte if we have not
+                                  // reached the end of the MEL segment
+  OPJ_BOOL unstuff = ((val & 0xFF) == 0xFF); // true if the next byte
+                                             // needs unstuffing
+
+  bits -= unstuff; // there is one less bit in t if unstuffing is needed
+  t = t << (8 - unstuff); // move up to make room for the next byte
+
+  //this is a repeat of the above, for the second byte
+  t |= (melp->size > 0) ? ((val>>8) & 0xFF) : 0xFF;
+  if (melp->size == 1) t |= 0xF;
+  melp->data += melp->size-- > 0;
+  unstuff = (((val >> 8) & 0xFF) == 0xFF);
+
+  bits -= unstuff;
+  t = t << (8 - unstuff);
+
+  t |= (melp->size > 0) ? ((val>>16) & 0xFF) : 0xFF;
+  if (melp->size == 1) t |= 0xF;
+  melp->data += melp->size-- > 0;
+  unstuff = (((val >> 16) & 0xFF) == 0xFF);
+
+  bits -= unstuff;
+  t = t << (8 - unstuff);
+
+  t |= (melp->size > 0) ? ((val>>24) & 0xFF) : 0xFF;
+  if (melp->size == 1) t |= 0xF;
+  melp->data += melp->size-- > 0;
+  melp->unstuff = (((val >> 24) & 0xFF) == 0xFF); // kept for the next call
+
+  // move t to tmp, and push the result all the way up, so we read from
+  // the MSB
+  melp->tmp |= ((OPJ_UINT64)t) << (64 - bits - melp->bits);
+  melp->bits += bits; //increment the number of bits in tmp
+}
+
+//************************************************************************/
+/** @brief Decodes unstuffed MEL segment bits stored in tmp to runs
+  * 
+  *  Runs are stored in "runs" and the number of runs in "num_runs".
+  *  Each run represents a number of zero events that may or may not 
+  *  terminate in a 1 event.
+  *  Each run is stored in 7 bits.  The LSB is 1 if the run terminates in
+  *  a 1 event, 0 otherwise.  The next 6 bits, for the case terminating 
+  *  with 1, contain the number of consecutive zero events * 2; for the 
+  *  case terminating with 0, they store (number of consecutive zero 
+  *  events - 1) * 2.
+  *  A total of 6 bits (made up of 1 + 5) should have been enough.
+  *
+  *  @param [in]  melp is a pointer to dec_mel_t structure
+  */
+static inline
+void mel_decode(dec_mel_t *melp)
+{
+  static const int mel_exp[13] = { //MEL exponents, indexed by state k
+    0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5
+  };
+
+  if (melp->bits < 6) // if there are fewer than 6 bits in tmp
+    mel_read(melp);   // then read from the MEL bitstream
+                      // 6 bits is the largest decodable MEL cwd
+
+  //repeat as long as there are enough decodable bits in tmp,
+  // and the runs store is not full (num_runs < 8)
+  while (melp->bits >= 6 && melp->num_runs < 8)
+  {
+    int eval = mel_exp[melp->k]; // number of bits associated with state
+    int run = 0;
+    if (melp->tmp & (1ull<<63)) //The next bit to decode (stored in MSB)
+    { //one is found
+      run = 1 << eval;  
+      run--; // consecutive runs of 0 events - 1
+      melp->k = melp->k + 1 < 12 ? melp->k + 1 : 12;//increment, max is 12
+      melp->tmp <<= 1; // consume one bit from tmp
+      melp->bits -= 1;
+      run = run << 1; // a stretch of zeros not terminating in one
+    }
+    else
+    { //0 is found
+      run = (int)(melp->tmp >> (63 - eval)) & ((1 << eval) - 1);
+      melp->k = melp->k - 1 > 0 ? melp->k - 1 : 0; //decrement, min is 0
+      melp->tmp <<= eval + 1; //consume eval + 1 bits (max is 6)
+      melp->bits -= eval + 1;
+      run = (run << 1) + 1; // a stretch of zeros terminating with one
+    }
+    eval = melp->num_runs * 7;                 // 7 bits per run
+    melp->runs &= ~((OPJ_UINT64)0x3F << eval); // 6 bits are sufficient
+    melp->runs |= ((OPJ_UINT64)run) << eval;   // store the value in runs
+    melp->num_runs++;                          // increment count  
+  }
+}
+
+//************************************************************************/
+/** @brief Initiates a dec_mel_t structure for MEL decoding and reads
+  *         some bytes in order to get the read address to a multiple
+  *         of 4 
+  *
+  *  @param [in]  melp is a pointer to dec_mel_t structure
+  *  @param [in]  bbuf is a pointer to byte buffer
+  *  @param [in]  lcup is the length of MagSgn+MEL+VLC segments
+  *  @param [in]  scup is the length of MEL+VLC segments
+  */
+static inline
+void mel_init(dec_mel_t *melp, OPJ_UINT8* bbuf, int lcup, int scup)
+{
+  melp->data = bbuf + lcup - scup; // move the pointer to the start of MEL
+  melp->bits = 0;                  // 0 bits in tmp
+  melp->tmp = 0;                   //
+  melp->unstuff = OPJ_FALSE;       // no unstuffing
+  melp->size = scup - 1;           // size is the length of MEL+VLC-1
+  melp->k = 0;                     // 0 for state 
+  melp->num_runs = 0;              // num_runs is 0
+  melp->runs = 0;                  //
+
+  //This code is borrowed; original is for a different architecture
+  //These few lines take care of the case where data is not at a multiple
+  // of 4 boundary.  It reads 1, 2, 3, or 4 bytes from the MEL segment
+  int num = 4 - (int)((intptr_t)(melp->data) & 0x3);
+  for (int i = 0; i < num; ++i) { // this code is similar to mel_read
+    assert(melp->unstuff == OPJ_FALSE || melp->data[0] <= 0x8F);
+    OPJ_UINT64 d = (melp->size > 0) ? *melp->data : 0xFF; // if buffer is 
+                                                          // consumed set data 
+                                                          // to 0xFF
+    if (melp->size == 1) d |= 0xF; //if this is MEL+VLC-1, set LSBs to 0xF
+                                    // see the standard
+    melp->data += melp->size-- > 0; //increment if the end is not reached
+    int d_bits = 8 - melp->unstuff; //if unstuffing is needed, reduce by 1
+    melp->tmp = (melp->tmp << d_bits) | d; //store bits in tmp
+    melp->bits += d_bits;  //increment bits by the number of bits added
+    melp->unstuff = ((d & 0xFF) == 0xFF); //true if next byte needs 
+                                          //unstuffing
+  }
+  melp->tmp <<= (64 - melp->bits); //push all the way up so the first bit
+                                    // is the MSB
+}
+
+//************************************************************************/
+/** @brief Pops the oldest run from the queue held in dec_mel_t; when the
+  *         queue is empty the MEL segment is decoded first to refill it
+  *
+  * @param [in]  melp is a pointer to dec_mel_t structure
+  */
+static inline
+int mel_get_run(dec_mel_t *melp)
+{
+  if (melp->num_runs == 0) {      // queue is empty; decode more MEL bits
+    mel_decode(melp);
+  }
+  int head = melp->runs & 0x7F;   // the 7-bit entry at the head of the queue
+  melp->runs = melp->runs >> 7;   // drop the entry just taken
+  melp->num_runs -= 1;
+  return head;
+}
+
+//************************************************************************/
+/** @brief A structure for reading and unstuffing a segment that grows
+  *         backward, such as VLC and MRP
+  */ 
+typedef struct rev_struct {
+  //storage
+  OPJ_UINT8* data;  //!<pointer to where to read data
+  OPJ_UINT64 tmp;	  //!<temporary buffer of read data (consumed from LSB)
+  OPJ_UINT32 bits;  //!<number of bits stored in tmp
+  int size;         //!<number of bytes left
+  OPJ_BOOL unstuff; //!<true if the last byte is more than 0x8F
+                    //!<then the current byte is unstuffed if it is 0x7F
+} rev_struct_t;
+
+//************************************************************************/
+/** @brief Read and unstuff data from a backwardly-growing segment
+  *
+  *  This reader can read up to 8 bytes from before the VLC segment.
+  *  Care must be taken not to read from unreadable memory, causing a 
+  *  segmentation fault.
+  * 
+  *  Note that there is another subroutine rev_read_mrp that is slightly
+  *  different.  The other one fills zeros when the buffer is exhausted.
+  *  This one basically does not care if the bytes are consumed, because
+  *  any extra data should not be used in the actual decoding.
+  *
+  *  Unstuffing is needed to prevent sequences more than 0xFF8F from 
+  *  appearing in the bits stream; since we are reading backward, we keep
+  *  watch when a value larger than 0x8F appears in the bitstream. 
+  *  If the byte following this is 0x7F, we unstuff this byte (ignore the 
+  *  MSB of that byte, which should be 0).
+  *
+  *  @param [in]  vlcp is a pointer to rev_struct_t structure
+  */
+static inline void rev_read(rev_struct_t *vlcp)
+{
+  //process 4 bytes at a time
+  if (vlcp->bits > 32)  // if there are more than 32 bits in tmp, then 
+    return;             // reading 32 bits can overflow vlcp->tmp
+  OPJ_UINT32 val = 0;   // zeros are fed once the segment is exhausted
+  //the next line (the if statement) needs to be tested first
+  if (vlcp->size > 0)  // if there are bytes left in the VLC segment
+  {
+    // We pad the data by 8 bytes at the beginning of the code stream 
+    // buffer
+    val = *(OPJ_UINT32*)vlcp->data; // then read 32 bits
+    vlcp->data -= 4;                // move data pointer back by 4
+    vlcp->size -= 4;                // reduce available byte by 4
+  }
+
+  //accumulate in tmp, number of bits in tmp are stored in bits
+  OPJ_UINT32 tmp = val >> 24;  //start with the MSB byte
+  OPJ_UINT32 bits;
+
+  // test unstuff (previous byte is >0x8F), and this byte is 0x7F
+  bits = 8 - ((vlcp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
+  OPJ_BOOL unstuff = (val >> 24) > 0x8F; //this is for the next byte
+
+  tmp |= ((val >> 16) & 0xFF) << bits; //process the next byte
+  bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
+  unstuff = ((val >> 16) & 0xFF) > 0x8F;
+
+  tmp |= ((val >> 8) & 0xFF) << bits;
+  bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
+  unstuff = ((val >> 8) & 0xFF) > 0x8F;
+
+  tmp |= (val & 0xFF) << bits;
+  bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
+  unstuff = (val & 0xFF) > 0x8F;
+
+  // now move the read and unstuffed bits into vlcp->tmp
+  vlcp->tmp |= (OPJ_UINT64)tmp << vlcp->bits;
+  vlcp->bits += bits;
+  vlcp->unstuff = unstuff; // this for the next read
+}
+
+//************************************************************************/
+/** @brief Initiates the rev_struct_t structure and reads a few bytes to 
+  *         move the read address to multiple of 4
+  *
+  *  There is another similar rev_init_mrp subroutine.  The difference is
+  *  that this one, rev_init, discards the first 12 bits (they have the
+  *  sum of the lengths of VLC and MEL segments), and first unstuff depends
+  *  on first 4 bits.
+  *
+  *  @param [in]  vlcp is a pointer to rev_struct_t structure
+  *  @param [in]  data is a pointer to byte at the start of the cleanup pass
+  *  @param [in]  lcup is the length of MagSgn+MEL+VLC segments
+  *  @param [in]  scup is the length of MEL+VLC segments
+  */
+static inline
+void rev_init(rev_struct_t *vlcp, OPJ_UINT8* data, int lcup, int scup)
+{
+  vlcp->data = data + lcup - 2; //first byte has only the upper 4 bits
+
+  //size can not be larger than this, in fact it should be smaller
+  vlcp->size = scup - 2;
+
+  OPJ_UINT32 d = *vlcp->data--; // read one byte (this is a half byte)
+  vlcp->tmp = d >> 4;           // both initialize and set
+  vlcp->bits = 4 - ((vlcp->tmp & 7) == 7); //check standard
+  vlcp->unstuff = (d | 0xF) > 0x8F; //this is useful for the next byte
+
+  //This code is designed for an architecture that read address should
+  // align to the read size (address multiple of 4 if read size is 4)
+  //These few lines take care of the case where data is not at a multiple
+  // of 4 boundary. It reads 1, 2, 3, or 4 bytes from the VLC bitstream
+  int num = 1 + (int)((intptr_t)(vlcp->data) & 0x3);
+  int tnum = num < vlcp->size ? num : vlcp->size; // never beyond the segment
+  for (int i = 0; i < tnum; ++i) {
+    OPJ_UINT64 b; // distinct name; does not shadow the half byte d above
+    b = *vlcp->data--;  // read one byte and move read pointer
+    //check if the last byte was >0x8F (unstuff == true) and this is 0x7F
+    OPJ_UINT32 d_bits = 8 - ((vlcp->unstuff && ((b & 0x7F) == 0x7F)) ? 1 : 0);
+    vlcp->tmp |= b << vlcp->bits; // move data to vlcp->tmp
+    vlcp->bits += d_bits;
+    vlcp->unstuff = b > 0x8F; // for next byte
+  }
+  vlcp->size -= tnum;
+  vlcp->data -= 3; // make ready to read 32 bits (address multiple of 4)
+  rev_read(vlcp);  // read another 32 bits
+}
+
+//************************************************************************/
+/** @brief Retrieves 32 bits from the head of a rev_struct structure 
+  *
+  *  By the end of this call, vlcp->tmp holds at least 32 bits; no bits
+  *  are consumed here (rev_advance does that).
+  *  @param [in]  vlcp is a pointer to rev_struct structure
+  *  @return      the lowest 32 bits of vlcp->tmp */
+static inline OPJ_UINT32 rev_fetch(rev_struct_t *vlcp)
+{
+  if (vlcp->bits < 32)  // if there are less than 32 bits, read more
+  {
+    rev_read(vlcp);     // read 32 bits, but unstuffing might reduce this
+    if (vlcp->bits < 32)// if there is still space in vlcp->tmp for 32 bits
+      rev_read(vlcp);   // read another 32
+  }
+  return (OPJ_UINT32)vlcp->tmp; // return the head (bottom-most) of vlcp->tmp
+}
+
+//************************************************************************/
+/** @brief Consumes num_bits from a rev_struct structure
+  *
+  *  @param [in]  vlcp is a pointer to rev_struct structure
+  *  @param [in]  num_bits is the number of bits to be removed
+  *  @return      the lowest 32 bits of vlcp->tmp after the removal */
+static inline OPJ_UINT32 rev_advance(rev_struct_t *vlcp, OPJ_UINT32 num_bits)
+{
+  assert(num_bits <= vlcp->bits); // vlcp->tmp must have at least num_bits
+  vlcp->tmp >>= num_bits;         // remove bits
+  vlcp->bits -= num_bits;         // decrement the number of bits
+  return (OPJ_UINT32)vlcp->tmp;
+}
+
+//************************************************************************/
+/** @brief Reads and unstuffs from rev_struct
+  *
+  *  This is different than rev_read in that this fills in zeros when
+  *  the available data is consumed.  The other does not care about the
+  *  values when all data is consumed.
+  *
+  *  See rev_read for more information about unstuffing
+  *
+  *  @param [in]  mrp is a pointer to rev_struct structure
+  */
+static inline void rev_read_mrp(rev_struct_t *mrp)
+{
+  //process 4 bytes at a time
+  if (mrp->bits > 32)   // adding up to 32 more bits could overflow mrp->tmp
+    return;
+  OPJ_UINT32 val = 0;
+  //the next line (the if statement) needs to be tested first
+  //notice that second line can be simplified to mrp->data -= 4
+  // if (mrp->size > 0)
+  {
+    val = *(OPJ_UINT32*)mrp->data;      // read 32 bits
+    mrp->data -= mrp->size > 0 ? 4 : 0; // move back read pointer only if 
+                                        // there is data
+  }
+
+  //accumulate in tmp, and keep count in bits
+  OPJ_UINT32 tmp = (mrp->size-- > 0) ? (val >> 24) : 0; // fill zeros if all 
+  OPJ_UINT32 bits;                                      // bytes are used
+  //test if the last byte > 0x8F (unstuff must be true) and this is 0x7F
+  bits = 8 - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
+  OPJ_BOOL unstuff = (val >> 24) > 0x8F;
+
+  //process the next byte
+  tmp |= (mrp->size-- > 0) ? (((val >> 16) & 0xFF) << bits) : 0;
+  bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
+  unstuff = ((val >> 16) & 0xFF) > 0x8F;
+
+  tmp |= (mrp->size-- > 0) ? (((val >> 8) & 0xFF) << bits) : 0;
+  bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
+  unstuff = ((val >> 8) & 0xFF) > 0x8F;
+
+  tmp |= (mrp->size-- > 0) ? ((val & 0xFF) << bits) : 0;
+  bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
+  unstuff = (val & 0xFF) > 0x8F;
+
+  mrp->tmp |= (OPJ_UINT64)tmp << mrp->bits; // move data to mrp->tmp
+  mrp->bits += bits;
+  mrp->unstuff = unstuff;                   // next byte
+}
+
+//************************************************************************/
+/** @brief Initializes rev_struct structure for MRP segment, and reads
+  *         a number of bytes such that the next 32 bits read are from
+  *         an address that is a multiple of 4. Note this is designed for
+  *         an architecture where the read size must be compatible with
+  *         the alignment of the read address
+  *
+  *  There is another similar subroutine rev_init.  This subroutine does 
+  *  NOT skip the first 12 bits, and starts with unstuff set to true.
+  *
+  *  @param [in]  mrp is a pointer to rev_struct structure
+  *  @param [in]  data is a pointer to byte at the start of the cleanup pass
+  *  @param [in]  lcup is the length of MagSgn+MEL+VLC segments
+  *  @param [in]  len2 is the length of SPP+MRP segments
+  */
+static inline void rev_init_mrp(rev_struct_t *mrp, OPJ_UINT8* data, int lcup,
+                                int len2)
+{
+  mrp->data = data + lcup + len2 - 1; // last byte of the SPP+MRP segments
+  mrp->size = len2;
+  mrp->unstuff = OPJ_TRUE;
+  mrp->bits = 0;
+  mrp->tmp = 0;
+
+  //This code is designed for an architecture that read address should
+  // align to the read size (address multiple of 4 if read size is 4)
+  //These few lines take care of the case where data is not at a multiple
+  // of 4 boundary.  It reads 1, 2, 3, or 4 bytes from the MRP stream
+  int num = 1 + (int)((intptr_t)(mrp->data) & 0x3);
+  for (int i = 0; i < num; ++i) {
+    OPJ_UINT64 d;
+    //read a byte, 0 if no more data
+    d = (mrp->size-- > 0) ? *mrp->data-- : 0; 
+    //check if unstuffing is needed
+    OPJ_UINT32 d_bits = 8 - ((mrp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
+    mrp->tmp |= d << mrp->bits; // move data to mrp->tmp
+    mrp->bits += d_bits;
+    mrp->unstuff = d > 0x8F; // for next byte
+  }
+  mrp->data -= 3; //make ready to read a 32 bits
+  rev_read_mrp(mrp);
+}
+
+//************************************************************************/
+/** @brief Retrieves 32 bits from the head of a rev_struct structure 
+  *
+  *  By the end of this call, mrp->tmp holds at least 32 bits; no bits
+  *  are consumed here (rev_advance_mrp does that).
+  *  @param [in]  mrp is a pointer to rev_struct structure
+  *  @return      the lowest 32 bits of mrp->tmp */
+static inline OPJ_UINT32 rev_fetch_mrp(rev_struct_t *mrp)
+{
+  if (mrp->bits < 32) // if there are less than 32 bits in mrp->tmp
+  {
+    rev_read_mrp(mrp);    // read 28-32 bits from mrp
+    if (mrp->bits < 32)   // if there is a space of 32 bits
+      rev_read_mrp(mrp);  // read more
+  }
+  return (OPJ_UINT32)mrp->tmp;  // return the head of mrp->tmp
+}
+
+//************************************************************************/
+/** @brief Consumes num_bits from a rev_struct structure
+  *
+  *  @param [in]  mrp is a pointer to rev_struct structure
+  *  @param [in]  num_bits is the number of bits to be removed
+  *  @return      the lowest 32 bits of mrp->tmp after the removal */
+static inline OPJ_UINT32 rev_advance_mrp(rev_struct_t *mrp, OPJ_UINT32 num_bits)
+{
+  assert(num_bits <= mrp->bits); // we must not consume more than mrp->bits
+  mrp->tmp >>= num_bits;         // discard the lowest num_bits bits
+  mrp->bits -= num_bits;
+  return (OPJ_UINT32)mrp->tmp;   // return data after consumption
+}
+
+//************************************************************************/
+/** @brief Decode initial UVLC to get the u values (u_q) of a quad pair
+  *
+  *  @param [in]  vlc is the head of the VLC bitstream
+  *  @param [in]  mode is 0 to 4. Values 0 to 3 are composed of u_off of the
+  *               1st and 2nd quads of a quad pair; 4 occurs when both bits
+  *               are 1, and the event decoded from MEL bitstream is also 1.
+  *  @param [out] u receives two entries holding u + kappa (kappa is 1 on
+  *               the initial line); mode 4 additionally adds 2 to each.
+  *  @return      number of bits consumed from vlc; u is untouched if mode > 4
+  */
+static inline OPJ_UINT32 decode_init_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode,
+                                          OPJ_UINT32 *u)
+{
+  //table stores possible decoding three bits from vlc
+  // there are 8 entries for xx1, x10, 100, 000, where x means do not care
+  // table value is made up of
+  // 2 bits in the LSB for prefix length 
+  // 3 bits for suffix length
+  // 3 bits in the MSB for prefix value (u_pfx in Table 3 of ITU T.814)
+  static const OPJ_UINT8 dec[8] = { // the index is the prefix codeword
+    3 | (5 << 2) | (5 << 5),        //000 == 000, prefix codeword "000"
+    1 | (0 << 2) | (1 << 5),        //001 == xx1, prefix codeword "1"
+    2 | (0 << 2) | (2 << 5),        //010 == x10, prefix codeword "01"
+    1 | (0 << 2) | (1 << 5),        //011 == xx1, prefix codeword "1"
+    3 | (1 << 2) | (3 << 5),        //100 == 100, prefix codeword "001"
+    1 | (0 << 2) | (1 << 5),        //101 == xx1, prefix codeword "1"
+    2 | (0 << 2) | (2 << 5),        //110 == x10, prefix codeword "01"
+    1 | (0 << 2) | (1 << 5)         //111 == xx1, prefix codeword "1"
+  };
+
+  OPJ_UINT32 consumed_bits = 0;
+  if (mode == 0)  // both u_off are 0
+  {
+    u[0] = u[1] = 1; //Kappa is 1 for initial line
+  }
+  else if (mode <= 2) // u_off are either 01 or 10
+  {
+    OPJ_UINT32 d = dec[vlc & 0x7];   //look at the least significant 3 bits
+    vlc >>= d & 0x3;                 //prefix length
+    consumed_bits += d & 0x3; 
+
+    OPJ_UINT32 suffix_len = ((d >> 2) & 0x7); 
+    consumed_bits += suffix_len;
+
+    d = (d >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+    u[0] = (mode == 1) ? d + 1 : 1; // kappa is 1 for initial line
+    u[1] = (mode == 1) ? 1 : d + 1; // kappa is 1 for initial line
+  }
+  else if (mode == 3) // both u_off are 1, and MEL event is 0
+  {
+    OPJ_UINT32 d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+    vlc >>= d1 & 0x3;                // Consume bits
+    consumed_bits += d1 & 0x3;
+
+    if ((d1 & 0x3) > 2)
+    {
+      //u_{q_2} prefix
+      u[1] = (vlc & 1) + 1 + 1; //Kappa is 1 for initial line
+      ++consumed_bits;
+      vlc >>= 1;
+
+      OPJ_UINT32 suffix_len = ((d1 >> 2) & 0x7);
+      consumed_bits += suffix_len;
+      d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+      u[0] = d1 + 1; //Kappa is 1 for initial line
+    }
+    else
+    {
+      OPJ_UINT32 d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+      vlc >>= d2 & 0x3;                // Consume bits
+      consumed_bits += d2 & 0x3;
+
+      OPJ_UINT32 suffix_len = ((d1 >> 2) & 0x7);
+      consumed_bits += suffix_len;
+
+      d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+      u[0] = d1 + 1; //Kappa is 1 for initial line
+      vlc >>= suffix_len;
+
+      suffix_len = ((d2 >> 2) & 0x7);
+      consumed_bits += suffix_len;
+
+      d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+      u[1] = d2 + 1; //Kappa is 1 for initial line
+    }
+  }
+  else if (mode == 4) // both u_off are 1, and MEL event is 1
+  {
+    OPJ_UINT32 d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+    vlc >>= d1 & 0x3;                // Consume bits
+    consumed_bits += d1 & 0x3;
+
+    OPJ_UINT32 d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+    vlc >>= d2 & 0x3;                // Consume bits
+    consumed_bits += d2 & 0x3;
+
+    OPJ_UINT32 suffix_len = ((d1 >> 2) & 0x7);
+    consumed_bits += suffix_len;
+
+    d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+    u[0] = d1 + 3; // add 2+kappa
+    vlc >>= suffix_len;
+
+    suffix_len = ((d2 >> 2) & 0x7);
+    consumed_bits += suffix_len;
+
+    d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+    u[1] = d2 + 3; // add 2+kappa
+  }
+  return consumed_bits;
+}
+
+//************************************************************************/
+/** @brief Decode non-initial UVLC to get the u value (or u_q)
+  *
+  *  Declared static: a plain C99 `inline` definition provides no external
+  *  definition, so a call the compiler chooses not to inline fails to link.
+  *
+  *  @param [in]  vlc is the head of the VLC bitstream
+  *  @param [in]  mode is 0, 1, 2, or 3. Bit 0 is u_off of the 1st quad
+  *               and bit 1 is u_off of the 2nd quad of a quad pair
+  *  @param [out] u receives, per quad, the u value (or u_q) + 1; this is
+  *               a partial calculation of u + kappa.  u is left untouched
+  *               when mode is greater than 3.
+  *  @return the number of VLC bits consumed (prefixes plus suffixes)
+  */
+static inline OPJ_UINT32 decode_noninit_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode,
+                                             OPJ_UINT32 *u)
+{
+  // Lookup indexed by the three LSBs of vlc (xx1, x10, 100, 000; x = any).
+  // Each entry packs: bits 0-1 prefix length, bits 2-4 suffix length,
+  // bits 5-7 prefix value (u_pfx in Table 3 of ITU T.814)
+  static const OPJ_UINT8 dec[8] = {
+    3 | (5 << 2) | (5 << 5), //000 == 000, prefix codeword "000"
+    1 | (0 << 2) | (1 << 5), //001 == xx1, prefix codeword "1"
+    2 | (0 << 2) | (2 << 5), //010 == x10, prefix codeword "01"
+    1 | (0 << 2) | (1 << 5), //011 == xx1, prefix codeword "1"
+    3 | (1 << 2) | (3 << 5), //100 == 100, prefix codeword "001"
+    1 | (0 << 2) | (1 << 5), //101 == xx1, prefix codeword "1"
+    2 | (0 << 2) | (2 << 5), //110 == x10, prefix codeword "01"
+    1 | (0 << 2) | (1 << 5)  //111 == xx1, prefix codeword "1"
+  };
+
+  OPJ_UINT32 consumed_bits = 0;
+  if (mode == 0)
+  {
+    u[0] = u[1] = 1; //for kappa
+  }
+  else if (mode <= 2) //u_off are either 01 or 10
+  {
+    OPJ_UINT32 d = dec[vlc & 0x7];  //look at the least significant 3 bits
+    vlc >>= d & 0x3;                //prefix length
+    consumed_bits += d & 0x3;
+
+    OPJ_UINT32 suffix_len = ((d >> 2) & 0x7);
+    consumed_bits += suffix_len;
+    d = (d >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+    u[0] = (mode == 1) ? d + 1 : 1; //for kappa
+    u[1] = (mode == 1) ? 1 : d + 1; //for kappa
+  }
+  else if (mode == 3) // both u_off are 1
+  {
+    // both prefixes come first in the stream, followed by both suffixes
+    OPJ_UINT32 d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+    vlc >>= d1 & 0x3;                // Consume bits
+    consumed_bits += d1 & 0x3;
+
+    OPJ_UINT32 d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+    vlc >>= d2 & 0x3;                // Consume bits
+    consumed_bits += d2 & 0x3;
+
+    OPJ_UINT32 suffix_len = ((d1 >> 2) & 0x7);
+    consumed_bits += suffix_len;
+    d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+    u[0] = d1 + 1;  //1 for kappa
+    vlc >>= suffix_len;
+
+    suffix_len = ((d2 >> 2) & 0x7);
+    consumed_bits += suffix_len;
+    d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+    u[1] = d2 + 1;  //1 for kappa
+  }
+  return consumed_bits;
+}
+
+//************************************************************************/
+/** @brief Reader/unstuffing state of a forward-growing bitstream; used
+  *         for the MagSgn and SPP bitstreams */
+struct frwd_struct {
+  const OPJ_UINT8* data; //!<next byte to read from the bitstream
+  OPJ_UINT64 tmp;        //!<bit buffer holding data that was already read
+  OPJ_UINT32 bits;       //!<count of valid bits currently held in tmp
+  OPJ_BOOL unstuff;      //!<true if last byte read was 0xFF (unstuff next)
+  int size;              //!<bytes left; counts down, may become negative
+  OPJ_UINT32 X;          //!<0 or 0xFF, fed in once the bitstream runs out
+};
+typedef struct frwd_struct frwd_struct_t;
+
+//************************************************************************/
+/** @brief Read and unstuff 32 bits' worth of bytes from a forward-growing
+  *         bitstream
+  *
+  *  Serves both the MagSgn and the SPP bitstreams.  Once the bitstream is
+  *  exhausted the byte X is fed in instead of data (0xFF's for MagSgn,
+  *  0's for SPP).
+  *
+  *  Unstuffing prevents sequences larger than 0xFF7F from appearing in the
+  *  compressed data: the byte following a 0xFF has its MSB set to 0, and
+  *  that bit is skipped here, so such a byte contributes only 7 bits.
+  *
+  *  Bytes are fetched one at a time and only while msp->size is positive,
+  *  so nothing beyond the end of the buffer is read and the result does
+  *  not depend on the alignment of msp->data or on the host byte order.
+  *  The unstuff flag follows the byte actually fed in, X included.
+  *  msp->size is decremented once per byte requested, four per call, and
+  *  may therefore become negative.
+  *
+  *  @param  [in]  msp is a pointer to frwd_struct_t structure; 28 to 32
+  *                bits are appended to msp->tmp
+  */ 
+void frwd_read(frwd_struct_t *msp)
+{
+  assert(msp->bits <= 32); // assert that there is a space for 32 bits
+
+  OPJ_UINT32 t = 0;                // unstuffed bits gathered by this call
+  OPJ_UINT32 bits = 0;             // number of valid bits in t
+  OPJ_BOOL unstuff = msp->unstuff; // was the previously read byte 0xFF?
+
+  for (int i = 0; i < 4; ++i)
+  {
+    // get next byte, if bitstream is exhausted, replace it with X
+    // (size is decremented either way)
+    OPJ_UINT32 b = msp->size-- > 0 ? *msp->data++ : msp->X;
+
+    // append the byte above the bits gathered so far; a byte that follows
+    // 0xFF counts for 7 bits only, so its stuffed MSB gets overlaid
+    t |= b << bits;
+    bits += unstuff ? 7U : 8U;
+    unstuff = (b == 0xFF);         // Do we need unstuffing next?
+  }
+
+  msp->unstuff = unstuff;                    // for next byte
+  msp->tmp |= ((OPJ_UINT64)t) << msp->bits;  // move data to msp->tmp
+  msp->bits += bits;
+}
+
+//************************************************************************/
+/** @brief Set up a frwd_struct_t reader and preload its bit buffer
+  *  
+  *  @param [in]  msp is the reader state to initialize
+  *  @param [in]  data points to the first byte of the bitstream
+  *  @param [in]  size is the length of the bitstream in bytes
+  *  @param [in]  X is the byte fed in once the bitstream is exhausted;
+  *               it must be 0 or 0xFF.  See frwd_read.
+  */
+void frwd_init(frwd_struct_t *msp, const OPJ_UINT8* data, int size, 
+               OPJ_UINT32 X)
+{
+  msp->X = X;
+  assert(msp->X == 0 || msp->X == 0xFF);
+  msp->size = size;
+  msp->data = data;
+  msp->unstuff = OPJ_FALSE;
+  msp->bits = 0;
+  msp->tmp = 0;
+
+  // Warm-up, one byte at a time: take the 1 to 4 bytes (4 when data is
+  // already a multiple of 4) that lead up to the next 4-byte boundary.
+  // This is meant for architectures on which a 4-byte read has to start
+  // at an address that is a multiple of 4.
+  int lead = 4 - (int)((intptr_t)(msp->data) & 0x3);
+  while (lead-- > 0)
+  {
+    // take a byte from the stream while any remain, otherwise feed X
+    OPJ_UINT64 byte = msp->size-- > 0 ? *msp->data++ : msp->X;
+    msp->tmp |= (byte << msp->bits);        // append above the stored bits
+    msp->bits += 8 - msp->unstuff;          // 7 bits if previous was 0xFF
+    msp->unstuff = ((byte & 0xFF) == 0xFF); // next byte needs unstuffing?
+  }
+
+  frwd_read(msp); // then read 32 bits more
+}
+
+//************************************************************************/
+/** @brief Consume num_bits bits from the bitstream of frwd_struct_t
+  *  (static: a bare C99 inline has no external definition to link to)
+  *  @param [in]  msp is a pointer to frwd_struct_t
+  *  @param [in]  num_bits is the number of bits to consume (<= msp->bits)
+  */
+static inline void frwd_advance(frwd_struct_t *msp, OPJ_UINT32 num_bits)
+{
+  assert(num_bits <= msp->bits);
+  msp->tmp >>= num_bits;  // consume num_bits
+  msp->bits -= num_bits;
+}
+
+//************************************************************************/
+/** @brief Peek at the next 32 bits of the frwd_struct_t bitstream
+  *
+  *  Refills the buffer (at most two reads) while it holds fewer than 32
+  *  bits; no bits are consumed.
+  *  @param [in]  msp is a pointer to frwd_struct_t
+  *  @return the low 32 bits of the buffered data
+  */
+OPJ_UINT32 frwd_fetch(frwd_struct_t *msp)
+{
+  // top up while short of 32 bits, but never more than twice
+  for (int refill = 0; refill < 2 && msp->bits < 32; ++refill)
+    frwd_read(msp);
+  return (OPJ_UINT32)msp->tmp;
+}
+
+//************************************************************************/
+/** @brief Allocates (or reuses) and zeroes the T1 buffers for a codeblock
+  *  t1->data is sized for w * h samples.  t1->flags holds at least the HT
+  *  decoder's scratch area: 4 arrays of 132 OPJ_UINT32 plus 528 bytes.
+  *  When an allocation fails the recorded size is reset to 0, so that a
+  *  later call allocates afresh instead of trusting a freed buffer.
+  *  @param [in]  t1 is the T1 state that owns the buffers
+  *  @param [in]  w is the codeblock width  (asserted to be <= 1024)
+  *  @param [in]  h is the codeblock height (asserted to be <= 1024)
+  *  @return OPJ_TRUE on success, OPJ_FALSE if an allocation failed
+  */
+static OPJ_BOOL opj_t1_allocate_buffers(
+    opj_t1_t *t1,
+    OPJ_UINT32 w,
+    OPJ_UINT32 h)
+{
+    OPJ_UINT32 flagssize;
+
+    /* No overflow below: prior checks ensure these (spec-mandated) limits */
+    assert(w <= 1024);
+    assert(h <= 1024);
+    assert(w * h <= 4096);
+
+    {
+        OPJ_UINT32 datasize = w * h;
+        if (datasize > t1->datasize) {
+            opj_aligned_free(t1->data);
+            t1->data = (OPJ_INT32*) 
+              opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
+            if (!t1->data) {
+                /* FIXME event manager error callback */
+                t1->datasize = 0; /* old buffer is gone; forget its size */
+                return OPJ_FALSE;
+            }
+            t1->datasize = datasize;
+        }
+        /* memset first arg is declared to never be null by gcc */
+        if (t1->data != NULL) {
+            memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
+        }
+    }
+
+    // We need 4 buffers of 129 integers each, expanded to 132 integers each,
+    // and 514 bytes expanded to 528, so that all are multiples of 16 bytes.
+    flagssize = 132U * sizeof(OPJ_UINT32) * 4U;
+    flagssize += 528U;
+
+    // NOTE(review): t1->flagssize is presumably shared with the non-HT
+    // decoder and assumed to count opj_flag_t elements there, not bytes --
+    // confirm against t1.c.  Hence the buffer is sized in those units.
+    if (flagssize > t1->flagssize) {
+        opj_aligned_free(t1->flags);
+        t1->flags = (opj_flag_t*)
+          opj_aligned_malloc(flagssize * sizeof(opj_flag_t));
+        if (!t1->flags) {
+            t1->flagssize = 0; /* old buffer is gone; forget its size */
+            return OPJ_FALSE;
+        }
+    }
+    t1->flagssize = flagssize;
+    memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
+
+    t1->w = w;
+    t1->h = h;
+    return OPJ_TRUE;
+}
+
+//************************************************************************/
+/** @brief Decodes one codeblock, processing the cleanup, siginificance
+  *         propagation, and magnitude refinement pass
+  */
+OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
+                               opj_tcd_cblk_dec_t* cblk,
+                               OPJ_UINT32 orient,
+                               OPJ_UINT32 roishift,
+                               OPJ_UINT32 cblksty,
+                               opj_event_mgr_t *p_manager,
+                               opj_mutex_t* p_manager_mutex,
+                               OPJ_BOOL check_pterm)
+{
+  // We ignor orient, because the same decoder is used for all subbands
+  // We also ignore check_pterm, because I am not sure how it applies
+  assert(cblksty == 0x40); // that is the only support mode
+  if (roishift != 0) {
+    if (p_manager_mutex)
+      opj_mutex_lock(p_manager_mutex);
+    opj_event_msg(p_manager, EVT_ERROR, "We do not support ROI in decoding "
+                            "HT codeblocks\n");
+    if (p_manager_mutex)
+      opj_mutex_unlock(p_manager_mutex);
+    return OPJ_FALSE;
+  }
+
+  OPJ_BYTE* cblkdata = NULL;
+
+  if (!opj_t1_allocate_buffers(
+              t1,
+              (OPJ_UINT32)(cblk->x1 - cblk->x0),
+              (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
+      return OPJ_FALSE;
+  }
+
+  /* Even if we have a single chunk, in multi-threaded decoding */
+  /* the insertion of our synthetic marker might potentially override */
+  /* valid codestream of other codeblocks decoded in parallel. */
+  if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
+      OPJ_UINT32 i;
+      OPJ_UINT32 cblk_len;
+
+      /* Compute whole codeblock length from chunk lengths */
+      cblk_len = 0;
+      for (i = 0; i < cblk->numchunks; i++) {
+          cblk_len += cblk->chunks[i].len;
+      }
+
+      /* Allocate temporary memory if needed */
+      if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
+          cblkdata = (OPJ_BYTE*)opj_realloc(
+              t1->cblkdatabuffer, cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
+          if (cblkdata == NULL) {
+              return OPJ_FALSE;
+          }
+          t1->cblkdatabuffer = cblkdata;
+          memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
+          t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
+      }
+
+      /* Concatenate all chunks */
+      cblkdata = t1->cblkdatabuffer;
+      cblk_len = 0;
+      for (i = 0; i < cblk->numchunks; i++) {
+          memcpy(cblkdata+cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
+          cblk_len += cblk->chunks[i].len;
+      }
+  } else if (cblk->numchunks == 1) {
+      cblkdata = cblk->chunks[0].data;
+  } else {
+      /* Not sure if that can happen in practice, but avoid Coverity to */
+      /* think we will dereference a null cblkdta pointer */
+      return OPJ_TRUE;
+  }
+
+  // coded_data is a pointer to bitstream
+  OPJ_UINT8* coded_data = cblkdata;
+  // decoded_data is a pointer to decoded codeblock data buf.
+  OPJ_UINT32* decoded_data = t1->data;
+  // num_passes is the number of passes: 1 if CUP only, 2 for CUP+SPP, and 
+  // 3 for CUP+SPP+MRP
+  OPJ_UINT32 num_passes = cblk->numsegs>0 ? cblk->segs[0].real_num_passes : 0;
+  num_passes += cblk->numsegs>1 ? cblk->segs[1].real_num_passes : 0;
+  // lengths1 is the length of cleanup pass
+  OPJ_UINT32 lengths1 = num_passes > 0 ? cblk->segs[0].len : 0;
+  // lengths2 is the length of refinement passes (either SPP only or SPP+MRP)
+  OPJ_UINT32 lengths2 = num_passes > 1 ? cblk->segs[1].len : 0;
+  // width is the decoded codeblock width 
+  OPJ_UINT32 width = cblk->x1 - cblk->x0;
+  // height is the decoded codeblock height
+  OPJ_UINT32 height = cblk->y1 - cblk->y0;
+  // stride is the decoded codeblock buffer stride 
+  OPJ_UINT32 stride = width;
+
+   /*  sigma1 and sigma2 contains significant (i.e., non-zero) pixel 
+    *  locations.  The buffers are used interchangeably, because we need
+    *  more than 4 rows of significance information at a given time.
+    *  Each 32 bits contain significance information for 4 rows of 8 
+    *  columns each.  If we denote 32 bits by 0xaaaaaaaa, the each "a" is
+    *  called a nibble and has significance information for 4 rows.
+    *  The least significant nibble has information for the first column,
+    *  and so on. The nibble's LSB is for the first row, and so on.
+    *  Since, at most, we can have 1024 columns in a quad, we need 128
+    *  entries; we added 1 for convenience when propagation of signifcance
+    *  goes outside the structure
+    *  To work in OpenJPEG these buffers has been expanded to 132.
+    */
+  OPJ_UINT32 *pflags = (OPJ_UINT32 *)t1->flags;
+  OPJ_UINT32 *sigma1 = pflags, *sigma2 = sigma1 + 132;
+  // mbr arrangement is similar to sigma; mbr contains locations 
+  // that become significant during significance propagation pass
+  OPJ_UINT32 *mbr1 = sigma2 + 132, *mbr2 = mbr1 + 132;
+  //a pointer to sigma
+  OPJ_UINT32 *sip = sigma1; //pointers to arrays to be used interchangeably
+  OPJ_UINT32 sip_shift = 0; //the amount of shift needed for sigma
+
+  if (num_passes > 1 && lengths2 == 0)
+  {
+    if (p_manager_mutex)
+      opj_mutex_lock(p_manager_mutex);
+    opj_event_msg(p_manager, EVT_WARNING, "A malformed codeblock that has "
+                  "more than one coding pass, but zero length for "
+                  "2nd and potential 3rd pass.\n");
+    if (p_manager_mutex)
+      opj_mutex_unlock(p_manager_mutex);
+    num_passes = 1;
+  }
+  if (num_passes > 3)
+  {
+    if (p_manager_mutex)
+      opj_mutex_lock(p_manager_mutex);
+    opj_event_msg(p_manager, EVT_WARNING, "We do not support more than 3 "
+                            "coding passes; This codeblocks has %d passes.\n",
+                            num_passes);
+    if (p_manager_mutex)
+      opj_mutex_unlock(p_manager_mutex);
+    return OPJ_FALSE;
+  }
+
+  if (cblk->numbps == 1 && num_passes > 1)
+    {
+      // We do not have enough precision to decode SgnProp nor MagRef passes.
+      // We decode the cleanup passes only
+      if (cannot_decode_spp_mrp_msg == OPJ_FALSE) {
+        if (p_manager_mutex)
+          opj_mutex_lock(p_manager_mutex);
+        cannot_decode_spp_mrp_msg = OPJ_TRUE;
+        opj_event_msg(p_manager, EVT_WARNING, "Not enough precision to decode "
+                                "the SgnProp nor MagRef passes.  This message "
+                                "will not be displayed again.\n");
+        if (p_manager_mutex)
+          opj_mutex_unlock(p_manager_mutex);
+      }
+      num_passes = 1;
+    }
+  if (cblk->numbps == 0)
+    {
+      // We do not have enough precision to decode the CUP pass with the 
+      // center of bin bit set.  The code can be modified to support this 
+      // case, without using the center of the bin.
+      if (cannot_decode_due_to_insufficient_precision == OPJ_FALSE) {
+        if (p_manager_mutex)
+          opj_mutex_lock(p_manager_mutex);
+        cannot_decode_due_to_insufficient_precision = OPJ_TRUE;
+        opj_event_msg(p_manager, EVT_WARNING, "Not enough precision to decode "
+                                "the cleanup pass. The code should be "
+                                "modified to support this case. This message "
+                                "will not be displayed again.\n");
+        if (p_manager_mutex)
+          opj_mutex_unlock(p_manager_mutex);
+      }
+      return OPJ_TRUE;
+    }
+
+  OPJ_UINT32 p = cblk->numbps; 
+  // zero planes plus 1
+  OPJ_UINT32 zero_planes_p1 = cblk->Mb - cblk->numbps + 1;
+
+  // read scup and fix the bytes there
+  int lcup, scup;
+  lcup = (int)lengths1;  // length of CUP
+  //scup is the length of MEL + VLC
+  scup = (((int)coded_data[lcup-1]) << 4) + (coded_data[lcup-2] & 0xF);
+  if (scup < 2 || scup > lcup || scup > 4079) //something is wrong
+    return OPJ_FALSE;
+
+  // init structures
+  dec_mel_t mel;
+  mel_init(&mel, coded_data, lcup, scup);
+  rev_struct_t vlc;
+  rev_init(&vlc, coded_data, lcup, scup);
+  frwd_struct_t magsgn;
+  frwd_init(&magsgn, coded_data, lcup - scup, 0xFF);
+  frwd_struct_t sigprop;
+  if (num_passes > 1) // needs to be tested
+    frwd_init(&sigprop, coded_data + lengths1, (int)lengths2, 0);
+  rev_struct_t magref;
+  if (num_passes > 2)
+    rev_init_mrp(&magref, coded_data, (int)lengths1, (int)lengths2);
+
+  /** State storage
+    *  One byte per quad; for 1024 columns, or 512 quads, we need
+    *  512 bytes. We are using 2 extra bytes one on the left and one on
+    *  the right for convenience.
+    *
+    *  The MSB bit in each byte is (\sigma^nw | \sigma^n), and the 7 LSBs
+    *  contain max(E^nw | E^n)
+    */
+
+  // 514 is enough for a block width of 1024, +2 extra
+  // here expanded to 528
+  OPJ_UINT8 *lsp, *line_state = (OPJ_UINT8 *)(mbr2 + 132); 
+
+  //initial 2 lines
+  /////////////////
+  lsp = line_state;              // point to line state
+  lsp[0] = 0;                    // for initial row of quad, we set to 0
+  int run = mel_get_run(&mel);   // decode runs of events from MEL bitstrm
+                                 // data represented as runs of 0 events
+                                 // See mel_decode description
+  OPJ_UINT32 vlc_val;            // fetched data from VLC bitstream
+  OPJ_UINT32 qinf[2] = { 0 };    // quad info decoded from VLC bitstream
+  OPJ_UINT32 c_q = 0;            // context for quad q
+  OPJ_UINT32* sp = decoded_data; // decoded codeblock samples
+
+  for (OPJ_UINT32 x = 0; x < width; x += 4) // one iteration per quad pair
+  {
+    // decode VLC
+    /////////////
+
+    //first quad
+    // Get the head of the VLC bitstream. One fetch is enough for two 
+    // quads, since the largest VLC code is 7 bits, and maximum number of 
+    // bits used for u is 8.  Therefore for two quads we need 30 bits 
+    // (if we include unstuffing, then 32 bits are enough, since we have 
+    // a maximum of one stuffing per two bytes)
+    vlc_val = rev_fetch(&vlc);
+
+    //decode VLC using the context c_q and the head of the VLC bitstream
+    qinf[0] = vlc_tbl0[ (c_q << 7) | (vlc_val & 0x7F) ];
+
+    if (c_q == 0) // if zero context, we need to use one MEL event
+    {
+      run -= 2; //the number of 0 events is multiplied by 2, so subtract 2
+
+      // Is the run terminated in 1? if so, use decoded VLC code, 
+      // otherwise, discard decoded data, since we will decoded again 
+      // using a different context
+      qinf[0] = (run == -1) ? qinf[0] : 0;
+
+      // is run -1 or -2? this means a run has been consumed
+      if (run < 0) 
+        run = mel_get_run(&mel);  // get another run
+    }
+
+    // prepare context for the next quad; eqn. 1 in ITU T.814
+    c_q = ((qinf[0] & 0x10) >> 4) | ((qinf[0] & 0xE0) >> 5);
+
+    //remove data from vlc stream (0 bits are removed if qinf is not used)
+    vlc_val = rev_advance(&vlc, qinf[0] & 0x7);
+
+    //update sigma
+    // The update depends on the value of x; consider one OPJ_UINT32
+    // if x is 0, 8, 16 and so on, then this line update c locations
+    //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+    //                         LSB   c c 0 0 0 0 0 0 
+    //                               c c 0 0 0 0 0 0
+    //                               0 0 0 0 0 0 0 0
+    //                               0 0 0 0 0 0 0 0
+    // if x is 4, 12, 20, then this line update locations c
+    //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+    //                         LSB   0 0 0 0 c c 0 0 
+    //                               0 0 0 0 c c 0 0
+    //                               0 0 0 0 0 0 0 0
+    //                               0 0 0 0 0 0 0 0
+    *sip |= (((qinf[0] & 0x30)>>4) | ((qinf[0] & 0xC0)>>2)) << sip_shift;
+
+    //second quad
+    qinf[1] = 0;
+    if (x + 2 < width) // do not run if codeblock is narrower
+    {
+      //decode VLC using the context c_q and the head of the VLC bitstream
+      qinf[1] = vlc_tbl0[(c_q << 7) | (vlc_val & 0x7F)]; 
+
+      // if context is zero, use one MEL event
+      if (c_q == 0) //zero context
+      {
+        run -= 2; //subtract 2, since events number if multiplied by 2
+
+        // if event is 0, discard decoded qinf
+        qinf[1] = (run == -1) ? qinf[1] : 0;
+
+        if (run < 0) // have we consumed all events in a run
+          run = mel_get_run(&mel); // if yes, then get another run
+      }
+
+      //prepare context for the next quad, eqn. 1 in ITU T.814
+      c_q = ((qinf[1] & 0x10) >> 4) | ((qinf[1] & 0xE0) >> 5);
+
+      //remove data from vlc stream, if qinf is not used, cwdlen is 0
+      vlc_val = rev_advance(&vlc, qinf[1] & 0x7);
+    }
+
+    //update sigma
+    // The update depends on the value of x; consider one OPJ_UINT32
+    // if x is 0, 8, 16 and so on, then this line update c locations
+    //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+    //                         LSB   0 0 c c 0 0 0 0 
+    //                               0 0 c c 0 0 0 0
+    //                               0 0 0 0 0 0 0 0
+    //                               0 0 0 0 0 0 0 0
+    // if x is 4, 12, 20, then this line update locations c
+    //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+    //                         LSB   0 0 0 0 0 0 c c 
+    //                               0 0 0 0 0 0 c c
+    //                               0 0 0 0 0 0 0 0
+    //                               0 0 0 0 0 0 0 0
+    *sip |= (((qinf[1] & 0x30) | ((qinf[1] & 0xC0)<<2))) << (4+sip_shift);
+
+    sip += x & 0x7 ? 1 : 0; // move sigma pointer to next entry
+    sip_shift ^= 0x10;      // increment/decrement sip_shift by 16
+
+    // retrieve u
+    /////////////
+    OPJ_UINT32 U_q[2]; // u values for the quad pair
+
+    // uvlc_mode is made up of u_offset bits from the quad pair
+    OPJ_UINT32 uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
+    if (uvlc_mode == 3)  // if both u_offset are set, get an event from
+    {                    // the MEL run of events
+      run -= 2; //subtract 2, since events number if multiplied by 2
+      uvlc_mode += (run == -1) ? 1 : 0; //increment uvlc_mode if event is 1
+      if (run < 0) // if run is consumed (run is -1 or -2), get another run
+        run = mel_get_run(&mel);
+    }
+    //decode uvlc_mode to get u for both quads
+    OPJ_UINT32 consumed_bits = decode_init_uvlc(vlc_val, uvlc_mode, U_q);
+    if (U_q[0] > zero_planes_p1 || U_q[1] > zero_planes_p1)
+    {
+      if (p_manager_mutex)
+        opj_mutex_lock(p_manager_mutex);
+      opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. Decoding "
+                              "this codeblock is stopped.\n");
+      if (p_manager_mutex)
+        opj_mutex_unlock(p_manager_mutex);
+      return OPJ_FALSE;
+    }
+
+    //consume u bits in the VLC code
+    vlc_val = rev_advance(&vlc, consumed_bits);
+
+    //decode magsgn and update line_state
+    /////////////////////////////////////
+    OPJ_UINT32 m_n, v_n;
+    OPJ_UINT32 ms_val;
+
+    //We obtain a mask for the samples locations that needs evaluation
+    OPJ_UINT32 locs = 0xFF;
+    if (x + 4 > width) locs >>= (x + 4 - width) << 1; // limits width
+    locs = height > 1 ? locs : (locs & 0x55);         // limits height
+
+    //first quad, starting at first sample in quad and moving on
+    if (qinf[0] & 0x10) //is it signifcant? (sigma_n)
+    {
+      ms_val = frwd_fetch(&magsgn);         //get 32 bits of magsgn data
+      m_n = U_q[0] - ((qinf[0] >> 12) & 1); //evaluate m_n (number of bits
+                                  // to read from bitstream), using EMB e_k
+      frwd_advance(&magsgn, m_n);         //consume m_n
+      OPJ_UINT32 val = ms_val << 31;      //get sign bit
+      v_n = ms_val & ((1U << m_n) - 1);   //keep only m_n bits
+      v_n |= ((qinf[0] & 0x100) >> 8) << m_n;  //add EMB e_1 as MSB
+      v_n |= 1;                                //add center of bin    
+      //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
+      //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
+      sp[0] = val | ((v_n + 2) << (p - 1)); 
+    }
+    else if (locs & 0x1) // if this is outside the codeblock, set the 
+      sp[0] = 0;         // sample to zero
+
+    if (qinf[0] & 0x20) //sigma_n
+    {
+      ms_val = frwd_fetch(&magsgn);         //get 32 bits
+      m_n = U_q[0] - ((qinf[0] >> 13) & 1); //m_n, uses EMB e_k
+      frwd_advance(&magsgn, m_n);           //consume m_n
+      OPJ_UINT32 val = ms_val << 31;        //get sign bit
+      v_n = ms_val & ((1U << m_n) - 1);     //keep only m_n bits
+      v_n |= ((qinf[0] & 0x200) >> 9) << m_n; //add EMB e_1
+      v_n |= 1;                               //bin center
+      //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
+      //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
+      sp[stride] = val | ((v_n + 2) << (p - 1)); 
+
+      //update line_state: bit 7 (\sigma^N), and E^N
+      OPJ_UINT32 t = lsp[0] & 0x7F;          //keep E^NW
+      v_n = 32 - count_leading_zeros(v_n); 
+      lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n)); //max(E^NW, E^N) | s
+    }
+    else if (locs & 0x2) // if this is outside the codeblock, set the 
+      sp[stride] = 0;    //no need to update line_state
+
+    ++lsp; // move to next quad information
+    ++sp;  // move to next column of samples
+
+    //this is similar to the above two samples
+    if (qinf[0] & 0x40) 
+    {
+      ms_val = frwd_fetch(&magsgn);
+      m_n = U_q[0] - ((qinf[0] >> 14) & 1); 
+      frwd_advance(&magsgn, m_n);
+      OPJ_UINT32 val = ms_val << 31;
+      v_n = ms_val & ((1U << m_n) - 1);
+      v_n |= (((qinf[0] & 0x400) >> 10) << m_n);
+      v_n |= 1; 
+      sp[0] = val | ((v_n + 2) << (p - 1));
+    }
+    else if (locs & 0x4)
+      sp[0] = 0;
+
+    lsp[0] = 0;
+    if (qinf[0] & 0x80) 
+    {
+      ms_val = frwd_fetch(&magsgn);
+      m_n = U_q[0] - ((qinf[0] >> 15) & 1); //m_n
+      frwd_advance(&magsgn, m_n);
+      OPJ_UINT32 val = ms_val << 31;
+      v_n = ms_val & ((1U << m_n) - 1);
+      v_n |= ((qinf[0] & 0x800) >> 11) << m_n;
+      v_n |= 1; //center of bin
+      sp[stride] = val | ((v_n + 2) << (p - 1));
+
+      //line_state: bit 7 (\sigma^NW), and E^NW for next quad
+      lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
+    }
+    else if (locs & 0x8) //if outside set to 0
+      sp[stride] = 0;
+
+    ++sp; //move to next column
+
+    //second quad
+    if (qinf[1] & 0x10) 
+    {
+      ms_val = frwd_fetch(&magsgn);
+      m_n = U_q[1] - ((qinf[1] >> 12) & 1); //m_n
+      frwd_advance(&magsgn, m_n);
+      OPJ_UINT32 val = ms_val << 31;
+      v_n = ms_val & ((1U << m_n) - 1);
+      v_n |= (((qinf[1] & 0x100) >> 8) << m_n);
+      v_n |= 1;
+      sp[0] = val | ((v_n + 2) << (p - 1));
+    }
+    else if (locs & 0x10)
+      sp[0] = 0;
+
+    if (qinf[1] & 0x20)
+    {
+      ms_val = frwd_fetch(&magsgn);
+      m_n = U_q[1] - ((qinf[1] >> 13) & 1); //m_n
+      frwd_advance(&magsgn, m_n);
+      OPJ_UINT32 val = ms_val << 31;
+      v_n = ms_val & ((1U << m_n) - 1);
+      v_n |= (((qinf[1] & 0x200) >> 9) << m_n);
+      v_n |= 1;
+      sp[stride] = val | ((v_n + 2) << (p - 1));
+
+      //update line_state: bit 7 (\sigma^N), and E^N
+      OPJ_UINT32 t = lsp[0] & 0x7F;            //E^NW
+      v_n = 32 - count_leading_zeros(v_n);     //E^N
+      lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n)); //max(E^NW, E^N) | s
+    }
+    else if (locs & 0x20)
+      sp[stride] = 0;      //no need to update line_state
+
+    ++lsp; //move line state to next quad
+    ++sp;  //move to next sample
+
+    if (qinf[1] & 0x40)
+    {
+      ms_val = frwd_fetch(&magsgn);
+      m_n = U_q[1] - ((qinf[1] >> 14) & 1); //m_n
+      frwd_advance(&magsgn, m_n);
+      OPJ_UINT32 val = ms_val << 31;
+      v_n = ms_val & ((1U << m_n) - 1);
+      v_n |= (((qinf[1] & 0x400) >> 10) << m_n);
+      v_n |= 1;
+      sp[0] = val | ((v_n + 2) << (p - 1));
+    }
+    else if (locs & 0x40)
+      sp[0] = 0;
+
+    lsp[0] = 0;
+    if (qinf[1] & 0x80)
+    {
+      ms_val = frwd_fetch(&magsgn);
+      m_n = U_q[1] - ((qinf[1] >> 15) & 1); //m_n
+      frwd_advance(&magsgn, m_n);
+      OPJ_UINT32 val = ms_val << 31;
+      v_n = ms_val & ((1U << m_n) - 1);
+      v_n |= (((qinf[1] & 0x800) >> 11) << m_n);
+      v_n |= 1; //center of bin
+      sp[stride] = val | ((v_n + 2) << (p - 1));
+
+      //line_state: bit 7 (\sigma^NW), and E^NW for next quad
+      lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
+    }
+    else if (locs & 0x80)
+      sp[stride] = 0;
+
+    ++sp;
+  }
+
+  //non-initial lines
+  //////////////////////////
+  for (OPJ_UINT32 y = 2; y < height; /*done at the end of loop*/)
+  {
+    sip_shift ^= 0x2;  // shift sigma to the upper half od the nibble
+    sip_shift &= 0xFFFFFFEFU; //move back to 0 (it might have been at 0x10)
+    OPJ_UINT32 *sip = y & 0x4 ? sigma2 : sigma1; //choose sigma array
+
+    lsp = line_state;
+    OPJ_UINT8 ls0 = lsp[0];         // read the line state value
+    lsp[0] = 0;                     // and set it to zero
+    sp = decoded_data + y * stride; // generated samples
+    c_q = 0;                        // context
+    for (OPJ_UINT32 x = 0; x < width; x += 4)
+    {
+      // decode vlc
+      /////////////
+
+      //first quad
+      // get context, eqn. 2 ITU T.814
+      // c_q has \sigma^W | \sigma^SW
+      c_q |= (ls0 >> 7);          //\sigma^NW | \sigma^N
+      c_q |= (lsp[1] >> 5) & 0x4; //\sigma^NE | \sigma^NF
+
+      //the following is very similar to previous code, so please refer to 
+      // that
+      vlc_val = rev_fetch(&vlc);
+      qinf[0] = vlc_tbl1[(c_q << 7) | (vlc_val & 0x7F)];
+      if (c_q == 0) //zero context
+      {
+        run -= 2;
+        qinf[0] = (run == -1) ? qinf[0] : 0;
+        if (run < 0)
+          run = mel_get_run(&mel);
+      }
+      //prepare context for the next quad, \sigma^W | \sigma^SW
+      c_q = ((qinf[0] & 0x40) >> 5) | ((qinf[0] & 0x80) >> 6);
+
+      //remove data from vlc stream
+      vlc_val = rev_advance(&vlc, qinf[0] & 0x7);
+
+      //update sigma
+      // The update depends on the value of x and y; consider one OPJ_UINT32
+      // if x is 0, 8, 16 and so on, and y is 2, 6, etc., then this 
+      // line update c locations
+      //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+      //                         LSB   0 0 0 0 0 0 0 0 
+      //                               0 0 0 0 0 0 0 0
+      //                               c c 0 0 0 0 0 0
+      //                               c c 0 0 0 0 0 0
+      *sip |= (((qinf[0]&0x30) >> 4) | ((qinf[0]&0xC0) >> 2)) << sip_shift;
+
+      //second quad
+      qinf[1] = 0;
+      if (x + 2 < width)
+      {
+        c_q |= (lsp[1] >> 7);
+        c_q |= (lsp[2] >> 5) & 0x4;
+        qinf[1] = vlc_tbl1[(c_q << 7) | (vlc_val & 0x7F)];
+        if (c_q == 0) //zero context
+        {
+          run -= 2;
+          qinf[1] = (run == -1) ? qinf[1] : 0;
+          if (run < 0)
+            run = mel_get_run(&mel);
+        }
+        //prepare context for the next quad
+        c_q = ((qinf[1] & 0x40) >> 5) | ((qinf[1] & 0x80) >> 6);
+        //remove data from vlc stream
+        vlc_val = rev_advance(&vlc, qinf[1] & 0x7);
+      }
+
+      //update sigma
+      *sip |= (((qinf[1]&0x30) | ((qinf[1]&0xC0) << 2))) << (4+sip_shift);
+
+      sip += x & 0x7 ? 1 : 0;
+      sip_shift ^= 0x10;
+
+      //retrieve u
+      ////////////
+      OPJ_UINT32 U_q[2];
+      OPJ_UINT32 uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
+      OPJ_UINT32 consumed_bits = decode_noninit_uvlc(vlc_val, uvlc_mode, U_q);
+      vlc_val = rev_advance(&vlc, consumed_bits);
+
+      //calculate E^max and add it to U_q, eqns 5 and 6 in ITU T.814
+      if ((qinf[0] & 0xF0) & ((qinf[0] & 0xF0) - 1)) // is \gamma_q 1?
+      {
+        OPJ_UINT32 E = (ls0 & 0x7Fu);
+        E = E > (lsp[1] & 0x7Fu) ? E : (lsp[1]&0x7Fu); //max(E, E^NE, E^NF)
+        //since U_q already has u_q + 1, we subtract 2 instead of 1
+        U_q[0] += E > 2 ? E - 2 : 0;
+      }
+
+      if ((qinf[1] & 0xF0) & ((qinf[1] & 0xF0) - 1)) //is \gamma_q 1? 
+      {
+        OPJ_UINT32 E = (lsp[1] & 0x7Fu);
+        E = E > (lsp[2] & 0x7Fu) ? E : (lsp[2]&0x7Fu); //max(E, E^NE, E^NF)
+        //since U_q already has u_q + 1, we subtract 2 instead of 1
+        U_q[1] += E > 2 ? E - 2 : 0;
+      }
+
+      if (U_q[0] > zero_planes_p1 || U_q[1] > zero_planes_p1)
+      {
+        if (p_manager_mutex)
+          opj_mutex_lock(p_manager_mutex);
+        opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
+                                "Decoding this codeblock is stopped.\n");
+        if (p_manager_mutex)
+          opj_mutex_unlock(p_manager_mutex);
+        return OPJ_FALSE;
+      }
+
+      ls0 = lsp[2]; //for next double quad
+      lsp[1] = lsp[2] = 0;
+
+      //decode magsgn and update line_state
+      /////////////////////////////////////
+      OPJ_UINT32 m_n, v_n;
+      OPJ_UINT32 ms_val;
+
+      //locations where samples need update
+      OPJ_UINT32 locs = 0xFF;
+      if (x + 4 > width) locs >>= (x + 4 - width) << 1;
+      locs = height > 1 ? locs : (locs & 0x55);
+
+
+      if (qinf[0] & 0x10) //sigma_n
+      {
+        ms_val = frwd_fetch(&magsgn);
+        m_n = U_q[0] - ((qinf[0] >> 12) & 1); //m_n
+        frwd_advance(&magsgn, m_n);
+        OPJ_UINT32 val = ms_val << 31;
+        v_n = ms_val & ((1U << m_n) - 1);
+        v_n |= ((qinf[0] & 0x100) >> 8) << m_n;
+        v_n |= 1; //center of bin
+        sp[0] = val | ((v_n + 2) << (p - 1));
+      }
+      else if (locs & 0x1)
+        sp[0] = 0;
+
+      if (qinf[0] & 0x20) //sigma_n
+      {
+        ms_val = frwd_fetch(&magsgn);
+        m_n = U_q[0] - ((qinf[0] >> 13) & 1); //m_n
+        frwd_advance(&magsgn, m_n);
+        OPJ_UINT32 val = ms_val << 31;
+        v_n = ms_val & ((1U << m_n) - 1);
+        v_n |= ((qinf[0] & 0x200) >> 9) << m_n;
+        v_n |= 1; //center of bin
+        sp[stride] = val | ((v_n + 2) << (p - 1));
+
+        //update line_state: bit 7 (\sigma^N), and E^N
+        OPJ_UINT32 t = lsp[0] & 0x7F;          //E^NW
+        v_n = 32 - count_leading_zeros(v_n); 
+        lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n));
+      }
+      else if (locs & 0x2)
+        sp[stride] = 0; //no need to update line_state
+
+      ++lsp;
+      ++sp;
+
+      if (qinf[0] & 0x40) //sigma_n
+      {
+        ms_val = frwd_fetch(&magsgn);
+        m_n = U_q[0] - ((qinf[0] >> 14) & 1); //m_n
+        frwd_advance(&magsgn, m_n);
+        OPJ_UINT32 val = ms_val << 31;
+        v_n = ms_val & ((1U << m_n) - 1);
+        v_n |= (((qinf[0] & 0x400) >> 10) << m_n);
+        v_n |= 1;                            //center of bin
+        sp[0] = val | ((v_n + 2) << (p - 1));
+      }
+      else if (locs & 0x4)
+        sp[0] = 0;
+
+      if (qinf[0] & 0x80) //sigma_n
+      {
+        ms_val = frwd_fetch(&magsgn);
+        m_n = U_q[0] - ((qinf[0] >> 15) & 1); //m_n
+        frwd_advance(&magsgn, m_n);
+        OPJ_UINT32 val = ms_val << 31;
+        v_n = ms_val & ((1U << m_n) - 1);
+        v_n |= ((qinf[0] & 0x800) >> 11) << m_n;
+        v_n |= 1; //center of bin
+        sp[stride] = val | ((v_n + 2) << (p - 1));
+
+        //update line_state: bit 7 (\sigma^NW), and E^NW for next quad
+        lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
+      }
+      else if (locs & 0x8)
+        sp[stride] = 0;
+
+      ++sp;
+
+      if (qinf[1] & 0x10) //sigma_n
+      {
+        ms_val = frwd_fetch(&magsgn);
+        m_n = U_q[1] - ((qinf[1] >> 12) & 1); //m_n
+        frwd_advance(&magsgn, m_n);
+        OPJ_UINT32 val = ms_val << 31;
+        v_n = ms_val & ((1U << m_n) - 1);
+        v_n |= (((qinf[1] & 0x100) >> 8) << m_n);
+        v_n |= 1;                            //center of bin
+        sp[0] = val | ((v_n + 2) << (p - 1));
+      }
+      else if (locs & 0x10)
+        sp[0] = 0;
+
+      if (qinf[1] & 0x20) //sigma_n
+      {
+        ms_val = frwd_fetch(&magsgn);
+        m_n = U_q[1] - ((qinf[1] >> 13) & 1); //m_n
+        frwd_advance(&magsgn, m_n);
+        OPJ_UINT32 val = ms_val << 31;
+        v_n = ms_val & ((1U << m_n) - 1);
+        v_n |= (((qinf[1] & 0x200) >> 9) << m_n);
+        v_n |= 1; //center of bin
+        sp[stride] = val | ((v_n + 2) << (p - 1));
+
+        //update line_state: bit 7 (\sigma^N), and E^N
+        OPJ_UINT32 t = lsp[0] & 0x7F;          //E^NW
+        v_n = 32 - count_leading_zeros(v_n); 
+        lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n));
+      }
+      else if (locs & 0x20)
+        sp[stride] = 0; //no need to update line_state
+
+      ++lsp;
+      ++sp;
+
+      if (qinf[1] & 0x40) //sigma_n
+      {
+        ms_val = frwd_fetch(&magsgn);
+        m_n = U_q[1] - ((qinf[1] >> 14) & 1); //m_n
+        frwd_advance(&magsgn, m_n);
+        OPJ_UINT32 val = ms_val << 31;
+        v_n = ms_val & ((1U << m_n) - 1);
+        v_n |= (((qinf[1] & 0x400) >> 10) << m_n);
+        v_n |= 1;                            //center of bin
+        sp[0] = val | ((v_n + 2) << (p - 1));
+      }
+      else if (locs & 0x40)
+        sp[0] = 0;
+
+      if (qinf[1] & 0x80) //sigma_n
+      {
+        ms_val = frwd_fetch(&magsgn);
+        m_n = U_q[1] - ((qinf[1] >> 15) & 1); //m_n
+        frwd_advance(&magsgn, m_n);
+        OPJ_UINT32 val = ms_val << 31;
+        v_n = ms_val & ((1U << m_n) - 1);
+        v_n |= (((qinf[1] & 0x800) >> 11) << m_n);
+        v_n |= 1; //center of bin
+        sp[stride] = val | ((v_n + 2) << (p - 1));
+
+        //update line_state: bit 7 (\sigma^NW), and E^NW for next quad
+        lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
+      }
+      else if (locs & 0x80)
+        sp[stride] = 0;
+
+      ++sp;
+    }
+
+    y += 2;
+    if (num_passes > 1 && (y & 3) == 0) //executed at multiples of 4
+    { // This is for SPP and potentially MRP
+
+      if (num_passes > 2) //do MRP
+      {
+        // select the current stripe
+        OPJ_UINT32 *cur_sig = y & 0x4 ? sigma1 : sigma2;
+        // the address of the data that needs updating
+        OPJ_UINT32 *dpp = decoded_data + (y - 4) * stride;
+        OPJ_UINT32 half = 1 << (p - 2); // half the center of the bin
+        for (OPJ_UINT32 i = 0; i < width; i += 8)
+        {
+          //Process one entry from sigma array at a time
+          // Each nibble (4 bits) in the sigma array represents 4 rows,
+          // and the 32 bits contain 8 columns
+          OPJ_UINT32 cwd = rev_fetch_mrp(&magref); // get 32 bit data
+          OPJ_UINT32 sig = *cur_sig++; // 32 bit that will be processed now
+          OPJ_UINT32 col_mask = 0xFu;  // a mask for a column in sig
+          OPJ_UINT32 *dp = dpp + i;    // next column in decode samples
+          if (sig) // if any of the 32 bits are set
+          {
+            for (int j = 0; j < 8; ++j, dp++) //one column at a time
+            {
+              if (sig & col_mask) // lowest nibble
+              {
+                OPJ_UINT32 sample_mask = 0x11111111u & col_mask; //LSB
+
+                if (sig & sample_mask) //if LSB is set
+                {
+                  assert(dp[0] != 0); // decoded value cannot be zero
+                  OPJ_UINT32 sym = cwd & 1; // get it value
+                  // remove center of bin if sym is 0
+                  dp[0] ^= (1 - sym) << (p - 1);
+                  dp[0] |= half;      // put half the center of bin
+                  cwd >>= 1;          //consume word
+                }
+                sample_mask += sample_mask; //next row
+
+                if (sig & sample_mask)
+                {
+                  assert(dp[stride] != 0);
+                  OPJ_UINT32 sym = cwd & 1;
+                  dp[stride] ^= (1 - sym) << (p - 1);
+                  dp[stride] |= half;
+                  cwd >>= 1;
+                }
+                sample_mask += sample_mask;
+
+                if (sig & sample_mask)
+                {
+                  assert(dp[2 * stride] != 0);
+                  OPJ_UINT32 sym = cwd & 1;
+                  dp[2 * stride] ^= (1 - sym) << (p - 1);
+                  dp[2 * stride] |= half;
+                  cwd >>= 1;
+                }
+                sample_mask += sample_mask;
+
+                if (sig & sample_mask)
+                {
+                  assert(dp[3 * stride] != 0);
+                  OPJ_UINT32 sym = cwd & 1;
+                  dp[3 * stride] ^= (1 - sym) << (p - 1);
+                  dp[3 * stride] |= half;
+                  cwd >>= 1;
+                }
+                sample_mask += sample_mask;
+              }
+              col_mask <<= 4; //next column
+            }
+          }
+          // consume data according to the number of bits set
+          rev_advance_mrp(&magref, population_count(sig)); 
+        }
+      }
+
+      if (y >= 4) // update mbr array at the end of each stripe
+      {
+        //generate mbr corresponding to a stripe
+        OPJ_UINT32 *sig = y & 0x4 ? sigma1 : sigma2;
+        OPJ_UINT32 *mbr = y & 0x4 ? mbr1 : mbr2;
+
+        //data is processed in patches of 8 columns; each 32-bit word in
+        // the sigma and mbr arrays holds 8 columns (one nibble each) by 4 rows
+
+        //integrate horizontally
+        OPJ_UINT32 prev = 0; // previous columns
+        for (OPJ_UINT32 i = 0; i < width; i += 8, mbr++, sig++)
+        {
+          mbr[0] = sig[0];         //start with significant samples
+          mbr[0] |= prev >> 28;    //for first column, left neighbors
+          mbr[0] |= sig[0] << 4;   //left neighbors
+          mbr[0] |= sig[0] >> 4;   //right neighbors
+          mbr[0] |= sig[1] << 28;  //for last column, right neighbors
+          prev = sig[0];           // for next group of columns
+
+          //integrate vertically
+          OPJ_UINT32 t = mbr[0], z = mbr[0];
+          z |= (t & 0x77777777) << 1; //above neighbors
+          z |= (t & 0xEEEEEEEE) >> 1; //below neighbors
+          mbr[0] = z & ~sig[0]; //remove already significance samples
+        }
+      }
+
+      if (y >= 8) //wait until 8 rows has been processed
+      {
+        OPJ_UINT32 *cur_sig, *cur_mbr, *nxt_sig, *nxt_mbr;
+
+        // add membership from the next stripe, obtained above
+        cur_sig = y & 0x4 ? sigma2 : sigma1;
+        cur_mbr = y & 0x4 ? mbr2 : mbr1;
+        nxt_sig = y & 0x4 ? sigma1 : sigma2;  //future samples
+        OPJ_UINT32 prev = 0; // the columns before these group of 8 columns
+        for (OPJ_UINT32 i=0; i < width; i+=8, cur_mbr++, cur_sig++, nxt_sig++)
+        {
+          OPJ_UINT32 t = nxt_sig[0];
+          t |= prev >> 28;        //for first column, left neighbors
+          t |= nxt_sig[0] << 4;   //left neighbors
+          t |= nxt_sig[0] >> 4;   //right neighbors
+          t |= nxt_sig[1] << 28;  //for last column, right neighbors
+          prev = nxt_sig[0];      // for next group of columns
+
+          cur_mbr[0] |= (t & 0x11111111u) << 3; //propagate up to cur_mbr
+          cur_mbr[0] &= ~cur_sig[0]; //remove already significance samples
+        }
+
+        //find new locations and get signs
+        cur_sig = y & 0x4 ? sigma2 : sigma1;  
+        cur_mbr = y & 0x4 ? mbr2 : mbr1;
+        nxt_sig = y & 0x4 ? sigma1 : sigma2; //future samples
+        nxt_mbr = y & 0x4 ? mbr1 : mbr2;     //future samples
+        OPJ_UINT32 val = 3u << (p - 2); // sample values for newly discovered 
+                          // signficant samples including the bin center
+        for (OPJ_UINT32 i = 0; i < width;
+              i += 8, cur_sig++, cur_mbr++, nxt_sig++, nxt_mbr++)
+        {
+          OPJ_UINT32 mbr = *cur_mbr;
+          OPJ_UINT32 new_sig = 0;
+          if (mbr)  //are there any samples that might be significant
+          {
+            for (OPJ_UINT32 n = 0; n < 8; n += 4)
+            {
+              OPJ_UINT32 cwd = frwd_fetch(&sigprop); //get 32 bits
+              OPJ_UINT32 cnt = 0;
+
+              OPJ_UINT32 *dp = decoded_data + (y - 8) * stride;
+              dp += i + n; //address for decoded samples
+
+              OPJ_UINT32 col_mask = 0xFu << (4 * n); //a mask to select a 
+                                                     //column
+
+              OPJ_UINT32 inv_sig = ~cur_sig[0]; // insignificant samples
+
+              //find the last sample we operate on
+              OPJ_UINT32 end = n + 4 + i < width ? n + 4 : width - i;
+
+              for (OPJ_UINT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
+              {
+                if ((col_mask & mbr) == 0) //no samples need checking
+                  continue;
+
+                //scan mbr to find a new signficant sample
+                OPJ_UINT32 sample_mask = 0x11111111u & col_mask; // LSB
+                if (mbr & sample_mask)
+                {
+                  assert(dp[0] == 0); // the sample must have been 0
+                  if (cwd & 1) //if this sample has become significant
+                  { // must propagate it to nearby samples
+                    new_sig |= sample_mask;  // new significant samples
+                    OPJ_UINT32 t = 0x32u << (j * 4);// propagation to neighbors
+                    mbr |= t & inv_sig; //remove already signifcant samples
+                  }
+                  cwd >>= 1; ++cnt; //consume bit and increment number of
+                                    //consumed bits
+                }
+
+                sample_mask += sample_mask;  // next row
+                if (mbr & sample_mask)
+                {
+                  assert(dp[stride] == 0);
+                  if (cwd & 1)
+                  {
+                    new_sig |= sample_mask;
+                    OPJ_UINT32 t = 0x74u << (j * 4);
+                    mbr |= t & inv_sig;
+                  }
+                  cwd >>= 1; ++cnt;
+                }
+
+                sample_mask += sample_mask;
+                if (mbr & sample_mask)
+                {
+                  assert(dp[2 * stride] == 0);
+                  if (cwd & 1)
+                  {
+                    new_sig |= sample_mask;
+                    OPJ_UINT32 t = 0xE8u << (j * 4);
+                    mbr |= t & inv_sig;
+                  }
+                  cwd >>= 1; ++cnt;
+                }
+
+                sample_mask += sample_mask;
+                if (mbr & sample_mask)
+                {
+                  assert(dp[3 * stride] == 0);
+                  if (cwd & 1)
+                  {
+                    new_sig |= sample_mask;
+                    OPJ_UINT32 t = 0xC0u << (j * 4);
+                    mbr |= t & inv_sig;
+                  }
+                  cwd >>= 1; ++cnt;
+                }
+              }
+
+              //obtain signs here
+              if (new_sig & (0xFFFFu << (4 * n))) //if any
+              {
+                OPJ_UINT32 *dp = decoded_data + (y - 8) * stride;
+                dp += i + n; // decoded samples address
+                OPJ_UINT32 col_mask = 0xFu << (4 * n); //mask to select a 
+                                                       //column
+
+                for (OPJ_UINT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
+                {
+                  if ((col_mask & new_sig) == 0) //if non is signficant
+                    continue;
+
+                  //scan 4 signs
+                  OPJ_UINT32 sample_mask = 0x11111111u & col_mask;
+                  if (new_sig & sample_mask)
+                  {
+                    assert(dp[0] == 0);
+                    dp[0] |= ((cwd & 1) << 31) | val; //put value and sign
+                    cwd >>= 1; ++cnt; //consume bit and increment number
+                                      //of consumed bits
+                  }
+
+                  sample_mask += sample_mask;
+                  if (new_sig & sample_mask)
+                  {
+                    assert(dp[stride] == 0);
+                    dp[stride] |= ((cwd & 1) << 31) | val;
+                    cwd >>= 1; ++cnt;
+                  }
+
+                  sample_mask += sample_mask;
+                  if (new_sig & sample_mask)
+                  {
+                    assert(dp[2 * stride] == 0);
+                    dp[2 * stride] |= ((cwd & 1) << 31) | val;
+                    cwd >>= 1; ++cnt;
+                  }
+
+                  sample_mask += sample_mask;
+                  if (new_sig & sample_mask)
+                  {
+                    assert(dp[3 * stride] == 0);
+                    dp[3 * stride] |= ((cwd & 1) << 31) | val;
+                    cwd >>= 1; ++cnt;
+                  }
+                }
+
+              }
+              frwd_advance(&sigprop, cnt); //consume the bits from bitstrm
+              cnt = 0;
+
+              //update the next 8 columns
+              if (n == 4)
+              {
+                //horizontally
+                OPJ_UINT32 t = new_sig >> 28;
+                t |= ((t & 0xE) >> 1) | ((t & 7) << 1);
+                cur_mbr[1] |= t & ~cur_sig[1];
+              }
+            }
+          }
+          //update the next stripe (vertically propagation)
+          new_sig |= cur_sig[0];
+          OPJ_UINT32 u = (new_sig & 0x88888888) >> 3;
+          OPJ_UINT32 t = u | (u << 4) | (u >> 4); //left and right neighbors
+          if (i > 0)
+            nxt_mbr[-1] |= (u << 28) & ~nxt_sig[-1];
+          nxt_mbr[0] |= t & ~nxt_sig[0];
+          nxt_mbr[1] |= (u >> 28) & ~nxt_sig[1];
+        }
+
+        //clear current sigma
+        //mbr need not be cleared because it is overwritten
+        cur_sig = y & 0x4 ? sigma2 : sigma1;
+        memset(cur_sig, 0, (((width + 7) >> 3) + 1) << 2);
+      }
+    }
+  }
+
+  //terminating
+  if (num_passes > 1) {
+
+    if (num_passes > 2 && ((height & 3) == 1 || (height & 3) == 2))
+    {//do magref
+      OPJ_UINT32 *cur_sig = height & 0x4 ? sigma2 : sigma1; //reversed
+      OPJ_UINT32 *dpp = decoded_data + (height & 0xFFFFFFFCu) * stride;
+      OPJ_UINT32 half = 1 << (p - 2);
+      for (OPJ_UINT32 i = 0; i < width; i += 8)
+      {
+        OPJ_UINT32 cwd = rev_fetch_mrp(&magref);
+        OPJ_UINT32 sig = *cur_sig++;
+        OPJ_UINT32 col_mask = 0xF;
+        OPJ_UINT32 *dp = dpp + i;
+        if (sig)
+        {
+          for (int j = 0; j < 8; ++j, dp++)
+          {
+            if (sig & col_mask)
+            {
+              OPJ_UINT32 sample_mask = 0x11111111 & col_mask;
+
+              if (sig & sample_mask)
+              {
+                assert(dp[0] != 0);
+                OPJ_UINT32 sym = cwd & 1;
+                dp[0] ^= (1 - sym) << (p - 1);
+                dp[0] |= half;
+                cwd >>= 1;
+              }
+              sample_mask += sample_mask;
+
+              if (sig & sample_mask)
+              {
+                assert(dp[stride] != 0);
+                OPJ_UINT32 sym = cwd & 1;
+                dp[stride] ^= (1 - sym) << (p - 1);
+                dp[stride] |= half;
+                cwd >>= 1;
+              }
+              sample_mask += sample_mask;
+
+              if (sig & sample_mask)
+              {
+                assert(dp[2 * stride] != 0);
+                OPJ_UINT32 sym = cwd & 1;
+                dp[2 * stride] ^= (1 - sym) << (p - 1);
+                dp[2 * stride] |= half;
+                cwd >>= 1;
+              }
+              sample_mask += sample_mask;
+
+              if (sig & sample_mask)
+              {
+                assert(dp[3 * stride] != 0);
+                OPJ_UINT32 sym = cwd & 1;
+                dp[3 * stride] ^= (1 - sym) << (p - 1);
+                dp[3 * stride] |= half;
+                cwd >>= 1;
+              }
+              sample_mask += sample_mask;
+            }
+            col_mask <<= 4;
+          }
+        }
+        rev_advance_mrp(&magref, population_count(sig));
+      }
+    }
+
+    //do the last incomplete stripe
+    // for cases of (height & 3) == 0 and 3
+    // they should have been processed previously
+    if ((height & 3) == 1 || (height & 3) == 2)
+    {
+      //generate mbr of first stripe
+      OPJ_UINT32 *sig = height & 0x4 ? sigma2 : sigma1;
+      OPJ_UINT32 *mbr = height & 0x4 ? mbr2 : mbr1;
+      //integrate horizontally
+      OPJ_UINT32 prev = 0;
+      for (OPJ_UINT32 i = 0; i < width; i += 8, mbr++, sig++)
+      {
+        mbr[0] = sig[0];
+        mbr[0] |= prev >> 28;    //for first column, left neighbors
+        mbr[0] |= sig[0] << 4;   //left neighbors
+        mbr[0] |= sig[0] >> 4;   //right neighbors
+        mbr[0] |= sig[1] << 28;  //for last column, right neighbors
+        prev = sig[0];
+
+        //integrate vertically
+        OPJ_UINT32 t = mbr[0], z = mbr[0];
+        z |= (t & 0x77777777) << 1; //above neighbors
+        z |= (t & 0xEEEEEEEE) >> 1; //below neighbors
+        mbr[0] = z & ~sig[0]; //remove already significance samples
+      }
+    }
+
+    OPJ_UINT32 st = height;
+    st -= height > 6 ? (((height + 1) & 3) + 3) : height;
+    for (OPJ_UINT32 y = st; y < height; y += 4)
+    {
+      OPJ_UINT32 *cur_sig, *cur_mbr, *nxt_sig, *nxt_mbr;
+
+      OPJ_UINT32 pattern = 0xFFFFFFFFu; // a pattern needed samples
+      if (height - y == 3)
+        pattern = 0x77777777u;
+      else if (height - y == 2)
+        pattern = 0x33333333u;
+      else if (height - y == 1)
+        pattern = 0x11111111u;
+
+      //add membership from the next stripe, obtained above
+      if (height - y > 4)
+      {
+        cur_sig = y & 0x4 ? sigma2 : sigma1;
+        cur_mbr = y & 0x4 ? mbr2 : mbr1;
+        nxt_sig = y & 0x4 ? sigma1 : sigma2;
+        OPJ_UINT32 prev = 0;
+        for (OPJ_UINT32 i=0; i<width; i += 8, cur_mbr++, cur_sig++, nxt_sig++)
+        {
+          OPJ_UINT32 t = nxt_sig[0];
+          t |= prev >> 28;     //for first column, left neighbors
+          t |= nxt_sig[0] << 4;   //left neighbors
+          t |= nxt_sig[0] >> 4;   //right neighbors
+          t |= nxt_sig[1] << 28;  //for last column, right neighbors
+          prev = nxt_sig[0];
+
+          cur_mbr[0] |= (t & 0x11111111) << 3;
+          //remove already significance samples
+          cur_mbr[0] &= ~cur_sig[0];
+        }
+      }
+
+      //find new locations and get signs
+      cur_sig = y & 0x4 ? sigma2 : sigma1;
+      cur_mbr = y & 0x4 ? mbr2 : mbr1;
+      nxt_sig = y & 0x4 ? sigma1 : sigma2;
+      nxt_mbr = y & 0x4 ? mbr1 : mbr2;
+      OPJ_UINT32 val = 3u << (p - 2);
+      for (OPJ_UINT32 i = 0; i < width; i += 8,
+            cur_sig++, cur_mbr++, nxt_sig++, nxt_mbr++)
+      {
+        OPJ_UINT32 mbr = *cur_mbr & pattern; //skip unneeded samples
+        OPJ_UINT32 new_sig = 0;
+        if (mbr)
+        {
+          for (OPJ_UINT32 n = 0; n < 8; n += 4)
+          {
+            OPJ_UINT32 cwd = frwd_fetch(&sigprop);
+            OPJ_UINT32 cnt = 0;
+
+            OPJ_UINT32 *dp = decoded_data + y * stride;
+            dp += i + n;
+
+            OPJ_UINT32 col_mask = 0xFu << (4 * n);
+
+            OPJ_UINT32 inv_sig = ~cur_sig[0] & pattern;
+
+            OPJ_UINT32 end = n + 4 + i < width ? n + 4 : width - i;
+            for (OPJ_UINT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
+            {
+              if ((col_mask & mbr) == 0)
+                continue;
+
+              //scan 4 mbr
+              OPJ_UINT32 sample_mask = 0x11111111u & col_mask;
+              if (mbr & sample_mask)
+              {
+                assert(dp[0] == 0);
+                if (cwd & 1)
+                {
+                  new_sig |= sample_mask;
+                  OPJ_UINT32 t = 0x32u << (j * 4);
+                  mbr |= t & inv_sig;
+                }
+                cwd >>= 1; ++cnt;
+              }
+
+              sample_mask += sample_mask;
+              if (mbr & sample_mask)
+              {
+                assert(dp[stride] == 0);
+                if (cwd & 1)
+                {
+                  new_sig |= sample_mask;
+                  OPJ_UINT32 t = 0x74u << (j * 4);
+                  mbr |= t & inv_sig;
+                }
+                cwd >>= 1; ++cnt;
+              }
+
+              sample_mask += sample_mask;
+              if (mbr & sample_mask)
+              {
+                assert(dp[2 * stride] == 0);
+                if (cwd & 1)
+                {
+                  new_sig |= sample_mask;
+                  OPJ_UINT32 t = 0xE8u << (j * 4);
+                  mbr |= t & inv_sig;
+                }
+                cwd >>= 1; ++cnt;
+              }
+
+              sample_mask += sample_mask;
+              if (mbr & sample_mask)
+              {
+                assert(dp[3 * stride] == 0);
+                if (cwd & 1)
+                {
+                  new_sig |= sample_mask;
+                  OPJ_UINT32 t = 0xC0u << (j * 4);
+                  mbr |= t & inv_sig;
+                }
+                cwd >>= 1; ++cnt;
+              }
+            }
+
+            //signs here
+            if (new_sig & (0xFFFFu << (4 * n)))
+            {
+              OPJ_UINT32 *dp = decoded_data + y * stride;
+              dp += i + n;
+              OPJ_UINT32 col_mask = 0xFu << (4 * n);
+
+              for (OPJ_UINT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
+              {
+                if ((col_mask & new_sig) == 0)
+                  continue;
+
+                //scan 4 signs
+                OPJ_UINT32 sample_mask = 0x11111111u & col_mask;
+                if (new_sig & sample_mask)
+                {
+                  assert(dp[0] == 0);
+                  dp[0] |= ((cwd & 1) << 31) | val;
+                  cwd >>= 1; ++cnt;
+                }
+
+                sample_mask += sample_mask;
+                if (new_sig & sample_mask)
+                {
+                  assert(dp[stride] == 0);
+                  dp[stride] |= ((cwd & 1) << 31) | val;
+                  cwd >>= 1; ++cnt;
+                }
+
+                sample_mask += sample_mask;
+                if (new_sig & sample_mask)
+                {
+                  assert(dp[2 * stride] == 0);
+                  dp[2 * stride] |= ((cwd & 1) << 31) | val;
+                  cwd >>= 1; ++cnt;
+                }
+
+                sample_mask += sample_mask;
+                if (new_sig & sample_mask)
+                {
+                  assert(dp[3 * stride] == 0);
+                  dp[3 * stride] |= ((cwd & 1) << 31) | val;
+                  cwd >>= 1; ++cnt;
+                }
+              }
+
+            }
+            frwd_advance(&sigprop, cnt);
+            cnt = 0;
+
+            //update next columns
+            if (n == 4)
+            {
+              //horizontally
+              OPJ_UINT32 t = new_sig >> 28;
+              t |= ((t & 0xE) >> 1) | ((t & 7) << 1);
+              cur_mbr[1] |= t & ~cur_sig[1];
+            }
+          }
+        }
+        //propagate down (vertically propagation)
+        new_sig |= cur_sig[0];
+        OPJ_UINT32 u = (new_sig & 0x88888888) >> 3;
+        OPJ_UINT32 t = u | (u << 4) | (u >> 4);
+        if (i > 0)
+          nxt_mbr[-1] |= (u << 28) & ~nxt_sig[-1];
+        nxt_mbr[0] |= t & ~nxt_sig[0];
+        nxt_mbr[1] |= (u >> 28) & ~nxt_sig[1];
+      }
+    }
+  }
+
+  //int shift = 29 - missing_msbs;
+  for (OPJ_UINT32 y = 0; y < height; ++y)
+  {
+    OPJ_UINT32* sp = decoded_data + y * stride; // generated samples
+    for (OPJ_UINT32 x = 0; x < width; ++x, ++sp)
+    {
+      OPJ_INT32 val = (*sp & 0x7FFFFFFF);
+      *sp = ((OPJ_UINT32)*sp & 0x80000000) ? -val : val;
+    }
+  }
+
+  return OPJ_TRUE;
+}
diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c
index 05b5586a9..8bbf0be8c 100644
--- a/src/lib/openjp2/j2k.c
+++ b/src/lib/openjp2/j2k.c
@@ -10617,7 +10617,10 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k,
     /* SPcod (G) / SPcoc (D) */
     opj_read_bytes(l_current_ptr, &l_tccp->cblksty, 1);
     ++l_current_ptr;
-    if (l_tccp->cblksty & 0xC0U) { /* 2 msb are reserved, assume we can't read */
+    if ((l_tccp->cblksty & 0x80U) != 0 || (l_tccp->cblksty & 0x48U) == 0x48U) { 
+    /* For HT, we only support one mode: bit 6 set, meaning that "all code-blocks
+       within the corresponding tile-component shall be HT code-blocks", and
+       bit 3 reset, meaning "No vertically causal context". */
         opj_event_msg(p_manager, EVT_ERROR,
                       "Error reading SPCod SPCoc element, Invalid code-block style found\n");
         return OPJ_FALSE;
diff --git a/src/lib/openjp2/j2k.h b/src/lib/openjp2/j2k.h
index 740ed9b6d..ac69a3763 100644
--- a/src/lib/openjp2/j2k.h
+++ b/src/lib/openjp2/j2k.h
@@ -61,6 +61,7 @@ The functions in J2K.C have for goal to read/write the several parts of the code
 #define J2K_CCP_CBLKSTY_VSC 0x08      /**< Vertically stripe causal context */
 #define J2K_CCP_CBLKSTY_PTERM 0x10    /**< Predictable termination */
 #define J2K_CCP_CBLKSTY_SEGSYM 0x20   /**< Segmentation symbols are used */
+#define J2K_CCP_CBLKSTY_HT 0x40       /**< (high throughput) HT codeblock */
 #define J2K_CCP_QNTSTY_NOQNT 0
 #define J2K_CCP_QNTSTY_SIQNT 1
 #define J2K_CCP_QNTSTY_SEQNT 2
diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c
index 1bea54b0d..bb97c7eab 100644
--- a/src/lib/openjp2/t1.c
+++ b/src/lib/openjp2/t1.c
@@ -217,6 +217,27 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
                                    opj_mutex_t* p_manager_mutex,
                                    OPJ_BOOL check_pterm);
 
+/**
+Decode 1 HT (high throughput) code-block; not static, the definition lives outside t1.c
+@param t1 T1 handle
+@param cblk Code-block coding parameters
+@param orient Band orientation (the decode processor passes band->bandno)
+@param roishift Region of interest shifting value
+@param cblksty Code-block style
+@param p_manager the event manager
+@param p_manager_mutex mutex for the event manager
+@param check_pterm whether PTERM correct termination should be checked
+@return OPJ_FALSE on failure; the decode processor then flags the job as failed */
+OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
+                               opj_tcd_cblk_dec_t* cblk,
+                               OPJ_UINT32 orient,
+                               OPJ_UINT32 roishift,
+                               OPJ_UINT32 cblksty,
+                               opj_event_mgr_t *p_manager,
+                               opj_mutex_t* p_manager_mutex,
+                               OPJ_BOOL check_pterm);
+
+
 static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
                                         OPJ_UINT32 w,
                                         OPJ_UINT32 h);
@@ -1665,18 +1686,35 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
     }
     t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
 
-    if (OPJ_FALSE == opj_t1_decode_cblk(
-                t1,
-                cblk,
-                band->bandno,
-                (OPJ_UINT32)tccp->roishift,
-                tccp->cblksty,
-                job->p_manager,
-                job->p_manager_mutex,
-                job->check_pterm)) {
-        *(job->pret) = OPJ_FALSE;
-        opj_free(job);
-        return;
+    if ((tccp->cblksty & J2K_CCP_CBLKSTY_HT) != 0) {
+        if (OPJ_FALSE == opj_t1_ht_decode_cblk(
+                    t1,
+                    cblk,
+                    band->bandno,
+                    (OPJ_UINT32)tccp->roishift,
+                    tccp->cblksty,
+                    job->p_manager,
+                    job->p_manager_mutex,
+                    job->check_pterm)) {
+            *(job->pret) = OPJ_FALSE;
+            opj_free(job);
+            return;
+        }
+    }
+    else {
+        if (OPJ_FALSE == opj_t1_decode_cblk(
+                    t1,
+                    cblk,
+                    band->bandno,
+                    (OPJ_UINT32)tccp->roishift,
+                    tccp->cblksty,
+                    job->p_manager,
+                    job->p_manager_mutex,
+                    job->check_pterm)) {
+            *(job->pret) = OPJ_FALSE;
+            opj_free(job);
+            return;
+        }
     }
 
     x = cblk->x0 - band->x0;
diff --git a/src/lib/openjp2/t1_generate_luts.c b/src/lib/openjp2/t1_generate_luts.c
index 9ad6f2003..99c8c12fd 100644
--- a/src/lib/openjp2/t1_generate_luts.c
+++ b/src/lib/openjp2/t1_generate_luts.c
@@ -39,6 +39,12 @@
 
 #include "opj_includes.h"
 
+// VLC lookup tables and their initializer, defined in another translation unit; dumped by main() below
+extern OPJ_BOOL vlc_init_tables();
+extern OPJ_BOOL vlc_tables_initialized;
+extern int vlc_tbl0[1024];
+extern int vlc_tbl1[1024];
+
 static int t1_init_ctxno_zc(OPJ_UINT32 f, OPJ_UINT32 orient)
 {
     int h, v, d, n, t, hv;
@@ -307,5 +313,11 @@ int main(int argc, char **argv)
     printf("static const OPJ_INT16 lut_nmsedec_ref0[1U << T1_NMSEDEC_BITS] = {\n    ");
     dump_array16(lut_nmsedec_ref0, 1U << T1_NMSEDEC_BITS);
 
+    vlc_tables_initialized = vlc_init_tables();
+    printf("static const OPJ_UINT16 vlc_tbl0[1024] = {\n    ");
+    dump_array16(vlc_tbl0, 1024);
+    printf("static const OPJ_UINT16 vlc_tbl1[1024] = {\n    ");
+    dump_array16(vlc_tbl1, 1024);
+
     return 0;
 }
diff --git a/src/lib/openjp2/t1_ht_generate_luts.c b/src/lib/openjp2/t1_ht_generate_luts.c
new file mode 100644
index 000000000..f759cb75d
--- /dev/null
+++ b/src/lib/openjp2/t1_ht_generate_luts.c
@@ -0,0 +1,967 @@
+//***************************************************************************/
+// This software is released under the 2-Clause BSD license, included
+// below.
+//
+// Copyright (c) 2021, Aous Naman 
+// Copyright (c) 2021, Kakadu Software Pty Ltd, Australia
+// Copyright (c) 2021, The University of New South Wales, Australia
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//***************************************************************************/
+// This file is part of the OpenJpeg software implementation.
+// File: t1_ht_generate_luts.c
+// Author: Aous Naman
+// Date: 01 September 2021
+//***************************************************************************/
+
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+
+typedef int OPJ_BOOL; // local boolean type; NOTE(review): presumably mirrors the library's OPJ_BOOL - confirm
+#define OPJ_TRUE 1
+#define OPJ_FALSE 0
+
+#include "opj_stdint.h" // expected to supply the intN_t / uintN_t types used below
+
+typedef int8_t   OPJ_INT8;   // fixed-width aliases under the OPJ_ names
+typedef uint8_t  OPJ_UINT8;
+typedef int16_t  OPJ_INT16;
+typedef uint16_t OPJ_UINT16;
+typedef int32_t  OPJ_INT32;
+typedef uint32_t OPJ_UINT32;
+typedef int64_t  OPJ_INT64;
+typedef uint64_t OPJ_UINT64;
+
+//************************************************************************/
+/** @brief One row of the HT decoding tables, as given in the standard
+  *
+  *  Each row holds the following fields, in this order:
+  *   c_q      is the context for a quad
+  *   rho      is the significance pattern for a quad
+  *   u_off    indicates whether the u value is 0 (u_off is 0) or is communicated
+  *   e_k, e_1 are the EMB patterns
+  *   cwd      is the VLC codeword
+  *   cwd_len  is the VLC codeword length
+  */
+typedef struct vlc_src_table { int c_q, rho, u_off, e_k, e_1, cwd, cwd_len;}
+vlc_src_table_t;
+
+// initial quad rows
+static vlc_src_table_t tbl0[] = {
+  {0, 0x1, 0x0, 0x0, 0x0, 0x06, 4},
+  {0, 0x1, 0x1, 0x1, 0x1, 0x3F, 7},
+  {0, 0x2, 0x0, 0x0, 0x0, 0x00, 3},
+  {0, 0x2, 0x1, 0x2, 0x2, 0x7F, 7},
+  {0, 0x3, 0x0, 0x0, 0x0, 0x11, 5},
+  {0, 0x3, 0x1, 0x2, 0x2, 0x5F, 7},
+  {0, 0x3, 0x1, 0x3, 0x1, 0x1F, 7},
+  {0, 0x4, 0x0, 0x0, 0x0, 0x02, 3},
+  {0, 0x4, 0x1, 0x4, 0x4, 0x13, 6},
+  {0, 0x5, 0x0, 0x0, 0x0, 0x0E, 5},
+  {0, 0x5, 0x1, 0x4, 0x4, 0x23, 6},
+  {0, 0x5, 0x1, 0x5, 0x1, 0x0F, 7},
+  {0, 0x6, 0x0, 0x0, 0x0, 0x03, 6},
+  {0, 0x6, 0x1, 0x0, 0x0, 0x6F, 7},
+  {0, 0x7, 0x0, 0x0, 0x0, 0x2F, 7},
+  {0, 0x7, 0x1, 0x2, 0x2, 0x4F, 7},
+  {0, 0x7, 0x1, 0x2, 0x0, 0x0D, 6},
+  {0, 0x8, 0x0, 0x0, 0x0, 0x04, 3},
+  {0, 0x8, 0x1, 0x8, 0x8, 0x3D, 6},
+  {0, 0x9, 0x0, 0x0, 0x0, 0x1D, 6},
+  {0, 0x9, 0x1, 0x0, 0x0, 0x2D, 6},
+  {0, 0xA, 0x0, 0x0, 0x0, 0x01, 5},
+  {0, 0xA, 0x1, 0x8, 0x8, 0x35, 6},
+  {0, 0xA, 0x1, 0xA, 0x2, 0x77, 7},
+  {0, 0xB, 0x0, 0x0, 0x0, 0x37, 7},
+  {0, 0xB, 0x1, 0x1, 0x1, 0x57, 7},
+  {0, 0xB, 0x1, 0x1, 0x0, 0x09, 6},
+  {0, 0xC, 0x0, 0x0, 0x0, 0x1E, 5},
+  {0, 0xC, 0x1, 0xC, 0xC, 0x17, 7},
+  {0, 0xC, 0x1, 0xC, 0x4, 0x15, 6},
+  {0, 0xC, 0x1, 0xC, 0x8, 0x25, 6},
+  {0, 0xD, 0x0, 0x0, 0x0, 0x67, 7},
+  {0, 0xD, 0x1, 0x1, 0x1, 0x27, 7},
+  {0, 0xD, 0x1, 0x5, 0x4, 0x47, 7},
+  {0, 0xD, 0x1, 0xD, 0x8, 0x07, 7},
+  {0, 0xE, 0x0, 0x0, 0x0, 0x7B, 7},
+  {0, 0xE, 0x1, 0x2, 0x2, 0x4B, 7},
+  {0, 0xE, 0x1, 0xA, 0x8, 0x05, 6},
+  {0, 0xE, 0x1, 0xE, 0x4, 0x3B, 7},
+  {0, 0xF, 0x0, 0x0, 0x0, 0x5B, 7},
+  {0, 0xF, 0x1, 0x9, 0x9, 0x1B, 7},
+  {0, 0xF, 0x1, 0xB, 0xA, 0x6B, 7},
+  {0, 0xF, 0x1, 0xF, 0xC, 0x2B, 7},
+  {0, 0xF, 0x1, 0xF, 0x8, 0x39, 6},
+  {0, 0xF, 0x1, 0xE, 0x6, 0x73, 7},
+  {0, 0xF, 0x1, 0xE, 0x2, 0x19, 6},
+  {0, 0xF, 0x1, 0xF, 0x5, 0x0B, 7},
+  {0, 0xF, 0x1, 0xF, 0x4, 0x29, 6},
+  {0, 0xF, 0x1, 0xF, 0x1, 0x33, 7},
+  {1, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
+  {1, 0x1, 0x0, 0x0, 0x0, 0x0E, 4},
+  {1, 0x1, 0x1, 0x1, 0x1, 0x1F, 7},
+  {1, 0x2, 0x0, 0x0, 0x0, 0x06, 4},
+  {1, 0x2, 0x1, 0x2, 0x2, 0x3B, 6},
+  {1, 0x3, 0x0, 0x0, 0x0, 0x1B, 6},
+  {1, 0x3, 0x1, 0x0, 0x0, 0x3D, 6},
+  {1, 0x4, 0x0, 0x0, 0x0, 0x0A, 4},
+  {1, 0x4, 0x1, 0x4, 0x4, 0x2B, 6},
+  {1, 0x5, 0x0, 0x0, 0x0, 0x0B, 6},
+  {1, 0x5, 0x1, 0x4, 0x4, 0x33, 6},
+  {1, 0x5, 0x1, 0x5, 0x1, 0x7F, 7},
+  {1, 0x6, 0x0, 0x0, 0x0, 0x13, 6},
+  {1, 0x6, 0x1, 0x0, 0x0, 0x23, 6},
+  {1, 0x7, 0x0, 0x0, 0x0, 0x3F, 7},
+  {1, 0x7, 0x1, 0x2, 0x2, 0x5F, 7},
+  {1, 0x7, 0x1, 0x2, 0x0, 0x03, 6},
+  {1, 0x8, 0x0, 0x0, 0x0, 0x02, 4},
+  {1, 0x8, 0x1, 0x8, 0x8, 0x1D, 6},
+  {1, 0x9, 0x0, 0x0, 0x0, 0x2D, 6},
+  {1, 0x9, 0x1, 0x0, 0x0, 0x0D, 6},
+  {1, 0xA, 0x0, 0x0, 0x0, 0x35, 6},
+  {1, 0xA, 0x1, 0x8, 0x8, 0x15, 6},
+  {1, 0xA, 0x1, 0xA, 0x2, 0x6F, 7},
+  {1, 0xB, 0x0, 0x0, 0x0, 0x2F, 7},
+  {1, 0xB, 0x1, 0x1, 0x1, 0x4F, 7},
+  {1, 0xB, 0x1, 0x1, 0x0, 0x11, 6},
+  {1, 0xC, 0x0, 0x0, 0x0, 0x01, 5},
+  {1, 0xC, 0x1, 0x8, 0x8, 0x25, 6},
+  {1, 0xC, 0x1, 0xC, 0x4, 0x05, 6},
+  {1, 0xD, 0x0, 0x0, 0x0, 0x0F, 7},
+  {1, 0xD, 0x1, 0x1, 0x1, 0x17, 7},
+  {1, 0xD, 0x1, 0x5, 0x4, 0x39, 6},
+  {1, 0xD, 0x1, 0xD, 0x8, 0x77, 7},
+  {1, 0xE, 0x0, 0x0, 0x0, 0x37, 7},
+  {1, 0xE, 0x1, 0x2, 0x2, 0x57, 7},
+  {1, 0xE, 0x1, 0xA, 0x8, 0x19, 6},
+  {1, 0xE, 0x1, 0xE, 0x4, 0x67, 7},
+  {1, 0xF, 0x0, 0x0, 0x0, 0x07, 7},
+  {1, 0xF, 0x1, 0xB, 0x8, 0x29, 6},
+  {1, 0xF, 0x1, 0x8, 0x8, 0x27, 7},
+  {1, 0xF, 0x1, 0xA, 0x2, 0x09, 6},
+  {1, 0xF, 0x1, 0xE, 0x4, 0x31, 6},
+  {1, 0xF, 0x1, 0xF, 0x1, 0x47, 7},
+  {2, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
+  {2, 0x1, 0x0, 0x0, 0x0, 0x0E, 4},
+  {2, 0x1, 0x1, 0x1, 0x1, 0x1B, 6},
+  {2, 0x2, 0x0, 0x0, 0x0, 0x06, 4},
+  {2, 0x2, 0x1, 0x2, 0x2, 0x3F, 7},
+  {2, 0x3, 0x0, 0x0, 0x0, 0x2B, 6},
+  {2, 0x3, 0x1, 0x1, 0x1, 0x33, 6},
+  {2, 0x3, 0x1, 0x3, 0x2, 0x7F, 7},
+  {2, 0x4, 0x0, 0x0, 0x0, 0x0A, 4},
+  {2, 0x4, 0x1, 0x4, 0x4, 0x0B, 6},
+  {2, 0x5, 0x0, 0x0, 0x0, 0x01, 5},
+  {2, 0x5, 0x1, 0x5, 0x5, 0x2F, 7},
+  {2, 0x5, 0x1, 0x5, 0x1, 0x13, 6},
+  {2, 0x5, 0x1, 0x5, 0x4, 0x23, 6},
+  {2, 0x6, 0x0, 0x0, 0x0, 0x03, 6},
+  {2, 0x6, 0x1, 0x0, 0x0, 0x5F, 7},
+  {2, 0x7, 0x0, 0x0, 0x0, 0x1F, 7},
+  {2, 0x7, 0x1, 0x2, 0x2, 0x6F, 7},
+  {2, 0x7, 0x1, 0x3, 0x1, 0x11, 6},
+  {2, 0x7, 0x1, 0x7, 0x4, 0x37, 7},
+  {2, 0x8, 0x0, 0x0, 0x0, 0x02, 4},
+  {2, 0x8, 0x1, 0x8, 0x8, 0x4F, 7},
+  {2, 0x9, 0x0, 0x0, 0x0, 0x3D, 6},
+  {2, 0x9, 0x1, 0x0, 0x0, 0x1D, 6},
+  {2, 0xA, 0x0, 0x0, 0x0, 0x2D, 6},
+  {2, 0xA, 0x1, 0x0, 0x0, 0x0D, 6},
+  {2, 0xB, 0x0, 0x0, 0x0, 0x0F, 7},
+  {2, 0xB, 0x1, 0x2, 0x2, 0x77, 7},
+  {2, 0xB, 0x1, 0x2, 0x0, 0x35, 6},
+  {2, 0xC, 0x0, 0x0, 0x0, 0x15, 6},
+  {2, 0xC, 0x1, 0x4, 0x4, 0x25, 6},
+  {2, 0xC, 0x1, 0xC, 0x8, 0x57, 7},
+  {2, 0xD, 0x0, 0x0, 0x0, 0x17, 7},
+  {2, 0xD, 0x1, 0x8, 0x8, 0x05, 6},
+  {2, 0xD, 0x1, 0xC, 0x4, 0x39, 6},
+  {2, 0xD, 0x1, 0xD, 0x1, 0x67, 7},
+  {2, 0xE, 0x0, 0x0, 0x0, 0x27, 7},
+  {2, 0xE, 0x1, 0x2, 0x2, 0x7B, 7},
+  {2, 0xE, 0x1, 0x2, 0x0, 0x19, 6},
+  {2, 0xF, 0x0, 0x0, 0x0, 0x47, 7},
+  {2, 0xF, 0x1, 0xF, 0x1, 0x29, 6},
+  {2, 0xF, 0x1, 0x1, 0x1, 0x09, 6},
+  {2, 0xF, 0x1, 0x3, 0x2, 0x07, 7},
+  {2, 0xF, 0x1, 0x7, 0x4, 0x31, 6},
+  {2, 0xF, 0x1, 0xF, 0x8, 0x3B, 7},
+  {3, 0x0, 0x0, 0x0, 0x0, 0x00, 3},
+  {3, 0x1, 0x0, 0x0, 0x0, 0x04, 4},
+  {3, 0x1, 0x1, 0x1, 0x1, 0x3D, 6},
+  {3, 0x2, 0x0, 0x0, 0x0, 0x0C, 5},
+  {3, 0x2, 0x1, 0x2, 0x2, 0x4F, 7},
+  {3, 0x3, 0x0, 0x0, 0x0, 0x1D, 6},
+  {3, 0x3, 0x1, 0x1, 0x1, 0x05, 6},
+  {3, 0x3, 0x1, 0x3, 0x2, 0x7F, 7},
+  {3, 0x4, 0x0, 0x0, 0x0, 0x16, 5},
+  {3, 0x4, 0x1, 0x4, 0x4, 0x2D, 6},
+  {3, 0x5, 0x0, 0x0, 0x0, 0x06, 5},
+  {3, 0x5, 0x1, 0x5, 0x5, 0x1A, 5},
+  {3, 0x5, 0x1, 0x5, 0x1, 0x0D, 6},
+  {3, 0x5, 0x1, 0x5, 0x4, 0x35, 6},
+  {3, 0x6, 0x0, 0x0, 0x0, 0x3F, 7},
+  {3, 0x6, 0x1, 0x4, 0x4, 0x5F, 7},
+  {3, 0x6, 0x1, 0x6, 0x2, 0x1F, 7},
+  {3, 0x7, 0x0, 0x0, 0x0, 0x6F, 7},
+  {3, 0x7, 0x1, 0x6, 0x6, 0x2F, 7},
+  {3, 0x7, 0x1, 0x6, 0x4, 0x15, 6},
+  {3, 0x7, 0x1, 0x7, 0x3, 0x77, 7},
+  {3, 0x7, 0x1, 0x7, 0x1, 0x25, 6},
+  {3, 0x7, 0x1, 0x7, 0x2, 0x0F, 7},
+  {3, 0x8, 0x0, 0x0, 0x0, 0x0A, 5},
+  {3, 0x8, 0x1, 0x8, 0x8, 0x07, 7},
+  {3, 0x9, 0x0, 0x0, 0x0, 0x39, 6},
+  {3, 0x9, 0x1, 0x1, 0x1, 0x37, 7},
+  {3, 0x9, 0x1, 0x9, 0x8, 0x57, 7},
+  {3, 0xA, 0x0, 0x0, 0x0, 0x19, 6},
+  {3, 0xA, 0x1, 0x8, 0x8, 0x29, 6},
+  {3, 0xA, 0x1, 0xA, 0x2, 0x17, 7},
+  {3, 0xB, 0x0, 0x0, 0x0, 0x67, 7},
+  {3, 0xB, 0x1, 0xB, 0x1, 0x27, 7},
+  {3, 0xB, 0x1, 0x1, 0x1, 0x47, 7},
+  {3, 0xB, 0x1, 0x3, 0x2, 0x09, 6},
+  {3, 0xB, 0x1, 0xB, 0x8, 0x7B, 7},
+  {3, 0xC, 0x0, 0x0, 0x0, 0x31, 6},
+  {3, 0xC, 0x1, 0x4, 0x4, 0x11, 6},
+  {3, 0xC, 0x1, 0xC, 0x8, 0x3B, 7},
+  {3, 0xD, 0x0, 0x0, 0x0, 0x5B, 7},
+  {3, 0xD, 0x1, 0x9, 0x9, 0x1B, 7},
+  {3, 0xD, 0x1, 0xD, 0x5, 0x2B, 7},
+  {3, 0xD, 0x1, 0xD, 0x1, 0x21, 6},
+  {3, 0xD, 0x1, 0xD, 0xC, 0x6B, 7},
+  {3, 0xD, 0x1, 0xD, 0x4, 0x01, 6},
+  {3, 0xD, 0x1, 0xD, 0x8, 0x4B, 7},
+  {3, 0xE, 0x0, 0x0, 0x0, 0x0B, 7},
+  {3, 0xE, 0x1, 0xE, 0x4, 0x73, 7},
+  {3, 0xE, 0x1, 0x4, 0x4, 0x13, 7},
+  {3, 0xE, 0x1, 0xC, 0x8, 0x3E, 6},
+  {3, 0xE, 0x1, 0xE, 0x2, 0x33, 7},
+  {3, 0xF, 0x0, 0x0, 0x0, 0x53, 7},
+  {3, 0xF, 0x1, 0xA, 0xA, 0x0E, 6},
+  {3, 0xF, 0x1, 0xB, 0x9, 0x63, 7},
+  {3, 0xF, 0x1, 0xF, 0xC, 0x03, 7},
+  {3, 0xF, 0x1, 0xF, 0x8, 0x12, 5},
+  {3, 0xF, 0x1, 0xE, 0x6, 0x23, 7},
+  {3, 0xF, 0x1, 0xF, 0x5, 0x1E, 6},
+  {3, 0xF, 0x1, 0xF, 0x4, 0x02, 5},
+  {3, 0xF, 0x1, 0xF, 0x3, 0x43, 7},
+  {3, 0xF, 0x1, 0xF, 0x1, 0x1C, 5},
+  {3, 0xF, 0x1, 0xF, 0x2, 0x2E, 6},
+  {4, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
+  {4, 0x1, 0x0, 0x0, 0x0, 0x0E, 4},
+  {4, 0x1, 0x1, 0x1, 0x1, 0x3F, 7},
+  {4, 0x2, 0x0, 0x0, 0x0, 0x06, 4},
+  {4, 0x2, 0x1, 0x2, 0x2, 0x1B, 6},
+  {4, 0x3, 0x0, 0x0, 0x0, 0x2B, 6},
+  {4, 0x3, 0x1, 0x2, 0x2, 0x3D, 6},
+  {4, 0x3, 0x1, 0x3, 0x1, 0x7F, 7},
+  {4, 0x4, 0x0, 0x0, 0x0, 0x0A, 4},
+  {4, 0x4, 0x1, 0x4, 0x4, 0x5F, 7},
+  {4, 0x5, 0x0, 0x0, 0x0, 0x0B, 6},
+  {4, 0x5, 0x1, 0x0, 0x0, 0x33, 6},
+  {4, 0x6, 0x0, 0x0, 0x0, 0x13, 6},
+  {4, 0x6, 0x1, 0x0, 0x0, 0x23, 6},
+  {4, 0x7, 0x0, 0x0, 0x0, 0x1F, 7},
+  {4, 0x7, 0x1, 0x4, 0x4, 0x6F, 7},
+  {4, 0x7, 0x1, 0x4, 0x0, 0x03, 6},
+  {4, 0x8, 0x0, 0x0, 0x0, 0x02, 4},
+  {4, 0x8, 0x1, 0x8, 0x8, 0x1D, 6},
+  {4, 0x9, 0x0, 0x0, 0x0, 0x11, 6},
+  {4, 0x9, 0x1, 0x0, 0x0, 0x77, 7},
+  {4, 0xA, 0x0, 0x0, 0x0, 0x01, 5},
+  {4, 0xA, 0x1, 0xA, 0xA, 0x2F, 7},
+  {4, 0xA, 0x1, 0xA, 0x2, 0x2D, 6},
+  {4, 0xA, 0x1, 0xA, 0x8, 0x0D, 6},
+  {4, 0xB, 0x0, 0x0, 0x0, 0x4F, 7},
+  {4, 0xB, 0x1, 0xB, 0x2, 0x0F, 7},
+  {4, 0xB, 0x1, 0x0, 0x0, 0x35, 6},
+  {4, 0xC, 0x0, 0x0, 0x0, 0x15, 6},
+  {4, 0xC, 0x1, 0x8, 0x8, 0x25, 6},
+  {4, 0xC, 0x1, 0xC, 0x4, 0x37, 7},
+  {4, 0xD, 0x0, 0x0, 0x0, 0x57, 7},
+  {4, 0xD, 0x1, 0x1, 0x1, 0x07, 7},
+  {4, 0xD, 0x1, 0x1, 0x0, 0x05, 6},
+  {4, 0xE, 0x0, 0x0, 0x0, 0x17, 7},
+  {4, 0xE, 0x1, 0x4, 0x4, 0x39, 6},
+  {4, 0xE, 0x1, 0xC, 0x8, 0x19, 6},
+  {4, 0xE, 0x1, 0xE, 0x2, 0x67, 7},
+  {4, 0xF, 0x0, 0x0, 0x0, 0x27, 7},
+  {4, 0xF, 0x1, 0x9, 0x9, 0x47, 7},
+  {4, 0xF, 0x1, 0x9, 0x1, 0x29, 6},
+  {4, 0xF, 0x1, 0x7, 0x6, 0x7B, 7},
+  {4, 0xF, 0x1, 0x7, 0x2, 0x09, 6},
+  {4, 0xF, 0x1, 0xB, 0x8, 0x31, 6},
+  {4, 0xF, 0x1, 0xF, 0x4, 0x3B, 7},
+  {5, 0x0, 0x0, 0x0, 0x0, 0x00, 3},
+  {5, 0x1, 0x0, 0x0, 0x0, 0x1A, 5},
+  {5, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
+  {5, 0x2, 0x0, 0x0, 0x0, 0x0A, 5},
+  {5, 0x2, 0x1, 0x2, 0x2, 0x1D, 6},
+  {5, 0x3, 0x0, 0x0, 0x0, 0x2D, 6},
+  {5, 0x3, 0x1, 0x3, 0x3, 0x5F, 7},
+  {5, 0x3, 0x1, 0x3, 0x2, 0x39, 6},
+  {5, 0x3, 0x1, 0x3, 0x1, 0x3F, 7},
+  {5, 0x4, 0x0, 0x0, 0x0, 0x12, 5},
+  {5, 0x4, 0x1, 0x4, 0x4, 0x1F, 7},
+  {5, 0x5, 0x0, 0x0, 0x0, 0x0D, 6},
+  {5, 0x5, 0x1, 0x4, 0x4, 0x35, 6},
+  {5, 0x5, 0x1, 0x5, 0x1, 0x6F, 7},
+  {5, 0x6, 0x0, 0x0, 0x0, 0x15, 6},
+  {5, 0x6, 0x1, 0x2, 0x2, 0x25, 6},
+  {5, 0x6, 0x1, 0x6, 0x4, 0x2F, 7},
+  {5, 0x7, 0x0, 0x0, 0x0, 0x4F, 7},
+  {5, 0x7, 0x1, 0x6, 0x6, 0x57, 7},
+  {5, 0x7, 0x1, 0x6, 0x4, 0x05, 6},
+  {5, 0x7, 0x1, 0x7, 0x3, 0x0F, 7},
+  {5, 0x7, 0x1, 0x7, 0x2, 0x77, 7},
+  {5, 0x7, 0x1, 0x7, 0x1, 0x37, 7},
+  {5, 0x8, 0x0, 0x0, 0x0, 0x02, 5},
+  {5, 0x8, 0x1, 0x8, 0x8, 0x19, 6},
+  {5, 0x9, 0x0, 0x0, 0x0, 0x26, 6},
+  {5, 0x9, 0x1, 0x8, 0x8, 0x17, 7},
+  {5, 0x9, 0x1, 0x9, 0x1, 0x67, 7},
+  {5, 0xA, 0x0, 0x0, 0x0, 0x1C, 5},
+  {5, 0xA, 0x1, 0xA, 0xA, 0x29, 6},
+  {5, 0xA, 0x1, 0xA, 0x2, 0x09, 6},
+  {5, 0xA, 0x1, 0xA, 0x8, 0x31, 6},
+  {5, 0xB, 0x0, 0x0, 0x0, 0x27, 7},
+  {5, 0xB, 0x1, 0x9, 0x9, 0x07, 7},
+  {5, 0xB, 0x1, 0x9, 0x8, 0x11, 6},
+  {5, 0xB, 0x1, 0xB, 0x3, 0x47, 7},
+  {5, 0xB, 0x1, 0xB, 0x2, 0x21, 6},
+  {5, 0xB, 0x1, 0xB, 0x1, 0x7B, 7},
+  {5, 0xC, 0x0, 0x0, 0x0, 0x01, 6},
+  {5, 0xC, 0x1, 0x8, 0x8, 0x3E, 6},
+  {5, 0xC, 0x1, 0xC, 0x4, 0x3B, 7},
+  {5, 0xD, 0x0, 0x0, 0x0, 0x5B, 7},
+  {5, 0xD, 0x1, 0x9, 0x9, 0x6B, 7},
+  {5, 0xD, 0x1, 0x9, 0x8, 0x1E, 6},
+  {5, 0xD, 0x1, 0xD, 0x5, 0x1B, 7},
+  {5, 0xD, 0x1, 0xD, 0x4, 0x2E, 6},
+  {5, 0xD, 0x1, 0xD, 0x1, 0x2B, 7},
+  {5, 0xE, 0x0, 0x0, 0x0, 0x4B, 7},
+  {5, 0xE, 0x1, 0x6, 0x6, 0x0B, 7},
+  {5, 0xE, 0x1, 0xE, 0xA, 0x33, 7},
+  {5, 0xE, 0x1, 0xE, 0x2, 0x0E, 6},
+  {5, 0xE, 0x1, 0xE, 0xC, 0x73, 7},
+  {5, 0xE, 0x1, 0xE, 0x8, 0x36, 6},
+  {5, 0xE, 0x1, 0xE, 0x4, 0x53, 7},
+  {5, 0xF, 0x0, 0x0, 0x0, 0x13, 7},
+  {5, 0xF, 0x1, 0x7, 0x7, 0x43, 7},
+  {5, 0xF, 0x1, 0x7, 0x6, 0x16, 6},
+  {5, 0xF, 0x1, 0x7, 0x5, 0x63, 7},
+  {5, 0xF, 0x1, 0xF, 0xC, 0x23, 7},
+  {5, 0xF, 0x1, 0xF, 0x4, 0x0C, 5},
+  {5, 0xF, 0x1, 0xD, 0x9, 0x03, 7},
+  {5, 0xF, 0x1, 0xF, 0xA, 0x3D, 7},
+  {5, 0xF, 0x1, 0xF, 0x8, 0x14, 5},
+  {5, 0xF, 0x1, 0xF, 0x3, 0x7D, 7},
+  {5, 0xF, 0x1, 0xF, 0x2, 0x04, 5},
+  {5, 0xF, 0x1, 0xF, 0x1, 0x06, 6},
+  {6, 0x0, 0x0, 0x0, 0x0, 0x00, 3},
+  {6, 0x1, 0x0, 0x0, 0x0, 0x04, 4},
+  {6, 0x1, 0x1, 0x1, 0x1, 0x03, 6},
+  {6, 0x2, 0x0, 0x0, 0x0, 0x0C, 5},
+  {6, 0x2, 0x1, 0x2, 0x2, 0x0D, 6},
+  {6, 0x3, 0x0, 0x0, 0x0, 0x1A, 5},
+  {6, 0x3, 0x1, 0x3, 0x3, 0x3D, 6},
+  {6, 0x3, 0x1, 0x3, 0x1, 0x1D, 6},
+  {6, 0x3, 0x1, 0x3, 0x2, 0x2D, 6},
+  {6, 0x4, 0x0, 0x0, 0x0, 0x0A, 5},
+  {6, 0x4, 0x1, 0x4, 0x4, 0x3F, 7},
+  {6, 0x5, 0x0, 0x0, 0x0, 0x35, 6},
+  {6, 0x5, 0x1, 0x1, 0x1, 0x15, 6},
+  {6, 0x5, 0x1, 0x5, 0x4, 0x7F, 7},
+  {6, 0x6, 0x0, 0x0, 0x0, 0x25, 6},
+  {6, 0x6, 0x1, 0x2, 0x2, 0x5F, 7},
+  {6, 0x6, 0x1, 0x6, 0x4, 0x1F, 7},
+  {6, 0x7, 0x0, 0x0, 0x0, 0x6F, 7},
+  {6, 0x7, 0x1, 0x6, 0x6, 0x4F, 7},
+  {6, 0x7, 0x1, 0x6, 0x4, 0x05, 6},
+  {6, 0x7, 0x1, 0x7, 0x3, 0x2F, 7},
+  {6, 0x7, 0x1, 0x7, 0x1, 0x36, 6},
+  {6, 0x7, 0x1, 0x7, 0x2, 0x77, 7},
+  {6, 0x8, 0x0, 0x0, 0x0, 0x12, 5},
+  {6, 0x8, 0x1, 0x8, 0x8, 0x0F, 7},
+  {6, 0x9, 0x0, 0x0, 0x0, 0x39, 6},
+  {6, 0x9, 0x1, 0x1, 0x1, 0x37, 7},
+  {6, 0x9, 0x1, 0x9, 0x8, 0x57, 7},
+  {6, 0xA, 0x0, 0x0, 0x0, 0x19, 6},
+  {6, 0xA, 0x1, 0x2, 0x2, 0x29, 6},
+  {6, 0xA, 0x1, 0xA, 0x8, 0x17, 7},
+  {6, 0xB, 0x0, 0x0, 0x0, 0x67, 7},
+  {6, 0xB, 0x1, 0x9, 0x9, 0x47, 7},
+  {6, 0xB, 0x1, 0x9, 0x1, 0x09, 6},
+  {6, 0xB, 0x1, 0xB, 0xA, 0x27, 7},
+  {6, 0xB, 0x1, 0xB, 0x2, 0x31, 6},
+  {6, 0xB, 0x1, 0xB, 0x8, 0x7B, 7},
+  {6, 0xC, 0x0, 0x0, 0x0, 0x11, 6},
+  {6, 0xC, 0x1, 0xC, 0xC, 0x07, 7},
+  {6, 0xC, 0x1, 0xC, 0x8, 0x21, 6},
+  {6, 0xC, 0x1, 0xC, 0x4, 0x3B, 7},
+  {6, 0xD, 0x0, 0x0, 0x0, 0x5B, 7},
+  {6, 0xD, 0x1, 0x5, 0x5, 0x33, 7},
+  {6, 0xD, 0x1, 0x5, 0x4, 0x01, 6},
+  {6, 0xD, 0x1, 0xC, 0x8, 0x1B, 7},
+  {6, 0xD, 0x1, 0xD, 0x1, 0x6B, 7},
+  {6, 0xE, 0x0, 0x0, 0x0, 0x2B, 7},
+  {6, 0xE, 0x1, 0xE, 0x2, 0x4B, 7},
+  {6, 0xE, 0x1, 0x2, 0x2, 0x0B, 7},
+  {6, 0xE, 0x1, 0xE, 0xC, 0x73, 7},
+  {6, 0xE, 0x1, 0xE, 0x8, 0x3E, 6},
+  {6, 0xE, 0x1, 0xE, 0x4, 0x53, 7},
+  {6, 0xF, 0x0, 0x0, 0x0, 0x13, 7},
+  {6, 0xF, 0x1, 0x6, 0x6, 0x1E, 6},
+  {6, 0xF, 0x1, 0xE, 0xA, 0x2E, 6},
+  {6, 0xF, 0x1, 0xF, 0x3, 0x0E, 6},
+  {6, 0xF, 0x1, 0xF, 0x2, 0x02, 5},
+  {6, 0xF, 0x1, 0xB, 0x9, 0x63, 7},
+  {6, 0xF, 0x1, 0xF, 0xC, 0x16, 6},
+  {6, 0xF, 0x1, 0xF, 0x8, 0x06, 6},
+  {6, 0xF, 0x1, 0xF, 0x5, 0x23, 7},
+  {6, 0xF, 0x1, 0xF, 0x1, 0x1C, 5},
+  {6, 0xF, 0x1, 0xF, 0x4, 0x26, 6},
+  {7, 0x0, 0x0, 0x0, 0x0, 0x12, 5},
+  {7, 0x1, 0x0, 0x0, 0x0, 0x05, 6},
+  {7, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
+  {7, 0x2, 0x0, 0x0, 0x0, 0x39, 6},
+  {7, 0x2, 0x1, 0x2, 0x2, 0x3F, 7},
+  {7, 0x3, 0x0, 0x0, 0x0, 0x5F, 7},
+  {7, 0x3, 0x1, 0x3, 0x3, 0x1F, 7},
+  {7, 0x3, 0x1, 0x3, 0x2, 0x6F, 7},
+  {7, 0x3, 0x1, 0x3, 0x1, 0x2F, 7},
+  {7, 0x4, 0x0, 0x0, 0x0, 0x4F, 7},
+  {7, 0x4, 0x1, 0x4, 0x4, 0x0F, 7},
+  {7, 0x5, 0x0, 0x0, 0x0, 0x57, 7},
+  {7, 0x5, 0x1, 0x1, 0x1, 0x19, 6},
+  {7, 0x5, 0x1, 0x5, 0x4, 0x77, 7},
+  {7, 0x6, 0x0, 0x0, 0x0, 0x37, 7},
+  {7, 0x6, 0x1, 0x0, 0x0, 0x29, 6},
+  {7, 0x7, 0x0, 0x0, 0x0, 0x17, 7},
+  {7, 0x7, 0x1, 0x6, 0x6, 0x67, 7},
+  {7, 0x7, 0x1, 0x7, 0x3, 0x27, 7},
+  {7, 0x7, 0x1, 0x7, 0x2, 0x47, 7},
+  {7, 0x7, 0x1, 0x7, 0x5, 0x1B, 7},
+  {7, 0x7, 0x1, 0x7, 0x1, 0x09, 6},
+  {7, 0x7, 0x1, 0x7, 0x4, 0x07, 7},
+  {7, 0x8, 0x0, 0x0, 0x0, 0x7B, 7},
+  {7, 0x8, 0x1, 0x8, 0x8, 0x3B, 7},
+  {7, 0x9, 0x0, 0x0, 0x0, 0x5B, 7},
+  {7, 0x9, 0x1, 0x0, 0x0, 0x31, 6},
+  {7, 0xA, 0x0, 0x0, 0x0, 0x53, 7},
+  {7, 0xA, 0x1, 0x2, 0x2, 0x11, 6},
+  {7, 0xA, 0x1, 0xA, 0x8, 0x6B, 7},
+  {7, 0xB, 0x0, 0x0, 0x0, 0x2B, 7},
+  {7, 0xB, 0x1, 0x9, 0x9, 0x4B, 7},
+  {7, 0xB, 0x1, 0xB, 0x3, 0x0B, 7},
+  {7, 0xB, 0x1, 0xB, 0x1, 0x73, 7},
+  {7, 0xB, 0x1, 0xB, 0xA, 0x33, 7},
+  {7, 0xB, 0x1, 0xB, 0x2, 0x21, 6},
+  {7, 0xB, 0x1, 0xB, 0x8, 0x13, 7},
+  {7, 0xC, 0x0, 0x0, 0x0, 0x63, 7},
+  {7, 0xC, 0x1, 0x8, 0x8, 0x23, 7},
+  {7, 0xC, 0x1, 0xC, 0x4, 0x43, 7},
+  {7, 0xD, 0x0, 0x0, 0x0, 0x03, 7},
+  {7, 0xD, 0x1, 0x9, 0x9, 0x7D, 7},
+  {7, 0xD, 0x1, 0xD, 0x5, 0x5D, 7},
+  {7, 0xD, 0x1, 0xD, 0x1, 0x01, 6},
+  {7, 0xD, 0x1, 0xD, 0xC, 0x3D, 7},
+  {7, 0xD, 0x1, 0xD, 0x4, 0x3E, 6},
+  {7, 0xD, 0x1, 0xD, 0x8, 0x1D, 7},
+  {7, 0xE, 0x0, 0x0, 0x0, 0x6D, 7},
+  {7, 0xE, 0x1, 0x6, 0x6, 0x2D, 7},
+  {7, 0xE, 0x1, 0xE, 0xA, 0x0D, 7},
+  {7, 0xE, 0x1, 0xE, 0x2, 0x1E, 6},
+  {7, 0xE, 0x1, 0xE, 0xC, 0x4D, 7},
+  {7, 0xE, 0x1, 0xE, 0x8, 0x0E, 6},
+  {7, 0xE, 0x1, 0xE, 0x4, 0x75, 7},
+  {7, 0xF, 0x0, 0x0, 0x0, 0x15, 7},
+  {7, 0xF, 0x1, 0xF, 0xF, 0x06, 5},
+  {7, 0xF, 0x1, 0xF, 0xD, 0x35, 7},
+  {7, 0xF, 0x1, 0xF, 0x7, 0x55, 7},
+  {7, 0xF, 0x1, 0xF, 0x5, 0x1A, 5},
+  {7, 0xF, 0x1, 0xF, 0xB, 0x25, 7},
+  {7, 0xF, 0x1, 0xF, 0x3, 0x0A, 5},
+  {7, 0xF, 0x1, 0xF, 0x9, 0x2E, 6},
+  {7, 0xF, 0x1, 0xF, 0x1, 0x00, 4},
+  {7, 0xF, 0x1, 0xF, 0xE, 0x65, 7},
+  {7, 0xF, 0x1, 0xF, 0x6, 0x36, 6},
+  {7, 0xF, 0x1, 0xF, 0xA, 0x02, 5},
+  {7, 0xF, 0x1, 0xF, 0x2, 0x0C, 4},
+  {7, 0xF, 0x1, 0xF, 0xC, 0x16, 6},
+  {7, 0xF, 0x1, 0xF, 0x8, 0x04, 4},
+  {7, 0xF, 0x1, 0xF, 0x4, 0x08, 4}
+};
+
+// non-initial quad rows
+static vlc_src_table_t tbl1[] = {
+  {0, 0x1, 0x0, 0x0, 0x0, 0x00, 3},
+  {0, 0x1, 0x1, 0x1, 0x1, 0x27, 6},
+  {0, 0x2, 0x0, 0x0, 0x0, 0x06, 3},
+  {0, 0x2, 0x1, 0x2, 0x2, 0x17, 6},
+  {0, 0x3, 0x0, 0x0, 0x0, 0x0D, 5},
+  {0, 0x3, 0x1, 0x0, 0x0, 0x3B, 6},
+  {0, 0x4, 0x0, 0x0, 0x0, 0x02, 3},
+  {0, 0x4, 0x1, 0x4, 0x4, 0x07, 6},
+  {0, 0x5, 0x0, 0x0, 0x0, 0x15, 5},
+  {0, 0x5, 0x1, 0x0, 0x0, 0x2B, 6},
+  {0, 0x6, 0x0, 0x0, 0x0, 0x01, 5},
+  {0, 0x6, 0x1, 0x0, 0x0, 0x7F, 7},
+  {0, 0x7, 0x0, 0x0, 0x0, 0x1F, 7},
+  {0, 0x7, 0x1, 0x0, 0x0, 0x1B, 6},
+  {0, 0x8, 0x0, 0x0, 0x0, 0x04, 3},
+  {0, 0x8, 0x1, 0x8, 0x8, 0x05, 5},
+  {0, 0x9, 0x0, 0x0, 0x0, 0x19, 5},
+  {0, 0x9, 0x1, 0x0, 0x0, 0x13, 6},
+  {0, 0xA, 0x0, 0x0, 0x0, 0x09, 5},
+  {0, 0xA, 0x1, 0x8, 0x8, 0x0B, 6},
+  {0, 0xA, 0x1, 0xA, 0x2, 0x3F, 7},
+  {0, 0xB, 0x0, 0x0, 0x0, 0x5F, 7},
+  {0, 0xB, 0x1, 0x0, 0x0, 0x33, 6},
+  {0, 0xC, 0x0, 0x0, 0x0, 0x11, 5},
+  {0, 0xC, 0x1, 0x8, 0x8, 0x23, 6},
+  {0, 0xC, 0x1, 0xC, 0x4, 0x6F, 7},
+  {0, 0xD, 0x0, 0x0, 0x0, 0x0F, 7},
+  {0, 0xD, 0x1, 0x0, 0x0, 0x03, 6},
+  {0, 0xE, 0x0, 0x0, 0x0, 0x2F, 7},
+  {0, 0xE, 0x1, 0x4, 0x4, 0x4F, 7},
+  {0, 0xE, 0x1, 0x4, 0x0, 0x3D, 6},
+  {0, 0xF, 0x0, 0x0, 0x0, 0x77, 7},
+  {0, 0xF, 0x1, 0x1, 0x1, 0x37, 7},
+  {0, 0xF, 0x1, 0x1, 0x0, 0x1D, 6},
+  {1, 0x0, 0x0, 0x0, 0x0, 0x00, 1},
+  {1, 0x1, 0x0, 0x0, 0x0, 0x05, 4},
+  {1, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
+  {1, 0x2, 0x0, 0x0, 0x0, 0x09, 4},
+  {1, 0x2, 0x1, 0x2, 0x2, 0x1F, 7},
+  {1, 0x3, 0x0, 0x0, 0x0, 0x1D, 5},
+  {1, 0x3, 0x1, 0x1, 0x1, 0x3F, 7},
+  {1, 0x3, 0x1, 0x3, 0x2, 0x5F, 7},
+  {1, 0x4, 0x0, 0x0, 0x0, 0x0D, 5},
+  {1, 0x4, 0x1, 0x4, 0x4, 0x37, 7},
+  {1, 0x5, 0x0, 0x0, 0x0, 0x03, 6},
+  {1, 0x5, 0x1, 0x0, 0x0, 0x6F, 7},
+  {1, 0x6, 0x0, 0x0, 0x0, 0x2F, 7},
+  {1, 0x6, 0x1, 0x0, 0x0, 0x4F, 7},
+  {1, 0x7, 0x0, 0x0, 0x0, 0x0F, 7},
+  {1, 0x7, 0x1, 0x0, 0x0, 0x77, 7},
+  {1, 0x8, 0x0, 0x0, 0x0, 0x01, 4},
+  {1, 0x8, 0x1, 0x8, 0x8, 0x17, 7},
+  {1, 0x9, 0x0, 0x0, 0x0, 0x0B, 6},
+  {1, 0x9, 0x1, 0x0, 0x0, 0x57, 7},
+  {1, 0xA, 0x0, 0x0, 0x0, 0x33, 6},
+  {1, 0xA, 0x1, 0x0, 0x0, 0x67, 7},
+  {1, 0xB, 0x0, 0x0, 0x0, 0x27, 7},
+  {1, 0xB, 0x1, 0x0, 0x0, 0x2B, 7},
+  {1, 0xC, 0x0, 0x0, 0x0, 0x13, 6},
+  {1, 0xC, 0x1, 0x0, 0x0, 0x47, 7},
+  {1, 0xD, 0x0, 0x0, 0x0, 0x07, 7},
+  {1, 0xD, 0x1, 0x0, 0x0, 0x7B, 7},
+  {1, 0xE, 0x0, 0x0, 0x0, 0x3B, 7},
+  {1, 0xE, 0x1, 0x0, 0x0, 0x5B, 7},
+  {1, 0xF, 0x0, 0x0, 0x0, 0x1B, 7},
+  {1, 0xF, 0x1, 0x4, 0x4, 0x6B, 7},
+  {1, 0xF, 0x1, 0x4, 0x0, 0x23, 6},
+  {2, 0x0, 0x0, 0x0, 0x0, 0x00, 1},
+  {2, 0x1, 0x0, 0x0, 0x0, 0x09, 4},
+  {2, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
+  {2, 0x2, 0x0, 0x0, 0x0, 0x01, 4},
+  {2, 0x2, 0x1, 0x2, 0x2, 0x23, 6},
+  {2, 0x3, 0x0, 0x0, 0x0, 0x3D, 6},
+  {2, 0x3, 0x1, 0x2, 0x2, 0x3F, 7},
+  {2, 0x3, 0x1, 0x3, 0x1, 0x1F, 7},
+  {2, 0x4, 0x0, 0x0, 0x0, 0x15, 5},
+  {2, 0x4, 0x1, 0x4, 0x4, 0x5F, 7},
+  {2, 0x5, 0x0, 0x0, 0x0, 0x03, 6},
+  {2, 0x5, 0x1, 0x0, 0x0, 0x6F, 7},
+  {2, 0x6, 0x0, 0x0, 0x0, 0x2F, 7},
+  {2, 0x6, 0x1, 0x0, 0x0, 0x4F, 7},
+  {2, 0x7, 0x0, 0x0, 0x0, 0x0F, 7},
+  {2, 0x7, 0x1, 0x0, 0x0, 0x17, 7},
+  {2, 0x8, 0x0, 0x0, 0x0, 0x05, 5},
+  {2, 0x8, 0x1, 0x8, 0x8, 0x77, 7},
+  {2, 0x9, 0x0, 0x0, 0x0, 0x37, 7},
+  {2, 0x9, 0x1, 0x0, 0x0, 0x57, 7},
+  {2, 0xA, 0x0, 0x0, 0x0, 0x1D, 6},
+  {2, 0xA, 0x1, 0xA, 0xA, 0x7B, 7},
+  {2, 0xA, 0x1, 0xA, 0x2, 0x2D, 6},
+  {2, 0xA, 0x1, 0xA, 0x8, 0x67, 7},
+  {2, 0xB, 0x0, 0x0, 0x0, 0x27, 7},
+  {2, 0xB, 0x1, 0xB, 0x2, 0x47, 7},
+  {2, 0xB, 0x1, 0x0, 0x0, 0x07, 7},
+  {2, 0xC, 0x0, 0x0, 0x0, 0x0D, 6},
+  {2, 0xC, 0x1, 0x0, 0x0, 0x3B, 7},
+  {2, 0xD, 0x0, 0x0, 0x0, 0x5B, 7},
+  {2, 0xD, 0x1, 0x0, 0x0, 0x1B, 7},
+  {2, 0xE, 0x0, 0x0, 0x0, 0x6B, 7},
+  {2, 0xE, 0x1, 0x4, 0x4, 0x2B, 7},
+  {2, 0xE, 0x1, 0x4, 0x0, 0x4B, 7},
+  {2, 0xF, 0x0, 0x0, 0x0, 0x0B, 7},
+  {2, 0xF, 0x1, 0x4, 0x4, 0x73, 7},
+  {2, 0xF, 0x1, 0x5, 0x1, 0x33, 7},
+  {2, 0xF, 0x1, 0x7, 0x2, 0x53, 7},
+  {2, 0xF, 0x1, 0xF, 0x8, 0x13, 7},
+  {3, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
+  {3, 0x1, 0x0, 0x0, 0x0, 0x0A, 4},
+  {3, 0x1, 0x1, 0x1, 0x1, 0x0B, 6},
+  {3, 0x2, 0x0, 0x0, 0x0, 0x02, 4},
+  {3, 0x2, 0x1, 0x2, 0x2, 0x23, 6},
+  {3, 0x3, 0x0, 0x0, 0x0, 0x0E, 5},
+  {3, 0x3, 0x1, 0x3, 0x3, 0x7F, 7},
+  {3, 0x3, 0x1, 0x3, 0x2, 0x33, 6},
+  {3, 0x3, 0x1, 0x3, 0x1, 0x13, 6},
+  {3, 0x4, 0x0, 0x0, 0x0, 0x16, 5},
+  {3, 0x4, 0x1, 0x4, 0x4, 0x3F, 7},
+  {3, 0x5, 0x0, 0x0, 0x0, 0x03, 6},
+  {3, 0x5, 0x1, 0x1, 0x1, 0x3D, 6},
+  {3, 0x5, 0x1, 0x5, 0x4, 0x1F, 7},
+  {3, 0x6, 0x0, 0x0, 0x0, 0x1D, 6},
+  {3, 0x6, 0x1, 0x0, 0x0, 0x5F, 7},
+  {3, 0x7, 0x0, 0x0, 0x0, 0x2D, 6},
+  {3, 0x7, 0x1, 0x4, 0x4, 0x2F, 7},
+  {3, 0x7, 0x1, 0x5, 0x1, 0x1E, 6},
+  {3, 0x7, 0x1, 0x7, 0x2, 0x6F, 7},
+  {3, 0x8, 0x0, 0x0, 0x0, 0x06, 5},
+  {3, 0x8, 0x1, 0x8, 0x8, 0x4F, 7},
+  {3, 0x9, 0x0, 0x0, 0x0, 0x0D, 6},
+  {3, 0x9, 0x1, 0x0, 0x0, 0x35, 6},
+  {3, 0xA, 0x0, 0x0, 0x0, 0x15, 6},
+  {3, 0xA, 0x1, 0x2, 0x2, 0x25, 6},
+  {3, 0xA, 0x1, 0xA, 0x8, 0x0F, 7},
+  {3, 0xB, 0x0, 0x0, 0x0, 0x05, 6},
+  {3, 0xB, 0x1, 0x8, 0x8, 0x39, 6},
+  {3, 0xB, 0x1, 0xB, 0x3, 0x17, 7},
+  {3, 0xB, 0x1, 0xB, 0x2, 0x19, 6},
+  {3, 0xB, 0x1, 0xB, 0x1, 0x77, 7},
+  {3, 0xC, 0x0, 0x0, 0x0, 0x29, 6},
+  {3, 0xC, 0x1, 0x0, 0x0, 0x09, 6},
+  {3, 0xD, 0x0, 0x0, 0x0, 0x37, 7},
+  {3, 0xD, 0x1, 0x4, 0x4, 0x57, 7},
+  {3, 0xD, 0x1, 0x4, 0x0, 0x31, 6},
+  {3, 0xE, 0x0, 0x0, 0x0, 0x67, 7},
+  {3, 0xE, 0x1, 0x4, 0x4, 0x27, 7},
+  {3, 0xE, 0x1, 0xC, 0x8, 0x47, 7},
+  {3, 0xE, 0x1, 0xE, 0x2, 0x6B, 7},
+  {3, 0xF, 0x0, 0x0, 0x0, 0x11, 6},
+  {3, 0xF, 0x1, 0x6, 0x6, 0x07, 7},
+  {3, 0xF, 0x1, 0x7, 0x3, 0x7B, 7},
+  {3, 0xF, 0x1, 0xF, 0xA, 0x3B, 7},
+  {3, 0xF, 0x1, 0xF, 0x2, 0x21, 6},
+  {3, 0xF, 0x1, 0xF, 0x8, 0x01, 6},
+  {3, 0xF, 0x1, 0xA, 0x8, 0x5B, 7},
+  {3, 0xF, 0x1, 0xF, 0x5, 0x1B, 7},
+  {3, 0xF, 0x1, 0xF, 0x1, 0x3E, 6},
+  {3, 0xF, 0x1, 0xF, 0x4, 0x2B, 7},
+  {4, 0x0, 0x0, 0x0, 0x0, 0x00, 1},
+  {4, 0x1, 0x0, 0x0, 0x0, 0x0D, 5},
+  {4, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
+  {4, 0x2, 0x0, 0x0, 0x0, 0x15, 5},
+  {4, 0x2, 0x1, 0x2, 0x2, 0x3F, 7},
+  {4, 0x3, 0x0, 0x0, 0x0, 0x5F, 7},
+  {4, 0x3, 0x1, 0x0, 0x0, 0x6F, 7},
+  {4, 0x4, 0x0, 0x0, 0x0, 0x09, 4},
+  {4, 0x4, 0x1, 0x4, 0x4, 0x23, 6},
+  {4, 0x5, 0x0, 0x0, 0x0, 0x33, 6},
+  {4, 0x5, 0x1, 0x0, 0x0, 0x1F, 7},
+  {4, 0x6, 0x0, 0x0, 0x0, 0x13, 6},
+  {4, 0x6, 0x1, 0x0, 0x0, 0x2F, 7},
+  {4, 0x7, 0x0, 0x0, 0x0, 0x4F, 7},
+  {4, 0x7, 0x1, 0x0, 0x0, 0x57, 7},
+  {4, 0x8, 0x0, 0x0, 0x0, 0x01, 4},
+  {4, 0x8, 0x1, 0x8, 0x8, 0x0F, 7},
+  {4, 0x9, 0x0, 0x0, 0x0, 0x77, 7},
+  {4, 0x9, 0x1, 0x0, 0x0, 0x37, 7},
+  {4, 0xA, 0x0, 0x0, 0x0, 0x1D, 6},
+  {4, 0xA, 0x1, 0x0, 0x0, 0x17, 7},
+  {4, 0xB, 0x0, 0x0, 0x0, 0x67, 7},
+  {4, 0xB, 0x1, 0x0, 0x0, 0x6B, 7},
+  {4, 0xC, 0x0, 0x0, 0x0, 0x05, 5},
+  {4, 0xC, 0x1, 0xC, 0xC, 0x27, 7},
+  {4, 0xC, 0x1, 0xC, 0x8, 0x47, 7},
+  {4, 0xC, 0x1, 0xC, 0x4, 0x07, 7},
+  {4, 0xD, 0x0, 0x0, 0x0, 0x7B, 7},
+  {4, 0xD, 0x1, 0x0, 0x0, 0x3B, 7},
+  {4, 0xE, 0x0, 0x0, 0x0, 0x5B, 7},
+  {4, 0xE, 0x1, 0x2, 0x2, 0x1B, 7},
+  {4, 0xE, 0x1, 0x2, 0x0, 0x03, 6},
+  {4, 0xF, 0x0, 0x0, 0x0, 0x2B, 7},
+  {4, 0xF, 0x1, 0x1, 0x1, 0x4B, 7},
+  {4, 0xF, 0x1, 0x3, 0x2, 0x0B, 7},
+  {4, 0xF, 0x1, 0x3, 0x0, 0x3D, 6},
+  {5, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
+  {5, 0x1, 0x0, 0x0, 0x0, 0x1E, 5},
+  {5, 0x1, 0x1, 0x1, 0x1, 0x3B, 6},
+  {5, 0x2, 0x0, 0x0, 0x0, 0x0A, 5},
+  {5, 0x2, 0x1, 0x2, 0x2, 0x3F, 7},
+  {5, 0x3, 0x0, 0x0, 0x0, 0x1B, 6},
+  {5, 0x3, 0x1, 0x0, 0x0, 0x0B, 6},
+  {5, 0x4, 0x0, 0x0, 0x0, 0x02, 4},
+  {5, 0x4, 0x1, 0x4, 0x4, 0x2B, 6},
+  {5, 0x5, 0x0, 0x0, 0x0, 0x0E, 5},
+  {5, 0x5, 0x1, 0x4, 0x4, 0x33, 6},
+  {5, 0x5, 0x1, 0x5, 0x1, 0x7F, 7},
+  {5, 0x6, 0x0, 0x0, 0x0, 0x13, 6},
+  {5, 0x6, 0x1, 0x0, 0x0, 0x6F, 7},
+  {5, 0x7, 0x0, 0x0, 0x0, 0x23, 6},
+  {5, 0x7, 0x1, 0x2, 0x2, 0x5F, 7},
+  {5, 0x7, 0x1, 0x2, 0x0, 0x15, 6},
+  {5, 0x8, 0x0, 0x0, 0x0, 0x16, 5},
+  {5, 0x8, 0x1, 0x8, 0x8, 0x03, 6},
+  {5, 0x9, 0x0, 0x0, 0x0, 0x3D, 6},
+  {5, 0x9, 0x1, 0x0, 0x0, 0x1F, 7},
+  {5, 0xA, 0x0, 0x0, 0x0, 0x1D, 6},
+  {5, 0xA, 0x1, 0x0, 0x0, 0x2D, 6},
+  {5, 0xB, 0x0, 0x0, 0x0, 0x0D, 6},
+  {5, 0xB, 0x1, 0x1, 0x1, 0x4F, 7},
+  {5, 0xB, 0x1, 0x1, 0x0, 0x35, 6},
+  {5, 0xC, 0x0, 0x0, 0x0, 0x06, 5},
+  {5, 0xC, 0x1, 0x4, 0x4, 0x25, 6},
+  {5, 0xC, 0x1, 0xC, 0x8, 0x2F, 7},
+  {5, 0xD, 0x0, 0x0, 0x0, 0x05, 6},
+  {5, 0xD, 0x1, 0x1, 0x1, 0x77, 7},
+  {5, 0xD, 0x1, 0x5, 0x4, 0x39, 6},
+  {5, 0xD, 0x1, 0xD, 0x8, 0x0F, 7},
+  {5, 0xE, 0x0, 0x0, 0x0, 0x19, 6},
+  {5, 0xE, 0x1, 0x2, 0x2, 0x57, 7},
+  {5, 0xE, 0x1, 0xA, 0x8, 0x01, 6},
+  {5, 0xE, 0x1, 0xE, 0x4, 0x37, 7},
+  {5, 0xF, 0x0, 0x0, 0x0, 0x1A, 5},
+  {5, 0xF, 0x1, 0x9, 0x9, 0x17, 7},
+  {5, 0xF, 0x1, 0xD, 0x5, 0x67, 7},
+  {5, 0xF, 0x1, 0xF, 0x3, 0x07, 7},
+  {5, 0xF, 0x1, 0xF, 0x1, 0x29, 6},
+  {5, 0xF, 0x1, 0x7, 0x6, 0x27, 7},
+  {5, 0xF, 0x1, 0xF, 0xC, 0x09, 6},
+  {5, 0xF, 0x1, 0xF, 0x4, 0x31, 6},
+  {5, 0xF, 0x1, 0xF, 0xA, 0x47, 7},
+  {5, 0xF, 0x1, 0xF, 0x8, 0x11, 6},
+  {5, 0xF, 0x1, 0xF, 0x2, 0x21, 6},
+  {6, 0x0, 0x0, 0x0, 0x0, 0x00, 3},
+  {6, 0x1, 0x0, 0x0, 0x0, 0x02, 4},
+  {6, 0x1, 0x1, 0x1, 0x1, 0x03, 6},
+  {6, 0x2, 0x0, 0x0, 0x0, 0x0C, 4},
+  {6, 0x2, 0x1, 0x2, 0x2, 0x3D, 6},
+  {6, 0x3, 0x0, 0x0, 0x0, 0x1D, 6},
+  {6, 0x3, 0x1, 0x2, 0x2, 0x0D, 6},
+  {6, 0x3, 0x1, 0x3, 0x1, 0x7F, 7},
+  {6, 0x4, 0x0, 0x0, 0x0, 0x04, 4},
+  {6, 0x4, 0x1, 0x4, 0x4, 0x2D, 6},
+  {6, 0x5, 0x0, 0x0, 0x0, 0x0A, 5},
+  {6, 0x5, 0x1, 0x4, 0x4, 0x35, 6},
+  {6, 0x5, 0x1, 0x5, 0x1, 0x2F, 7},
+  {6, 0x6, 0x0, 0x0, 0x0, 0x15, 6},
+  {6, 0x6, 0x1, 0x2, 0x2, 0x3F, 7},
+  {6, 0x6, 0x1, 0x6, 0x4, 0x5F, 7},
+  {6, 0x7, 0x0, 0x0, 0x0, 0x25, 6},
+  {6, 0x7, 0x1, 0x2, 0x2, 0x29, 6},
+  {6, 0x7, 0x1, 0x3, 0x1, 0x1F, 7},
+  {6, 0x7, 0x1, 0x7, 0x4, 0x6F, 7},
+  {6, 0x8, 0x0, 0x0, 0x0, 0x16, 5},
+  {6, 0x8, 0x1, 0x8, 0x8, 0x05, 6},
+  {6, 0x9, 0x0, 0x0, 0x0, 0x39, 6},
+  {6, 0x9, 0x1, 0x0, 0x0, 0x19, 6},
+  {6, 0xA, 0x0, 0x0, 0x0, 0x06, 5},
+  {6, 0xA, 0x1, 0xA, 0xA, 0x0F, 7},
+  {6, 0xA, 0x1, 0xA, 0x2, 0x09, 6},
+  {6, 0xA, 0x1, 0xA, 0x8, 0x4F, 7},
+  {6, 0xB, 0x0, 0x0, 0x0, 0x0E, 6},
+  {6, 0xB, 0x1, 0xB, 0x2, 0x77, 7},
+  {6, 0xB, 0x1, 0x2, 0x2, 0x37, 7},
+  {6, 0xB, 0x1, 0xA, 0x8, 0x57, 7},
+  {6, 0xB, 0x1, 0xB, 0x1, 0x47, 7},
+  {6, 0xC, 0x0, 0x0, 0x0, 0x1A, 5},
+  {6, 0xC, 0x1, 0xC, 0xC, 0x17, 7},
+  {6, 0xC, 0x1, 0xC, 0x8, 0x67, 7},
+  {6, 0xC, 0x1, 0xC, 0x4, 0x27, 7},
+  {6, 0xD, 0x0, 0x0, 0x0, 0x31, 6},
+  {6, 0xD, 0x1, 0xD, 0x4, 0x07, 7},
+  {6, 0xD, 0x1, 0x4, 0x4, 0x7B, 7},
+  {6, 0xD, 0x1, 0xC, 0x8, 0x3B, 7},
+  {6, 0xD, 0x1, 0xD, 0x1, 0x2B, 7},
+  {6, 0xE, 0x0, 0x0, 0x0, 0x11, 6},
+  {6, 0xE, 0x1, 0xE, 0x4, 0x5B, 7},
+  {6, 0xE, 0x1, 0x4, 0x4, 0x1B, 7},
+  {6, 0xE, 0x1, 0xE, 0xA, 0x6B, 7},
+  {6, 0xE, 0x1, 0xE, 0x8, 0x21, 6},
+  {6, 0xE, 0x1, 0xE, 0x2, 0x33, 7},
+  {6, 0xF, 0x0, 0x0, 0x0, 0x01, 6},
+  {6, 0xF, 0x1, 0x3, 0x3, 0x4B, 7},
+  {6, 0xF, 0x1, 0x7, 0x6, 0x0B, 7},
+  {6, 0xF, 0x1, 0xF, 0xA, 0x73, 7},
+  {6, 0xF, 0x1, 0xF, 0x2, 0x3E, 6},
+  {6, 0xF, 0x1, 0xB, 0x9, 0x53, 7},
+  {6, 0xF, 0x1, 0xF, 0xC, 0x63, 7},
+  {6, 0xF, 0x1, 0xF, 0x8, 0x1E, 6},
+  {6, 0xF, 0x1, 0xF, 0x5, 0x13, 7},
+  {6, 0xF, 0x1, 0xF, 0x4, 0x2E, 6},
+  {6, 0xF, 0x1, 0xF, 0x1, 0x23, 7},
+  {7, 0x0, 0x0, 0x0, 0x0, 0x04, 4},
+  {7, 0x1, 0x0, 0x0, 0x0, 0x33, 6},
+  {7, 0x1, 0x1, 0x1, 0x1, 0x13, 6},
+  {7, 0x2, 0x0, 0x0, 0x0, 0x23, 6},
+  {7, 0x2, 0x1, 0x2, 0x2, 0x7F, 7},
+  {7, 0x3, 0x0, 0x0, 0x0, 0x03, 6},
+  {7, 0x3, 0x1, 0x1, 0x1, 0x3F, 7},
+  {7, 0x3, 0x1, 0x3, 0x2, 0x6F, 7},
+  {7, 0x4, 0x0, 0x0, 0x0, 0x2D, 6},
+  {7, 0x4, 0x1, 0x4, 0x4, 0x5F, 7},
+  {7, 0x5, 0x0, 0x0, 0x0, 0x16, 5},
+  {7, 0x5, 0x1, 0x1, 0x1, 0x3D, 6},
+  {7, 0x5, 0x1, 0x5, 0x4, 0x1F, 7},
+  {7, 0x6, 0x0, 0x0, 0x0, 0x1D, 6},
+  {7, 0x6, 0x1, 0x0, 0x0, 0x77, 7},
+  {7, 0x7, 0x0, 0x0, 0x0, 0x06, 5},
+  {7, 0x7, 0x1, 0x7, 0x4, 0x2F, 7},
+  {7, 0x7, 0x1, 0x4, 0x4, 0x4F, 7},
+  {7, 0x7, 0x1, 0x7, 0x3, 0x0F, 7},
+  {7, 0x7, 0x1, 0x7, 0x1, 0x0D, 6},
+  {7, 0x7, 0x1, 0x7, 0x2, 0x57, 7},
+  {7, 0x8, 0x0, 0x0, 0x0, 0x35, 6},
+  {7, 0x8, 0x1, 0x8, 0x8, 0x37, 7},
+  {7, 0x9, 0x0, 0x0, 0x0, 0x15, 6},
+  {7, 0x9, 0x1, 0x0, 0x0, 0x27, 7},
+  {7, 0xA, 0x0, 0x0, 0x0, 0x25, 6},
+  {7, 0xA, 0x1, 0x0, 0x0, 0x29, 6},
+  {7, 0xB, 0x0, 0x0, 0x0, 0x1A, 5},
+  {7, 0xB, 0x1, 0xB, 0x1, 0x17, 7},
+  {7, 0xB, 0x1, 0x1, 0x1, 0x67, 7},
+  {7, 0xB, 0x1, 0x3, 0x2, 0x05, 6},
+  {7, 0xB, 0x1, 0xB, 0x8, 0x7B, 7},
+  {7, 0xC, 0x0, 0x0, 0x0, 0x39, 6},
+  {7, 0xC, 0x1, 0x0, 0x0, 0x19, 6},
+  {7, 0xD, 0x0, 0x0, 0x0, 0x0C, 5},
+  {7, 0xD, 0x1, 0xD, 0x1, 0x47, 7},
+  {7, 0xD, 0x1, 0x1, 0x1, 0x07, 7},
+  {7, 0xD, 0x1, 0x5, 0x4, 0x09, 6},
+  {7, 0xD, 0x1, 0xD, 0x8, 0x1B, 7},
+  {7, 0xE, 0x0, 0x0, 0x0, 0x31, 6},
+  {7, 0xE, 0x1, 0xE, 0x2, 0x3B, 7},
+  {7, 0xE, 0x1, 0x2, 0x2, 0x5B, 7},
+  {7, 0xE, 0x1, 0xA, 0x8, 0x3E, 6},
+  {7, 0xE, 0x1, 0xE, 0x4, 0x0B, 7},
+  {7, 0xF, 0x0, 0x0, 0x0, 0x00, 3},
+  {7, 0xF, 0x1, 0xF, 0xF, 0x6B, 7},
+  {7, 0xF, 0x1, 0xF, 0x7, 0x2B, 7},
+  {7, 0xF, 0x1, 0xF, 0xB, 0x4B, 7},
+  {7, 0xF, 0x1, 0xF, 0x3, 0x11, 6},
+  {7, 0xF, 0x1, 0x7, 0x6, 0x21, 6},
+  {7, 0xF, 0x1, 0xF, 0xA, 0x01, 6},
+  {7, 0xF, 0x1, 0xF, 0x2, 0x0A, 5},
+  {7, 0xF, 0x1, 0xB, 0x9, 0x1E, 6},
+  {7, 0xF, 0x1, 0xF, 0xC, 0x0E, 6},
+  {7, 0xF, 0x1, 0xF, 0x8, 0x12, 5},
+  {7, 0xF, 0x1, 0xF, 0x5, 0x2E, 6},
+  {7, 0xF, 0x1, 0xF, 0x1, 0x02, 5},
+  {7, 0xF, 0x1, 0xF, 0x4, 0x1C, 5}
+};
+
+//************************************************************************/
+/** @defgroup vlc_decoding_tables_grp VLC decoding tables
+  *  @{
+  *  VLC tables to decode VLC codewords to these fields: (in order)       \n
+  *  \li \c cwd_len : 3bits -> the codeword length of the VLC codeword;    
+  *                   the VLC cwd is in the LSB of bitstream              \n
+  *  \li \c u_off   : 1bit  -> u_offset, which is 1 if u value is not 0   \n
+  *  \li \c rho     : 4bits -> significant samples within a quad          \n
+  *  \li \c e_1     : 4bits -> EMB e_1                                    \n
+  *  \li \c e_k     : 4bits -> EMB e_k                                    \n
+  *                                                                       \n
+  *  The table index is 10 bits and composed of two parts:                \n
+  *  The 7 LSBs contain a codeword which might be shorter than 7 bits;    
+  *  this word holds the next decodable bits in the bitstream.            \n
+  *  The 3 MSBs are the context for the codeword.                         \n
+  */
+
+/// @brief vlc_tbl0 contains decoding information for initial row of quads
+int vlc_tbl0[1024] = { 0 };
+/// @brief vlc_tbl1 contains decoding information for non-initial row of 
+///        quads
+int vlc_tbl1[1024] = { 0 };
+/// @}
+
+//************************************************************************/
+/** @ingroup vlc_decoding_tables_grp
+  *  @brief Initializes vlc_tbl0 and vlc_tbl1 tables, from table0.h and
+  *         table1.h
+  */
+OPJ_BOOL vlc_init_tables()
+{
+  const OPJ_BOOL debug = OPJ_FALSE; // when true: clear the tables and assert an entry is still 0 before writing it
+
+  // number of entries in tbl0
+  size_t tbl0_size = sizeof(tbl0) / sizeof(vlc_src_table_t); 
+
+  // number of entries in tbl1
+  size_t tbl1_size = sizeof(tbl1) / sizeof(vlc_src_table_t);
+
+  if (debug) memset(vlc_tbl0, 0, sizeof(vlc_tbl0)); // redundant: vlc_tbl0 is defined with = { 0 }
+
+  // this is to convert table entries into values for decoder look up
+  // There can be at most 1024 possibilities, not all of them are valid.
+  // Indices that match no tbl0 entry are never assigned by this loop.
+  for (int i = 0; i < 1024; ++i)
+  {
+    int cwd = i & 0x7F; // the 7 LSBs of i are the codeword bits
+    int c_q = i >> 7;   // the remaining upper bits of i are the context
+    // See if this case exists in the table, if so then set the entry in
+    // vlc_tbl0
+    for (size_t j = 0; j < tbl0_size; ++j) 
+      if (tbl0[j].c_q == c_q) // context must match AND (next test) codeword must match
+        if (tbl0[j].cwd == (cwd & ((1 << tbl0[j].cwd_len) - 1)))
+        {
+          if (debug) assert(vlc_tbl0[i] == 0);
+          // Pack the entry as: e_k<<12 | e_1<<8 | rho<<4 | u_off<<3 | cwd_len
+          vlc_tbl0[i] = (tbl0[j].rho << 4) | (tbl0[j].u_off << 3)
+            | (tbl0[j].e_k << 12) | (tbl0[j].e_1 << 8) | tbl0[j].cwd_len;
+        }
+  }
+
+  if (debug) memset(vlc_tbl1, 0, sizeof(vlc_tbl1)); // redundant: vlc_tbl1 is defined with = { 0 }
+
+  // this is the same as above but for non-initial rows (tbl1 -> vlc_tbl1)
+  for (int i = 0; i < 1024; ++i)
+  {
+    int cwd = i & 0x7F; // the 7 LSBs of i are the codeword bits
+    int c_q = i >> 7;
+    for (size_t j = 0; j < tbl1_size; ++j)
+      if (tbl1[j].c_q == c_q) // context must match AND (next test) codeword must match
+        if (tbl1[j].cwd == (cwd & ((1 << tbl1[j].cwd_len) - 1)))
+        {
+          if (debug) assert(vlc_tbl1[i] == 0);
+          vlc_tbl1[i] = (tbl1[j].rho << 4) | (tbl1[j].u_off << 3)
+            | (tbl1[j].e_k << 12) | (tbl1[j].e_1 << 8) | tbl1[j].cwd_len;
+        }
+  }
+
+  return OPJ_TRUE;
+}
+
+//************************************************************************/
+/** @ingroup vlc_decoding_tables_grp
+  *  @brief Initializes VLC tables vlc_tbl0 and vlc_tbl1
+  */
+OPJ_BOOL vlc_tables_initialized = OPJ_FALSE;
+
diff --git a/src/lib/openjp2/t1_ht_luts.h b/src/lib/openjp2/t1_ht_luts.h
new file mode 100644
index 000000000..f39d5d057
--- /dev/null
+++ b/src/lib/openjp2/t1_ht_luts.h
@@ -0,0 +1,261 @@
+static const OPJ_UINT16 vlc_tbl0[1024] = {
+    0x0023, 0x00a5, 0x0043, 0x0066, 0x0083, 0xa8ee, 0x0014, 0xd8df,
+    0x0023, 0x10be, 0x0043, 0xf5ff, 0x0083, 0x207e, 0x0055, 0x515f,
+    0x0023, 0x0035, 0x0043, 0x444e, 0x0083, 0xc4ce, 0x0014, 0xcccf,
+    0x0023, 0xe2fe, 0x0043, 0x99ff, 0x0083, 0x0096, 0x00c5, 0x313f,
+    0x0023, 0x00a5, 0x0043, 0x445e, 0x0083, 0xc8ce, 0x0014, 0x11df,
+    0x0023, 0xf4fe, 0x0043, 0xfcff, 0x0083, 0x009e, 0x0055, 0x0077,
+    0x0023, 0x0035, 0x0043, 0xf1ff, 0x0083, 0x88ae, 0x0014, 0x00b7,
+    0x0023, 0xf8fe, 0x0043, 0xe4ef, 0x0083, 0x888e, 0x00c5, 0x111f,
+    0x0023, 0x00a5, 0x0043, 0x0066, 0x0083, 0xa8ee, 0x0014, 0x54df,
+    0x0023, 0x10be, 0x0043, 0x22ef, 0x0083, 0x207e, 0x0055, 0x227f,
+    0x0023, 0x0035, 0x0043, 0x444e, 0x0083, 0xc4ce, 0x0014, 0x11bf,
+    0x0023, 0xe2fe, 0x0043, 0x00f7, 0x0083, 0x0096, 0x00c5, 0x223f,
+    0x0023, 0x00a5, 0x0043, 0x445e, 0x0083, 0xc8ce, 0x0014, 0x00d7,
+    0x0023, 0xf4fe, 0x0043, 0xbaff, 0x0083, 0x009e, 0x0055, 0x006f,
+    0x0023, 0x0035, 0x0043, 0xe6ff, 0x0083, 0x88ae, 0x0014, 0xa2af,
+    0x0023, 0xf8fe, 0x0043, 0x00e7, 0x0083, 0x888e, 0x00c5, 0x222f,
+    0x0002, 0x00c5, 0x0084, 0x207e, 0x0002, 0xc4ce, 0x0024, 0x00f7,
+    0x0002, 0xa2fe, 0x0044, 0x0056, 0x0002, 0x009e, 0x0014, 0x00d7,
+    0x0002, 0x10be, 0x0084, 0x0066, 0x0002, 0x88ae, 0x0024, 0x11df,
+    0x0002, 0xa8ee, 0x0044, 0x0036, 0x0002, 0x888e, 0x0014, 0x111f,
+    0x0002, 0x00c5, 0x0084, 0x006e, 0x0002, 0x88ce, 0x0024, 0x88ff,
+    0x0002, 0xb8fe, 0x0044, 0x444e, 0x0002, 0x0096, 0x0014, 0x00b7,
+    0x0002, 0xe4fe, 0x0084, 0x445e, 0x0002, 0x00a6, 0x0024, 0x00e7,
+    0x0002, 0x54de, 0x0044, 0x222e, 0x0002, 0x003e, 0x0014, 0x0077,
+    0x0002, 0x00c5, 0x0084, 0x207e, 0x0002, 0xc4ce, 0x0024, 0xf1ff,
+    0x0002, 0xa2fe, 0x0044, 0x0056, 0x0002, 0x009e, 0x0014, 0x11bf,
+    0x0002, 0x10be, 0x0084, 0x0066, 0x0002, 0x88ae, 0x0024, 0x22ef,
+    0x0002, 0xa8ee, 0x0044, 0x0036, 0x0002, 0x888e, 0x0014, 0x227f,
+    0x0002, 0x00c5, 0x0084, 0x006e, 0x0002, 0x88ce, 0x0024, 0xe4ef,
+    0x0002, 0xb8fe, 0x0044, 0x444e, 0x0002, 0x0096, 0x0014, 0xa2af,
+    0x0002, 0xe4fe, 0x0084, 0x445e, 0x0002, 0x00a6, 0x0024, 0xd8df,
+    0x0002, 0x54de, 0x0044, 0x222e, 0x0002, 0x003e, 0x0014, 0x515f,
+    0x0002, 0x0055, 0x0084, 0x0066, 0x0002, 0x88de, 0x0024, 0x32ff,
+    0x0002, 0x11fe, 0x0044, 0x444e, 0x0002, 0x00ae, 0x0014, 0x00b7,
+    0x0002, 0x317e, 0x0084, 0x515e, 0x0002, 0x00c6, 0x0024, 0x00d7,
+    0x0002, 0x20ee, 0x0044, 0x111e, 0x0002, 0x009e, 0x0014, 0x0077,
+    0x0002, 0x0055, 0x0084, 0x545e, 0x0002, 0x44ce, 0x0024, 0x00e7,
+    0x0002, 0xf1fe, 0x0044, 0x0036, 0x0002, 0x00a6, 0x0014, 0x555f,
+    0x0002, 0x74fe, 0x0084, 0x113e, 0x0002, 0x20be, 0x0024, 0x747f,
+    0x0002, 0xc4de, 0x0044, 0xf8ff, 0x0002, 0x0096, 0x0014, 0x222f,
+    0x0002, 0x0055, 0x0084, 0x0066, 0x0002, 0x88de, 0x0024, 0x00f7,
+    0x0002, 0x11fe, 0x0044, 0x444e, 0x0002, 0x00ae, 0x0014, 0x888f,
+    0x0002, 0x317e, 0x0084, 0x515e, 0x0002, 0x00c6, 0x0024, 0xc8cf,
+    0x0002, 0x20ee, 0x0044, 0x111e, 0x0002, 0x009e, 0x0014, 0x006f,
+    0x0002, 0x0055, 0x0084, 0x545e, 0x0002, 0x44ce, 0x0024, 0xd1df,
+    0x0002, 0xf1fe, 0x0044, 0x0036, 0x0002, 0x00a6, 0x0014, 0x227f,
+    0x0002, 0x74fe, 0x0084, 0x113e, 0x0002, 0x20be, 0x0024, 0x22bf,
+    0x0002, 0xc4de, 0x0044, 0x22ef, 0x0002, 0x0096, 0x0014, 0x323f,
+    0x0003, 0xd4de, 0xf4fd, 0xfcff, 0x0014, 0x113e, 0x0055, 0x888f,
+    0x0003, 0x32be, 0x0085, 0x00e7, 0x0025, 0x515e, 0xaafe, 0x727f,
+    0x0003, 0x44ce, 0xf8fd, 0x44ef, 0x0014, 0x647e, 0x0045, 0xa2af,
+    0x0003, 0x00a6, 0x555d, 0x99df, 0xf1fd, 0x0036, 0xf5fe, 0x626f,
+    0x0003, 0xd1de, 0xf4fd, 0xe6ff, 0x0014, 0x717e, 0x0055, 0xb1bf,
+    0x0003, 0x88ae, 0x0085, 0xd5df, 0x0025, 0x444e, 0xf2fe, 0x667f,
+    0x0003, 0x00c6, 0xf8fd, 0xe2ef, 0x0014, 0x545e, 0x0045, 0x119f,
+    0x0003, 0x0096, 0x555d, 0xc8cf, 0xf1fd, 0x111e, 0xc8ee, 0x0067,
+    0x0003, 0xd4de, 0xf4fd, 0xf3ff, 0x0014, 0x113e, 0x0055, 0x11bf,
+    0x0003, 0x32be, 0x0085, 0xd8df, 0x0025, 0x515e, 0xaafe, 0x222f,
+    0x0003, 0x44ce, 0xf8fd, 0x00f7, 0x0014, 0x647e, 0x0045, 0x989f,
+    0x0003, 0x00a6, 0x555d, 0x00d7, 0xf1fd, 0x0036, 0xf5fe, 0x446f,
+    0x0003, 0xd1de, 0xf4fd, 0xb9ff, 0x0014, 0x717e, 0x0055, 0x00b7,
+    0x0003, 0x88ae, 0x0085, 0xdcdf, 0x0025, 0x444e, 0xf2fe, 0x0077,
+    0x0003, 0x00c6, 0xf8fd, 0xe4ef, 0x0014, 0x545e, 0x0045, 0x737f,
+    0x0003, 0x0096, 0x555d, 0xb8bf, 0xf1fd, 0x111e, 0xc8ee, 0x323f,
+    0x0002, 0x00a5, 0x0084, 0x407e, 0x0002, 0x10de, 0x0024, 0x11df,
+    0x0002, 0x72fe, 0x0044, 0x0056, 0x0002, 0xa8ae, 0x0014, 0xb2bf,
+    0x0002, 0x0096, 0x0084, 0x0066, 0x0002, 0x00c6, 0x0024, 0x00e7,
+    0x0002, 0xc8ee, 0x0044, 0x222e, 0x0002, 0x888e, 0x0014, 0x0077,
+    0x0002, 0x00a5, 0x0084, 0x006e, 0x0002, 0x88ce, 0x0024, 0x00f7,
+    0x0002, 0x91fe, 0x0044, 0x0036, 0x0002, 0xa2ae, 0x0014, 0xaaaf,
+    0x0002, 0xb8fe, 0x0084, 0x005e, 0x0002, 0x00be, 0x0024, 0xc4cf,
+    0x0002, 0x44ee, 0x0044, 0xf4ff, 0x0002, 0x223e, 0x0014, 0x111f,
+    0x0002, 0x00a5, 0x0084, 0x407e, 0x0002, 0x10de, 0x0024, 0x99ff,
+    0x0002, 0x72fe, 0x0044, 0x0056, 0x0002, 0xa8ae, 0x0014, 0x00b7,
+    0x0002, 0x0096, 0x0084, 0x0066, 0x0002, 0x00c6, 0x0024, 0x00d7,
+    0x0002, 0xc8ee, 0x0044, 0x222e, 0x0002, 0x888e, 0x0014, 0x444f,
+    0x0002, 0x00a5, 0x0084, 0x006e, 0x0002, 0x88ce, 0x0024, 0xe2ef,
+    0x0002, 0x91fe, 0x0044, 0x0036, 0x0002, 0xa2ae, 0x0014, 0x447f,
+    0x0002, 0xb8fe, 0x0084, 0x005e, 0x0002, 0x00be, 0x0024, 0x009f,
+    0x0002, 0x44ee, 0x0044, 0x76ff, 0x0002, 0x223e, 0x0014, 0x313f,
+    0x0003, 0x00c6, 0x0085, 0xd9ff, 0xf2fd, 0x647e, 0xf1fe, 0x99bf,
+    0x0003, 0xa2ae, 0x0025, 0x66ef, 0xf4fd, 0x0056, 0xe2ee, 0x737f,
+    0x0003, 0x98be, 0x0045, 0x00f7, 0xf8fd, 0x0066, 0x76fe, 0x889f,
+    0x0003, 0x888e, 0x0015, 0xd5df, 0x00a5, 0x222e, 0x98de, 0x444f,
+    0x0003, 0xb2be, 0x0085, 0xfcff, 0xf2fd, 0x226e, 0x0096, 0x00b7,
+    0x0003, 0xaaae, 0x0025, 0xd1df, 0xf4fd, 0x0036, 0xd4de, 0x646f,
+    0x0003, 0xa8ae, 0x0045, 0xeaef, 0xf8fd, 0x445e, 0xe8ee, 0x717f,
+    0x0003, 0x323e, 0x0015, 0xc4cf, 0x00a5, 0xfaff, 0x88ce, 0x313f,
+    0x0003, 0x00c6, 0x0085, 0x77ff, 0xf2fd, 0x647e, 0xf1fe, 0xb3bf,
+    0x0003, 0xa2ae, 0x0025, 0x00e7, 0xf4fd, 0x0056, 0xe2ee, 0x0077,
+    0x0003, 0x98be, 0x0045, 0xe4ef, 0xf8fd, 0x0066, 0x76fe, 0x667f,
+    0x0003, 0x888e, 0x0015, 0x00d7, 0x00a5, 0x222e, 0x98de, 0x333f,
+    0x0003, 0xb2be, 0x0085, 0x75ff, 0xf2fd, 0x226e, 0x0096, 0x919f,
+    0x0003, 0xaaae, 0x0025, 0x99df, 0xf4fd, 0x0036, 0xd4de, 0x515f,
+    0x0003, 0xa8ae, 0x0045, 0xecef, 0xf8fd, 0x445e, 0xe8ee, 0x727f,
+    0x0003, 0x323e, 0x0015, 0xb1bf, 0x00a5, 0xf3ff, 0x88ce, 0x111f,
+    0x0003, 0x54de, 0xf2fd, 0x111e, 0x0014, 0x647e, 0xf8fe, 0xcccf,
+    0x0003, 0x91be, 0x0045, 0x22ef, 0x0025, 0x222e, 0xf3fe, 0x888f,
+    0x0003, 0x00c6, 0x0085, 0x00f7, 0x0014, 0x115e, 0xfcfe, 0xa8af,
+    0x0003, 0x00a6, 0x0035, 0xc8df, 0xf1fd, 0x313e, 0x66fe, 0x646f,
+    0x0003, 0xc8ce, 0xf2fd, 0xf5ff, 0x0014, 0x0066, 0xf4fe, 0xbabf,
+    0x0003, 0x22ae, 0x0045, 0x00e7, 0x0025, 0x323e, 0xeafe, 0x737f,
+    0x0003, 0xb2be, 0x0085, 0x55df, 0x0014, 0x0056, 0x717e, 0x119f,
+    0x0003, 0x0096, 0x0035, 0xc4cf, 0xf1fd, 0x333e, 0xe8ee, 0x444f,
+    0x0003, 0x54de, 0xf2fd, 0x111e, 0x0014, 0x647e, 0xf8fe, 0x99bf,
+    0x0003, 0x91be, 0x0045, 0xe2ef, 0x0025, 0x222e, 0xf3fe, 0x667f,
+    0x0003, 0x00c6, 0x0085, 0xe4ef, 0x0014, 0x115e, 0xfcfe, 0x989f,
+    0x0003, 0x00a6, 0x0035, 0x00d7, 0xf1fd, 0x313e, 0x66fe, 0x226f,
+    0x0003, 0xc8ce, 0xf2fd, 0xb9ff, 0x0014, 0x0066, 0xf4fe, 0x00b7,
+    0x0003, 0x22ae, 0x0045, 0xd1df, 0x0025, 0x323e, 0xeafe, 0x0077,
+    0x0003, 0xb2be, 0x0085, 0xecef, 0x0014, 0x0056, 0x717e, 0x727f,
+    0x0003, 0x0096, 0x0035, 0xb8bf, 0xf1fd, 0x333e, 0xe8ee, 0x545f,
+    0xf1fc, 0xd1de, 0xfafd, 0x00d7, 0xf8fc, 0x0016, 0xfffd, 0x747f,
+    0xf4fc, 0x717e, 0xf3fd, 0xb3bf, 0xf2fc, 0xeaef, 0xe8ee, 0x444f,
+    0xf1fc, 0x22ae, 0x0005, 0xb8bf, 0xf8fc, 0x00f7, 0xfcfe, 0x0077,
+    0xf4fc, 0x115e, 0xf5fd, 0x757f, 0xf2fc, 0xd8df, 0xe2ee, 0x333f,
+    0xf1fc, 0xb2be, 0xfafd, 0x88cf, 0xf8fc, 0xfbff, 0xfffd, 0x737f,
+    0xf4fc, 0x006e, 0xf3fd, 0x00b7, 0xf2fc, 0x66ef, 0xf9fe, 0x313f,
+    0xf1fc, 0x009e, 0x0005, 0xbabf, 0xf8fc, 0xfdff, 0xf6fe, 0x0067,
+    0xf4fc, 0x0026, 0xf5fd, 0x888f, 0xf2fc, 0xdcdf, 0xd4de, 0x222f,
+    0xf1fc, 0xd1de, 0xfafd, 0xc4cf, 0xf8fc, 0x0016, 0xfffd, 0x727f,
+    0xf4fc, 0x717e, 0xf3fd, 0x99bf, 0xf2fc, 0xecef, 0xe8ee, 0x0047,
+    0xf1fc, 0x22ae, 0x0005, 0x00a7, 0xf8fc, 0xf7ff, 0xfcfe, 0x0057,
+    0xf4fc, 0x115e, 0xf5fd, 0x0097, 0xf2fc, 0xd5df, 0xe2ee, 0x0037,
+    0xf1fc, 0xb2be, 0xfafd, 0x00c7, 0xf8fc, 0xfeff, 0xfffd, 0x667f,
+    0xf4fc, 0x006e, 0xf3fd, 0xa8af, 0xf2fc, 0x00e7, 0xf9fe, 0x323f,
+    0xf1fc, 0x009e, 0x0005, 0xb1bf, 0xf8fc, 0xe4ef, 0xf6fe, 0x545f,
+    0xf4fc, 0x0026, 0xf5fd, 0x0087, 0xf2fc, 0x99df, 0xd4de, 0x111f
+};
+
+static const OPJ_UINT16 vlc_tbl1[1024] = {
+    0x0013, 0x0065, 0x0043, 0x00de, 0x0083, 0x888d, 0x0023, 0x444e,
+    0x0013, 0x00a5, 0x0043, 0x88ae, 0x0083, 0x0035, 0x0023, 0x00d7,
+    0x0013, 0x00c5, 0x0043, 0x009e, 0x0083, 0x0055, 0x0023, 0x222e,
+    0x0013, 0x0095, 0x0043, 0x007e, 0x0083, 0x10fe, 0x0023, 0x0077,
+    0x0013, 0x0065, 0x0043, 0x88ce, 0x0083, 0x888d, 0x0023, 0x111e,
+    0x0013, 0x00a5, 0x0043, 0x005e, 0x0083, 0x0035, 0x0023, 0x00e7,
+    0x0013, 0x00c5, 0x0043, 0x00be, 0x0083, 0x0055, 0x0023, 0x11ff,
+    0x0013, 0x0095, 0x0043, 0x003e, 0x0083, 0x40ee, 0x0023, 0xa2af,
+    0x0013, 0x0065, 0x0043, 0x00de, 0x0083, 0x888d, 0x0023, 0x444e,
+    0x0013, 0x00a5, 0x0043, 0x88ae, 0x0083, 0x0035, 0x0023, 0x44ef,
+    0x0013, 0x00c5, 0x0043, 0x009e, 0x0083, 0x0055, 0x0023, 0x222e,
+    0x0013, 0x0095, 0x0043, 0x007e, 0x0083, 0x10fe, 0x0023, 0x00b7,
+    0x0013, 0x0065, 0x0043, 0x88ce, 0x0083, 0x888d, 0x0023, 0x111e,
+    0x0013, 0x00a5, 0x0043, 0x005e, 0x0083, 0x0035, 0x0023, 0xc4cf,
+    0x0013, 0x00c5, 0x0043, 0x00be, 0x0083, 0x0055, 0x0023, 0x00f7,
+    0x0013, 0x0095, 0x0043, 0x003e, 0x0083, 0x40ee, 0x0023, 0x006f,
+    0x0001, 0x0084, 0x0001, 0x0056, 0x0001, 0x0014, 0x0001, 0x00d7,
+    0x0001, 0x0024, 0x0001, 0x0096, 0x0001, 0x0045, 0x0001, 0x0077,
+    0x0001, 0x0084, 0x0001, 0x00c6, 0x0001, 0x0014, 0x0001, 0x888f,
+    0x0001, 0x0024, 0x0001, 0x00f7, 0x0001, 0x0035, 0x0001, 0x222f,
+    0x0001, 0x0084, 0x0001, 0x40fe, 0x0001, 0x0014, 0x0001, 0x00b7,
+    0x0001, 0x0024, 0x0001, 0x00bf, 0x0001, 0x0045, 0x0001, 0x0067,
+    0x0001, 0x0084, 0x0001, 0x00a6, 0x0001, 0x0014, 0x0001, 0x444f,
+    0x0001, 0x0024, 0x0001, 0x00e7, 0x0001, 0x0035, 0x0001, 0x113f,
+    0x0001, 0x0084, 0x0001, 0x0056, 0x0001, 0x0014, 0x0001, 0x00cf,
+    0x0001, 0x0024, 0x0001, 0x0096, 0x0001, 0x0045, 0x0001, 0x006f,
+    0x0001, 0x0084, 0x0001, 0x00c6, 0x0001, 0x0014, 0x0001, 0x009f,
+    0x0001, 0x0024, 0x0001, 0x00ef, 0x0001, 0x0035, 0x0001, 0x323f,
+    0x0001, 0x0084, 0x0001, 0x40fe, 0x0001, 0x0014, 0x0001, 0x00af,
+    0x0001, 0x0024, 0x0001, 0x44ff, 0x0001, 0x0045, 0x0001, 0x005f,
+    0x0001, 0x0084, 0x0001, 0x00a6, 0x0001, 0x0014, 0x0001, 0x007f,
+    0x0001, 0x0024, 0x0001, 0x00df, 0x0001, 0x0035, 0x0001, 0x111f,
+    0x0001, 0x0024, 0x0001, 0x0056, 0x0001, 0x0085, 0x0001, 0x00bf,
+    0x0001, 0x0014, 0x0001, 0x00f7, 0x0001, 0x00c6, 0x0001, 0x0077,
+    0x0001, 0x0024, 0x0001, 0xf8ff, 0x0001, 0x0045, 0x0001, 0x007f,
+    0x0001, 0x0014, 0x0001, 0x00df, 0x0001, 0x00a6, 0x0001, 0x313f,
+    0x0001, 0x0024, 0x0001, 0x222e, 0x0001, 0x0085, 0x0001, 0x00b7,
+    0x0001, 0x0014, 0x0001, 0x44ef, 0x0001, 0xa2ae, 0x0001, 0x0067,
+    0x0001, 0x0024, 0x0001, 0x51ff, 0x0001, 0x0045, 0x0001, 0x0097,
+    0x0001, 0x0014, 0x0001, 0x00cf, 0x0001, 0x0036, 0x0001, 0x223f,
+    0x0001, 0x0024, 0x0001, 0x0056, 0x0001, 0x0085, 0x0001, 0xb2bf,
+    0x0001, 0x0014, 0x0001, 0x40ef, 0x0001, 0x00c6, 0x0001, 0x006f,
+    0x0001, 0x0024, 0x0001, 0x72ff, 0x0001, 0x0045, 0x0001, 0x009f,
+    0x0001, 0x0014, 0x0001, 0x00d7, 0x0001, 0x00a6, 0x0001, 0x444f,
+    0x0001, 0x0024, 0x0001, 0x222e, 0x0001, 0x0085, 0x0001, 0xa8af,
+    0x0001, 0x0014, 0x0001, 0x00e7, 0x0001, 0xa2ae, 0x0001, 0x005f,
+    0x0001, 0x0024, 0x0001, 0x44ff, 0x0001, 0x0045, 0x0001, 0x888f,
+    0x0001, 0x0014, 0x0001, 0xaaaf, 0x0001, 0x0036, 0x0001, 0x111f,
+    0x0002, 0xf8fe, 0x0024, 0x0056, 0x0002, 0x00b6, 0x0085, 0x66ff,
+    0x0002, 0x00ce, 0x0014, 0x111e, 0x0002, 0x0096, 0x0035, 0xa8af,
+    0x0002, 0x00f6, 0x0024, 0x313e, 0x0002, 0x00a6, 0x0045, 0xb3bf,
+    0x0002, 0xb2be, 0x0014, 0xf5ff, 0x0002, 0x0066, 0x517e, 0x545f,
+    0x0002, 0xf2fe, 0x0024, 0x222e, 0x0002, 0x22ae, 0x0085, 0x44ef,
+    0x0002, 0x00c6, 0x0014, 0xf4ff, 0x0002, 0x0076, 0x0035, 0x447f,
+    0x0002, 0x40de, 0x0024, 0x323e, 0x0002, 0x009e, 0x0045, 0x00d7,
+    0x0002, 0x88be, 0x0014, 0xfaff, 0x0002, 0x115e, 0xf1fe, 0x444f,
+    0x0002, 0xf8fe, 0x0024, 0x0056, 0x0002, 0x00b6, 0x0085, 0xc8ef,
+    0x0002, 0x00ce, 0x0014, 0x111e, 0x0002, 0x0096, 0x0035, 0x888f,
+    0x0002, 0x00f6, 0x0024, 0x313e, 0x0002, 0x00a6, 0x0045, 0x44df,
+    0x0002, 0xb2be, 0x0014, 0xa8ff, 0x0002, 0x0066, 0x517e, 0x006f,
+    0x0002, 0xf2fe, 0x0024, 0x222e, 0x0002, 0x22ae, 0x0085, 0x00e7,
+    0x0002, 0x00c6, 0x0014, 0xe2ef, 0x0002, 0x0076, 0x0035, 0x727f,
+    0x0002, 0x40de, 0x0024, 0x323e, 0x0002, 0x009e, 0x0045, 0xb1bf,
+    0x0002, 0x88be, 0x0014, 0x73ff, 0x0002, 0x115e, 0xf1fe, 0x333f,
+    0x0001, 0x0084, 0x0001, 0x20ee, 0x0001, 0x00c5, 0x0001, 0xc4cf,
+    0x0001, 0x0044, 0x0001, 0x32ff, 0x0001, 0x0015, 0x0001, 0x888f,
+    0x0001, 0x0084, 0x0001, 0x0066, 0x0001, 0x0025, 0x0001, 0x00af,
+    0x0001, 0x0044, 0x0001, 0x22ef, 0x0001, 0x00a6, 0x0001, 0x005f,
+    0x0001, 0x0084, 0x0001, 0x444e, 0x0001, 0x00c5, 0x0001, 0xcccf,
+    0x0001, 0x0044, 0x0001, 0x00f7, 0x0001, 0x0015, 0x0001, 0x006f,
+    0x0001, 0x0084, 0x0001, 0x0056, 0x0001, 0x0025, 0x0001, 0x009f,
+    0x0001, 0x0044, 0x0001, 0x00df, 0x0001, 0x30fe, 0x0001, 0x222f,
+    0x0001, 0x0084, 0x0001, 0x20ee, 0x0001, 0x00c5, 0x0001, 0xc8cf,
+    0x0001, 0x0044, 0x0001, 0x11ff, 0x0001, 0x0015, 0x0001, 0x0077,
+    0x0001, 0x0084, 0x0001, 0x0066, 0x0001, 0x0025, 0x0001, 0x007f,
+    0x0001, 0x0044, 0x0001, 0x00e7, 0x0001, 0x00a6, 0x0001, 0x0037,
+    0x0001, 0x0084, 0x0001, 0x444e, 0x0001, 0x00c5, 0x0001, 0x00b7,
+    0x0001, 0x0044, 0x0001, 0x00bf, 0x0001, 0x0015, 0x0001, 0x003f,
+    0x0001, 0x0084, 0x0001, 0x0056, 0x0001, 0x0025, 0x0001, 0x0097,
+    0x0001, 0x0044, 0x0001, 0x00d7, 0x0001, 0x30fe, 0x0001, 0x111f,
+    0x0002, 0xa8ee, 0x0044, 0x888e, 0x0002, 0x00d6, 0x00c5, 0xf3ff,
+    0x0002, 0xfcfe, 0x0025, 0x003e, 0x0002, 0x00b6, 0x0055, 0xd8df,
+    0x0002, 0xf8fe, 0x0044, 0x0066, 0x0002, 0x207e, 0x0085, 0x99ff,
+    0x0002, 0x00e6, 0x00f5, 0x0036, 0x0002, 0x00a6, 0x0015, 0x009f,
+    0x0002, 0xf2fe, 0x0044, 0x0076, 0x0002, 0x44ce, 0x00c5, 0x76ff,
+    0x0002, 0xf1fe, 0x0025, 0x444e, 0x0002, 0x00ae, 0x0055, 0xc8cf,
+    0x0002, 0xf4fe, 0x0044, 0x445e, 0x0002, 0x10be, 0x0085, 0xe4ef,
+    0x0002, 0x54de, 0x00f5, 0x111e, 0x0002, 0x0096, 0x0015, 0x222f,
+    0x0002, 0xa8ee, 0x0044, 0x888e, 0x0002, 0x00d6, 0x00c5, 0xfaff,
+    0x0002, 0xfcfe, 0x0025, 0x003e, 0x0002, 0x00b6, 0x0055, 0x11bf,
+    0x0002, 0xf8fe, 0x0044, 0x0066, 0x0002, 0x207e, 0x0085, 0x22ef,
+    0x0002, 0x00e6, 0x00f5, 0x0036, 0x0002, 0x00a6, 0x0015, 0x227f,
+    0x0002, 0xf2fe, 0x0044, 0x0076, 0x0002, 0x44ce, 0x00c5, 0xd5ff,
+    0x0002, 0xf1fe, 0x0025, 0x444e, 0x0002, 0x00ae, 0x0055, 0x006f,
+    0x0002, 0xf4fe, 0x0044, 0x445e, 0x0002, 0x10be, 0x0085, 0x11df,
+    0x0002, 0x54de, 0x00f5, 0x111e, 0x0002, 0x0096, 0x0015, 0x515f,
+    0x0003, 0x00f6, 0x0014, 0x111e, 0x0044, 0x888e, 0x00a5, 0xd4df,
+    0x0003, 0xa2ae, 0x0055, 0x76ff, 0x0024, 0x223e, 0x00b6, 0xaaaf,
+    0x0003, 0x00e6, 0x0014, 0xf5ff, 0x0044, 0x0066, 0x0085, 0xcccf,
+    0x0003, 0x009e, 0x00c5, 0x44ef, 0x0024, 0x0036, 0xf8fe, 0x317f,
+    0x0003, 0xe8ee, 0x0014, 0xf1ff, 0x0044, 0x0076, 0x00a5, 0xc4cf,
+    0x0003, 0x227e, 0x0055, 0xd1df, 0x0024, 0x444e, 0xf4fe, 0x515f,
+    0x0003, 0x00d6, 0x0014, 0xe2ef, 0x0044, 0x445e, 0x0085, 0x22bf,
+    0x0003, 0x0096, 0x00c5, 0xc8df, 0x0024, 0x222e, 0xf2fe, 0x226f,
+    0x0003, 0x00f6, 0x0014, 0x111e, 0x0044, 0x888e, 0x00a5, 0xb1bf,
+    0x0003, 0xa2ae, 0x0055, 0x33ff, 0x0024, 0x223e, 0x00b6, 0xa8af,
+    0x0003, 0x00e6, 0x0014, 0xb9ff, 0x0044, 0x0066, 0x0085, 0xa8bf,
+    0x0003, 0x009e, 0x00c5, 0xe4ef, 0x0024, 0x0036, 0xf8fe, 0x646f,
+    0x0003, 0xe8ee, 0x0014, 0xfcff, 0x0044, 0x0076, 0x00a5, 0xc8cf,
+    0x0003, 0x227e, 0x0055, 0xeaef, 0x0024, 0x444e, 0xf4fe, 0x747f,
+    0x0003, 0x00d6, 0x0014, 0xfaff, 0x0044, 0x445e, 0x0085, 0xb2bf,
+    0x0003, 0x0096, 0x00c5, 0x44df, 0x0024, 0x222e, 0xf2fe, 0x313f,
+    0x00f3, 0xfafe, 0xf1fd, 0x0036, 0x0004, 0x32be, 0x0075, 0x11df,
+    0x00f3, 0x54de, 0xf2fd, 0xe4ef, 0x00d5, 0x717e, 0xfcfe, 0x737f,
+    0x00f3, 0xf3fe, 0xf8fd, 0x111e, 0x0004, 0x0096, 0x0055, 0xb1bf,
+    0x00f3, 0x00ce, 0x00b5, 0xd8df, 0xf4fd, 0x0066, 0xb9fe, 0x545f,
+    0x00f3, 0x76fe, 0xf1fd, 0x0026, 0x0004, 0x00a6, 0x0075, 0x009f,
+    0x00f3, 0x00ae, 0xf2fd, 0xf7ff, 0x00d5, 0x0046, 0xf5fe, 0x747f,
+    0x00f3, 0x00e6, 0xf8fd, 0x0016, 0x0004, 0x0086, 0x0055, 0x888f,
+    0x00f3, 0x00c6, 0x00b5, 0xe2ef, 0xf4fd, 0x115e, 0xa8ee, 0x113f,
+    0x00f3, 0xfafe, 0xf1fd, 0x0036, 0x0004, 0x32be, 0x0075, 0xd1df,
+    0x00f3, 0x54de, 0xf2fd, 0xfbff, 0x00d5, 0x717e, 0xfcfe, 0x447f,
+    0x00f3, 0xf3fe, 0xf8fd, 0x111e, 0x0004, 0x0096, 0x0055, 0x727f,
+    0x00f3, 0x00ce, 0x00b5, 0x22ef, 0xf4fd, 0x0066, 0xb9fe, 0x444f,
+    0x00f3, 0x76fe, 0xf1fd, 0x0026, 0x0004, 0x00a6, 0x0075, 0x11bf,
+    0x00f3, 0x00ae, 0xf2fd, 0xffff, 0x00d5, 0x0046, 0xf5fe, 0x323f,
+    0x00f3, 0x00e6, 0xf8fd, 0x0016, 0x0004, 0x0086, 0x0055, 0x006f,
+    0x00f3, 0x00c6, 0x00b5, 0xb8bf, 0xf4fd, 0x115e, 0xa8ee, 0x222f
+};
\ No newline at end of file
diff --git a/src/lib/openjp2/t2.c b/src/lib/openjp2/t2.c
index 1481e16f4..ac001ccba 100644
--- a/src/lib/openjp2/t2.c
+++ b/src/lib/openjp2/t2.c
@@ -1229,6 +1229,7 @@ static OPJ_BOOL opj_t2_read_packet_header(opj_t2_t* p_t2,
                     ++i;
                 }
 
+                l_cblk->Mb = (OPJ_UINT32)l_band->numbps;
                 l_cblk->numbps = (OPJ_UINT32)l_band->numbps + 1 - i;
                 l_cblk->numlenbits = 3;
             }
@@ -1258,34 +1259,63 @@ static OPJ_BOOL opj_t2_read_packet_header(opj_t2_t* p_t2,
             }
             n = (OPJ_INT32)l_cblk->numnewpasses;
 
-            do {
-                OPJ_UINT32 bit_number;
-                l_cblk->segs[l_segno].numnewpasses = (OPJ_UINT32)opj_int_min((OPJ_INT32)(
-                        l_cblk->segs[l_segno].maxpasses - l_cblk->segs[l_segno].numpasses), n);
-                bit_number = l_cblk->numlenbits + opj_uint_floorlog2(
-                                 l_cblk->segs[l_segno].numnewpasses);
-                if (bit_number > 32) {
-                    opj_event_msg(p_manager, EVT_ERROR,
-                                  "Invalid bit number %d in opj_t2_read_packet_header()\n",
-                                  bit_number);
-                    opj_bio_destroy(l_bio);
-                    return OPJ_FALSE;
-                }
-                l_cblk->segs[l_segno].newlen = opj_bio_read(l_bio, bit_number);
-                JAS_FPRINTF(stderr, "included=%d numnewpasses=%d increment=%d len=%d \n",
-                            l_included, l_cblk->segs[l_segno].numnewpasses, l_increment,
-                            l_cblk->segs[l_segno].newlen);
-
-                n -= (OPJ_INT32)l_cblk->segs[l_segno].numnewpasses;
-                if (n > 0) {
-                    ++l_segno;
-
-                    if (! opj_t2_init_seg(l_cblk, l_segno, p_tcp->tccps[p_pi->compno].cblksty, 0)) {
-                        opj_bio_destroy(l_bio);
-                        return OPJ_FALSE;
-                    }
-                }
-            } while (n > 0);
+            if ((p_tcp->tccps[p_pi->compno].cblksty & J2K_CCP_CBLKSTY_HT) != 0)
+                do {
+                  OPJ_UINT32 bit_number;
+                  l_cblk->segs[l_segno].numnewpasses = l_segno == 0 ? 1 : n;
+                  bit_number = l_cblk->numlenbits + opj_uint_floorlog2(
+                                   l_cblk->segs[l_segno].numnewpasses);
+                  if (bit_number > 32) {
+                      opj_event_msg(p_manager, EVT_ERROR,
+                                    "Invalid bit number %d in opj_t2_read_packet_header()\n",
+                                    bit_number);
+                      opj_bio_destroy(l_bio);
+                      return OPJ_FALSE;
+                  }
+                  l_cblk->segs[l_segno].newlen = opj_bio_read(l_bio, bit_number);
+                  JAS_FPRINTF(stderr, "included=%d numnewpasses=%d increment=%d len=%d \n",
+                              l_included, l_cblk->segs[l_segno].numnewpasses, l_increment,
+                              l_cblk->segs[l_segno].newlen);
+
+                  n -= (OPJ_INT32)l_cblk->segs[l_segno].numnewpasses;
+                  if (n > 0) {
+                      ++l_segno;
+
+                      if (! opj_t2_init_seg(l_cblk, l_segno, p_tcp->tccps[p_pi->compno].cblksty, 0)) {
+                          opj_bio_destroy(l_bio);
+                          return OPJ_FALSE;
+                      }
+                  }
+                } while (n > 0);
+            else 
+                do {
+                  OPJ_UINT32 bit_number;
+                  l_cblk->segs[l_segno].numnewpasses = (OPJ_UINT32)opj_int_min((OPJ_INT32)(
+                          l_cblk->segs[l_segno].maxpasses - l_cblk->segs[l_segno].numpasses), n);
+                  bit_number = l_cblk->numlenbits + opj_uint_floorlog2(
+                                   l_cblk->segs[l_segno].numnewpasses);
+                  if (bit_number > 32) {
+                      opj_event_msg(p_manager, EVT_ERROR,
+                                    "Invalid bit number %d in opj_t2_read_packet_header()\n",
+                                    bit_number);
+                      opj_bio_destroy(l_bio);
+                      return OPJ_FALSE;
+                  }
+                  l_cblk->segs[l_segno].newlen = opj_bio_read(l_bio, bit_number);
+                  JAS_FPRINTF(stderr, "included=%d numnewpasses=%d increment=%d len=%d \n",
+                              l_included, l_cblk->segs[l_segno].numnewpasses, l_increment,
+                              l_cblk->segs[l_segno].newlen);
+
+                  n -= (OPJ_INT32)l_cblk->segs[l_segno].numnewpasses;
+                  if (n > 0) {
+                      ++l_segno;
+
+                      if (! opj_t2_init_seg(l_cblk, l_segno, p_tcp->tccps[p_pi->compno].cblksty, 0)) {
+                          opj_bio_destroy(l_bio);
+                          return OPJ_FALSE;
+                      }
+                  }
+              } while (n > 0);
 
             ++l_cblk;
         }
diff --git a/src/lib/openjp2/tcd.h b/src/lib/openjp2/tcd.h
index f1b52b8da..a89279d0f 100644
--- a/src/lib/openjp2/tcd.h
+++ b/src/lib/openjp2/tcd.h
@@ -122,7 +122,12 @@ typedef struct opj_tcd_cblk_dec {
     opj_tcd_seg_data_chunk_t* chunks; /* Array of chunks */
     /* position of the code-blocks : left upper corner (x0, y0) right low corner (x1,y1) */
     OPJ_INT32 x0, y0, x1, y1;
-    OPJ_UINT32 numbps;
+    /* Mb is The maximum number of bit-planes available for the representation of 
+       coefficients in any sub-band, b, as defined in Equation (E-2). See 
+       Section B.10.5 of the standard */
+    OPJ_UINT32 Mb;  /* currently used only to check if HT decoding is correct */
+    /* numbps is Mb - P as defined in Section B.10.5 of the standard */
+    OPJ_UINT32 numbps;  
     /* number of bits for len, for the current packet. Transitory value */
     OPJ_UINT32 numlenbits;
     /* number of pass added to the code-blocks, for the current packet. Transitory value */

From f00dad44c87e9d1e70111ca8f0f2c4fbe8dc6781 Mon Sep 17 00:00:00 2001
From: Aous Naman <aous72@yahoo.com>
Date: Thu, 2 Sep 2021 20:56:39 +1000
Subject: [PATCH 02/10] Fixed compilation errors.

---
 src/lib/openjp2/fbc_dec.c | 645 ++++++++++++++++++++++++--------------
 src/lib/openjp2/t2.c      |   2 +-
 2 files changed, 410 insertions(+), 237 deletions(-)

diff --git a/src/lib/openjp2/fbc_dec.c b/src/lib/openjp2/fbc_dec.c
index 30211c24f..1627fedb3 100644
--- a/src/lib/openjp2/fbc_dec.c
+++ b/src/lib/openjp2/fbc_dec.c
@@ -72,7 +72,8 @@ static OPJ_BOOL cannot_decode_spp_mrp_msg = OPJ_FALSE;
   *
   *   @param [in]  val is the value for which population count is sought
   */ 
-static inline OPJ_UINT32 population_count(OPJ_UINT32 val)
+static inline 
+OPJ_UINT32 population_count(OPJ_UINT32 val)
 {
 #ifdef OPJ_COMPILER_MSVC
   return (OPJ_UINT32)__popcnt(val);
@@ -96,7 +97,8 @@ static inline OPJ_UINT32 population_count(OPJ_UINT32 val)
 #ifdef OPJ_COMPILER_MSVC
   #pragma intrinsic(_BitScanReverse)
 #endif
-static inline OPJ_UINT32 count_leading_zeros(OPJ_UINT32 val)
+static inline 
+OPJ_UINT32 count_leading_zeros(OPJ_UINT32 val)
 {
 #ifdef OPJ_COMPILER_MSVC
   unsigned long result = 0;
@@ -150,29 +152,34 @@ typedef struct dec_mel {
 static inline
 void mel_read(dec_mel_t *melp)
 {
+  OPJ_UINT32 val; 
+  int bits;
+  OPJ_UINT32 t;
+  OPJ_BOOL unstuff;
+  
   if (melp->bits > 32)  //there are enough bits in the tmp variable
     return;             // return without reading new data
-  OPJ_UINT32 val = 0xFFFFFFFF;
+
+  val = 0xFFFFFFFF;
   //the next line (the if statement) needs to be tested first
   //if (melp->size > 0)              // if there is data in the MEL segment
     val = *(OPJ_UINT32*)melp->data;  // read 32 bits from MEL data
       
   // next we unstuff them before adding them to the buffer
-  int bits = 32 - melp->unstuff; // number of bits in val, subtract 1 if
-                                  // the previously read byte requires 
-                                  // unstuffing
+  bits = 32 - melp->unstuff; // number of bits in val, subtract 1 if
+                             // the previously read byte requires 
+                             // unstuffing
 
   // data is unstuffed and accumulated in t
   // bits has the number of bits in t
-  OPJ_UINT32 t = (melp->size > 0) ? (val & 0xFF) : 0xFF; // feed 0xFF if the 
+  t = (melp->size > 0) ? (val & 0xFF) : 0xFF; // feed 0xFF if the 
                                   // MEL bitstream has been exhausted
   if (melp->size == 1) t |= 0xF;  // if this is 1 byte before the last
                                   // in MEL+VLC segments (remember they
                                   // can overlap)
   melp->data += melp->size-- > 0; // advance data by 1 byte if we have not
                                   // reached the end of the MEL segment
-  OPJ_BOOL unstuff = ((val & 0xFF) == 0xFF); // true if the byte
-                                             // needs unstuffing
+  unstuff = ((val & 0xFF) == 0xFF); // true if the byte needs unstuffing
 
   bits -= unstuff; // there is one less bit in t if unstuffing is needed
   t = t << (8 - unstuff); // move up to make room for the next byte
@@ -274,6 +281,8 @@ void mel_decode(dec_mel_t *melp)
 static inline
 void mel_init(dec_mel_t *melp, OPJ_UINT8* bbuf, int lcup, int scup)
 {
+  int num;
+
   melp->data = bbuf + lcup - scup; // move the pointer to the start of MEL
   melp->bits = 0;                  // 0 bits in tmp
   melp->tmp = 0;                   //
@@ -286,16 +295,18 @@ void mel_init(dec_mel_t *melp, OPJ_UINT8* bbuf, int lcup, int scup)
   //This code is borrowed; original is for a different architecture
   //These few lines take care of the case where data is not at a multiple
   // of 4 boundary.  It reads 1,2,3 up to 4 bytes from the MEL segment
-  int num = 4 - (int)((intptr_t)(melp->data) & 0x3);
+  num = 4 - (int)((intptr_t)(melp->data) & 0x3);
   for (int i = 0; i < num; ++i) { // this code is similar to mel_read
+    OPJ_UINT64 d;
+    int d_bits;
+    
     assert(melp->unstuff == OPJ_FALSE || melp->data[0] <= 0x8F);
-    OPJ_UINT64 d = (melp->size > 0) ? *melp->data : 0xFF; // if buffer is 
-                                                          // consumed set data 
-                                                          // to 0xFF
+    d = (melp->size > 0) ? *melp->data : 0xFF; // if buffer is consumed 
+                                               // set data to 0xFF
     if (melp->size == 1) d |= 0xF; //if this is MEL+VLC-1, set LSBs to 0xF
                                     // see the standard
     melp->data += melp->size-- > 0; //increment if the end is not reached
-    int d_bits = 8 - melp->unstuff; //if unstuffing is needed, reduce by 1
+    d_bits = 8 - melp->unstuff; //if unstuffing is needed, reduce by 1
     melp->tmp = (melp->tmp << d_bits) | d; //store bits in tmp
     melp->bits += d_bits;  //increment tmp by number of bits
     melp->unstuff = ((d & 0xFF) == 0xFF); //true of next byte needs 
@@ -314,10 +325,11 @@ void mel_init(dec_mel_t *melp, OPJ_UINT8* bbuf, int lcup, int scup)
 static inline
 int mel_get_run(dec_mel_t *melp)
 {
+  int t;
   if (melp->num_runs == 0)  //if no runs, decode more bit from MEL segment
     mel_decode(melp);
 
-  int t = melp->runs & 0x7F; //retrieve one run
+  t = melp->runs & 0x7F; //retrieve one run
   melp->runs >>= 7;  // remove the retrieved run
   melp->num_runs--;
   return t; // return run
@@ -357,12 +369,18 @@ typedef struct rev_struct {
   *
   *  @param [in]  vlcp is a pointer to rev_struct_t structure
   */
-inline void rev_read(rev_struct_t *vlcp)
+static inline 
+void rev_read(rev_struct_t *vlcp)
 {
+  OPJ_UINT32 val;
+  OPJ_UINT32 tmp;
+  OPJ_UINT32 bits;
+  OPJ_BOOL unstuff;
+
   //process 4 bytes at a time
   if (vlcp->bits > 32)  // if there are more than 32 bits in tmp, then 
     return;             // reading 32 bits can overflow vlcp->tmp
-  OPJ_UINT32 val = 0;
+  val = 0;
   //the next line (the if statement) needs to be tested first
   if (vlcp->size > 0)  // if there are bytes left in the VLC segment
   {
@@ -374,23 +392,22 @@ inline void rev_read(rev_struct_t *vlcp)
   }
 
   //accumulate in tmp, number of bits in tmp are stored in bits
-  OPJ_UINT32 tmp = val >> 24;  //start with the MSB byte
-  OPJ_UINT32 bits;
+  tmp = val >> 24;  //start with the MSB byte
 
   // test unstuff (previous byte is >0x8F), and this byte is 0x7F
-  bits = 8 - ((vlcp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
-  OPJ_BOOL unstuff = (val >> 24) > 0x8F; //this is for the next byte
+  bits = 8u - ((vlcp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1u : 0u);
+  unstuff = (val >> 24) > 0x8F; //this is for the next byte
 
   tmp |= ((val >> 16) & 0xFF) << bits; //process the next byte
-  bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
+  bits += 8u - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1u : 0u);
   unstuff = ((val >> 16) & 0xFF) > 0x8F;
 
   tmp |= ((val >> 8) & 0xFF) << bits;
-  bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
+  bits += 8u - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1u : 0u);
   unstuff = ((val >> 8) & 0xFF) > 0x8F;
 
   tmp |= (val & 0xFF) << bits;
-  bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
+  bits += 8u - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1u : 0u);
   unstuff = (val & 0xFF) > 0x8F;
 
   // now move the read and unstuffed bits into vlcp->tmp
@@ -413,15 +430,20 @@ inline void rev_read(rev_struct_t *vlcp)
   *  @param [in]  lcup is the length of MagSgn+MEL+VLC segments
   *  @param [in]  scup is the length of MEL+VLC segments
   */
-inline void rev_init(rev_struct_t *vlcp, OPJ_UINT8* data, int lcup, int scup)
+static inline 
+void rev_init(rev_struct_t *vlcp, OPJ_UINT8* data, int lcup, int scup)
 {
+  OPJ_UINT32 d;
+  int num;
+  int tnum;
+
   //first byte has only the upper 4 bits
   vlcp->data = data + lcup - 2;
 
   //size can not be larger than this, in fact it should be smaller
   vlcp->size = scup - 2;
 
-  OPJ_UINT32 d = *vlcp->data--; // read one byte (this is a half byte)
+  d = *vlcp->data--;            // read one byte (this is a half byte)
   vlcp->tmp = d >> 4;           // both initialize and set
   vlcp->bits = 4 - ((vlcp->tmp & 7) == 7); //check standard
   vlcp->unstuff = (d | 0xF) > 0x8F; //this is useful for the next byte
@@ -430,13 +452,14 @@ inline void rev_init(rev_struct_t *vlcp, OPJ_UINT8* data, int lcup, int scup)
   // align to the read size (address multiple of 4 if read size is 4)
   //These few lines take care of the case where data is not at a multiple
   // of 4 boundary. It reads 1,2,3 up to 4 bytes from the VLC bitstream
-  int num = 1 + (int)((intptr_t)(vlcp->data) & 0x3);
-  int tnum = num < vlcp->size ? num : vlcp->size;
+  num = 1 + (int)((intptr_t)(vlcp->data) & 0x3);
+  tnum = num < vlcp->size ? num : vlcp->size;
   for (int i = 0; i < tnum; ++i) {
     OPJ_UINT64 d;
+    OPJ_UINT32 d_bits;
     d = *vlcp->data--;  // read one byte and move read pointer
     //check if the last byte was >0x8F (unstuff == true) and this is 0x7F
-    OPJ_UINT32 d_bits = 8 - ((vlcp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
+    d_bits = 8u - ((vlcp->unstuff && ((d & 0x7F) == 0x7F)) ? 1u : 0u);
     vlcp->tmp |= d << vlcp->bits; // move data to vlcp->tmp
     vlcp->bits += d_bits;
     vlcp->unstuff = d > 0x8F; // for next byte
@@ -453,7 +476,8 @@ inline void rev_init(rev_struct_t *vlcp, OPJ_UINT8* data, int lcup, int scup)
   *
   *  @param [in]  vlcp is a pointer to rev_struct structure
   */
-inline OPJ_UINT32 rev_fetch(rev_struct_t *vlcp)
+static inline 
+OPJ_UINT32 rev_fetch(rev_struct_t *vlcp)
 {
   if (vlcp->bits < 32)  // if there are less then 32 bits, read more
   {
@@ -470,7 +494,8 @@ inline OPJ_UINT32 rev_fetch(rev_struct_t *vlcp)
   *  @param [in]  vlcp is a pointer to rev_struct structure
   *  @param [in]  num_bits is the number of bits to be removed
   */
-inline OPJ_UINT32 rev_advance(rev_struct_t *vlcp, OPJ_UINT32 num_bits)
+static inline 
+OPJ_UINT32 rev_advance(rev_struct_t *vlcp, OPJ_UINT32 num_bits)
 {
   assert(num_bits <= vlcp->bits); // vlcp->tmp must have more than num_bits
   vlcp->tmp >>= num_bits;         // remove bits
@@ -489,12 +514,18 @@ inline OPJ_UINT32 rev_advance(rev_struct_t *vlcp, OPJ_UINT32 num_bits)
   *
   *  @param [in]  mrp is a pointer to rev_struct structure
   */
-inline void rev_read_mrp(rev_struct_t *mrp)
+static inline 
+void rev_read_mrp(rev_struct_t *mrp)
 {
+  OPJ_UINT32 val;
+  OPJ_UINT32 tmp; 
+  OPJ_UINT32 bits;
+  OPJ_BOOL unstuff;
+
   //process 4 bytes at a time
   if (mrp->bits > 32)
     return;
-  OPJ_UINT32 val = 0;
+  val = 0;
   //the next line (the if statement) needs to be tested first
   //notice that second line can be simplified to mrp->data -= 4
   // if (mrp->size > 0)
@@ -505,23 +536,23 @@ inline void rev_read_mrp(rev_struct_t *mrp)
   }
 
   //accumulate in tmp, and keep count in bits
-  OPJ_UINT32 tmp = (mrp->size-- > 0) ? (val >> 24) : 0; // fill zeros if all 
-  OPJ_UINT32 bits;                                      // bytes are used
+  tmp = (mrp->size-- > 0) ? (val >> 24) : 0; // fill zeros if all 
+                                                        
   //test if the last byte > 0x8F (unstuff must be true) and this is 0x7F
-  bits = 8 - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
-  OPJ_BOOL unstuff = (val >> 24) > 0x8F;
+  bits = 8u - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1u : 0u);
+  unstuff = (val >> 24) > 0x8F;
 
   //process the next byte
   tmp |= (mrp->size-- > 0) ? (((val >> 16) & 0xFF) << bits) : 0;
-  bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
+  bits += 8u - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1u : 0u);
   unstuff = ((val >> 16) & 0xFF) > 0x8F;
 
   tmp |= (mrp->size-- > 0) ? (((val >> 8) & 0xFF) << bits) : 0;
-  bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
+  bits += 8u - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1u : 0u);
   unstuff = ((val >> 8) & 0xFF) > 0x8F;
 
   tmp |= (mrp->size-- > 0) ? ((val & 0xFF) << bits) : 0;
-  bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
+  bits += 8u - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1u : 0u);
   unstuff = (val & 0xFF) > 0x8F;
 
   mrp->tmp |= (OPJ_UINT64)tmp << mrp->bits; // move data to mrp pointer
@@ -544,9 +575,11 @@ inline void rev_read_mrp(rev_struct_t *mrp)
   *  @param [in]  lcup is the length of MagSgn+MEL+VLC segments
   *  @param [in]  len2 is the length of SPP+MRP segments
   */
-inline void rev_init_mrp(rev_struct_t *mrp, OPJ_UINT8* data, int lcup, 
-                         int len2)
+static inline 
+void rev_init_mrp(rev_struct_t *mrp, OPJ_UINT8* data, int lcup, int len2)
 {
+  int num;
+
   mrp->data = data + lcup + len2 - 1;
   mrp->size = len2;
   mrp->unstuff = OPJ_TRUE;
@@ -557,13 +590,15 @@ inline void rev_init_mrp(rev_struct_t *mrp, OPJ_UINT8* data, int lcup,
   // align to the read size (address multiple of 4 if read size is 4)
   //These few lines take care of the case where data is not at a multiple
   // of 4 boundary.  It reads 1,2,3 up to 4 bytes from the MRP stream
-  int num = 1 + (int)((intptr_t)(mrp->data) & 0x3);
+  num = 1 + (int)((intptr_t)(mrp->data) & 0x3);
   for (int i = 0; i < num; ++i) {
     OPJ_UINT64 d;
+    OPJ_UINT32 d_bits;
+
     //read a byte, 0 if no more data
     d = (mrp->size-- > 0) ? *mrp->data-- : 0; 
     //check if unstuffing is needed
-    OPJ_UINT32 d_bits = 8 - ((mrp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
+    d_bits = 8u - ((mrp->unstuff && ((d & 0x7F) == 0x7F)) ? 1u : 0u);
     mrp->tmp |= d << mrp->bits; // move data to vlcp->tmp
     mrp->bits += d_bits;
     mrp->unstuff = d > 0x8F; // for next byte
@@ -579,7 +614,8 @@ inline void rev_init_mrp(rev_struct_t *mrp, OPJ_UINT8* data, int lcup,
   *
   *  @param [in]  mrp is a pointer to rev_struct structure
   */
-inline OPJ_UINT32 rev_fetch_mrp(rev_struct_t *mrp)
+static inline 
+OPJ_UINT32 rev_fetch_mrp(rev_struct_t *mrp)
 {
   if (mrp->bits < 32) // if there are less than 32 bits in mrp->tmp
   {
@@ -596,7 +632,8 @@ inline OPJ_UINT32 rev_fetch_mrp(rev_struct_t *mrp)
   *  @param [in]  mrp is a pointer to rev_struct structure
   *  @param [in]  num_bits is the number of bits to be removed
   */
-inline OPJ_UINT32 rev_advance_mrp(rev_struct_t *mrp, OPJ_UINT32 num_bits)
+static inline 
+OPJ_UINT32 rev_advance_mrp(rev_struct_t *mrp, OPJ_UINT32 num_bits)
 {
   assert(num_bits <= mrp->bits); // we must not consume more than mrp->bits
   mrp->tmp >>= num_bits;         // discard the lowest num_bits bits
@@ -615,8 +652,8 @@ inline OPJ_UINT32 rev_advance_mrp(rev_struct_t *mrp, OPJ_UINT32 num_bits)
   *  @param [out] u is the u value (or u_q) + 1.  Note: we produce u + 1;
   *               this value is a partial calculation of u + kappa.
   */
-inline OPJ_UINT32 decode_init_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode, 
-                                   OPJ_UINT32 *u)
+static inline 
+OPJ_UINT32 decode_init_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode, OPJ_UINT32 *u)
 {
   //table stores possible decoding three bits from vlc
   // there are 8 entries for xx1, x10, 100, 000, where x means do not care
@@ -642,11 +679,14 @@ inline OPJ_UINT32 decode_init_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode,
   }
   else if (mode <= 2) // u_off are either 01 or 10
   {
-    OPJ_UINT32 d = dec[vlc & 0x7];   //look at the least significant 3 bits
+    OPJ_UINT32 d;
+    OPJ_UINT32 suffix_len;
+
+    d = dec[vlc & 0x7];   //look at the least significant 3 bits
     vlc >>= d & 0x3;                 //prefix length
     consumed_bits += d & 0x3; 
 
-    OPJ_UINT32 suffix_len = ((d >> 2) & 0x7); 
+    suffix_len = ((d >> 2) & 0x7); 
     consumed_bits += suffix_len;
 
     d = (d >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
@@ -661,23 +701,28 @@ inline OPJ_UINT32 decode_init_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode,
 
     if ((d1 & 0x3) > 2)
     {
+      OPJ_UINT32 suffix_len;
+
       //u_{q_2} prefix
       u[1] = (vlc & 1) + 1 + 1; //Kappa is 1 for initial line
       ++consumed_bits;
       vlc >>= 1;
 
-      OPJ_UINT32 suffix_len = ((d1 >> 2) & 0x7);
+      suffix_len = ((d1 >> 2) & 0x7);
       consumed_bits += suffix_len;
       d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
       u[0] = d1 + 1; //Kappa is 1 for initial line
     }
     else
     {
-      OPJ_UINT32 d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
-      vlc >>= d2 & 0x3;                // Consume bits
+      OPJ_UINT32 d2;
+      OPJ_UINT32 suffix_len;
+
+      d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+      vlc >>= d2 & 0x3;     // Consume bits
       consumed_bits += d2 & 0x3;
 
-      OPJ_UINT32 suffix_len = ((d1 >> 2) & 0x7);
+      suffix_len = ((d1 >> 2) & 0x7);
       consumed_bits += suffix_len;
 
       d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
@@ -693,15 +738,19 @@ inline OPJ_UINT32 decode_init_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode,
   }
   else if (mode == 4) // both u_off are 1, and MEL event is 1
   {
-    OPJ_UINT32 d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
-    vlc >>= d1 & 0x3;                // Consume bits
+    OPJ_UINT32 d1;
+    OPJ_UINT32 d2;
+    OPJ_UINT32 suffix_len;
+
+    d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+    vlc >>= d1 & 0x3;     // Consume bits
     consumed_bits += d1 & 0x3;
 
-    OPJ_UINT32 d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
-    vlc >>= d2 & 0x3;                // Consume bits
+    d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+    vlc >>= d2 & 0x3;     // Consume bits
     consumed_bits += d2 & 0x3;
 
-    OPJ_UINT32 suffix_len = ((d1 >> 2) & 0x7);
+    suffix_len = ((d1 >> 2) & 0x7);
     consumed_bits += suffix_len;
 
     d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
@@ -726,8 +775,8 @@ inline OPJ_UINT32 decode_init_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode,
   *  @param [out] u is the u value (or u_q) + 1.  Note: we produce u + 1;
   *               this value is a partial calculation of u + kappa.
   */
-inline OPJ_UINT32 decode_noninit_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode, 
-                                      OPJ_UINT32 *u)
+static inline 
+OPJ_UINT32 decode_noninit_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode, OPJ_UINT32 *u)
 {
   //table stores possible decoding three bits from vlc
   // there are 8 entries for xx1, x10, 100, 000, where x means do not care
@@ -753,11 +802,14 @@ inline OPJ_UINT32 decode_noninit_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode,
   }
   else if (mode <= 2) //u_off are either 01 or 10
   {
-    OPJ_UINT32 d = dec[vlc & 0x7];  //look at the least significant 3 bits
-    vlc >>= d & 0x3;                //prefix length
+    OPJ_UINT32 d;
+    OPJ_UINT32 suffix_len;
+
+    d = dec[vlc & 0x7];  //look at the least significant 3 bits
+    vlc >>= d & 0x3;     //prefix length
     consumed_bits += d & 0x3;
 
-    OPJ_UINT32 suffix_len = ((d >> 2) & 0x7);
+    suffix_len = ((d >> 2) & 0x7);
     consumed_bits += suffix_len;
 
     d = (d >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
@@ -766,15 +818,19 @@ inline OPJ_UINT32 decode_noninit_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode,
   }
   else if (mode == 3) // both u_off are 1
   {
-    OPJ_UINT32 d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
-    vlc >>= d1 & 0x3;                // Consume bits
+    OPJ_UINT32 d1;
+    OPJ_UINT32 d2;
+    OPJ_UINT32 suffix_len;
+
+    d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+    vlc >>= d1 & 0x3;     // Consume bits
     consumed_bits += d1 & 0x3;
 
-    OPJ_UINT32 d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
-    vlc >>= d2 & 0x3;                // Consume bits
+    d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+    vlc >>= d2 & 0x3;     // Consume bits
     consumed_bits += d2 & 0x3;
 
-    OPJ_UINT32 suffix_len = ((d1 >> 2) & 0x7);
+    suffix_len = ((d1 >> 2) & 0x7);
     consumed_bits += suffix_len;
 
     d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
@@ -820,31 +876,36 @@ typedef struct frwd_struct {
   *  @param  [in]  msp is a pointer to frwd_struct_t structure
   *
   */ 
+static inline
 void frwd_read(frwd_struct_t *msp)
 {
+  OPJ_UINT32 val;
+  OPJ_UINT32 bits;
+  OPJ_UINT32 t;
+  OPJ_BOOL unstuff;
+
   assert(msp->bits <= 32); // assert that there is a space for 32 bits
 
-  OPJ_UINT32 val;
   val = *(OPJ_UINT32*)msp->data;      // read 32 bits
   msp->data += msp->size > 0 ? 4 : 0; // move pointer if data is not 
                                       // exhausted
 
   // we accumulate in t and keep a count of the number of bits in bits
-  OPJ_UINT32 bits = 8 - msp->unstuff;     // if previous byte was 0xFF
+  bits = 8u - (msp->unstuff ? 1u:0u);     // if previous byte was 0xFF
   // get next byte, if bitstream is exhausted, replace it with X
-  OPJ_UINT32 t = msp->size-- > 0 ? (val & 0xFF) : msp->X;
-  OPJ_BOOL unstuff = ((val & 0xFF) == 0xFF);  // Do we need unstuffing next?
+  t = msp->size-- > 0 ? (val & 0xFF) : msp->X;
+  unstuff = ((val & 0xFF) == 0xFF);  // Do we need unstuffing next?
 
   t |= (msp->size-- > 0 ? ((val >> 8) & 0xFF) : msp->X) << bits;
-  bits += 8 - unstuff;
+  bits += 8u - (unstuff ? 1u:0u);
   unstuff = (((val >> 8) & 0xFF) == 0xFF);
 
   t |= (msp->size-- > 0 ? ((val >> 16) & 0xFF) : msp->X) << bits;
-  bits += 8 - unstuff;
+  bits += 8u - (unstuff ? 1u:0u);
   unstuff = (((val >> 16) & 0xFF) == 0xFF);
 
   t |= (msp->size-- > 0 ? ((val >> 24) & 0xFF) : msp->X) << bits;
-  bits += 8 - unstuff;
+  bits += 8u - (unstuff ? 1u:0u);
   msp->unstuff = (((val >> 24) & 0xFF) == 0xFF); // for next byte
 
   msp->tmp |= ((OPJ_UINT64)t) << msp->bits;  // move data to msp->tmp
@@ -860,9 +921,12 @@ void frwd_read(frwd_struct_t *msp)
   *  @param [in]  X is the value fed in when the bitstream is exhausted.
   *               See frwd_read.
   */
+static inline
 void frwd_init(frwd_struct_t *msp, const OPJ_UINT8* data, int size, 
                OPJ_UINT32 X)
 {
+  int num;
+
   msp->data = data;
   msp->tmp = 0;
   msp->bits = 0;
@@ -875,15 +939,15 @@ void frwd_init(frwd_struct_t *msp, const OPJ_UINT8* data, int size,
   // align to the read size (address multiple of 4 if read size is 4)
   //These few lines take care of the case where data is not at a multiple
   // of 4 boundary.  It reads 1,2,3 up to 4 bytes from the bitstream
-  int num = 4 - (int)((intptr_t)(msp->data) & 0x3);
+  num = 4 - (int)((intptr_t)(msp->data) & 0x3);
   for (int i = 0; i < num; ++i)
   {
     OPJ_UINT64 d;
     //read a byte if the buffer is not exhausted, otherwise set it to X
     d = msp->size-- > 0 ? *msp->data++ : msp->X;
-    msp->tmp |= (d << msp->bits);      // store data in msp->tmp
-    msp->bits += 8 - msp->unstuff;     // number of bits added to msp->tmp
-    msp->unstuff = ((d & 0xFF) == 0xFF); // unstuffing for next byte
+    msp->tmp |= (d << msp->bits);           // store data in msp->tmp
+    msp->bits += 8u - (msp->unstuff?1u:0u); // number of bits added to msp->tmp
+    msp->unstuff = ((d & 0xFF) == 0xFF);    // unstuffing for next byte
   }
   frwd_read(msp); // read 32 bits more
 }
@@ -894,7 +958,8 @@ void frwd_init(frwd_struct_t *msp, const OPJ_UINT8* data, int size,
   *  @param [in]  msp is a pointer to frwd_struct_t
   *  @param [in]  num_bits is the number of bit to consume
   */
-inline void frwd_advance(frwd_struct_t *msp, OPJ_UINT32 num_bits)
+static inline 
+void frwd_advance(frwd_struct_t *msp, OPJ_UINT32 num_bits)
 {
   assert(num_bits <= msp->bits);
   msp->tmp >>= num_bits;  // consume num_bits
@@ -906,6 +971,7 @@ inline void frwd_advance(frwd_struct_t *msp, OPJ_UINT32 num_bits)
   *
   *  @param [in]  msp is a pointer to frwd_struct_t
   */
+static inline 
 OPJ_UINT32 frwd_fetch(frwd_struct_t *msp)
 {
   if (msp->bits < 32)
@@ -926,7 +992,6 @@ static OPJ_BOOL opj_t1_allocate_buffers(
     OPJ_UINT32 h)
 {
     OPJ_UINT32 flagssize;
-    OPJ_UINT32 flags_stride;
 
     /* No risk of overflow. Prior checks ensure those assert are met */
     /* They are per the specification */
@@ -954,9 +1019,6 @@ static OPJ_BOOL opj_t1_allocate_buffers(
         }
     }
 
-    flags_stride = 0; // not used
-
-
     // We expand these buffers to multiples of 16 bytes.
     // We need 4 buffers of 129 integers each, expanded to 132 integers each
     // We also need 514 bytes of buffer, expanded to 528 bytes
@@ -997,6 +1059,34 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
                                opj_mutex_t* p_manager_mutex,
                                OPJ_BOOL check_pterm)
 {
+  OPJ_BYTE* cblkdata = NULL;
+  OPJ_UINT8* coded_data;
+  OPJ_UINT32* decoded_data;
+  OPJ_UINT32 num_passes;
+  OPJ_UINT32 lengths1;
+  OPJ_UINT32 lengths2;
+  OPJ_INT32 width;
+  OPJ_INT32 height;
+  OPJ_INT32 stride;
+  OPJ_UINT32 *pflags, *sigma1, *sigma2, *mbr1, *mbr2, *sip, sip_shift;
+  OPJ_UINT32 p;
+  OPJ_UINT32 zero_planes_p1;
+  int lcup, scup;
+  dec_mel_t mel;
+  rev_struct_t vlc;
+  frwd_struct_t magsgn;
+  frwd_struct_t sigprop;
+  rev_struct_t magref;
+  OPJ_UINT8 *lsp, *line_state;
+  int run;  
+  OPJ_UINT32 vlc_val;           
+  OPJ_UINT32 qinf[2];
+  OPJ_UINT32 c_q;
+  OPJ_UINT32* sp;
+
+  (void)(orient);      // stops unused parameter message
+  (void)(check_pterm); // stops unused parameter message
+
   // We ignor orient, because the same decoder is used for all subbands
   // We also ignore check_pterm, because I am not sure how it applies
   assert(cblksty == 0x40); // that is the only support mode
@@ -1010,8 +1100,6 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     return OPJ_FALSE;
   }
 
-  OPJ_BYTE* cblkdata = NULL;
-
   if (!opj_t1_allocate_buffers(
               t1,
               (OPJ_UINT32)(cblk->x1 - cblk->x0),
@@ -1059,24 +1147,24 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
       return OPJ_TRUE;
   }
 
-  // coded_data is a pointer to bitstream
-  OPJ_UINT8* coded_data = cblkdata;
-  // decoded_data is a pointer to decoded codeblock data buf.
-  OPJ_UINT32* decoded_data = t1->data;
-  // num_passes is the number of passes: 1 if CUP only, 2 for CUP+SPP, and 
-  // 3 for CUP+SPP+MRP
-  OPJ_UINT32 num_passes = cblk->numsegs>0 ? cblk->segs[0].real_num_passes : 0;
+  // OPJ_BYTE* coded_data is a pointer to bitstream
+  coded_data = cblkdata;
+  // OPJ_UINT32* decoded_data is a pointer to decoded codeblock data buf.
+  decoded_data = (OPJ_UINT32*)t1->data;
+  // OPJ_UINT32 num_passes is the number of passes: 1 if CUP only, 2 for 
+  // CUP+SPP, and 3 for CUP+SPP+MRP
+  num_passes = cblk->numsegs>0 ? cblk->segs[0].real_num_passes : 0;
   num_passes += cblk->numsegs>1 ? cblk->segs[1].real_num_passes : 0;
-  // lengths1 is the length of cleanup pass
-  OPJ_UINT32 lengths1 = num_passes > 0 ? cblk->segs[0].len : 0;
-  // lengths2 is the length of refinement passes (either SPP only or SPP+MRP)
-  OPJ_UINT32 lengths2 = num_passes > 1 ? cblk->segs[1].len : 0;
-  // width is the decoded codeblock width 
-  OPJ_UINT32 width = cblk->x1 - cblk->x0;
-  // height is the decoded codeblock height
-  OPJ_UINT32 height = cblk->y1 - cblk->y0;
-  // stride is the decoded codeblock buffer stride 
-  OPJ_UINT32 stride = width;
+  // OPJ_UINT32 lengths1 is the length of cleanup pass
+  lengths1 = num_passes > 0 ? cblk->segs[0].len : 0;
+  // OPJ_UINT32 lengths2 is the length of refinement passes (either SPP only or SPP+MRP)
+  lengths2 = num_passes > 1 ? cblk->segs[1].len : 0;
+  // OPJ_INT32 width is the decoded codeblock width 
+  width = cblk->x1 - cblk->x0;
+  // OPJ_INT32 height is the decoded codeblock height
+  height = cblk->y1 - cblk->y0;
+  // OPJ_INT32 stride is the decoded codeblock buffer stride 
+  stride = width;
 
    /*  sigma1 and sigma2 contains significant (i.e., non-zero) pixel 
     *  locations.  The buffers are used interchangeably, because we need
@@ -1091,14 +1179,17 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     *  goes outside the structure
     *  To work in OpenJPEG these buffers has been expanded to 132.
     */
-  OPJ_UINT32 *pflags = (OPJ_UINT32 *)t1->flags;
-  OPJ_UINT32 *sigma1 = pflags, *sigma2 = sigma1 + 132;
+  // OPJ_UINT32 *pflags, *sigma1, *sigma2, *mbr1, *mbr2, *sip, sip_shift;
+  pflags = (OPJ_UINT32 *)t1->flags;
+  sigma1 = pflags;
+  sigma2 = sigma1 + 132;
   // mbr arrangement is similar to sigma; mbr contains locations 
   // that become significant during significance propagation pass
-  OPJ_UINT32 *mbr1 = sigma2 + 132, *mbr2 = mbr1 + 132;
+  mbr1 = sigma2 + 132;
+  mbr2 = mbr1 + 132;
   //a pointer to sigma
-  OPJ_UINT32 *sip = sigma1; //pointers to arrays to be used interchangeably
-  OPJ_UINT32 sip_shift = 0; //the amount of shift needed for sigma
+  sip = sigma1;  //pointers to arrays to be used interchangeably
+  sip_shift = 0; //the amount of shift needed for sigma
 
   if (num_passes > 1 && lengths2 == 0)
   {
@@ -1158,12 +1249,12 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
       return OPJ_TRUE;
     }
 
-  OPJ_UINT32 p = cblk->numbps; 
-  // zero planes plus 1
-  OPJ_UINT32 zero_planes_p1 = cblk->Mb - cblk->numbps + 1;
+  // OPJ_INT32
+  p = cblk->numbps; 
+  // OPJ_INT32 zero planes plus 1
+  zero_planes_p1 = cblk->Mb - cblk->numbps + 1;
 
   // read scup and fix the bytes there
-  int lcup, scup;
   lcup = (int)lengths1;  // length of CUP
   //scup is the length of MEL + VLC
   scup = (((int)coded_data[lcup-1]) << 4) + (coded_data[lcup-2] & 0xF);
@@ -1171,16 +1262,11 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     return OPJ_FALSE;
 
   // init structures
-  dec_mel_t mel;
   mel_init(&mel, coded_data, lcup, scup);
-  rev_struct_t vlc;
   rev_init(&vlc, coded_data, lcup, scup);
-  frwd_struct_t magsgn;
   frwd_init(&magsgn, coded_data, lcup - scup, 0xFF);
-  frwd_struct_t sigprop;
   if (num_passes > 1) // needs to be tested
     frwd_init(&sigprop, coded_data + lengths1, (int)lengths2, 0);
-  rev_struct_t magref;
   if (num_passes > 2)
     rev_init_mrp(&magref, coded_data, (int)lengths1, (int)lengths2);
 
@@ -1195,22 +1281,29 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
   // 514 is enough for a block width of 1024, +2 extra
   // here expanded to 528
-  OPJ_UINT8 *lsp, *line_state = (OPJ_UINT8 *)(mbr2 + 132); 
+  line_state = (OPJ_UINT8 *)(mbr2 + 132); 
 
   //initial 2 lines
   /////////////////
-  lsp = line_state;              // point to line state
-  lsp[0] = 0;                    // for initial row of quad, we set to 0
-  int run = mel_get_run(&mel);   // decode runs of events from MEL bitstrm
-                                 // data represented as runs of 0 events
-                                 // See mel_decode description
-  OPJ_UINT32 vlc_val;            // fetched data from VLC bitstream
-  OPJ_UINT32 qinf[2] = { 0 };    // quad info decoded from VLC bitstream
-  OPJ_UINT32 c_q = 0;            // context for quad q
-  OPJ_UINT32* sp = decoded_data; // decoded codeblock samples
-
-  for (OPJ_UINT32 x = 0; x < width; x += 4) // one iteration per quad pair
+  lsp = line_state;           // point to line state
+  lsp[0] = 0;                 // for initial row of quad, we set to 0
+  run = mel_get_run(&mel);    // decode runs of events from MEL bitstrm
+                              // data represented as runs of 0 events
+                              // See mel_decode description
+  qinf[0] = qinf[1] = 0;      // quad info decoded from VLC bitstream
+  c_q = 0;                    // context for quad q
+  sp = decoded_data;          // decoded codeblock samples
+  // vlc_val;                 // fetched data from VLC bitstream
+
+  for (OPJ_INT32 x = 0; x < width; x += 4) // one iteration per quad pair
   {
+    OPJ_UINT32 U_q[2]; // u values for the quad pair
+    OPJ_UINT32 uvlc_mode;
+    OPJ_UINT32 consumed_bits;
+    OPJ_UINT32 m_n, v_n;
+    OPJ_UINT32 ms_val;
+    OPJ_UINT32 locs;
+
     // decode VLC
     /////////////
 
@@ -1308,10 +1401,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
     // retrieve u
     /////////////
-    OPJ_UINT32 U_q[2]; // u values for the quad pair
 
     // uvlc_mode is made up of u_offset bits from the quad pair
-    OPJ_UINT32 uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
+    uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
     if (uvlc_mode == 3)  // if both u_offset are set, get an event from
     {                    // the MEL run of events
       run -= 2; //subtract 2, since events number if multiplied by 2
@@ -1320,7 +1412,7 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
         run = mel_get_run(&mel);
     }
     //decode uvlc_mode to get u for both quads
-    OPJ_UINT32 consumed_bits = decode_init_uvlc(vlc_val, uvlc_mode, U_q);
+    consumed_bits = decode_init_uvlc(vlc_val, uvlc_mode, U_q);
     if (U_q[0] > zero_planes_p1 || U_q[1] > zero_planes_p1)
     {
       if (p_manager_mutex)
@@ -1337,22 +1429,22 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
     //decode magsgn and update line_state
     /////////////////////////////////////
-    OPJ_UINT32 m_n, v_n;
-    OPJ_UINT32 ms_val;
 
     //We obtain a mask for the samples locations that needs evaluation
-    OPJ_UINT32 locs = 0xFF;
+    locs = 0xFF;
     if (x + 4 > width) locs >>= (x + 4 - width) << 1; // limits width
     locs = height > 1 ? locs : (locs & 0x55);         // limits height
 
     //first quad, starting at first sample in quad and moving on
     if (qinf[0] & 0x10) //is it signifcant? (sigma_n)
     {
+      OPJ_UINT32 val;
+
       ms_val = frwd_fetch(&magsgn);         //get 32 bits of magsgn data
       m_n = U_q[0] - ((qinf[0] >> 12) & 1); //evaluate m_n (number of bits
                                   // to read from bitstream), using EMB e_k
       frwd_advance(&magsgn, m_n);         //consume m_n
-      OPJ_UINT32 val = ms_val << 31;      //get sign bit
+      val = ms_val << 31;                 //get sign bit
       v_n = ms_val & ((1U << m_n) - 1);   //keep only m_n bits
       v_n |= ((qinf[0] & 0x100) >> 8) << m_n;  //add EMB e_1 as MSB
       v_n |= 1;                                //add center of bin    
@@ -1365,10 +1457,12 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
     if (qinf[0] & 0x20) //sigma_n
     {
+      OPJ_UINT32 val, t;
+
       ms_val = frwd_fetch(&magsgn);         //get 32 bits
       m_n = U_q[0] - ((qinf[0] >> 13) & 1); //m_n, uses EMB e_k
       frwd_advance(&magsgn, m_n);           //consume m_n
-      OPJ_UINT32 val = ms_val << 31;        //get sign bit
+      val = ms_val << 31;                   //get sign bit
       v_n = ms_val & ((1U << m_n) - 1);     //keep only m_n bits
       v_n |= ((qinf[0] & 0x200) >> 9) << m_n; //add EMB e_1
       v_n |= 1;                               //bin center
@@ -1377,7 +1471,7 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
       sp[stride] = val | ((v_n + 2) << (p - 1)); 
 
       //update line_state: bit 7 (\sigma^N), and E^N
-      OPJ_UINT32 t = lsp[0] & 0x7F;          //keep E^NW
+      t = lsp[0] & 0x7F;       // keep E^NW
       v_n = 32 - count_leading_zeros(v_n); 
       lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n)); //max(E^NW, E^N) | s
     }
@@ -1390,10 +1484,12 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     //this is similar to the above two samples
     if (qinf[0] & 0x40) 
     {
+      OPJ_UINT32 val;
+
       ms_val = frwd_fetch(&magsgn);
       m_n = U_q[0] - ((qinf[0] >> 14) & 1); 
       frwd_advance(&magsgn, m_n);
-      OPJ_UINT32 val = ms_val << 31;
+      val = ms_val << 31;
       v_n = ms_val & ((1U << m_n) - 1);
       v_n |= (((qinf[0] & 0x400) >> 10) << m_n);
       v_n |= 1; 
@@ -1405,10 +1501,11 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     lsp[0] = 0;
     if (qinf[0] & 0x80) 
     {
+      OPJ_UINT32 val;
       ms_val = frwd_fetch(&magsgn);
       m_n = U_q[0] - ((qinf[0] >> 15) & 1); //m_n
       frwd_advance(&magsgn, m_n);
-      OPJ_UINT32 val = ms_val << 31;
+      val = ms_val << 31;
       v_n = ms_val & ((1U << m_n) - 1);
       v_n |= ((qinf[0] & 0x800) >> 11) << m_n;
       v_n |= 1; //center of bin
@@ -1425,10 +1522,12 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     //second quad
     if (qinf[1] & 0x10) 
     {
+      OPJ_UINT32 val;
+
       ms_val = frwd_fetch(&magsgn);
       m_n = U_q[1] - ((qinf[1] >> 12) & 1); //m_n
       frwd_advance(&magsgn, m_n);
-      OPJ_UINT32 val = ms_val << 31;
+      val = ms_val << 31;
       v_n = ms_val & ((1U << m_n) - 1);
       v_n |= (((qinf[1] & 0x100) >> 8) << m_n);
       v_n |= 1;
@@ -1439,17 +1538,19 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
     if (qinf[1] & 0x20)
     {
+      OPJ_UINT32 val, t;
+
       ms_val = frwd_fetch(&magsgn);
       m_n = U_q[1] - ((qinf[1] >> 13) & 1); //m_n
       frwd_advance(&magsgn, m_n);
-      OPJ_UINT32 val = ms_val << 31;
+      val = ms_val << 31;
       v_n = ms_val & ((1U << m_n) - 1);
       v_n |= (((qinf[1] & 0x200) >> 9) << m_n);
       v_n |= 1;
       sp[stride] = val | ((v_n + 2) << (p - 1));
 
       //update line_state: bit 7 (\sigma^N), and E^N
-      OPJ_UINT32 t = lsp[0] & 0x7F;            //E^NW
+      t = lsp[0] & 0x7F;            //E^NW
       v_n = 32 - count_leading_zeros(v_n);     //E^N
       lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n)); //max(E^NW, E^N) | s
     }
@@ -1461,10 +1562,12 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
     if (qinf[1] & 0x40)
     {
+      OPJ_UINT32 val;
+
       ms_val = frwd_fetch(&magsgn);
       m_n = U_q[1] - ((qinf[1] >> 14) & 1); //m_n
       frwd_advance(&magsgn, m_n);
-      OPJ_UINT32 val = ms_val << 31;
+      val = ms_val << 31;
       v_n = ms_val & ((1U << m_n) - 1);
       v_n |= (((qinf[1] & 0x400) >> 10) << m_n);
       v_n |= 1;
@@ -1476,10 +1579,12 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     lsp[0] = 0;
     if (qinf[1] & 0x80)
     {
+      OPJ_UINT32 val;
+
       ms_val = frwd_fetch(&magsgn);
       m_n = U_q[1] - ((qinf[1] >> 15) & 1); //m_n
       frwd_advance(&magsgn, m_n);
-      OPJ_UINT32 val = ms_val << 31;
+      val = ms_val << 31;
       v_n = ms_val & ((1U << m_n) - 1);
       v_n |= (((qinf[1] & 0x800) >> 11) << m_n);
       v_n |= 1; //center of bin
@@ -1496,19 +1601,28 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
   //non-initial lines
   //////////////////////////
-  for (OPJ_UINT32 y = 2; y < height; /*done at the end of loop*/)
+  for (OPJ_INT32 y = 2; y < height; /*done at the end of loop*/)
   {
+    OPJ_UINT32 *sip;
+    OPJ_UINT8 ls0;
+
     sip_shift ^= 0x2;  // shift sigma to the upper half od the nibble
     sip_shift &= 0xFFFFFFEFU; //move back to 0 (it might have been at 0x10)
-    OPJ_UINT32 *sip = y & 0x4 ? sigma2 : sigma1; //choose sigma array
+    sip = y & 0x4 ? sigma2 : sigma1; //choose sigma array
 
     lsp = line_state;
-    OPJ_UINT8 ls0 = lsp[0];         // read the line state value
+    ls0 = lsp[0];                   // read the line state value
     lsp[0] = 0;                     // and set it to zero
     sp = decoded_data + y * stride; // generated samples
     c_q = 0;                        // context
-    for (OPJ_UINT32 x = 0; x < width; x += 4)
+    for (OPJ_INT32 x = 0; x < width; x += 4)
     {
+      OPJ_UINT32 U_q[2];
+      OPJ_UINT32 uvlc_mode, consumed_bits;
+      OPJ_UINT32 m_n, v_n;
+      OPJ_UINT32 ms_val;
+      OPJ_UINT32 locs;
+
       // decode vlc
       /////////////
 
@@ -1574,9 +1688,8 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
       //retrieve u
       ////////////
-      OPJ_UINT32 U_q[2];
-      OPJ_UINT32 uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
-      OPJ_UINT32 consumed_bits = decode_noninit_uvlc(vlc_val, uvlc_mode, U_q);
+      uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
+      consumed_bits = decode_noninit_uvlc(vlc_val, uvlc_mode, U_q);
       vlc_val = rev_advance(&vlc, consumed_bits);
 
       //calculate E^max and add it to U_q, eqns 5 and 6 in ITU T.814
@@ -1612,21 +1725,21 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
       //decode magsgn and update line_state
       /////////////////////////////////////
-      OPJ_UINT32 m_n, v_n;
-      OPJ_UINT32 ms_val;
 
       //locations where samples need update
-      OPJ_UINT32 locs = 0xFF;
+      locs = 0xFF;
       if (x + 4 > width) locs >>= (x + 4 - width) << 1;
       locs = height > 1 ? locs : (locs & 0x55);
 
 
       if (qinf[0] & 0x10) //sigma_n
       {
+        OPJ_UINT32 val;
+
         ms_val = frwd_fetch(&magsgn);
         m_n = U_q[0] - ((qinf[0] >> 12) & 1); //m_n
         frwd_advance(&magsgn, m_n);
-        OPJ_UINT32 val = ms_val << 31;
+        val = ms_val << 31;
         v_n = ms_val & ((1U << m_n) - 1);
         v_n |= ((qinf[0] & 0x100) >> 8) << m_n;
         v_n |= 1; //center of bin
@@ -1637,17 +1750,19 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
       if (qinf[0] & 0x20) //sigma_n
       {
+        OPJ_UINT32 val, t;
+
         ms_val = frwd_fetch(&magsgn);
         m_n = U_q[0] - ((qinf[0] >> 13) & 1); //m_n
         frwd_advance(&magsgn, m_n);
-        OPJ_UINT32 val = ms_val << 31;
+        val = ms_val << 31;
         v_n = ms_val & ((1U << m_n) - 1);
         v_n |= ((qinf[0] & 0x200) >> 9) << m_n;
         v_n |= 1; //center of bin
         sp[stride] = val | ((v_n + 2) << (p - 1));
 
         //update line_state: bit 7 (\sigma^N), and E^N
-        OPJ_UINT32 t = lsp[0] & 0x7F;          //E^NW
+        t = lsp[0] & 0x7F;          //E^NW
         v_n = 32 - count_leading_zeros(v_n); 
         lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n));
       }
@@ -1659,10 +1774,12 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
       if (qinf[0] & 0x40) //sigma_n
       {
+        OPJ_UINT32 val;
+
         ms_val = frwd_fetch(&magsgn);
         m_n = U_q[0] - ((qinf[0] >> 14) & 1); //m_n
         frwd_advance(&magsgn, m_n);
-        OPJ_UINT32 val = ms_val << 31;
+        val = ms_val << 31;
         v_n = ms_val & ((1U << m_n) - 1);
         v_n |= (((qinf[0] & 0x400) >> 10) << m_n);
         v_n |= 1;                            //center of bin
@@ -1673,10 +1790,12 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
       if (qinf[0] & 0x80) //sigma_n
       {
+        OPJ_UINT32 val;
+
         ms_val = frwd_fetch(&magsgn);
         m_n = U_q[0] - ((qinf[0] >> 15) & 1); //m_n
         frwd_advance(&magsgn, m_n);
-        OPJ_UINT32 val = ms_val << 31;
+        val = ms_val << 31;
         v_n = ms_val & ((1U << m_n) - 1);
         v_n |= ((qinf[0] & 0x800) >> 11) << m_n;
         v_n |= 1; //center of bin
@@ -1692,10 +1811,12 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
       if (qinf[1] & 0x10) //sigma_n
       {
+        OPJ_UINT32 val;
+
         ms_val = frwd_fetch(&magsgn);
         m_n = U_q[1] - ((qinf[1] >> 12) & 1); //m_n
         frwd_advance(&magsgn, m_n);
-        OPJ_UINT32 val = ms_val << 31;
+        val = ms_val << 31;
         v_n = ms_val & ((1U << m_n) - 1);
         v_n |= (((qinf[1] & 0x100) >> 8) << m_n);
         v_n |= 1;                            //center of bin
@@ -1706,17 +1827,19 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
       if (qinf[1] & 0x20) //sigma_n
       {
+        OPJ_UINT32 val, t;
+
         ms_val = frwd_fetch(&magsgn);
         m_n = U_q[1] - ((qinf[1] >> 13) & 1); //m_n
         frwd_advance(&magsgn, m_n);
-        OPJ_UINT32 val = ms_val << 31;
+        val = ms_val << 31;
         v_n = ms_val & ((1U << m_n) - 1);
         v_n |= (((qinf[1] & 0x200) >> 9) << m_n);
         v_n |= 1; //center of bin
         sp[stride] = val | ((v_n + 2) << (p - 1));
 
         //update line_state: bit 7 (\sigma^N), and E^N
-        OPJ_UINT32 t = lsp[0] & 0x7F;          //E^NW
+        t = lsp[0] & 0x7F;          //E^NW
         v_n = 32 - count_leading_zeros(v_n); 
         lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n));
       }
@@ -1728,10 +1851,12 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
       if (qinf[1] & 0x40) //sigma_n
       {
+        OPJ_UINT32 val;
+
         ms_val = frwd_fetch(&magsgn);
         m_n = U_q[1] - ((qinf[1] >> 14) & 1); //m_n
         frwd_advance(&magsgn, m_n);
-        OPJ_UINT32 val = ms_val << 31;
+        val = ms_val << 31;
         v_n = ms_val & ((1U << m_n) - 1);
         v_n |= (((qinf[1] & 0x400) >> 10) << m_n);
         v_n |= 1;                            //center of bin
@@ -1742,10 +1867,12 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
       if (qinf[1] & 0x80) //sigma_n
       {
+        OPJ_UINT32 val;
+
         ms_val = frwd_fetch(&magsgn);
         m_n = U_q[1] - ((qinf[1] >> 15) & 1); //m_n
         frwd_advance(&magsgn, m_n);
-        OPJ_UINT32 val = ms_val << 31;
+        val = ms_val << 31;
         v_n = ms_val & ((1U << m_n) - 1);
         v_n |= (((qinf[1] & 0x800) >> 11) << m_n);
         v_n |= 1; //center of bin
@@ -1770,8 +1897,8 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
         OPJ_UINT32 *cur_sig = y & 0x4 ? sigma1 : sigma2;
         // the address of the data that needs updating
         OPJ_UINT32 *dpp = decoded_data + (y - 4) * stride;
-        OPJ_UINT32 half = 1 << (p - 2); // half the center of the bin
-        for (OPJ_UINT32 i = 0; i < width; i += 8)
+        OPJ_UINT32 half = 1u << (p - 2); // half the center of the bin
+        for (OPJ_INT32 i = 0; i < width; i += 8)
         {
           //Process one entry from sigma array at a time
           // Each nibble (4 bits) in the sigma array represents 4 rows,
@@ -1790,8 +1917,10 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
                 if (sig & sample_mask) //if LSB is set
                 {
+                  OPJ_UINT32 sym;
+
                   assert(dp[0] != 0); // decoded value cannot be zero
-                  OPJ_UINT32 sym = cwd & 1; // get it value
+                  sym = cwd & 1; // get it value
                   // remove center of bin if sym is 0
                   dp[0] ^= (1 - sym) << (p - 1);
                   dp[0] |= half;      // put half the center of bin
@@ -1801,8 +1930,10 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
                 if (sig & sample_mask)
                 {
+                  OPJ_UINT32 sym;
+
                   assert(dp[stride] != 0);
-                  OPJ_UINT32 sym = cwd & 1;
+                  sym = cwd & 1;
                   dp[stride] ^= (1 - sym) << (p - 1);
                   dp[stride] |= half;
                   cwd >>= 1;
@@ -1811,8 +1942,10 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
                 if (sig & sample_mask)
                 {
+                  OPJ_UINT32 sym;
+
                   assert(dp[2 * stride] != 0);
-                  OPJ_UINT32 sym = cwd & 1;
+                  sym = cwd & 1;
                   dp[2 * stride] ^= (1 - sym) << (p - 1);
                   dp[2 * stride] |= half;
                   cwd >>= 1;
@@ -1821,8 +1954,10 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
                 if (sig & sample_mask)
                 {
+                  OPJ_UINT32 sym;
+
                   assert(dp[3 * stride] != 0);
-                  OPJ_UINT32 sym = cwd & 1;
+                  sym = cwd & 1;
                   dp[3 * stride] ^= (1 - sym) << (p - 1);
                   dp[3 * stride] |= half;
                   cwd >>= 1;
@@ -1848,8 +1983,10 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
         //integrate horizontally
         OPJ_UINT32 prev = 0; // previous columns
-        for (OPJ_UINT32 i = 0; i < width; i += 8, mbr++, sig++)
+        for (OPJ_INT32 i = 0; i < width; i += 8, mbr++, sig++)
         {
+          OPJ_UINT32 t, z;
+
           mbr[0] = sig[0];         //start with significant samples
           mbr[0] |= prev >> 28;    //for first column, left neighbors
           mbr[0] |= sig[0] << 4;   //left neighbors
@@ -1858,7 +1995,7 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
           prev = sig[0];           // for next group of columns
 
           //integrate vertically
-          OPJ_UINT32 t = mbr[0], z = mbr[0];
+          t = mbr[0], z = mbr[0];
           z |= (t & 0x77777777) << 1; //above neighbors
           z |= (t & 0xEEEEEEEE) >> 1; //below neighbors
           mbr[0] = z & ~sig[0]; //remove already significance samples
@@ -1868,13 +2005,15 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
       if (y >= 8) //wait until 8 rows has been processed
       {
         OPJ_UINT32 *cur_sig, *cur_mbr, *nxt_sig, *nxt_mbr;
+        OPJ_UINT32 prev;
+        OPJ_UINT32 val;
 
         // add membership from the next stripe, obtained above
         cur_sig = y & 0x4 ? sigma2 : sigma1;
         cur_mbr = y & 0x4 ? mbr2 : mbr1;
         nxt_sig = y & 0x4 ? sigma1 : sigma2;  //future samples
-        OPJ_UINT32 prev = 0; // the columns before these group of 8 columns
-        for (OPJ_UINT32 i=0; i < width; i+=8, cur_mbr++, cur_sig++, nxt_sig++)
+        prev = 0; // the columns before these group of 8 columns
+        for (OPJ_INT32 i=0; i < width; i+=8, cur_mbr++, cur_sig++, nxt_sig++)
         {
           OPJ_UINT32 t = nxt_sig[0];
           t |= prev >> 28;        //for first column, left neighbors
@@ -1892,45 +2031,52 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
         cur_mbr = y & 0x4 ? mbr2 : mbr1;
         nxt_sig = y & 0x4 ? sigma1 : sigma2; //future samples
         nxt_mbr = y & 0x4 ? mbr1 : mbr2;     //future samples
-        OPJ_UINT32 val = 3u << (p - 2); // sample values for newly discovered 
-                          // signficant samples including the bin center
-        for (OPJ_UINT32 i = 0; i < width;
+        val = 3u << (p - 2); // sample values for newly discovered 
+                             // signficant samples including the bin center
+        for (OPJ_INT32 i = 0; i < width;
               i += 8, cur_sig++, cur_mbr++, nxt_sig++, nxt_mbr++)
         {
+          OPJ_UINT32 ux, tx;
           OPJ_UINT32 mbr = *cur_mbr;
           OPJ_UINT32 new_sig = 0;
           if (mbr)  //are there any samples that migt be signficant 
           {
-            for (OPJ_UINT32 n = 0; n < 8; n += 4)
+            for (OPJ_INT32 n = 0; n < 8; n += 4)
             {
+              OPJ_UINT32 col_mask;
+              OPJ_UINT32 inv_sig;
+              OPJ_INT32 end;
+
               OPJ_UINT32 cwd = frwd_fetch(&sigprop); //get 32 bits
               OPJ_UINT32 cnt = 0;
 
               OPJ_UINT32 *dp = decoded_data + (y - 8) * stride;
               dp += i + n; //address for decoded samples
 
-              OPJ_UINT32 col_mask = 0xFu << (4 * n); //a mask to select a 
-                                                     //column
+              col_mask = 0xFu << (4 * n); //a mask to select a column
 
-              OPJ_UINT32 inv_sig = ~cur_sig[0]; // insignificant samples
+              inv_sig = ~cur_sig[0]; // insignificant samples
 
               //find the last sample we operate on
-              OPJ_UINT32 end = n + 4 + i < width ? n + 4 : width - i;
+              end = n + 4 + i < width ? n + 4 : width - i;
 
-              for (OPJ_UINT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
+              for (OPJ_INT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
               {
+                OPJ_UINT32 sample_mask;
+
                 if ((col_mask & mbr) == 0) //no samples need checking
                   continue;
 
                 //scan mbr to find a new signficant sample
-                OPJ_UINT32 sample_mask = 0x11111111u & col_mask; // LSB
+                sample_mask = 0x11111111u & col_mask; // LSB
                 if (mbr & sample_mask)
                 {
                   assert(dp[0] == 0); // the sample must have been 0
                   if (cwd & 1) //if this sample has become significant
                   { // must propagate it to nearby samples
+                    OPJ_UINT32 t;
                     new_sig |= sample_mask;  // new significant samples
-                    OPJ_UINT32 t = 0x32u << (j * 4);// propagation to neighbors
+                    t = 0x32u << (j * 4);// propagation to neighbors
                     mbr |= t & inv_sig; //remove already signifcant samples
                   }
                   cwd >>= 1; ++cnt; //consume bit and increment number of
@@ -1943,8 +2089,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
                   assert(dp[stride] == 0);
                   if (cwd & 1)
                   {
+                    OPJ_UINT32 t;
                     new_sig |= sample_mask;
-                    OPJ_UINT32 t = 0x74u << (j * 4);
+                    t = 0x74u << (j * 4);
                     mbr |= t & inv_sig;
                   }
                   cwd >>= 1; ++cnt;
@@ -1956,8 +2103,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
                   assert(dp[2 * stride] == 0);
                   if (cwd & 1)
                   {
+                    OPJ_UINT32 t;
                     new_sig |= sample_mask;
-                    OPJ_UINT32 t = 0xE8u << (j * 4);
+                    t = 0xE8u << (j * 4);
                     mbr |= t & inv_sig;
                   }
                   cwd >>= 1; ++cnt;
@@ -1969,8 +2117,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
                   assert(dp[3 * stride] == 0);
                   if (cwd & 1)
                   {
+                    OPJ_UINT32 t;
                     new_sig |= sample_mask;
-                    OPJ_UINT32 t = 0xC0u << (j * 4);
+                    t = 0xC0u << (j * 4);
                     mbr |= t & inv_sig;
                   }
                   cwd >>= 1; ++cnt;
@@ -1980,18 +2129,20 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
               //obtain signs here
               if (new_sig & (0xFFFFu << (4 * n))) //if any
               {
+                OPJ_UINT32 col_mask;
                 OPJ_UINT32 *dp = decoded_data + (y - 8) * stride;
                 dp += i + n; // decoded samples address
-                OPJ_UINT32 col_mask = 0xFu << (4 * n); //mask to select a 
-                                                       //column
+                col_mask = 0xFu << (4 * n); //mask to select a column
 
-                for (OPJ_UINT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
+                for (OPJ_INT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
                 {
+                  OPJ_UINT32 sample_mask;
+
                   if ((col_mask & new_sig) == 0) //if non is signficant
                     continue;
 
                   //scan 4 signs
-                  OPJ_UINT32 sample_mask = 0x11111111u & col_mask;
+                  sample_mask = 0x11111111u & col_mask;
                   if (new_sig & sample_mask)
                   {
                     assert(dp[0] == 0);
@@ -2041,31 +2192,32 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
           }
           //update the next stripe (vertically propagation)
           new_sig |= cur_sig[0];
-          OPJ_UINT32 u = (new_sig & 0x88888888) >> 3;
-          OPJ_UINT32 t = u | (u << 4) | (u >> 4); //left and right neighbors
+          ux = (new_sig & 0x88888888) >> 3;
+          tx = ux | (ux << 4) | (ux >> 4); //left and right neighbors
           if (i > 0)
-            nxt_mbr[-1] |= (u << 28) & ~nxt_sig[-1];
-          nxt_mbr[0] |= t & ~nxt_sig[0];
-          nxt_mbr[1] |= (u >> 28) & ~nxt_sig[1];
+            nxt_mbr[-1] |= (ux << 28) & ~nxt_sig[-1];
+          nxt_mbr[0] |= tx & ~nxt_sig[0];
+          nxt_mbr[1] |= (ux >> 28) & ~nxt_sig[1];
         }
 
         //clear current sigma
         //mbr need not be cleared because it is overwritten
         cur_sig = y & 0x4 ? sigma2 : sigma1;
-        memset(cur_sig, 0, (((width + 7) >> 3) + 1) << 2);
+        memset(cur_sig, 0, ((((OPJ_UINT32)width + 7u) >> 3) + 1u) << 2);
       }
     }
   }
 
   //terminating
   if (num_passes > 1) {
+    OPJ_INT32 st;
 
     if (num_passes > 2 && ((height & 3) == 1 || (height & 3) == 2))
     {//do magref
       OPJ_UINT32 *cur_sig = height & 0x4 ? sigma2 : sigma1; //reversed
-      OPJ_UINT32 *dpp = decoded_data + (height & 0xFFFFFFFCu) * stride;
-      OPJ_UINT32 half = 1 << (p - 2);
-      for (OPJ_UINT32 i = 0; i < width; i += 8)
+      OPJ_UINT32 *dpp = decoded_data + (height & 0xFFFFFC) * stride;
+      OPJ_UINT32 half = 1u << (p - 2);
+      for (OPJ_INT32 i = 0; i < width; i += 8)
       {
         OPJ_UINT32 cwd = rev_fetch_mrp(&magref);
         OPJ_UINT32 sig = *cur_sig++;
@@ -2081,8 +2233,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
               if (sig & sample_mask)
               {
+                OPJ_UINT32 sym;
                 assert(dp[0] != 0);
-                OPJ_UINT32 sym = cwd & 1;
+                sym = cwd & 1;
                 dp[0] ^= (1 - sym) << (p - 1);
                 dp[0] |= half;
                 cwd >>= 1;
@@ -2091,8 +2244,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
               if (sig & sample_mask)
               {
+                OPJ_UINT32 sym;
                 assert(dp[stride] != 0);
-                OPJ_UINT32 sym = cwd & 1;
+                sym = cwd & 1;
                 dp[stride] ^= (1 - sym) << (p - 1);
                 dp[stride] |= half;
                 cwd >>= 1;
@@ -2101,8 +2255,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
               if (sig & sample_mask)
               {
+                OPJ_UINT32 sym;
                 assert(dp[2 * stride] != 0);
-                OPJ_UINT32 sym = cwd & 1;
+                sym = cwd & 1;
                 dp[2 * stride] ^= (1 - sym) << (p - 1);
                 dp[2 * stride] |= half;
                 cwd >>= 1;
@@ -2111,8 +2266,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
               if (sig & sample_mask)
               {
+                OPJ_UINT32 sym;
                 assert(dp[3 * stride] != 0);
-                OPJ_UINT32 sym = cwd & 1;
+                sym = cwd & 1;
                 dp[3 * stride] ^= (1 - sym) << (p - 1);
                 dp[3 * stride] |= half;
                 cwd >>= 1;
@@ -2136,8 +2292,10 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
       OPJ_UINT32 *mbr = height & 0x4 ? mbr2 : mbr1;
       //integrate horizontally
       OPJ_UINT32 prev = 0;
-      for (OPJ_UINT32 i = 0; i < width; i += 8, mbr++, sig++)
+      for (OPJ_INT32 i = 0; i < width; i += 8, mbr++, sig++)
       {
+        OPJ_UINT32 t, z;
+
         mbr[0] = sig[0];
         mbr[0] |= prev >> 28;    //for first column, left neighbors
         mbr[0] |= sig[0] << 4;   //left neighbors
@@ -2146,18 +2304,19 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
         prev = sig[0];
 
         //integrate vertically
-        OPJ_UINT32 t = mbr[0], z = mbr[0];
+        t = mbr[0], z = mbr[0];
         z |= (t & 0x77777777) << 1; //above neighbors
         z |= (t & 0xEEEEEEEE) >> 1; //below neighbors
         mbr[0] = z & ~sig[0]; //remove already significance samples
       }
     }
 
-    OPJ_UINT32 st = height;
+    st = height;
     st -= height > 6 ? (((height + 1) & 3) + 3) : height;
-    for (OPJ_UINT32 y = st; y < height; y += 4)
+    for (OPJ_INT32 y = st; y < height; y += 4)
     {
       OPJ_UINT32 *cur_sig, *cur_mbr, *nxt_sig, *nxt_mbr;
+      OPJ_UINT32 val;
 
       OPJ_UINT32 pattern = 0xFFFFFFFFu; // a pattern needed samples
       if (height - y == 3)
@@ -2170,11 +2329,12 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
       //add membership from the next stripe, obtained above
       if (height - y > 4)
       {
+        OPJ_UINT32 prev = 0;
         cur_sig = y & 0x4 ? sigma2 : sigma1;
         cur_mbr = y & 0x4 ? mbr2 : mbr1;
         nxt_sig = y & 0x4 ? sigma1 : sigma2;
-        OPJ_UINT32 prev = 0;
-        for (OPJ_UINT32 i=0; i<width; i += 8, cur_mbr++, cur_sig++, nxt_sig++)
+
+        for (OPJ_INT32 i=0; i<width; i += 8, cur_mbr++, cur_sig++, nxt_sig++)
         {
           OPJ_UINT32 t = nxt_sig[0];
           t |= prev >> 28;     //for first column, left neighbors
@@ -2194,41 +2354,49 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
       cur_mbr = y & 0x4 ? mbr2 : mbr1;
       nxt_sig = y & 0x4 ? sigma1 : sigma2;
       nxt_mbr = y & 0x4 ? mbr1 : mbr2;
-      OPJ_UINT32 val = 3u << (p - 2);
-      for (OPJ_UINT32 i = 0; i < width; i += 8,
+      val = 3u << (p - 2);
+      for (OPJ_INT32 i = 0; i < width; i += 8,
             cur_sig++, cur_mbr++, nxt_sig++, nxt_mbr++)
       {
         OPJ_UINT32 mbr = *cur_mbr & pattern; //skip unneeded samples
         OPJ_UINT32 new_sig = 0;
+        OPJ_UINT32 ux, tx;
         if (mbr)
         {
-          for (OPJ_UINT32 n = 0; n < 8; n += 4)
+          for (OPJ_INT32 n = 0; n < 8; n += 4)
           {
+            OPJ_UINT32 col_mask;
+            OPJ_UINT32 inv_sig;
+            OPJ_INT32 end;
+
             OPJ_UINT32 cwd = frwd_fetch(&sigprop);
             OPJ_UINT32 cnt = 0;
 
             OPJ_UINT32 *dp = decoded_data + y * stride;
             dp += i + n;
 
-            OPJ_UINT32 col_mask = 0xFu << (4 * n);
+            col_mask = 0xFu << (4 * n);
 
-            OPJ_UINT32 inv_sig = ~cur_sig[0] & pattern;
+            inv_sig = ~cur_sig[0] & pattern;
 
-            OPJ_UINT32 end = n + 4 + i < width ? n + 4 : width - i;
-            for (OPJ_UINT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
+            end = n + 4 + i < width ? n + 4 : width - i;
+            for (OPJ_INT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
             {
+              OPJ_UINT32 sample_mask;
+
               if ((col_mask & mbr) == 0)
                 continue;
 
               //scan 4 mbr
-              OPJ_UINT32 sample_mask = 0x11111111u & col_mask;
+              sample_mask = 0x11111111u & col_mask;
               if (mbr & sample_mask)
               {
                 assert(dp[0] == 0);
                 if (cwd & 1)
                 {
+                  OPJ_UINT32 t;
                   new_sig |= sample_mask;
-                  OPJ_UINT32 t = 0x32u << (j * 4);
+                  t = 0x32u << (j * 4);
                   mbr |= t & inv_sig;
                 }
                 cwd >>= 1; ++cnt;
@@ -2240,8 +2408,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
                 assert(dp[stride] == 0);
                 if (cwd & 1)
                 {
+                  OPJ_UINT32 t;
                   new_sig |= sample_mask;
-                  OPJ_UINT32 t = 0x74u << (j * 4);
+                  t = 0x74u << (j * 4);
                   mbr |= t & inv_sig;
                 }
                 cwd >>= 1; ++cnt;
@@ -2253,8 +2422,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
                 assert(dp[2 * stride] == 0);
                 if (cwd & 1)
                 {
+                  OPJ_UINT32 t;
                   new_sig |= sample_mask;
-                  OPJ_UINT32 t = 0xE8u << (j * 4);
+                  t = 0xE8u << (j * 4);
                   mbr |= t & inv_sig;
                 }
                 cwd >>= 1; ++cnt;
@@ -2266,8 +2436,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
                 assert(dp[3 * stride] == 0);
                 if (cwd & 1)
                 {
+                  OPJ_UINT32 t;
                   new_sig |= sample_mask;
-                  OPJ_UINT32 t = 0xC0u << (j * 4);
+                  t = 0xC0u << (j * 4);
                   mbr |= t & inv_sig;
                 }
                 cwd >>= 1; ++cnt;
@@ -2277,17 +2448,19 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
             //signs here
             if (new_sig & (0xFFFFu << (4 * n)))
             {
+              OPJ_UINT32 col_mask;
               OPJ_UINT32 *dp = decoded_data + y * stride;
               dp += i + n;
-              OPJ_UINT32 col_mask = 0xFu << (4 * n);
+              col_mask = 0xFu << (4 * n);
 
-              for (OPJ_UINT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
+              for (OPJ_INT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
               {
+                OPJ_UINT32 sample_mask;
                 if ((col_mask & new_sig) == 0)
                   continue;
 
                 //scan 4 signs
-                OPJ_UINT32 sample_mask = 0x11111111u & col_mask;
+                sample_mask = 0x11111111u & col_mask;
                 if (new_sig & sample_mask)
                 {
                   assert(dp[0] == 0);
@@ -2336,21 +2509,21 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
         }
         //propagate down (vertically propagation)
         new_sig |= cur_sig[0];
-        OPJ_UINT32 u = (new_sig & 0x88888888) >> 3;
-        OPJ_UINT32 t = u | (u << 4) | (u >> 4);
+        ux = (new_sig & 0x88888888) >> 3;
+        tx = ux | (ux << 4) | (ux >> 4);
         if (i > 0)
-          nxt_mbr[-1] |= (u << 28) & ~nxt_sig[-1];
-        nxt_mbr[0] |= t & ~nxt_sig[0];
-        nxt_mbr[1] |= (u >> 28) & ~nxt_sig[1];
+          nxt_mbr[-1] |= (ux << 28) & ~nxt_sig[-1];
+        nxt_mbr[0] |= tx & ~nxt_sig[0];
+        nxt_mbr[1] |= (ux >> 28) & ~nxt_sig[1];
       }
     }
   }
 
   //int shift = 29 - missing_msbs;
-  for (OPJ_UINT32 y = 0; y < height; ++y)
+  for (OPJ_INT32 y = 0; y < height; ++y)
   {
-    OPJ_UINT32* sp = decoded_data + y * stride; // generated samplesv
-    for (OPJ_UINT32 x = 0; x < width; ++x, ++sp)
+    OPJ_INT32* sp = (OPJ_INT32*)decoded_data + y * stride;
+    for (OPJ_INT32 x = 0; x < width; ++x, ++sp)
     {
       OPJ_INT32 val = (*sp & 0x7FFFFFFF);
       *sp = ((OPJ_UINT32)*sp & 0x80000000) ? -val : val;
diff --git a/src/lib/openjp2/t2.c b/src/lib/openjp2/t2.c
index ac001ccba..4626d69bb 100644
--- a/src/lib/openjp2/t2.c
+++ b/src/lib/openjp2/t2.c
@@ -1262,7 +1262,7 @@ static OPJ_BOOL opj_t2_read_packet_header(opj_t2_t* p_t2,
             if ((p_tcp->tccps[p_pi->compno].cblksty & J2K_CCP_CBLKSTY_HT) != 0)
                 do {
                   OPJ_UINT32 bit_number;
-                  l_cblk->segs[l_segno].numnewpasses = l_segno == 0 ? 1 : n;
+                  l_cblk->segs[l_segno].numnewpasses = l_segno == 0 ? 1u : (OPJ_UINT32)n;
                   bit_number = l_cblk->numlenbits + opj_uint_floorlog2(
                                    l_cblk->segs[l_segno].numnewpasses);
                   if (bit_number > 32) {

From b684247201349584a8dc07d99dcc5fcfd3f267e1 Mon Sep 17 00:00:00 2001
From: Aous Naman <aous72@yahoo.com>
Date: Sat, 4 Sep 2021 12:09:59 +1000
Subject: [PATCH 03/10] Formatted files with prepare_commit.sh.  Code fixed to
 compile with Visual Studio 10

---
 src/lib/openjp2/fbc_dec.c | 4026 ++++++++++++++++++-------------------
 src/lib/openjp2/j2k.c     |   16 +-
 src/lib/openjp2/j2k.h     |    3 +-
 src/lib/openjp2/t1.c      |    3 +-
 src/lib/openjp2/t2.c      |  106 +-
 src/lib/openjp2/tcd.h     |    6 +-
 6 files changed, 2065 insertions(+), 2095 deletions(-)

diff --git a/src/lib/openjp2/fbc_dec.c b/src/lib/openjp2/fbc_dec.c
index 1627fedb3..0b52ca7d3 100644
--- a/src/lib/openjp2/fbc_dec.c
+++ b/src/lib/openjp2/fbc_dec.c
@@ -2,21 +2,21 @@
 // This software is released under the 2-Clause BSD license, included
 // below.
 //
-// Copyright (c) 2021, Aous Naman 
+// Copyright (c) 2021, Aous Naman
 // Copyright (c) 2021, Kakadu Software Pty Ltd, Australia
 // Copyright (c) 2021, The University of New South Wales, Australia
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
-// 
+//
 // 1. Redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer.
-// 
+//
 // 2. Redistributions in binary form must reproduce the above copyright
 // notice, this list of conditions and the following disclaimer in the
 // documentation and/or other materials provided with the distribution.
-// 
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
@@ -36,7 +36,7 @@
 //***************************************************************************/
 
 //***************************************************************************/
-/** @file fbc.cpp
+/** @file fbc_dec.cpp
  *  @brief implements HTJ2K block decoder
  */
 
@@ -50,19 +50,19 @@
 // compiler detection
 /////////////////////////////////////////////////////////////////////////////
 #ifdef _MSC_VER
-  #define OPJ_COMPILER_MSVC
+#define OPJ_COMPILER_MSVC
 #elif (defined __GNUC__)
-  #define OPJ_COMPILER_GNUC
+#define OPJ_COMPILER_GNUC
 #endif
 
 //************************************************************************/
-/** @brief Displays the error message for disabling the decoding of CUP 
+/** @brief Displays the error message for disabling the decoding of CUP
   *        pass due to insufficient precision once
   */
 static OPJ_BOOL cannot_decode_due_to_insufficient_precision = OPJ_FALSE;
 
 //************************************************************************/
-/** @brief Displays the error message for disabling the decoding of SPP and 
+/** @brief Displays the error message for disabling the decoding of SPP and
   *        MRP passes once
   */
 static OPJ_BOOL cannot_decode_spp_mrp_msg = OPJ_FALSE;
@@ -71,21 +71,21 @@ static OPJ_BOOL cannot_decode_spp_mrp_msg = OPJ_FALSE;
 /** @brief Generates population count (i.e., the number of set bits)
   *
   *   @param [in]  val is the value for which population count is sought
-  */ 
-static inline 
+  */
+static INLINE
 OPJ_UINT32 population_count(OPJ_UINT32 val)
 {
 #ifdef OPJ_COMPILER_MSVC
-  return (OPJ_UINT32)__popcnt(val);
+    return (OPJ_UINT32)__popcnt(val);
 #elif (defined OPJ_COMPILER_GNUC)
-  return (OPJ_UINT32)__builtin_popcount(val);
+    return (OPJ_UINT32)__builtin_popcount(val);
 #else
-  val -= ((val >> 1) & 0x55555555);
-  val = (((val >> 2) & 0x33333333) + (val & 0x33333333));
-  val = (((val >> 4) + val) & 0x0f0f0f0f);
-  val += (val >> 8);
-  val += (val >> 16);
-  return (OPJ_UINT32)(val & 0x0000003f);
+    val -= ((val >> 1) & 0x55555555);
+    val = (((val >> 2) & 0x33333333) + (val & 0x33333333));
+    val = (((val >> 4) + val) & 0x0f0f0f0f);
+    val += (val >> 8);
+    val += (val >> 16);
+    return (OPJ_UINT32)(val & 0x0000003f);
 #endif
 }
 
@@ -93,26 +93,26 @@ OPJ_UINT32 population_count(OPJ_UINT32 val)
 /** @brief Counts the number of leading zeros
   *
   *   @param [in]  val is the value for which leading zero count is sought
-  */ 
+  */
 #ifdef OPJ_COMPILER_MSVC
-  #pragma intrinsic(_BitScanReverse)
+#pragma intrinsic(_BitScanReverse)
 #endif
-static inline 
+static INLINE
 OPJ_UINT32 count_leading_zeros(OPJ_UINT32 val)
 {
 #ifdef OPJ_COMPILER_MSVC
-  unsigned long result = 0;
-  _BitScanReverse(&result, val);
-  return 31U ^ (OPJ_UINT32)result;
+    unsigned long result = 0;
+    _BitScanReverse(&result, val);
+    return 31U ^ (OPJ_UINT32)result;
 #elif (defined OPJ_COMPILER_GNUC)
-  return (OPJ_UINT32)__builtin_clz(val);
+    return (OPJ_UINT32)__builtin_clz(val);
 #else
-  val |= (val >> 1);
-  val |= (val >> 2);
-  val |= (val >> 4);
-  val |= (val >> 8);
-  val |= (val >> 16);
-  return 32U - population_count(val);
+    val |= (val >> 1);
+    val |= (val >> 2);
+    val |= (val >> 4);
+    val |= (val >> 8);
+    val |= (val >> 16);
+    return 32U - population_count(val);
 #endif
 }
 
@@ -122,24 +122,24 @@ OPJ_UINT32 count_leading_zeros(OPJ_UINT32 val)
   *  A number of events is decoded from the MEL bitstream ahead of time
   *  and stored in run/num_runs.
   *  Each run represents the number of zero events before a one event.
-  */ 
+  */
 typedef struct dec_mel {
-  // data decoding machinary
-  OPJ_UINT8* data;  //!<the address of data (or bitstream)
-  OPJ_UINT64 tmp;   //!<temporary buffer for read data
-  int bits;         //!<number of bits stored in tmp
-  int size;         //!<number of bytes in MEL code
-  OPJ_BOOL unstuff; //!<true if the next bit needs to be unstuffed
-  int k;            //!<state of MEL decoder
-
-  // queue of decoded runs
-  int num_runs;    //!<number of decoded runs left in runs (maximum 8)
-  OPJ_UINT64 runs; //!<runs of decoded MEL codewords (7 bits/run)
+    // data decoding machinery
+    OPJ_UINT8* data;  //!<the address of data (or bitstream)
+    OPJ_UINT64 tmp;   //!<temporary buffer for read data
+    int bits;         //!<number of bits stored in tmp
+    int size;         //!<number of bytes in MEL code
+    OPJ_BOOL unstuff; //!<true if the next bit needs to be unstuffed
+    int k;            //!<state of MEL decoder
+
+    // queue of decoded runs
+    int num_runs;    //!<number of decoded runs left in runs (maximum 8)
+    OPJ_UINT64 runs; //!<runs of decoded MEL codewords (7 bits/run)
 } dec_mel_t;
 
 //************************************************************************/
 /** @brief Reads and unstuffs the MEL bitstream
-  * 
+  *
   *  This design needs more bytes in the codeblock buffer than the length
   *  of the cleanup pass by up to 2 bytes.
   *
@@ -149,171 +149,181 @@ typedef struct dec_mel {
   *
   *  @param [in]  melp is a pointer to dec_mel_t structure
   */
-static inline
+static INLINE
 void mel_read(dec_mel_t *melp)
 {
-  OPJ_UINT32 val; 
-  int bits;
-  OPJ_UINT32 t;
-  OPJ_BOOL unstuff;
-  
-  if (melp->bits > 32)  //there are enough bits in the tmp variable
-    return;             // return without reading new data
-
-  val = 0xFFFFFFFF;
-  //the next line (the if statement) needs to be tested first
-  //if (melp->size > 0)              // if there is data in the MEL segment
+    OPJ_UINT32 val;
+    int bits;
+    OPJ_UINT32 t;
+    OPJ_BOOL unstuff;
+
+    if (melp->bits > 32) { //there are enough bits in the tmp variable
+        return;    // return without reading new data
+    }
+    val = 0xFFFFFFFF;
+    //the next line (the if statement) needs to be tested first
+    //if (melp->size > 0)              // if there is data in the MEL segment
     val = *(OPJ_UINT32*)melp->data;  // read 32 bits from MEL data
-      
-  // next we unstuff them before adding them to the buffer
-  bits = 32 - melp->unstuff; // number of bits in val, subtract 1 if
-                             // the previously read byte requires 
-                             // unstuffing
-
-  // data is unstuffed and accumulated in t
-  // bits has the number of bits in t
-  t = (melp->size > 0) ? (val & 0xFF) : 0xFF; // feed 0xFF if the 
-                                  // MEL bitstream has been exhausted
-  if (melp->size == 1) t |= 0xF;  // if this is 1 byte before the last
-                                  // in MEL+VLC segments (remember they
-                                  // can overlap)
-  melp->data += melp->size-- > 0; // advance data by 1 byte if we have not
-                                  // reached the end of the MEL segment
-  unstuff = ((val & 0xFF) == 0xFF); // true if the byte needs unstuffing
-
-  bits -= unstuff; // there is one less bit in t if unstuffing is needed
-  t = t << (8 - unstuff); // move up to make room for the next byte
-
-  //this is a repeat of the above
-  t |= (melp->size > 0) ? ((val>>8) & 0xFF) : 0xFF;
-  if (melp->size == 1) t |= 0xF;
-  melp->data += melp->size-- > 0;
-  unstuff = (((val >> 8) & 0xFF) == 0xFF);
-
-  bits -= unstuff;
-  t = t << (8 - unstuff);
-
-  t |= (melp->size > 0) ? ((val>>16) & 0xFF) : 0xFF;
-  if (melp->size == 1) t |= 0xF;
-  melp->data += melp->size-- > 0;
-  unstuff = (((val >> 16) & 0xFF) == 0xFF);
-
-  bits -= unstuff;
-  t = t << (8 - unstuff);
-
-  t |= (melp->size > 0) ? ((val>>24) & 0xFF) : 0xFF;
-  if (melp->size == 1) t |= 0xF;
-  melp->data += melp->size-- > 0;
-  melp->unstuff = (((val >> 24) & 0xFF) == 0xFF);
-
-  // move t to tmp, and push the result all the way up, so we read from
-  // the MSB
-  melp->tmp |= ((OPJ_UINT64)t) << (64 - bits - melp->bits);
-  melp->bits += bits; //increment the number of bits in tmp
+
+    // next we unstuff them before adding them to the buffer
+    bits = 32 - melp->unstuff;      // number of bits in val, subtract 1 if
+    // the previously read byte requires
+    // unstuffing
+
+    // data is unstuffed and accumulated in t
+    // bits has the number of bits in t
+    t = (melp->size > 0) ? (val & 0xFF) : 0xFF; // feed 0xFF if the
+    // MEL bitstream has been exhausted
+    if (melp->size == 1) {
+        t |= 0xF;    // if this is 1 byte before the last
+    }
+    // in MEL+VLC segments (remember they
+    // can overlap)
+    melp->data += melp->size-- > 0; // advance data by 1 byte if we have not
+    // reached the end of the MEL segment
+    unstuff = ((val & 0xFF) == 0xFF); // true if the byte needs unstuffing
+
+    bits -= unstuff; // there is one less bit in t if unstuffing is needed
+    t = t << (8 - unstuff); // move up to make room for the next byte
+
+    //this is a repeat of the above
+    t |= (melp->size > 0) ? ((val >> 8) & 0xFF) : 0xFF;
+    if (melp->size == 1) {
+        t |= 0xF;
+    }
+    melp->data += melp->size-- > 0;
+    unstuff = (((val >> 8) & 0xFF) == 0xFF);
+
+    bits -= unstuff;
+    t = t << (8 - unstuff);
+
+    t |= (melp->size > 0) ? ((val >> 16) & 0xFF) : 0xFF;
+    if (melp->size == 1) {
+        t |= 0xF;
+    }
+    melp->data += melp->size-- > 0;
+    unstuff = (((val >> 16) & 0xFF) == 0xFF);
+
+    bits -= unstuff;
+    t = t << (8 - unstuff);
+
+    t |= (melp->size > 0) ? ((val >> 24) & 0xFF) : 0xFF;
+    if (melp->size == 1) {
+        t |= 0xF;
+    }
+    melp->data += melp->size-- > 0;
+    melp->unstuff = (((val >> 24) & 0xFF) == 0xFF);
+
+    // move t to tmp, and push the result all the way up, so we read from
+    // the MSB
+    melp->tmp |= ((OPJ_UINT64)t) << (64 - bits - melp->bits);
+    melp->bits += bits; //increment the number of bits in tmp
 }
 
 //************************************************************************/
 /** @brief Decodes unstuffed MEL segment bits stored in tmp to runs
-  * 
+  *
   *  Runs are stored in "runs" and the number of runs in "num_runs".
-  *  Each run represents a number of zero events that may or may not 
+  *  Each run represents a number of zero events that may or may not
   *  terminate in a 1 event.
   *  Each run is stored in 7 bits.  The LSB is 1 if the run terminates in
-  *  a 1 event, 0 otherwise.  The next 6 bits, for the case terminating 
-  *  with 1, contain the number of consecutive 0 zero events * 2; for the 
-  *  case terminating with 0, they store (number of consecutive 0 zero 
+  *  a 1 event, 0 otherwise.  The next 6 bits, for the case terminating
+  *  with 1, contain the number of consecutive 0 zero events * 2; for the
+  *  case terminating with 0, they store (number of consecutive 0 zero
   *  events - 1) * 2.
   *  A total of 6 bits (made up of 1 + 5) should have been enough.
   *
   *  @param [in]  melp is a pointer to dec_mel_t structure
   */
-static inline
+static INLINE
 void mel_decode(dec_mel_t *melp)
 {
-  static const int mel_exp[13] = { //MEL exponents
-    0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5
-  };
-
-  if (melp->bits < 6) // if there are less than 6 bits in tmp
-    mel_read(melp);   // then read from the MEL bitstream
-                      // 6 bits is the largest decodable MEL cwd
-
-  //repeat so long that there is enough decodable bits in tmp,
-  // and the runs store is not full (num_runs < 8)
-  while (melp->bits >= 6 && melp->num_runs < 8)
-  {
-    int eval = mel_exp[melp->k]; // number of bits associated with state
-    int run = 0;
-    if (melp->tmp & (1ull<<63)) //The next bit to decode (stored in MSB)
-    { //one is found
-      run = 1 << eval;  
-      run--; // consecutive runs of 0 events - 1
-      melp->k = melp->k + 1 < 12 ? melp->k + 1 : 12;//increment, max is 12
-      melp->tmp <<= 1; // consume one bit from tmp
-      melp->bits -= 1;
-      run = run << 1; // a stretch of zeros not terminating in one
+    static const int mel_exp[13] = { //MEL exponents
+        0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5
+    };
+
+    if (melp->bits < 6) { // if there are less than 6 bits in tmp
+        mel_read(melp);    // then read from the MEL bitstream
     }
-    else
-    { //0 is found
-      run = (int)(melp->tmp >> (63 - eval)) & ((1 << eval) - 1);
-      melp->k = melp->k - 1 > 0 ? melp->k - 1 : 0; //decrement, min is 0
-      melp->tmp <<= eval + 1; //consume eval + 1 bits (max is 6)
-      melp->bits -= eval + 1;
-      run = (run << 1) + 1; // a stretch of zeros terminating with one
+    // 6 bits is the largest decodable MEL cwd
+
+    //repeat as long as there are enough decodable bits in tmp,
+    // and the runs store is not full (num_runs < 8)
+    while (melp->bits >= 6 && melp->num_runs < 8) {
+        int eval = mel_exp[melp->k]; // number of bits associated with state
+        int run = 0;
+        if (melp->tmp & (1ull << 63)) { //The next bit to decode (stored in MSB)
+            //one is found
+            run = 1 << eval;
+            run--; // consecutive runs of 0 events - 1
+            melp->k = melp->k + 1 < 12 ? melp->k + 1 : 12;//increment, max is 12
+            melp->tmp <<= 1; // consume one bit from tmp
+            melp->bits -= 1;
+            run = run << 1; // a stretch of zeros not terminating in one
+        } else {
+            //0 is found
+            run = (int)(melp->tmp >> (63 - eval)) & ((1 << eval) - 1);
+            melp->k = melp->k - 1 > 0 ? melp->k - 1 : 0; //decrement, min is 0
+            melp->tmp <<= eval + 1; //consume eval + 1 bits (max is 6)
+            melp->bits -= eval + 1;
+            run = (run << 1) + 1; // a stretch of zeros terminating with one
+        }
+        eval = melp->num_runs * 7;                 // 7 bits per run
+        melp->runs &= ~((OPJ_UINT64)0x3F << eval); // 6 bits are sufficient
+        melp->runs |= ((OPJ_UINT64)run) << eval;   // store the value in runs
+        melp->num_runs++;                          // increment count
     }
-    eval = melp->num_runs * 7;                 // 7 bits per run
-    melp->runs &= ~((OPJ_UINT64)0x3F << eval); // 6 bits are sufficient
-    melp->runs |= ((OPJ_UINT64)run) << eval;   // store the value in runs
-    melp->num_runs++;                          // increment count  
-  }
 }
 
 //************************************************************************/
 /** @brief Initiates a dec_mel_t structure for MEL decoding and reads
   *         some bytes in order to get the read address to a multiple
-  *         of 4 
+  *         of 4
   *
   *  @param [in]  melp is a pointer to dec_mel_t structure
   *  @param [in]  bbuf is a pointer to byte buffer
   *  @param [in]  lcup is the length of MagSgn+MEL+VLC segments
   *  @param [in]  scup is the length of MEL+VLC segments
   */
-static inline
+static INLINE
 void mel_init(dec_mel_t *melp, OPJ_UINT8* bbuf, int lcup, int scup)
 {
-  int num;
-
-  melp->data = bbuf + lcup - scup; // move the pointer to the start of MEL
-  melp->bits = 0;                  // 0 bits in tmp
-  melp->tmp = 0;                   //
-  melp->unstuff = OPJ_FALSE;       // no unstuffing
-  melp->size = scup - 1;           // size is the length of MEL+VLC-1
-  melp->k = 0;                     // 0 for state 
-  melp->num_runs = 0;              // num_runs is 0
-  melp->runs = 0;                  //
-
-  //This code is borrowed; original is for a different architecture
-  //These few lines take care of the case where data is not at a multiple
-  // of 4 boundary.  It reads 1,2,3 up to 4 bytes from the MEL segment
-  num = 4 - (int)((intptr_t)(melp->data) & 0x3);
-  for (int i = 0; i < num; ++i) { // this code is similar to mel_read
-    OPJ_UINT64 d;
-    int d_bits;
-    
-    assert(melp->unstuff == OPJ_FALSE || melp->data[0] <= 0x8F);
-    d = (melp->size > 0) ? *melp->data : 0xFF; // if buffer is consumed 
-                                               // set data to 0xFF
-    if (melp->size == 1) d |= 0xF; //if this is MEL+VLC-1, set LSBs to 0xF
-                                    // see the standard
-    melp->data += melp->size-- > 0; //increment if the end is not reached
-    d_bits = 8 - melp->unstuff; //if unstuffing is needed, reduce by 1
-    melp->tmp = (melp->tmp << d_bits) | d; //store bits in tmp
-    melp->bits += d_bits;  //increment tmp by number of bits
-    melp->unstuff = ((d & 0xFF) == 0xFF); //true of next byte needs 
-                                          //unstuffing
-  }
-  melp->tmp <<= (64 - melp->bits); //push all the way up so the first bit
-                                    // is the MSB
+    int num;
+    int i;
+
+    melp->data = bbuf + lcup - scup; // move the pointer to the start of MEL
+    melp->bits = 0;                  // 0 bits in tmp
+    melp->tmp = 0;                   //
+    melp->unstuff = OPJ_FALSE;       // no unstuffing
+    melp->size = scup - 1;           // size is the length of MEL+VLC-1
+    melp->k = 0;                     // 0 for state
+    melp->num_runs = 0;              // num_runs is 0
+    melp->runs = 0;                  //
+
+    //This code is borrowed; original is for a different architecture
+    //These few lines take care of the case where data is not at a multiple
+    // of 4 boundary.  It reads 1,2,3 up to 4 bytes from the MEL segment
+    num = 4 - (int)((intptr_t)(melp->data) & 0x3);
+    for (i = 0; i < num; ++i) { // this code is similar to mel_read
+        OPJ_UINT64 d;
+        int d_bits;
+
+        assert(melp->unstuff == OPJ_FALSE || melp->data[0] <= 0x8F);
+        d = (melp->size > 0) ? *melp->data : 0xFF; // if buffer is consumed
+        // set data to 0xFF
+        if (melp->size == 1) {
+            d |= 0xF;    //if this is MEL+VLC-1, set LSBs to 0xF
+        }
+        // see the standard
+        melp->data += melp->size-- > 0; //increment if the end is not reached
+        d_bits = 8 - melp->unstuff; //if unstuffing is needed, reduce by 1
+        melp->tmp = (melp->tmp << d_bits) | d; //store bits in tmp
+        melp->bits += d_bits;  //increment tmp by number of bits
+        melp->unstuff = ((d & 0xFF) == 0xFF); //true if next byte needs
+        //unstuffing
+    }
+    melp->tmp <<= (64 - melp->bits); //push all the way up so the first bit
+    // is the MSB
 }
 
 //************************************************************************/
@@ -321,103 +331,104 @@ void mel_init(dec_mel_t *melp, OPJ_UINT8* bbuf, int lcup, int scup)
   *         MEL segment is decoded
   *
   * @param [in]  melp is a pointer to dec_mel_t structure
-  */    
-static inline
+  */
+static INLINE
 int mel_get_run(dec_mel_t *melp)
 {
-  int t;
-  if (melp->num_runs == 0)  //if no runs, decode more bit from MEL segment
-    mel_decode(melp);
-
-  t = melp->runs & 0x7F; //retrieve one run
-  melp->runs >>= 7;  // remove the retrieved run
-  melp->num_runs--;
-  return t; // return run
+    int t;
+    if (melp->num_runs == 0) { //if no runs, decode more bits from MEL segment
+        mel_decode(melp);
+    }
+
+    t = melp->runs & 0x7F; //retrieve one run
+    melp->runs >>= 7;  // remove the retrieved run
+    melp->num_runs--;
+    return t; // return run
 }
 
 //************************************************************************/
 /** @brief A structure for reading and unstuffing a segment that grows
   *         backward, such as VLC and MRP
-  */ 
+  */
 typedef struct rev_struct {
-  //storage
-  OPJ_UINT8* data;  //!<pointer to where to read data
-  OPJ_UINT64 tmp;	  //!<temporary buffer of read data
-  OPJ_UINT32 bits;  //!<number of bits stored in tmp
-  int size;         //!<number of bytes left
-  OPJ_BOOL unstuff; //!<true if the last byte is more than 0x8F
-                    //!<then the current byte is unstuffed if it is 0x7F
+    //storage
+    OPJ_UINT8* data;  //!<pointer to where to read data
+    OPJ_UINT64 tmp;     //!<temporary buffer of read data
+    OPJ_UINT32 bits;  //!<number of bits stored in tmp
+    int size;         //!<number of bytes left
+    OPJ_BOOL unstuff; //!<true if the last byte is more than 0x8F
+    //!<then the current byte is unstuffed if it is 0x7F
 } rev_struct_t;
 
 //************************************************************************/
 /** @brief Read and unstuff data from a backwardly-growing segment
   *
   *  This reader can read up to 8 bytes from before the VLC segment.
-  *  Care must be taken not read from unreadable memory, causing a 
+  *  Care must be taken not to read from unreadable memory, causing a
   *  segmentation fault.
-  * 
+  *
   *  Note that there is another subroutine rev_read_mrp that is slightly
   *  different.  The other one fills zeros when the buffer is exhausted.
   *  This one basically does not care if the bytes are consumed, because
   *  any extra data should not be used in the actual decoding.
   *
-  *  Unstuffing is needed to prevent sequences more than 0xFF8F from 
+  *  Unstuffing is needed to prevent sequences more than 0xFF8F from
   *  appearing in the bits stream; since we are reading backward, we keep
-  *  watch when a value larger than 0x8F appears in the bitstream. 
-  *  If the byte following this is 0x7F, we unstuff this byte (ignore the 
+  *  watch when a value larger than 0x8F appears in the bitstream.
+  *  If the byte following this is 0x7F, we unstuff this byte (ignore the
   *  MSB of that byte, which should be 0).
   *
   *  @param [in]  vlcp is a pointer to rev_struct_t structure
   */
-static inline 
+static INLINE
 void rev_read(rev_struct_t *vlcp)
 {
-  OPJ_UINT32 val;
-  OPJ_UINT32 tmp;
-  OPJ_UINT32 bits;
-  OPJ_BOOL unstuff;
-
-  //process 4 bytes at a time
-  if (vlcp->bits > 32)  // if there are more than 32 bits in tmp, then 
-    return;             // reading 32 bits can overflow vlcp->tmp
-  val = 0;
-  //the next line (the if statement) needs to be tested first
-  if (vlcp->size > 0)  // if there are bytes left in the VLC segment
-  {
-    // We pad the data by 8 bytes at the beginning of the code stream 
-    // buffer
-    val = *(OPJ_UINT32*)vlcp->data; // then read 32 bits
-    vlcp->data -= 4;                // move data pointer back by 4
-    vlcp->size -= 4;                // reduce available byte by 4
-  }
-
-  //accumulate in tmp, number of bits in tmp are stored in bits
-  tmp = val >> 24;  //start with the MSB byte
-
-  // test unstuff (previous byte is >0x8F), and this byte is 0x7F
-  bits = 8u - ((vlcp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1u : 0u);
-  unstuff = (val >> 24) > 0x8F; //this is for the next byte
-
-  tmp |= ((val >> 16) & 0xFF) << bits; //process the next byte
-  bits += 8u - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1u : 0u);
-  unstuff = ((val >> 16) & 0xFF) > 0x8F;
-
-  tmp |= ((val >> 8) & 0xFF) << bits;
-  bits += 8u - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1u : 0u);
-  unstuff = ((val >> 8) & 0xFF) > 0x8F;
-
-  tmp |= (val & 0xFF) << bits;
-  bits += 8u - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1u : 0u);
-  unstuff = (val & 0xFF) > 0x8F;
-
-  // now move the read and unstuffed bits into vlcp->tmp
-  vlcp->tmp |= (OPJ_UINT64)tmp << vlcp->bits;
-  vlcp->bits += bits;
-  vlcp->unstuff = unstuff; // this for the next read
+    OPJ_UINT32 val;
+    OPJ_UINT32 tmp;
+    OPJ_UINT32 bits;
+    OPJ_BOOL unstuff;
+
+    //process 4 bytes at a time
+    if (vlcp->bits > 32) { // if there are more than 32 bits in tmp, then
+        return;    // reading 32 bits can overflow vlcp->tmp
+    }
+    val = 0;
+    //the next line (the if statement) needs to be tested first
+    if (vlcp->size > 0) { // if there are bytes left in the VLC segment
+        // We pad the data by 8 bytes at the beginning of the code stream
+        // buffer
+        val = *(OPJ_UINT32*)vlcp->data; // then read 32 bits
+        vlcp->data -= 4;                // move data pointer back by 4
+        vlcp->size -= 4;                // reduce available byte by 4
+    }
+
+    //accumulate in tmp, number of bits in tmp are stored in bits
+    tmp = val >> 24;  //start with the MSB byte
+
+    // test unstuff (previous byte is >0x8F), and this byte is 0x7F
+    bits = 8u - ((vlcp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = (val >> 24) > 0x8F; //this is for the next byte
+
+    tmp |= ((val >> 16) & 0xFF) << bits; //process the next byte
+    bits += 8u - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = ((val >> 16) & 0xFF) > 0x8F;
+
+    tmp |= ((val >> 8) & 0xFF) << bits;
+    bits += 8u - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = ((val >> 8) & 0xFF) > 0x8F;
+
+    tmp |= (val & 0xFF) << bits;
+    bits += 8u - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = (val & 0xFF) > 0x8F;
+
+    // now move the read and unstuffed bits into vlcp->tmp
+    vlcp->tmp |= (OPJ_UINT64)tmp << vlcp->bits;
+    vlcp->bits += bits;
+    vlcp->unstuff = unstuff; // this for the next read
 }
 
 //************************************************************************/
-/** @brief Initiates the rev_struct_t structure and reads a few bytes to 
+/** @brief Initiates the rev_struct_t structure and reads a few bytes to
   *         move the read address to multiple of 4
   *
   *  There is another similar rev_init_mrp subroutine.  The difference is
@@ -430,62 +441,61 @@ void rev_read(rev_struct_t *vlcp)
   *  @param [in]  lcup is the length of MagSgn+MEL+VLC segments
   *  @param [in]  scup is the length of MEL+VLC segments
   */
-static inline 
+static INLINE
 void rev_init(rev_struct_t *vlcp, OPJ_UINT8* data, int lcup, int scup)
 {
-  OPJ_UINT32 d;
-  int num;
-  int tnum;
-
-  //first byte has only the upper 4 bits
-  vlcp->data = data + lcup - 2;
-
-  //size can not be larger than this, in fact it should be smaller
-  vlcp->size = scup - 2;
-
-  d = *vlcp->data--;            // read one byte (this is a half byte)
-  vlcp->tmp = d >> 4;           // both initialize and set
-  vlcp->bits = 4 - ((vlcp->tmp & 7) == 7); //check standard
-  vlcp->unstuff = (d | 0xF) > 0x8F; //this is useful for the next byte
-
-  //This code is designed for an architecture that read address should
-  // align to the read size (address multiple of 4 if read size is 4)
-  //These few lines take care of the case where data is not at a multiple
-  // of 4 boundary. It reads 1,2,3 up to 4 bytes from the VLC bitstream
-  num = 1 + (int)((intptr_t)(vlcp->data) & 0x3);
-  tnum = num < vlcp->size ? num : vlcp->size;
-  for (int i = 0; i < tnum; ++i) {
-    OPJ_UINT64 d;
-    OPJ_UINT32 d_bits;
-    d = *vlcp->data--;  // read one byte and move read pointer
-    //check if the last byte was >0x8F (unstuff == true) and this is 0x7F
-    d_bits = 8u - ((vlcp->unstuff && ((d & 0x7F) == 0x7F)) ? 1u : 0u);
-    vlcp->tmp |= d << vlcp->bits; // move data to vlcp->tmp
-    vlcp->bits += d_bits;
-    vlcp->unstuff = d > 0x8F; // for next byte
-  }
-  vlcp->size -= tnum;
-  vlcp->data -= 3; // make ready to read 32 bits (address multiple of 4)
-  rev_read(vlcp);  // read another 32 buts
+    OPJ_UINT32 d;
+    int num, tnum, i;
+
+    //first byte has only the upper 4 bits
+    vlcp->data = data + lcup - 2;
+
+    //size can not be larger than this, in fact it should be smaller
+    vlcp->size = scup - 2;
+
+    d = *vlcp->data--;            // read one byte (this is a half byte)
+    vlcp->tmp = d >> 4;           // both initialize and set
+    vlcp->bits = 4 - ((vlcp->tmp & 7) == 7); //check standard
+    vlcp->unstuff = (d | 0xF) > 0x8F; //this is useful for the next byte
+
+    //This code is designed for an architecture that read address should
+    // align to the read size (address multiple of 4 if read size is 4)
+    //These few lines take care of the case where data is not at a multiple
+    // of 4 boundary. It reads 1,2,3 up to 4 bytes from the VLC bitstream
+    num = 1 + (int)((intptr_t)(vlcp->data) & 0x3);
+    tnum = num < vlcp->size ? num : vlcp->size;
+    for (i = 0; i < tnum; ++i) {
+        OPJ_UINT64 d;
+        OPJ_UINT32 d_bits;
+        d = *vlcp->data--;  // read one byte and move read pointer
+        //check if the last byte was >0x8F (unstuff == true) and this is 0x7F
+        d_bits = 8u - ((vlcp->unstuff && ((d & 0x7F) == 0x7F)) ? 1u : 0u);
+        vlcp->tmp |= d << vlcp->bits; // move data to vlcp->tmp
+        vlcp->bits += d_bits;
+        vlcp->unstuff = d > 0x8F; // for next byte
+    }
+    vlcp->size -= tnum;
+    vlcp->data -= 3; // make ready to read 32 bits (address multiple of 4)
+    rev_read(vlcp);  // read another 32 bits
 }
 
 //************************************************************************/
-/** @brief Retrieves 32 bits from the head of a rev_struct structure 
+/** @brief Retrieves 32 bits from the head of a rev_struct structure
   *
   *  By the end of this call, vlcp->tmp must have no less than 33 bits
   *
   *  @param [in]  vlcp is a pointer to rev_struct structure
   */
-static inline 
+static INLINE
 OPJ_UINT32 rev_fetch(rev_struct_t *vlcp)
 {
-  if (vlcp->bits < 32)  // if there are less then 32 bits, read more
-  {
-    rev_read(vlcp);     // read 32 bits, but unstuffing might reduce this
-    if (vlcp->bits < 32)// if there is still space in vlcp->tmp for 32 bits
-      rev_read(vlcp);   // read another 32
-  }
-  return (OPJ_UINT32)vlcp->tmp; // return the head (bottom-most) of vlcp->tmp
+    if (vlcp->bits < 32) { // if there are fewer than 32 bits, read more
+        rev_read(vlcp);     // read 32 bits, but unstuffing might reduce this
+        if (vlcp->bits < 32) { // if there is still space in vlcp->tmp for 32 bits
+            rev_read(vlcp);    // read another 32
+        }
+    }
+    return (OPJ_UINT32)vlcp->tmp; // return the head (bottom-most) of vlcp->tmp
 }
 
 //************************************************************************/
@@ -494,13 +504,13 @@ OPJ_UINT32 rev_fetch(rev_struct_t *vlcp)
   *  @param [in]  vlcp is a pointer to rev_struct structure
   *  @param [in]  num_bits is the number of bits to be removed
   */
-static inline 
+static INLINE
 OPJ_UINT32 rev_advance(rev_struct_t *vlcp, OPJ_UINT32 num_bits)
 {
-  assert(num_bits <= vlcp->bits); // vlcp->tmp must have more than num_bits
-  vlcp->tmp >>= num_bits;         // remove bits
-  vlcp->bits -= num_bits;         // decrement the number of bits
-  return (OPJ_UINT32)vlcp->tmp;
+    assert(num_bits <= vlcp->bits); // vlcp->tmp must have more than num_bits
+    vlcp->tmp >>= num_bits;         // remove bits
+    vlcp->bits -= num_bits;         // decrement the number of bits
+    return (OPJ_UINT32)vlcp->tmp;
 }
 
 //************************************************************************/
@@ -514,50 +524,51 @@ OPJ_UINT32 rev_advance(rev_struct_t *vlcp, OPJ_UINT32 num_bits)
   *
   *  @param [in]  mrp is a pointer to rev_struct structure
   */
-static inline 
+static INLINE
 void rev_read_mrp(rev_struct_t *mrp)
 {
-  OPJ_UINT32 val;
-  OPJ_UINT32 tmp; 
-  OPJ_UINT32 bits;
-  OPJ_BOOL unstuff;
-
-  //process 4 bytes at a time
-  if (mrp->bits > 32)
-    return;
-  val = 0;
-  //the next line (the if statement) needs to be tested first
-  //notice that second line can be simplified to mrp->data -= 4
-  // if (mrp->size > 0)
-  {
-    val = *(OPJ_UINT32*)mrp->data;      // read 32 bits
-    mrp->data -= mrp->size > 0 ? 4 : 0; // move back read pointer only if 
-                                        // there is data
-  }
-
-  //accumulate in tmp, and keep count in bits
-  tmp = (mrp->size-- > 0) ? (val >> 24) : 0; // fill zeros if all 
-                                                        
-  //test if the last byte > 0x8F (unstuff must be true) and this is 0x7F
-  bits = 8u - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1u : 0u);
-  unstuff = (val >> 24) > 0x8F;
-
-  //process the next byte
-  tmp |= (mrp->size-- > 0) ? (((val >> 16) & 0xFF) << bits) : 0;
-  bits += 8u - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1u : 0u);
-  unstuff = ((val >> 16) & 0xFF) > 0x8F;
-
-  tmp |= (mrp->size-- > 0) ? (((val >> 8) & 0xFF) << bits) : 0;
-  bits += 8u - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1u : 0u);
-  unstuff = ((val >> 8) & 0xFF) > 0x8F;
-
-  tmp |= (mrp->size-- > 0) ? ((val & 0xFF) << bits) : 0;
-  bits += 8u - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1u : 0u);
-  unstuff = (val & 0xFF) > 0x8F;
-
-  mrp->tmp |= (OPJ_UINT64)tmp << mrp->bits; // move data to mrp pointer
-  mrp->bits += bits;
-  mrp->unstuff = unstuff;                   // next byte
+    OPJ_UINT32 val;
+    OPJ_UINT32 tmp;
+    OPJ_UINT32 bits;
+    OPJ_BOOL unstuff;
+
+    //process 4 bytes at a time
+    if (mrp->bits > 32) {
+        return;
+    }
+    val = 0;
+    //the next line (the if statement) needs to be tested first
+    //notice that second line can be simplified to mrp->data -= 4
+    // if (mrp->size > 0)
+    {
+        val = *(OPJ_UINT32*)mrp->data;      // read 32 bits
+        mrp->data -= mrp->size > 0 ? 4 : 0; // move back read pointer only if
+        // there is data
+    }
+
+    //accumulate in tmp, and keep count in bits
+    tmp = (mrp->size-- > 0) ? (val >> 24) : 0; // fill zeros if all
+
+    //test if the last byte > 0x8F (unstuff must be true) and this is 0x7F
+    bits = 8u - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = (val >> 24) > 0x8F;
+
+    //process the next byte
+    tmp |= (mrp->size-- > 0) ? (((val >> 16) & 0xFF) << bits) : 0;
+    bits += 8u - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = ((val >> 16) & 0xFF) > 0x8F;
+
+    tmp |= (mrp->size-- > 0) ? (((val >> 8) & 0xFF) << bits) : 0;
+    bits += 8u - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = ((val >> 8) & 0xFF) > 0x8F;
+
+    tmp |= (mrp->size-- > 0) ? ((val & 0xFF) << bits) : 0;
+    bits += 8u - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1u : 0u);
+    unstuff = (val & 0xFF) > 0x8F;
+
+    mrp->tmp |= (OPJ_UINT64)tmp << mrp->bits; // move data to mrp pointer
+    mrp->bits += bits;
+    mrp->unstuff = unstuff;                   // next byte
 }
 
 //************************************************************************/
@@ -567,7 +578,7 @@ void rev_read_mrp(rev_struct_t *mrp)
   *         an architecture that read size must be compatible with the
   *         alignment of the read address
   *
-  *  There is another simiar subroutine rev_init.  This subroutine does 
+  *  There is another simiar subroutine rev_init.  This subroutine does
   *  NOT skip the first 12 bits, and starts with unstuff set to true.
   *
   *  @param [in]  mrp is a pointer to rev_struct structure
@@ -575,55 +586,55 @@ void rev_read_mrp(rev_struct_t *mrp)
   *  @param [in]  lcup is the length of MagSgn+MEL+VLC segments
   *  @param [in]  len2 is the length of SPP+MRP segments
   */
-static inline 
+static INLINE
 void rev_init_mrp(rev_struct_t *mrp, OPJ_UINT8* data, int lcup, int len2)
 {
-  int num;
-
-  mrp->data = data + lcup + len2 - 1;
-  mrp->size = len2;
-  mrp->unstuff = OPJ_TRUE;
-  mrp->bits = 0;
-  mrp->tmp = 0;
-
-  //This code is designed for an architecture that read address should
-  // align to the read size (address multiple of 4 if read size is 4)
-  //These few lines take care of the case where data is not at a multiple
-  // of 4 boundary.  It reads 1,2,3 up to 4 bytes from the MRP stream
-  num = 1 + (int)((intptr_t)(mrp->data) & 0x3);
-  for (int i = 0; i < num; ++i) {
-    OPJ_UINT64 d;
-    OPJ_UINT32 d_bits;
-
-    //read a byte, 0 if no more data
-    d = (mrp->size-- > 0) ? *mrp->data-- : 0; 
-    //check if unstuffing is needed
-    d_bits = 8u - ((mrp->unstuff && ((d & 0x7F) == 0x7F)) ? 1u : 0u);
-    mrp->tmp |= d << mrp->bits; // move data to vlcp->tmp
-    mrp->bits += d_bits;
-    mrp->unstuff = d > 0x8F; // for next byte
-  }
-  mrp->data -= 3; //make ready to read a 32 bits
-  rev_read_mrp(mrp);
+    int num, i;
+
+    mrp->data = data + lcup + len2 - 1;
+    mrp->size = len2;
+    mrp->unstuff = OPJ_TRUE;
+    mrp->bits = 0;
+    mrp->tmp = 0;
+
+    //This code is designed for an architecture that read address should
+    // align to the read size (address multiple of 4 if read size is 4)
+    //These few lines take care of the case where data is not at a multiple
+    // of 4 boundary.  It reads 1,2,3 up to 4 bytes from the MRP stream
+    num = 1 + (int)((intptr_t)(mrp->data) & 0x3);
+    for (i = 0; i < num; ++i) {
+        OPJ_UINT64 d;
+        OPJ_UINT32 d_bits;
+
+        //read a byte, 0 if no more data
+        d = (mrp->size-- > 0) ? *mrp->data-- : 0;
+        //check if unstuffing is needed
+        d_bits = 8u - ((mrp->unstuff && ((d & 0x7F) == 0x7F)) ? 1u : 0u);
+        mrp->tmp |= d << mrp->bits; // move data to vlcp->tmp
+        mrp->bits += d_bits;
+        mrp->unstuff = d > 0x8F; // for next byte
+    }
+    mrp->data -= 3; //make ready to read a 32 bits
+    rev_read_mrp(mrp);
 }
 
 //************************************************************************/
-/** @brief Retrieves 32 bits from the head of a rev_struct structure 
+/** @brief Retrieves 32 bits from the head of a rev_struct structure
   *
   *  By the end of this call, mrp->tmp must have no less than 33 bits
   *
   *  @param [in]  mrp is a pointer to rev_struct structure
   */
-static inline 
+static INLINE
 OPJ_UINT32 rev_fetch_mrp(rev_struct_t *mrp)
 {
-  if (mrp->bits < 32) // if there are less than 32 bits in mrp->tmp
-  {
-    rev_read_mrp(mrp);    // read 30-32 bits from mrp
-    if (mrp->bits < 32)   // if there is a space of 32 bits
-      rev_read_mrp(mrp);  // read more
-  }
-  return (OPJ_UINT32)mrp->tmp;  // return the head of mrp->tmp
+    if (mrp->bits < 32) { // if there are less than 32 bits in mrp->tmp
+        rev_read_mrp(mrp);    // read 30-32 bits from mrp
+        if (mrp->bits < 32) { // if there is a space of 32 bits
+            rev_read_mrp(mrp);    // read more
+        }
+    }
+    return (OPJ_UINT32)mrp->tmp;  // return the head of mrp->tmp
 }
 
 //************************************************************************/
@@ -632,13 +643,13 @@ OPJ_UINT32 rev_fetch_mrp(rev_struct_t *mrp)
   *  @param [in]  mrp is a pointer to rev_struct structure
   *  @param [in]  num_bits is the number of bits to be removed
   */
-static inline 
+static INLINE
 OPJ_UINT32 rev_advance_mrp(rev_struct_t *mrp, OPJ_UINT32 num_bits)
 {
-  assert(num_bits <= mrp->bits); // we must not consume more than mrp->bits
-  mrp->tmp >>= num_bits;         // discard the lowest num_bits bits
-  mrp->bits -= num_bits;
-  return (OPJ_UINT32)mrp->tmp;   // return data after consumption
+    assert(num_bits <= mrp->bits); // we must not consume more than mrp->bits
+    mrp->tmp >>= num_bits;         // discard the lowest num_bits bits
+    mrp->bits -= num_bits;
+    return (OPJ_UINT32)mrp->tmp;   // return data after consumption
 }
 
 //************************************************************************/
@@ -652,218 +663,203 @@ OPJ_UINT32 rev_advance_mrp(rev_struct_t *mrp, OPJ_UINT32 num_bits)
   *  @param [out] u is the u value (or u_q) + 1.  Note: we produce u + 1;
   *               this value is a partial calculation of u + kappa.
   */
-static inline 
+static INLINE
 OPJ_UINT32 decode_init_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode, OPJ_UINT32 *u)
 {
-  //table stores possible decoding three bits from vlc
-  // there are 8 entries for xx1, x10, 100, 000, where x means do not care
-  // table value is made up of
-  // 2 bits in the LSB for prefix length 
-  // 3 bits for suffix length
-  // 3 bits in the MSB for prefix value (u_pfx in Table 3 of ITU T.814)
-  static const OPJ_UINT8 dec[8] = { // the index is the prefix codeword
-    3 | (5 << 2) | (5 << 5),        //000 == 000, prefix codeword "000"
-    1 | (0 << 2) | (1 << 5),        //001 == xx1, prefix codeword "1"
-    2 | (0 << 2) | (2 << 5),        //010 == x10, prefix codeword "01"
-    1 | (0 << 2) | (1 << 5),        //011 == xx1, prefix codeword "1"
-    3 | (1 << 2) | (3 << 5),        //100 == 100, prefix codeword "001"
-    1 | (0 << 2) | (1 << 5),        //101 == xx1, prefix codeword "1"
-    2 | (0 << 2) | (2 << 5),        //110 == x10, prefix codeword "01"
-    1 | (0 << 2) | (1 << 5)         //111 == xx1, prefix codeword "1"
-  };
-
-  OPJ_UINT32 consumed_bits = 0;
-  if (mode == 0)  // both u_off are 0
-  {
-    u[0] = u[1] = 1; //Kappa is 1 for initial line
-  }
-  else if (mode <= 2) // u_off are either 01 or 10
-  {
-    OPJ_UINT32 d;
-    OPJ_UINT32 suffix_len;
-
-    d = dec[vlc & 0x7];   //look at the least significant 3 bits
-    vlc >>= d & 0x3;                 //prefix length
-    consumed_bits += d & 0x3; 
-
-    suffix_len = ((d >> 2) & 0x7); 
-    consumed_bits += suffix_len;
-
-    d = (d >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
-    u[0] = (mode == 1) ? d + 1 : 1; // kappa is 1 for initial line
-    u[1] = (mode == 1) ? 1 : d + 1; // kappa is 1 for initial line
-  }
-  else if (mode == 3) // both u_off are 1, and MEL event is 0
-  {
-    OPJ_UINT32 d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
-    vlc >>= d1 & 0x3;                // Consume bits
-    consumed_bits += d1 & 0x3;
-
-    if ((d1 & 0x3) > 2)
-    {
-      OPJ_UINT32 suffix_len;
-
-      //u_{q_2} prefix
-      u[1] = (vlc & 1) + 1 + 1; //Kappa is 1 for initial line
-      ++consumed_bits;
-      vlc >>= 1;
+    //table stores possible decoding three bits from vlc
+    // there are 8 entries for xx1, x10, 100, 000, where x means do not care
+    // table value is made up of
+    // 2 bits in the LSB for prefix length
+    // 3 bits for suffix length
+    // 3 bits in the MSB for prefix value (u_pfx in Table 3 of ITU T.814)
+    static const OPJ_UINT8 dec[8] = { // the index is the prefix codeword
+        3 | (5 << 2) | (5 << 5),        //000 == 000, prefix codeword "000"
+        1 | (0 << 2) | (1 << 5),        //001 == xx1, prefix codeword "1"
+        2 | (0 << 2) | (2 << 5),        //010 == x10, prefix codeword "01"
+        1 | (0 << 2) | (1 << 5),        //011 == xx1, prefix codeword "1"
+        3 | (1 << 2) | (3 << 5),        //100 == 100, prefix codeword "001"
+        1 | (0 << 2) | (1 << 5),        //101 == xx1, prefix codeword "1"
+        2 | (0 << 2) | (2 << 5),        //110 == x10, prefix codeword "01"
+        1 | (0 << 2) | (1 << 5)         //111 == xx1, prefix codeword "1"
+    };
+
+    OPJ_UINT32 consumed_bits = 0;
+    if (mode == 0) { // both u_off are 0
+        u[0] = u[1] = 1; //Kappa is 1 for initial line
+    } else if (mode <= 2) { // u_off are either 01 or 10
+        OPJ_UINT32 d;
+        OPJ_UINT32 suffix_len;
+
+        d = dec[vlc & 0x7];   //look at the least significant 3 bits
+        vlc >>= d & 0x3;                 //prefix length
+        consumed_bits += d & 0x3;
+
+        suffix_len = ((d >> 2) & 0x7);
+        consumed_bits += suffix_len;
+
+        d = (d >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+        u[0] = (mode == 1) ? d + 1 : 1; // kappa is 1 for initial line
+        u[1] = (mode == 1) ? 1 : d + 1; // kappa is 1 for initial line
+    } else if (mode == 3) { // both u_off are 1, and MEL event is 0
+        OPJ_UINT32 d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+        vlc >>= d1 & 0x3;                // Consume bits
+        consumed_bits += d1 & 0x3;
+
+        if ((d1 & 0x3) > 2) {
+            OPJ_UINT32 suffix_len;
+
+            //u_{q_2} prefix
+            u[1] = (vlc & 1) + 1 + 1; //Kappa is 1 for initial line
+            ++consumed_bits;
+            vlc >>= 1;
+
+            suffix_len = ((d1 >> 2) & 0x7);
+            consumed_bits += suffix_len;
+            d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+            u[0] = d1 + 1; //Kappa is 1 for initial line
+        } else {
+            OPJ_UINT32 d2;
+            OPJ_UINT32 suffix_len;
+
+            d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+            vlc >>= d2 & 0x3;                // Consume bits
+            consumed_bits += d2 & 0x3;
+
+            suffix_len = ((d1 >> 2) & 0x7);
+            consumed_bits += suffix_len;
+
+            d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+            u[0] = d1 + 1; //Kappa is 1 for initial line
+            vlc >>= suffix_len;
+
+            suffix_len = ((d2 >> 2) & 0x7);
+            consumed_bits += suffix_len;
+
+            d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+            u[1] = d2 + 1; //Kappa is 1 for initial line
+        }
+    } else if (mode == 4) { // both u_off are 1, and MEL event is 1
+        OPJ_UINT32 d1;
+        OPJ_UINT32 d2;
+        OPJ_UINT32 suffix_len;
 
-      suffix_len = ((d1 >> 2) & 0x7);
-      consumed_bits += suffix_len;
-      d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
-      u[0] = d1 + 1; //Kappa is 1 for initial line
-    }
-    else
-    {
-      OPJ_UINT32 d2;
-      OPJ_UINT32 suffix_len;
+        d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+        vlc >>= d1 & 0x3;                // Consume bits
+        consumed_bits += d1 & 0x3;
 
-      d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
-      vlc >>= d2 & 0x3;     // Consume bits
-      consumed_bits += d2 & 0x3;
+        d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+        vlc >>= d2 & 0x3;                // Consume bits
+        consumed_bits += d2 & 0x3;
 
-      suffix_len = ((d1 >> 2) & 0x7);
-      consumed_bits += suffix_len;
+        suffix_len = ((d1 >> 2) & 0x7);
+        consumed_bits += suffix_len;
 
-      d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
-      u[0] = d1 + 1; //Kappa is 1 for initial line
-      vlc >>= suffix_len;
+        d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+        u[0] = d1 + 3; // add 2+kappa
+        vlc >>= suffix_len;
 
-      suffix_len = ((d2 >> 2) & 0x7);
-      consumed_bits += suffix_len;
+        suffix_len = ((d2 >> 2) & 0x7);
+        consumed_bits += suffix_len;
 
-      d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
-      u[1] = d2 + 1; //Kappa is 1 for initial line
+        d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+        u[1] = d2 + 3; // add 2+kappa
     }
-  }
-  else if (mode == 4) // both u_off are 1, and MEL event is 1
-  {
-    OPJ_UINT32 d1;
-    OPJ_UINT32 d2;
-    OPJ_UINT32 suffix_len;
-
-    d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
-    vlc >>= d1 & 0x3;     // Consume bits
-    consumed_bits += d1 & 0x3;
-
-    d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
-    vlc >>= d2 & 0x3;     // Consume bits
-    consumed_bits += d2 & 0x3;
-
-    suffix_len = ((d1 >> 2) & 0x7);
-    consumed_bits += suffix_len;
-
-    d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
-    u[0] = d1 + 3; // add 2+kappa
-    vlc >>= suffix_len;
-
-    suffix_len = ((d2 >> 2) & 0x7);
-    consumed_bits += suffix_len;
-
-    d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
-    u[1] = d2 + 3; // add 2+kappa
-  }
-  return consumed_bits;
+    return consumed_bits;
 }
 
 //************************************************************************/
 /** @brief Decode non-initial UVLC to get the u value (or u_q)
   *
   *  @param [in]  vlc is the head of the VLC bitstream
-  *  @param [in]  mode is 0, 1, 2, or 3. The 1st bit is u_off of 1st quad 
+  *  @param [in]  mode is 0, 1, 2, or 3. The 1st bit is u_off of 1st quad
   *               and 2nd for 2nd quad of a quad pair
   *  @param [out] u is the u value (or u_q) + 1.  Note: we produce u + 1;
   *               this value is a partial calculation of u + kappa.
   */
-static inline 
+static INLINE
 OPJ_UINT32 decode_noninit_uvlc(OPJ_UINT32 vlc, OPJ_UINT32 mode, OPJ_UINT32 *u)
 {
-  //table stores possible decoding three bits from vlc
-  // there are 8 entries for xx1, x10, 100, 000, where x means do not care
-  // table value is made up of
-  // 2 bits in the LSB for prefix length 
-  // 3 bits for suffix length
-  // 3 bits in the MSB for prefix value (u_pfx in Table 3 of ITU T.814)
-  static const OPJ_UINT8 dec[8] = {
-    3 | (5 << 2) | (5 << 5), //000 == 000, prefix codeword "000"
-    1 | (0 << 2) | (1 << 5), //001 == xx1, prefix codeword "1"
-    2 | (0 << 2) | (2 << 5), //010 == x10, prefix codeword "01"
-    1 | (0 << 2) | (1 << 5), //011 == xx1, prefix codeword "1"
-    3 | (1 << 2) | (3 << 5), //100 == 100, prefix codeword "001"
-    1 | (0 << 2) | (1 << 5), //101 == xx1, prefix codeword "1"
-    2 | (0 << 2) | (2 << 5), //110 == x10, prefix codeword "01"
-    1 | (0 << 2) | (1 << 5)  //111 == xx1, prefix codeword "1"
-  };
-
-  OPJ_UINT32 consumed_bits = 0;
-  if (mode == 0)
-  {
-    u[0] = u[1] = 1; //for kappa
-  }
-  else if (mode <= 2) //u_off are either 01 or 10
-  {
-    OPJ_UINT32 d;
-    OPJ_UINT32 suffix_len;
-
-    d = dec[vlc & 0x7];  //look at the least significant 3 bits
-    vlc >>= d & 0x3;     //prefix length
-    consumed_bits += d & 0x3;
-
-    suffix_len = ((d >> 2) & 0x7);
-    consumed_bits += suffix_len;
-
-    d = (d >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
-    u[0] = (mode == 1) ? d + 1 : 1; //for kappa
-    u[1] = (mode == 1) ? 1 : d + 1; //for kappa
-  }
-  else if (mode == 3) // both u_off are 1
-  {
-    OPJ_UINT32 d1;
-    OPJ_UINT32 d2;
-    OPJ_UINT32 suffix_len;
-
-    d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
-    vlc >>= d1 & 0x3;     // Consume bits
-    consumed_bits += d1 & 0x3;
-
-    d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
-    vlc >>= d2 & 0x3;     // Consume bits
-    consumed_bits += d2 & 0x3;
-
-    suffix_len = ((d1 >> 2) & 0x7);
-    consumed_bits += suffix_len;
-
-    d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
-    u[0] = d1 + 1;  //1 for kappa
-    vlc >>= suffix_len;
-
-    suffix_len = ((d2 >> 2) & 0x7);
-    consumed_bits += suffix_len;
-
-    d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
-    u[1] = d2 + 1;  //1 for kappa
-  }
-  return consumed_bits;
+    //table stores possible decoding three bits from vlc
+    // there are 8 entries for xx1, x10, 100, 000, where x means do not care
+    // table value is made up of
+    // 2 bits in the LSB for prefix length
+    // 3 bits for suffix length
+    // 3 bits in the MSB for prefix value (u_pfx in Table 3 of ITU T.814)
+    static const OPJ_UINT8 dec[8] = {
+        3 | (5 << 2) | (5 << 5), //000 == 000, prefix codeword "000"
+        1 | (0 << 2) | (1 << 5), //001 == xx1, prefix codeword "1"
+        2 | (0 << 2) | (2 << 5), //010 == x10, prefix codeword "01"
+        1 | (0 << 2) | (1 << 5), //011 == xx1, prefix codeword "1"
+        3 | (1 << 2) | (3 << 5), //100 == 100, prefix codeword "001"
+        1 | (0 << 2) | (1 << 5), //101 == xx1, prefix codeword "1"
+        2 | (0 << 2) | (2 << 5), //110 == x10, prefix codeword "01"
+        1 | (0 << 2) | (1 << 5)  //111 == xx1, prefix codeword "1"
+    };
+
+    OPJ_UINT32 consumed_bits = 0;
+    if (mode == 0) {
+        u[0] = u[1] = 1; //for kappa
+    } else if (mode <= 2) { //u_off are either 01 or 10
+        OPJ_UINT32 d;
+        OPJ_UINT32 suffix_len;
+
+        d = dec[vlc & 0x7];  //look at the least significant 3 bits
+        vlc >>= d & 0x3;                //prefix length
+        consumed_bits += d & 0x3;
+
+        suffix_len = ((d >> 2) & 0x7);
+        consumed_bits += suffix_len;
+
+        d = (d >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+        u[0] = (mode == 1) ? d + 1 : 1; //for kappa
+        u[1] = (mode == 1) ? 1 : d + 1; //for kappa
+    } else if (mode == 3) { // both u_off are 1
+        OPJ_UINT32 d1;
+        OPJ_UINT32 d2;
+        OPJ_UINT32 suffix_len;
+
+        d1 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+        vlc >>= d1 & 0x3;                // Consume bits
+        consumed_bits += d1 & 0x3;
+
+        d2 = dec[vlc & 0x7];  // LSBs of VLC are prefix codeword
+        vlc >>= d2 & 0x3;                // Consume bits
+        consumed_bits += d2 & 0x3;
+
+        suffix_len = ((d1 >> 2) & 0x7);
+        consumed_bits += suffix_len;
+
+        d1 = (d1 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+        u[0] = d1 + 1;  //1 for kappa
+        vlc >>= suffix_len;
+
+        suffix_len = ((d2 >> 2) & 0x7);
+        consumed_bits += suffix_len;
+
+        d2 = (d2 >> 5) + (vlc & ((1U << suffix_len) - 1)); // u value
+        u[1] = d2 + 1;  //1 for kappa
+    }
+    return consumed_bits;
 }
 
 //************************************************************************/
-/** @brief State structure for reading and unstuffing of forward-growing 
+/** @brief State structure for reading and unstuffing of forward-growing
   *         bitstreams; these are: MagSgn and SPP bitstreams
   */
 typedef struct frwd_struct {
-  const OPJ_UINT8* data; //!<pointer to bitstream
-  OPJ_UINT64 tmp;        //!<temporary buffer of read data
-  OPJ_UINT32 bits;       //!<number of bits stored in tmp
-  OPJ_BOOL unstuff;      //!<true if a bit needs to be unstuffed from next byte
-  int size;              //!<size of data
-  OPJ_UINT32 X;          //!<0 or 0xFF, X's are inserted at end of bitstream
+    const OPJ_UINT8* data; //!<pointer to bitstream
+    OPJ_UINT64 tmp;        //!<temporary buffer of read data
+    OPJ_UINT32 bits;       //!<number of bits stored in tmp
+    OPJ_BOOL unstuff;      //!<true if a bit needs to be unstuffed from next byte
+    int size;              //!<size of data
+    OPJ_UINT32 X;          //!<0 or 0xFF, X's are inserted at end of bitstream
 } frwd_struct_t;
 
 //************************************************************************/
 /** @brief Read and unstuffs 32 bits from forward-growing bitstream
-  *  
-  *  A subroutine to read from both the MagSgn or SPP bitstreams; 
-  *  in particular, when MagSgn bitstream is consumed, 0xFF's are fed, 
+  *
+  *  A subroutine to read from both the MagSgn or SPP bitstreams;
+  *  in particular, when MagSgn bitstream is consumed, 0xFF's are fed,
   *  while when SPP is exhausted 0's are fed in.
   *  X controls this value.
   *
@@ -875,81 +871,80 @@ typedef struct frwd_struct {
   *
   *  @param  [in]  msp is a pointer to frwd_struct_t structure
   *
-  */ 
-static inline
+  */
+static INLINE
 void frwd_read(frwd_struct_t *msp)
 {
-  OPJ_UINT32 val;
-  OPJ_UINT32 bits;
-  OPJ_UINT32 t;
-  OPJ_BOOL unstuff;
+    OPJ_UINT32 val;
+    OPJ_UINT32 bits;
+    OPJ_UINT32 t;
+    OPJ_BOOL unstuff;
 
-  assert(msp->bits <= 32); // assert that there is a space for 32 bits
+    assert(msp->bits <= 32); // assert that there is a space for 32 bits
 
-  val = *(OPJ_UINT32*)msp->data;      // read 32 bits
-  msp->data += msp->size > 0 ? 4 : 0; // move pointer if data is not 
-                                      // exhausted
+    val = *(OPJ_UINT32*)msp->data;      // read 32 bits
+    msp->data += msp->size > 0 ? 4 : 0; // move pointer if data is not
+    // exhausted
 
-  // we accumulate in t and keep a count of the number of bits in bits
-  bits = 8u - (msp->unstuff ? 1u:0u);     // if previous byte was 0xFF
-  // get next byte, if bitstream is exhausted, replace it with X
-  t = msp->size-- > 0 ? (val & 0xFF) : msp->X;
-  unstuff = ((val & 0xFF) == 0xFF);  // Do we need unstuffing next?
+    // we accumulate in t and keep a count of the number of bits in bits
+    bits = 8u - (msp->unstuff ? 1u : 0u);   // if previous byte was 0xFF
+    // get next byte, if bitstream is exhausted, replace it with X
+    t = msp->size-- > 0 ? (val & 0xFF) : msp->X;
+    unstuff = ((val & 0xFF) == 0xFF);  // Do we need unstuffing next?
 
-  t |= (msp->size-- > 0 ? ((val >> 8) & 0xFF) : msp->X) << bits;
-  bits += 8u - (unstuff ? 1u:0u);
-  unstuff = (((val >> 8) & 0xFF) == 0xFF);
+    t |= (msp->size-- > 0 ? ((val >> 8) & 0xFF) : msp->X) << bits;
+    bits += 8u - (unstuff ? 1u : 0u);
+    unstuff = (((val >> 8) & 0xFF) == 0xFF);
 
-  t |= (msp->size-- > 0 ? ((val >> 16) & 0xFF) : msp->X) << bits;
-  bits += 8u - (unstuff ? 1u:0u);
-  unstuff = (((val >> 16) & 0xFF) == 0xFF);
+    t |= (msp->size-- > 0 ? ((val >> 16) & 0xFF) : msp->X) << bits;
+    bits += 8u - (unstuff ? 1u : 0u);
+    unstuff = (((val >> 16) & 0xFF) == 0xFF);
 
-  t |= (msp->size-- > 0 ? ((val >> 24) & 0xFF) : msp->X) << bits;
-  bits += 8u - (unstuff ? 1u:0u);
-  msp->unstuff = (((val >> 24) & 0xFF) == 0xFF); // for next byte
+    t |= (msp->size-- > 0 ? ((val >> 24) & 0xFF) : msp->X) << bits;
+    bits += 8u - (unstuff ? 1u : 0u);
+    msp->unstuff = (((val >> 24) & 0xFF) == 0xFF); // for next byte
 
-  msp->tmp |= ((OPJ_UINT64)t) << msp->bits;  // move data to msp->tmp
-  msp->bits += bits;
+    msp->tmp |= ((OPJ_UINT64)t) << msp->bits;  // move data to msp->tmp
+    msp->bits += bits;
 }
 
 //************************************************************************/
 /** @brief Initialize frwd_struct_t struct and reads some bytes
-  *  
+  *
   *  @param [in]  msp is a pointer to frwd_struct_t
   *  @param [in]  data is a pointer to the start of data
   *  @param [in]  size is the number of byte in the bitstream
   *  @param [in]  X is the value fed in when the bitstream is exhausted.
   *               See frwd_read.
   */
-static inline
-void frwd_init(frwd_struct_t *msp, const OPJ_UINT8* data, int size, 
+static INLINE
+void frwd_init(frwd_struct_t *msp, const OPJ_UINT8* data, int size,
                OPJ_UINT32 X)
 {
-  int num;
-
-  msp->data = data;
-  msp->tmp = 0;
-  msp->bits = 0;
-  msp->unstuff = OPJ_FALSE;
-  msp->size = size;
-  msp->X = X;
-  assert(msp->X == 0 || msp->X == 0xFF);
-
-  //This code is designed for an architecture that read address should
-  // align to the read size (address multiple of 4 if read size is 4)
-  //These few lines take care of the case where data is not at a multiple
-  // of 4 boundary.  It reads 1,2,3 up to 4 bytes from the bitstream
-  num = 4 - (int)((intptr_t)(msp->data) & 0x3);
-  for (int i = 0; i < num; ++i)
-  {
-    OPJ_UINT64 d;
-    //read a byte if the buffer is not exhausted, otherwise set it to X
-    d = msp->size-- > 0 ? *msp->data++ : msp->X;
-    msp->tmp |= (d << msp->bits);           // store data in msp->tmp
-    msp->bits += 8u - (msp->unstuff?1u:0u); // number of bits added to msp->tmp
-    msp->unstuff = ((d & 0xFF) == 0xFF);    // unstuffing for next byte
-  }
-  frwd_read(msp); // read 32 bits more
+    int num, i;
+
+    msp->data = data;
+    msp->tmp = 0;
+    msp->bits = 0;
+    msp->unstuff = OPJ_FALSE;
+    msp->size = size;
+    msp->X = X;
+    assert(msp->X == 0 || msp->X == 0xFF);
+
+    //This code is designed for an architecture that read address should
+    // align to the read size (address multiple of 4 if read size is 4)
+    //These few lines take care of the case where data is not at a multiple
+    // of 4 boundary.  It reads 1,2,3 up to 4 bytes from the bitstream
+    num = 4 - (int)((intptr_t)(msp->data) & 0x3);
+    for (i = 0; i < num; ++i) {
+        OPJ_UINT64 d;
+        //read a byte if the buffer is not exhausted, otherwise set it to X
+        d = msp->size-- > 0 ? *msp->data++ : msp->X;
+        msp->tmp |= (d << msp->bits);      // store data in msp->tmp
+        msp->bits += 8u - (msp->unstuff ? 1u : 0u); // number of bits added to msp->tmp
+        msp->unstuff = ((d & 0xFF) == 0xFF); // unstuffing for next byte
+    }
+    frwd_read(msp); // read 32 bits more
 }
 
 //************************************************************************/
@@ -958,12 +953,12 @@ void frwd_init(frwd_struct_t *msp, const OPJ_UINT8* data, int size,
   *  @param [in]  msp is a pointer to frwd_struct_t
   *  @param [in]  num_bits is the number of bit to consume
   */
-static inline 
+static INLINE
 void frwd_advance(frwd_struct_t *msp, OPJ_UINT32 num_bits)
 {
-  assert(num_bits <= msp->bits);
-  msp->tmp >>= num_bits;  // consume num_bits
-  msp->bits -= num_bits;
+    assert(num_bits <= msp->bits);
+    msp->tmp >>= num_bits;  // consume num_bits
+    msp->bits -= num_bits;
 }
 
 //************************************************************************/
@@ -971,16 +966,16 @@ void frwd_advance(frwd_struct_t *msp, OPJ_UINT32 num_bits)
   *
   *  @param [in]  msp is a pointer to frwd_struct_t
   */
-static inline 
+static INLINE
 OPJ_UINT32 frwd_fetch(frwd_struct_t *msp)
 {
-  if (msp->bits < 32)
-  {
-    frwd_read(msp);
-    if (msp->bits < 32) //need to test
-      frwd_read(msp);
-  }
-  return (OPJ_UINT32)msp->tmp;
+    if (msp->bits < 32) {
+        frwd_read(msp);
+        if (msp->bits < 32) { //need to test
+            frwd_read(msp);
+        }
+    }
+    return (OPJ_UINT32)msp->tmp;
 }
 
 //************************************************************************/
@@ -1005,8 +1000,8 @@ static OPJ_BOOL opj_t1_allocate_buffers(
 
         if (datasize > t1->datasize) {
             opj_aligned_free(t1->data);
-            t1->data = (OPJ_INT32*) 
-              opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
+            t1->data = (OPJ_INT32*)
+                       opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
             if (!t1->data) {
                 /* FIXME event manager error callback */
                 return OPJ_FALSE;
@@ -1059,1476 +1054,1445 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
                                opj_mutex_t* p_manager_mutex,
                                OPJ_BOOL check_pterm)
 {
-  OPJ_BYTE* cblkdata = NULL;
-  OPJ_UINT8* coded_data;
-  OPJ_UINT32* decoded_data;
-  OPJ_UINT32 num_passes;
-  OPJ_UINT32 lengths1;
-  OPJ_UINT32 lengths2;
-  OPJ_INT32 width;
-  OPJ_INT32 height;
-  OPJ_INT32 stride;
-  OPJ_UINT32 *pflags, *sigma1, *sigma2, *mbr1, *mbr2, *sip, sip_shift;
-  OPJ_UINT32 p;
-  OPJ_UINT32 zero_planes_p1;
-  int lcup, scup;
-  dec_mel_t mel;
-  rev_struct_t vlc;
-  frwd_struct_t magsgn;
-  frwd_struct_t sigprop;
-  rev_struct_t magref;
-  OPJ_UINT8 *lsp, *line_state;
-  int run;  
-  OPJ_UINT32 vlc_val;           
-  OPJ_UINT32 qinf[2];
-  OPJ_UINT32 c_q;
-  OPJ_UINT32* sp;
-
-  (void)(orient);      // stops unused parameter message
-  (void)(check_pterm); // stops unused parameter message
-
-  // We ignor orient, because the same decoder is used for all subbands
-  // We also ignore check_pterm, because I am not sure how it applies
-  assert(cblksty == 0x40); // that is the only support mode
-  if (roishift != 0) {
-    if (p_manager_mutex)
-      opj_mutex_lock(p_manager_mutex);
-    opj_event_msg(p_manager, EVT_ERROR, "We do not support ROI in decoding "
-                            "HT codeblocks\n");
-    if (p_manager_mutex)
-      opj_mutex_unlock(p_manager_mutex);
-    return OPJ_FALSE;
-  }
-
-  if (!opj_t1_allocate_buffers(
-              t1,
-              (OPJ_UINT32)(cblk->x1 - cblk->x0),
-              (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
-      return OPJ_FALSE;
-  }
-
-  /* Even if we have a single chunk, in multi-threaded decoding */
-  /* the insertion of our synthetic marker might potentially override */
-  /* valid codestream of other codeblocks decoded in parallel. */
-  if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
-      OPJ_UINT32 i;
-      OPJ_UINT32 cblk_len;
-
-      /* Compute whole codeblock length from chunk lengths */
-      cblk_len = 0;
-      for (i = 0; i < cblk->numchunks; i++) {
-          cblk_len += cblk->chunks[i].len;
-      }
-
-      /* Allocate temporary memory if needed */
-      if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
-          cblkdata = (OPJ_BYTE*)opj_realloc(
-              t1->cblkdatabuffer, cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
-          if (cblkdata == NULL) {
-              return OPJ_FALSE;
-          }
-          t1->cblkdatabuffer = cblkdata;
-          memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
-          t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
-      }
-
-      /* Concatenate all chunks */
-      cblkdata = t1->cblkdatabuffer;
-      cblk_len = 0;
-      for (i = 0; i < cblk->numchunks; i++) {
-          memcpy(cblkdata+cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
-          cblk_len += cblk->chunks[i].len;
-      }
-  } else if (cblk->numchunks == 1) {
-      cblkdata = cblk->chunks[0].data;
-  } else {
-      /* Not sure if that can happen in practice, but avoid Coverity to */
-      /* think we will dereference a null cblkdta pointer */
-      return OPJ_TRUE;
-  }
-
-  // OPJ_BYTE* coded_data is a pointer to bitstream
-  coded_data = cblkdata;
-  // OPJ_UINT32* decoded_data is a pointer to decoded codeblock data buf.
-  decoded_data = (OPJ_UINT32*)t1->data;
-  // OPJ_UINT32 num_passes is the number of passes: 1 if CUP only, 2 for 
-  // CUP+SPP, and 3 for CUP+SPP+MRP
-  num_passes = cblk->numsegs>0 ? cblk->segs[0].real_num_passes : 0;
-  num_passes += cblk->numsegs>1 ? cblk->segs[1].real_num_passes : 0;
-  // OPJ_UINT32 lengths1 is the length of cleanup pass
-  lengths1 = num_passes > 0 ? cblk->segs[0].len : 0;
-  // OPJ_UINT32 lengths2 is the length of refinement passes (either SPP only or SPP+MRP)
-  lengths2 = num_passes > 1 ? cblk->segs[1].len : 0;
-  // OPJ_INT32 width is the decoded codeblock width 
-  width = cblk->x1 - cblk->x0;
-  // OPJ_INT32 height is the decoded codeblock height
-  height = cblk->y1 - cblk->y0;
-  // OPJ_INT32 stride is the decoded codeblock buffer stride 
-  stride = width;
-
-   /*  sigma1 and sigma2 contains significant (i.e., non-zero) pixel 
-    *  locations.  The buffers are used interchangeably, because we need
-    *  more than 4 rows of significance information at a given time.
-    *  Each 32 bits contain significance information for 4 rows of 8 
-    *  columns each.  If we denote 32 bits by 0xaaaaaaaa, the each "a" is
-    *  called a nibble and has significance information for 4 rows.
-    *  The least significant nibble has information for the first column,
-    *  and so on. The nibble's LSB is for the first row, and so on.
-    *  Since, at most, we can have 1024 columns in a quad, we need 128
-    *  entries; we added 1 for convenience when propagation of signifcance
-    *  goes outside the structure
-    *  To work in OpenJPEG these buffers has been expanded to 132.
-    */
-  // OPJ_UINT32 *pflags, *sigma1, *sigma2, *mbr1, *mbr2, *sip, sip_shift;
-  pflags = (OPJ_UINT32 *)t1->flags;
-  sigma1 = pflags;
-  sigma2 = sigma1 + 132;
-  // mbr arrangement is similar to sigma; mbr contains locations 
-  // that become significant during significance propagation pass
-  mbr1 = sigma2 + 132;
-  mbr2 = mbr1 + 132;
-  //a pointer to sigma
-  sip = sigma1;  //pointers to arrays to be used interchangeably
-  sip_shift = 0; //the amount of shift needed for sigma
-
-  if (num_passes > 1 && lengths2 == 0)
-  {
-    if (p_manager_mutex)
-      opj_mutex_lock(p_manager_mutex);
-    opj_event_msg(p_manager, EVT_WARNING, "A malformed codeblock that has "
-                  "more than one coding pass, but zero length for "
-                  "2nd and potential 3rd pass.\n");
-    if (p_manager_mutex)
-      opj_mutex_unlock(p_manager_mutex);
-    num_passes = 1;
-  }
-  if (num_passes > 3)
-  {
-    if (p_manager_mutex)
-      opj_mutex_lock(p_manager_mutex);
-    opj_event_msg(p_manager, EVT_WARNING, "We do not support more than 3 "
-                            "coding passes; This codeblocks has %d passes.\n",
-                            num_passes);
-    if (p_manager_mutex)
-      opj_mutex_unlock(p_manager_mutex);
-    return OPJ_FALSE;
-  }
-
-  if (cblk->numbps == 1 && num_passes > 1)
-    {
-      // We do not have enough precision to decode SgnProp nor MagRef passes.
-      // We decode the cleanup passes only
-      if (cannot_decode_spp_mrp_msg == OPJ_FALSE) {
-        if (p_manager_mutex)
-          opj_mutex_lock(p_manager_mutex);
-        cannot_decode_spp_mrp_msg = OPJ_TRUE;
-        opj_event_msg(p_manager, EVT_WARNING, "Not enough precision to decode "
-                                "the SgnProp nor MagRef passes.  This message "
-                                "will not be displayed again.\n");
-        if (p_manager_mutex)
-          opj_mutex_unlock(p_manager_mutex);
-      }
-      num_passes = 1;
+    OPJ_BYTE* cblkdata = NULL;
+    OPJ_UINT8* coded_data;
+    OPJ_UINT32* decoded_data;
+    OPJ_UINT32 num_passes;
+    OPJ_UINT32 lengths1;
+    OPJ_UINT32 lengths2;
+    OPJ_INT32 width;
+    OPJ_INT32 height;
+    OPJ_INT32 stride;
+    OPJ_UINT32 *pflags, *sigma1, *sigma2, *mbr1, *mbr2, *sip, sip_shift;
+    OPJ_UINT32 p;
+    OPJ_UINT32 zero_planes_p1;
+    int lcup, scup;
+    dec_mel_t mel;
+    rev_struct_t vlc;
+    frwd_struct_t magsgn;
+    frwd_struct_t sigprop;
+    rev_struct_t magref;
+    OPJ_UINT8 *lsp, *line_state;
+    int run;
+    OPJ_UINT32 vlc_val;              // fetched data from VLC bitstream
+    OPJ_UINT32 qinf[2];
+    OPJ_UINT32 c_q;
+    OPJ_UINT32* sp;
+    OPJ_INT32 x, y; // loop indices
+
+    (void)(orient);      // stops unused parameter message
+    (void)(check_pterm); // stops unused parameter message
+
+    // We ignore orient, because the same decoder is used for all subbands
+    // We also ignore check_pterm, because I am not sure how it applies
+    assert(cblksty == 0x40); // that is the only support mode
+    if (roishift != 0) {
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_ERROR, "We do not support ROI in decoding "
+                      "HT codeblocks\n");
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        return OPJ_FALSE;
     }
-  if (cblk->numbps == 0)
-    {
-      // We do not have enough precision to decode the CUP pass with the 
-      // center of bin bit set.  The code can be modified to support this 
-      // case, without using the center of the bin.
-      if (cannot_decode_due_to_insufficient_precision == OPJ_FALSE) {
-        if (p_manager_mutex)
-          opj_mutex_lock(p_manager_mutex);
-        cannot_decode_due_to_insufficient_precision = OPJ_TRUE;
-        opj_event_msg(p_manager, EVT_WARNING, "Not enough precision to decode "
-                                "the cleanup pass. The code should be "
-                                "modified to support this case. This message "
-                                "will not be displayed again.\n");
-        if (p_manager_mutex)
-          opj_mutex_unlock(p_manager_mutex);
-      }
-      return OPJ_TRUE;
+
+    if (!opj_t1_allocate_buffers(
+                t1,
+                (OPJ_UINT32)(cblk->x1 - cblk->x0),
+                (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
+        return OPJ_FALSE;
     }
 
-  // OPJ_INT32
-  p = cblk->numbps; 
-  // OPJ_INT32 zero planes plus 1
-  zero_planes_p1 = cblk->Mb - cblk->numbps + 1;
-
-  // read scup and fix the bytes there
-  lcup = (int)lengths1;  // length of CUP
-  //scup is the length of MEL + VLC
-  scup = (((int)coded_data[lcup-1]) << 4) + (coded_data[lcup-2] & 0xF);
-  if (scup < 2 || scup > lcup || scup > 4079) //something is wrong
-    return OPJ_FALSE;
-
-  // init structures
-  mel_init(&mel, coded_data, lcup, scup);
-  rev_init(&vlc, coded_data, lcup, scup);
-  frwd_init(&magsgn, coded_data, lcup - scup, 0xFF);
-  if (num_passes > 1) // needs to be tested
-    frwd_init(&sigprop, coded_data + lengths1, (int)lengths2, 0);
-  if (num_passes > 2)
-    rev_init_mrp(&magref, coded_data, (int)lengths1, (int)lengths2);
-
-  /** State storage
-    *  One byte per quad; for 1024 columns, or 512 quads, we need
-    *  512 bytes. We are using 2 extra bytes one on the left and one on
-    *  the right for convenience.
-    *
-    *  The MSB bit in each byte is (\sigma^nw | \sigma^n), and the 7 LSBs
-    *  contain max(E^nw | E^n)
-    */
-
-  // 514 is enough for a block width of 1024, +2 extra
-  // here expanded to 528
-  line_state = (OPJ_UINT8 *)(mbr2 + 132); 
-
-  //initial 2 lines
-  /////////////////
-  lsp = line_state;           // point to line state
-  lsp[0] = 0;                 // for initial row of quad, we set to 0
-  run = mel_get_run(&mel);    // decode runs of events from MEL bitstrm
-                              // data represented as runs of 0 events
-                              // See mel_decode description
-  qinf[0] = qinf[1] = 0;      // quad info decoded from VLC bitstream
-  c_q = 0;                    // context for quad q
-  sp = decoded_data;          // decoded codeblock samples
-  // vlc_val;                 // fetched data from VLC bitstream
-
-  for (OPJ_INT32 x = 0; x < width; x += 4) // one iteration per quad pair
-  {
-    OPJ_UINT32 U_q[2]; // u values for the quad pair
-    OPJ_UINT32 uvlc_mode;
-    OPJ_UINT32 consumed_bits;
-    OPJ_UINT32 m_n, v_n;
-    OPJ_UINT32 ms_val;
-    OPJ_UINT32 locs;
-
-    // decode VLC
-    /////////////
-
-    //first quad
-    // Get the head of the VLC bitstream. One fetch is enough for two 
-    // quads, since the largest VLC code is 7 bits, and maximum number of 
-    // bits used for u is 8.  Therefore for two quads we need 30 bits 
-    // (if we include unstuffing, then 32 bits are enough, since we have 
-    // a maximum of one stuffing per two bytes)
-    vlc_val = rev_fetch(&vlc);
-
-    //decode VLC using the context c_q and the head of the VLC bitstream
-    qinf[0] = vlc_tbl0[ (c_q << 7) | (vlc_val & 0x7F) ];
-
-    if (c_q == 0) // if zero context, we need to use one MEL event
-    {
-      run -= 2; //the number of 0 events is multiplied by 2, so subtract 2
+    /* Even if we have a single chunk, in multi-threaded decoding */
+    /* the insertion of our synthetic marker might potentially override */
+    /* valid codestream of other codeblocks decoded in parallel. */
+    if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
+        OPJ_UINT32 i;
+        OPJ_UINT32 cblk_len;
+
+        /* Compute whole codeblock length from chunk lengths */
+        cblk_len = 0;
+        for (i = 0; i < cblk->numchunks; i++) {
+            cblk_len += cblk->chunks[i].len;
+        }
 
-      // Is the run terminated in 1? if so, use decoded VLC code, 
-      // otherwise, discard decoded data, since we will decoded again 
-      // using a different context
-      qinf[0] = (run == -1) ? qinf[0] : 0;
+        /* Allocate temporary memory if needed */
+        if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
+            cblkdata = (OPJ_BYTE*)opj_realloc(
+                           t1->cblkdatabuffer, cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
+            if (cblkdata == NULL) {
+                return OPJ_FALSE;
+            }
+            t1->cblkdatabuffer = cblkdata;
+            memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
+            t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
+        }
 
-      // is run -1 or -2? this means a run has been consumed
-      if (run < 0) 
-        run = mel_get_run(&mel);  // get another run
+        /* Concatenate all chunks */
+        cblkdata = t1->cblkdatabuffer;
+        cblk_len = 0;
+        for (i = 0; i < cblk->numchunks; i++) {
+            memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
+            cblk_len += cblk->chunks[i].len;
+        }
+    } else if (cblk->numchunks == 1) {
+        cblkdata = cblk->chunks[0].data;
+    } else {
+        /* Not sure if that can happen in practice, but avoid Coverity to */
+        /* think we will dereference a null cblkdta pointer */
+        return OPJ_TRUE;
     }
 
-    // prepare context for the next quad; eqn. 1 in ITU T.814
-    c_q = ((qinf[0] & 0x10) >> 4) | ((qinf[0] & 0xE0) >> 5);
-
-    //remove data from vlc stream (0 bits are removed if qinf is not used)
-    vlc_val = rev_advance(&vlc, qinf[0] & 0x7);
-
-    //update sigma
-    // The update depends on the value of x; consider one OPJ_UINT32
-    // if x is 0, 8, 16 and so on, then this line update c locations
-    //      nibble (4 bits) number   0 1 2 3 4 5 6 7
-    //                         LSB   c c 0 0 0 0 0 0 
-    //                               c c 0 0 0 0 0 0
-    //                               0 0 0 0 0 0 0 0
-    //                               0 0 0 0 0 0 0 0
-    // if x is 4, 12, 20, then this line update locations c
-    //      nibble (4 bits) number   0 1 2 3 4 5 6 7
-    //                         LSB   0 0 0 0 c c 0 0 
-    //                               0 0 0 0 c c 0 0
-    //                               0 0 0 0 0 0 0 0
-    //                               0 0 0 0 0 0 0 0
-    *sip |= (((qinf[0] & 0x30)>>4) | ((qinf[0] & 0xC0)>>2)) << sip_shift;
-
-    //second quad
-    qinf[1] = 0;
-    if (x + 2 < width) // do not run if codeblock is narrower
-    {
-      //decode VLC using the context c_q and the head of the VLC bitstream
-      qinf[1] = vlc_tbl0[(c_q << 7) | (vlc_val & 0x7F)]; 
-
-      // if context is zero, use one MEL event
-      if (c_q == 0) //zero context
-      {
-        run -= 2; //subtract 2, since events number if multiplied by 2
-
-        // if event is 0, discard decoded qinf
-        qinf[1] = (run == -1) ? qinf[1] : 0;
+    // OPJ_BYTE* coded_data is a pointer to bitstream
+    coded_data = cblkdata;
+    // OPJ_UINT32* decoded_data is a pointer to decoded codeblock data buf.
+    decoded_data = (OPJ_UINT32*)t1->data;
+    // OPJ_UINT32 num_passes is the number of passes: 1 if CUP only, 2 for
+    // CUP+SPP, and 3 for CUP+SPP+MRP
+    num_passes = cblk->numsegs > 0 ? cblk->segs[0].real_num_passes : 0;
+    num_passes += cblk->numsegs > 1 ? cblk->segs[1].real_num_passes : 0;
+    // OPJ_UINT32 lengths1 is the length of cleanup pass
+    lengths1 = num_passes > 0 ? cblk->segs[0].len : 0;
+    // OPJ_UINT32 lengths2 is the length of refinement passes (either SPP only or SPP+MRP)
+    lengths2 = num_passes > 1 ? cblk->segs[1].len : 0;
+    // OPJ_INT32 width is the decoded codeblock width
+    width = cblk->x1 - cblk->x0;
+    // OPJ_INT32 height is the decoded codeblock height
+    height = cblk->y1 - cblk->y0;
+    // OPJ_INT32 stride is the decoded codeblock buffer stride
+    stride = width;
+
+    /*  sigma1 and sigma2 contain significant (i.e., non-zero) pixel
+     *  locations.  The buffers are used interchangeably, because we need
+     *  more than 4 rows of significance information at a given time.
+     *  Each 32 bits contain significance information for 4 rows of 8
+     *  columns each.  If we denote 32 bits by 0xaaaaaaaa, then each "a" is
+     *  called a nibble and has significance information for 4 rows.
+     *  The least significant nibble has information for the first column,
+     *  and so on. The nibble's LSB is for the first row, and so on.
+     *  Since, at most, we can have 1024 columns in a quad, we need 128
+     *  entries; we added 1 for convenience when propagation of significance
+     *  goes outside the structure.
+     *  To work in OpenJPEG these buffers have been expanded to 132.
+     */
+    // OPJ_UINT32 *pflags, *sigma1, *sigma2, *mbr1, *mbr2, *sip, sip_shift;
+    pflags = (OPJ_UINT32 *)t1->flags;
+    sigma1 = pflags;
+    sigma2 = sigma1 + 132;
+    // mbr arrangement is similar to sigma; mbr contains locations
+    // that become significant during significance propagation pass
+    mbr1 = sigma2 + 132;
+    mbr2 = mbr1 + 132;
+    //a pointer to sigma
+    sip = sigma1;  //pointers to arrays to be used interchangeably
+    sip_shift = 0; //the amount of shift needed for sigma
+
+    if (num_passes > 1 && lengths2 == 0) {
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_WARNING, "A malformed codeblock that has "
+                      "more than one coding pass, but zero length for "
+                      "2nd and potential 3rd pass.\n");
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        num_passes = 1;
+    }
+    if (num_passes > 3) {
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_WARNING, "We do not support more than 3 "
+                      "coding passes; This codeblocks has %d passes.\n",
+                      num_passes);
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        return OPJ_FALSE;
+    }
 
-        if (run < 0) // have we consumed all events in a run
-          run = mel_get_run(&mel); // if yes, then get another run
-      }
+    if (cblk->numbps == 1 && num_passes > 1) {
+        // We do not have enough precision to decode SgnProp nor MagRef passes.
+        // We decode the cleanup passes only
+        if (cannot_decode_spp_mrp_msg == OPJ_FALSE) {
+            if (p_manager_mutex) {
+                opj_mutex_lock(p_manager_mutex);
+            }
+            cannot_decode_spp_mrp_msg = OPJ_TRUE;
+            opj_event_msg(p_manager, EVT_WARNING, "Not enough precision to decode "
+                          "the SgnProp nor MagRef passes.  This message "
+                          "will not be displayed again.\n");
+            if (p_manager_mutex) {
+                opj_mutex_unlock(p_manager_mutex);
+            }
+        }
+        num_passes = 1;
+    }
+    if (cblk->numbps == 0) {
+        // We do not have enough precision to decode the CUP pass with the
+        // center of bin bit set.  The code can be modified to support this
+        // case, without using the center of the bin.
+        if (cannot_decode_due_to_insufficient_precision == OPJ_FALSE) {
+            if (p_manager_mutex) {
+                opj_mutex_lock(p_manager_mutex);
+            }
+            cannot_decode_due_to_insufficient_precision = OPJ_TRUE;
+            opj_event_msg(p_manager, EVT_WARNING, "Not enough precision to decode "
+                          "the cleanup pass. The code should be "
+                          "modified to support this case. This message "
+                          "will not be displayed again.\n");
+            if (p_manager_mutex) {
+                opj_mutex_unlock(p_manager_mutex);
+            }
+        }
+        return OPJ_TRUE;
+    }
 
-      //prepare context for the next quad, eqn. 1 in ITU T.814
-      c_q = ((qinf[1] & 0x10) >> 4) | ((qinf[1] & 0xE0) >> 5);
+    // OPJ_UINT32
+    p = cblk->numbps;
+    // OPJ_UINT32 zero planes plus 1
+    zero_planes_p1 = cblk->Mb - cblk->numbps + 1;
 
-      //remove data from vlc stream, if qinf is not used, cwdlen is 0
-      vlc_val = rev_advance(&vlc, qinf[1] & 0x7);
+    // read scup and fix the bytes there
+    lcup = (int)lengths1;  // length of CUP
+    //scup is the length of MEL + VLC
+    scup = (((int)coded_data[lcup - 1]) << 4) + (coded_data[lcup - 2] & 0xF);
+    if (scup < 2 || scup > lcup || scup > 4079) { //something is wrong
+        return OPJ_FALSE;
     }
 
-    //update sigma
-    // The update depends on the value of x; consider one OPJ_UINT32
-    // if x is 0, 8, 16 and so on, then this line update c locations
-    //      nibble (4 bits) number   0 1 2 3 4 5 6 7
-    //                         LSB   0 0 c c 0 0 0 0 
-    //                               0 0 c c 0 0 0 0
-    //                               0 0 0 0 0 0 0 0
-    //                               0 0 0 0 0 0 0 0
-    // if x is 4, 12, 20, then this line update locations c
-    //      nibble (4 bits) number   0 1 2 3 4 5 6 7
-    //                         LSB   0 0 0 0 0 0 c c 
-    //                               0 0 0 0 0 0 c c
-    //                               0 0 0 0 0 0 0 0
-    //                               0 0 0 0 0 0 0 0
-    *sip |= (((qinf[1] & 0x30) | ((qinf[1] & 0xC0)<<2))) << (4+sip_shift);
-
-    sip += x & 0x7 ? 1 : 0; // move sigma pointer to next entry
-    sip_shift ^= 0x10;      // increment/decrement sip_shift by 16
-
-    // retrieve u
-    /////////////
-
-    // uvlc_mode is made up of u_offset bits from the quad pair
-    uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
-    if (uvlc_mode == 3)  // if both u_offset are set, get an event from
-    {                    // the MEL run of events
-      run -= 2; //subtract 2, since events number if multiplied by 2
-      uvlc_mode += (run == -1) ? 1 : 0; //increment uvlc_mode if event is 1
-      if (run < 0) // if run is consumed (run is -1 or -2), get another run
-        run = mel_get_run(&mel);
+    // init structures
+    mel_init(&mel, coded_data, lcup, scup);
+    rev_init(&vlc, coded_data, lcup, scup);
+    frwd_init(&magsgn, coded_data, lcup - scup, 0xFF);
+    if (num_passes > 1) { // needs to be tested
+        frwd_init(&sigprop, coded_data + lengths1, (int)lengths2, 0);
     }
-    //decode uvlc_mode to get u for both quads
-    consumed_bits = decode_init_uvlc(vlc_val, uvlc_mode, U_q);
-    if (U_q[0] > zero_planes_p1 || U_q[1] > zero_planes_p1)
-    {
-      if (p_manager_mutex)
-        opj_mutex_lock(p_manager_mutex);
-      opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. Decoding "
-                              "this codeblock is stopped.\n");
-      if (p_manager_mutex)
-        opj_mutex_unlock(p_manager_mutex);
-      return OPJ_FALSE;
+    if (num_passes > 2) {
+        rev_init_mrp(&magref, coded_data, (int)lengths1, (int)lengths2);
     }
 
-    //consume u bits in the VLC code
-    vlc_val = rev_advance(&vlc, consumed_bits);
+    /** State storage
+      *  One byte per quad; for 1024 columns, or 512 quads, we need
+      *  512 bytes. We are using 2 extra bytes one on the left and one on
+      *  the right for convenience.
+      *
+      *  The MSB bit in each byte is (\sigma^nw | \sigma^n), and the 7 LSBs
+      *  contain max(E^nw | E^n)
+      */
+
+    // 514 is enough for a block width of 1024, +2 extra
+    // here expanded to 528
+    line_state = (OPJ_UINT8 *)(mbr2 + 132);
+
+    //initial 2 lines
+    /////////////////
+    lsp = line_state;              // point to line state
+    lsp[0] = 0;                    // for initial row of quad, we set to 0
+    run = mel_get_run(&mel);    // decode runs of events from MEL bitstrm
+    // data represented as runs of 0 events
+    // See mel_decode description
+    qinf[0] = qinf[1] = 0;      // quad info decoded from VLC bitstream
+    c_q = 0;                    // context for quad q
+    sp = decoded_data;          // decoded codeblock samples
+    // vlc_val;                 // fetched data from VLC bitstream
+
+    for (x = 0; x < width; x += 4) { // one iteration per quad pair
+        OPJ_UINT32 U_q[2]; // u values for the quad pair
+        OPJ_UINT32 uvlc_mode;
+        OPJ_UINT32 consumed_bits;
+        OPJ_UINT32 m_n, v_n;
+        OPJ_UINT32 ms_val;
+        OPJ_UINT32 locs;
+
+        // decode VLC
+        /////////////
+
+        //first quad
+        // Get the head of the VLC bitstream. One fetch is enough for two
+        // quads, since the largest VLC code is 7 bits, and maximum number of
+        // bits used for u is 8.  Therefore for two quads we need 30 bits
+        // (if we include unstuffing, then 32 bits are enough, since we have
+        // a maximum of one stuffing per two bytes)
+        vlc_val = rev_fetch(&vlc);
+
+        //decode VLC using the context c_q and the head of the VLC bitstream
+        qinf[0] = vlc_tbl0[(c_q << 7) | (vlc_val & 0x7F) ];
+
+        if (c_q == 0) { // if zero context, we need to use one MEL event
+            run -= 2; //the number of 0 events is multiplied by 2, so subtract 2
+
+            // Is the run terminated in 1? if so, use decoded VLC code,
+            // otherwise, discard decoded data, since we will decode again
+            // using a different context
+            qinf[0] = (run == -1) ? qinf[0] : 0;
+
+            // is run -1 or -2? this means a run has been consumed
+            if (run < 0) {
+                run = mel_get_run(&mel);    // get another run
+            }
+        }
 
-    //decode magsgn and update line_state
-    /////////////////////////////////////
+        // prepare context for the next quad; eqn. 1 in ITU T.814
+        c_q = ((qinf[0] & 0x10) >> 4) | ((qinf[0] & 0xE0) >> 5);
+
+        //remove data from vlc stream (0 bits are removed if qinf is not used)
+        vlc_val = rev_advance(&vlc, qinf[0] & 0x7);
+
+        //update sigma
+        // The update depends on the value of x; consider one OPJ_UINT32
+        // if x is 0, 8, 16 and so on, then this line update c locations
+        //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+        //                         LSB   c c 0 0 0 0 0 0
+        //                               c c 0 0 0 0 0 0
+        //                               0 0 0 0 0 0 0 0
+        //                               0 0 0 0 0 0 0 0
+        // if x is 4, 12, 20, then this line update locations c
+        //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+        //                         LSB   0 0 0 0 c c 0 0
+        //                               0 0 0 0 c c 0 0
+        //                               0 0 0 0 0 0 0 0
+        //                               0 0 0 0 0 0 0 0
+        *sip |= (((qinf[0] & 0x30) >> 4) | ((qinf[0] & 0xC0) >> 2)) << sip_shift;
+
+        //second quad
+        qinf[1] = 0;
+        if (x + 2 < width) { // do not run if codeblock is narrower
+            //decode VLC using the context c_q and the head of the VLC bitstream
+            qinf[1] = vlc_tbl0[(c_q << 7) | (vlc_val & 0x7F)];
+
+            // if context is zero, use one MEL event
+            if (c_q == 0) { //zero context
+                run -= 2; //subtract 2, since events number if multiplied by 2
+
+                // if event is 0, discard decoded qinf
+                qinf[1] = (run == -1) ? qinf[1] : 0;
+
+                if (run < 0) { // have we consumed all events in a run
+                    run = mel_get_run(&mel);    // if yes, then get another run
+                }
+            }
 
-    //We obtain a mask for the samples locations that needs evaluation
-    locs = 0xFF;
-    if (x + 4 > width) locs >>= (x + 4 - width) << 1; // limits width
-    locs = height > 1 ? locs : (locs & 0x55);         // limits height
+            //prepare context for the next quad, eqn. 1 in ITU T.814
+            c_q = ((qinf[1] & 0x10) >> 4) | ((qinf[1] & 0xE0) >> 5);
 
-    //first quad, starting at first sample in quad and moving on
-    if (qinf[0] & 0x10) //is it signifcant? (sigma_n)
-    {
-      OPJ_UINT32 val;
-
-      ms_val = frwd_fetch(&magsgn);         //get 32 bits of magsgn data
-      m_n = U_q[0] - ((qinf[0] >> 12) & 1); //evaluate m_n (number of bits
-                                  // to read from bitstream), using EMB e_k
-      frwd_advance(&magsgn, m_n);         //consume m_n
-      val = ms_val << 31;                 //get sign bit
-      v_n = ms_val & ((1U << m_n) - 1);   //keep only m_n bits
-      v_n |= ((qinf[0] & 0x100) >> 8) << m_n;  //add EMB e_1 as MSB
-      v_n |= 1;                                //add center of bin    
-      //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
-      //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
-      sp[0] = val | ((v_n + 2) << (p - 1)); 
-    }
-    else if (locs & 0x1) // if this is outside the codeblock, set the 
-      sp[0] = 0;         // sample to zero
+            //remove data from vlc stream, if qinf is not used, cwdlen is 0
+            vlc_val = rev_advance(&vlc, qinf[1] & 0x7);
+        }
 
-    if (qinf[0] & 0x20) //sigma_n
-    {
-      OPJ_UINT32 val, t;
-
-      ms_val = frwd_fetch(&magsgn);         //get 32 bits
-      m_n = U_q[0] - ((qinf[0] >> 13) & 1); //m_n, uses EMB e_k
-      frwd_advance(&magsgn, m_n);           //consume m_n
-      val = ms_val << 31;                   //get sign bit
-      v_n = ms_val & ((1U << m_n) - 1);     //keep only m_n bits
-      v_n |= ((qinf[0] & 0x200) >> 9) << m_n; //add EMB e_1
-      v_n |= 1;                               //bin center
-      //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
-      //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
-      sp[stride] = val | ((v_n + 2) << (p - 1)); 
-
-      //update line_state: bit 7 (\sigma^N), and E^N
-      t = lsp[0] & 0x7F;       // keep E^NW
-      v_n = 32 - count_leading_zeros(v_n); 
-      lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n)); //max(E^NW, E^N) | s
-    }
-    else if (locs & 0x2) // if this is outside the codeblock, set the 
-      sp[stride] = 0;    //no need to update line_state
+        //update sigma
+        // The update depends on the value of x; consider one OPJ_UINT32
+        // if x is 0, 8, 16 and so on, then this line update c locations
+        //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+        //                         LSB   0 0 c c 0 0 0 0
+        //                               0 0 c c 0 0 0 0
+        //                               0 0 0 0 0 0 0 0
+        //                               0 0 0 0 0 0 0 0
+        // if x is 4, 12, 20, then this line update locations c
+        //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+        //                         LSB   0 0 0 0 0 0 c c
+        //                               0 0 0 0 0 0 c c
+        //                               0 0 0 0 0 0 0 0
+        //                               0 0 0 0 0 0 0 0
+        *sip |= (((qinf[1] & 0x30) | ((qinf[1] & 0xC0) << 2))) << (4 + sip_shift);
+
+        sip += x & 0x7 ? 1 : 0; // move sigma pointer to next entry
+        sip_shift ^= 0x10;      // increment/decrement sip_shift by 16
+
+        // retrieve u
+        /////////////
+
+        // uvlc_mode is made up of u_offset bits from the quad pair
+        uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
+        if (uvlc_mode == 3) { // if both u_offset are set, get an event from
+            // the MEL run of events
+            run -= 2; //subtract 2, since the event number is multiplied by 2
+            uvlc_mode += (run == -1) ? 1 : 0; //increment uvlc_mode if event is 1
+            if (run < 0) { // if run is consumed (run is -1 or -2), get another run
+                run = mel_get_run(&mel);
+            }
+        }
+        //decode uvlc_mode to get u for both quads
+        consumed_bits = decode_init_uvlc(vlc_val, uvlc_mode, U_q);
+        if (U_q[0] > zero_planes_p1 || U_q[1] > zero_planes_p1) {
+            if (p_manager_mutex) {
+                opj_mutex_lock(p_manager_mutex);
+            }
+            opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. Decoding "
+                          "this codeblock is stopped.\n");
+            if (p_manager_mutex) {
+                opj_mutex_unlock(p_manager_mutex);
+            }
+            return OPJ_FALSE;
+        }
 
-    ++lsp; // move to next quad information
-    ++sp;  // move to next column of samples
+        //consume u bits in the VLC code
+        vlc_val = rev_advance(&vlc, consumed_bits);
 
-    //this is similar to the above two samples
-    if (qinf[0] & 0x40) 
-    {
-      OPJ_UINT32 val;
-
-      ms_val = frwd_fetch(&magsgn);
-      m_n = U_q[0] - ((qinf[0] >> 14) & 1); 
-      frwd_advance(&magsgn, m_n);
-      val = ms_val << 31;
-      v_n = ms_val & ((1U << m_n) - 1);
-      v_n |= (((qinf[0] & 0x400) >> 10) << m_n);
-      v_n |= 1; 
-      sp[0] = val | ((v_n + 2) << (p - 1));
-    }
-    else if (locs & 0x4)
-      sp[0] = 0;
+        //decode magsgn and update line_state
+        /////////////////////////////////////
 
-    lsp[0] = 0;
-    if (qinf[0] & 0x80) 
-    {
-      OPJ_UINT32 val;
-      ms_val = frwd_fetch(&magsgn);
-      m_n = U_q[0] - ((qinf[0] >> 15) & 1); //m_n
-      frwd_advance(&magsgn, m_n);
-      val = ms_val << 31;
-      v_n = ms_val & ((1U << m_n) - 1);
-      v_n |= ((qinf[0] & 0x800) >> 11) << m_n;
-      v_n |= 1; //center of bin
-      sp[stride] = val | ((v_n + 2) << (p - 1));
-
-      //line_state: bit 7 (\sigma^NW), and E^NW for next quad
-      lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
-    }
-    else if (locs & 0x8) //if outside set to 0
-      sp[stride] = 0;
+        //We obtain a mask for the sample locations that need evaluation
+        locs = 0xFF;
+        if (x + 4 > width) {
+            locs >>= (x + 4 - width) << 1;    // limits width
+        }
+        locs = height > 1 ? locs : (locs & 0x55);         // limits height
+
+        //first quad, starting at first sample in quad and moving on
+        if (qinf[0] & 0x10) { //is it significant? (sigma_n)
+            OPJ_UINT32 val;
+
+            ms_val = frwd_fetch(&magsgn);         //get 32 bits of magsgn data
+            m_n = U_q[0] - ((qinf[0] >> 12) & 1); //evaluate m_n (number of bits
+            // to read from bitstream), using EMB e_k
+            frwd_advance(&magsgn, m_n);         //consume m_n
+            val = ms_val << 31;                 //get sign bit
+            v_n = ms_val & ((1U << m_n) - 1);   //keep only m_n bits
+            v_n |= ((qinf[0] & 0x100) >> 8) << m_n;  //add EMB e_1 as MSB
+            v_n |= 1;                                //add center of bin
+            //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
+            //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
+            sp[0] = val | ((v_n + 2) << (p - 1));
+        } else if (locs & 0x1) { // not significant, but inside the codeblock:
+            sp[0] = 0;    // set the sample to zero
+        }
 
-    ++sp; //move to next column
+        if (qinf[0] & 0x20) { //sigma_n
+            OPJ_UINT32 val, t;
+
+            ms_val = frwd_fetch(&magsgn);         //get 32 bits
+            m_n = U_q[0] - ((qinf[0] >> 13) & 1); //m_n, uses EMB e_k
+            frwd_advance(&magsgn, m_n);           //consume m_n
+            val = ms_val << 31;                   //get sign bit
+            v_n = ms_val & ((1U << m_n) - 1);     //keep only m_n bits
+            v_n |= ((qinf[0] & 0x200) >> 9) << m_n; //add EMB e_1
+            v_n |= 1;                               //bin center
+            //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
+            //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
+            sp[stride] = val | ((v_n + 2) << (p - 1));
+
+            //update line_state: bit 7 (\sigma^N), and E^N
+            t = lsp[0] & 0x7F;       // keep E^NW
+            v_n = 32 - count_leading_zeros(v_n);
+            lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n)); //max(E^NW, E^N) | s
+        } else if (locs & 0x2) { // not significant, but inside the codeblock:
+            sp[stride] = 0;    // zero the sample; no need to update line_state
+        }
 
-    //second quad
-    if (qinf[1] & 0x10) 
-    {
-      OPJ_UINT32 val;
-
-      ms_val = frwd_fetch(&magsgn);
-      m_n = U_q[1] - ((qinf[1] >> 12) & 1); //m_n
-      frwd_advance(&magsgn, m_n);
-      val = ms_val << 31;
-      v_n = ms_val & ((1U << m_n) - 1);
-      v_n |= (((qinf[1] & 0x100) >> 8) << m_n);
-      v_n |= 1;
-      sp[0] = val | ((v_n + 2) << (p - 1));
-    }
-    else if (locs & 0x10)
-      sp[0] = 0;
+        ++lsp; // move to next quad information
+        ++sp;  // move to next column of samples
+
+        //this is similar to the above two samples
+        if (qinf[0] & 0x40) {
+            OPJ_UINT32 val;
+
+            ms_val = frwd_fetch(&magsgn);
+            m_n = U_q[0] - ((qinf[0] >> 14) & 1);
+            frwd_advance(&magsgn, m_n);
+            val = ms_val << 31;
+            v_n = ms_val & ((1U << m_n) - 1);
+            v_n |= (((qinf[0] & 0x400) >> 10) << m_n);
+            v_n |= 1;
+            sp[0] = val | ((v_n + 2) << (p - 1));
+        } else if (locs & 0x4) {
+            sp[0] = 0;
+        }
 
-    if (qinf[1] & 0x20)
-    {
-      OPJ_UINT32 val, t;
-
-      ms_val = frwd_fetch(&magsgn);
-      m_n = U_q[1] - ((qinf[1] >> 13) & 1); //m_n
-      frwd_advance(&magsgn, m_n);
-      val = ms_val << 31;
-      v_n = ms_val & ((1U << m_n) - 1);
-      v_n |= (((qinf[1] & 0x200) >> 9) << m_n);
-      v_n |= 1;
-      sp[stride] = val | ((v_n + 2) << (p - 1));
-
-      //update line_state: bit 7 (\sigma^N), and E^N
-      t = lsp[0] & 0x7F;            //E^NW
-      v_n = 32 - count_leading_zeros(v_n);     //E^N
-      lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n)); //max(E^NW, E^N) | s
-    }
-    else if (locs & 0x20)
-      sp[stride] = 0;      //no need to update line_state
+        lsp[0] = 0;
+        if (qinf[0] & 0x80) {
+            OPJ_UINT32 val;
+            ms_val = frwd_fetch(&magsgn);
+            m_n = U_q[0] - ((qinf[0] >> 15) & 1); //m_n
+            frwd_advance(&magsgn, m_n);
+            val = ms_val << 31;
+            v_n = ms_val & ((1U << m_n) - 1);
+            v_n |= ((qinf[0] & 0x800) >> 11) << m_n;
+            v_n |= 1; //center of bin
+            sp[stride] = val | ((v_n + 2) << (p - 1));
+
+            //line_state: bit 7 (\sigma^NW), and E^NW for next quad
+            lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
+        } else if (locs & 0x8) { //insignificant but inside the codeblock: set to 0
+            sp[stride] = 0;
+        }
 
-    ++lsp; //move line state to next quad
-    ++sp;  //move to next sample
+        ++sp; //move to next column
+
+        //second quad
+        if (qinf[1] & 0x10) {
+            OPJ_UINT32 val;
+
+            ms_val = frwd_fetch(&magsgn);
+            m_n = U_q[1] - ((qinf[1] >> 12) & 1); //m_n
+            frwd_advance(&magsgn, m_n);
+            val = ms_val << 31;
+            v_n = ms_val & ((1U << m_n) - 1);
+            v_n |= (((qinf[1] & 0x100) >> 8) << m_n);
+            v_n |= 1;
+            sp[0] = val | ((v_n + 2) << (p - 1));
+        } else if (locs & 0x10) {
+            sp[0] = 0;
+        }
 
-    if (qinf[1] & 0x40)
-    {
-      OPJ_UINT32 val;
-
-      ms_val = frwd_fetch(&magsgn);
-      m_n = U_q[1] - ((qinf[1] >> 14) & 1); //m_n
-      frwd_advance(&magsgn, m_n);
-      val = ms_val << 31;
-      v_n = ms_val & ((1U << m_n) - 1);
-      v_n |= (((qinf[1] & 0x400) >> 10) << m_n);
-      v_n |= 1;
-      sp[0] = val | ((v_n + 2) << (p - 1));
-    }
-    else if (locs & 0x40)
-      sp[0] = 0;
+        if (qinf[1] & 0x20) {
+            OPJ_UINT32 val, t;
+
+            ms_val = frwd_fetch(&magsgn);
+            m_n = U_q[1] - ((qinf[1] >> 13) & 1); //m_n
+            frwd_advance(&magsgn, m_n);
+            val = ms_val << 31;
+            v_n = ms_val & ((1U << m_n) - 1);
+            v_n |= (((qinf[1] & 0x200) >> 9) << m_n);
+            v_n |= 1;
+            sp[stride] = val | ((v_n + 2) << (p - 1));
+
+            //update line_state: bit 7 (\sigma^N), and E^N
+            t = lsp[0] & 0x7F;            //E^NW
+            v_n = 32 - count_leading_zeros(v_n);     //E^N
+            lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n)); //max(E^NW, E^N) | s
+        } else if (locs & 0x20) {
+            sp[stride] = 0;    //no need to update line_state
+        }
 
-    lsp[0] = 0;
-    if (qinf[1] & 0x80)
-    {
-      OPJ_UINT32 val;
-
-      ms_val = frwd_fetch(&magsgn);
-      m_n = U_q[1] - ((qinf[1] >> 15) & 1); //m_n
-      frwd_advance(&magsgn, m_n);
-      val = ms_val << 31;
-      v_n = ms_val & ((1U << m_n) - 1);
-      v_n |= (((qinf[1] & 0x800) >> 11) << m_n);
-      v_n |= 1; //center of bin
-      sp[stride] = val | ((v_n + 2) << (p - 1));
-
-      //line_state: bit 7 (\sigma^NW), and E^NW for next quad
-      lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
-    }
-    else if (locs & 0x80)
-      sp[stride] = 0;
-
-    ++sp;
-  }
-
-  //non-initial lines
-  //////////////////////////
-  for (OPJ_INT32 y = 2; y < height; /*done at the end of loop*/)
-  {
-    OPJ_UINT32 *sip;
-    OPJ_UINT8 ls0;
-
-    sip_shift ^= 0x2;  // shift sigma to the upper half od the nibble
-    sip_shift &= 0xFFFFFFEFU; //move back to 0 (it might have been at 0x10)
-    sip = y & 0x4 ? sigma2 : sigma1; //choose sigma array
-
-    lsp = line_state;
-    ls0 = lsp[0];                   // read the line state value
-    lsp[0] = 0;                     // and set it to zero
-    sp = decoded_data + y * stride; // generated samples
-    c_q = 0;                        // context
-    for (OPJ_INT32 x = 0; x < width; x += 4)
-    {
-      OPJ_UINT32 U_q[2];
-      OPJ_UINT32 uvlc_mode, consumed_bits;
-      OPJ_UINT32 m_n, v_n;
-      OPJ_UINT32 ms_val;
-      OPJ_UINT32 locs;
-
-      // decode vlc
-      /////////////
-
-      //first quad
-      // get context, eqn. 2 ITU T.814
-      // c_q has \sigma^W | \sigma^SW
-      c_q |= (ls0 >> 7);          //\sigma^NW | \sigma^N
-      c_q |= (lsp[1] >> 5) & 0x4; //\sigma^NE | \sigma^NF
-
-      //the following is very similar to previous code, so please refer to 
-      // that
-      vlc_val = rev_fetch(&vlc);
-      qinf[0] = vlc_tbl1[(c_q << 7) | (vlc_val & 0x7F)];
-      if (c_q == 0) //zero context
-      {
-        run -= 2;
-        qinf[0] = (run == -1) ? qinf[0] : 0;
-        if (run < 0)
-          run = mel_get_run(&mel);
-      }
-      //prepare context for the next quad, \sigma^W | \sigma^SW
-      c_q = ((qinf[0] & 0x40) >> 5) | ((qinf[0] & 0x80) >> 6);
-
-      //remove data from vlc stream
-      vlc_val = rev_advance(&vlc, qinf[0] & 0x7);
-
-      //update sigma
-      // The update depends on the value of x and y; consider one OPJ_UINT32
-      // if x is 0, 8, 16 and so on, and y is 2, 6, etc., then this 
-      // line update c locations
-      //      nibble (4 bits) number   0 1 2 3 4 5 6 7
-      //                         LSB   0 0 0 0 0 0 0 0 
-      //                               0 0 0 0 0 0 0 0
-      //                               c c 0 0 0 0 0 0
-      //                               c c 0 0 0 0 0 0
-      *sip |= (((qinf[0]&0x30) >> 4) | ((qinf[0]&0xC0) >> 2)) << sip_shift;
-
-      //second quad
-      qinf[1] = 0;
-      if (x + 2 < width)
-      {
-        c_q |= (lsp[1] >> 7);
-        c_q |= (lsp[2] >> 5) & 0x4;
-        qinf[1] = vlc_tbl1[(c_q << 7) | (vlc_val & 0x7F)];
-        if (c_q == 0) //zero context
-        {
-          run -= 2;
-          qinf[1] = (run == -1) ? qinf[1] : 0;
-          if (run < 0)
-            run = mel_get_run(&mel);
+        ++lsp; //move line state to next quad
+        ++sp;  //move to next sample
+
+        if (qinf[1] & 0x40) {
+            OPJ_UINT32 val;
+
+            ms_val = frwd_fetch(&magsgn);
+            m_n = U_q[1] - ((qinf[1] >> 14) & 1); //m_n
+            frwd_advance(&magsgn, m_n);
+            val = ms_val << 31;
+            v_n = ms_val & ((1U << m_n) - 1);
+            v_n |= (((qinf[1] & 0x400) >> 10) << m_n);
+            v_n |= 1;
+            sp[0] = val | ((v_n + 2) << (p - 1));
+        } else if (locs & 0x40) {
+            sp[0] = 0;
         }
-        //prepare context for the next quad
-        c_q = ((qinf[1] & 0x40) >> 5) | ((qinf[1] & 0x80) >> 6);
-        //remove data from vlc stream
-        vlc_val = rev_advance(&vlc, qinf[1] & 0x7);
-      }
-
-      //update sigma
-      *sip |= (((qinf[1]&0x30) | ((qinf[1]&0xC0) << 2))) << (4+sip_shift);
-
-      sip += x & 0x7 ? 1 : 0;
-      sip_shift ^= 0x10;
-
-      //retrieve u
-      ////////////
-      uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
-      consumed_bits = decode_noninit_uvlc(vlc_val, uvlc_mode, U_q);
-      vlc_val = rev_advance(&vlc, consumed_bits);
-
-      //calculate E^max and add it to U_q, eqns 5 and 6 in ITU T.814
-      if ((qinf[0] & 0xF0) & ((qinf[0] & 0xF0) - 1)) // is \gamma_q 1?
-      {
-        OPJ_UINT32 E = (ls0 & 0x7Fu);
-        E = E > (lsp[1] & 0x7Fu) ? E : (lsp[1]&0x7Fu); //max(E, E^NE, E^NF)
-        //since U_q alread has u_q + 1, we subtract 2 instead of 1
-        U_q[0] += E > 2 ? E - 2 : 0;
-      }
-
-      if ((qinf[1] & 0xF0) & ((qinf[1] & 0xF0) - 1)) //is \gamma_q 1? 
-      {
-        OPJ_UINT32 E = (lsp[1] & 0x7Fu);
-        E = E > (lsp[2] & 0x7Fu) ? E : (lsp[2]&0x7Fu); //max(E, E^NE, E^NF)
-        //since U_q alread has u_q + 1, we subtract 2 instead of 1
-        U_q[1] += E > 2 ? E - 2 : 0;
-      }
-
-      if (U_q[0] > zero_planes_p1 || U_q[1] > zero_planes_p1)
-      {
-        if (p_manager_mutex)
-          opj_mutex_lock(p_manager_mutex);
-        opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
-                                "Decoding this codeblock is stopped.\n");
-        if (p_manager_mutex)
-          opj_mutex_unlock(p_manager_mutex);
-        return OPJ_FALSE;
-      }
-
-      ls0 = lsp[2]; //for next double quad
-      lsp[1] = lsp[2] = 0;
-
-      //decode magsgn and update line_state
-      /////////////////////////////////////
-
-      //locations where samples need update
-      locs = 0xFF;
-      if (x + 4 > width) locs >>= (x + 4 - width) << 1;
-      locs = height > 1 ? locs : (locs & 0x55);
-
-
-      if (qinf[0] & 0x10) //sigma_n
-      {
-        OPJ_UINT32 val;
-
-        ms_val = frwd_fetch(&magsgn);
-        m_n = U_q[0] - ((qinf[0] >> 12) & 1); //m_n
-        frwd_advance(&magsgn, m_n);
-        val = ms_val << 31;
-        v_n = ms_val & ((1U << m_n) - 1);
-        v_n |= ((qinf[0] & 0x100) >> 8) << m_n;
-        v_n |= 1; //center of bin
-        sp[0] = val | ((v_n + 2) << (p - 1));
-      }
-      else if (locs & 0x1)
-        sp[0] = 0;
-
-      if (qinf[0] & 0x20) //sigma_n
-      {
-        OPJ_UINT32 val, t;
-
-        ms_val = frwd_fetch(&magsgn);
-        m_n = U_q[0] - ((qinf[0] >> 13) & 1); //m_n
-        frwd_advance(&magsgn, m_n);
-        val = ms_val << 31;
-        v_n = ms_val & ((1U << m_n) - 1);
-        v_n |= ((qinf[0] & 0x200) >> 9) << m_n;
-        v_n |= 1; //center of bin
-        sp[stride] = val | ((v_n + 2) << (p - 1));
-
-        //update line_state: bit 7 (\sigma^N), and E^N
-        t = lsp[0] & 0x7F;          //E^NW
-        v_n = 32 - count_leading_zeros(v_n); 
-        lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n));
-      }
-      else if (locs & 0x2)
-        sp[stride] = 0; //no need to update line_state
-
-      ++lsp;
-      ++sp;
-
-      if (qinf[0] & 0x40) //sigma_n
-      {
-        OPJ_UINT32 val;
-
-        ms_val = frwd_fetch(&magsgn);
-        m_n = U_q[0] - ((qinf[0] >> 14) & 1); //m_n
-        frwd_advance(&magsgn, m_n);
-        val = ms_val << 31;
-        v_n = ms_val & ((1U << m_n) - 1);
-        v_n |= (((qinf[0] & 0x400) >> 10) << m_n);
-        v_n |= 1;                            //center of bin
-        sp[0] = val | ((v_n + 2) << (p - 1));
-      }
-      else if (locs & 0x4)
-        sp[0] = 0;
-
-      if (qinf[0] & 0x80) //sigma_n
-      {
-        OPJ_UINT32 val;
-
-        ms_val = frwd_fetch(&magsgn);
-        m_n = U_q[0] - ((qinf[0] >> 15) & 1); //m_n
-        frwd_advance(&magsgn, m_n);
-        val = ms_val << 31;
-        v_n = ms_val & ((1U << m_n) - 1);
-        v_n |= ((qinf[0] & 0x800) >> 11) << m_n;
-        v_n |= 1; //center of bin
-        sp[stride] = val | ((v_n + 2) << (p - 1));
-
-        //update line_state: bit 7 (\sigma^NW), and E^NW for next quad
-        lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
-      }
-      else if (locs & 0x8)
-        sp[stride] = 0;
-
-      ++sp;
-
-      if (qinf[1] & 0x10) //sigma_n
-      {
-        OPJ_UINT32 val;
-
-        ms_val = frwd_fetch(&magsgn);
-        m_n = U_q[1] - ((qinf[1] >> 12) & 1); //m_n
-        frwd_advance(&magsgn, m_n);
-        val = ms_val << 31;
-        v_n = ms_val & ((1U << m_n) - 1);
-        v_n |= (((qinf[1] & 0x100) >> 8) << m_n);
-        v_n |= 1;                            //center of bin
-        sp[0] = val | ((v_n + 2) << (p - 1));
-      }
-      else if (locs & 0x10)
-        sp[0] = 0;
-
-      if (qinf[1] & 0x20) //sigma_n
-      {
-        OPJ_UINT32 val, t;
-
-        ms_val = frwd_fetch(&magsgn);
-        m_n = U_q[1] - ((qinf[1] >> 13) & 1); //m_n
-        frwd_advance(&magsgn, m_n);
-        val = ms_val << 31;
-        v_n = ms_val & ((1U << m_n) - 1);
-        v_n |= (((qinf[1] & 0x200) >> 9) << m_n);
-        v_n |= 1; //center of bin
-        sp[stride] = val | ((v_n + 2) << (p - 1));
-
-        //update line_state: bit 7 (\sigma^N), and E^N
-        t = lsp[0] & 0x7F;          //E^NW
-        v_n = 32 - count_leading_zeros(v_n); 
-        lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n));
-      }
-      else if (locs & 0x20)
-        sp[stride] = 0; //no need to update line_state
-
-      ++lsp;
-      ++sp;
-
-      if (qinf[1] & 0x40) //sigma_n
-      {
-        OPJ_UINT32 val;
-
-        ms_val = frwd_fetch(&magsgn);
-        m_n = U_q[1] - ((qinf[1] >> 14) & 1); //m_n
-        frwd_advance(&magsgn, m_n);
-        val = ms_val << 31;
-        v_n = ms_val & ((1U << m_n) - 1);
-        v_n |= (((qinf[1] & 0x400) >> 10) << m_n);
-        v_n |= 1;                            //center of bin
-        sp[0] = val | ((v_n + 2) << (p - 1));
-      }
-      else if (locs & 0x40)
-        sp[0] = 0;
-
-      if (qinf[1] & 0x80) //sigma_n
-      {
-        OPJ_UINT32 val;
-
-        ms_val = frwd_fetch(&magsgn);
-        m_n = U_q[1] - ((qinf[1] >> 15) & 1); //m_n
-        frwd_advance(&magsgn, m_n);
-        val = ms_val << 31;
-        v_n = ms_val & ((1U << m_n) - 1);
-        v_n |= (((qinf[1] & 0x800) >> 11) << m_n);
-        v_n |= 1; //center of bin
-        sp[stride] = val | ((v_n + 2) << (p - 1));
-
-        //update line_state: bit 7 (\sigma^NW), and E^NW for next quad
-        lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
-      }
-      else if (locs & 0x80)
-        sp[stride] = 0;
-
-      ++sp;
+
+        lsp[0] = 0;
+        if (qinf[1] & 0x80) {
+            OPJ_UINT32 val;
+
+            ms_val = frwd_fetch(&magsgn);
+            m_n = U_q[1] - ((qinf[1] >> 15) & 1); //m_n
+            frwd_advance(&magsgn, m_n);
+            val = ms_val << 31;
+            v_n = ms_val & ((1U << m_n) - 1);
+            v_n |= (((qinf[1] & 0x800) >> 11) << m_n);
+            v_n |= 1; //center of bin
+            sp[stride] = val | ((v_n + 2) << (p - 1));
+
+            //line_state: bit 7 (\sigma^NW), and E^NW for next quad
+            lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
+        } else if (locs & 0x80) {
+            sp[stride] = 0;
+        }
+
+        ++sp;
     }
 
-    y += 2;
-    if (num_passes > 1 && (y & 3) == 0) //executed at multiples of 4
-    { // This is for SPP and potentially MRP
-
-      if (num_passes > 2) //do MRP
-      {
-        // select the current stripe
-        OPJ_UINT32 *cur_sig = y & 0x4 ? sigma1 : sigma2;
-        // the address of the data that needs updating
-        OPJ_UINT32 *dpp = decoded_data + (y - 4) * stride;
-        OPJ_UINT32 half = 1u << (p - 2); // half the center of the bin
-        for (OPJ_INT32 i = 0; i < width; i += 8)
-        {
-          //Process one entry from sigma array at a time
-          // Each nibble (4 bits) in the sigma array represents 4 rows,
-          // and the 32 bits contain 8 columns
-          OPJ_UINT32 cwd = rev_fetch_mrp(&magref); // get 32 bit data
-          OPJ_UINT32 sig = *cur_sig++; // 32 bit that will be processed now
-          OPJ_UINT32 col_mask = 0xFu;  // a mask for a column in sig
-          OPJ_UINT32 *dp = dpp + i;    // next column in decode samples
-          if (sig) // if any of the 32 bits are set
-          {
-            for (int j = 0; j < 8; ++j, dp++) //one column at a time
-            {
-              if (sig & col_mask) // lowest nibble
-              {
-                OPJ_UINT32 sample_mask = 0x11111111u & col_mask; //LSB
-
-                if (sig & sample_mask) //if LSB is set
-                {
-                  OPJ_UINT32 sym;
-
-                  assert(dp[0] != 0); // decoded value cannot be zero
-                  sym = cwd & 1; // get it value
-                  // remove center of bin if sym is 0
-                  dp[0] ^= (1 - sym) << (p - 1);
-                  dp[0] |= half;      // put half the center of bin
-                  cwd >>= 1;          //consume word
+    //non-initial lines
+    //////////////////////////
+    for (y = 2; y < height; /*done at the end of loop*/) {
+        OPJ_UINT32 *sip;
+        OPJ_UINT8 ls0;
+        OPJ_INT32 x;
+
+        sip_shift ^= 0x2;  // shift sigma to the upper half of the nibble
+        sip_shift &= 0xFFFFFFEFU; //move back to 0 (it might have been at 0x10)
+        sip = y & 0x4 ? sigma2 : sigma1; //choose sigma array
+
+        lsp = line_state;
+        ls0 = lsp[0];                   // read the line state value
+        lsp[0] = 0;                     // and set it to zero
+        sp = decoded_data + y * stride; // generated samples
+        c_q = 0;                        // context
+        for (x = 0; x < width; x += 4) {
+            OPJ_UINT32 U_q[2];
+            OPJ_UINT32 uvlc_mode, consumed_bits;
+            OPJ_UINT32 m_n, v_n;
+            OPJ_UINT32 ms_val;
+            OPJ_UINT32 locs;
+
+            // decode vlc
+            /////////////
+
+            //first quad
+            // get context, eqn. 2 ITU T.814
+            // c_q has \sigma^W | \sigma^SW
+            c_q |= (ls0 >> 7);          //\sigma^NW | \sigma^N
+            c_q |= (lsp[1] >> 5) & 0x4; //\sigma^NE | \sigma^NF
+
+            //the following is very similar to previous code, so please refer to
+            // that
+            vlc_val = rev_fetch(&vlc);
+            qinf[0] = vlc_tbl1[(c_q << 7) | (vlc_val & 0x7F)];
+            if (c_q == 0) { //zero context
+                run -= 2;
+                qinf[0] = (run == -1) ? qinf[0] : 0;
+                if (run < 0) {
+                    run = mel_get_run(&mel);
+                }
+            }
+            //prepare context for the next quad, \sigma^W | \sigma^SW
+            c_q = ((qinf[0] & 0x40) >> 5) | ((qinf[0] & 0x80) >> 6);
+
+            //remove data from vlc stream
+            vlc_val = rev_advance(&vlc, qinf[0] & 0x7);
+
+            //update sigma
+            // The update depends on the value of x and y; consider one OPJ_UINT32
+            // if x is 0, 8, 16 and so on, and y is 2, 6, etc., then this
+            // line updates the c locations
+            //      nibble (4 bits) number   0 1 2 3 4 5 6 7
+            //                         LSB   0 0 0 0 0 0 0 0
+            //                               0 0 0 0 0 0 0 0
+            //                               c c 0 0 0 0 0 0
+            //                               c c 0 0 0 0 0 0
+            *sip |= (((qinf[0] & 0x30) >> 4) | ((qinf[0] & 0xC0) >> 2)) << sip_shift;
+
+            //second quad
+            qinf[1] = 0;
+            if (x + 2 < width) {
+                c_q |= (lsp[1] >> 7);
+                c_q |= (lsp[2] >> 5) & 0x4;
+                qinf[1] = vlc_tbl1[(c_q << 7) | (vlc_val & 0x7F)];
+                if (c_q == 0) { //zero context
+                    run -= 2;
+                    qinf[1] = (run == -1) ? qinf[1] : 0;
+                    if (run < 0) {
+                        run = mel_get_run(&mel);
+                    }
                 }
-                sample_mask += sample_mask; //next row
+                //prepare context for the next quad
+                c_q = ((qinf[1] & 0x40) >> 5) | ((qinf[1] & 0x80) >> 6);
+                //remove data from vlc stream
+                vlc_val = rev_advance(&vlc, qinf[1] & 0x7);
+            }
 
-                if (sig & sample_mask)
-                {
-                  OPJ_UINT32 sym;
+            //update sigma
+            *sip |= (((qinf[1] & 0x30) | ((qinf[1] & 0xC0) << 2))) << (4 + sip_shift);
 
-                  assert(dp[stride] != 0);
-                  sym = cwd & 1;
-                  dp[stride] ^= (1 - sym) << (p - 1);
-                  dp[stride] |= half;
-                  cwd >>= 1;
-                }
-                sample_mask += sample_mask;
+            sip += x & 0x7 ? 1 : 0;
+            sip_shift ^= 0x10;
 
-                if (sig & sample_mask)
-                {
-                  OPJ_UINT32 sym;
+            //retrieve u
+            ////////////
+            uvlc_mode = ((qinf[0] & 0x8) >> 3) | ((qinf[1] & 0x8) >> 2);
+            consumed_bits = decode_noninit_uvlc(vlc_val, uvlc_mode, U_q);
+            vlc_val = rev_advance(&vlc, consumed_bits);
 
-                  assert(dp[2 * stride] != 0);
-                  sym = cwd & 1;
-                  dp[2 * stride] ^= (1 - sym) << (p - 1);
-                  dp[2 * stride] |= half;
-                  cwd >>= 1;
-                }
-                sample_mask += sample_mask;
+            //calculate E^max and add it to U_q, eqns 5 and 6 in ITU T.814
+            if ((qinf[0] & 0xF0) & ((qinf[0] & 0xF0) - 1)) { // is \gamma_q 1?
+                OPJ_UINT32 E = (ls0 & 0x7Fu);
+                E = E > (lsp[1] & 0x7Fu) ? E : (lsp[1] & 0x7Fu); //max(E, E^NE, E^NF)
+                //since U_q already has u_q + 1, we subtract 2 instead of 1
+                U_q[0] += E > 2 ? E - 2 : 0;
+            }
 
-                if (sig & sample_mask)
-                {
-                  OPJ_UINT32 sym;
+            if ((qinf[1] & 0xF0) & ((qinf[1] & 0xF0) - 1)) { //is \gamma_q 1?
+                OPJ_UINT32 E = (lsp[1] & 0x7Fu);
+                E = E > (lsp[2] & 0x7Fu) ? E : (lsp[2] & 0x7Fu); //max(E, E^NE, E^NF)
+                //since U_q already has u_q + 1, we subtract 2 instead of 1
+                U_q[1] += E > 2 ? E - 2 : 0;
+            }
 
-                  assert(dp[3 * stride] != 0);
-                  sym = cwd & 1;
-                  dp[3 * stride] ^= (1 - sym) << (p - 1);
-                  dp[3 * stride] |= half;
-                  cwd >>= 1;
+            if (U_q[0] > zero_planes_p1 || U_q[1] > zero_planes_p1) {
+                if (p_manager_mutex) {
+                    opj_mutex_lock(p_manager_mutex);
                 }
-                sample_mask += sample_mask;
-              }
-              col_mask <<= 4; //next column
+                opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
+                              "Decoding this codeblock is stopped.\n");
+                if (p_manager_mutex) {
+                    opj_mutex_unlock(p_manager_mutex);
+                }
+                return OPJ_FALSE;
             }
-          }
-          // consume data according to the number of bits set
-          rev_advance_mrp(&magref, population_count(sig)); 
-        }
-      }
-
-      if (y >= 4) // update mbr array at the end of each stripe
-      {
-        //generate mbr corresponding to a stripe
-        OPJ_UINT32 *sig = y & 0x4 ? sigma1 : sigma2;
-        OPJ_UINT32 *mbr = y & 0x4 ? mbr1 : mbr2;
-
-        //data is processed in patches of 8 columns, each 
-        // each 32 bits in sigma1 or mbr1 represent 4 rows
-
-        //integrate horizontally
-        OPJ_UINT32 prev = 0; // previous columns
-        for (OPJ_INT32 i = 0; i < width; i += 8, mbr++, sig++)
-        {
-          OPJ_UINT32 t, z;
-
-          mbr[0] = sig[0];         //start with significant samples
-          mbr[0] |= prev >> 28;    //for first column, left neighbors
-          mbr[0] |= sig[0] << 4;   //left neighbors
-          mbr[0] |= sig[0] >> 4;   //right neighbors
-          mbr[0] |= sig[1] << 28;  //for last column, right neighbors
-          prev = sig[0];           // for next group of columns
-
-          //integrate vertically
-          t = mbr[0], z = mbr[0];
-          z |= (t & 0x77777777) << 1; //above neighbors
-          z |= (t & 0xEEEEEEEE) >> 1; //below neighbors
-          mbr[0] = z & ~sig[0]; //remove already significance samples
-        }
-      }
-
-      if (y >= 8) //wait until 8 rows has been processed
-      {
-        OPJ_UINT32 *cur_sig, *cur_mbr, *nxt_sig, *nxt_mbr;
-        OPJ_UINT32 prev;
-        OPJ_UINT32 val;
-
-        // add membership from the next stripe, obtained above
-        cur_sig = y & 0x4 ? sigma2 : sigma1;
-        cur_mbr = y & 0x4 ? mbr2 : mbr1;
-        nxt_sig = y & 0x4 ? sigma1 : sigma2;  //future samples
-        prev = 0; // the columns before these group of 8 columns
-        for (OPJ_INT32 i=0; i < width; i+=8, cur_mbr++, cur_sig++, nxt_sig++)
-        {
-          OPJ_UINT32 t = nxt_sig[0];
-          t |= prev >> 28;        //for first column, left neighbors
-          t |= nxt_sig[0] << 4;   //left neighbors
-          t |= nxt_sig[0] >> 4;   //right neighbors
-          t |= nxt_sig[1] << 28;  //for last column, right neighbors
-          prev = nxt_sig[0];      // for next group of columns
-
-          cur_mbr[0] |= (t & 0x11111111u) << 3; //propagate up to cur_mbr
-          cur_mbr[0] &= ~cur_sig[0]; //remove already significance samples
-        }
 
-        //find new locations and get signs
-        cur_sig = y & 0x4 ? sigma2 : sigma1;  
-        cur_mbr = y & 0x4 ? mbr2 : mbr1;
-        nxt_sig = y & 0x4 ? sigma1 : sigma2; //future samples
-        nxt_mbr = y & 0x4 ? mbr1 : mbr2;     //future samples
-        val = 3u << (p - 2); // sample values for newly discovered 
-                             // signficant samples including the bin center
-        for (OPJ_INT32 i = 0; i < width;
-              i += 8, cur_sig++, cur_mbr++, nxt_sig++, nxt_mbr++)
-        {
-          OPJ_UINT32 ux, tx;
-          OPJ_UINT32 mbr = *cur_mbr;
-          OPJ_UINT32 new_sig = 0;
-          if (mbr)  //are there any samples that migt be signficant 
-          {
-            for (OPJ_INT32 n = 0; n < 8; n += 4)
-            {
-              OPJ_UINT32 col_mask;
-              OPJ_UINT32 inv_sig;
-              OPJ_INT32 end;
-
-              OPJ_UINT32 cwd = frwd_fetch(&sigprop); //get 32 bits
-              OPJ_UINT32 cnt = 0;
-
-              OPJ_UINT32 *dp = decoded_data + (y - 8) * stride;
-              dp += i + n; //address for decoded samples
-
-              col_mask = 0xFu << (4 * n); //a mask to select a column
-
-              inv_sig = ~cur_sig[0]; // insignificant samples
-
-              //find the last sample we operate on
-              end = n + 4 + i < width ? n + 4 : width - i;
-
-              for (OPJ_INT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
-              {
-                OPJ_UINT32 sample_mask;
-
-                if ((col_mask & mbr) == 0) //no samples need checking
-                  continue;
-
-                //scan mbr to find a new signficant sample
-                sample_mask = 0x11111111u & col_mask; // LSB
-                if (mbr & sample_mask)
-                {
-                  assert(dp[0] == 0); // the sample must have been 0
-                  if (cwd & 1) //if this sample has become significant
-                  { // must propagate it to nearby samples
-                    OPJ_UINT32 t;
-                    new_sig |= sample_mask;  // new significant samples
-                    t = 0x32u << (j * 4);// propagation to neighbors
-                    mbr |= t & inv_sig; //remove already signifcant samples
-                  }
-                  cwd >>= 1; ++cnt; //consume bit and increment number of
-                                    //consumed bits
-                }
+            ls0 = lsp[2]; //for next double quad
+            lsp[1] = lsp[2] = 0;
 
-                sample_mask += sample_mask;  // next row
-                if (mbr & sample_mask)
-                {
-                  assert(dp[stride] == 0);
-                  if (cwd & 1)
-                  {
-                    OPJ_UINT32 t;
-                    new_sig |= sample_mask;
-                    t = 0x74u << (j * 4);
-                    mbr |= t & inv_sig;
-                  }
-                  cwd >>= 1; ++cnt;
-                }
+            //decode magsgn and update line_state
+            /////////////////////////////////////
 
-                sample_mask += sample_mask;
-                if (mbr & sample_mask)
-                {
-                  assert(dp[2 * stride] == 0);
-                  if (cwd & 1)
-                  {
-                    OPJ_UINT32 t;
-                    new_sig |= sample_mask;
-                    t = 0xE8u << (j * 4);
-                    mbr |= t & inv_sig;
-                  }
-                  cwd >>= 1; ++cnt;
-                }
+            //locations where samples need update
+            locs = 0xFF;
+            if (x + 4 > width) {
+                locs >>= (x + 4 - width) << 1;
+            }
+            locs = height > 1 ? locs : (locs & 0x55);
+
+
+            if (qinf[0] & 0x10) { //sigma_n
+                OPJ_UINT32 val;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[0] - ((qinf[0] >> 12) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= ((qinf[0] & 0x100) >> 8) << m_n;
+                v_n |= 1; //center of bin
+                sp[0] = val | ((v_n + 2) << (p - 1));
+            } else if (locs & 0x1) {
+                sp[0] = 0;
+            }
 
-                sample_mask += sample_mask;
-                if (mbr & sample_mask)
-                {
-                  assert(dp[3 * stride] == 0);
-                  if (cwd & 1)
-                  {
-                    OPJ_UINT32 t;
-                    new_sig |= sample_mask;
-                    t = 0xC0u << (j * 4);
-                    mbr |= t & inv_sig;
-                  }
-                  cwd >>= 1; ++cnt;
-                }
-              }
-
-              //obtain signs here
-              if (new_sig & (0xFFFFu << (4 * n))) //if any
-              {
-                OPJ_UINT32 col_mask;
-                OPJ_UINT32 *dp = decoded_data + (y - 8) * stride;
-                dp += i + n; // decoded samples address
-                col_mask = 0xFu << (4 * n); //mask to select a column
-
-                for (OPJ_INT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
-                {
-                  OPJ_UINT32 sample_mask;
-
-                  if ((col_mask & new_sig) == 0) //if non is signficant
-                    continue;
-
-                  //scan 4 signs
-                  sample_mask = 0x11111111u & col_mask;
-                  if (new_sig & sample_mask)
-                  {
-                    assert(dp[0] == 0);
-                    dp[0] |= ((cwd & 1) << 31) | val; //put value and sign
-                    cwd >>= 1; ++cnt; //consume bit and increment number
-                                      //of consumed bits
-                  }
-
-                  sample_mask += sample_mask;
-                  if (new_sig & sample_mask)
-                  {
-                    assert(dp[stride] == 0);
-                    dp[stride] |= ((cwd & 1) << 31) | val;
-                    cwd >>= 1; ++cnt;
-                  }
-
-                  sample_mask += sample_mask;
-                  if (new_sig & sample_mask)
-                  {
-                    assert(dp[2 * stride] == 0);
-                    dp[2 * stride] |= ((cwd & 1) << 31) | val;
-                    cwd >>= 1; ++cnt;
-                  }
-
-                  sample_mask += sample_mask;
-                  if (new_sig & sample_mask)
-                  {
-                    assert(dp[3 * stride] == 0);
-                    dp[3 * stride] |= ((cwd & 1) << 31) | val;
-                    cwd >>= 1; ++cnt;
-                  }
-                }
+            if (qinf[0] & 0x20) { //sigma_n
+                OPJ_UINT32 val, t;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[0] - ((qinf[0] >> 13) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= ((qinf[0] & 0x200) >> 9) << m_n;
+                v_n |= 1; //center of bin
+                sp[stride] = val | ((v_n + 2) << (p - 1));
+
+                //update line_state: bit 7 (\sigma^N), and E^N
+                t = lsp[0] & 0x7F;          //E^NW
+                v_n = 32 - count_leading_zeros(v_n);
+                lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n));
+            } else if (locs & 0x2) {
+                sp[stride] = 0;    //no need to update line_state
+            }
 
-              }
-              frwd_advance(&sigprop, cnt); //consume the bits from bitstrm
-              cnt = 0;
-
-              //update the next 8 columns
-              if (n == 4)
-              {
-                //horizontally
-                OPJ_UINT32 t = new_sig >> 28;
-                t |= ((t & 0xE) >> 1) | ((t & 7) << 1);
-                cur_mbr[1] |= t & ~cur_sig[1];
-              }
+            ++lsp;
+            ++sp;
+
+            if (qinf[0] & 0x40) { //sigma_n
+                OPJ_UINT32 val;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[0] - ((qinf[0] >> 14) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= (((qinf[0] & 0x400) >> 10) << m_n);
+                v_n |= 1;                            //center of bin
+                sp[0] = val | ((v_n + 2) << (p - 1));
+            } else if (locs & 0x4) {
+                sp[0] = 0;
             }
-          }
-          //update the next stripe (vertically propagation)
-          new_sig |= cur_sig[0];
-          ux = (new_sig & 0x88888888) >> 3;
-          tx = ux | (ux << 4) | (ux >> 4); //left and right neighbors
-          if (i > 0)
-            nxt_mbr[-1] |= (ux << 28) & ~nxt_sig[-1];
-          nxt_mbr[0] |= tx & ~nxt_sig[0];
-          nxt_mbr[1] |= (ux >> 28) & ~nxt_sig[1];
-        }
 
-        //clear current sigma
-        //mbr need not be cleared because it is overwritten
-        cur_sig = y & 0x4 ? sigma2 : sigma1;
-        memset(cur_sig, 0, ((((OPJ_UINT32)width + 7u) >> 3) + 1u) << 2);
-      }
-    }
-  }
-
-  //terminating
-  if (num_passes > 1) {
-    OPJ_INT32 st;
-
-    if (num_passes > 2 && ((height & 3) == 1 || (height & 3) == 2))
-    {//do magref
-      OPJ_UINT32 *cur_sig = height & 0x4 ? sigma2 : sigma1; //reversed
-      OPJ_UINT32 *dpp = decoded_data + (height & 0xFFFFFC) * stride;
-      OPJ_UINT32 half = 1u << (p - 2);
-      for (OPJ_INT32 i = 0; i < width; i += 8)
-      {
-        OPJ_UINT32 cwd = rev_fetch_mrp(&magref);
-        OPJ_UINT32 sig = *cur_sig++;
-        OPJ_UINT32 col_mask = 0xF;
-        OPJ_UINT32 *dp = dpp + i;
-        if (sig)
-        {
-          for (int j = 0; j < 8; ++j, dp++)
-          {
-            if (sig & col_mask)
-            {
-              OPJ_UINT32 sample_mask = 0x11111111 & col_mask;
-
-              if (sig & sample_mask)
-              {
-                OPJ_UINT32 sym;
-                assert(dp[0] != 0);
-                sym = cwd & 1;
-                dp[0] ^= (1 - sym) << (p - 1);
-                dp[0] |= half;
-                cwd >>= 1;
-              }
-              sample_mask += sample_mask;
-
-              if (sig & sample_mask)
-              {
-                OPJ_UINT32 sym;
-                assert(dp[stride] != 0);
-                sym = cwd & 1;
-                dp[stride] ^= (1 - sym) << (p - 1);
-                dp[stride] |= half;
-                cwd >>= 1;
-              }
-              sample_mask += sample_mask;
-
-              if (sig & sample_mask)
-              {
-                OPJ_UINT32 sym;
-                assert(dp[2 * stride] != 0);
-                sym = cwd & 1;
-                dp[2 * stride] ^= (1 - sym) << (p - 1);
-                dp[2 * stride] |= half;
-                cwd >>= 1;
-              }
-              sample_mask += sample_mask;
-
-              if (sig & sample_mask)
-              {
-                OPJ_UINT32 sym;
-                assert(dp[3 * stride] != 0);
-                sym = cwd & 1;
-                dp[3 * stride] ^= (1 - sym) << (p - 1);
-                dp[3 * stride] |= half;
-                cwd >>= 1;
-              }
-              sample_mask += sample_mask;
+            if (qinf[0] & 0x80) { //sigma_n
+                OPJ_UINT32 val;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[0] - ((qinf[0] >> 15) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= ((qinf[0] & 0x800) >> 11) << m_n;
+                v_n |= 1; //center of bin
+                sp[stride] = val | ((v_n + 2) << (p - 1));
+
+                //update line_state: bit 7 (\sigma^NW), and E^NW for next quad
+                lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
+            } else if (locs & 0x8) {
+                sp[stride] = 0;
             }
-            col_mask <<= 4;
-          }
-        }
-        rev_advance_mrp(&magref, population_count(sig));
-      }
-    }
 
-    //do the last incomplete stripe
-    // for cases of (height & 3) == 0 and 3
-    // the should have been processed previously
-    if ((height & 3) == 1 || (height & 3) == 2)
-    {
-      //generate mbr of first stripe
-      OPJ_UINT32 *sig = height & 0x4 ? sigma2 : sigma1;
-      OPJ_UINT32 *mbr = height & 0x4 ? mbr2 : mbr1;
-      //integrate horizontally
-      OPJ_UINT32 prev = 0;
-      for (OPJ_INT32 i = 0; i < width; i += 8, mbr++, sig++)
-      {
-        OPJ_UINT32 t, z;
-
-        mbr[0] = sig[0];
-        mbr[0] |= prev >> 28;    //for first column, left neighbors
-        mbr[0] |= sig[0] << 4;   //left neighbors
-        mbr[0] |= sig[0] >> 4;   //left neighbors
-        mbr[0] |= sig[1] << 28;  //for last column, right neighbors
-        prev = sig[0];
-
-        //integrate vertically
-        t = mbr[0], z = mbr[0];
-        z |= (t & 0x77777777) << 1; //above neighbors
-        z |= (t & 0xEEEEEEEE) >> 1; //below neighbors
-        mbr[0] = z & ~sig[0]; //remove already significance samples
-      }
-    }
+            ++sp;
+
+            if (qinf[1] & 0x10) { //sigma_n
+                OPJ_UINT32 val;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[1] - ((qinf[1] >> 12) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= (((qinf[1] & 0x100) >> 8) << m_n);
+                v_n |= 1;                            //center of bin
+                sp[0] = val | ((v_n + 2) << (p - 1));
+            } else if (locs & 0x10) {
+                sp[0] = 0;
+            }
 
-    st = height;
-    st -= height > 6 ? (((height + 1) & 3) + 3) : height;
-    for (OPJ_INT32 y = st; y < height; y += 4)
-    {
-      OPJ_UINT32 *cur_sig, *cur_mbr, *nxt_sig, *nxt_mbr;
-      OPJ_UINT32 val;
-
-      OPJ_UINT32 pattern = 0xFFFFFFFFu; // a pattern needed samples
-      if (height - y == 3)
-        pattern = 0x77777777u;
-      else if (height - y == 2)
-        pattern = 0x33333333u;
-      else if (height - y == 1)
-        pattern = 0x11111111u;
-
-      //add membership from the next stripe, obtained above
-      if (height - y > 4)
-      {
-        OPJ_UINT32 prev = 0;
-        cur_sig = y & 0x4 ? sigma2 : sigma1;
-        cur_mbr = y & 0x4 ? mbr2 : mbr1;
-        nxt_sig = y & 0x4 ? sigma1 : sigma2;
-
-        for (OPJ_INT32 i=0; i<width; i += 8, cur_mbr++, cur_sig++, nxt_sig++)
-        {
-          OPJ_UINT32 t = nxt_sig[0];
-          t |= prev >> 28;     //for first column, left neighbors
-          t |= nxt_sig[0] << 4;   //left neighbors
-          t |= nxt_sig[0] >> 4;   //left neighbors
-          t |= nxt_sig[1] << 28;  //for last column, right neighbors
-          prev = nxt_sig[0];
-
-          cur_mbr[0] |= (t & 0x11111111) << 3;
-          //remove already significance samples
-          cur_mbr[0] &= ~cur_sig[0];
+            if (qinf[1] & 0x20) { //sigma_n
+                OPJ_UINT32 val, t;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[1] - ((qinf[1] >> 13) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= (((qinf[1] & 0x200) >> 9) << m_n);
+                v_n |= 1; //center of bin
+                sp[stride] = val | ((v_n + 2) << (p - 1));
+
+                //update line_state: bit 7 (\sigma^N), and E^N
+                t = lsp[0] & 0x7F;          //E^NW
+                v_n = 32 - count_leading_zeros(v_n);
+                lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n));
+            } else if (locs & 0x20) {
+                sp[stride] = 0;    //no need to update line_state
+            }
+
+            ++lsp;
+            ++sp;
+
+            if (qinf[1] & 0x40) { //sigma_n
+                OPJ_UINT32 val;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[1] - ((qinf[1] >> 14) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= (((qinf[1] & 0x400) >> 10) << m_n);
+                v_n |= 1;                            //center of bin
+                sp[0] = val | ((v_n + 2) << (p - 1));
+            } else if (locs & 0x40) {
+                sp[0] = 0;
+            }
+
+            if (qinf[1] & 0x80) { //sigma_n
+                OPJ_UINT32 val;
+
+                ms_val = frwd_fetch(&magsgn);
+                m_n = U_q[1] - ((qinf[1] >> 15) & 1); //m_n
+                frwd_advance(&magsgn, m_n);
+                val = ms_val << 31;
+                v_n = ms_val & ((1U << m_n) - 1);
+                v_n |= (((qinf[1] & 0x800) >> 11) << m_n);
+                v_n |= 1; //center of bin
+                sp[stride] = val | ((v_n + 2) << (p - 1));
+
+                //update line_state: bit 7 (\sigma^NW), and E^NW for next quad
+                lsp[0] = (OPJ_UINT8)(0x80 | (32 - count_leading_zeros(v_n)));
+            } else if (locs & 0x80) {
+                sp[stride] = 0;
+            }
+
+            ++sp;
         }
-      }
-
-      //find new locations and get signs
-      cur_sig = y & 0x4 ? sigma2 : sigma1;
-      cur_mbr = y & 0x4 ? mbr2 : mbr1;
-      nxt_sig = y & 0x4 ? sigma1 : sigma2;
-      nxt_mbr = y & 0x4 ? mbr1 : mbr2;
-      val = 3u << (p - 2);
-      for (OPJ_INT32 i = 0; i < width; i += 8,
-            cur_sig++, cur_mbr++, nxt_sig++, nxt_mbr++)
-      {
-        OPJ_UINT32 mbr = *cur_mbr & pattern; //skip unneeded samples
-        OPJ_UINT32 new_sig = 0;
-        OPJ_UINT32 ux, tx;
-        if (mbr)
-        {
-          for (OPJ_INT32 n = 0; n < 8; n += 4)
-          {
-            OPJ_UINT32 col_mask;
-            OPJ_UINT32 inv_sig;
-            OPJ_INT32 end;
-
-            OPJ_UINT32 cwd = frwd_fetch(&sigprop);
-            OPJ_UINT32 cnt = 0;
-
-            OPJ_UINT32 *dp = decoded_data + y * stride;
-            dp += i + n;
-
-            col_mask = 0xFu << (4 * n);
-
-            inv_sig = ~cur_sig[0] & pattern;
-
-            end = n + 4 + i < width ? n + 4 : width - i;
-            for (OPJ_INT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
-            {
-              OPJ_UINT32 sample_mask;
-
-              if ((col_mask & mbr) == 0)
-                continue;
-
-              //scan 4 mbr
-              sample_mask = 0x11111111u & col_mask;
-              if (mbr & sample_mask)
-              {
-                assert(dp[0] == 0);
-                if (cwd & 1)
-                {
-                  OPJ_UINT32 t;
-                  new_sig |= sample_mask;
-                  t = 0x32u << (j * 4);
-                  mbr |= t & inv_sig;
-                }
-                cwd >>= 1; ++cnt;
-              }
-
-              sample_mask += sample_mask;
-              if (mbr & sample_mask)
-              {
-                assert(dp[stride] == 0);
-                if (cwd & 1)
-                {
-                  OPJ_UINT32 t;
-                  new_sig |= sample_mask;
-                  t = 0x74u << (j * 4);
-                  mbr |= t & inv_sig;
-                }
-                cwd >>= 1; ++cnt;
-              }
-
-              sample_mask += sample_mask;
-              if (mbr & sample_mask)
-              {
-                assert(dp[2 * stride] == 0);
-                if (cwd & 1)
-                {
-                  OPJ_UINT32 t;
-                  new_sig |= sample_mask;
-                  t = 0xE8u << (j * 4);
-                  mbr |= t & inv_sig;
-                }
-                cwd >>= 1; ++cnt;
-              }
-
-              sample_mask += sample_mask;
-              if (mbr & sample_mask)
-              {
-                assert(dp[3 * stride] == 0);
-                if (cwd & 1)
-                {
-                  OPJ_UINT32 t;
-                  new_sig |= sample_mask;
-                  t = 0xC0u << (j * 4);
-                  mbr |= t & inv_sig;
+
+        y += 2;
+        if (num_passes > 1 && (y & 3) == 0) { //executed at multiples of 4
+            // This is for SPP and potentially MRP
+
+            if (num_passes > 2) { //do MRP
+                // select the current stripe
+                OPJ_UINT32 *cur_sig = y & 0x4 ? sigma1 : sigma2;
+                // the address of the data that needs updating
+                OPJ_UINT32 *dpp = decoded_data + (y - 4) * stride;
+                OPJ_UINT32 half = 1u << (p - 2); // half the center of the bin
+                OPJ_INT32 i;
+                for (i = 0; i < width; i += 8) {
+                    //Process one entry from sigma array at a time
+                    // Each nibble (4 bits) in the sigma array represents 4 rows,
+                    // and the 32 bits contain 8 columns
+                    OPJ_UINT32 cwd = rev_fetch_mrp(&magref); // get 32 bit data
+                    OPJ_UINT32 sig = *cur_sig++; // 32 bit that will be processed now
+                    OPJ_UINT32 col_mask = 0xFu;  // a mask for a column in sig
+                    OPJ_UINT32 *dp = dpp + i;    // next column in decode samples
+                    if (sig) { // if any of the 32 bits are set
+                        int j;
+                        for (j = 0; j < 8; ++j, dp++) { //one column at a time
+                            if (sig & col_mask) { // lowest nibble
+                                OPJ_UINT32 sample_mask = 0x11111111u & col_mask; //LSB
+
+                                if (sig & sample_mask) { //if LSB is set
+                                    OPJ_UINT32 sym;
+
+                                    assert(dp[0] != 0); // decoded value cannot be zero
+                                    sym = cwd & 1; // get it value
+                                    // remove center of bin if sym is 0
+                                    dp[0] ^= (1 - sym) << (p - 1);
+                                    dp[0] |= half;      // put half the center of bin
+                                    cwd >>= 1;          //consume word
+                                }
+                                sample_mask += sample_mask; //next row
+
+                                if (sig & sample_mask) {
+                                    OPJ_UINT32 sym;
+
+                                    assert(dp[stride] != 0);
+                                    sym = cwd & 1;
+                                    dp[stride] ^= (1 - sym) << (p - 1);
+                                    dp[stride] |= half;
+                                    cwd >>= 1;
+                                }
+                                sample_mask += sample_mask;
+
+                                if (sig & sample_mask) {
+                                    OPJ_UINT32 sym;
+
+                                    assert(dp[2 * stride] != 0);
+                                    sym = cwd & 1;
+                                    dp[2 * stride] ^= (1 - sym) << (p - 1);
+                                    dp[2 * stride] |= half;
+                                    cwd >>= 1;
+                                }
+                                sample_mask += sample_mask;
+
+                                if (sig & sample_mask) {
+                                    OPJ_UINT32 sym;
+
+                                    assert(dp[3 * stride] != 0);
+                                    sym = cwd & 1;
+                                    dp[3 * stride] ^= (1 - sym) << (p - 1);
+                                    dp[3 * stride] |= half;
+                                    cwd >>= 1;
+                                }
+                                sample_mask += sample_mask;
+                            }
+                            col_mask <<= 4; //next column
+                        }
+                    }
+                    // consume data according to the number of bits set
+                    rev_advance_mrp(&magref, population_count(sig));
                 }
-                cwd >>= 1; ++cnt;
-              }
             }
 
-            //signs here
-            if (new_sig & (0xFFFFu << (4 * n)))
-            {
-              OPJ_UINT32 col_mask;
-              OPJ_UINT32 *dp = decoded_data + y * stride;
-              dp += i + n;
-              col_mask = 0xFu << (4 * n);
-
-              for (OPJ_INT32 j = n; j < end; ++j, ++dp, col_mask <<= 4)
-              {
-                OPJ_UINT32 sample_mask;
-                if ((col_mask & new_sig) == 0)
-                  continue;
-
-                //scan 4 signs
-                sample_mask = 0x11111111u & col_mask;
-                if (new_sig & sample_mask)
-                {
-                  assert(dp[0] == 0);
-                  dp[0] |= ((cwd & 1) << 31) | val;
-                  cwd >>= 1; ++cnt;
+            if (y >= 4) { // update mbr array at the end of each stripe
+                //generate mbr corresponding to a stripe
+                OPJ_UINT32 *sig = y & 0x4 ? sigma1 : sigma2;
+                OPJ_UINT32 *mbr = y & 0x4 ? mbr1 : mbr2;
+
+                //data is processed in patches of 8 columns, each
+                // each 32 bits in sigma1 or mbr1 represent 4 rows
+
+                //integrate horizontally
+                OPJ_UINT32 prev = 0; // previous columns
+                OPJ_INT32 i;
+                for (i = 0; i < width; i += 8, mbr++, sig++) {
+                    OPJ_UINT32 t, z;
+
+                    mbr[0] = sig[0];         //start with significant samples
+                    mbr[0] |= prev >> 28;    //for first column, left neighbors
+                    mbr[0] |= sig[0] << 4;   //left neighbors
+                    mbr[0] |= sig[0] >> 4;   //right neighbors
+                    mbr[0] |= sig[1] << 28;  //for last column, right neighbors
+                    prev = sig[0];           // for next group of columns
+
+                    //integrate vertically
+                    t = mbr[0], z = mbr[0];
+                    z |= (t & 0x77777777) << 1; //above neighbors
+                    z |= (t & 0xEEEEEEEE) >> 1; //below neighbors
+                    mbr[0] = z & ~sig[0]; //remove already significance samples
                 }
+            }
 
-                sample_mask += sample_mask;
-                if (new_sig & sample_mask)
-                {
-                  assert(dp[stride] == 0);
-                  dp[stride] |= ((cwd & 1) << 31) | val;
-                  cwd >>= 1; ++cnt;
+            if (y >= 8) { //wait until 8 rows has been processed
+                OPJ_UINT32 *cur_sig, *cur_mbr, *nxt_sig, *nxt_mbr;
+                OPJ_UINT32 prev;
+                OPJ_UINT32 val;
+                OPJ_INT32 i;
+
+                // add membership from the next stripe, obtained above
+                cur_sig = y & 0x4 ? sigma2 : sigma1;
+                cur_mbr = y & 0x4 ? mbr2 : mbr1;
+                nxt_sig = y & 0x4 ? sigma1 : sigma2;  //future samples
+                prev = 0; // the columns before these group of 8 columns
+                for (i = 0; i < width; i += 8, cur_mbr++, cur_sig++, nxt_sig++) {
+                    OPJ_UINT32 t = nxt_sig[0];
+                    t |= prev >> 28;        //for first column, left neighbors
+                    t |= nxt_sig[0] << 4;   //left neighbors
+                    t |= nxt_sig[0] >> 4;   //right neighbors
+                    t |= nxt_sig[1] << 28;  //for last column, right neighbors
+                    prev = nxt_sig[0];      // for next group of columns
+
+                    cur_mbr[0] |= (t & 0x11111111u) << 3; //propagate up to cur_mbr
+                    cur_mbr[0] &= ~cur_sig[0]; //remove already significance samples
                 }
 
-                sample_mask += sample_mask;
-                if (new_sig & sample_mask)
-                {
-                  assert(dp[2 * stride] == 0);
-                  dp[2 * stride] |= ((cwd & 1) << 31) | val;
-                  cwd >>= 1; ++cnt;
+                //find new locations and get signs
+                cur_sig = y & 0x4 ? sigma2 : sigma1;
+                cur_mbr = y & 0x4 ? mbr2 : mbr1;
+                nxt_sig = y & 0x4 ? sigma1 : sigma2; //future samples
+                nxt_mbr = y & 0x4 ? mbr1 : mbr2;     //future samples
+                val = 3u << (p - 2); // sample values for newly discovered
+                // signficant samples including the bin center
+                for (i = 0; i < width;
+                        i += 8, cur_sig++, cur_mbr++, nxt_sig++, nxt_mbr++) {
+                    OPJ_UINT32 ux, tx;
+                    OPJ_UINT32 mbr = *cur_mbr;
+                    OPJ_UINT32 new_sig = 0;
+                    if (mbr) { //are there any samples that migt be signficant
+                        OPJ_INT32 n;
+                        for (n = 0; n < 8; n += 4) {
+                            OPJ_UINT32 col_mask;
+                            OPJ_UINT32 inv_sig;
+                            OPJ_INT32 end;
+                            OPJ_INT32 j;
+
+                            OPJ_UINT32 cwd = frwd_fetch(&sigprop); //get 32 bits
+                            OPJ_UINT32 cnt = 0;
+
+                            OPJ_UINT32 *dp = decoded_data + (y - 8) * stride;
+                            dp += i + n; //address for decoded samples
+
+                            col_mask = 0xFu << (4 * n); //a mask to select a column
+
+                            inv_sig = ~cur_sig[0]; // insignificant samples
+
+                            //find the last sample we operate on
+                            end = n + 4 + i < width ? n + 4 : width - i;
+
+                            for (j = n; j < end; ++j, ++dp, col_mask <<= 4) {
+                                OPJ_UINT32 sample_mask;
+
+                                if ((col_mask & mbr) == 0) { //no samples need checking
+                                    continue;
+                                }
+
+                                //scan mbr to find a new signficant sample
+                                sample_mask = 0x11111111u & col_mask; // LSB
+                                if (mbr & sample_mask) {
+                                    assert(dp[0] == 0); // the sample must have been 0
+                                    if (cwd & 1) { //if this sample has become significant
+                                        // must propagate it to nearby samples
+                                        OPJ_UINT32 t;
+                                        new_sig |= sample_mask;  // new significant samples
+                                        t = 0x32u << (j * 4);// propagation to neighbors
+                                        mbr |= t & inv_sig; //remove already signifcant samples
+                                    }
+                                    cwd >>= 1;
+                                    ++cnt; //consume bit and increment number of
+                                    //consumed bits
+                                }
+
+                                sample_mask += sample_mask;  // next row
+                                if (mbr & sample_mask) {
+                                    assert(dp[stride] == 0);
+                                    if (cwd & 1) {
+                                        OPJ_UINT32 t;
+                                        new_sig |= sample_mask;
+                                        t = 0x74u << (j * 4);
+                                        mbr |= t & inv_sig;
+                                    }
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+
+                                sample_mask += sample_mask;
+                                if (mbr & sample_mask) {
+                                    assert(dp[2 * stride] == 0);
+                                    if (cwd & 1) {
+                                        OPJ_UINT32 t;
+                                        new_sig |= sample_mask;
+                                        t = 0xE8u << (j * 4);
+                                        mbr |= t & inv_sig;
+                                    }
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+
+                                sample_mask += sample_mask;
+                                if (mbr & sample_mask) {
+                                    assert(dp[3 * stride] == 0);
+                                    if (cwd & 1) {
+                                        OPJ_UINT32 t;
+                                        new_sig |= sample_mask;
+                                        t = 0xC0u << (j * 4);
+                                        mbr |= t & inv_sig;
+                                    }
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+                            }
+
+                            //obtain signs here
+                            if (new_sig & (0xFFFFu << (4 * n))) { //if any
+                                OPJ_UINT32 col_mask;
+                                OPJ_INT32 j;
+                                OPJ_UINT32 *dp = decoded_data + (y - 8) * stride;
+                                dp += i + n; // decoded samples address
+                                col_mask = 0xFu << (4 * n); //mask to select a column
+
+                                for (j = n; j < end; ++j, ++dp, col_mask <<= 4) {
+                                    OPJ_UINT32 sample_mask;
+
+                                    if ((col_mask & new_sig) == 0) { //if non is signficant
+                                        continue;
+                                    }
+
+                                    //scan 4 signs
+                                    sample_mask = 0x11111111u & col_mask;
+                                    if (new_sig & sample_mask) {
+                                        assert(dp[0] == 0);
+                                        dp[0] |= ((cwd & 1) << 31) | val; //put value and sign
+                                        cwd >>= 1;
+                                        ++cnt; //consume bit and increment number
+                                        //of consumed bits
+                                    }
+
+                                    sample_mask += sample_mask;
+                                    if (new_sig & sample_mask) {
+                                        assert(dp[stride] == 0);
+                                        dp[stride] |= ((cwd & 1) << 31) | val;
+                                        cwd >>= 1;
+                                        ++cnt;
+                                    }
+
+                                    sample_mask += sample_mask;
+                                    if (new_sig & sample_mask) {
+                                        assert(dp[2 * stride] == 0);
+                                        dp[2 * stride] |= ((cwd & 1) << 31) | val;
+                                        cwd >>= 1;
+                                        ++cnt;
+                                    }
+
+                                    sample_mask += sample_mask;
+                                    if (new_sig & sample_mask) {
+                                        assert(dp[3 * stride] == 0);
+                                        dp[3 * stride] |= ((cwd & 1) << 31) | val;
+                                        cwd >>= 1;
+                                        ++cnt;
+                                    }
+                                }
+
+                            }
+                            frwd_advance(&sigprop, cnt); //consume the bits from bitstrm
+                            cnt = 0;
+
+                            //update the next 8 columns
+                            if (n == 4) {
+                                //horizontally
+                                OPJ_UINT32 t = new_sig >> 28;
+                                t |= ((t & 0xE) >> 1) | ((t & 7) << 1);
+                                cur_mbr[1] |= t & ~cur_sig[1];
+                            }
+                        }
+                    }
+                    //update the next stripe (vertically propagation)
+                    new_sig |= cur_sig[0];
+                    ux = (new_sig & 0x88888888) >> 3;
+                    tx = ux | (ux << 4) | (ux >> 4); //left and right neighbors
+                    if (i > 0) {
+                        nxt_mbr[-1] |= (ux << 28) & ~nxt_sig[-1];
+                    }
+                    nxt_mbr[0] |= tx & ~nxt_sig[0];
+                    nxt_mbr[1] |= (ux >> 28) & ~nxt_sig[1];
                 }
 
-                sample_mask += sample_mask;
-                if (new_sig & sample_mask)
-                {
-                  assert(dp[3 * stride] == 0);
-                  dp[3 * stride] |= ((cwd & 1) << 31) | val;
-                  cwd >>= 1; ++cnt;
+                //clear current sigma
+                //mbr need not be cleared because it is overwritten
+                cur_sig = y & 0x4 ? sigma2 : sigma1;
+                memset(cur_sig, 0, ((((OPJ_UINT32)width + 7u) >> 3) + 1u) << 2);
+            }
+        }
+    }
+
+    //terminating
+    if (num_passes > 1) {
+        OPJ_INT32 st, y;
+
+        if (num_passes > 2 && ((height & 3) == 1 || (height & 3) == 2)) {
+            //do magref
+            OPJ_UINT32 *cur_sig = height & 0x4 ? sigma2 : sigma1; //reversed
+            OPJ_UINT32 *dpp = decoded_data + (height & 0xFFFFFC) * stride;
+            OPJ_UINT32 half = 1u << (p - 2);
+            OPJ_INT32 i;
+            for (i = 0; i < width; i += 8) {
+                OPJ_UINT32 cwd = rev_fetch_mrp(&magref);
+                OPJ_UINT32 sig = *cur_sig++;
+                OPJ_UINT32 col_mask = 0xF;
+                OPJ_UINT32 *dp = dpp + i;
+                if (sig) {
+                    int j;
+                    for (j = 0; j < 8; ++j, dp++) {
+                        if (sig & col_mask) {
+                            OPJ_UINT32 sample_mask = 0x11111111 & col_mask;
+
+                            if (sig & sample_mask) {
+                                OPJ_UINT32 sym;
+                                assert(dp[0] != 0);
+                                sym = cwd & 1;
+                                dp[0] ^= (1 - sym) << (p - 1);
+                                dp[0] |= half;
+                                cwd >>= 1;
+                            }
+                            sample_mask += sample_mask;
+
+                            if (sig & sample_mask) {
+                                OPJ_UINT32 sym;
+                                assert(dp[stride] != 0);
+                                sym = cwd & 1;
+                                dp[stride] ^= (1 - sym) << (p - 1);
+                                dp[stride] |= half;
+                                cwd >>= 1;
+                            }
+                            sample_mask += sample_mask;
+
+                            if (sig & sample_mask) {
+                                OPJ_UINT32 sym;
+                                assert(dp[2 * stride] != 0);
+                                sym = cwd & 1;
+                                dp[2 * stride] ^= (1 - sym) << (p - 1);
+                                dp[2 * stride] |= half;
+                                cwd >>= 1;
+                            }
+                            sample_mask += sample_mask;
+
+                            if (sig & sample_mask) {
+                                OPJ_UINT32 sym;
+                                assert(dp[3 * stride] != 0);
+                                sym = cwd & 1;
+                                dp[3 * stride] ^= (1 - sym) << (p - 1);
+                                dp[3 * stride] |= half;
+                                cwd >>= 1;
+                            }
+                            sample_mask += sample_mask;
+                        }
+                        col_mask <<= 4;
+                    }
                 }
-              }
+                rev_advance_mrp(&magref, population_count(sig));
+            }
+        }
 
+        //do the last incomplete stripe
+        // for cases of (height & 3) == 0 and 3
+        // the should have been processed previously
+        if ((height & 3) == 1 || (height & 3) == 2) {
+            //generate mbr of first stripe
+            OPJ_UINT32 *sig = height & 0x4 ? sigma2 : sigma1;
+            OPJ_UINT32 *mbr = height & 0x4 ? mbr2 : mbr1;
+            //integrate horizontally
+            OPJ_UINT32 prev = 0;
+            OPJ_INT32 i;
+            for (i = 0; i < width; i += 8, mbr++, sig++) {
+                OPJ_UINT32 t, z;
+
+                mbr[0] = sig[0];
+                mbr[0] |= prev >> 28;    //for first column, left neighbors
+                mbr[0] |= sig[0] << 4;   //left neighbors
+                mbr[0] |= sig[0] >> 4;   //left neighbors
+                mbr[0] |= sig[1] << 28;  //for last column, right neighbors
+                prev = sig[0];
+
+                //integrate vertically
+                t = mbr[0], z = mbr[0];
+                z |= (t & 0x77777777) << 1; //above neighbors
+                z |= (t & 0xEEEEEEEE) >> 1; //below neighbors
+                mbr[0] = z & ~sig[0]; //remove already significance samples
             }
-            frwd_advance(&sigprop, cnt);
-            cnt = 0;
-
-            //update next columns
-            if (n == 4)
-            {
-              //horizontally
-              OPJ_UINT32 t = new_sig >> 28;
-              t |= ((t & 0xE) >> 1) | ((t & 7) << 1);
-              cur_mbr[1] |= t & ~cur_sig[1];
+        }
+
+        st = height;
+        st -= height > 6 ? (((height + 1) & 3) + 3) : height;
+        for (y = st; y < height; y += 4) {
+            OPJ_UINT32 *cur_sig, *cur_mbr, *nxt_sig, *nxt_mbr;
+            OPJ_UINT32 val;
+            OPJ_INT32 i;
+
+            OPJ_UINT32 pattern = 0xFFFFFFFFu; // a pattern needed samples
+            if (height - y == 3) {
+                pattern = 0x77777777u;
+            } else if (height - y == 2) {
+                pattern = 0x33333333u;
+            } else if (height - y == 1) {
+                pattern = 0x11111111u;
+            }
+
+            //add membership from the next stripe, obtained above
+            if (height - y > 4) {
+                OPJ_UINT32 prev = 0;
+                OPJ_INT32 i;
+                cur_sig = y & 0x4 ? sigma2 : sigma1;
+                cur_mbr = y & 0x4 ? mbr2 : mbr1;
+                nxt_sig = y & 0x4 ? sigma1 : sigma2;
+                for (i = 0; i < width; i += 8, cur_mbr++, cur_sig++, nxt_sig++) {
+                    OPJ_UINT32 t = nxt_sig[0];
+                    t |= prev >> 28;     //for first column, left neighbors
+                    t |= nxt_sig[0] << 4;   //left neighbors
+                    t |= nxt_sig[0] >> 4;   //left neighbors
+                    t |= nxt_sig[1] << 28;  //for last column, right neighbors
+                    prev = nxt_sig[0];
+
+                    cur_mbr[0] |= (t & 0x11111111) << 3;
+                    //remove already significance samples
+                    cur_mbr[0] &= ~cur_sig[0];
+                }
+            }
+
+            //find new locations and get signs
+            cur_sig = y & 0x4 ? sigma2 : sigma1;
+            cur_mbr = y & 0x4 ? mbr2 : mbr1;
+            nxt_sig = y & 0x4 ? sigma1 : sigma2;
+            nxt_mbr = y & 0x4 ? mbr1 : mbr2;
+            val = 3u << (p - 2);
+            for (i = 0; i < width; i += 8,
+                    cur_sig++, cur_mbr++, nxt_sig++, nxt_mbr++) {
+                OPJ_UINT32 mbr = *cur_mbr & pattern; //skip unneeded samples
+                OPJ_UINT32 new_sig = 0;
+                OPJ_UINT32 ux, tx;
+                if (mbr) {
+                    OPJ_INT32 n;
+                    for (n = 0; n < 8; n += 4) {
+                        OPJ_UINT32 col_mask;
+                        OPJ_UINT32 inv_sig;
+                        OPJ_INT32 end;
+                        OPJ_INT32 j;
+
+                        OPJ_UINT32 cwd = frwd_fetch(&sigprop);
+                        OPJ_UINT32 cnt = 0;
+
+                        OPJ_UINT32 *dp = decoded_data + y * stride;
+                        dp += i + n;
+
+                        col_mask = 0xFu << (4 * n);
+
+                        inv_sig = ~cur_sig[0] & pattern;
+
+                        end = n + 4 + i < width ? n + 4 : width - i;
+                        for (j = n; j < end; ++j, ++dp, col_mask <<= 4) {
+                            OPJ_UINT32 sample_mask;
+
+                            if ((col_mask & mbr) == 0) {
+                                continue;
+                            }
+
+                            //scan 4 mbr
+                            sample_mask = 0x11111111u & col_mask;
+                            if (mbr & sample_mask) {
+                                assert(dp[0] == 0);
+                                if (cwd & 1) {
+                                    OPJ_UINT32 t;
+                                    new_sig |= sample_mask;
+                                    t = 0x32u << (j * 4);
+                                    mbr |= t & inv_sig;
+                                }
+                                cwd >>= 1;
+                                ++cnt;
+                            }
+
+                            sample_mask += sample_mask;
+                            if (mbr & sample_mask) {
+                                assert(dp[stride] == 0);
+                                if (cwd & 1) {
+                                    OPJ_UINT32 t;
+                                    new_sig |= sample_mask;
+                                    t = 0x74u << (j * 4);
+                                    mbr |= t & inv_sig;
+                                }
+                                cwd >>= 1;
+                                ++cnt;
+                            }
+
+                            sample_mask += sample_mask;
+                            if (mbr & sample_mask) {
+                                assert(dp[2 * stride] == 0);
+                                if (cwd & 1) {
+                                    OPJ_UINT32 t;
+                                    new_sig |= sample_mask;
+                                    t = 0xE8u << (j * 4);
+                                    mbr |= t & inv_sig;
+                                }
+                                cwd >>= 1;
+                                ++cnt;
+                            }
+
+                            sample_mask += sample_mask;
+                            if (mbr & sample_mask) {
+                                assert(dp[3 * stride] == 0);
+                                if (cwd & 1) {
+                                    OPJ_UINT32 t;
+                                    new_sig |= sample_mask;
+                                    t = 0xC0u << (j * 4);
+                                    mbr |= t & inv_sig;
+                                }
+                                cwd >>= 1;
+                                ++cnt;
+                            }
+                        }
+
+                        //signs here
+                        if (new_sig & (0xFFFFu << (4 * n))) {
+                            OPJ_UINT32 col_mask;
+                            OPJ_INT32 j;
+                            OPJ_UINT32 *dp = decoded_data + y * stride;
+                            dp += i + n;
+                            col_mask = 0xFu << (4 * n);
+
+                            for (j = n; j < end; ++j, ++dp, col_mask <<= 4) {
+                                OPJ_UINT32 sample_mask;
+                                if ((col_mask & new_sig) == 0) {
+                                    continue;
+                                }
+
+                                //scan 4 signs
+                                sample_mask = 0x11111111u & col_mask;
+                                if (new_sig & sample_mask) {
+                                    assert(dp[0] == 0);
+                                    dp[0] |= ((cwd & 1) << 31) | val;
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+
+                                sample_mask += sample_mask;
+                                if (new_sig & sample_mask) {
+                                    assert(dp[stride] == 0);
+                                    dp[stride] |= ((cwd & 1) << 31) | val;
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+
+                                sample_mask += sample_mask;
+                                if (new_sig & sample_mask) {
+                                    assert(dp[2 * stride] == 0);
+                                    dp[2 * stride] |= ((cwd & 1) << 31) | val;
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+
+                                sample_mask += sample_mask;
+                                if (new_sig & sample_mask) {
+                                    assert(dp[3 * stride] == 0);
+                                    dp[3 * stride] |= ((cwd & 1) << 31) | val;
+                                    cwd >>= 1;
+                                    ++cnt;
+                                }
+                            }
+
+                        }
+                        frwd_advance(&sigprop, cnt);
+                        cnt = 0;
+
+                        //update next columns
+                        if (n == 4) {
+                            //horizontally
+                            OPJ_UINT32 t = new_sig >> 28;
+                            t |= ((t & 0xE) >> 1) | ((t & 7) << 1);
+                            cur_mbr[1] |= t & ~cur_sig[1];
+                        }
+                    }
+                }
+                //propagate down (vertically propagation)
+                new_sig |= cur_sig[0];
+                ux = (new_sig & 0x88888888) >> 3;
+                tx = ux | (ux << 4) | (ux >> 4);
+                if (i > 0) {
+                    nxt_mbr[-1] |= (ux << 28) & ~nxt_sig[-1];
+                }
+                nxt_mbr[0] |= tx & ~nxt_sig[0];
+                nxt_mbr[1] |= (ux >> 28) & ~nxt_sig[1];
             }
-          }
         }
-        //propagate down (vertically propagation)
-        new_sig |= cur_sig[0];
-        ux = (new_sig & 0x88888888) >> 3;
-        tx = ux | (ux << 4) | (ux >> 4);
-        if (i > 0)
-          nxt_mbr[-1] |= (ux << 28) & ~nxt_sig[-1];
-        nxt_mbr[0] |= tx & ~nxt_sig[0];
-        nxt_mbr[1] |= (ux >> 28) & ~nxt_sig[1];
-      }
     }
-  }
 
-  //int shift = 29 - missing_msbs;
-  for (OPJ_INT32 y = 0; y < height; ++y)
-  {
-    OPJ_INT32* sp = (OPJ_INT32*)decoded_data + y * stride;
-    for (OPJ_INT32 x = 0; x < width; ++x, ++sp)
     {
-      OPJ_INT32 val = (*sp & 0x7FFFFFFF);
-      *sp = ((OPJ_UINT32)*sp & 0x80000000) ? -val : val;
+        OPJ_INT32 x, y;
+        for (y = 0; y < height; ++y) {
+            OPJ_INT32* sp = (OPJ_INT32*)decoded_data + y * stride;
+            for (x = 0; x < width; ++x, ++sp) {
+                OPJ_INT32 val = (*sp & 0x7FFFFFFF);
+                *sp = ((OPJ_UINT32) * sp & 0x80000000) ? -val : val;
+            }
+        }
     }
-  }
 
-  return OPJ_TRUE;
+    return OPJ_TRUE;
 }
diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c
index 8bbf0be8c..6586c7018 100644
--- a/src/lib/openjp2/j2k.c
+++ b/src/lib/openjp2/j2k.c
@@ -10617,12 +10617,18 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k,
     /* SPcod (G) / SPcoc (D) */
     opj_read_bytes(l_current_ptr, &l_tccp->cblksty, 1);
     ++l_current_ptr;
-    if ((l_tccp->cblksty & 0x80U) != 0 || (l_tccp->cblksty & 0x48U) == 0x48U) { 
-    /* For HT, we only support one mode, bit 6 set, meaning that "all code-blocks 
-       within the corresponding tile-component shall be HT code-blocks, and 
-       bit 3 is reset, meaning that "No vertically causal context". */
+    if ((l_tccp->cblksty & J2K_CCP_CBLKSTY_HTMIXED) != 0) {
+        /* We do not support HT mixed mode yet*/
         opj_event_msg(p_manager, EVT_ERROR,
-                      "Error reading SPCod SPCoc element, Invalid code-block style found\n");
+                      "Error reading SPCod SPCoc element. Unsupported Mixed HT code-block style found\n");
+        return OPJ_FALSE;
+    }
+
+    if ((l_tccp->cblksty & (J2K_CCP_CBLKSTY_HT | J2K_CCP_CBLKSTY_VSC)) ==
+            (J2K_CCP_CBLKSTY_HT | J2K_CCP_CBLKSTY_VSC)) {
+        /* For HT, we do not support vertically causal mode yet. */
+        opj_event_msg(p_manager, EVT_ERROR,
+                      "Error reading SPCod SPCoc element. Unsupported HT mode with vertically causal mode. \n");
         return OPJ_FALSE;
     }
 
diff --git a/src/lib/openjp2/j2k.h b/src/lib/openjp2/j2k.h
index ac69a3763..51e7c23e6 100644
--- a/src/lib/openjp2/j2k.h
+++ b/src/lib/openjp2/j2k.h
@@ -61,7 +61,8 @@ The functions in J2K.C have for goal to read/write the several parts of the code
 #define J2K_CCP_CBLKSTY_VSC 0x08      /**< Vertically stripe causal context */
 #define J2K_CCP_CBLKSTY_PTERM 0x10    /**< Predictable termination */
 #define J2K_CCP_CBLKSTY_SEGSYM 0x20   /**< Segmentation symbols are used */
-#define J2K_CCP_CBLKSTY_HT 0x40       /**< (high throughput) HT codeblock */
+#define J2K_CCP_CBLKSTY_HT 0x40       /**< (high throughput) HT codeblocks */
+#define J2K_CCP_CBLKSTY_HTMIXED 0x80  /**< MIXED mode HT codeblocks */
 #define J2K_CCP_QNTSTY_NOQNT 0
 #define J2K_CCP_QNTSTY_SIQNT 1
 #define J2K_CCP_QNTSTY_SEQNT 2
diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c
index bb97c7eab..f5fd23391 100644
--- a/src/lib/openjp2/t1.c
+++ b/src/lib/openjp2/t1.c
@@ -1700,8 +1700,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
             opj_free(job);
             return;
         }
-    }
-    else {
+    } else {
         if (OPJ_FALSE == opj_t1_decode_cblk(
                     t1,
                     cblk,
diff --git a/src/lib/openjp2/t2.c b/src/lib/openjp2/t2.c
index 4626d69bb..48f8949bf 100644
--- a/src/lib/openjp2/t2.c
+++ b/src/lib/openjp2/t2.c
@@ -1261,61 +1261,61 @@ static OPJ_BOOL opj_t2_read_packet_header(opj_t2_t* p_t2,
 
             if ((p_tcp->tccps[p_pi->compno].cblksty & J2K_CCP_CBLKSTY_HT) != 0)
                 do {
-                  OPJ_UINT32 bit_number;
-                  l_cblk->segs[l_segno].numnewpasses = l_segno == 0 ? 1u : (OPJ_UINT32)n;
-                  bit_number = l_cblk->numlenbits + opj_uint_floorlog2(
-                                   l_cblk->segs[l_segno].numnewpasses);
-                  if (bit_number > 32) {
-                      opj_event_msg(p_manager, EVT_ERROR,
-                                    "Invalid bit number %d in opj_t2_read_packet_header()\n",
-                                    bit_number);
-                      opj_bio_destroy(l_bio);
-                      return OPJ_FALSE;
-                  }
-                  l_cblk->segs[l_segno].newlen = opj_bio_read(l_bio, bit_number);
-                  JAS_FPRINTF(stderr, "included=%d numnewpasses=%d increment=%d len=%d \n",
-                              l_included, l_cblk->segs[l_segno].numnewpasses, l_increment,
-                              l_cblk->segs[l_segno].newlen);
-
-                  n -= (OPJ_INT32)l_cblk->segs[l_segno].numnewpasses;
-                  if (n > 0) {
-                      ++l_segno;
-
-                      if (! opj_t2_init_seg(l_cblk, l_segno, p_tcp->tccps[p_pi->compno].cblksty, 0)) {
-                          opj_bio_destroy(l_bio);
-                          return OPJ_FALSE;
-                      }
-                  }
+                    OPJ_UINT32 bit_number;
+                    l_cblk->segs[l_segno].numnewpasses = l_segno == 0 ? 1 : (OPJ_UINT32)n;
+                    bit_number = l_cblk->numlenbits + opj_uint_floorlog2(
+                                     l_cblk->segs[l_segno].numnewpasses);
+                    if (bit_number > 32) {
+                        opj_event_msg(p_manager, EVT_ERROR,
+                                      "Invalid bit number %d in opj_t2_read_packet_header()\n",
+                                      bit_number);
+                        opj_bio_destroy(l_bio);
+                        return OPJ_FALSE;
+                    }
+                    l_cblk->segs[l_segno].newlen = opj_bio_read(l_bio, bit_number);
+                    JAS_FPRINTF(stderr, "included=%d numnewpasses=%d increment=%d len=%d \n",
+                                l_included, l_cblk->segs[l_segno].numnewpasses, l_increment,
+                                l_cblk->segs[l_segno].newlen);
+
+                    n -= (OPJ_INT32)l_cblk->segs[l_segno].numnewpasses;
+                    if (n > 0) {
+                        ++l_segno;
+
+                        if (! opj_t2_init_seg(l_cblk, l_segno, p_tcp->tccps[p_pi->compno].cblksty, 0)) {
+                            opj_bio_destroy(l_bio);
+                            return OPJ_FALSE;
+                        }
+                    }
                 } while (n > 0);
-            else 
+            else
                 do {
-                  OPJ_UINT32 bit_number;
-                  l_cblk->segs[l_segno].numnewpasses = (OPJ_UINT32)opj_int_min((OPJ_INT32)(
-                          l_cblk->segs[l_segno].maxpasses - l_cblk->segs[l_segno].numpasses), n);
-                  bit_number = l_cblk->numlenbits + opj_uint_floorlog2(
-                                   l_cblk->segs[l_segno].numnewpasses);
-                  if (bit_number > 32) {
-                      opj_event_msg(p_manager, EVT_ERROR,
-                                    "Invalid bit number %d in opj_t2_read_packet_header()\n",
-                                    bit_number);
-                      opj_bio_destroy(l_bio);
-                      return OPJ_FALSE;
-                  }
-                  l_cblk->segs[l_segno].newlen = opj_bio_read(l_bio, bit_number);
-                  JAS_FPRINTF(stderr, "included=%d numnewpasses=%d increment=%d len=%d \n",
-                              l_included, l_cblk->segs[l_segno].numnewpasses, l_increment,
-                              l_cblk->segs[l_segno].newlen);
-
-                  n -= (OPJ_INT32)l_cblk->segs[l_segno].numnewpasses;
-                  if (n > 0) {
-                      ++l_segno;
-
-                      if (! opj_t2_init_seg(l_cblk, l_segno, p_tcp->tccps[p_pi->compno].cblksty, 0)) {
-                          opj_bio_destroy(l_bio);
-                          return OPJ_FALSE;
-                      }
-                  }
-              } while (n > 0);
+                    OPJ_UINT32 bit_number;
+                    l_cblk->segs[l_segno].numnewpasses = (OPJ_UINT32)opj_int_min((OPJ_INT32)(
+                            l_cblk->segs[l_segno].maxpasses - l_cblk->segs[l_segno].numpasses), n);
+                    bit_number = l_cblk->numlenbits + opj_uint_floorlog2(
+                                     l_cblk->segs[l_segno].numnewpasses);
+                    if (bit_number > 32) {
+                        opj_event_msg(p_manager, EVT_ERROR,
+                                      "Invalid bit number %d in opj_t2_read_packet_header()\n",
+                                      bit_number);
+                        opj_bio_destroy(l_bio);
+                        return OPJ_FALSE;
+                    }
+                    l_cblk->segs[l_segno].newlen = opj_bio_read(l_bio, bit_number);
+                    JAS_FPRINTF(stderr, "included=%d numnewpasses=%d increment=%d len=%d \n",
+                                l_included, l_cblk->segs[l_segno].numnewpasses, l_increment,
+                                l_cblk->segs[l_segno].newlen);
+
+                    n -= (OPJ_INT32)l_cblk->segs[l_segno].numnewpasses;
+                    if (n > 0) {
+                        ++l_segno;
+
+                        if (! opj_t2_init_seg(l_cblk, l_segno, p_tcp->tccps[p_pi->compno].cblksty, 0)) {
+                            opj_bio_destroy(l_bio);
+                            return OPJ_FALSE;
+                        }
+                    }
+                } while (n > 0);
 
             ++l_cblk;
         }
diff --git a/src/lib/openjp2/tcd.h b/src/lib/openjp2/tcd.h
index a89279d0f..340c2bf8a 100644
--- a/src/lib/openjp2/tcd.h
+++ b/src/lib/openjp2/tcd.h
@@ -122,12 +122,12 @@ typedef struct opj_tcd_cblk_dec {
     opj_tcd_seg_data_chunk_t* chunks; /* Array of chunks */
     /* position of the code-blocks : left upper corner (x0, y0) right low corner (x1,y1) */
     OPJ_INT32 x0, y0, x1, y1;
-    /* Mb is The maximum number of bit-planes available for the representation of 
-       coefficients in any sub-band, b, as defined in Equation (E-2). See 
+    /* Mb is The maximum number of bit-planes available for the representation of
+       coefficients in any sub-band, b, as defined in Equation (E-2). See
        Section B.10.5 of the standard */
     OPJ_UINT32 Mb;  /* currently used only to check if HT decoding is correct */
     /* numbps is Mb - P as defined in Section B.10.5 of the standard */
-    OPJ_UINT32 numbps;  
+    OPJ_UINT32 numbps;
     /* number of bits for len, for the current packet. Transitory value */
     OPJ_UINT32 numlenbits;
     /* number of pass added to the code-blocks, for the current packet. Transitory value */

From 4ebb7d294c72c34c84e8fa0511006dee6062e6fa Mon Sep 17 00:00:00 2001
From: Aous Naman <aous72@yahoo.com>
Date: Mon, 13 Sep 2021 12:20:15 +1000
Subject: [PATCH 04/10] This fixes the previous crash. Now, no codeblock
 decoding is performed if Mb == 0; that is, when the codeblock has no
 bitplanes to decode (K_max==0).  Also, a bug is fixed whereby the codeblock
 decoder can write below the last row of the codeblock when the number of rows
 is odd and larger than 2.

---
 src/lib/openjp2/fbc_dec.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/src/lib/openjp2/fbc_dec.c b/src/lib/openjp2/fbc_dec.c
index 0b52ca7d3..e7e2e1ba5 100644
--- a/src/lib/openjp2/fbc_dec.c
+++ b/src/lib/openjp2/fbc_dec.c
@@ -980,6 +980,10 @@ OPJ_UINT32 frwd_fetch(frwd_struct_t *msp)
 
 //************************************************************************/
 /** @brief Allocates T1 buffers
+  *
+  *  @param [in, out]  t1 is codeblock coefficients storage
+  *  @param [in]       w is codeblock width
+  *  @param [in]       h is codeblock height
   */
 static OPJ_BOOL opj_t1_allocate_buffers(
     opj_t1_t *t1,
@@ -1044,6 +1048,15 @@ static OPJ_BOOL opj_t1_allocate_buffers(
 //************************************************************************/
 /** @brief Decodes one codeblock, processing the cleanup, siginificance
   *         propagation, and magnitude refinement pass
+  *
+  *  @param [in, out]  t1 is codeblock coefficients storage
+  *  @param [in]       cblk is codeblock properties
+  *  @param [in]       orient is the subband to which the codeblock belongs (not needed)
+  *  @param [in]       roishift is region of interest shift
+  *  @param [in]       cblksty is codeblock style
+  *  @param [in]       p_manager is events print manager
+  *  @param [in]       p_manager_mutex a mutex to control access to p_manager
+  *  @param [in]       check_pterm: check termination (not used)
   */
 OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
                                opj_tcd_cblk_dec_t* cblk,
@@ -1085,7 +1098,7 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
     // We ignor orient, because the same decoder is used for all subbands
     // We also ignore check_pterm, because I am not sure how it applies
-    assert(cblksty == 0x40); // that is the only support mode
+    assert(cblksty == J2K_CCP_CBLKSTY_HT); // that is the only support mode
     if (roishift != 0) {
         if (p_manager_mutex) {
             opj_mutex_lock(p_manager_mutex);
@@ -1105,6 +1118,10 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
         return OPJ_FALSE;
     }
 
+    if (cblk->Mb == 0) {
+        return OPJ_TRUE;
+    }
+
     /* Even if we have a single chunk, in multi-threaded decoding */
     /* the insertion of our synthetic marker might potentially override */
     /* valid codestream of other codeblocks decoded in parallel. */
@@ -1457,8 +1474,8 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
             //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
             //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
             sp[0] = val | ((v_n + 2) << (p - 1));
-        } else if (locs & 0x1) { // if this is outside the codeblock, set the
-            sp[0] = 0;    // sample to zero
+        } else if (locs & 0x1) { // if this is inside the codeblock, set the
+            sp[0] = 0;           // sample to zero
         }
 
         if (qinf[0] & 0x20) { //sigma_n
@@ -1479,8 +1496,8 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
             t = lsp[0] & 0x7F;       // keep E^NW
             v_n = 32 - count_leading_zeros(v_n);
             lsp[0] = (OPJ_UINT8)(0x80 | (t > v_n ? t : v_n)); //max(E^NW, E^N) | s
-        } else if (locs & 0x2) { // if this is outside the codeblock, set the
-            sp[stride] = 0;    //no need to update line_state
+        } else if (locs & 0x2) { // if this is inside the codeblock, set the
+            sp[stride] = 0;      // sample to zero
         }
 
         ++lsp; // move to next quad information
@@ -1727,7 +1744,7 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
             if (x + 4 > width) {
                 locs >>= (x + 4 - width) << 1;
             }
-            locs = height > 1 ? locs : (locs & 0x55);
+            locs = y + 2 <= height ? locs : (locs & 0x55);
 
 
             if (qinf[0] & 0x10) { //sigma_n

From f5bf36cfbaa7603fba145e33a4f366070aaeb2df Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Wed, 15 Sep 2021 03:59:11 +0200
Subject: [PATCH 05/10] Add dummy parsing of HTJ2K CAP and CPF markers to avoid
 annoying warnings (#1)

Add dummy parsing of the HTJ2K CAP and CPF markers to avoid annoying warnings.
---
 src/lib/openjp2/j2k.c | 82 +++++++++++++++++++++++++++++++++++++++++++
 src/lib/openjp2/j2k.h |  2 ++
 2 files changed, 84 insertions(+)

diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c
index 6586c7018..92c20415d 100644
--- a/src/lib/openjp2/j2k.c
+++ b/src/lib/openjp2/j2k.c
@@ -1177,6 +1177,32 @@ static OPJ_BOOL opj_j2k_read_cbd(opj_j2k_t *p_j2k,
                                  OPJ_UINT32 p_header_size,
                                  opj_event_mgr_t * p_manager);
 
+/**
+ * Reads a CAP marker (extended capabilities definition). Empty implementation. 
+ * Found in HTJ2K files
+ *
+ * @param       p_header_data   the data contained in the CAP box.
+ * @param       p_j2k                   the jpeg2000 codec.
+ * @param       p_header_size   the size of the data contained in the CAP marker.
+ * @param       p_manager               the user event manager.
+*/
+static OPJ_BOOL opj_j2k_read_cap(opj_j2k_t *p_j2k,
+                                 OPJ_BYTE * p_header_data,
+                                 OPJ_UINT32 p_header_size,
+                                 opj_event_mgr_t * p_manager);
+
+/**
+ * Reads a CPF marker (corresponding profile). Empty implementation. Found in HTJ2K files
+ * @param       p_header_data   the data contained in the CPF box.
+ * @param       p_j2k                   the jpeg2000 codec.
+ * @param       p_header_size   the size of the data contained in the CPF marker.
+ * @param       p_manager               the user event manager.
+*/
+static OPJ_BOOL opj_j2k_read_cpf(opj_j2k_t *p_j2k,
+                                 OPJ_BYTE * p_header_data,
+                                 OPJ_UINT32 p_header_size,
+                                 opj_event_mgr_t * p_manager);
+
 
 /**
  * Writes COC marker for each component.
@@ -1399,6 +1425,8 @@ static const opj_dec_memory_marker_handler_t j2k_memory_marker_handler_tab [] =
     {J2K_MS_COM, J2K_STATE_MH | J2K_STATE_TPH, opj_j2k_read_com},
     {J2K_MS_MCT, J2K_STATE_MH | J2K_STATE_TPH, opj_j2k_read_mct},
     {J2K_MS_CBD, J2K_STATE_MH, opj_j2k_read_cbd},
+    {J2K_MS_CAP, J2K_STATE_MH, opj_j2k_read_cap},
+    {J2K_MS_CPF, J2K_STATE_MH, opj_j2k_read_cpf},
     {J2K_MS_MCC, J2K_STATE_MH | J2K_STATE_TPH, opj_j2k_read_mcc},
     {J2K_MS_MCO, J2K_STATE_MH | J2K_STATE_TPH, opj_j2k_read_mco},
 #ifdef USE_JPWL
@@ -6594,6 +6622,60 @@ static OPJ_BOOL opj_j2k_read_cbd(opj_j2k_t *p_j2k,
     return OPJ_TRUE;
 }
 
+/**
+ * Reads a CAP marker (extended capabilities definition). Empty implementation. 
+ * Found in HTJ2K files.
+ * 
+ * @param       p_header_data   the data contained in the CAP box.
+ * @param       p_j2k                   the jpeg2000 codec.
+ * @param       p_header_size   the size of the data contained in the CAP marker.
+ * @param       p_manager               the user event manager.
+*/
+static OPJ_BOOL opj_j2k_read_cap(opj_j2k_t *p_j2k,
+                                 OPJ_BYTE * p_header_data,
+                                 OPJ_UINT32 p_header_size,
+                                 opj_event_mgr_t * p_manager
+                                )
+{
+    /* preconditions */
+    assert(p_header_data != 00);
+    assert(p_j2k != 00);
+    assert(p_manager != 00);
+
+    (void)p_j2k;
+    (void)p_header_data;
+    (void)p_header_size;
+    (void)p_manager;
+
+    return OPJ_TRUE;
+}
+
+/**
+ * Reads a CPF marker (corresponding profile). Empty implementation. Found in HTJ2K files
+ * @param       p_header_data   the data contained in the CPF box.
+ * @param       p_j2k                   the jpeg2000 codec.
+ * @param       p_header_size   the size of the data contained in the CPF marker.
+ * @param       p_manager               the user event manager.
+*/
+static OPJ_BOOL opj_j2k_read_cpf(opj_j2k_t *p_j2k,
+                                 OPJ_BYTE * p_header_data,
+                                 OPJ_UINT32 p_header_size,
+                                 opj_event_mgr_t * p_manager
+                                )
+{
+    /* preconditions */
+    assert(p_header_data != 00);
+    assert(p_j2k != 00);
+    assert(p_manager != 00);
+
+    (void)p_j2k;
+    (void)p_header_data;
+    (void)p_header_size;
+    (void)p_manager;
+
+    return OPJ_TRUE;
+}
+
 /* ----------------------------------------------------------------------- */
 /* J2K / JPT decoder interface                                             */
 /* ----------------------------------------------------------------------- */
diff --git a/src/lib/openjp2/j2k.h b/src/lib/openjp2/j2k.h
index 51e7c23e6..2b08e8407 100644
--- a/src/lib/openjp2/j2k.h
+++ b/src/lib/openjp2/j2k.h
@@ -73,9 +73,11 @@ The functions in J2K.C have for goal to read/write the several parts of the code
 #define J2K_MS_SOT 0xff90   /**< SOT marker value */
 #define J2K_MS_SOD 0xff93   /**< SOD marker value */
 #define J2K_MS_EOC 0xffd9   /**< EOC marker value */
+#define J2K_MS_CAP 0xff50   /**< CAP marker value */
 #define J2K_MS_SIZ 0xff51   /**< SIZ marker value */
 #define J2K_MS_COD 0xff52   /**< COD marker value */
 #define J2K_MS_COC 0xff53   /**< COC marker value */
+#define J2K_MS_CPF 0xff59   /**< CPF marker value */
 #define J2K_MS_RGN 0xff5e   /**< RGN marker value */
 #define J2K_MS_QCD 0xff5c   /**< QCD marker value */
 #define J2K_MS_QCC 0xff5d   /**< QCC marker value */

From fad594ede2e3482d2bbd0df822f56ff8760d1f4f Mon Sep 17 00:00:00 2001
From: Aous Naman <aous72@yahoo.com>
Date: Wed, 15 Sep 2021 20:31:00 +1000
Subject: [PATCH 06/10] This is useful to explain the various terms in the
 code.  It took me some time to understand the quantities and what they mean,
 and therefore I want to leave a version of it for future work.  I will
 simplify the code after this.

---
 src/lib/openjp2/fbc_dec.c | 81 ++++++++++++++++++++++++++-------------
 1 file changed, 54 insertions(+), 27 deletions(-)

diff --git a/src/lib/openjp2/fbc_dec.c b/src/lib/openjp2/fbc_dec.c
index e7e2e1ba5..e62ebea76 100644
--- a/src/lib/openjp2/fbc_dec.c
+++ b/src/lib/openjp2/fbc_dec.c
@@ -56,14 +56,21 @@
 #endif
 
 //************************************************************************/
-/** @brief Displays the error message for disabling the decoding of CUP
-  *        pass due to insufficient precision once
+/** @brief Displays the error message when 32 bits are not sufficient to
+  * decode any passes
   */
 static OPJ_BOOL cannot_decode_due_to_insufficient_precision = OPJ_FALSE;
 
+//************************************************************************/
+/** @brief Displays the error message when we do not have enough precision
+  * to decode the cleanup pass and set the bin center to 1.  The code can
+  * be modified to support this case.
+  */
+static OPJ_BOOL modify_code_to_support_this_precision = OPJ_FALSE;
+
 //************************************************************************/
 /** @brief Displays the error message for disabling the decoding of SPP and
-  *        MRP passes once
+  * MRP passes
   */
 static OPJ_BOOL cannot_decode_spp_mrp_msg = OPJ_FALSE;
 
@@ -1070,6 +1077,7 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     OPJ_BYTE* cblkdata = NULL;
     OPJ_UINT8* coded_data;
     OPJ_UINT32* decoded_data;
+    OPJ_UINT32 missing_msbs;
     OPJ_UINT32 num_passes;
     OPJ_UINT32 lengths1;
     OPJ_UINT32 lengths2;
@@ -1122,6 +1130,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
         return OPJ_TRUE;
     }
 
+    /* Mb = Kmax, numbps = Kmax + 1 - missing_msbs */
+    missing_msbs = (cblk->Mb + 1) - cblk->numbps;
+
     /* Even if we have a single chunk, in multi-threaded decoding */
     /* the insertion of our synthetic marker might potentially override */
     /* valid codestream of other codeblocks decoded in parallel. */
@@ -1231,47 +1242,62 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
         return OPJ_FALSE;
     }
 
-    if (cblk->numbps == 1 && num_passes > 1) {
-        // We do not have enough precision to decode SgnProp nor MagRef passes.
-        // We decode the cleanup passes only
-        if (cannot_decode_spp_mrp_msg == OPJ_FALSE) {
+    if (missing_msbs > 30) {
+        /* We do not have enough precision to decode any passes */
+        if (cannot_decode_due_to_insufficient_precision == OPJ_FALSE) {
             if (p_manager_mutex) {
                 opj_mutex_lock(p_manager_mutex);
             }
-            cannot_decode_spp_mrp_msg = OPJ_TRUE;
-            opj_event_msg(p_manager, EVT_WARNING, "Not enough precision to decode "
-                          "the SgnProp nor MagRef passes.  This message "
+            cannot_decode_due_to_insufficient_precision = OPJ_TRUE;
+            opj_event_msg(p_manager, EVT_ERROR, "32 bits are not enough to "
+                          "decode this codeblock. This message "
                           "will not be displayed again.\n");
             if (p_manager_mutex) {
                 opj_mutex_unlock(p_manager_mutex);
             }
         }
-        num_passes = 1;
-    }
-    if (cblk->numbps == 0) {
-        // We do not have enough precision to decode the CUP pass with the
-        // center of bin bit set.  The code can be modified to support this
-        // case, without using the center of the bin.
-        if (cannot_decode_due_to_insufficient_precision == OPJ_FALSE) {
+        return OPJ_FALSE;
+    } else if (missing_msbs == 30) {
+        /* We do not have enough precision to decode the CUP pass with the
+           center of bin bit set.  The code can be modified to support this
+           case, where we do not set the center of the bin. */
+        if (modify_code_to_support_this_precision == OPJ_FALSE) {
             if (p_manager_mutex) {
                 opj_mutex_lock(p_manager_mutex);
             }
-            cannot_decode_due_to_insufficient_precision = OPJ_TRUE;
-            opj_event_msg(p_manager, EVT_WARNING, "Not enough precision to decode "
-                          "the cleanup pass. The code should be "
-                          "modified to support this case. This message "
-                          "will not be displayed again.\n");
+            modify_code_to_support_this_precision = OPJ_TRUE;
+            opj_event_msg(p_manager, EVT_ERROR, "Not enough precision to decode "
+                          "the cleanup pass. The code can be modified to "
+                          "support this case. This message will not be "
+                          "displayed again.\n");
             if (p_manager_mutex) {
                 opj_mutex_unlock(p_manager_mutex);
             }
         }
-        return OPJ_TRUE;
+        return OPJ_FALSE;
+    } else if (missing_msbs == 29) { /* if p is 1, then num_passes must be 1 */
+        if (num_passes > 1) {
+            num_passes = 1;
+            if (cannot_decode_spp_mrp_msg == OPJ_FALSE) {
+                if (p_manager_mutex) {
+                    opj_mutex_lock(p_manager_mutex);
+                }
+                cannot_decode_spp_mrp_msg = OPJ_TRUE;
+                opj_event_msg(p_manager, EVT_WARNING, "Not enough precision to decode "
+                              "the SgnProp nor MagRef passes, which will be skipped. "
+                              "This message will not be displayed again.\n");
+                if (p_manager_mutex) {
+                    opj_mutex_unlock(p_manager_mutex);
+                }
+            }
+        }
     }
 
-    // OPJ_UINT32
-    p = cblk->numbps;
+    /* OPJ_UINT32 */
+    p = 30 - missing_msbs;
+
     // OPJ_UINT32 zero planes plus 1
-    zero_planes_p1 = cblk->Mb - cblk->numbps + 1;
+    zero_planes_p1 = missing_msbs + 1;
 
     // read scup and fix the bytes there
     lcup = (int)lengths1;  // length of CUP
@@ -2502,10 +2528,11 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
     {
         OPJ_INT32 x, y;
+        OPJ_UINT32 shift = 29u - cblk->Mb;
         for (y = 0; y < height; ++y) {
             OPJ_INT32* sp = (OPJ_INT32*)decoded_data + y * stride;
             for (x = 0; x < width; ++x, ++sp) {
-                OPJ_INT32 val = (*sp & 0x7FFFFFFF);
+                OPJ_INT32 val = (*sp & 0x7FFFFFFF) >> shift;
                 *sp = ((OPJ_UINT32) * sp & 0x80000000) ? -val : val;
             }
         }

From 7c754ed468d5cf7b00db6620d509a372996c8722 Mon Sep 17 00:00:00 2001
From: Aous Naman <aous72@yahoo.com>
Date: Thu, 16 Sep 2021 08:55:36 +1000
Subject: [PATCH 07/10] This should fix the code_style failure, I hope.

---
 src/lib/openjp2/j2k.c                 |    6 +-
 src/lib/openjp2/t1_ht_generate_luts.c | 1710 +++++++++++++------------
 2 files changed, 861 insertions(+), 855 deletions(-)

diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c
index 92c20415d..b10cd5ac9 100644
--- a/src/lib/openjp2/j2k.c
+++ b/src/lib/openjp2/j2k.c
@@ -1178,7 +1178,7 @@ static OPJ_BOOL opj_j2k_read_cbd(opj_j2k_t *p_j2k,
                                  opj_event_mgr_t * p_manager);
 
 /**
- * Reads a CAP marker (extended capabilities definition). Empty implementation. 
+ * Reads a CAP marker (extended capabilities definition). Empty implementation.
  * Found in HTJ2K files
  *
  * @param       p_header_data   the data contained in the CAP box.
@@ -6623,9 +6623,9 @@ static OPJ_BOOL opj_j2k_read_cbd(opj_j2k_t *p_j2k,
 }
 
 /**
- * Reads a CAP marker (extended capabilities definition). Empty implementation. 
+ * Reads a CAP marker (extended capabilities definition). Empty implementation.
  * Found in HTJ2K files.
- * 
+ *
  * @param       p_header_data   the data contained in the CAP box.
  * @param       p_j2k                   the jpeg2000 codec.
  * @param       p_header_size   the size of the data contained in the CAP marker.
diff --git a/src/lib/openjp2/t1_ht_generate_luts.c b/src/lib/openjp2/t1_ht_generate_luts.c
index f759cb75d..3fd14eb96 100644
--- a/src/lib/openjp2/t1_ht_generate_luts.c
+++ b/src/lib/openjp2/t1_ht_generate_luts.c
@@ -2,21 +2,21 @@
 // This software is released under the 2-Clause BSD license, included
 // below.
 //
-// Copyright (c) 2021, Aous Naman 
+// Copyright (c) 2021, Aous Naman
 // Copyright (c) 2021, Kakadu Software Pty Ltd, Australia
 // Copyright (c) 2021, The University of New South Wales, Australia
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
-// 
+//
 // 1. Redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer.
-// 
+//
 // 2. Redistributions in binary form must reproduce the above copyright
 // notice, this list of conditions and the following disclaimer in the
 // documentation and/or other materials provided with the distribution.
-// 
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
@@ -65,824 +65,826 @@ typedef uint64_t OPJ_UINT64;
   *   cwd VLC codeword
   *   cwd VLC codeword length
   */
-typedef struct vlc_src_table { int c_q, rho, u_off, e_k, e_1, cwd, cwd_len;}
+typedef struct vlc_src_table {
+    int c_q, rho, u_off, e_k, e_1, cwd, cwd_len;
+}
 vlc_src_table_t;
 
 // initial quad rows
 static vlc_src_table_t tbl0[] = {
-  {0, 0x1, 0x0, 0x0, 0x0, 0x06, 4},
-  {0, 0x1, 0x1, 0x1, 0x1, 0x3F, 7},
-  {0, 0x2, 0x0, 0x0, 0x0, 0x00, 3},
-  {0, 0x2, 0x1, 0x2, 0x2, 0x7F, 7},
-  {0, 0x3, 0x0, 0x0, 0x0, 0x11, 5},
-  {0, 0x3, 0x1, 0x2, 0x2, 0x5F, 7},
-  {0, 0x3, 0x1, 0x3, 0x1, 0x1F, 7},
-  {0, 0x4, 0x0, 0x0, 0x0, 0x02, 3},
-  {0, 0x4, 0x1, 0x4, 0x4, 0x13, 6},
-  {0, 0x5, 0x0, 0x0, 0x0, 0x0E, 5},
-  {0, 0x5, 0x1, 0x4, 0x4, 0x23, 6},
-  {0, 0x5, 0x1, 0x5, 0x1, 0x0F, 7},
-  {0, 0x6, 0x0, 0x0, 0x0, 0x03, 6},
-  {0, 0x6, 0x1, 0x0, 0x0, 0x6F, 7},
-  {0, 0x7, 0x0, 0x0, 0x0, 0x2F, 7},
-  {0, 0x7, 0x1, 0x2, 0x2, 0x4F, 7},
-  {0, 0x7, 0x1, 0x2, 0x0, 0x0D, 6},
-  {0, 0x8, 0x0, 0x0, 0x0, 0x04, 3},
-  {0, 0x8, 0x1, 0x8, 0x8, 0x3D, 6},
-  {0, 0x9, 0x0, 0x0, 0x0, 0x1D, 6},
-  {0, 0x9, 0x1, 0x0, 0x0, 0x2D, 6},
-  {0, 0xA, 0x0, 0x0, 0x0, 0x01, 5},
-  {0, 0xA, 0x1, 0x8, 0x8, 0x35, 6},
-  {0, 0xA, 0x1, 0xA, 0x2, 0x77, 7},
-  {0, 0xB, 0x0, 0x0, 0x0, 0x37, 7},
-  {0, 0xB, 0x1, 0x1, 0x1, 0x57, 7},
-  {0, 0xB, 0x1, 0x1, 0x0, 0x09, 6},
-  {0, 0xC, 0x0, 0x0, 0x0, 0x1E, 5},
-  {0, 0xC, 0x1, 0xC, 0xC, 0x17, 7},
-  {0, 0xC, 0x1, 0xC, 0x4, 0x15, 6},
-  {0, 0xC, 0x1, 0xC, 0x8, 0x25, 6},
-  {0, 0xD, 0x0, 0x0, 0x0, 0x67, 7},
-  {0, 0xD, 0x1, 0x1, 0x1, 0x27, 7},
-  {0, 0xD, 0x1, 0x5, 0x4, 0x47, 7},
-  {0, 0xD, 0x1, 0xD, 0x8, 0x07, 7},
-  {0, 0xE, 0x0, 0x0, 0x0, 0x7B, 7},
-  {0, 0xE, 0x1, 0x2, 0x2, 0x4B, 7},
-  {0, 0xE, 0x1, 0xA, 0x8, 0x05, 6},
-  {0, 0xE, 0x1, 0xE, 0x4, 0x3B, 7},
-  {0, 0xF, 0x0, 0x0, 0x0, 0x5B, 7},
-  {0, 0xF, 0x1, 0x9, 0x9, 0x1B, 7},
-  {0, 0xF, 0x1, 0xB, 0xA, 0x6B, 7},
-  {0, 0xF, 0x1, 0xF, 0xC, 0x2B, 7},
-  {0, 0xF, 0x1, 0xF, 0x8, 0x39, 6},
-  {0, 0xF, 0x1, 0xE, 0x6, 0x73, 7},
-  {0, 0xF, 0x1, 0xE, 0x2, 0x19, 6},
-  {0, 0xF, 0x1, 0xF, 0x5, 0x0B, 7},
-  {0, 0xF, 0x1, 0xF, 0x4, 0x29, 6},
-  {0, 0xF, 0x1, 0xF, 0x1, 0x33, 7},
-  {1, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
-  {1, 0x1, 0x0, 0x0, 0x0, 0x0E, 4},
-  {1, 0x1, 0x1, 0x1, 0x1, 0x1F, 7},
-  {1, 0x2, 0x0, 0x0, 0x0, 0x06, 4},
-  {1, 0x2, 0x1, 0x2, 0x2, 0x3B, 6},
-  {1, 0x3, 0x0, 0x0, 0x0, 0x1B, 6},
-  {1, 0x3, 0x1, 0x0, 0x0, 0x3D, 6},
-  {1, 0x4, 0x0, 0x0, 0x0, 0x0A, 4},
-  {1, 0x4, 0x1, 0x4, 0x4, 0x2B, 6},
-  {1, 0x5, 0x0, 0x0, 0x0, 0x0B, 6},
-  {1, 0x5, 0x1, 0x4, 0x4, 0x33, 6},
-  {1, 0x5, 0x1, 0x5, 0x1, 0x7F, 7},
-  {1, 0x6, 0x0, 0x0, 0x0, 0x13, 6},
-  {1, 0x6, 0x1, 0x0, 0x0, 0x23, 6},
-  {1, 0x7, 0x0, 0x0, 0x0, 0x3F, 7},
-  {1, 0x7, 0x1, 0x2, 0x2, 0x5F, 7},
-  {1, 0x7, 0x1, 0x2, 0x0, 0x03, 6},
-  {1, 0x8, 0x0, 0x0, 0x0, 0x02, 4},
-  {1, 0x8, 0x1, 0x8, 0x8, 0x1D, 6},
-  {1, 0x9, 0x0, 0x0, 0x0, 0x2D, 6},
-  {1, 0x9, 0x1, 0x0, 0x0, 0x0D, 6},
-  {1, 0xA, 0x0, 0x0, 0x0, 0x35, 6},
-  {1, 0xA, 0x1, 0x8, 0x8, 0x15, 6},
-  {1, 0xA, 0x1, 0xA, 0x2, 0x6F, 7},
-  {1, 0xB, 0x0, 0x0, 0x0, 0x2F, 7},
-  {1, 0xB, 0x1, 0x1, 0x1, 0x4F, 7},
-  {1, 0xB, 0x1, 0x1, 0x0, 0x11, 6},
-  {1, 0xC, 0x0, 0x0, 0x0, 0x01, 5},
-  {1, 0xC, 0x1, 0x8, 0x8, 0x25, 6},
-  {1, 0xC, 0x1, 0xC, 0x4, 0x05, 6},
-  {1, 0xD, 0x0, 0x0, 0x0, 0x0F, 7},
-  {1, 0xD, 0x1, 0x1, 0x1, 0x17, 7},
-  {1, 0xD, 0x1, 0x5, 0x4, 0x39, 6},
-  {1, 0xD, 0x1, 0xD, 0x8, 0x77, 7},
-  {1, 0xE, 0x0, 0x0, 0x0, 0x37, 7},
-  {1, 0xE, 0x1, 0x2, 0x2, 0x57, 7},
-  {1, 0xE, 0x1, 0xA, 0x8, 0x19, 6},
-  {1, 0xE, 0x1, 0xE, 0x4, 0x67, 7},
-  {1, 0xF, 0x0, 0x0, 0x0, 0x07, 7},
-  {1, 0xF, 0x1, 0xB, 0x8, 0x29, 6},
-  {1, 0xF, 0x1, 0x8, 0x8, 0x27, 7},
-  {1, 0xF, 0x1, 0xA, 0x2, 0x09, 6},
-  {1, 0xF, 0x1, 0xE, 0x4, 0x31, 6},
-  {1, 0xF, 0x1, 0xF, 0x1, 0x47, 7},
-  {2, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
-  {2, 0x1, 0x0, 0x0, 0x0, 0x0E, 4},
-  {2, 0x1, 0x1, 0x1, 0x1, 0x1B, 6},
-  {2, 0x2, 0x0, 0x0, 0x0, 0x06, 4},
-  {2, 0x2, 0x1, 0x2, 0x2, 0x3F, 7},
-  {2, 0x3, 0x0, 0x0, 0x0, 0x2B, 6},
-  {2, 0x3, 0x1, 0x1, 0x1, 0x33, 6},
-  {2, 0x3, 0x1, 0x3, 0x2, 0x7F, 7},
-  {2, 0x4, 0x0, 0x0, 0x0, 0x0A, 4},
-  {2, 0x4, 0x1, 0x4, 0x4, 0x0B, 6},
-  {2, 0x5, 0x0, 0x0, 0x0, 0x01, 5},
-  {2, 0x5, 0x1, 0x5, 0x5, 0x2F, 7},
-  {2, 0x5, 0x1, 0x5, 0x1, 0x13, 6},
-  {2, 0x5, 0x1, 0x5, 0x4, 0x23, 6},
-  {2, 0x6, 0x0, 0x0, 0x0, 0x03, 6},
-  {2, 0x6, 0x1, 0x0, 0x0, 0x5F, 7},
-  {2, 0x7, 0x0, 0x0, 0x0, 0x1F, 7},
-  {2, 0x7, 0x1, 0x2, 0x2, 0x6F, 7},
-  {2, 0x7, 0x1, 0x3, 0x1, 0x11, 6},
-  {2, 0x7, 0x1, 0x7, 0x4, 0x37, 7},
-  {2, 0x8, 0x0, 0x0, 0x0, 0x02, 4},
-  {2, 0x8, 0x1, 0x8, 0x8, 0x4F, 7},
-  {2, 0x9, 0x0, 0x0, 0x0, 0x3D, 6},
-  {2, 0x9, 0x1, 0x0, 0x0, 0x1D, 6},
-  {2, 0xA, 0x0, 0x0, 0x0, 0x2D, 6},
-  {2, 0xA, 0x1, 0x0, 0x0, 0x0D, 6},
-  {2, 0xB, 0x0, 0x0, 0x0, 0x0F, 7},
-  {2, 0xB, 0x1, 0x2, 0x2, 0x77, 7},
-  {2, 0xB, 0x1, 0x2, 0x0, 0x35, 6},
-  {2, 0xC, 0x0, 0x0, 0x0, 0x15, 6},
-  {2, 0xC, 0x1, 0x4, 0x4, 0x25, 6},
-  {2, 0xC, 0x1, 0xC, 0x8, 0x57, 7},
-  {2, 0xD, 0x0, 0x0, 0x0, 0x17, 7},
-  {2, 0xD, 0x1, 0x8, 0x8, 0x05, 6},
-  {2, 0xD, 0x1, 0xC, 0x4, 0x39, 6},
-  {2, 0xD, 0x1, 0xD, 0x1, 0x67, 7},
-  {2, 0xE, 0x0, 0x0, 0x0, 0x27, 7},
-  {2, 0xE, 0x1, 0x2, 0x2, 0x7B, 7},
-  {2, 0xE, 0x1, 0x2, 0x0, 0x19, 6},
-  {2, 0xF, 0x0, 0x0, 0x0, 0x47, 7},
-  {2, 0xF, 0x1, 0xF, 0x1, 0x29, 6},
-  {2, 0xF, 0x1, 0x1, 0x1, 0x09, 6},
-  {2, 0xF, 0x1, 0x3, 0x2, 0x07, 7},
-  {2, 0xF, 0x1, 0x7, 0x4, 0x31, 6},
-  {2, 0xF, 0x1, 0xF, 0x8, 0x3B, 7},
-  {3, 0x0, 0x0, 0x0, 0x0, 0x00, 3},
-  {3, 0x1, 0x0, 0x0, 0x0, 0x04, 4},
-  {3, 0x1, 0x1, 0x1, 0x1, 0x3D, 6},
-  {3, 0x2, 0x0, 0x0, 0x0, 0x0C, 5},
-  {3, 0x2, 0x1, 0x2, 0x2, 0x4F, 7},
-  {3, 0x3, 0x0, 0x0, 0x0, 0x1D, 6},
-  {3, 0x3, 0x1, 0x1, 0x1, 0x05, 6},
-  {3, 0x3, 0x1, 0x3, 0x2, 0x7F, 7},
-  {3, 0x4, 0x0, 0x0, 0x0, 0x16, 5},
-  {3, 0x4, 0x1, 0x4, 0x4, 0x2D, 6},
-  {3, 0x5, 0x0, 0x0, 0x0, 0x06, 5},
-  {3, 0x5, 0x1, 0x5, 0x5, 0x1A, 5},
-  {3, 0x5, 0x1, 0x5, 0x1, 0x0D, 6},
-  {3, 0x5, 0x1, 0x5, 0x4, 0x35, 6},
-  {3, 0x6, 0x0, 0x0, 0x0, 0x3F, 7},
-  {3, 0x6, 0x1, 0x4, 0x4, 0x5F, 7},
-  {3, 0x6, 0x1, 0x6, 0x2, 0x1F, 7},
-  {3, 0x7, 0x0, 0x0, 0x0, 0x6F, 7},
-  {3, 0x7, 0x1, 0x6, 0x6, 0x2F, 7},
-  {3, 0x7, 0x1, 0x6, 0x4, 0x15, 6},
-  {3, 0x7, 0x1, 0x7, 0x3, 0x77, 7},
-  {3, 0x7, 0x1, 0x7, 0x1, 0x25, 6},
-  {3, 0x7, 0x1, 0x7, 0x2, 0x0F, 7},
-  {3, 0x8, 0x0, 0x0, 0x0, 0x0A, 5},
-  {3, 0x8, 0x1, 0x8, 0x8, 0x07, 7},
-  {3, 0x9, 0x0, 0x0, 0x0, 0x39, 6},
-  {3, 0x9, 0x1, 0x1, 0x1, 0x37, 7},
-  {3, 0x9, 0x1, 0x9, 0x8, 0x57, 7},
-  {3, 0xA, 0x0, 0x0, 0x0, 0x19, 6},
-  {3, 0xA, 0x1, 0x8, 0x8, 0x29, 6},
-  {3, 0xA, 0x1, 0xA, 0x2, 0x17, 7},
-  {3, 0xB, 0x0, 0x0, 0x0, 0x67, 7},
-  {3, 0xB, 0x1, 0xB, 0x1, 0x27, 7},
-  {3, 0xB, 0x1, 0x1, 0x1, 0x47, 7},
-  {3, 0xB, 0x1, 0x3, 0x2, 0x09, 6},
-  {3, 0xB, 0x1, 0xB, 0x8, 0x7B, 7},
-  {3, 0xC, 0x0, 0x0, 0x0, 0x31, 6},
-  {3, 0xC, 0x1, 0x4, 0x4, 0x11, 6},
-  {3, 0xC, 0x1, 0xC, 0x8, 0x3B, 7},
-  {3, 0xD, 0x0, 0x0, 0x0, 0x5B, 7},
-  {3, 0xD, 0x1, 0x9, 0x9, 0x1B, 7},
-  {3, 0xD, 0x1, 0xD, 0x5, 0x2B, 7},
-  {3, 0xD, 0x1, 0xD, 0x1, 0x21, 6},
-  {3, 0xD, 0x1, 0xD, 0xC, 0x6B, 7},
-  {3, 0xD, 0x1, 0xD, 0x4, 0x01, 6},
-  {3, 0xD, 0x1, 0xD, 0x8, 0x4B, 7},
-  {3, 0xE, 0x0, 0x0, 0x0, 0x0B, 7},
-  {3, 0xE, 0x1, 0xE, 0x4, 0x73, 7},
-  {3, 0xE, 0x1, 0x4, 0x4, 0x13, 7},
-  {3, 0xE, 0x1, 0xC, 0x8, 0x3E, 6},
-  {3, 0xE, 0x1, 0xE, 0x2, 0x33, 7},
-  {3, 0xF, 0x0, 0x0, 0x0, 0x53, 7},
-  {3, 0xF, 0x1, 0xA, 0xA, 0x0E, 6},
-  {3, 0xF, 0x1, 0xB, 0x9, 0x63, 7},
-  {3, 0xF, 0x1, 0xF, 0xC, 0x03, 7},
-  {3, 0xF, 0x1, 0xF, 0x8, 0x12, 5},
-  {3, 0xF, 0x1, 0xE, 0x6, 0x23, 7},
-  {3, 0xF, 0x1, 0xF, 0x5, 0x1E, 6},
-  {3, 0xF, 0x1, 0xF, 0x4, 0x02, 5},
-  {3, 0xF, 0x1, 0xF, 0x3, 0x43, 7},
-  {3, 0xF, 0x1, 0xF, 0x1, 0x1C, 5},
-  {3, 0xF, 0x1, 0xF, 0x2, 0x2E, 6},
-  {4, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
-  {4, 0x1, 0x0, 0x0, 0x0, 0x0E, 4},
-  {4, 0x1, 0x1, 0x1, 0x1, 0x3F, 7},
-  {4, 0x2, 0x0, 0x0, 0x0, 0x06, 4},
-  {4, 0x2, 0x1, 0x2, 0x2, 0x1B, 6},
-  {4, 0x3, 0x0, 0x0, 0x0, 0x2B, 6},
-  {4, 0x3, 0x1, 0x2, 0x2, 0x3D, 6},
-  {4, 0x3, 0x1, 0x3, 0x1, 0x7F, 7},
-  {4, 0x4, 0x0, 0x0, 0x0, 0x0A, 4},
-  {4, 0x4, 0x1, 0x4, 0x4, 0x5F, 7},
-  {4, 0x5, 0x0, 0x0, 0x0, 0x0B, 6},
-  {4, 0x5, 0x1, 0x0, 0x0, 0x33, 6},
-  {4, 0x6, 0x0, 0x0, 0x0, 0x13, 6},
-  {4, 0x6, 0x1, 0x0, 0x0, 0x23, 6},
-  {4, 0x7, 0x0, 0x0, 0x0, 0x1F, 7},
-  {4, 0x7, 0x1, 0x4, 0x4, 0x6F, 7},
-  {4, 0x7, 0x1, 0x4, 0x0, 0x03, 6},
-  {4, 0x8, 0x0, 0x0, 0x0, 0x02, 4},
-  {4, 0x8, 0x1, 0x8, 0x8, 0x1D, 6},
-  {4, 0x9, 0x0, 0x0, 0x0, 0x11, 6},
-  {4, 0x9, 0x1, 0x0, 0x0, 0x77, 7},
-  {4, 0xA, 0x0, 0x0, 0x0, 0x01, 5},
-  {4, 0xA, 0x1, 0xA, 0xA, 0x2F, 7},
-  {4, 0xA, 0x1, 0xA, 0x2, 0x2D, 6},
-  {4, 0xA, 0x1, 0xA, 0x8, 0x0D, 6},
-  {4, 0xB, 0x0, 0x0, 0x0, 0x4F, 7},
-  {4, 0xB, 0x1, 0xB, 0x2, 0x0F, 7},
-  {4, 0xB, 0x1, 0x0, 0x0, 0x35, 6},
-  {4, 0xC, 0x0, 0x0, 0x0, 0x15, 6},
-  {4, 0xC, 0x1, 0x8, 0x8, 0x25, 6},
-  {4, 0xC, 0x1, 0xC, 0x4, 0x37, 7},
-  {4, 0xD, 0x0, 0x0, 0x0, 0x57, 7},
-  {4, 0xD, 0x1, 0x1, 0x1, 0x07, 7},
-  {4, 0xD, 0x1, 0x1, 0x0, 0x05, 6},
-  {4, 0xE, 0x0, 0x0, 0x0, 0x17, 7},
-  {4, 0xE, 0x1, 0x4, 0x4, 0x39, 6},
-  {4, 0xE, 0x1, 0xC, 0x8, 0x19, 6},
-  {4, 0xE, 0x1, 0xE, 0x2, 0x67, 7},
-  {4, 0xF, 0x0, 0x0, 0x0, 0x27, 7},
-  {4, 0xF, 0x1, 0x9, 0x9, 0x47, 7},
-  {4, 0xF, 0x1, 0x9, 0x1, 0x29, 6},
-  {4, 0xF, 0x1, 0x7, 0x6, 0x7B, 7},
-  {4, 0xF, 0x1, 0x7, 0x2, 0x09, 6},
-  {4, 0xF, 0x1, 0xB, 0x8, 0x31, 6},
-  {4, 0xF, 0x1, 0xF, 0x4, 0x3B, 7},
-  {5, 0x0, 0x0, 0x0, 0x0, 0x00, 3},
-  {5, 0x1, 0x0, 0x0, 0x0, 0x1A, 5},
-  {5, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
-  {5, 0x2, 0x0, 0x0, 0x0, 0x0A, 5},
-  {5, 0x2, 0x1, 0x2, 0x2, 0x1D, 6},
-  {5, 0x3, 0x0, 0x0, 0x0, 0x2D, 6},
-  {5, 0x3, 0x1, 0x3, 0x3, 0x5F, 7},
-  {5, 0x3, 0x1, 0x3, 0x2, 0x39, 6},
-  {5, 0x3, 0x1, 0x3, 0x1, 0x3F, 7},
-  {5, 0x4, 0x0, 0x0, 0x0, 0x12, 5},
-  {5, 0x4, 0x1, 0x4, 0x4, 0x1F, 7},
-  {5, 0x5, 0x0, 0x0, 0x0, 0x0D, 6},
-  {5, 0x5, 0x1, 0x4, 0x4, 0x35, 6},
-  {5, 0x5, 0x1, 0x5, 0x1, 0x6F, 7},
-  {5, 0x6, 0x0, 0x0, 0x0, 0x15, 6},
-  {5, 0x6, 0x1, 0x2, 0x2, 0x25, 6},
-  {5, 0x6, 0x1, 0x6, 0x4, 0x2F, 7},
-  {5, 0x7, 0x0, 0x0, 0x0, 0x4F, 7},
-  {5, 0x7, 0x1, 0x6, 0x6, 0x57, 7},
-  {5, 0x7, 0x1, 0x6, 0x4, 0x05, 6},
-  {5, 0x7, 0x1, 0x7, 0x3, 0x0F, 7},
-  {5, 0x7, 0x1, 0x7, 0x2, 0x77, 7},
-  {5, 0x7, 0x1, 0x7, 0x1, 0x37, 7},
-  {5, 0x8, 0x0, 0x0, 0x0, 0x02, 5},
-  {5, 0x8, 0x1, 0x8, 0x8, 0x19, 6},
-  {5, 0x9, 0x0, 0x0, 0x0, 0x26, 6},
-  {5, 0x9, 0x1, 0x8, 0x8, 0x17, 7},
-  {5, 0x9, 0x1, 0x9, 0x1, 0x67, 7},
-  {5, 0xA, 0x0, 0x0, 0x0, 0x1C, 5},
-  {5, 0xA, 0x1, 0xA, 0xA, 0x29, 6},
-  {5, 0xA, 0x1, 0xA, 0x2, 0x09, 6},
-  {5, 0xA, 0x1, 0xA, 0x8, 0x31, 6},
-  {5, 0xB, 0x0, 0x0, 0x0, 0x27, 7},
-  {5, 0xB, 0x1, 0x9, 0x9, 0x07, 7},
-  {5, 0xB, 0x1, 0x9, 0x8, 0x11, 6},
-  {5, 0xB, 0x1, 0xB, 0x3, 0x47, 7},
-  {5, 0xB, 0x1, 0xB, 0x2, 0x21, 6},
-  {5, 0xB, 0x1, 0xB, 0x1, 0x7B, 7},
-  {5, 0xC, 0x0, 0x0, 0x0, 0x01, 6},
-  {5, 0xC, 0x1, 0x8, 0x8, 0x3E, 6},
-  {5, 0xC, 0x1, 0xC, 0x4, 0x3B, 7},
-  {5, 0xD, 0x0, 0x0, 0x0, 0x5B, 7},
-  {5, 0xD, 0x1, 0x9, 0x9, 0x6B, 7},
-  {5, 0xD, 0x1, 0x9, 0x8, 0x1E, 6},
-  {5, 0xD, 0x1, 0xD, 0x5, 0x1B, 7},
-  {5, 0xD, 0x1, 0xD, 0x4, 0x2E, 6},
-  {5, 0xD, 0x1, 0xD, 0x1, 0x2B, 7},
-  {5, 0xE, 0x0, 0x0, 0x0, 0x4B, 7},
-  {5, 0xE, 0x1, 0x6, 0x6, 0x0B, 7},
-  {5, 0xE, 0x1, 0xE, 0xA, 0x33, 7},
-  {5, 0xE, 0x1, 0xE, 0x2, 0x0E, 6},
-  {5, 0xE, 0x1, 0xE, 0xC, 0x73, 7},
-  {5, 0xE, 0x1, 0xE, 0x8, 0x36, 6},
-  {5, 0xE, 0x1, 0xE, 0x4, 0x53, 7},
-  {5, 0xF, 0x0, 0x0, 0x0, 0x13, 7},
-  {5, 0xF, 0x1, 0x7, 0x7, 0x43, 7},
-  {5, 0xF, 0x1, 0x7, 0x6, 0x16, 6},
-  {5, 0xF, 0x1, 0x7, 0x5, 0x63, 7},
-  {5, 0xF, 0x1, 0xF, 0xC, 0x23, 7},
-  {5, 0xF, 0x1, 0xF, 0x4, 0x0C, 5},
-  {5, 0xF, 0x1, 0xD, 0x9, 0x03, 7},
-  {5, 0xF, 0x1, 0xF, 0xA, 0x3D, 7},
-  {5, 0xF, 0x1, 0xF, 0x8, 0x14, 5},
-  {5, 0xF, 0x1, 0xF, 0x3, 0x7D, 7},
-  {5, 0xF, 0x1, 0xF, 0x2, 0x04, 5},
-  {5, 0xF, 0x1, 0xF, 0x1, 0x06, 6},
-  {6, 0x0, 0x0, 0x0, 0x0, 0x00, 3},
-  {6, 0x1, 0x0, 0x0, 0x0, 0x04, 4},
-  {6, 0x1, 0x1, 0x1, 0x1, 0x03, 6},
-  {6, 0x2, 0x0, 0x0, 0x0, 0x0C, 5},
-  {6, 0x2, 0x1, 0x2, 0x2, 0x0D, 6},
-  {6, 0x3, 0x0, 0x0, 0x0, 0x1A, 5},
-  {6, 0x3, 0x1, 0x3, 0x3, 0x3D, 6},
-  {6, 0x3, 0x1, 0x3, 0x1, 0x1D, 6},
-  {6, 0x3, 0x1, 0x3, 0x2, 0x2D, 6},
-  {6, 0x4, 0x0, 0x0, 0x0, 0x0A, 5},
-  {6, 0x4, 0x1, 0x4, 0x4, 0x3F, 7},
-  {6, 0x5, 0x0, 0x0, 0x0, 0x35, 6},
-  {6, 0x5, 0x1, 0x1, 0x1, 0x15, 6},
-  {6, 0x5, 0x1, 0x5, 0x4, 0x7F, 7},
-  {6, 0x6, 0x0, 0x0, 0x0, 0x25, 6},
-  {6, 0x6, 0x1, 0x2, 0x2, 0x5F, 7},
-  {6, 0x6, 0x1, 0x6, 0x4, 0x1F, 7},
-  {6, 0x7, 0x0, 0x0, 0x0, 0x6F, 7},
-  {6, 0x7, 0x1, 0x6, 0x6, 0x4F, 7},
-  {6, 0x7, 0x1, 0x6, 0x4, 0x05, 6},
-  {6, 0x7, 0x1, 0x7, 0x3, 0x2F, 7},
-  {6, 0x7, 0x1, 0x7, 0x1, 0x36, 6},
-  {6, 0x7, 0x1, 0x7, 0x2, 0x77, 7},
-  {6, 0x8, 0x0, 0x0, 0x0, 0x12, 5},
-  {6, 0x8, 0x1, 0x8, 0x8, 0x0F, 7},
-  {6, 0x9, 0x0, 0x0, 0x0, 0x39, 6},
-  {6, 0x9, 0x1, 0x1, 0x1, 0x37, 7},
-  {6, 0x9, 0x1, 0x9, 0x8, 0x57, 7},
-  {6, 0xA, 0x0, 0x0, 0x0, 0x19, 6},
-  {6, 0xA, 0x1, 0x2, 0x2, 0x29, 6},
-  {6, 0xA, 0x1, 0xA, 0x8, 0x17, 7},
-  {6, 0xB, 0x0, 0x0, 0x0, 0x67, 7},
-  {6, 0xB, 0x1, 0x9, 0x9, 0x47, 7},
-  {6, 0xB, 0x1, 0x9, 0x1, 0x09, 6},
-  {6, 0xB, 0x1, 0xB, 0xA, 0x27, 7},
-  {6, 0xB, 0x1, 0xB, 0x2, 0x31, 6},
-  {6, 0xB, 0x1, 0xB, 0x8, 0x7B, 7},
-  {6, 0xC, 0x0, 0x0, 0x0, 0x11, 6},
-  {6, 0xC, 0x1, 0xC, 0xC, 0x07, 7},
-  {6, 0xC, 0x1, 0xC, 0x8, 0x21, 6},
-  {6, 0xC, 0x1, 0xC, 0x4, 0x3B, 7},
-  {6, 0xD, 0x0, 0x0, 0x0, 0x5B, 7},
-  {6, 0xD, 0x1, 0x5, 0x5, 0x33, 7},
-  {6, 0xD, 0x1, 0x5, 0x4, 0x01, 6},
-  {6, 0xD, 0x1, 0xC, 0x8, 0x1B, 7},
-  {6, 0xD, 0x1, 0xD, 0x1, 0x6B, 7},
-  {6, 0xE, 0x0, 0x0, 0x0, 0x2B, 7},
-  {6, 0xE, 0x1, 0xE, 0x2, 0x4B, 7},
-  {6, 0xE, 0x1, 0x2, 0x2, 0x0B, 7},
-  {6, 0xE, 0x1, 0xE, 0xC, 0x73, 7},
-  {6, 0xE, 0x1, 0xE, 0x8, 0x3E, 6},
-  {6, 0xE, 0x1, 0xE, 0x4, 0x53, 7},
-  {6, 0xF, 0x0, 0x0, 0x0, 0x13, 7},
-  {6, 0xF, 0x1, 0x6, 0x6, 0x1E, 6},
-  {6, 0xF, 0x1, 0xE, 0xA, 0x2E, 6},
-  {6, 0xF, 0x1, 0xF, 0x3, 0x0E, 6},
-  {6, 0xF, 0x1, 0xF, 0x2, 0x02, 5},
-  {6, 0xF, 0x1, 0xB, 0x9, 0x63, 7},
-  {6, 0xF, 0x1, 0xF, 0xC, 0x16, 6},
-  {6, 0xF, 0x1, 0xF, 0x8, 0x06, 6},
-  {6, 0xF, 0x1, 0xF, 0x5, 0x23, 7},
-  {6, 0xF, 0x1, 0xF, 0x1, 0x1C, 5},
-  {6, 0xF, 0x1, 0xF, 0x4, 0x26, 6},
-  {7, 0x0, 0x0, 0x0, 0x0, 0x12, 5},
-  {7, 0x1, 0x0, 0x0, 0x0, 0x05, 6},
-  {7, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
-  {7, 0x2, 0x0, 0x0, 0x0, 0x39, 6},
-  {7, 0x2, 0x1, 0x2, 0x2, 0x3F, 7},
-  {7, 0x3, 0x0, 0x0, 0x0, 0x5F, 7},
-  {7, 0x3, 0x1, 0x3, 0x3, 0x1F, 7},
-  {7, 0x3, 0x1, 0x3, 0x2, 0x6F, 7},
-  {7, 0x3, 0x1, 0x3, 0x1, 0x2F, 7},
-  {7, 0x4, 0x0, 0x0, 0x0, 0x4F, 7},
-  {7, 0x4, 0x1, 0x4, 0x4, 0x0F, 7},
-  {7, 0x5, 0x0, 0x0, 0x0, 0x57, 7},
-  {7, 0x5, 0x1, 0x1, 0x1, 0x19, 6},
-  {7, 0x5, 0x1, 0x5, 0x4, 0x77, 7},
-  {7, 0x6, 0x0, 0x0, 0x0, 0x37, 7},
-  {7, 0x6, 0x1, 0x0, 0x0, 0x29, 6},
-  {7, 0x7, 0x0, 0x0, 0x0, 0x17, 7},
-  {7, 0x7, 0x1, 0x6, 0x6, 0x67, 7},
-  {7, 0x7, 0x1, 0x7, 0x3, 0x27, 7},
-  {7, 0x7, 0x1, 0x7, 0x2, 0x47, 7},
-  {7, 0x7, 0x1, 0x7, 0x5, 0x1B, 7},
-  {7, 0x7, 0x1, 0x7, 0x1, 0x09, 6},
-  {7, 0x7, 0x1, 0x7, 0x4, 0x07, 7},
-  {7, 0x8, 0x0, 0x0, 0x0, 0x7B, 7},
-  {7, 0x8, 0x1, 0x8, 0x8, 0x3B, 7},
-  {7, 0x9, 0x0, 0x0, 0x0, 0x5B, 7},
-  {7, 0x9, 0x1, 0x0, 0x0, 0x31, 6},
-  {7, 0xA, 0x0, 0x0, 0x0, 0x53, 7},
-  {7, 0xA, 0x1, 0x2, 0x2, 0x11, 6},
-  {7, 0xA, 0x1, 0xA, 0x8, 0x6B, 7},
-  {7, 0xB, 0x0, 0x0, 0x0, 0x2B, 7},
-  {7, 0xB, 0x1, 0x9, 0x9, 0x4B, 7},
-  {7, 0xB, 0x1, 0xB, 0x3, 0x0B, 7},
-  {7, 0xB, 0x1, 0xB, 0x1, 0x73, 7},
-  {7, 0xB, 0x1, 0xB, 0xA, 0x33, 7},
-  {7, 0xB, 0x1, 0xB, 0x2, 0x21, 6},
-  {7, 0xB, 0x1, 0xB, 0x8, 0x13, 7},
-  {7, 0xC, 0x0, 0x0, 0x0, 0x63, 7},
-  {7, 0xC, 0x1, 0x8, 0x8, 0x23, 7},
-  {7, 0xC, 0x1, 0xC, 0x4, 0x43, 7},
-  {7, 0xD, 0x0, 0x0, 0x0, 0x03, 7},
-  {7, 0xD, 0x1, 0x9, 0x9, 0x7D, 7},
-  {7, 0xD, 0x1, 0xD, 0x5, 0x5D, 7},
-  {7, 0xD, 0x1, 0xD, 0x1, 0x01, 6},
-  {7, 0xD, 0x1, 0xD, 0xC, 0x3D, 7},
-  {7, 0xD, 0x1, 0xD, 0x4, 0x3E, 6},
-  {7, 0xD, 0x1, 0xD, 0x8, 0x1D, 7},
-  {7, 0xE, 0x0, 0x0, 0x0, 0x6D, 7},
-  {7, 0xE, 0x1, 0x6, 0x6, 0x2D, 7},
-  {7, 0xE, 0x1, 0xE, 0xA, 0x0D, 7},
-  {7, 0xE, 0x1, 0xE, 0x2, 0x1E, 6},
-  {7, 0xE, 0x1, 0xE, 0xC, 0x4D, 7},
-  {7, 0xE, 0x1, 0xE, 0x8, 0x0E, 6},
-  {7, 0xE, 0x1, 0xE, 0x4, 0x75, 7},
-  {7, 0xF, 0x0, 0x0, 0x0, 0x15, 7},
-  {7, 0xF, 0x1, 0xF, 0xF, 0x06, 5},
-  {7, 0xF, 0x1, 0xF, 0xD, 0x35, 7},
-  {7, 0xF, 0x1, 0xF, 0x7, 0x55, 7},
-  {7, 0xF, 0x1, 0xF, 0x5, 0x1A, 5},
-  {7, 0xF, 0x1, 0xF, 0xB, 0x25, 7},
-  {7, 0xF, 0x1, 0xF, 0x3, 0x0A, 5},
-  {7, 0xF, 0x1, 0xF, 0x9, 0x2E, 6},
-  {7, 0xF, 0x1, 0xF, 0x1, 0x00, 4},
-  {7, 0xF, 0x1, 0xF, 0xE, 0x65, 7},
-  {7, 0xF, 0x1, 0xF, 0x6, 0x36, 6},
-  {7, 0xF, 0x1, 0xF, 0xA, 0x02, 5},
-  {7, 0xF, 0x1, 0xF, 0x2, 0x0C, 4},
-  {7, 0xF, 0x1, 0xF, 0xC, 0x16, 6},
-  {7, 0xF, 0x1, 0xF, 0x8, 0x04, 4},
-  {7, 0xF, 0x1, 0xF, 0x4, 0x08, 4}
+    {0, 0x1, 0x0, 0x0, 0x0, 0x06, 4},
+    {0, 0x1, 0x1, 0x1, 0x1, 0x3F, 7},
+    {0, 0x2, 0x0, 0x0, 0x0, 0x00, 3},
+    {0, 0x2, 0x1, 0x2, 0x2, 0x7F, 7},
+    {0, 0x3, 0x0, 0x0, 0x0, 0x11, 5},
+    {0, 0x3, 0x1, 0x2, 0x2, 0x5F, 7},
+    {0, 0x3, 0x1, 0x3, 0x1, 0x1F, 7},
+    {0, 0x4, 0x0, 0x0, 0x0, 0x02, 3},
+    {0, 0x4, 0x1, 0x4, 0x4, 0x13, 6},
+    {0, 0x5, 0x0, 0x0, 0x0, 0x0E, 5},
+    {0, 0x5, 0x1, 0x4, 0x4, 0x23, 6},
+    {0, 0x5, 0x1, 0x5, 0x1, 0x0F, 7},
+    {0, 0x6, 0x0, 0x0, 0x0, 0x03, 6},
+    {0, 0x6, 0x1, 0x0, 0x0, 0x6F, 7},
+    {0, 0x7, 0x0, 0x0, 0x0, 0x2F, 7},
+    {0, 0x7, 0x1, 0x2, 0x2, 0x4F, 7},
+    {0, 0x7, 0x1, 0x2, 0x0, 0x0D, 6},
+    {0, 0x8, 0x0, 0x0, 0x0, 0x04, 3},
+    {0, 0x8, 0x1, 0x8, 0x8, 0x3D, 6},
+    {0, 0x9, 0x0, 0x0, 0x0, 0x1D, 6},
+    {0, 0x9, 0x1, 0x0, 0x0, 0x2D, 6},
+    {0, 0xA, 0x0, 0x0, 0x0, 0x01, 5},
+    {0, 0xA, 0x1, 0x8, 0x8, 0x35, 6},
+    {0, 0xA, 0x1, 0xA, 0x2, 0x77, 7},
+    {0, 0xB, 0x0, 0x0, 0x0, 0x37, 7},
+    {0, 0xB, 0x1, 0x1, 0x1, 0x57, 7},
+    {0, 0xB, 0x1, 0x1, 0x0, 0x09, 6},
+    {0, 0xC, 0x0, 0x0, 0x0, 0x1E, 5},
+    {0, 0xC, 0x1, 0xC, 0xC, 0x17, 7},
+    {0, 0xC, 0x1, 0xC, 0x4, 0x15, 6},
+    {0, 0xC, 0x1, 0xC, 0x8, 0x25, 6},
+    {0, 0xD, 0x0, 0x0, 0x0, 0x67, 7},
+    {0, 0xD, 0x1, 0x1, 0x1, 0x27, 7},
+    {0, 0xD, 0x1, 0x5, 0x4, 0x47, 7},
+    {0, 0xD, 0x1, 0xD, 0x8, 0x07, 7},
+    {0, 0xE, 0x0, 0x0, 0x0, 0x7B, 7},
+    {0, 0xE, 0x1, 0x2, 0x2, 0x4B, 7},
+    {0, 0xE, 0x1, 0xA, 0x8, 0x05, 6},
+    {0, 0xE, 0x1, 0xE, 0x4, 0x3B, 7},
+    {0, 0xF, 0x0, 0x0, 0x0, 0x5B, 7},
+    {0, 0xF, 0x1, 0x9, 0x9, 0x1B, 7},
+    {0, 0xF, 0x1, 0xB, 0xA, 0x6B, 7},
+    {0, 0xF, 0x1, 0xF, 0xC, 0x2B, 7},
+    {0, 0xF, 0x1, 0xF, 0x8, 0x39, 6},
+    {0, 0xF, 0x1, 0xE, 0x6, 0x73, 7},
+    {0, 0xF, 0x1, 0xE, 0x2, 0x19, 6},
+    {0, 0xF, 0x1, 0xF, 0x5, 0x0B, 7},
+    {0, 0xF, 0x1, 0xF, 0x4, 0x29, 6},
+    {0, 0xF, 0x1, 0xF, 0x1, 0x33, 7},
+    {1, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
+    {1, 0x1, 0x0, 0x0, 0x0, 0x0E, 4},
+    {1, 0x1, 0x1, 0x1, 0x1, 0x1F, 7},
+    {1, 0x2, 0x0, 0x0, 0x0, 0x06, 4},
+    {1, 0x2, 0x1, 0x2, 0x2, 0x3B, 6},
+    {1, 0x3, 0x0, 0x0, 0x0, 0x1B, 6},
+    {1, 0x3, 0x1, 0x0, 0x0, 0x3D, 6},
+    {1, 0x4, 0x0, 0x0, 0x0, 0x0A, 4},
+    {1, 0x4, 0x1, 0x4, 0x4, 0x2B, 6},
+    {1, 0x5, 0x0, 0x0, 0x0, 0x0B, 6},
+    {1, 0x5, 0x1, 0x4, 0x4, 0x33, 6},
+    {1, 0x5, 0x1, 0x5, 0x1, 0x7F, 7},
+    {1, 0x6, 0x0, 0x0, 0x0, 0x13, 6},
+    {1, 0x6, 0x1, 0x0, 0x0, 0x23, 6},
+    {1, 0x7, 0x0, 0x0, 0x0, 0x3F, 7},
+    {1, 0x7, 0x1, 0x2, 0x2, 0x5F, 7},
+    {1, 0x7, 0x1, 0x2, 0x0, 0x03, 6},
+    {1, 0x8, 0x0, 0x0, 0x0, 0x02, 4},
+    {1, 0x8, 0x1, 0x8, 0x8, 0x1D, 6},
+    {1, 0x9, 0x0, 0x0, 0x0, 0x2D, 6},
+    {1, 0x9, 0x1, 0x0, 0x0, 0x0D, 6},
+    {1, 0xA, 0x0, 0x0, 0x0, 0x35, 6},
+    {1, 0xA, 0x1, 0x8, 0x8, 0x15, 6},
+    {1, 0xA, 0x1, 0xA, 0x2, 0x6F, 7},
+    {1, 0xB, 0x0, 0x0, 0x0, 0x2F, 7},
+    {1, 0xB, 0x1, 0x1, 0x1, 0x4F, 7},
+    {1, 0xB, 0x1, 0x1, 0x0, 0x11, 6},
+    {1, 0xC, 0x0, 0x0, 0x0, 0x01, 5},
+    {1, 0xC, 0x1, 0x8, 0x8, 0x25, 6},
+    {1, 0xC, 0x1, 0xC, 0x4, 0x05, 6},
+    {1, 0xD, 0x0, 0x0, 0x0, 0x0F, 7},
+    {1, 0xD, 0x1, 0x1, 0x1, 0x17, 7},
+    {1, 0xD, 0x1, 0x5, 0x4, 0x39, 6},
+    {1, 0xD, 0x1, 0xD, 0x8, 0x77, 7},
+    {1, 0xE, 0x0, 0x0, 0x0, 0x37, 7},
+    {1, 0xE, 0x1, 0x2, 0x2, 0x57, 7},
+    {1, 0xE, 0x1, 0xA, 0x8, 0x19, 6},
+    {1, 0xE, 0x1, 0xE, 0x4, 0x67, 7},
+    {1, 0xF, 0x0, 0x0, 0x0, 0x07, 7},
+    {1, 0xF, 0x1, 0xB, 0x8, 0x29, 6},
+    {1, 0xF, 0x1, 0x8, 0x8, 0x27, 7},
+    {1, 0xF, 0x1, 0xA, 0x2, 0x09, 6},
+    {1, 0xF, 0x1, 0xE, 0x4, 0x31, 6},
+    {1, 0xF, 0x1, 0xF, 0x1, 0x47, 7},
+    {2, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
+    {2, 0x1, 0x0, 0x0, 0x0, 0x0E, 4},
+    {2, 0x1, 0x1, 0x1, 0x1, 0x1B, 6},
+    {2, 0x2, 0x0, 0x0, 0x0, 0x06, 4},
+    {2, 0x2, 0x1, 0x2, 0x2, 0x3F, 7},
+    {2, 0x3, 0x0, 0x0, 0x0, 0x2B, 6},
+    {2, 0x3, 0x1, 0x1, 0x1, 0x33, 6},
+    {2, 0x3, 0x1, 0x3, 0x2, 0x7F, 7},
+    {2, 0x4, 0x0, 0x0, 0x0, 0x0A, 4},
+    {2, 0x4, 0x1, 0x4, 0x4, 0x0B, 6},
+    {2, 0x5, 0x0, 0x0, 0x0, 0x01, 5},
+    {2, 0x5, 0x1, 0x5, 0x5, 0x2F, 7},
+    {2, 0x5, 0x1, 0x5, 0x1, 0x13, 6},
+    {2, 0x5, 0x1, 0x5, 0x4, 0x23, 6},
+    {2, 0x6, 0x0, 0x0, 0x0, 0x03, 6},
+    {2, 0x6, 0x1, 0x0, 0x0, 0x5F, 7},
+    {2, 0x7, 0x0, 0x0, 0x0, 0x1F, 7},
+    {2, 0x7, 0x1, 0x2, 0x2, 0x6F, 7},
+    {2, 0x7, 0x1, 0x3, 0x1, 0x11, 6},
+    {2, 0x7, 0x1, 0x7, 0x4, 0x37, 7},
+    {2, 0x8, 0x0, 0x0, 0x0, 0x02, 4},
+    {2, 0x8, 0x1, 0x8, 0x8, 0x4F, 7},
+    {2, 0x9, 0x0, 0x0, 0x0, 0x3D, 6},
+    {2, 0x9, 0x1, 0x0, 0x0, 0x1D, 6},
+    {2, 0xA, 0x0, 0x0, 0x0, 0x2D, 6},
+    {2, 0xA, 0x1, 0x0, 0x0, 0x0D, 6},
+    {2, 0xB, 0x0, 0x0, 0x0, 0x0F, 7},
+    {2, 0xB, 0x1, 0x2, 0x2, 0x77, 7},
+    {2, 0xB, 0x1, 0x2, 0x0, 0x35, 6},
+    {2, 0xC, 0x0, 0x0, 0x0, 0x15, 6},
+    {2, 0xC, 0x1, 0x4, 0x4, 0x25, 6},
+    {2, 0xC, 0x1, 0xC, 0x8, 0x57, 7},
+    {2, 0xD, 0x0, 0x0, 0x0, 0x17, 7},
+    {2, 0xD, 0x1, 0x8, 0x8, 0x05, 6},
+    {2, 0xD, 0x1, 0xC, 0x4, 0x39, 6},
+    {2, 0xD, 0x1, 0xD, 0x1, 0x67, 7},
+    {2, 0xE, 0x0, 0x0, 0x0, 0x27, 7},
+    {2, 0xE, 0x1, 0x2, 0x2, 0x7B, 7},
+    {2, 0xE, 0x1, 0x2, 0x0, 0x19, 6},
+    {2, 0xF, 0x0, 0x0, 0x0, 0x47, 7},
+    {2, 0xF, 0x1, 0xF, 0x1, 0x29, 6},
+    {2, 0xF, 0x1, 0x1, 0x1, 0x09, 6},
+    {2, 0xF, 0x1, 0x3, 0x2, 0x07, 7},
+    {2, 0xF, 0x1, 0x7, 0x4, 0x31, 6},
+    {2, 0xF, 0x1, 0xF, 0x8, 0x3B, 7},
+    {3, 0x0, 0x0, 0x0, 0x0, 0x00, 3},
+    {3, 0x1, 0x0, 0x0, 0x0, 0x04, 4},
+    {3, 0x1, 0x1, 0x1, 0x1, 0x3D, 6},
+    {3, 0x2, 0x0, 0x0, 0x0, 0x0C, 5},
+    {3, 0x2, 0x1, 0x2, 0x2, 0x4F, 7},
+    {3, 0x3, 0x0, 0x0, 0x0, 0x1D, 6},
+    {3, 0x3, 0x1, 0x1, 0x1, 0x05, 6},
+    {3, 0x3, 0x1, 0x3, 0x2, 0x7F, 7},
+    {3, 0x4, 0x0, 0x0, 0x0, 0x16, 5},
+    {3, 0x4, 0x1, 0x4, 0x4, 0x2D, 6},
+    {3, 0x5, 0x0, 0x0, 0x0, 0x06, 5},
+    {3, 0x5, 0x1, 0x5, 0x5, 0x1A, 5},
+    {3, 0x5, 0x1, 0x5, 0x1, 0x0D, 6},
+    {3, 0x5, 0x1, 0x5, 0x4, 0x35, 6},
+    {3, 0x6, 0x0, 0x0, 0x0, 0x3F, 7},
+    {3, 0x6, 0x1, 0x4, 0x4, 0x5F, 7},
+    {3, 0x6, 0x1, 0x6, 0x2, 0x1F, 7},
+    {3, 0x7, 0x0, 0x0, 0x0, 0x6F, 7},
+    {3, 0x7, 0x1, 0x6, 0x6, 0x2F, 7},
+    {3, 0x7, 0x1, 0x6, 0x4, 0x15, 6},
+    {3, 0x7, 0x1, 0x7, 0x3, 0x77, 7},
+    {3, 0x7, 0x1, 0x7, 0x1, 0x25, 6},
+    {3, 0x7, 0x1, 0x7, 0x2, 0x0F, 7},
+    {3, 0x8, 0x0, 0x0, 0x0, 0x0A, 5},
+    {3, 0x8, 0x1, 0x8, 0x8, 0x07, 7},
+    {3, 0x9, 0x0, 0x0, 0x0, 0x39, 6},
+    {3, 0x9, 0x1, 0x1, 0x1, 0x37, 7},
+    {3, 0x9, 0x1, 0x9, 0x8, 0x57, 7},
+    {3, 0xA, 0x0, 0x0, 0x0, 0x19, 6},
+    {3, 0xA, 0x1, 0x8, 0x8, 0x29, 6},
+    {3, 0xA, 0x1, 0xA, 0x2, 0x17, 7},
+    {3, 0xB, 0x0, 0x0, 0x0, 0x67, 7},
+    {3, 0xB, 0x1, 0xB, 0x1, 0x27, 7},
+    {3, 0xB, 0x1, 0x1, 0x1, 0x47, 7},
+    {3, 0xB, 0x1, 0x3, 0x2, 0x09, 6},
+    {3, 0xB, 0x1, 0xB, 0x8, 0x7B, 7},
+    {3, 0xC, 0x0, 0x0, 0x0, 0x31, 6},
+    {3, 0xC, 0x1, 0x4, 0x4, 0x11, 6},
+    {3, 0xC, 0x1, 0xC, 0x8, 0x3B, 7},
+    {3, 0xD, 0x0, 0x0, 0x0, 0x5B, 7},
+    {3, 0xD, 0x1, 0x9, 0x9, 0x1B, 7},
+    {3, 0xD, 0x1, 0xD, 0x5, 0x2B, 7},
+    {3, 0xD, 0x1, 0xD, 0x1, 0x21, 6},
+    {3, 0xD, 0x1, 0xD, 0xC, 0x6B, 7},
+    {3, 0xD, 0x1, 0xD, 0x4, 0x01, 6},
+    {3, 0xD, 0x1, 0xD, 0x8, 0x4B, 7},
+    {3, 0xE, 0x0, 0x0, 0x0, 0x0B, 7},
+    {3, 0xE, 0x1, 0xE, 0x4, 0x73, 7},
+    {3, 0xE, 0x1, 0x4, 0x4, 0x13, 7},
+    {3, 0xE, 0x1, 0xC, 0x8, 0x3E, 6},
+    {3, 0xE, 0x1, 0xE, 0x2, 0x33, 7},
+    {3, 0xF, 0x0, 0x0, 0x0, 0x53, 7},
+    {3, 0xF, 0x1, 0xA, 0xA, 0x0E, 6},
+    {3, 0xF, 0x1, 0xB, 0x9, 0x63, 7},
+    {3, 0xF, 0x1, 0xF, 0xC, 0x03, 7},
+    {3, 0xF, 0x1, 0xF, 0x8, 0x12, 5},
+    {3, 0xF, 0x1, 0xE, 0x6, 0x23, 7},
+    {3, 0xF, 0x1, 0xF, 0x5, 0x1E, 6},
+    {3, 0xF, 0x1, 0xF, 0x4, 0x02, 5},
+    {3, 0xF, 0x1, 0xF, 0x3, 0x43, 7},
+    {3, 0xF, 0x1, 0xF, 0x1, 0x1C, 5},
+    {3, 0xF, 0x1, 0xF, 0x2, 0x2E, 6},
+    {4, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
+    {4, 0x1, 0x0, 0x0, 0x0, 0x0E, 4},
+    {4, 0x1, 0x1, 0x1, 0x1, 0x3F, 7},
+    {4, 0x2, 0x0, 0x0, 0x0, 0x06, 4},
+    {4, 0x2, 0x1, 0x2, 0x2, 0x1B, 6},
+    {4, 0x3, 0x0, 0x0, 0x0, 0x2B, 6},
+    {4, 0x3, 0x1, 0x2, 0x2, 0x3D, 6},
+    {4, 0x3, 0x1, 0x3, 0x1, 0x7F, 7},
+    {4, 0x4, 0x0, 0x0, 0x0, 0x0A, 4},
+    {4, 0x4, 0x1, 0x4, 0x4, 0x5F, 7},
+    {4, 0x5, 0x0, 0x0, 0x0, 0x0B, 6},
+    {4, 0x5, 0x1, 0x0, 0x0, 0x33, 6},
+    {4, 0x6, 0x0, 0x0, 0x0, 0x13, 6},
+    {4, 0x6, 0x1, 0x0, 0x0, 0x23, 6},
+    {4, 0x7, 0x0, 0x0, 0x0, 0x1F, 7},
+    {4, 0x7, 0x1, 0x4, 0x4, 0x6F, 7},
+    {4, 0x7, 0x1, 0x4, 0x0, 0x03, 6},
+    {4, 0x8, 0x0, 0x0, 0x0, 0x02, 4},
+    {4, 0x8, 0x1, 0x8, 0x8, 0x1D, 6},
+    {4, 0x9, 0x0, 0x0, 0x0, 0x11, 6},
+    {4, 0x9, 0x1, 0x0, 0x0, 0x77, 7},
+    {4, 0xA, 0x0, 0x0, 0x0, 0x01, 5},
+    {4, 0xA, 0x1, 0xA, 0xA, 0x2F, 7},
+    {4, 0xA, 0x1, 0xA, 0x2, 0x2D, 6},
+    {4, 0xA, 0x1, 0xA, 0x8, 0x0D, 6},
+    {4, 0xB, 0x0, 0x0, 0x0, 0x4F, 7},
+    {4, 0xB, 0x1, 0xB, 0x2, 0x0F, 7},
+    {4, 0xB, 0x1, 0x0, 0x0, 0x35, 6},
+    {4, 0xC, 0x0, 0x0, 0x0, 0x15, 6},
+    {4, 0xC, 0x1, 0x8, 0x8, 0x25, 6},
+    {4, 0xC, 0x1, 0xC, 0x4, 0x37, 7},
+    {4, 0xD, 0x0, 0x0, 0x0, 0x57, 7},
+    {4, 0xD, 0x1, 0x1, 0x1, 0x07, 7},
+    {4, 0xD, 0x1, 0x1, 0x0, 0x05, 6},
+    {4, 0xE, 0x0, 0x0, 0x0, 0x17, 7},
+    {4, 0xE, 0x1, 0x4, 0x4, 0x39, 6},
+    {4, 0xE, 0x1, 0xC, 0x8, 0x19, 6},
+    {4, 0xE, 0x1, 0xE, 0x2, 0x67, 7},
+    {4, 0xF, 0x0, 0x0, 0x0, 0x27, 7},
+    {4, 0xF, 0x1, 0x9, 0x9, 0x47, 7},
+    {4, 0xF, 0x1, 0x9, 0x1, 0x29, 6},
+    {4, 0xF, 0x1, 0x7, 0x6, 0x7B, 7},
+    {4, 0xF, 0x1, 0x7, 0x2, 0x09, 6},
+    {4, 0xF, 0x1, 0xB, 0x8, 0x31, 6},
+    {4, 0xF, 0x1, 0xF, 0x4, 0x3B, 7},
+    {5, 0x0, 0x0, 0x0, 0x0, 0x00, 3},
+    {5, 0x1, 0x0, 0x0, 0x0, 0x1A, 5},
+    {5, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
+    {5, 0x2, 0x0, 0x0, 0x0, 0x0A, 5},
+    {5, 0x2, 0x1, 0x2, 0x2, 0x1D, 6},
+    {5, 0x3, 0x0, 0x0, 0x0, 0x2D, 6},
+    {5, 0x3, 0x1, 0x3, 0x3, 0x5F, 7},
+    {5, 0x3, 0x1, 0x3, 0x2, 0x39, 6},
+    {5, 0x3, 0x1, 0x3, 0x1, 0x3F, 7},
+    {5, 0x4, 0x0, 0x0, 0x0, 0x12, 5},
+    {5, 0x4, 0x1, 0x4, 0x4, 0x1F, 7},
+    {5, 0x5, 0x0, 0x0, 0x0, 0x0D, 6},
+    {5, 0x5, 0x1, 0x4, 0x4, 0x35, 6},
+    {5, 0x5, 0x1, 0x5, 0x1, 0x6F, 7},
+    {5, 0x6, 0x0, 0x0, 0x0, 0x15, 6},
+    {5, 0x6, 0x1, 0x2, 0x2, 0x25, 6},
+    {5, 0x6, 0x1, 0x6, 0x4, 0x2F, 7},
+    {5, 0x7, 0x0, 0x0, 0x0, 0x4F, 7},
+    {5, 0x7, 0x1, 0x6, 0x6, 0x57, 7},
+    {5, 0x7, 0x1, 0x6, 0x4, 0x05, 6},
+    {5, 0x7, 0x1, 0x7, 0x3, 0x0F, 7},
+    {5, 0x7, 0x1, 0x7, 0x2, 0x77, 7},
+    {5, 0x7, 0x1, 0x7, 0x1, 0x37, 7},
+    {5, 0x8, 0x0, 0x0, 0x0, 0x02, 5},
+    {5, 0x8, 0x1, 0x8, 0x8, 0x19, 6},
+    {5, 0x9, 0x0, 0x0, 0x0, 0x26, 6},
+    {5, 0x9, 0x1, 0x8, 0x8, 0x17, 7},
+    {5, 0x9, 0x1, 0x9, 0x1, 0x67, 7},
+    {5, 0xA, 0x0, 0x0, 0x0, 0x1C, 5},
+    {5, 0xA, 0x1, 0xA, 0xA, 0x29, 6},
+    {5, 0xA, 0x1, 0xA, 0x2, 0x09, 6},
+    {5, 0xA, 0x1, 0xA, 0x8, 0x31, 6},
+    {5, 0xB, 0x0, 0x0, 0x0, 0x27, 7},
+    {5, 0xB, 0x1, 0x9, 0x9, 0x07, 7},
+    {5, 0xB, 0x1, 0x9, 0x8, 0x11, 6},
+    {5, 0xB, 0x1, 0xB, 0x3, 0x47, 7},
+    {5, 0xB, 0x1, 0xB, 0x2, 0x21, 6},
+    {5, 0xB, 0x1, 0xB, 0x1, 0x7B, 7},
+    {5, 0xC, 0x0, 0x0, 0x0, 0x01, 6},
+    {5, 0xC, 0x1, 0x8, 0x8, 0x3E, 6},
+    {5, 0xC, 0x1, 0xC, 0x4, 0x3B, 7},
+    {5, 0xD, 0x0, 0x0, 0x0, 0x5B, 7},
+    {5, 0xD, 0x1, 0x9, 0x9, 0x6B, 7},
+    {5, 0xD, 0x1, 0x9, 0x8, 0x1E, 6},
+    {5, 0xD, 0x1, 0xD, 0x5, 0x1B, 7},
+    {5, 0xD, 0x1, 0xD, 0x4, 0x2E, 6},
+    {5, 0xD, 0x1, 0xD, 0x1, 0x2B, 7},
+    {5, 0xE, 0x0, 0x0, 0x0, 0x4B, 7},
+    {5, 0xE, 0x1, 0x6, 0x6, 0x0B, 7},
+    {5, 0xE, 0x1, 0xE, 0xA, 0x33, 7},
+    {5, 0xE, 0x1, 0xE, 0x2, 0x0E, 6},
+    {5, 0xE, 0x1, 0xE, 0xC, 0x73, 7},
+    {5, 0xE, 0x1, 0xE, 0x8, 0x36, 6},
+    {5, 0xE, 0x1, 0xE, 0x4, 0x53, 7},
+    {5, 0xF, 0x0, 0x0, 0x0, 0x13, 7},
+    {5, 0xF, 0x1, 0x7, 0x7, 0x43, 7},
+    {5, 0xF, 0x1, 0x7, 0x6, 0x16, 6},
+    {5, 0xF, 0x1, 0x7, 0x5, 0x63, 7},
+    {5, 0xF, 0x1, 0xF, 0xC, 0x23, 7},
+    {5, 0xF, 0x1, 0xF, 0x4, 0x0C, 5},
+    {5, 0xF, 0x1, 0xD, 0x9, 0x03, 7},
+    {5, 0xF, 0x1, 0xF, 0xA, 0x3D, 7},
+    {5, 0xF, 0x1, 0xF, 0x8, 0x14, 5},
+    {5, 0xF, 0x1, 0xF, 0x3, 0x7D, 7},
+    {5, 0xF, 0x1, 0xF, 0x2, 0x04, 5},
+    {5, 0xF, 0x1, 0xF, 0x1, 0x06, 6},
+    {6, 0x0, 0x0, 0x0, 0x0, 0x00, 3},
+    {6, 0x1, 0x0, 0x0, 0x0, 0x04, 4},
+    {6, 0x1, 0x1, 0x1, 0x1, 0x03, 6},
+    {6, 0x2, 0x0, 0x0, 0x0, 0x0C, 5},
+    {6, 0x2, 0x1, 0x2, 0x2, 0x0D, 6},
+    {6, 0x3, 0x0, 0x0, 0x0, 0x1A, 5},
+    {6, 0x3, 0x1, 0x3, 0x3, 0x3D, 6},
+    {6, 0x3, 0x1, 0x3, 0x1, 0x1D, 6},
+    {6, 0x3, 0x1, 0x3, 0x2, 0x2D, 6},
+    {6, 0x4, 0x0, 0x0, 0x0, 0x0A, 5},
+    {6, 0x4, 0x1, 0x4, 0x4, 0x3F, 7},
+    {6, 0x5, 0x0, 0x0, 0x0, 0x35, 6},
+    {6, 0x5, 0x1, 0x1, 0x1, 0x15, 6},
+    {6, 0x5, 0x1, 0x5, 0x4, 0x7F, 7},
+    {6, 0x6, 0x0, 0x0, 0x0, 0x25, 6},
+    {6, 0x6, 0x1, 0x2, 0x2, 0x5F, 7},
+    {6, 0x6, 0x1, 0x6, 0x4, 0x1F, 7},
+    {6, 0x7, 0x0, 0x0, 0x0, 0x6F, 7},
+    {6, 0x7, 0x1, 0x6, 0x6, 0x4F, 7},
+    {6, 0x7, 0x1, 0x6, 0x4, 0x05, 6},
+    {6, 0x7, 0x1, 0x7, 0x3, 0x2F, 7},
+    {6, 0x7, 0x1, 0x7, 0x1, 0x36, 6},
+    {6, 0x7, 0x1, 0x7, 0x2, 0x77, 7},
+    {6, 0x8, 0x0, 0x0, 0x0, 0x12, 5},
+    {6, 0x8, 0x1, 0x8, 0x8, 0x0F, 7},
+    {6, 0x9, 0x0, 0x0, 0x0, 0x39, 6},
+    {6, 0x9, 0x1, 0x1, 0x1, 0x37, 7},
+    {6, 0x9, 0x1, 0x9, 0x8, 0x57, 7},
+    {6, 0xA, 0x0, 0x0, 0x0, 0x19, 6},
+    {6, 0xA, 0x1, 0x2, 0x2, 0x29, 6},
+    {6, 0xA, 0x1, 0xA, 0x8, 0x17, 7},
+    {6, 0xB, 0x0, 0x0, 0x0, 0x67, 7},
+    {6, 0xB, 0x1, 0x9, 0x9, 0x47, 7},
+    {6, 0xB, 0x1, 0x9, 0x1, 0x09, 6},
+    {6, 0xB, 0x1, 0xB, 0xA, 0x27, 7},
+    {6, 0xB, 0x1, 0xB, 0x2, 0x31, 6},
+    {6, 0xB, 0x1, 0xB, 0x8, 0x7B, 7},
+    {6, 0xC, 0x0, 0x0, 0x0, 0x11, 6},
+    {6, 0xC, 0x1, 0xC, 0xC, 0x07, 7},
+    {6, 0xC, 0x1, 0xC, 0x8, 0x21, 6},
+    {6, 0xC, 0x1, 0xC, 0x4, 0x3B, 7},
+    {6, 0xD, 0x0, 0x0, 0x0, 0x5B, 7},
+    {6, 0xD, 0x1, 0x5, 0x5, 0x33, 7},
+    {6, 0xD, 0x1, 0x5, 0x4, 0x01, 6},
+    {6, 0xD, 0x1, 0xC, 0x8, 0x1B, 7},
+    {6, 0xD, 0x1, 0xD, 0x1, 0x6B, 7},
+    {6, 0xE, 0x0, 0x0, 0x0, 0x2B, 7},
+    {6, 0xE, 0x1, 0xE, 0x2, 0x4B, 7},
+    {6, 0xE, 0x1, 0x2, 0x2, 0x0B, 7},
+    {6, 0xE, 0x1, 0xE, 0xC, 0x73, 7},
+    {6, 0xE, 0x1, 0xE, 0x8, 0x3E, 6},
+    {6, 0xE, 0x1, 0xE, 0x4, 0x53, 7},
+    {6, 0xF, 0x0, 0x0, 0x0, 0x13, 7},
+    {6, 0xF, 0x1, 0x6, 0x6, 0x1E, 6},
+    {6, 0xF, 0x1, 0xE, 0xA, 0x2E, 6},
+    {6, 0xF, 0x1, 0xF, 0x3, 0x0E, 6},
+    {6, 0xF, 0x1, 0xF, 0x2, 0x02, 5},
+    {6, 0xF, 0x1, 0xB, 0x9, 0x63, 7},
+    {6, 0xF, 0x1, 0xF, 0xC, 0x16, 6},
+    {6, 0xF, 0x1, 0xF, 0x8, 0x06, 6},
+    {6, 0xF, 0x1, 0xF, 0x5, 0x23, 7},
+    {6, 0xF, 0x1, 0xF, 0x1, 0x1C, 5},
+    {6, 0xF, 0x1, 0xF, 0x4, 0x26, 6},
+    {7, 0x0, 0x0, 0x0, 0x0, 0x12, 5},
+    {7, 0x1, 0x0, 0x0, 0x0, 0x05, 6},
+    {7, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
+    {7, 0x2, 0x0, 0x0, 0x0, 0x39, 6},
+    {7, 0x2, 0x1, 0x2, 0x2, 0x3F, 7},
+    {7, 0x3, 0x0, 0x0, 0x0, 0x5F, 7},
+    {7, 0x3, 0x1, 0x3, 0x3, 0x1F, 7},
+    {7, 0x3, 0x1, 0x3, 0x2, 0x6F, 7},
+    {7, 0x3, 0x1, 0x3, 0x1, 0x2F, 7},
+    {7, 0x4, 0x0, 0x0, 0x0, 0x4F, 7},
+    {7, 0x4, 0x1, 0x4, 0x4, 0x0F, 7},
+    {7, 0x5, 0x0, 0x0, 0x0, 0x57, 7},
+    {7, 0x5, 0x1, 0x1, 0x1, 0x19, 6},
+    {7, 0x5, 0x1, 0x5, 0x4, 0x77, 7},
+    {7, 0x6, 0x0, 0x0, 0x0, 0x37, 7},
+    {7, 0x6, 0x1, 0x0, 0x0, 0x29, 6},
+    {7, 0x7, 0x0, 0x0, 0x0, 0x17, 7},
+    {7, 0x7, 0x1, 0x6, 0x6, 0x67, 7},
+    {7, 0x7, 0x1, 0x7, 0x3, 0x27, 7},
+    {7, 0x7, 0x1, 0x7, 0x2, 0x47, 7},
+    {7, 0x7, 0x1, 0x7, 0x5, 0x1B, 7},
+    {7, 0x7, 0x1, 0x7, 0x1, 0x09, 6},
+    {7, 0x7, 0x1, 0x7, 0x4, 0x07, 7},
+    {7, 0x8, 0x0, 0x0, 0x0, 0x7B, 7},
+    {7, 0x8, 0x1, 0x8, 0x8, 0x3B, 7},
+    {7, 0x9, 0x0, 0x0, 0x0, 0x5B, 7},
+    {7, 0x9, 0x1, 0x0, 0x0, 0x31, 6},
+    {7, 0xA, 0x0, 0x0, 0x0, 0x53, 7},
+    {7, 0xA, 0x1, 0x2, 0x2, 0x11, 6},
+    {7, 0xA, 0x1, 0xA, 0x8, 0x6B, 7},
+    {7, 0xB, 0x0, 0x0, 0x0, 0x2B, 7},
+    {7, 0xB, 0x1, 0x9, 0x9, 0x4B, 7},
+    {7, 0xB, 0x1, 0xB, 0x3, 0x0B, 7},
+    {7, 0xB, 0x1, 0xB, 0x1, 0x73, 7},
+    {7, 0xB, 0x1, 0xB, 0xA, 0x33, 7},
+    {7, 0xB, 0x1, 0xB, 0x2, 0x21, 6},
+    {7, 0xB, 0x1, 0xB, 0x8, 0x13, 7},
+    {7, 0xC, 0x0, 0x0, 0x0, 0x63, 7},
+    {7, 0xC, 0x1, 0x8, 0x8, 0x23, 7},
+    {7, 0xC, 0x1, 0xC, 0x4, 0x43, 7},
+    {7, 0xD, 0x0, 0x0, 0x0, 0x03, 7},
+    {7, 0xD, 0x1, 0x9, 0x9, 0x7D, 7},
+    {7, 0xD, 0x1, 0xD, 0x5, 0x5D, 7},
+    {7, 0xD, 0x1, 0xD, 0x1, 0x01, 6},
+    {7, 0xD, 0x1, 0xD, 0xC, 0x3D, 7},
+    {7, 0xD, 0x1, 0xD, 0x4, 0x3E, 6},
+    {7, 0xD, 0x1, 0xD, 0x8, 0x1D, 7},
+    {7, 0xE, 0x0, 0x0, 0x0, 0x6D, 7},
+    {7, 0xE, 0x1, 0x6, 0x6, 0x2D, 7},
+    {7, 0xE, 0x1, 0xE, 0xA, 0x0D, 7},
+    {7, 0xE, 0x1, 0xE, 0x2, 0x1E, 6},
+    {7, 0xE, 0x1, 0xE, 0xC, 0x4D, 7},
+    {7, 0xE, 0x1, 0xE, 0x8, 0x0E, 6},
+    {7, 0xE, 0x1, 0xE, 0x4, 0x75, 7},
+    {7, 0xF, 0x0, 0x0, 0x0, 0x15, 7},
+    {7, 0xF, 0x1, 0xF, 0xF, 0x06, 5},
+    {7, 0xF, 0x1, 0xF, 0xD, 0x35, 7},
+    {7, 0xF, 0x1, 0xF, 0x7, 0x55, 7},
+    {7, 0xF, 0x1, 0xF, 0x5, 0x1A, 5},
+    {7, 0xF, 0x1, 0xF, 0xB, 0x25, 7},
+    {7, 0xF, 0x1, 0xF, 0x3, 0x0A, 5},
+    {7, 0xF, 0x1, 0xF, 0x9, 0x2E, 6},
+    {7, 0xF, 0x1, 0xF, 0x1, 0x00, 4},
+    {7, 0xF, 0x1, 0xF, 0xE, 0x65, 7},
+    {7, 0xF, 0x1, 0xF, 0x6, 0x36, 6},
+    {7, 0xF, 0x1, 0xF, 0xA, 0x02, 5},
+    {7, 0xF, 0x1, 0xF, 0x2, 0x0C, 4},
+    {7, 0xF, 0x1, 0xF, 0xC, 0x16, 6},
+    {7, 0xF, 0x1, 0xF, 0x8, 0x04, 4},
+    {7, 0xF, 0x1, 0xF, 0x4, 0x08, 4}
 };
 
 // nono-initial quad rows
 static vlc_src_table_t tbl1[] = {
-  {0, 0x1, 0x0, 0x0, 0x0, 0x00, 3},
-  {0, 0x1, 0x1, 0x1, 0x1, 0x27, 6},
-  {0, 0x2, 0x0, 0x0, 0x0, 0x06, 3},
-  {0, 0x2, 0x1, 0x2, 0x2, 0x17, 6},
-  {0, 0x3, 0x0, 0x0, 0x0, 0x0D, 5},
-  {0, 0x3, 0x1, 0x0, 0x0, 0x3B, 6},
-  {0, 0x4, 0x0, 0x0, 0x0, 0x02, 3},
-  {0, 0x4, 0x1, 0x4, 0x4, 0x07, 6},
-  {0, 0x5, 0x0, 0x0, 0x0, 0x15, 5},
-  {0, 0x5, 0x1, 0x0, 0x0, 0x2B, 6},
-  {0, 0x6, 0x0, 0x0, 0x0, 0x01, 5},
-  {0, 0x6, 0x1, 0x0, 0x0, 0x7F, 7},
-  {0, 0x7, 0x0, 0x0, 0x0, 0x1F, 7},
-  {0, 0x7, 0x1, 0x0, 0x0, 0x1B, 6},
-  {0, 0x8, 0x0, 0x0, 0x0, 0x04, 3},
-  {0, 0x8, 0x1, 0x8, 0x8, 0x05, 5},
-  {0, 0x9, 0x0, 0x0, 0x0, 0x19, 5},
-  {0, 0x9, 0x1, 0x0, 0x0, 0x13, 6},
-  {0, 0xA, 0x0, 0x0, 0x0, 0x09, 5},
-  {0, 0xA, 0x1, 0x8, 0x8, 0x0B, 6},
-  {0, 0xA, 0x1, 0xA, 0x2, 0x3F, 7},
-  {0, 0xB, 0x0, 0x0, 0x0, 0x5F, 7},
-  {0, 0xB, 0x1, 0x0, 0x0, 0x33, 6},
-  {0, 0xC, 0x0, 0x0, 0x0, 0x11, 5},
-  {0, 0xC, 0x1, 0x8, 0x8, 0x23, 6},
-  {0, 0xC, 0x1, 0xC, 0x4, 0x6F, 7},
-  {0, 0xD, 0x0, 0x0, 0x0, 0x0F, 7},
-  {0, 0xD, 0x1, 0x0, 0x0, 0x03, 6},
-  {0, 0xE, 0x0, 0x0, 0x0, 0x2F, 7},
-  {0, 0xE, 0x1, 0x4, 0x4, 0x4F, 7},
-  {0, 0xE, 0x1, 0x4, 0x0, 0x3D, 6},
-  {0, 0xF, 0x0, 0x0, 0x0, 0x77, 7},
-  {0, 0xF, 0x1, 0x1, 0x1, 0x37, 7},
-  {0, 0xF, 0x1, 0x1, 0x0, 0x1D, 6},
-  {1, 0x0, 0x0, 0x0, 0x0, 0x00, 1},
-  {1, 0x1, 0x0, 0x0, 0x0, 0x05, 4},
-  {1, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
-  {1, 0x2, 0x0, 0x0, 0x0, 0x09, 4},
-  {1, 0x2, 0x1, 0x2, 0x2, 0x1F, 7},
-  {1, 0x3, 0x0, 0x0, 0x0, 0x1D, 5},
-  {1, 0x3, 0x1, 0x1, 0x1, 0x3F, 7},
-  {1, 0x3, 0x1, 0x3, 0x2, 0x5F, 7},
-  {1, 0x4, 0x0, 0x0, 0x0, 0x0D, 5},
-  {1, 0x4, 0x1, 0x4, 0x4, 0x37, 7},
-  {1, 0x5, 0x0, 0x0, 0x0, 0x03, 6},
-  {1, 0x5, 0x1, 0x0, 0x0, 0x6F, 7},
-  {1, 0x6, 0x0, 0x0, 0x0, 0x2F, 7},
-  {1, 0x6, 0x1, 0x0, 0x0, 0x4F, 7},
-  {1, 0x7, 0x0, 0x0, 0x0, 0x0F, 7},
-  {1, 0x7, 0x1, 0x0, 0x0, 0x77, 7},
-  {1, 0x8, 0x0, 0x0, 0x0, 0x01, 4},
-  {1, 0x8, 0x1, 0x8, 0x8, 0x17, 7},
-  {1, 0x9, 0x0, 0x0, 0x0, 0x0B, 6},
-  {1, 0x9, 0x1, 0x0, 0x0, 0x57, 7},
-  {1, 0xA, 0x0, 0x0, 0x0, 0x33, 6},
-  {1, 0xA, 0x1, 0x0, 0x0, 0x67, 7},
-  {1, 0xB, 0x0, 0x0, 0x0, 0x27, 7},
-  {1, 0xB, 0x1, 0x0, 0x0, 0x2B, 7},
-  {1, 0xC, 0x0, 0x0, 0x0, 0x13, 6},
-  {1, 0xC, 0x1, 0x0, 0x0, 0x47, 7},
-  {1, 0xD, 0x0, 0x0, 0x0, 0x07, 7},
-  {1, 0xD, 0x1, 0x0, 0x0, 0x7B, 7},
-  {1, 0xE, 0x0, 0x0, 0x0, 0x3B, 7},
-  {1, 0xE, 0x1, 0x0, 0x0, 0x5B, 7},
-  {1, 0xF, 0x0, 0x0, 0x0, 0x1B, 7},
-  {1, 0xF, 0x1, 0x4, 0x4, 0x6B, 7},
-  {1, 0xF, 0x1, 0x4, 0x0, 0x23, 6},
-  {2, 0x0, 0x0, 0x0, 0x0, 0x00, 1},
-  {2, 0x1, 0x0, 0x0, 0x0, 0x09, 4},
-  {2, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
-  {2, 0x2, 0x0, 0x0, 0x0, 0x01, 4},
-  {2, 0x2, 0x1, 0x2, 0x2, 0x23, 6},
-  {2, 0x3, 0x0, 0x0, 0x0, 0x3D, 6},
-  {2, 0x3, 0x1, 0x2, 0x2, 0x3F, 7},
-  {2, 0x3, 0x1, 0x3, 0x1, 0x1F, 7},
-  {2, 0x4, 0x0, 0x0, 0x0, 0x15, 5},
-  {2, 0x4, 0x1, 0x4, 0x4, 0x5F, 7},
-  {2, 0x5, 0x0, 0x0, 0x0, 0x03, 6},
-  {2, 0x5, 0x1, 0x0, 0x0, 0x6F, 7},
-  {2, 0x6, 0x0, 0x0, 0x0, 0x2F, 7},
-  {2, 0x6, 0x1, 0x0, 0x0, 0x4F, 7},
-  {2, 0x7, 0x0, 0x0, 0x0, 0x0F, 7},
-  {2, 0x7, 0x1, 0x0, 0x0, 0x17, 7},
-  {2, 0x8, 0x0, 0x0, 0x0, 0x05, 5},
-  {2, 0x8, 0x1, 0x8, 0x8, 0x77, 7},
-  {2, 0x9, 0x0, 0x0, 0x0, 0x37, 7},
-  {2, 0x9, 0x1, 0x0, 0x0, 0x57, 7},
-  {2, 0xA, 0x0, 0x0, 0x0, 0x1D, 6},
-  {2, 0xA, 0x1, 0xA, 0xA, 0x7B, 7},
-  {2, 0xA, 0x1, 0xA, 0x2, 0x2D, 6},
-  {2, 0xA, 0x1, 0xA, 0x8, 0x67, 7},
-  {2, 0xB, 0x0, 0x0, 0x0, 0x27, 7},
-  {2, 0xB, 0x1, 0xB, 0x2, 0x47, 7},
-  {2, 0xB, 0x1, 0x0, 0x0, 0x07, 7},
-  {2, 0xC, 0x0, 0x0, 0x0, 0x0D, 6},
-  {2, 0xC, 0x1, 0x0, 0x0, 0x3B, 7},
-  {2, 0xD, 0x0, 0x0, 0x0, 0x5B, 7},
-  {2, 0xD, 0x1, 0x0, 0x0, 0x1B, 7},
-  {2, 0xE, 0x0, 0x0, 0x0, 0x6B, 7},
-  {2, 0xE, 0x1, 0x4, 0x4, 0x2B, 7},
-  {2, 0xE, 0x1, 0x4, 0x0, 0x4B, 7},
-  {2, 0xF, 0x0, 0x0, 0x0, 0x0B, 7},
-  {2, 0xF, 0x1, 0x4, 0x4, 0x73, 7},
-  {2, 0xF, 0x1, 0x5, 0x1, 0x33, 7},
-  {2, 0xF, 0x1, 0x7, 0x2, 0x53, 7},
-  {2, 0xF, 0x1, 0xF, 0x8, 0x13, 7},
-  {3, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
-  {3, 0x1, 0x0, 0x0, 0x0, 0x0A, 4},
-  {3, 0x1, 0x1, 0x1, 0x1, 0x0B, 6},
-  {3, 0x2, 0x0, 0x0, 0x0, 0x02, 4},
-  {3, 0x2, 0x1, 0x2, 0x2, 0x23, 6},
-  {3, 0x3, 0x0, 0x0, 0x0, 0x0E, 5},
-  {3, 0x3, 0x1, 0x3, 0x3, 0x7F, 7},
-  {3, 0x3, 0x1, 0x3, 0x2, 0x33, 6},
-  {3, 0x3, 0x1, 0x3, 0x1, 0x13, 6},
-  {3, 0x4, 0x0, 0x0, 0x0, 0x16, 5},
-  {3, 0x4, 0x1, 0x4, 0x4, 0x3F, 7},
-  {3, 0x5, 0x0, 0x0, 0x0, 0x03, 6},
-  {3, 0x5, 0x1, 0x1, 0x1, 0x3D, 6},
-  {3, 0x5, 0x1, 0x5, 0x4, 0x1F, 7},
-  {3, 0x6, 0x0, 0x0, 0x0, 0x1D, 6},
-  {3, 0x6, 0x1, 0x0, 0x0, 0x5F, 7},
-  {3, 0x7, 0x0, 0x0, 0x0, 0x2D, 6},
-  {3, 0x7, 0x1, 0x4, 0x4, 0x2F, 7},
-  {3, 0x7, 0x1, 0x5, 0x1, 0x1E, 6},
-  {3, 0x7, 0x1, 0x7, 0x2, 0x6F, 7},
-  {3, 0x8, 0x0, 0x0, 0x0, 0x06, 5},
-  {3, 0x8, 0x1, 0x8, 0x8, 0x4F, 7},
-  {3, 0x9, 0x0, 0x0, 0x0, 0x0D, 6},
-  {3, 0x9, 0x1, 0x0, 0x0, 0x35, 6},
-  {3, 0xA, 0x0, 0x0, 0x0, 0x15, 6},
-  {3, 0xA, 0x1, 0x2, 0x2, 0x25, 6},
-  {3, 0xA, 0x1, 0xA, 0x8, 0x0F, 7},
-  {3, 0xB, 0x0, 0x0, 0x0, 0x05, 6},
-  {3, 0xB, 0x1, 0x8, 0x8, 0x39, 6},
-  {3, 0xB, 0x1, 0xB, 0x3, 0x17, 7},
-  {3, 0xB, 0x1, 0xB, 0x2, 0x19, 6},
-  {3, 0xB, 0x1, 0xB, 0x1, 0x77, 7},
-  {3, 0xC, 0x0, 0x0, 0x0, 0x29, 6},
-  {3, 0xC, 0x1, 0x0, 0x0, 0x09, 6},
-  {3, 0xD, 0x0, 0x0, 0x0, 0x37, 7},
-  {3, 0xD, 0x1, 0x4, 0x4, 0x57, 7},
-  {3, 0xD, 0x1, 0x4, 0x0, 0x31, 6},
-  {3, 0xE, 0x0, 0x0, 0x0, 0x67, 7},
-  {3, 0xE, 0x1, 0x4, 0x4, 0x27, 7},
-  {3, 0xE, 0x1, 0xC, 0x8, 0x47, 7},
-  {3, 0xE, 0x1, 0xE, 0x2, 0x6B, 7},
-  {3, 0xF, 0x0, 0x0, 0x0, 0x11, 6},
-  {3, 0xF, 0x1, 0x6, 0x6, 0x07, 7},
-  {3, 0xF, 0x1, 0x7, 0x3, 0x7B, 7},
-  {3, 0xF, 0x1, 0xF, 0xA, 0x3B, 7},
-  {3, 0xF, 0x1, 0xF, 0x2, 0x21, 6},
-  {3, 0xF, 0x1, 0xF, 0x8, 0x01, 6},
-  {3, 0xF, 0x1, 0xA, 0x8, 0x5B, 7},
-  {3, 0xF, 0x1, 0xF, 0x5, 0x1B, 7},
-  {3, 0xF, 0x1, 0xF, 0x1, 0x3E, 6},
-  {3, 0xF, 0x1, 0xF, 0x4, 0x2B, 7},
-  {4, 0x0, 0x0, 0x0, 0x0, 0x00, 1},
-  {4, 0x1, 0x0, 0x0, 0x0, 0x0D, 5},
-  {4, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
-  {4, 0x2, 0x0, 0x0, 0x0, 0x15, 5},
-  {4, 0x2, 0x1, 0x2, 0x2, 0x3F, 7},
-  {4, 0x3, 0x0, 0x0, 0x0, 0x5F, 7},
-  {4, 0x3, 0x1, 0x0, 0x0, 0x6F, 7},
-  {4, 0x4, 0x0, 0x0, 0x0, 0x09, 4},
-  {4, 0x4, 0x1, 0x4, 0x4, 0x23, 6},
-  {4, 0x5, 0x0, 0x0, 0x0, 0x33, 6},
-  {4, 0x5, 0x1, 0x0, 0x0, 0x1F, 7},
-  {4, 0x6, 0x0, 0x0, 0x0, 0x13, 6},
-  {4, 0x6, 0x1, 0x0, 0x0, 0x2F, 7},
-  {4, 0x7, 0x0, 0x0, 0x0, 0x4F, 7},
-  {4, 0x7, 0x1, 0x0, 0x0, 0x57, 7},
-  {4, 0x8, 0x0, 0x0, 0x0, 0x01, 4},
-  {4, 0x8, 0x1, 0x8, 0x8, 0x0F, 7},
-  {4, 0x9, 0x0, 0x0, 0x0, 0x77, 7},
-  {4, 0x9, 0x1, 0x0, 0x0, 0x37, 7},
-  {4, 0xA, 0x0, 0x0, 0x0, 0x1D, 6},
-  {4, 0xA, 0x1, 0x0, 0x0, 0x17, 7},
-  {4, 0xB, 0x0, 0x0, 0x0, 0x67, 7},
-  {4, 0xB, 0x1, 0x0, 0x0, 0x6B, 7},
-  {4, 0xC, 0x0, 0x0, 0x0, 0x05, 5},
-  {4, 0xC, 0x1, 0xC, 0xC, 0x27, 7},
-  {4, 0xC, 0x1, 0xC, 0x8, 0x47, 7},
-  {4, 0xC, 0x1, 0xC, 0x4, 0x07, 7},
-  {4, 0xD, 0x0, 0x0, 0x0, 0x7B, 7},
-  {4, 0xD, 0x1, 0x0, 0x0, 0x3B, 7},
-  {4, 0xE, 0x0, 0x0, 0x0, 0x5B, 7},
-  {4, 0xE, 0x1, 0x2, 0x2, 0x1B, 7},
-  {4, 0xE, 0x1, 0x2, 0x0, 0x03, 6},
-  {4, 0xF, 0x0, 0x0, 0x0, 0x2B, 7},
-  {4, 0xF, 0x1, 0x1, 0x1, 0x4B, 7},
-  {4, 0xF, 0x1, 0x3, 0x2, 0x0B, 7},
-  {4, 0xF, 0x1, 0x3, 0x0, 0x3D, 6},
-  {5, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
-  {5, 0x1, 0x0, 0x0, 0x0, 0x1E, 5},
-  {5, 0x1, 0x1, 0x1, 0x1, 0x3B, 6},
-  {5, 0x2, 0x0, 0x0, 0x0, 0x0A, 5},
-  {5, 0x2, 0x1, 0x2, 0x2, 0x3F, 7},
-  {5, 0x3, 0x0, 0x0, 0x0, 0x1B, 6},
-  {5, 0x3, 0x1, 0x0, 0x0, 0x0B, 6},
-  {5, 0x4, 0x0, 0x0, 0x0, 0x02, 4},
-  {5, 0x4, 0x1, 0x4, 0x4, 0x2B, 6},
-  {5, 0x5, 0x0, 0x0, 0x0, 0x0E, 5},
-  {5, 0x5, 0x1, 0x4, 0x4, 0x33, 6},
-  {5, 0x5, 0x1, 0x5, 0x1, 0x7F, 7},
-  {5, 0x6, 0x0, 0x0, 0x0, 0x13, 6},
-  {5, 0x6, 0x1, 0x0, 0x0, 0x6F, 7},
-  {5, 0x7, 0x0, 0x0, 0x0, 0x23, 6},
-  {5, 0x7, 0x1, 0x2, 0x2, 0x5F, 7},
-  {5, 0x7, 0x1, 0x2, 0x0, 0x15, 6},
-  {5, 0x8, 0x0, 0x0, 0x0, 0x16, 5},
-  {5, 0x8, 0x1, 0x8, 0x8, 0x03, 6},
-  {5, 0x9, 0x0, 0x0, 0x0, 0x3D, 6},
-  {5, 0x9, 0x1, 0x0, 0x0, 0x1F, 7},
-  {5, 0xA, 0x0, 0x0, 0x0, 0x1D, 6},
-  {5, 0xA, 0x1, 0x0, 0x0, 0x2D, 6},
-  {5, 0xB, 0x0, 0x0, 0x0, 0x0D, 6},
-  {5, 0xB, 0x1, 0x1, 0x1, 0x4F, 7},
-  {5, 0xB, 0x1, 0x1, 0x0, 0x35, 6},
-  {5, 0xC, 0x0, 0x0, 0x0, 0x06, 5},
-  {5, 0xC, 0x1, 0x4, 0x4, 0x25, 6},
-  {5, 0xC, 0x1, 0xC, 0x8, 0x2F, 7},
-  {5, 0xD, 0x0, 0x0, 0x0, 0x05, 6},
-  {5, 0xD, 0x1, 0x1, 0x1, 0x77, 7},
-  {5, 0xD, 0x1, 0x5, 0x4, 0x39, 6},
-  {5, 0xD, 0x1, 0xD, 0x8, 0x0F, 7},
-  {5, 0xE, 0x0, 0x0, 0x0, 0x19, 6},
-  {5, 0xE, 0x1, 0x2, 0x2, 0x57, 7},
-  {5, 0xE, 0x1, 0xA, 0x8, 0x01, 6},
-  {5, 0xE, 0x1, 0xE, 0x4, 0x37, 7},
-  {5, 0xF, 0x0, 0x0, 0x0, 0x1A, 5},
-  {5, 0xF, 0x1, 0x9, 0x9, 0x17, 7},
-  {5, 0xF, 0x1, 0xD, 0x5, 0x67, 7},
-  {5, 0xF, 0x1, 0xF, 0x3, 0x07, 7},
-  {5, 0xF, 0x1, 0xF, 0x1, 0x29, 6},
-  {5, 0xF, 0x1, 0x7, 0x6, 0x27, 7},
-  {5, 0xF, 0x1, 0xF, 0xC, 0x09, 6},
-  {5, 0xF, 0x1, 0xF, 0x4, 0x31, 6},
-  {5, 0xF, 0x1, 0xF, 0xA, 0x47, 7},
-  {5, 0xF, 0x1, 0xF, 0x8, 0x11, 6},
-  {5, 0xF, 0x1, 0xF, 0x2, 0x21, 6},
-  {6, 0x0, 0x0, 0x0, 0x0, 0x00, 3},
-  {6, 0x1, 0x0, 0x0, 0x0, 0x02, 4},
-  {6, 0x1, 0x1, 0x1, 0x1, 0x03, 6},
-  {6, 0x2, 0x0, 0x0, 0x0, 0x0C, 4},
-  {6, 0x2, 0x1, 0x2, 0x2, 0x3D, 6},
-  {6, 0x3, 0x0, 0x0, 0x0, 0x1D, 6},
-  {6, 0x3, 0x1, 0x2, 0x2, 0x0D, 6},
-  {6, 0x3, 0x1, 0x3, 0x1, 0x7F, 7},
-  {6, 0x4, 0x0, 0x0, 0x0, 0x04, 4},
-  {6, 0x4, 0x1, 0x4, 0x4, 0x2D, 6},
-  {6, 0x5, 0x0, 0x0, 0x0, 0x0A, 5},
-  {6, 0x5, 0x1, 0x4, 0x4, 0x35, 6},
-  {6, 0x5, 0x1, 0x5, 0x1, 0x2F, 7},
-  {6, 0x6, 0x0, 0x0, 0x0, 0x15, 6},
-  {6, 0x6, 0x1, 0x2, 0x2, 0x3F, 7},
-  {6, 0x6, 0x1, 0x6, 0x4, 0x5F, 7},
-  {6, 0x7, 0x0, 0x0, 0x0, 0x25, 6},
-  {6, 0x7, 0x1, 0x2, 0x2, 0x29, 6},
-  {6, 0x7, 0x1, 0x3, 0x1, 0x1F, 7},
-  {6, 0x7, 0x1, 0x7, 0x4, 0x6F, 7},
-  {6, 0x8, 0x0, 0x0, 0x0, 0x16, 5},
-  {6, 0x8, 0x1, 0x8, 0x8, 0x05, 6},
-  {6, 0x9, 0x0, 0x0, 0x0, 0x39, 6},
-  {6, 0x9, 0x1, 0x0, 0x0, 0x19, 6},
-  {6, 0xA, 0x0, 0x0, 0x0, 0x06, 5},
-  {6, 0xA, 0x1, 0xA, 0xA, 0x0F, 7},
-  {6, 0xA, 0x1, 0xA, 0x2, 0x09, 6},
-  {6, 0xA, 0x1, 0xA, 0x8, 0x4F, 7},
-  {6, 0xB, 0x0, 0x0, 0x0, 0x0E, 6},
-  {6, 0xB, 0x1, 0xB, 0x2, 0x77, 7},
-  {6, 0xB, 0x1, 0x2, 0x2, 0x37, 7},
-  {6, 0xB, 0x1, 0xA, 0x8, 0x57, 7},
-  {6, 0xB, 0x1, 0xB, 0x1, 0x47, 7},
-  {6, 0xC, 0x0, 0x0, 0x0, 0x1A, 5},
-  {6, 0xC, 0x1, 0xC, 0xC, 0x17, 7},
-  {6, 0xC, 0x1, 0xC, 0x8, 0x67, 7},
-  {6, 0xC, 0x1, 0xC, 0x4, 0x27, 7},
-  {6, 0xD, 0x0, 0x0, 0x0, 0x31, 6},
-  {6, 0xD, 0x1, 0xD, 0x4, 0x07, 7},
-  {6, 0xD, 0x1, 0x4, 0x4, 0x7B, 7},
-  {6, 0xD, 0x1, 0xC, 0x8, 0x3B, 7},
-  {6, 0xD, 0x1, 0xD, 0x1, 0x2B, 7},
-  {6, 0xE, 0x0, 0x0, 0x0, 0x11, 6},
-  {6, 0xE, 0x1, 0xE, 0x4, 0x5B, 7},
-  {6, 0xE, 0x1, 0x4, 0x4, 0x1B, 7},
-  {6, 0xE, 0x1, 0xE, 0xA, 0x6B, 7},
-  {6, 0xE, 0x1, 0xE, 0x8, 0x21, 6},
-  {6, 0xE, 0x1, 0xE, 0x2, 0x33, 7},
-  {6, 0xF, 0x0, 0x0, 0x0, 0x01, 6},
-  {6, 0xF, 0x1, 0x3, 0x3, 0x4B, 7},
-  {6, 0xF, 0x1, 0x7, 0x6, 0x0B, 7},
-  {6, 0xF, 0x1, 0xF, 0xA, 0x73, 7},
-  {6, 0xF, 0x1, 0xF, 0x2, 0x3E, 6},
-  {6, 0xF, 0x1, 0xB, 0x9, 0x53, 7},
-  {6, 0xF, 0x1, 0xF, 0xC, 0x63, 7},
-  {6, 0xF, 0x1, 0xF, 0x8, 0x1E, 6},
-  {6, 0xF, 0x1, 0xF, 0x5, 0x13, 7},
-  {6, 0xF, 0x1, 0xF, 0x4, 0x2E, 6},
-  {6, 0xF, 0x1, 0xF, 0x1, 0x23, 7},
-  {7, 0x0, 0x0, 0x0, 0x0, 0x04, 4},
-  {7, 0x1, 0x0, 0x0, 0x0, 0x33, 6},
-  {7, 0x1, 0x1, 0x1, 0x1, 0x13, 6},
-  {7, 0x2, 0x0, 0x0, 0x0, 0x23, 6},
-  {7, 0x2, 0x1, 0x2, 0x2, 0x7F, 7},
-  {7, 0x3, 0x0, 0x0, 0x0, 0x03, 6},
-  {7, 0x3, 0x1, 0x1, 0x1, 0x3F, 7},
-  {7, 0x3, 0x1, 0x3, 0x2, 0x6F, 7},
-  {7, 0x4, 0x0, 0x0, 0x0, 0x2D, 6},
-  {7, 0x4, 0x1, 0x4, 0x4, 0x5F, 7},
-  {7, 0x5, 0x0, 0x0, 0x0, 0x16, 5},
-  {7, 0x5, 0x1, 0x1, 0x1, 0x3D, 6},
-  {7, 0x5, 0x1, 0x5, 0x4, 0x1F, 7},
-  {7, 0x6, 0x0, 0x0, 0x0, 0x1D, 6},
-  {7, 0x6, 0x1, 0x0, 0x0, 0x77, 7},
-  {7, 0x7, 0x0, 0x0, 0x0, 0x06, 5},
-  {7, 0x7, 0x1, 0x7, 0x4, 0x2F, 7},
-  {7, 0x7, 0x1, 0x4, 0x4, 0x4F, 7},
-  {7, 0x7, 0x1, 0x7, 0x3, 0x0F, 7},
-  {7, 0x7, 0x1, 0x7, 0x1, 0x0D, 6},
-  {7, 0x7, 0x1, 0x7, 0x2, 0x57, 7},
-  {7, 0x8, 0x0, 0x0, 0x0, 0x35, 6},
-  {7, 0x8, 0x1, 0x8, 0x8, 0x37, 7},
-  {7, 0x9, 0x0, 0x0, 0x0, 0x15, 6},
-  {7, 0x9, 0x1, 0x0, 0x0, 0x27, 7},
-  {7, 0xA, 0x0, 0x0, 0x0, 0x25, 6},
-  {7, 0xA, 0x1, 0x0, 0x0, 0x29, 6},
-  {7, 0xB, 0x0, 0x0, 0x0, 0x1A, 5},
-  {7, 0xB, 0x1, 0xB, 0x1, 0x17, 7},
-  {7, 0xB, 0x1, 0x1, 0x1, 0x67, 7},
-  {7, 0xB, 0x1, 0x3, 0x2, 0x05, 6},
-  {7, 0xB, 0x1, 0xB, 0x8, 0x7B, 7},
-  {7, 0xC, 0x0, 0x0, 0x0, 0x39, 6},
-  {7, 0xC, 0x1, 0x0, 0x0, 0x19, 6},
-  {7, 0xD, 0x0, 0x0, 0x0, 0x0C, 5},
-  {7, 0xD, 0x1, 0xD, 0x1, 0x47, 7},
-  {7, 0xD, 0x1, 0x1, 0x1, 0x07, 7},
-  {7, 0xD, 0x1, 0x5, 0x4, 0x09, 6},
-  {7, 0xD, 0x1, 0xD, 0x8, 0x1B, 7},
-  {7, 0xE, 0x0, 0x0, 0x0, 0x31, 6},
-  {7, 0xE, 0x1, 0xE, 0x2, 0x3B, 7},
-  {7, 0xE, 0x1, 0x2, 0x2, 0x5B, 7},
-  {7, 0xE, 0x1, 0xA, 0x8, 0x3E, 6},
-  {7, 0xE, 0x1, 0xE, 0x4, 0x0B, 7},
-  {7, 0xF, 0x0, 0x0, 0x0, 0x00, 3},
-  {7, 0xF, 0x1, 0xF, 0xF, 0x6B, 7},
-  {7, 0xF, 0x1, 0xF, 0x7, 0x2B, 7},
-  {7, 0xF, 0x1, 0xF, 0xB, 0x4B, 7},
-  {7, 0xF, 0x1, 0xF, 0x3, 0x11, 6},
-  {7, 0xF, 0x1, 0x7, 0x6, 0x21, 6},
-  {7, 0xF, 0x1, 0xF, 0xA, 0x01, 6},
-  {7, 0xF, 0x1, 0xF, 0x2, 0x0A, 5},
-  {7, 0xF, 0x1, 0xB, 0x9, 0x1E, 6},
-  {7, 0xF, 0x1, 0xF, 0xC, 0x0E, 6},
-  {7, 0xF, 0x1, 0xF, 0x8, 0x12, 5},
-  {7, 0xF, 0x1, 0xF, 0x5, 0x2E, 6},
-  {7, 0xF, 0x1, 0xF, 0x1, 0x02, 5},
-  {7, 0xF, 0x1, 0xF, 0x4, 0x1C, 5}
+    {0, 0x1, 0x0, 0x0, 0x0, 0x00, 3},
+    {0, 0x1, 0x1, 0x1, 0x1, 0x27, 6},
+    {0, 0x2, 0x0, 0x0, 0x0, 0x06, 3},
+    {0, 0x2, 0x1, 0x2, 0x2, 0x17, 6},
+    {0, 0x3, 0x0, 0x0, 0x0, 0x0D, 5},
+    {0, 0x3, 0x1, 0x0, 0x0, 0x3B, 6},
+    {0, 0x4, 0x0, 0x0, 0x0, 0x02, 3},
+    {0, 0x4, 0x1, 0x4, 0x4, 0x07, 6},
+    {0, 0x5, 0x0, 0x0, 0x0, 0x15, 5},
+    {0, 0x5, 0x1, 0x0, 0x0, 0x2B, 6},
+    {0, 0x6, 0x0, 0x0, 0x0, 0x01, 5},
+    {0, 0x6, 0x1, 0x0, 0x0, 0x7F, 7},
+    {0, 0x7, 0x0, 0x0, 0x0, 0x1F, 7},
+    {0, 0x7, 0x1, 0x0, 0x0, 0x1B, 6},
+    {0, 0x8, 0x0, 0x0, 0x0, 0x04, 3},
+    {0, 0x8, 0x1, 0x8, 0x8, 0x05, 5},
+    {0, 0x9, 0x0, 0x0, 0x0, 0x19, 5},
+    {0, 0x9, 0x1, 0x0, 0x0, 0x13, 6},
+    {0, 0xA, 0x0, 0x0, 0x0, 0x09, 5},
+    {0, 0xA, 0x1, 0x8, 0x8, 0x0B, 6},
+    {0, 0xA, 0x1, 0xA, 0x2, 0x3F, 7},
+    {0, 0xB, 0x0, 0x0, 0x0, 0x5F, 7},
+    {0, 0xB, 0x1, 0x0, 0x0, 0x33, 6},
+    {0, 0xC, 0x0, 0x0, 0x0, 0x11, 5},
+    {0, 0xC, 0x1, 0x8, 0x8, 0x23, 6},
+    {0, 0xC, 0x1, 0xC, 0x4, 0x6F, 7},
+    {0, 0xD, 0x0, 0x0, 0x0, 0x0F, 7},
+    {0, 0xD, 0x1, 0x0, 0x0, 0x03, 6},
+    {0, 0xE, 0x0, 0x0, 0x0, 0x2F, 7},
+    {0, 0xE, 0x1, 0x4, 0x4, 0x4F, 7},
+    {0, 0xE, 0x1, 0x4, 0x0, 0x3D, 6},
+    {0, 0xF, 0x0, 0x0, 0x0, 0x77, 7},
+    {0, 0xF, 0x1, 0x1, 0x1, 0x37, 7},
+    {0, 0xF, 0x1, 0x1, 0x0, 0x1D, 6},
+    {1, 0x0, 0x0, 0x0, 0x0, 0x00, 1},
+    {1, 0x1, 0x0, 0x0, 0x0, 0x05, 4},
+    {1, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
+    {1, 0x2, 0x0, 0x0, 0x0, 0x09, 4},
+    {1, 0x2, 0x1, 0x2, 0x2, 0x1F, 7},
+    {1, 0x3, 0x0, 0x0, 0x0, 0x1D, 5},
+    {1, 0x3, 0x1, 0x1, 0x1, 0x3F, 7},
+    {1, 0x3, 0x1, 0x3, 0x2, 0x5F, 7},
+    {1, 0x4, 0x0, 0x0, 0x0, 0x0D, 5},
+    {1, 0x4, 0x1, 0x4, 0x4, 0x37, 7},
+    {1, 0x5, 0x0, 0x0, 0x0, 0x03, 6},
+    {1, 0x5, 0x1, 0x0, 0x0, 0x6F, 7},
+    {1, 0x6, 0x0, 0x0, 0x0, 0x2F, 7},
+    {1, 0x6, 0x1, 0x0, 0x0, 0x4F, 7},
+    {1, 0x7, 0x0, 0x0, 0x0, 0x0F, 7},
+    {1, 0x7, 0x1, 0x0, 0x0, 0x77, 7},
+    {1, 0x8, 0x0, 0x0, 0x0, 0x01, 4},
+    {1, 0x8, 0x1, 0x8, 0x8, 0x17, 7},
+    {1, 0x9, 0x0, 0x0, 0x0, 0x0B, 6},
+    {1, 0x9, 0x1, 0x0, 0x0, 0x57, 7},
+    {1, 0xA, 0x0, 0x0, 0x0, 0x33, 6},
+    {1, 0xA, 0x1, 0x0, 0x0, 0x67, 7},
+    {1, 0xB, 0x0, 0x0, 0x0, 0x27, 7},
+    {1, 0xB, 0x1, 0x0, 0x0, 0x2B, 7},
+    {1, 0xC, 0x0, 0x0, 0x0, 0x13, 6},
+    {1, 0xC, 0x1, 0x0, 0x0, 0x47, 7},
+    {1, 0xD, 0x0, 0x0, 0x0, 0x07, 7},
+    {1, 0xD, 0x1, 0x0, 0x0, 0x7B, 7},
+    {1, 0xE, 0x0, 0x0, 0x0, 0x3B, 7},
+    {1, 0xE, 0x1, 0x0, 0x0, 0x5B, 7},
+    {1, 0xF, 0x0, 0x0, 0x0, 0x1B, 7},
+    {1, 0xF, 0x1, 0x4, 0x4, 0x6B, 7},
+    {1, 0xF, 0x1, 0x4, 0x0, 0x23, 6},
+    {2, 0x0, 0x0, 0x0, 0x0, 0x00, 1},
+    {2, 0x1, 0x0, 0x0, 0x0, 0x09, 4},
+    {2, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
+    {2, 0x2, 0x0, 0x0, 0x0, 0x01, 4},
+    {2, 0x2, 0x1, 0x2, 0x2, 0x23, 6},
+    {2, 0x3, 0x0, 0x0, 0x0, 0x3D, 6},
+    {2, 0x3, 0x1, 0x2, 0x2, 0x3F, 7},
+    {2, 0x3, 0x1, 0x3, 0x1, 0x1F, 7},
+    {2, 0x4, 0x0, 0x0, 0x0, 0x15, 5},
+    {2, 0x4, 0x1, 0x4, 0x4, 0x5F, 7},
+    {2, 0x5, 0x0, 0x0, 0x0, 0x03, 6},
+    {2, 0x5, 0x1, 0x0, 0x0, 0x6F, 7},
+    {2, 0x6, 0x0, 0x0, 0x0, 0x2F, 7},
+    {2, 0x6, 0x1, 0x0, 0x0, 0x4F, 7},
+    {2, 0x7, 0x0, 0x0, 0x0, 0x0F, 7},
+    {2, 0x7, 0x1, 0x0, 0x0, 0x17, 7},
+    {2, 0x8, 0x0, 0x0, 0x0, 0x05, 5},
+    {2, 0x8, 0x1, 0x8, 0x8, 0x77, 7},
+    {2, 0x9, 0x0, 0x0, 0x0, 0x37, 7},
+    {2, 0x9, 0x1, 0x0, 0x0, 0x57, 7},
+    {2, 0xA, 0x0, 0x0, 0x0, 0x1D, 6},
+    {2, 0xA, 0x1, 0xA, 0xA, 0x7B, 7},
+    {2, 0xA, 0x1, 0xA, 0x2, 0x2D, 6},
+    {2, 0xA, 0x1, 0xA, 0x8, 0x67, 7},
+    {2, 0xB, 0x0, 0x0, 0x0, 0x27, 7},
+    {2, 0xB, 0x1, 0xB, 0x2, 0x47, 7},
+    {2, 0xB, 0x1, 0x0, 0x0, 0x07, 7},
+    {2, 0xC, 0x0, 0x0, 0x0, 0x0D, 6},
+    {2, 0xC, 0x1, 0x0, 0x0, 0x3B, 7},
+    {2, 0xD, 0x0, 0x0, 0x0, 0x5B, 7},
+    {2, 0xD, 0x1, 0x0, 0x0, 0x1B, 7},
+    {2, 0xE, 0x0, 0x0, 0x0, 0x6B, 7},
+    {2, 0xE, 0x1, 0x4, 0x4, 0x2B, 7},
+    {2, 0xE, 0x1, 0x4, 0x0, 0x4B, 7},
+    {2, 0xF, 0x0, 0x0, 0x0, 0x0B, 7},
+    {2, 0xF, 0x1, 0x4, 0x4, 0x73, 7},
+    {2, 0xF, 0x1, 0x5, 0x1, 0x33, 7},
+    {2, 0xF, 0x1, 0x7, 0x2, 0x53, 7},
+    {2, 0xF, 0x1, 0xF, 0x8, 0x13, 7},
+    {3, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
+    {3, 0x1, 0x0, 0x0, 0x0, 0x0A, 4},
+    {3, 0x1, 0x1, 0x1, 0x1, 0x0B, 6},
+    {3, 0x2, 0x0, 0x0, 0x0, 0x02, 4},
+    {3, 0x2, 0x1, 0x2, 0x2, 0x23, 6},
+    {3, 0x3, 0x0, 0x0, 0x0, 0x0E, 5},
+    {3, 0x3, 0x1, 0x3, 0x3, 0x7F, 7},
+    {3, 0x3, 0x1, 0x3, 0x2, 0x33, 6},
+    {3, 0x3, 0x1, 0x3, 0x1, 0x13, 6},
+    {3, 0x4, 0x0, 0x0, 0x0, 0x16, 5},
+    {3, 0x4, 0x1, 0x4, 0x4, 0x3F, 7},
+    {3, 0x5, 0x0, 0x0, 0x0, 0x03, 6},
+    {3, 0x5, 0x1, 0x1, 0x1, 0x3D, 6},
+    {3, 0x5, 0x1, 0x5, 0x4, 0x1F, 7},
+    {3, 0x6, 0x0, 0x0, 0x0, 0x1D, 6},
+    {3, 0x6, 0x1, 0x0, 0x0, 0x5F, 7},
+    {3, 0x7, 0x0, 0x0, 0x0, 0x2D, 6},
+    {3, 0x7, 0x1, 0x4, 0x4, 0x2F, 7},
+    {3, 0x7, 0x1, 0x5, 0x1, 0x1E, 6},
+    {3, 0x7, 0x1, 0x7, 0x2, 0x6F, 7},
+    {3, 0x8, 0x0, 0x0, 0x0, 0x06, 5},
+    {3, 0x8, 0x1, 0x8, 0x8, 0x4F, 7},
+    {3, 0x9, 0x0, 0x0, 0x0, 0x0D, 6},
+    {3, 0x9, 0x1, 0x0, 0x0, 0x35, 6},
+    {3, 0xA, 0x0, 0x0, 0x0, 0x15, 6},
+    {3, 0xA, 0x1, 0x2, 0x2, 0x25, 6},
+    {3, 0xA, 0x1, 0xA, 0x8, 0x0F, 7},
+    {3, 0xB, 0x0, 0x0, 0x0, 0x05, 6},
+    {3, 0xB, 0x1, 0x8, 0x8, 0x39, 6},
+    {3, 0xB, 0x1, 0xB, 0x3, 0x17, 7},
+    {3, 0xB, 0x1, 0xB, 0x2, 0x19, 6},
+    {3, 0xB, 0x1, 0xB, 0x1, 0x77, 7},
+    {3, 0xC, 0x0, 0x0, 0x0, 0x29, 6},
+    {3, 0xC, 0x1, 0x0, 0x0, 0x09, 6},
+    {3, 0xD, 0x0, 0x0, 0x0, 0x37, 7},
+    {3, 0xD, 0x1, 0x4, 0x4, 0x57, 7},
+    {3, 0xD, 0x1, 0x4, 0x0, 0x31, 6},
+    {3, 0xE, 0x0, 0x0, 0x0, 0x67, 7},
+    {3, 0xE, 0x1, 0x4, 0x4, 0x27, 7},
+    {3, 0xE, 0x1, 0xC, 0x8, 0x47, 7},
+    {3, 0xE, 0x1, 0xE, 0x2, 0x6B, 7},
+    {3, 0xF, 0x0, 0x0, 0x0, 0x11, 6},
+    {3, 0xF, 0x1, 0x6, 0x6, 0x07, 7},
+    {3, 0xF, 0x1, 0x7, 0x3, 0x7B, 7},
+    {3, 0xF, 0x1, 0xF, 0xA, 0x3B, 7},
+    {3, 0xF, 0x1, 0xF, 0x2, 0x21, 6},
+    {3, 0xF, 0x1, 0xF, 0x8, 0x01, 6},
+    {3, 0xF, 0x1, 0xA, 0x8, 0x5B, 7},
+    {3, 0xF, 0x1, 0xF, 0x5, 0x1B, 7},
+    {3, 0xF, 0x1, 0xF, 0x1, 0x3E, 6},
+    {3, 0xF, 0x1, 0xF, 0x4, 0x2B, 7},
+    {4, 0x0, 0x0, 0x0, 0x0, 0x00, 1},
+    {4, 0x1, 0x0, 0x0, 0x0, 0x0D, 5},
+    {4, 0x1, 0x1, 0x1, 0x1, 0x7F, 7},
+    {4, 0x2, 0x0, 0x0, 0x0, 0x15, 5},
+    {4, 0x2, 0x1, 0x2, 0x2, 0x3F, 7},
+    {4, 0x3, 0x0, 0x0, 0x0, 0x5F, 7},
+    {4, 0x3, 0x1, 0x0, 0x0, 0x6F, 7},
+    {4, 0x4, 0x0, 0x0, 0x0, 0x09, 4},
+    {4, 0x4, 0x1, 0x4, 0x4, 0x23, 6},
+    {4, 0x5, 0x0, 0x0, 0x0, 0x33, 6},
+    {4, 0x5, 0x1, 0x0, 0x0, 0x1F, 7},
+    {4, 0x6, 0x0, 0x0, 0x0, 0x13, 6},
+    {4, 0x6, 0x1, 0x0, 0x0, 0x2F, 7},
+    {4, 0x7, 0x0, 0x0, 0x0, 0x4F, 7},
+    {4, 0x7, 0x1, 0x0, 0x0, 0x57, 7},
+    {4, 0x8, 0x0, 0x0, 0x0, 0x01, 4},
+    {4, 0x8, 0x1, 0x8, 0x8, 0x0F, 7},
+    {4, 0x9, 0x0, 0x0, 0x0, 0x77, 7},
+    {4, 0x9, 0x1, 0x0, 0x0, 0x37, 7},
+    {4, 0xA, 0x0, 0x0, 0x0, 0x1D, 6},
+    {4, 0xA, 0x1, 0x0, 0x0, 0x17, 7},
+    {4, 0xB, 0x0, 0x0, 0x0, 0x67, 7},
+    {4, 0xB, 0x1, 0x0, 0x0, 0x6B, 7},
+    {4, 0xC, 0x0, 0x0, 0x0, 0x05, 5},
+    {4, 0xC, 0x1, 0xC, 0xC, 0x27, 7},
+    {4, 0xC, 0x1, 0xC, 0x8, 0x47, 7},
+    {4, 0xC, 0x1, 0xC, 0x4, 0x07, 7},
+    {4, 0xD, 0x0, 0x0, 0x0, 0x7B, 7},
+    {4, 0xD, 0x1, 0x0, 0x0, 0x3B, 7},
+    {4, 0xE, 0x0, 0x0, 0x0, 0x5B, 7},
+    {4, 0xE, 0x1, 0x2, 0x2, 0x1B, 7},
+    {4, 0xE, 0x1, 0x2, 0x0, 0x03, 6},
+    {4, 0xF, 0x0, 0x0, 0x0, 0x2B, 7},
+    {4, 0xF, 0x1, 0x1, 0x1, 0x4B, 7},
+    {4, 0xF, 0x1, 0x3, 0x2, 0x0B, 7},
+    {4, 0xF, 0x1, 0x3, 0x0, 0x3D, 6},
+    {5, 0x0, 0x0, 0x0, 0x0, 0x00, 2},
+    {5, 0x1, 0x0, 0x0, 0x0, 0x1E, 5},
+    {5, 0x1, 0x1, 0x1, 0x1, 0x3B, 6},
+    {5, 0x2, 0x0, 0x0, 0x0, 0x0A, 5},
+    {5, 0x2, 0x1, 0x2, 0x2, 0x3F, 7},
+    {5, 0x3, 0x0, 0x0, 0x0, 0x1B, 6},
+    {5, 0x3, 0x1, 0x0, 0x0, 0x0B, 6},
+    {5, 0x4, 0x0, 0x0, 0x0, 0x02, 4},
+    {5, 0x4, 0x1, 0x4, 0x4, 0x2B, 6},
+    {5, 0x5, 0x0, 0x0, 0x0, 0x0E, 5},
+    {5, 0x5, 0x1, 0x4, 0x4, 0x33, 6},
+    {5, 0x5, 0x1, 0x5, 0x1, 0x7F, 7},
+    {5, 0x6, 0x0, 0x0, 0x0, 0x13, 6},
+    {5, 0x6, 0x1, 0x0, 0x0, 0x6F, 7},
+    {5, 0x7, 0x0, 0x0, 0x0, 0x23, 6},
+    {5, 0x7, 0x1, 0x2, 0x2, 0x5F, 7},
+    {5, 0x7, 0x1, 0x2, 0x0, 0x15, 6},
+    {5, 0x8, 0x0, 0x0, 0x0, 0x16, 5},
+    {5, 0x8, 0x1, 0x8, 0x8, 0x03, 6},
+    {5, 0x9, 0x0, 0x0, 0x0, 0x3D, 6},
+    {5, 0x9, 0x1, 0x0, 0x0, 0x1F, 7},
+    {5, 0xA, 0x0, 0x0, 0x0, 0x1D, 6},
+    {5, 0xA, 0x1, 0x0, 0x0, 0x2D, 6},
+    {5, 0xB, 0x0, 0x0, 0x0, 0x0D, 6},
+    {5, 0xB, 0x1, 0x1, 0x1, 0x4F, 7},
+    {5, 0xB, 0x1, 0x1, 0x0, 0x35, 6},
+    {5, 0xC, 0x0, 0x0, 0x0, 0x06, 5},
+    {5, 0xC, 0x1, 0x4, 0x4, 0x25, 6},
+    {5, 0xC, 0x1, 0xC, 0x8, 0x2F, 7},
+    {5, 0xD, 0x0, 0x0, 0x0, 0x05, 6},
+    {5, 0xD, 0x1, 0x1, 0x1, 0x77, 7},
+    {5, 0xD, 0x1, 0x5, 0x4, 0x39, 6},
+    {5, 0xD, 0x1, 0xD, 0x8, 0x0F, 7},
+    {5, 0xE, 0x0, 0x0, 0x0, 0x19, 6},
+    {5, 0xE, 0x1, 0x2, 0x2, 0x57, 7},
+    {5, 0xE, 0x1, 0xA, 0x8, 0x01, 6},
+    {5, 0xE, 0x1, 0xE, 0x4, 0x37, 7},
+    {5, 0xF, 0x0, 0x0, 0x0, 0x1A, 5},
+    {5, 0xF, 0x1, 0x9, 0x9, 0x17, 7},
+    {5, 0xF, 0x1, 0xD, 0x5, 0x67, 7},
+    {5, 0xF, 0x1, 0xF, 0x3, 0x07, 7},
+    {5, 0xF, 0x1, 0xF, 0x1, 0x29, 6},
+    {5, 0xF, 0x1, 0x7, 0x6, 0x27, 7},
+    {5, 0xF, 0x1, 0xF, 0xC, 0x09, 6},
+    {5, 0xF, 0x1, 0xF, 0x4, 0x31, 6},
+    {5, 0xF, 0x1, 0xF, 0xA, 0x47, 7},
+    {5, 0xF, 0x1, 0xF, 0x8, 0x11, 6},
+    {5, 0xF, 0x1, 0xF, 0x2, 0x21, 6},
+    {6, 0x0, 0x0, 0x0, 0x0, 0x00, 3},
+    {6, 0x1, 0x0, 0x0, 0x0, 0x02, 4},
+    {6, 0x1, 0x1, 0x1, 0x1, 0x03, 6},
+    {6, 0x2, 0x0, 0x0, 0x0, 0x0C, 4},
+    {6, 0x2, 0x1, 0x2, 0x2, 0x3D, 6},
+    {6, 0x3, 0x0, 0x0, 0x0, 0x1D, 6},
+    {6, 0x3, 0x1, 0x2, 0x2, 0x0D, 6},
+    {6, 0x3, 0x1, 0x3, 0x1, 0x7F, 7},
+    {6, 0x4, 0x0, 0x0, 0x0, 0x04, 4},
+    {6, 0x4, 0x1, 0x4, 0x4, 0x2D, 6},
+    {6, 0x5, 0x0, 0x0, 0x0, 0x0A, 5},
+    {6, 0x5, 0x1, 0x4, 0x4, 0x35, 6},
+    {6, 0x5, 0x1, 0x5, 0x1, 0x2F, 7},
+    {6, 0x6, 0x0, 0x0, 0x0, 0x15, 6},
+    {6, 0x6, 0x1, 0x2, 0x2, 0x3F, 7},
+    {6, 0x6, 0x1, 0x6, 0x4, 0x5F, 7},
+    {6, 0x7, 0x0, 0x0, 0x0, 0x25, 6},
+    {6, 0x7, 0x1, 0x2, 0x2, 0x29, 6},
+    {6, 0x7, 0x1, 0x3, 0x1, 0x1F, 7},
+    {6, 0x7, 0x1, 0x7, 0x4, 0x6F, 7},
+    {6, 0x8, 0x0, 0x0, 0x0, 0x16, 5},
+    {6, 0x8, 0x1, 0x8, 0x8, 0x05, 6},
+    {6, 0x9, 0x0, 0x0, 0x0, 0x39, 6},
+    {6, 0x9, 0x1, 0x0, 0x0, 0x19, 6},
+    {6, 0xA, 0x0, 0x0, 0x0, 0x06, 5},
+    {6, 0xA, 0x1, 0xA, 0xA, 0x0F, 7},
+    {6, 0xA, 0x1, 0xA, 0x2, 0x09, 6},
+    {6, 0xA, 0x1, 0xA, 0x8, 0x4F, 7},
+    {6, 0xB, 0x0, 0x0, 0x0, 0x0E, 6},
+    {6, 0xB, 0x1, 0xB, 0x2, 0x77, 7},
+    {6, 0xB, 0x1, 0x2, 0x2, 0x37, 7},
+    {6, 0xB, 0x1, 0xA, 0x8, 0x57, 7},
+    {6, 0xB, 0x1, 0xB, 0x1, 0x47, 7},
+    {6, 0xC, 0x0, 0x0, 0x0, 0x1A, 5},
+    {6, 0xC, 0x1, 0xC, 0xC, 0x17, 7},
+    {6, 0xC, 0x1, 0xC, 0x8, 0x67, 7},
+    {6, 0xC, 0x1, 0xC, 0x4, 0x27, 7},
+    {6, 0xD, 0x0, 0x0, 0x0, 0x31, 6},
+    {6, 0xD, 0x1, 0xD, 0x4, 0x07, 7},
+    {6, 0xD, 0x1, 0x4, 0x4, 0x7B, 7},
+    {6, 0xD, 0x1, 0xC, 0x8, 0x3B, 7},
+    {6, 0xD, 0x1, 0xD, 0x1, 0x2B, 7},
+    {6, 0xE, 0x0, 0x0, 0x0, 0x11, 6},
+    {6, 0xE, 0x1, 0xE, 0x4, 0x5B, 7},
+    {6, 0xE, 0x1, 0x4, 0x4, 0x1B, 7},
+    {6, 0xE, 0x1, 0xE, 0xA, 0x6B, 7},
+    {6, 0xE, 0x1, 0xE, 0x8, 0x21, 6},
+    {6, 0xE, 0x1, 0xE, 0x2, 0x33, 7},
+    {6, 0xF, 0x0, 0x0, 0x0, 0x01, 6},
+    {6, 0xF, 0x1, 0x3, 0x3, 0x4B, 7},
+    {6, 0xF, 0x1, 0x7, 0x6, 0x0B, 7},
+    {6, 0xF, 0x1, 0xF, 0xA, 0x73, 7},
+    {6, 0xF, 0x1, 0xF, 0x2, 0x3E, 6},
+    {6, 0xF, 0x1, 0xB, 0x9, 0x53, 7},
+    {6, 0xF, 0x1, 0xF, 0xC, 0x63, 7},
+    {6, 0xF, 0x1, 0xF, 0x8, 0x1E, 6},
+    {6, 0xF, 0x1, 0xF, 0x5, 0x13, 7},
+    {6, 0xF, 0x1, 0xF, 0x4, 0x2E, 6},
+    {6, 0xF, 0x1, 0xF, 0x1, 0x23, 7},
+    {7, 0x0, 0x0, 0x0, 0x0, 0x04, 4},
+    {7, 0x1, 0x0, 0x0, 0x0, 0x33, 6},
+    {7, 0x1, 0x1, 0x1, 0x1, 0x13, 6},
+    {7, 0x2, 0x0, 0x0, 0x0, 0x23, 6},
+    {7, 0x2, 0x1, 0x2, 0x2, 0x7F, 7},
+    {7, 0x3, 0x0, 0x0, 0x0, 0x03, 6},
+    {7, 0x3, 0x1, 0x1, 0x1, 0x3F, 7},
+    {7, 0x3, 0x1, 0x3, 0x2, 0x6F, 7},
+    {7, 0x4, 0x0, 0x0, 0x0, 0x2D, 6},
+    {7, 0x4, 0x1, 0x4, 0x4, 0x5F, 7},
+    {7, 0x5, 0x0, 0x0, 0x0, 0x16, 5},
+    {7, 0x5, 0x1, 0x1, 0x1, 0x3D, 6},
+    {7, 0x5, 0x1, 0x5, 0x4, 0x1F, 7},
+    {7, 0x6, 0x0, 0x0, 0x0, 0x1D, 6},
+    {7, 0x6, 0x1, 0x0, 0x0, 0x77, 7},
+    {7, 0x7, 0x0, 0x0, 0x0, 0x06, 5},
+    {7, 0x7, 0x1, 0x7, 0x4, 0x2F, 7},
+    {7, 0x7, 0x1, 0x4, 0x4, 0x4F, 7},
+    {7, 0x7, 0x1, 0x7, 0x3, 0x0F, 7},
+    {7, 0x7, 0x1, 0x7, 0x1, 0x0D, 6},
+    {7, 0x7, 0x1, 0x7, 0x2, 0x57, 7},
+    {7, 0x8, 0x0, 0x0, 0x0, 0x35, 6},
+    {7, 0x8, 0x1, 0x8, 0x8, 0x37, 7},
+    {7, 0x9, 0x0, 0x0, 0x0, 0x15, 6},
+    {7, 0x9, 0x1, 0x0, 0x0, 0x27, 7},
+    {7, 0xA, 0x0, 0x0, 0x0, 0x25, 6},
+    {7, 0xA, 0x1, 0x0, 0x0, 0x29, 6},
+    {7, 0xB, 0x0, 0x0, 0x0, 0x1A, 5},
+    {7, 0xB, 0x1, 0xB, 0x1, 0x17, 7},
+    {7, 0xB, 0x1, 0x1, 0x1, 0x67, 7},
+    {7, 0xB, 0x1, 0x3, 0x2, 0x05, 6},
+    {7, 0xB, 0x1, 0xB, 0x8, 0x7B, 7},
+    {7, 0xC, 0x0, 0x0, 0x0, 0x39, 6},
+    {7, 0xC, 0x1, 0x0, 0x0, 0x19, 6},
+    {7, 0xD, 0x0, 0x0, 0x0, 0x0C, 5},
+    {7, 0xD, 0x1, 0xD, 0x1, 0x47, 7},
+    {7, 0xD, 0x1, 0x1, 0x1, 0x07, 7},
+    {7, 0xD, 0x1, 0x5, 0x4, 0x09, 6},
+    {7, 0xD, 0x1, 0xD, 0x8, 0x1B, 7},
+    {7, 0xE, 0x0, 0x0, 0x0, 0x31, 6},
+    {7, 0xE, 0x1, 0xE, 0x2, 0x3B, 7},
+    {7, 0xE, 0x1, 0x2, 0x2, 0x5B, 7},
+    {7, 0xE, 0x1, 0xA, 0x8, 0x3E, 6},
+    {7, 0xE, 0x1, 0xE, 0x4, 0x0B, 7},
+    {7, 0xF, 0x0, 0x0, 0x0, 0x00, 3},
+    {7, 0xF, 0x1, 0xF, 0xF, 0x6B, 7},
+    {7, 0xF, 0x1, 0xF, 0x7, 0x2B, 7},
+    {7, 0xF, 0x1, 0xF, 0xB, 0x4B, 7},
+    {7, 0xF, 0x1, 0xF, 0x3, 0x11, 6},
+    {7, 0xF, 0x1, 0x7, 0x6, 0x21, 6},
+    {7, 0xF, 0x1, 0xF, 0xA, 0x01, 6},
+    {7, 0xF, 0x1, 0xF, 0x2, 0x0A, 5},
+    {7, 0xF, 0x1, 0xB, 0x9, 0x1E, 6},
+    {7, 0xF, 0x1, 0xF, 0xC, 0x0E, 6},
+    {7, 0xF, 0x1, 0xF, 0x8, 0x12, 5},
+    {7, 0xF, 0x1, 0xF, 0x5, 0x2E, 6},
+    {7, 0xF, 0x1, 0xF, 0x1, 0x02, 5},
+    {7, 0xF, 0x1, 0xF, 0x4, 0x1C, 5}
 };
 
 //************************************************************************/
 /** @defgroup vlc_decoding_tables_grp VLC decoding tables
   *  @{
   *  VLC tables to decode VLC codewords to these fields: (in order)       \n
-  *  \li \c cwd_len : 3bits -> the codeword length of the VLC codeword;    
+  *  \li \c cwd_len : 3bits -> the codeword length of the VLC codeword;
   *                   the VLC cwd is in the LSB of bitstream              \n
   *  \li \c u_off   : 1bit  -> u_offset, which is 1 if u value is not 0   \n
   *  \li \c rho     : 4bits -> signficant samples within a quad           \n
@@ -890,14 +892,14 @@ static vlc_src_table_t tbl1[] = {
   *  \li \c e_k     : 4bits -> EMB e_k                                    \n
   *                                                                       \n
   *  The table index is 10 bits and composed of two parts:                \n
-  *  The 7 LSBs contain a codeword which might be shorter than 7 bits;    
+  *  The 7 LSBs contain a codeword which might be shorter than 7 bits;
   *  this word is the next decoable bits in the bitstream.                \n
   *  The 3 MSB is the context of for the codeword.                        \n
   */
 
 /// @brief vlc_tbl0 contains decoding information for initial row of quads
 int vlc_tbl0[1024] = { 0 };
-/// @brief vlc_tbl1 contains decoding information for non-initial row of 
+/// @brief vlc_tbl1 contains decoding information for non-initial row of
 ///        quads
 int vlc_tbl1[1024] = { 0 };
 /// @}
@@ -909,54 +911,58 @@ int vlc_tbl1[1024] = { 0 };
   */
 OPJ_BOOL vlc_init_tables()
 {
-  const OPJ_BOOL debug = OPJ_FALSE; //useful for checking 
+    const OPJ_BOOL debug = OPJ_FALSE; //useful for checking
 
-  // number of entries in the table
-  size_t tbl0_size = sizeof(tbl0) / sizeof(vlc_src_table_t); 
+    // number of entries in the table
+    size_t tbl0_size = sizeof(tbl0) / sizeof(vlc_src_table_t);
 
-  // number of entries in the table
-  size_t tbl1_size = sizeof(tbl1) / sizeof(vlc_src_table_t);
+    // number of entries in the table
+    size_t tbl1_size = sizeof(tbl1) / sizeof(vlc_src_table_t);
 
-  if (debug) memset(vlc_tbl0, 0, sizeof(vlc_tbl0)); //unnecessary
+    if (debug) {
+        memset(vlc_tbl0, 0, sizeof(vlc_tbl0));    //unnecessary
+    }
 
-  // this is to convert table entries into values for decoder look up
-  // There can be at most 1024 possibilites, not all of them are valid.
-  // 
-  for (int i = 0; i < 1024; ++i)
-  {
-    int cwd = i & 0x7F; // from i extract codeword
-    int c_q = i >> 7;   // from i extract context
-    // See if this case exist in the table, if so then set the entry in
-    // vlc_tbl0
-    for (size_t j = 0; j < tbl0_size; ++j) 
-      if (tbl0[j].c_q == c_q) // this is an and operation
-        if (tbl0[j].cwd == (cwd & ((1 << tbl0[j].cwd_len) - 1)))
-        {
-          if (debug) assert(vlc_tbl0[i] == 0);
-          // Put this entry into the table
-          vlc_tbl0[i] = (tbl0[j].rho << 4) | (tbl0[j].u_off << 3)
-            | (tbl0[j].e_k << 12) | (tbl0[j].e_1 << 8) | tbl0[j].cwd_len;
-        }
-  }
+    // this is to convert table entries into values for decoder look up
+    // There can be at most 1024 possibilites, not all of them are valid.
+    //
+    for (int i = 0; i < 1024; ++i) {
+        int cwd = i & 0x7F; // from i extract codeword
+        int c_q = i >> 7;   // from i extract context
+        // See if this case exist in the table, if so then set the entry in
+        // vlc_tbl0
+        for (size_t j = 0; j < tbl0_size; ++j)
+            if (tbl0[j].c_q == c_q) // this is an and operation
+                if (tbl0[j].cwd == (cwd & ((1 << tbl0[j].cwd_len) - 1))) {
+                    if (debug) {
+                        assert(vlc_tbl0[i] == 0);
+                    }
+                    // Put this entry into the table
+                    vlc_tbl0[i] = (tbl0[j].rho << 4) | (tbl0[j].u_off << 3)
+                                  | (tbl0[j].e_k << 12) | (tbl0[j].e_1 << 8) | tbl0[j].cwd_len;
+                }
+    }
 
-  if (debug) memset(vlc_tbl1, 0, sizeof(vlc_tbl1)); //unnecessary
+    if (debug) {
+        memset(vlc_tbl1, 0, sizeof(vlc_tbl1));    //unnecessary
+    }
 
-  // this the same as above but for non-initial rows
-  for (int i = 0; i < 1024; ++i)
-  {
-    int cwd = i & 0x7F; //7 bits
-    int c_q = i >> 7;
-    for (size_t j = 0; j < tbl1_size; ++j)
-      if (tbl1[j].c_q == c_q) // this is an and operation
-        if (tbl1[j].cwd == (cwd & ((1 << tbl1[j].cwd_len) - 1)))
-        {
-          if (debug) assert(vlc_tbl1[i] == 0);
-          vlc_tbl1[i] = (tbl1[j].rho << 4) | (tbl1[j].u_off << 3)
-            | (tbl1[j].e_k << 12) | (tbl1[j].e_1 << 8) | tbl1[j].cwd_len;
-        }
-  }
+    // this the same as above but for non-initial rows
+    for (int i = 0; i < 1024; ++i) {
+        int cwd = i & 0x7F; //7 bits
+        int c_q = i >> 7;
+        for (size_t j = 0; j < tbl1_size; ++j)
+            if (tbl1[j].c_q == c_q) // this is an and operation
+                if (tbl1[j].cwd == (cwd & ((1 << tbl1[j].cwd_len) - 1))) {
+                    if (debug) {
+                        assert(vlc_tbl1[i] == 0);
+                    }
+                    vlc_tbl1[i] = (tbl1[j].rho << 4) | (tbl1[j].u_off << 3)
+                                  | (tbl1[j].e_k << 12) | (tbl1[j].e_1 << 8) | tbl1[j].cwd_len;
+                }
+    }
 
-  return OPJ_TRUE;
+    return OPJ_TRUE;
 }
 
 //************************************************************************/

From f4436c74f1d95db8493cfe671ffb6900073cee98 Mon Sep 17 00:00:00 2001
From: Aous Naman <aous72@yahoo.com>
Date: Mon, 20 Sep 2021 22:51:14 +1000
Subject: [PATCH 08/10] A couple of modifications: 1. Added support for
 Vertically causal decoding in HT mode. 2. HT block decoder should not read
 from outside a codeblock length -- this required modification of data reading
 algorithms, which improved them. 3. Improve messaging around unreasonable or
 illegal conditions that may occur during block decoding; two of these
 conditions are better moved to t2.c.

---
 src/lib/openjp2/fbc_dec.c | 303 ++++++++++++++++++++++----------------
 src/lib/openjp2/j2k.c     |  10 +-
 2 files changed, 173 insertions(+), 140 deletions(-)

diff --git a/src/lib/openjp2/fbc_dec.c b/src/lib/openjp2/fbc_dec.c
index e62ebea76..71e3bb868 100644
--- a/src/lib/openjp2/fbc_dec.c
+++ b/src/lib/openjp2/fbc_dec.c
@@ -55,24 +55,11 @@
 #define OPJ_COMPILER_GNUC
 #endif
 
-//************************************************************************/
-/** @brief Displays the error message when 32 bits are not sufficient to
-  * decode any passes
-  */
-static OPJ_BOOL cannot_decode_due_to_insufficient_precision = OPJ_FALSE;
-
-//************************************************************************/
-/** @brief Displays the error message when we do not have enough precision
-  * to decode the cleanup pass and set the bin center to 1.  The code can
-  * be modified to support this case.
-  */
-static OPJ_BOOL modify_code_to_support_this_precision = OPJ_FALSE;
-
 //************************************************************************/
 /** @brief Displays the error message for disabling the decoding of SPP and
   * MRP passes
   */
-static OPJ_BOOL cannot_decode_spp_mrp_msg = OPJ_FALSE;
+static OPJ_BOOL only_cleanup_pass_is_decoded = OPJ_FALSE;
 
 //************************************************************************/
 /** @brief Generates population count (i.e., the number of set bits)
@@ -167,10 +154,29 @@ void mel_read(dec_mel_t *melp)
     if (melp->bits > 32) { //there are enough bits in the tmp variable
         return;    // return without reading new data
     }
-    val = 0xFFFFFFFF;
-    //the next line (the if statement) needs to be tested first
-    //if (melp->size > 0)              // if there is data in the MEL segment
-    val = *(OPJ_UINT32*)melp->data;  // read 32 bits from MEL data
+
+    val = 0xFFFFFFFF;      // feed in 0xFF if buffer is exhausted
+    if (melp->size > 4) {  // if there is more than 4 bytes the MEL segment
+        val = *(OPJ_UINT32*)melp->data;  // read 32 bits from MEL data
+        melp->data += 4;           // advance pointer
+        melp->size -= 4;           // reduce counter
+    } else if (melp->size > 0) { // 4 or less
+        OPJ_UINT32 m, v;
+        int i = 0;
+        while (melp->size > 1) {
+            OPJ_UINT32 v = *melp->data++; // read one byte at a time
+            OPJ_UINT32 m = ~(0xFFu << i); // mask of location
+            val = (val & m) | (v << i);   // put byte in its correct location
+            --melp->size;
+            i += 8;
+        }
+        // size equal to 1
+        v = *melp->data++;  // the one before the last is different
+        v |= 0xF;                         // MEL and VLC segments can overlap
+        m = ~(0xFFu << i);
+        val = (val & m) | (v << i);
+        --melp->size;
+    }
 
     // next we unstuff them before adding them to the buffer
     bits = 32 - melp->unstuff;      // number of bits in val, subtract 1 if
@@ -179,46 +185,23 @@ void mel_read(dec_mel_t *melp)
 
     // data is unstuffed and accumulated in t
     // bits has the number of bits in t
-    t = (melp->size > 0) ? (val & 0xFF) : 0xFF; // feed 0xFF if the
-    // MEL bitstream has been exhausted
-    if (melp->size == 1) {
-        t |= 0xF;    // if this is 1 byte before the last
-    }
-    // in MEL+VLC segments (remember they
-    // can overlap)
-    melp->data += melp->size-- > 0; // advance data by 1 byte if we have not
-    // reached the end of the MEL segment
+    t = val & 0xFF;
     unstuff = ((val & 0xFF) == 0xFF); // true if the byte needs unstuffing
-
     bits -= unstuff; // there is one less bit in t if unstuffing is needed
     t = t << (8 - unstuff); // move up to make room for the next byte
 
     //this is a repeat of the above
-    t |= (melp->size > 0) ? ((val >> 8) & 0xFF) : 0xFF;
-    if (melp->size == 1) {
-        t |= 0xF;
-    }
-    melp->data += melp->size-- > 0;
+    t |= (val >> 8) & 0xFF;
     unstuff = (((val >> 8) & 0xFF) == 0xFF);
-
     bits -= unstuff;
     t = t << (8 - unstuff);
 
-    t |= (melp->size > 0) ? ((val >> 16) & 0xFF) : 0xFF;
-    if (melp->size == 1) {
-        t |= 0xF;
-    }
-    melp->data += melp->size-- > 0;
+    t |= (val >> 16) & 0xFF;
     unstuff = (((val >> 16) & 0xFF) == 0xFF);
-
     bits -= unstuff;
     t = t << (8 - unstuff);
 
-    t |= (melp->size > 0) ? ((val >> 24) & 0xFF) : 0xFF;
-    if (melp->size == 1) {
-        t |= 0xF;
-    }
-    melp->data += melp->size-- > 0;
+    t |= (val >> 24) & 0xFF;
     melp->unstuff = (((val >> 24) & 0xFF) == 0xFF);
 
     // move t to tmp, and push the result all the way up, so we read from
@@ -401,12 +384,19 @@ void rev_read(rev_struct_t *vlcp)
     }
     val = 0;
     //the next line (the if statement) needs to be tested first
-    if (vlcp->size > 0) { // if there are bytes left in the VLC segment
-        // We pad the data by 8 bytes at the beginning of the code stream
-        // buffer
-        val = *(OPJ_UINT32*)vlcp->data; // then read 32 bits
+    if (vlcp->size > 3) { // if there are more than 3 bytes left in VLC
+        // (vlcp->data - 3) move pointer back to read 32 bits at once
+        val = *(OPJ_UINT32*)(vlcp->data - 3); // then read 32 bits
         vlcp->data -= 4;                // move data pointer back by 4
         vlcp->size -= 4;                // reduce available byte by 4
+    } else if (vlcp->size > 0) { // 4 or less
+        int i = 24;
+        while (vlcp->size > 0) {
+            OPJ_UINT32 v = *vlcp->data--; // read one byte at a time
+            val |= (v << i);              // put byte in its correct location
+            --vlcp->size;
+            i -= 8;
+        }
     }
 
     //accumulate in tmp, number of bits in tmp are stored in bits
@@ -468,7 +458,8 @@ void rev_init(rev_struct_t *vlcp, OPJ_UINT8* data, int lcup, int scup)
     //This code is designed for an architecture that read address should
     // align to the read size (address multiple of 4 if read size is 4)
     //These few lines take care of the case where data is not at a multiple
-    // of 4 boundary. It reads 1,2,3 up to 4 bytes from the VLC bitstream
+    // of 4 boundary. It reads 1,2,3 up to 4 bytes from the VLC bitstream.
+    // To read 32 bits, read from (vlcp->data - 3)
     num = 1 + (int)((intptr_t)(vlcp->data) & 0x3);
     tnum = num < vlcp->size ? num : vlcp->size;
     for (i = 0; i < tnum; ++i) {
@@ -482,7 +473,6 @@ void rev_init(rev_struct_t *vlcp, OPJ_UINT8* data, int lcup, int scup)
         vlcp->unstuff = d > 0x8F; // for next byte
     }
     vlcp->size -= tnum;
-    vlcp->data -= 3; // make ready to read 32 bits (address multiple of 4)
     rev_read(vlcp);  // read another 32 buts
 }
 
@@ -544,32 +534,39 @@ void rev_read_mrp(rev_struct_t *mrp)
         return;
     }
     val = 0;
-    //the next line (the if statement) needs to be tested first
-    //notice that second line can be simplified to mrp->data -= 4
-    // if (mrp->size > 0)
-    {
-        val = *(OPJ_UINT32*)mrp->data;      // read 32 bits
-        mrp->data -= mrp->size > 0 ? 4 : 0; // move back read pointer only if
-        // there is data
+    if (mrp->size > 3) { // If there are 3 byte or more
+        // (mrp->data - 3) move pointer back to read 32 bits at once
+        val = *(OPJ_UINT32*)(mrp->data - 3); // read 32 bits
+        mrp->data -= 4;                      // move back pointer
+        mrp->size -= 4;                      // reduce count
+    } else if (mrp->size > 0) {
+        int i = 24;
+        while (mrp->size > 0) {
+            OPJ_UINT32 v = *mrp->data--; // read one byte at a time
+            val |= (v << i);             // put byte in its correct location
+            --mrp->size;
+            i -= 8;
+        }
     }
 
+
     //accumulate in tmp, and keep count in bits
-    tmp = (mrp->size-- > 0) ? (val >> 24) : 0; // fill zeros if all
+    tmp = val >> 24;
 
     //test if the last byte > 0x8F (unstuff must be true) and this is 0x7F
     bits = 8u - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1u : 0u);
     unstuff = (val >> 24) > 0x8F;
 
     //process the next byte
-    tmp |= (mrp->size-- > 0) ? (((val >> 16) & 0xFF) << bits) : 0;
+    tmp |= ((val >> 16) & 0xFF) << bits;
     bits += 8u - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1u : 0u);
     unstuff = ((val >> 16) & 0xFF) > 0x8F;
 
-    tmp |= (mrp->size-- > 0) ? (((val >> 8) & 0xFF) << bits) : 0;
+    tmp |= ((val >> 8) & 0xFF) << bits;
     bits += 8u - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1u : 0u);
     unstuff = ((val >> 8) & 0xFF) > 0x8F;
 
-    tmp |= (mrp->size-- > 0) ? ((val & 0xFF) << bits) : 0;
+    tmp |= (val & 0xFF) << bits;
     bits += 8u - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1u : 0u);
     unstuff = (val & 0xFF) > 0x8F;
 
@@ -621,7 +618,6 @@ void rev_init_mrp(rev_struct_t *mrp, OPJ_UINT8* data, int lcup, int len2)
         mrp->bits += d_bits;
         mrp->unstuff = d > 0x8F; // for next byte
     }
-    mrp->data -= 3; //make ready to read a 32 bits
     rev_read_mrp(mrp);
 }
 
@@ -889,25 +885,39 @@ void frwd_read(frwd_struct_t *msp)
 
     assert(msp->bits <= 32); // assert that there is a space for 32 bits
 
-    val = *(OPJ_UINT32*)msp->data;      // read 32 bits
-    msp->data += msp->size > 0 ? 4 : 0; // move pointer if data is not
-    // exhausted
+    val = 0u;
+    if (msp->size > 3) {
+        val = *(OPJ_UINT32*)msp->data;  // read 32 bits
+        msp->data += 4;           // increment pointer
+        msp->size -= 4;           // reduce size
+    } else if (msp->size > 0) {
+        int i = 0;
+        val = msp->X != 0 ? 0xFFFFFFFFu : 0;
+        while (msp->size > 0) {
+            OPJ_UINT32 v = *msp->data++;  // read one byte at a time
+            OPJ_UINT32 m = ~(0xFFu << i); // mask of location
+            val = (val & m) | (v << i);   // put one byte in its correct location
+            --msp->size;
+            i += 8;
+        }
+    } else {
+        val = msp->X != 0 ? 0xFFFFFFFFu : 0;
+    }
 
     // we accumulate in t and keep a count of the number of bits in bits
-    bits = 8u - (msp->unstuff ? 1u : 0u);   // if previous byte was 0xFF
-    // get next byte, if bitstream is exhausted, replace it with X
-    t = msp->size-- > 0 ? (val & 0xFF) : msp->X;
+    bits = 8u - (msp->unstuff ? 1u : 0u);
+    t = val & 0xFF;
     unstuff = ((val & 0xFF) == 0xFF);  // Do we need unstuffing next?
 
-    t |= (msp->size-- > 0 ? ((val >> 8) & 0xFF) : msp->X) << bits;
+    t |= ((val >> 8) & 0xFF) << bits;
     bits += 8u - (unstuff ? 1u : 0u);
     unstuff = (((val >> 8) & 0xFF) == 0xFF);
 
-    t |= (msp->size-- > 0 ? ((val >> 16) & 0xFF) : msp->X) << bits;
+    t |= ((val >> 16) & 0xFF) << bits;
     bits += 8u - (unstuff ? 1u : 0u);
     unstuff = (((val >> 16) & 0xFF) == 0xFF);
 
-    t |= (msp->size-- > 0 ? ((val >> 24) & 0xFF) : msp->X) << bits;
+    t |= ((val >> 24) & 0xFF) << bits;
     bits += 8u - (unstuff ? 1u : 0u);
     msp->unstuff = (((val >> 24) & 0xFF) == 0xFF); // for next byte
 
@@ -1077,7 +1087,7 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     OPJ_BYTE* cblkdata = NULL;
     OPJ_UINT8* coded_data;
     OPJ_UINT32* decoded_data;
-    OPJ_UINT32 missing_msbs;
+    OPJ_UINT32 zero_bplanes;
     OPJ_UINT32 num_passes;
     OPJ_UINT32 lengths1;
     OPJ_UINT32 lengths2;
@@ -1086,7 +1096,7 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     OPJ_INT32 stride;
     OPJ_UINT32 *pflags, *sigma1, *sigma2, *mbr1, *mbr2, *sip, sip_shift;
     OPJ_UINT32 p;
-    OPJ_UINT32 zero_planes_p1;
+    OPJ_UINT32 zero_bplanes_p1;
     int lcup, scup;
     dec_mel_t mel;
     rev_struct_t vlc;
@@ -1100,13 +1110,13 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     OPJ_UINT32 c_q;
     OPJ_UINT32* sp;
     OPJ_INT32 x, y; // loop indices
+    OPJ_BOOL stripe_causal = (cblksty & J2K_CCP_CBLKSTY_VSC) != 0;
 
     (void)(orient);      // stops unused parameter message
     (void)(check_pterm); // stops unused parameter message
 
     // We ignor orient, because the same decoder is used for all subbands
     // We also ignore check_pterm, because I am not sure how it applies
-    assert(cblksty == J2K_CCP_CBLKSTY_HT); // that is the only support mode
     if (roishift != 0) {
         if (p_manager_mutex) {
             opj_mutex_lock(p_manager_mutex);
@@ -1130,8 +1140,8 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
         return OPJ_TRUE;
     }
 
-    /* Mb = Kmax, numbps = Kmax + 1 - missing_msbs */
-    missing_msbs = (cblk->Mb + 1) - cblk->numbps;
+    /* numbps = Mb + 1 - zero_bplanes, Mb = Kmax, zero_bplanes = missing_msbs */
+    zero_bplanes = (cblk->Mb + 1) - cblk->numbps;
 
     /* Even if we have a single chunk, in multi-threaded decoding */
     /* the insertion of our synthetic marker might potentially override */
@@ -1223,7 +1233,7 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
         }
         opj_event_msg(p_manager, EVT_WARNING, "A malformed codeblock that has "
                       "more than one coding pass, but zero length for "
-                      "2nd and potential 3rd pass.\n");
+                      "2nd and potentially the 3rd pass in an HT codeblock.\n");
         if (p_manager_mutex) {
             opj_mutex_unlock(p_manager_mutex);
         }
@@ -1233,77 +1243,103 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
         if (p_manager_mutex) {
             opj_mutex_lock(p_manager_mutex);
         }
-        opj_event_msg(p_manager, EVT_WARNING, "We do not support more than 3 "
-                      "coding passes; This codeblocks has %d passes.\n",
-                      num_passes);
+        opj_event_msg(p_manager, EVT_ERROR, "We do not support more than 3 "
+                      "coding passes in an HT codeblock; This codeblocks has "
+                      "%d passes.\n", num_passes);
         if (p_manager_mutex) {
             opj_mutex_unlock(p_manager_mutex);
         }
         return OPJ_FALSE;
     }
 
-    if (missing_msbs > 30) {
-        /* We do not have enough precision to decode any passes */
-        if (cannot_decode_due_to_insufficient_precision == OPJ_FALSE) {
-            if (p_manager_mutex) {
-                opj_mutex_lock(p_manager_mutex);
-            }
-            cannot_decode_due_to_insufficient_precision = OPJ_TRUE;
-            opj_event_msg(p_manager, EVT_ERROR, "32 bits are not enough to "
-                          "decode this codeblock. This message "
-                          "will not be displayed again.\n");
-            if (p_manager_mutex) {
-                opj_mutex_unlock(p_manager_mutex);
-            }
+    if (cblk->Mb > 30) {
+        /* This check is better moved to opj_t2_read_packet_header() in t2.c.
+           We do not have enough precision to decode any passes.
+           The design of openjpeg assumes that the bits of a 32-bit integer are
+           assigned as follows:
+           bit 31 is for sign
+           bits 30-1 are for magnitude
+           bit 0 is for the center of the quantization bin
+           Therefore we can only do values of cblk->Mb <= 30
+         */
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_ERROR, "32 bits are not enough to "
+                      "decode this codeblock, since the number of "
+                      "bitplane, %d, is larger than 30.\n", cblk->Mb);
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
         }
         return OPJ_FALSE;
-    } else if (missing_msbs == 30) {
-        /* We do not have enough precision to decode the CUP pass with the
-           center of bin bit set.  The code can be modified to support this
-           case, where we do not set the center of the bin. */
-        if (modify_code_to_support_this_precision == OPJ_FALSE) {
+    }
+    if (zero_bplanes > cblk->Mb) {
+        /* This check is better moved to opj_t2_read_packet_header() in t2.c,
+           in the line "l_cblk->numbps = (OPJ_UINT32)l_band->numbps + 1 - i;"
+           where i is the number of zero bitplanes, and should be no larger than
+           cblk->Mb. We cannot have more zero bitplanes than there are planes. */
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
+                      "Decoding this codeblock is stopped. There are "
+                      "%d zero bitplanes in %d bitplanes.\n",
+                      zero_bplanes, cblk->Mb);
+
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        return OPJ_FALSE;
+    } else if (zero_bplanes == cblk->Mb && num_passes > 1) {
+        /* When the number of zero bitplanes is equal to the number of bitplanes,
+           only the cleanup pass makes sense*/
+        if (only_cleanup_pass_is_decoded == OPJ_FALSE) {
             if (p_manager_mutex) {
                 opj_mutex_lock(p_manager_mutex);
             }
-            modify_code_to_support_this_precision = OPJ_TRUE;
-            opj_event_msg(p_manager, EVT_ERROR, "Not enough precision to decode "
-                          "the cleanup pass. The code can be modified to "
-                          "support this case. This message will not be "
-                          "displayed again.\n");
+            /* We check again after taking the lock to prevent a race condition,
+               in the very unlikely event of a second thread discovering that
+               only_cleanup_pass_is_decoded is false before the first thread
+               changes it; this way the warning is issued only once. */
+            if (only_cleanup_pass_is_decoded == OPJ_FALSE) {
+                only_cleanup_pass_is_decoded = OPJ_TRUE;
+                opj_event_msg(p_manager, EVT_WARNING, "Malformed HT codeblock. "
+                              "When the number of zero planes bitplanes is "
+                              "equal to the number of bitplanes, only the cleanup "
+                              "pass makes sense, but we have %d passes in this "
+                              "codeblock. Therefore, only the cleanup pass will be "
+                              "decoded. This message will not be displayed again.\n",
+                              num_passes);
+            }
             if (p_manager_mutex) {
                 opj_mutex_unlock(p_manager_mutex);
             }
         }
-        return OPJ_FALSE;
-    } else if (missing_msbs == 29) { /* if p is 1, then num_passes must be 1 */
-        if (num_passes > 1) {
-            num_passes = 1;
-            if (cannot_decode_spp_mrp_msg == OPJ_FALSE) {
-                if (p_manager_mutex) {
-                    opj_mutex_lock(p_manager_mutex);
-                }
-                cannot_decode_spp_mrp_msg = OPJ_TRUE;
-                opj_event_msg(p_manager, EVT_WARNING, "Not enough precision to decode "
-                              "the SgnProp nor MagRef passes, which will be skipped. "
-                              "This message will not be displayed again.\n");
-                if (p_manager_mutex) {
-                    opj_mutex_unlock(p_manager_mutex);
-                }
-            }
-        }
+        num_passes = 1;
     }
 
     /* OPJ_UINT32 */
-    p = 30 - missing_msbs;
+    p = cblk->numbps;
 
     // OPJ_UINT32 zero planes plus 1
-    zero_planes_p1 = missing_msbs + 1;
+    zero_bplanes_p1 = zero_bplanes + 1;
 
     // read scup and fix the bytes there
     lcup = (int)lengths1;  // length of CUP
     //scup is the length of MEL + VLC
     scup = (((int)coded_data[lcup - 1]) << 4) + (coded_data[lcup - 2] & 0xF);
     if (scup < 2 || scup > lcup || scup > 4079) { //something is wrong
+        /* The standard stipulates 2 <= Scup <= min(Lcup, 4079) */
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
+                      "One of the following condition is not met: "
+                      "2 <= Scup <= min(Lcup, 4079)\n");
+
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
         return OPJ_FALSE;
     }
 
@@ -1460,12 +1496,13 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
         }
         //decode uvlc_mode to get u for both quads
         consumed_bits = decode_init_uvlc(vlc_val, uvlc_mode, U_q);
-        if (U_q[0] > zero_planes_p1 || U_q[1] > zero_planes_p1) {
+        if (U_q[0] > zero_bplanes_p1 || U_q[1] > zero_bplanes_p1) {
             if (p_manager_mutex) {
                 opj_mutex_lock(p_manager_mutex);
             }
             opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. Decoding "
-                          "this codeblock is stopped.\n");
+                          "this codeblock is stopped. U_q is larger than zero "
+                          "bitplanes + 1 \n");
             if (p_manager_mutex) {
                 opj_mutex_unlock(p_manager_mutex);
             }
@@ -1747,12 +1784,13 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
                 U_q[1] += E > 2 ? E - 2 : 0;
             }
 
-            if (U_q[0] > zero_planes_p1 || U_q[1] > zero_planes_p1) {
+            if (U_q[0] > zero_bplanes_p1 || U_q[1] > zero_bplanes_p1) {
                 if (p_manager_mutex) {
                     opj_mutex_lock(p_manager_mutex);
                 }
                 opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
-                              "Decoding this codeblock is stopped.\n");
+                              "Decoding this codeblock is stopped. U_q is"
+                              "larger than bitplanes + 1 \n");
                 if (p_manager_mutex) {
                     opj_mutex_unlock(p_manager_mutex);
                 }
@@ -2046,7 +2084,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
                     t |= nxt_sig[1] << 28;  //for last column, right neighbors
                     prev = nxt_sig[0];      // for next group of columns
 
-                    cur_mbr[0] |= (t & 0x11111111u) << 3; //propagate up to cur_mbr
+                    if (!stripe_causal) {
+                        cur_mbr[0] |= (t & 0x11111111u) << 3; //propagate up to cur_mbr
+                    }
                     cur_mbr[0] &= ~cur_sig[0]; //remove already significance samples
                 }
 
@@ -2355,7 +2395,9 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
                     t |= nxt_sig[1] << 28;  //for last column, right neighbors
                     prev = nxt_sig[0];
 
-                    cur_mbr[0] |= (t & 0x11111111) << 3;
+                    if (!stripe_causal) {
+                        cur_mbr[0] |= (t & 0x11111111u) << 3;
+                    }
                     //remove already significance samples
                     cur_mbr[0] &= ~cur_sig[0];
                 }
@@ -2528,11 +2570,10 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
 
     {
         OPJ_INT32 x, y;
-        OPJ_UINT32 shift = 29u - cblk->Mb;
         for (y = 0; y < height; ++y) {
             OPJ_INT32* sp = (OPJ_INT32*)decoded_data + y * stride;
             for (x = 0; x < width; ++x, ++sp) {
-                OPJ_INT32 val = (*sp & 0x7FFFFFFF) >> shift;
+                OPJ_INT32 val = (*sp & 0x7FFFFFFF);
                 *sp = ((OPJ_UINT32) * sp & 0x80000000) ? -val : val;
             }
         }
diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c
index b10cd5ac9..c45e4dbd5 100644
--- a/src/lib/openjp2/j2k.c
+++ b/src/lib/openjp2/j2k.c
@@ -10700,20 +10700,12 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k,
     opj_read_bytes(l_current_ptr, &l_tccp->cblksty, 1);
     ++l_current_ptr;
     if ((l_tccp->cblksty & J2K_CCP_CBLKSTY_HTMIXED) != 0) {
-        /* We do not support HT mixed mode yet*/
+        /* We do not support HT mixed mode yet.  For conformance, it should be supported.*/
         opj_event_msg(p_manager, EVT_ERROR,
                       "Error reading SPCod SPCoc element. Unsupported Mixed HT code-block style found\n");
         return OPJ_FALSE;
     }
 
-    if ((l_tccp->cblksty & (J2K_CCP_CBLKSTY_HT | J2K_CCP_CBLKSTY_VSC)) ==
-            (J2K_CCP_CBLKSTY_HT | J2K_CCP_CBLKSTY_VSC)) {
-        /* For HT, we do not support vertically causal mode yet. */
-        opj_event_msg(p_manager, EVT_ERROR,
-                      "Error reading SPCod SPCoc element. Unsupported HT mode with vertically causal mode. \n");
-        return OPJ_FALSE;
-    }
-
     /* SPcod (H) / SPcoc (E) */
     opj_read_bytes(l_current_ptr, &l_tccp->qmfbid, 1);
     ++l_current_ptr;

From a219d5d9daae43a538cf72e4e12f534b2a607175 Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Fri, 24 Sep 2021 08:53:57 +0200
Subject: [PATCH 09/10] opj_t1_ht_decode_cblk(): avoid out-of-bounds read on
 ds0_ht_02_b11.j2k (#2)

Avoids the following issue:

$ valgrind bin/opj_decompress -i ~/OpenHTJ2K/conformance_data/ds0_ht_02_b11.j2k -o out.ppm -threads 0

==4037690== Invalid read of size 1
==4037690==    at 0x48589FA: opj_t1_ht_decode_cblk (fbc_dec.c:1262)
==4037690==    by 0x48B28E5: opj_t1_clbl_decode_processor (t1.c:1690)
==4037690==    by 0x4854A33: opj_thread_pool_submit_job (thread.c:835)
==4037690==    by 0x48B37C3: opj_t1_decode_cblks (t1.c:1943)
==4037690==    by 0x48BD668: opj_tcd_t1_decode (tcd.c:2000)
==4037690==    by 0x48BCADF: opj_tcd_decode_tile (tcd.c:1654)
==4037690==    by 0x487D348: opj_j2k_decode_tile (j2k.c:9759)
==4037690==    by 0x4881CDA: opj_j2k_decode_tiles (j2k.c:11566)
==4037690==    by 0x487B333: opj_j2k_exec (j2k.c:8903)
==4037690==    by 0x4882AD1: opj_j2k_decode (j2k.c:11912)
==4037690==    by 0x488EF5C: opj_decode (openjpeg.c:494)
==4037690==    by 0x1103AC: main (opj_decompress.c:1547)
==4037690==  Address 0x52884ef is 1 bytes before a block of size 2 alloc'd
==4037690==    at 0x483B723: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==4037690==    by 0x483E017: realloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==4037690==    by 0x48C0676: opj_realloc (opj_malloc.c:244)
==4037690==    by 0x48584E6: opj_t1_ht_decode_cblk (fbc_dec.c:1123)
==4037690==    by 0x48B28E5: opj_t1_clbl_decode_processor (t1.c:1690)
==4037690==    by 0x4854A33: opj_thread_pool_submit_job (thread.c:835)
==4037690==    by 0x48B37C3: opj_t1_decode_cblks (t1.c:1943)
==4037690==    by 0x48BD668: opj_tcd_t1_decode (tcd.c:2000)
==4037690==    by 0x48BCADF: opj_tcd_decode_tile (tcd.c:1654)
==4037690==    by 0x487D348: opj_j2k_decode_tile (j2k.c:9759)
==4037690==    by 0x4881CDA: opj_j2k_decode_tiles (j2k.c:11566)
==4037690==    by 0x487B333: opj_j2k_exec (j2k.c:8903)

I've also slightly simplified the allocation of the concatenated code block
buffer by removing the OPJ_COMMON_CBLK_DATA_EXTRA padding, which I believe is a
trick only needed for regular code block decoding, not HT.
---
 src/lib/openjp2/fbc_dec.c | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/lib/openjp2/fbc_dec.c b/src/lib/openjp2/fbc_dec.c
index 71e3bb868..079d2313b 100644
--- a/src/lib/openjp2/fbc_dec.c
+++ b/src/lib/openjp2/fbc_dec.c
@@ -1111,6 +1111,7 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     OPJ_UINT32* sp;
     OPJ_INT32 x, y; // loop indices
     OPJ_BOOL stripe_causal = (cblksty & J2K_CCP_CBLKSTY_VSC) != 0;
+    OPJ_UINT32 cblk_len = 0;
 
     (void)(orient);      // stops unused parameter message
     (void)(check_pterm); // stops unused parameter message
@@ -1143,29 +1144,27 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     /* numbps = Mb + 1 - zero_bplanes, Mb = Kmax, zero_bplanes = missing_msbs */
     zero_bplanes = (cblk->Mb + 1) - cblk->numbps;
 
-    /* Even if we have a single chunk, in multi-threaded decoding */
-    /* the insertion of our synthetic marker might potentially override */
-    /* valid codestream of other codeblocks decoded in parallel. */
-    if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
+    /* Compute whole codeblock length from chunk lengths */
+    cblk_len = 0;
+    {
         OPJ_UINT32 i;
-        OPJ_UINT32 cblk_len;
-
-        /* Compute whole codeblock length from chunk lengths */
-        cblk_len = 0;
         for (i = 0; i < cblk->numchunks; i++) {
             cblk_len += cblk->chunks[i].len;
         }
+    }
+
+    if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
+        OPJ_UINT32 i;
 
         /* Allocate temporary memory if needed */
-        if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
+        if (cblk_len > t1->cblkdatabuffersize) {
             cblkdata = (OPJ_BYTE*)opj_realloc(
-                           t1->cblkdatabuffer, cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
+                           t1->cblkdatabuffer, cblk_len);
             if (cblkdata == NULL) {
                 return OPJ_FALSE;
             }
             t1->cblkdatabuffer = cblkdata;
-            memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
-            t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
+            t1->cblkdatabuffersize = cblk_len;
         }
 
         /* Concatenate all chunks */
@@ -1327,6 +1326,18 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     // read scup and fix the bytes there
     lcup = (int)lengths1;  // length of CUP
     //scup is the length of MEL + VLC
+    if (lcup < 2 || (OPJ_UINT32)lcup - 2 >= cblk_len) {
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
+                      "Invalid lcup value\n");
+
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        return OPJ_FALSE;
+    }
     scup = (((int)coded_data[lcup - 1]) << 4) + (coded_data[lcup - 2] & 0xF);
     if (scup < 2 || scup > lcup || scup > 4079) { //something is wrong
         /* The standard stipulates 2 <= Scup <= min(Lcup, 4079) */

From cfb1fb83df9efd07e36488d1d4da669f9e227c59 Mon Sep 17 00:00:00 2001
From: Aous Naman <aous72@yahoo.com>
Date: Fri, 24 Sep 2021 21:21:50 +1000
Subject: [PATCH 10/10] Improved length checks.  Hardening against one problem
 discovered by fuzzing, whereby a decoded vlc codeword indicates a significant
 sample outside of a codeblock.

---
 src/lib/openjp2/fbc_dec.c | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/src/lib/openjp2/fbc_dec.c b/src/lib/openjp2/fbc_dec.c
index 079d2313b..080e4f783 100644
--- a/src/lib/openjp2/fbc_dec.c
+++ b/src/lib/openjp2/fbc_dec.c
@@ -1323,21 +1323,22 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
     // OPJ_UINT32 zero planes plus 1
     zero_bplanes_p1 = zero_bplanes + 1;
 
-    // read scup and fix the bytes there
-    lcup = (int)lengths1;  // length of CUP
-    //scup is the length of MEL + VLC
-    if (lcup < 2 || (OPJ_UINT32)lcup - 2 >= cblk_len) {
+    if (lengths1 < 2 || (OPJ_UINT32)lengths1 > cblk_len ||
+            (OPJ_UINT32)(lengths1 + lengths2) > cblk_len) {
         if (p_manager_mutex) {
             opj_mutex_lock(p_manager_mutex);
         }
         opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
-                      "Invalid lcup value\n");
+                      "Invalid codeblock length values.\n");
 
         if (p_manager_mutex) {
             opj_mutex_unlock(p_manager_mutex);
         }
         return OPJ_FALSE;
     }
+    // read scup and fix the bytes there
+    lcup = (int)lengths1;  // length of CUP
+    //scup is the length of MEL + VLC
     scup = (((int)coded_data[lcup - 1]) << 4) + (coded_data[lcup - 2] & 0xF);
     if (scup < 2 || scup > lcup || scup > 4079) { //something is wrong
         /* The standard stipulates 2 <= Scup <= min(Lcup, 4079) */
@@ -1533,6 +1534,19 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
         }
         locs = height > 1 ? locs : (locs & 0x55);         // limits height
 
+        if ((((qinf[0] & 0xF0) >> 4) | (qinf[1] & 0xF0)) & ~locs) {
+            if (p_manager_mutex) {
+                opj_mutex_lock(p_manager_mutex);
+            }
+            opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
+                          "VLC code produces significant samples outside "
+                          "the codeblock area.\n");
+            if (p_manager_mutex) {
+                opj_mutex_unlock(p_manager_mutex);
+            }
+            return OPJ_FALSE;
+        }
+
         //first quad, starting at first sample in quad and moving on
         if (qinf[0] & 0x10) { //is it signifcant? (sigma_n)
             OPJ_UINT32 val;
@@ -1821,6 +1835,20 @@ OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
             }
             locs = y + 2 <= height ? locs : (locs & 0x55);
 
+            if ((((qinf[0] & 0xF0) >> 4) | (qinf[1] & 0xF0)) & ~locs) {
+                if (p_manager_mutex) {
+                    opj_mutex_lock(p_manager_mutex);
+                }
+                opj_event_msg(p_manager, EVT_ERROR, "Malformed HT codeblock. "
+                              "VLC code produces significant samples outside "
+                              "the codeblock area.\n");
+                if (p_manager_mutex) {
+                    opj_mutex_unlock(p_manager_mutex);
+                }
+                return OPJ_FALSE;
+            }
+
+
 
             if (qinf[0] & 0x10) { //sigma_n
                 OPJ_UINT32 val;