Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Subtile decoding: memory use reduction and perf improvements #1010

Merged
merged 29 commits into from
Sep 5, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
84bbb4a
opj_t1_allocate_buffers(): remove useless overflow checks
rouault Aug 21, 2017
0a25dce
opj_j2k_setup_encoder(): validate code block width/height
rouault Aug 21, 2017
aa71981
opj_compress: reorder checks related to code block dimensions, to avo…
rouault Aug 21, 2017
f9e9942
Sub-tile decoding: only allocate tile component buffer of the needed …
rouault Sep 1, 2017
eee5104
opj_dwt_decode_partial_tile(): avoid undefined behaviour in lifting o…
rouault Sep 1, 2017
c37e360
opj_tcd_init_tile(): fix typo on overflow detection condition (introd…
rouault Sep 1, 2017
d5153ba
Remove limitation that prevents opening images bigger than 4 bil…
rouault Sep 1, 2017
d1299d9
Fix compiler warning in release mode
rouault Sep 1, 2017
008a12d
TCD: allow tile buffer to be greater than 4GB on 64 bit hosts (but nu…
rouault Sep 1, 2017
98b9310
Various changes to allow tile buffers of more than 4 gigapixels
rouault Sep 1, 2017
5d07d46
opj_j2k_decode_tiles(): apply whole single tile image decoding optimi…
rouault Sep 1, 2017
0ae3cba
Allow several repeated calls to opj_set_decode_area() and opj_decode(…
rouault Sep 1, 2017
b2cc8f7
Optimize reading/write into sparse array
rouault Sep 1, 2017
1644665
opj_j2k_update_image_data(): avoid zero-ing the buffer if not needed
rouault Sep 1, 2017
82a43d8
Optimize opj_dwt_decode_partial_1() when cas == 0
rouault Sep 1, 2017
18635df
test_decode_area: accept user bounds in -strip_height mode
rouault Sep 1, 2017
ccac773
Tiny perf improvement in T1 stage for subtile decoding
rouault Sep 1, 2017
873004c
Sub-tile decoding: speed up vertical pass in IDWT5x3 by processing 4 …
rouault Sep 1, 2017
470f3ed
opj_dwt_decode_partial_1_parallel(): add SSE2 optimization
rouault Sep 1, 2017
ae19001
opj_tcd_dc_level_shift_decode(): optimize lossy case
rouault Sep 1, 2017
83b5a16
opj_dwt_decode_partial_97(): simplify/more efficient use of sparse ar…
rouault Sep 1, 2017
8a17be8
opj_v4dwt_decode_step2_sse(): loop unroll
rouault Sep 1, 2017
7017e67
sparse_array: optimizations for lossy case
rouault Sep 1, 2017
559d16e
opj_t1_decode_cblk(): move some code to codeblock processor for (theo…
rouault Sep 1, 2017
2c365fe
Replace error message 'Not enough memory for tile data' by 'Size of t…
rouault Sep 1, 2017
4c7effa
opj_t1_clbl_decode_processor(): use SSE2 in subtile decoding code pat…
rouault Sep 1, 2017
676d4c8
opj_j2k_update_image_data(): avoid allocating image buffer if we can …
rouault Sep 1, 2017
c1e0fba
opj_v4dwt_decode_step1_sse(): rework a bit to improve code generation
rouault Sep 1, 2017
579b893
Replace uses of size_t by OPJ_SIZE_T
rouault Sep 4, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ if(BUILD_JPIP_SERVER)
endif()
add_subdirectory(src/lib)
option(BUILD_LUTS_GENERATOR "Build utility to generate t1_luts.h" OFF)
option(BUILD_BENCH_DWT "Build bench_dwt utility (development benchmark)" OFF)
option(BUILD_UNIT_TESTS "Build unit tests (bench_dwt, test_sparse_array, etc..)" OFF)

#-----------------------------------------------------------------------------
# Build Applications
Expand Down
5 changes: 3 additions & 2 deletions src/bin/jp2/opj_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -907,8 +907,9 @@ static int parse_cmdline_encoder(int argc, char **argv,
case 'b': { /* code-block dimension */
int cblockw_init = 0, cblockh_init = 0;
sscanf(opj_optarg, "%d,%d", &cblockw_init, &cblockh_init);
if (cblockw_init * cblockh_init > 4096 || cblockw_init > 1024
|| cblockw_init < 4 || cblockh_init > 1024 || cblockh_init < 4) {
if (cblockw_init > 1024 || cblockw_init < 4 ||
cblockh_init > 1024 || cblockh_init < 4 ||
cblockw_init * cblockh_init > 4096) {
fprintf(stderr,
"!! Size of code_block error (option -b) !!\n\nRestriction :\n"
" * width*height<=4096\n * 4<=width,height<= 1024\n\n");
Expand Down
18 changes: 14 additions & 4 deletions src/lib/openjp2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ set(OPENJPEG_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/opj_malloc.c
${CMAKE_CURRENT_SOURCE_DIR}/opj_malloc.h
${CMAKE_CURRENT_SOURCE_DIR}/opj_stdint.h
${CMAKE_CURRENT_SOURCE_DIR}/sparse_array.c
${CMAKE_CURRENT_SOURCE_DIR}/sparse_array.h
)
if(BUILD_JPIP)
add_definitions(-DUSE_JPIP)
Expand Down Expand Up @@ -192,12 +194,20 @@ if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
TARGET_LINK_LIBRARIES(${OPENJPEG_LIBRARY_NAME} ${CMAKE_THREAD_LIBS_INIT})
endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)

if(BUILD_BENCH_DWT)
add_executable(bench_dwt bench_dwt.c dwt.c opj_malloc.c thread.c)
if(BUILD_UNIT_TESTS)
add_executable(bench_dwt bench_dwt.c)
if(UNIX)
target_link_libraries(bench_dwt m)
target_link_libraries(bench_dwt m ${OPENJPEG_LIBRARY_NAME})
endif()
if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
target_link_libraries(bench_dwt ${CMAKE_THREAD_LIBS_INIT})
endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
endif(BUILD_BENCH_DWT)

add_executable(test_sparse_array test_sparse_array.c)
if(UNIX)
target_link_libraries(test_sparse_array m ${OPENJPEG_LIBRARY_NAME})
endif()
if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
target_link_libraries(test_sparse_array ${CMAKE_THREAD_LIBS_INIT})
endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
endif(BUILD_UNIT_TESTS)
9 changes: 5 additions & 4 deletions src/lib/openjp2/bench_dwt.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,11 @@ int main(int argc, char** argv)

memset(&tcd, 0, sizeof(tcd));
tcd.thread_pool = tp;
tcd.decoded_x0 = (OPJ_UINT32)tilec.x0;
tcd.decoded_y0 = (OPJ_UINT32)tilec.y0;
tcd.decoded_x1 = (OPJ_UINT32)tilec.x1;
tcd.decoded_y1 = (OPJ_UINT32)tilec.y1;
tcd.whole_tile_decoding = OPJ_TRUE;
tcd.win_x0 = (OPJ_UINT32)tilec.x0;
tcd.win_y0 = (OPJ_UINT32)tilec.y0;
tcd.win_x1 = (OPJ_UINT32)tilec.x1;
tcd.win_y1 = (OPJ_UINT32)tilec.y1;
tcd.tcd_image = &tcd_image;
memset(&tcd_image, 0, sizeof(tcd_image));
tcd_image.tiles = &tcd_tile;
Expand Down
Loading