Skip to content

Commit

Permalink
Add postprocessor and preprocessor cpp to replace the cuda version
Browse files Browse the repository at this point in the history
- Modify the colorspace to support uchar4 and int4 on CPU
- Update decoder to use the processor

BUG: The raw image has a width/stride issue

Signed-off-by: Anthony Liot <[email protected]>
  • Loading branch information
anthonyliot committed Jan 11, 2024
1 parent 89878b2 commit 39d28a2
Show file tree
Hide file tree
Showing 8 changed files with 1,152 additions and 17 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.8.0 FATAL_ERROR)
# change version also in configure.ac
project(gpujpeg VERSION 0.21.0 LANGUAGES C)
project(gpujpeg VERSION 0.21.0 LANGUAGES C CXX)
include(CheckLanguage)

check_language(CUDA)
Expand Down Expand Up @@ -157,7 +157,7 @@ message(STATUS "Configured options: ${COMPILED_OPTIONS}")

# GPUJPEG library
file(GLOB H_FILES libgpujpeg/*.h ${CMAKE_CURRENT_BINARY_DIR}/libgpujpeg/gpujpeg_version.h)
file(GLOB_RECURSE C_FILES src/*.c src/*.cu)
file(GLOB_RECURSE C_FILES src/*.c src/*.cu src/*.cpp)
list(REMOVE_ITEM C_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/main.c")
add_library(gpujpeg ${H_FILES} ${C_FILES})
target_include_directories(${PROJECT_NAME}
Expand Down
27 changes: 27 additions & 0 deletions src/gpujpeg_colorspace.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,33 @@
#include <assert.h>
#include "../libgpujpeg/gpujpeg_type.h"

// TODO: NEED IMPLEMENTATION
#ifndef GPUJPEG_USE_CUDA
#include <cmath>
#include <algorithm>

// CPU-only build: the __device__ qualifier is defined to expand to nothing.
#define __device__
// Let unqualified round(...) calls resolve to the std::round overloads.
// A using-declaration is used instead of a `round` macro because a macro
// also rewrites qualified uses (std::round -> std::std::round) and every
// other identifier spelled `round`.
using std::round;

/// Four-lane vector of 8-bit unsigned values (x, y, z, w), declared only
/// when GPUJPEG_USE_CUDA is not defined.
typedef struct uchar4 {
    uint8_t x, y, z, w;
} uchar4;

/// Four-lane vector of signed integers (x, y, z, w), declared only when
/// GPUJPEG_USE_CUDA is not defined.
///
/// The lanes are signed `int`, matching CUDA's built-in int4. With unsigned
/// lanes, negative intermediate values would wrap to large positive numbers
/// and break comparisons and clamping on the CPU path.
struct int4 {
    int x;
    int y;
    int z;
    int w;
};
typedef struct int4 int4;
#endif

/**
* Color transform debug info
*/
Expand Down
55 changes: 51 additions & 4 deletions src/gpujpeg_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,17 @@ gpujpeg_component_print8(struct gpujpeg_component* component, uint8_t* d_data)
cudaFreeHost(data);
#else
// TODO: NEED IMPLEMENTATION
printf("[WARNING] gpujpeg_component_print8(): NOT YET IMPLEMENTED\n");
data = malloc(data_size * sizeof(uint8_t));
memcpy(data, d_data, data_size * sizeof(uint8_t));

printf("Print Data\n");
for ( int y = 0; y < component->data_height; y++ ) {
for ( int x = 0; x < component->data_width; x++ ) {
printf("%3u ", data[y * component->data_width + x]);
}
printf("\n");
}
free(data);
#endif
}

Expand All @@ -490,7 +500,17 @@ gpujpeg_component_print16(struct gpujpeg_component* component, int16_t* d_data)
cudaFreeHost(data);
#else
// TODO: NEED IMPLEMENTATION
printf("[WARNING] gpujpeg_component_print16(): NOT YET IMPLEMENTED\n");
data = malloc(data_size * sizeof(int16_t));
memcpy(data, d_data, data_size * sizeof(int16_t));

printf("Print Data\n");
for ( int y = 0; y < component->data_height; y++ ) {
for ( int x = 0; x < component->data_width; x++ ) {
printf("%3d ", data[y * component->data_width + x]);
}
printf("\n");
}
free(data);
#endif
}

Expand Down Expand Up @@ -1193,7 +1213,34 @@ gpujpeg_coder_deinit(struct gpujpeg_coder* coder)
cudaFree(coder->d_block_list);
#else
// TODO: NEED IMPLEMENTATION
printf("[WARNING] gpujpeg_coder_deinit(): NOT YET IMPLEMENTED\n");
if (coder->component != NULL)
free(coder->component);
if (coder->d_component != NULL)
free(coder->d_component);
if ( coder->data_raw != NULL )
free(coder->data_raw);
if ( coder->d_data_raw_allocated != NULL )
free(coder->d_data_raw_allocated);
if ( coder->d_data != NULL )
free(coder->d_data);
if ( coder->data_quantized != NULL )
free(coder->data_quantized);
if ( coder->d_data_quantized != NULL )
free(coder->d_data_quantized);
if ( coder->data_compressed != NULL )
free(coder->data_compressed);
if ( coder->d_data_compressed != NULL )
free(coder->d_data_compressed);
if ( coder->segment != NULL )
free(coder->segment);
if ( coder->d_segment != NULL )
free(coder->d_segment);
if ( coder->d_temp_huffman != NULL )
free(coder->d_temp_huffman);
if ( coder->block_list != NULL )
free(coder->block_list);
if ( coder->d_block_list != NULL )
free(coder->d_block_list);
#endif
GPUJPEG_CUSTOM_TIMER_DESTROY(coder->duration_memory_to, return -1);
GPUJPEG_CUSTOM_TIMER_DESTROY(coder->duration_memory_from, return -1);
Expand Down Expand Up @@ -1367,7 +1414,7 @@ gpujpeg_image_destroy(uint8_t* image)
cudaFreeHost(image);
#else
// TODO: NEED IMPLEMENTATION
printf("[WARNING] gpujpeg_image_destroy(): NOT YET IMPLEMENTED\n");
free(image);
#endif
return 0;
}
Expand Down
50 changes: 41 additions & 9 deletions src/gpujpeg_decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@
#include "gpujpeg_decoder_internal.h"
#include "gpujpeg_huffman_cpu_decoder.h"
#include "gpujpeg_util.h"
#include "gpujpeg_postprocessor.h"

#ifdef GPUJPEG_USE_CUDA
#include "gpujpeg_dct_gpu.h"
#include "gpujpeg_huffman_gpu_decoder.h"
#include "gpujpeg_postprocessor.h"

#endif

/* Documented at declaration */
Expand Down Expand Up @@ -214,14 +215,10 @@ gpujpeg_decoder_init(struct gpujpeg_decoder* decoder, const struct gpujpeg_param
}

// Init postprocessor
#ifdef GPUJPEG_USE_CUDA
if ( gpujpeg_preprocessor_decoder_init(&decoder->coder) != 0 ) {
fprintf(stderr, "[GPUJPEG] [Error] Failed to init postprocessor!\n");
return -1;
}
#else
// TODO: NOT YET IMPLEMENTED\n
#endif

return 0;
}
Expand Down Expand Up @@ -373,19 +370,34 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i
}

// Preprocessing
#ifdef GPUJPEG_USE_CUDA
GPUJPEG_CUSTOM_TIMER_START(coder->duration_preprocessor, coder->param.perf_stats, decoder->stream, return -1);
rc = gpujpeg_preprocessor_decode(&decoder->coder, decoder->stream);
if (rc != GPUJPEG_NOERR) {
return rc;
}
GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_preprocessor, coder->param.perf_stats, decoder->stream, return -1);

#ifdef GPUJPEG_USE_CUDA
// Wait for async operations before copying from the device
cudaStreamSynchronize(decoder->stream);
#else
// TODO: NEED IMPLEMENTATION
printf("[WARNING] gpujpeg_decoder_decode(): NOT YET IMPLEMENTED\n");
// // TODO: NEED IMPLEMENTATION
// printf("[WARNING] gpujpeg_decoder_decode(): NOT YET IMPLEMENTED\n");
// const int height = coder->param_image.height;
// const int width = coder->param_image.width;
// for (int y = 0; y < height; y++) {
// for (int x = 0; x < width; x++) {
// int Y = coder->component[0].d_data[y * width + x];
// int U = coder->component[1].d_data[y * width + x] - 128;
// int V = coder->component[2].d_data[y * width + x] - 128;
// int R = Y + 1.402 * V;
// int G = Y - 0.344136 * U - 0.714136 * V;
// int B = Y + 1.772 * U;
// coder->d_data_raw[3 * (y * width + x)] = R;
// coder->d_data_raw[3 * (y * width + x) + 1] = G;
// coder->d_data_raw[3 * (y * width + x) + 2] = B;
// }
// }
#endif

GPUJPEG_CUSTOM_TIMER_STOP(coder->duration_in_gpu, coder->param.perf_stats, decoder->stream, return -1);
Expand Down Expand Up @@ -518,7 +530,27 @@ gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder)
}
#else
// TODO: NEED IMPLEMENTATION
printf("[WARNING] gpujpeg_decoder_destroy(): NOT YET IMPLEMENTED\n");
for (int comp_type = 0; comp_type < GPUJPEG_MAX_COMPONENT_COUNT; comp_type++) {
if (decoder->table_quantization[comp_type].d_table != NULL) {
free(decoder->table_quantization[comp_type].d_table);
}
}

for ( int comp_type = 0; comp_type < GPUJPEG_MAX_COMPONENT_COUNT; comp_type++ ) {
for ( int huff_type = 0; huff_type < GPUJPEG_HUFFMAN_TYPE_COUNT; huff_type++ ) {
free(decoder->d_table_huffman[comp_type][huff_type]);
}
}

if (decoder->reader != NULL) {
gpujpeg_reader_destroy(decoder->reader);
}

// ??
// if (decoder->huffman_gpu_decoder != NULL) {
// gpujpeg_huffman_gpu_decoder_destroy(decoder->huffman_gpu_decoder);
// }

#endif

free(decoder);
Expand Down
7 changes: 5 additions & 2 deletions src/gpujpeg_encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@
#include "gpujpeg_huffman_cpu_encoder.h"
#include "gpujpeg_marker.h"
#include "gpujpeg_util.h"
#include "gpujpeg_preprocessor.h"

#ifdef GPUJPEG_USE_CUDA
#include "gpujpeg_dct_gpu.h"
#include "gpujpeg_huffman_gpu_encoder.h"
#include "gpujpeg_preprocessor.h"
#endif

/* Documented at declaration */
Expand Down Expand Up @@ -109,7 +109,10 @@ gpujpeg_encoder_create(cudaStream_t stream)
gpujpeg_cuda_check_error("Encoder table allocation", return NULL);
#else
// TODO: NEED IMPLEMENTATION
printf("[WARNING] gpujpeg_encoder_create(): NOT YET IMPLEMENTED\n");
for ( int comp_type = 0; comp_type < GPUJPEG_COMPONENT_TYPE_COUNT; comp_type++ ) {
encoder->table_quantization[comp_type].d_table = malloc(64 * sizeof(uint16_t));
encoder->table_quantization[comp_type].d_table_forward = malloc(64 * sizeof(float));
}
#endif

// Init huffman tables for encoder
Expand Down
Loading

0 comments on commit 39d28a2

Please sign in to comment.