Skip to content

Commit

Permalink
gpujpegtool,lib: move statistics printout directly to the lib
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinPulec committed Oct 7, 2024
1 parent 5139742 commit 93ad3da
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 73 deletions.
81 changes: 81 additions & 0 deletions src/gpujpeg_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -2046,4 +2046,85 @@ gpujpeg_device_reset(void)
cudaDeviceReset();
}

/**
 * Closes the time measurement of the image that has just been processed and prints its statistics.
 *
 * Does nothing unless gpujpeg_coder.param.perf_stats is set. Otherwise the time elapsed since
 * coder->start_time is added to the aggregate counters (later consumed by coder_process_stats_overall(),
 * the duration of the very first frame is remembered separately) and the per-image statistics are
 * printed to stdout. The individual stages are listed only when param.verbose >= 1.
 *
 * @param coder  coder (encoder or decoder) whose start_time was set at the beginning of the processing
 */
void
coder_process_stats(struct gpujpeg_coder* coder)
{
    if ( !coder->param.perf_stats ) {
        return;
    }

    // finish the measurement; the difference is scaled by 1000 and reported as milliseconds
    coder->stop_time = gpujpeg_get_time();
    const double elapsed_ms = (coder->stop_time - coder->start_time) * 1000.0;

    // update the aggregate counters
    const int is_first_frame = coder->frames == 0;
    if ( is_first_frame ) {
        coder->first_frame_duration = elapsed_ms;
    }
    coder->frames += 1;
    coder->aggregate_duration += elapsed_ms;

    struct gpujpeg_duration_stats stats;
    gpujpeg_coder_get_stats(coder, &stats);

    const int detailed = coder->param.verbose >= 1;
    // OpenGL timings are shown only if both map and unmap were actually measured
    const int opengl_measured = stats.duration_memory_map != 0.0 && stats.duration_memory_unmap != 0.0;
    const double bare_ms = elapsed_ms - stats.duration_memory_to - stats.duration_memory_from;

    if ( !coder->encoder ) {
        // decoder: stages are listed in decoding order
        if ( detailed ) {
            printf(" -Stream Reader: %10.4f ms\n", stats.duration_stream);
            printf(" -Copy To Device: %10.4f ms\n", stats.duration_memory_to);
            printf(" -Huffman Decoder: %10.4f ms\n", stats.duration_huffman_coder);
            printf(" -DCT & Quantization:%10.4f ms\n", stats.duration_dct_quantization);
            printf(" -Postprocessing: %10.4f ms\n", stats.duration_preprocessor);
            printf(" -Copy From Device: %10.4f ms\n", stats.duration_memory_from);
            if ( opengl_measured ) {
                printf(" -OpenGL Memory Map: %10.4f ms\n", stats.duration_memory_map);
                printf(" -OpenGL Memory Unmap:%9.4f ms\n", stats.duration_memory_unmap);
            }
        }
        printf("Decode Image GPU: %10.4f ms (only in-GPU processing)\n", stats.duration_in_gpu);
        printf("Decode Image Bare: %10.4f ms (without copy to/from GPU memory)\n", bare_ms);
        printf("Decode Image: %10.4f ms\n", elapsed_ms);
        return;
    }

    // encoder: stages are listed in encoding order
    if ( detailed ) {
        printf(" -Copy To Device: %10.4f ms\n", stats.duration_memory_to);
        if ( opengl_measured ) {
            printf(" -OpenGL Memory Map: %10.4f ms\n", stats.duration_memory_map);
            printf(" -OpenGL Memory Unmap:%9.4f ms\n", stats.duration_memory_unmap);
        }
        printf(" -Preprocessing: %10.4f ms\n", stats.duration_preprocessor);
        printf(" -DCT & Quantization:%10.4f ms\n", stats.duration_dct_quantization);
        printf(" -Huffman Encoder: %10.4f ms\n", stats.duration_huffman_coder);
        printf(" -Copy From Device: %10.4f ms\n", stats.duration_memory_from);
        printf(" -Stream Formatter: %10.4f ms\n", stats.duration_stream);
    }
    printf("Encode Image GPU: %10.4f ms (only in-GPU processing)\n", stats.duration_in_gpu);
    printf("Encode Image Bare: %10.4f ms (without copy to/from GPU memory)\n", bare_ms);
    printf("Encode Image: %10.4f ms\n", elapsed_ms);
}

/**
 * @brief prints overall (aggregate) statistics
 * @sa coder_process_stats
 *
 * To be called on encoder/decoder destroy. Prints the average duration per frame and, when
 * param.verbose >= 1, also the average computed without the first frame. Nothing is printed if
 * param.perf_stats is off or if at most one frame was processed.
 *
 * @param coder  coder holding the counters accumulated by coder_process_stats()
 */
void
coder_process_stats_overall(struct gpujpeg_coder* coder)
{
    // aggregate statistics make sense only with perf_stats on and at least 2 frames
    const int worth_printing = coder->param.perf_stats && coder->frames > 1;
    if ( !worth_printing ) {
        return;
    }

    const char* const operation = coder->encoder ? "Encode" : "Decode";
    const double frame_count = (double)coder->frames;
    const double total_ms = coder->aggregate_duration;

    printf("\n");
    printf("Avg %s Duration: %10.4f ms\n", operation, total_ms / frame_count);
    if ( coder->param.verbose >= 1 ) {
        // leave the first frame out of the average
        const double without_first_ms = total_ms - coder->first_frame_duration;
        printf("Avg w/o 1st Iter: %10.4f ms\n", without_first_ms / (frame_count - 1));
    }
    printf("\n");
}

/* vi: set expandtab sw=4 : */
10 changes: 10 additions & 0 deletions src/gpujpeg_common_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ struct gpujpeg_coder
int cuda_cc_minor; ///< CUDA Compute capability (minor version)

// Operation durations
double start_time, stop_time;
struct gpujpeg_timer duration_memory_to;
struct gpujpeg_timer duration_memory_from;
struct gpujpeg_timer duration_memory_map;
Expand All @@ -368,6 +369,10 @@ struct gpujpeg_coder
struct gpujpeg_timer duration_huffman_coder;
struct gpujpeg_timer duration_stream;
struct gpujpeg_timer duration_in_gpu;
// aggregate statistics
double first_frame_duration;
double aggregate_duration;
long frames;

size_t allocated_gpu_memory_size; ///< for gpujpeg_encoder_max_pixels() only (remove?)

Expand Down Expand Up @@ -410,6 +415,11 @@ gpujpeg_coder_get_stats(struct gpujpeg_coder *coder, struct gpujpeg_duration_sta
int
gpujpeg_coder_deinit(struct gpujpeg_coder* coder);

/**
 * Finishes the time measurement of the just processed image, accumulates it for the aggregate
 * statistics and prints the per-image statistics. No-op unless coder->param.perf_stats is set.
 */
void
coder_process_stats(struct gpujpeg_coder* coder);
/**
 * Prints the average duration over all processed frames. No-op unless coder->param.perf_stats is set
 * and more than one frame was processed.
 */
void
coder_process_stats_overall(struct gpujpeg_coder* coder);

struct gpujpeg_component;

/**
Expand Down
7 changes: 7 additions & 0 deletions src/gpujpeg_decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
*/

#include "../libgpujpeg/gpujpeg_decoder.h"
#include "gpujpeg_common_internal.h"
#include "gpujpeg_dct_cpu.h"
#include "gpujpeg_dct_gpu.h"
#include "gpujpeg_decoder_internal.h"
Expand Down Expand Up @@ -210,6 +211,8 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i
int rc;
int unsupp_gpu_huffman_params = 0;

coder->start_time = coder->param.perf_stats ? gpujpeg_get_time() : 0;

GPUJPEG_CUSTOM_TIMER_START(coder->duration_stream, coder->param.perf_stats, decoder->stream, return -1);

// Read JPEG image data
Expand Down Expand Up @@ -403,6 +406,8 @@ gpujpeg_decoder_decode(struct gpujpeg_decoder* decoder, uint8_t* image, size_t i
assert(0);
}

coder_process_stats(coder);

return 0;
}

Expand All @@ -426,6 +431,8 @@ gpujpeg_decoder_destroy(struct gpujpeg_decoder* decoder)
{
assert(decoder != NULL);

coder_process_stats_overall(&decoder->coder);

if (0 != gpujpeg_coder_deinit(&decoder->coder)) {
return -1;
}
Expand Down
7 changes: 7 additions & 0 deletions src/gpujpeg_encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <string.h>
#include "../libgpujpeg/gpujpeg_common.h"
#include "../libgpujpeg/gpujpeg_encoder.h"
#include "gpujpeg_common_internal.h"
#include "gpujpeg_preprocessor.h"
#include "gpujpeg_dct_cpu.h"
#include "gpujpeg_dct_gpu.h"
Expand Down Expand Up @@ -331,6 +332,8 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, const struct gpujpeg_par
struct gpujpeg_coder* coder = &encoder->coder;
int rc;

coder->start_time = param->perf_stats ? gpujpeg_get_time() : 0;

const bool img_changed = !gpujpeg_image_parameters_equals(&coder->param_image, param_image);
struct gpujpeg_parameters param_adjusted = adjust_params(coder, param, param_image, img_changed);

Expand Down Expand Up @@ -584,6 +587,8 @@ gpujpeg_encoder_encode(struct gpujpeg_encoder* encoder, const struct gpujpeg_par

coder->d_data_raw = NULL;

coder_process_stats(coder);

return 0;
}

Expand All @@ -606,6 +611,8 @@ gpujpeg_encoder_destroy(struct gpujpeg_encoder* encoder)
{
assert(encoder != NULL);

coder_process_stats_overall(&encoder->coder);

if (encoder->huffman_gpu_encoder != NULL) {
gpujpeg_huffman_gpu_encoder_destroy(encoder->huffman_gpu_encoder);
}
Expand Down
73 changes: 0 additions & 73 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -553,52 +553,16 @@ main(int argc, char *argv[])
// Encode image
uint8_t* image_compressed = NULL;
size_t image_compressed_size = 0;
double duration_all_iterations = 0;
double duration_first_iteration = 0;
for ( int iteration = 0; iteration < iterate; iteration++ ) {
if ( iterate > 1 ) {
printf("\nIteration #%d:\n", iteration + 1);
}

duration = gpujpeg_get_time();

rc = gpujpeg_encoder_encode(encoder, &param, &param_image, &encoder_input, &image_compressed, &image_compressed_size);
if ( rc != GPUJPEG_NOERR ) {
fprintf(stderr, "Failed to encode image [%s]!\n", argv[index]);
ret = EXIT_FAILURE; continue;
}

duration = gpujpeg_get_time() - duration;
duration_all_iterations += duration;
if ( iteration == 0 ) {
duration_first_iteration = duration;
}
struct gpujpeg_duration_stats stats;
rc = gpujpeg_encoder_get_stats(encoder, &stats);

if ( rc == 0 && param.verbose >= 1 ) {
printf(" -Copy To Device: %10.4f ms\n", stats.duration_memory_to);
if ( stats.duration_memory_map != 0.0 && stats.duration_memory_unmap != 0.0 ) {
printf(" -OpenGL Memory Map: %10.4f ms\n", stats.duration_memory_map);
printf(" -OpenGL Memory Unmap:%9.4f ms\n", stats.duration_memory_unmap);
}
printf(" -Preprocessing: %10.4f ms\n", stats.duration_preprocessor);
printf(" -DCT & Quantization:%10.4f ms\n", stats.duration_dct_quantization);
printf(" -Huffman Encoder: %10.4f ms\n", stats.duration_huffman_coder);
printf(" -Copy From Device: %10.4f ms\n", stats.duration_memory_from);
printf(" -Stream Formatter: %10.4f ms\n", stats.duration_stream);
}
printf("Encode Image GPU: %10.4f ms (only in-GPU processing)\n", stats.duration_in_gpu);
printf("Encode Image Bare: %10.4f ms (without copy to/from GPU memory)\n", duration * 1000.0 - stats.duration_memory_to - stats.duration_memory_from);
printf("Encode Image: %10.4f ms\n", duration * 1000.0);
}
if ( iterate > 1 ) {
printf("\n");
printf("Avg Encode Duration: %10.4f ms\n", duration_all_iterations * 1000.0 / iterate);
if ( param.verbose >= 1 ) {
printf("Avg w/o 1st Iter: %10.4f ms\n", (duration_all_iterations - duration_first_iteration) * 1000.0 / (iterate - 1));
}
printf("\n");
}

duration = gpujpeg_get_time();
Expand Down Expand Up @@ -720,16 +684,11 @@ main(int argc, char *argv[])
gpujpeg_decoder_output_set_default(&decoder_output);
}

double duration_all_iterations = 0;
double duration_first_iteration = 0;

for ( int iteration = 0; iteration < iterate; iteration++ ) {
if ( iterate > 1 ) {
printf("\nIteration #%d:\n", iteration + 1);
}

duration = gpujpeg_get_time();

// Decode image
if ( (rc = gpujpeg_decoder_decode(decoder, image, image_size, &decoder_output)) != 0 ) {
if (rc == GPUJPEG_ERR_RESTART_CHANGE && param_image.width != 0 && param_image.height != 0) {
Expand All @@ -738,38 +697,6 @@ main(int argc, char *argv[])
fprintf(stderr, "Failed to decode image [%s]!\n", argv[index]);
ret = EXIT_FAILURE; continue;
}

duration = gpujpeg_get_time() - duration;
duration_all_iterations += duration;
if ( iteration == 0 ) {
duration_first_iteration = duration;
}
struct gpujpeg_duration_stats stats;
rc = gpujpeg_decoder_get_stats(decoder, &stats);

if ( rc == 0 && param.verbose >= 1 ) {
printf(" -Stream Reader: %10.4f ms\n", stats.duration_stream);
printf(" -Copy To Device: %10.4f ms\n", stats.duration_memory_to);
printf(" -Huffman Decoder: %10.4f ms\n", stats.duration_huffman_coder);
printf(" -DCT & Quantization:%10.4f ms\n", stats.duration_dct_quantization);
printf(" -Postprocessing: %10.4f ms\n", stats.duration_preprocessor);
printf(" -Copy From Device: %10.4f ms\n", stats.duration_memory_from);
if ( stats.duration_memory_map != 0.0 && stats.duration_memory_unmap != 0.0 ) {
printf(" -OpenGL Memory Map: %10.4f ms\n", stats.duration_memory_map);
printf(" -OpenGL Memory Unmap:%9.4f ms\n", stats.duration_memory_unmap);
}
}
printf("Decode Image GPU: %10.4f ms (only in-GPU processing)\n", stats.duration_in_gpu);
printf("Decode Image Bare: %10.4f ms (without copy to/from GPU memory)\n", duration * 1000.0 - stats.duration_memory_to - stats.duration_memory_from);
printf("Decode Image: %10.4f ms\n", duration * 1000.0);
}
if ( iterate > 1 ) {
printf("\n");
printf("Avg Decode Duration: %10.4f ms\n", duration_all_iterations * 1000.0 / iterate);
if ( param.verbose >= 1 ) {
printf("Avg w/o 1st Iter: %10.4f ms\n", (duration_all_iterations - duration_first_iteration) * 1000.0 / (iterate - 1));
}
printf("\n");
}

uint8_t* data = NULL;
Expand Down

0 comments on commit 93ad3da

Please sign in to comment.