Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce possible PrecisionCascade versions of (un)zip functions #12

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions core/zip/inc/RZip.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ extern "C" unsigned long R__memcompress(char *tgt, unsigned long tgtsize, char *
extern "C" void R__zipMultipleAlgorithm(int cxlevel, int *srcsize, char *src, int *tgtsize, char *tgt, int *irep, ROOT::RCompressionSetting::EAlgorithm::EValues,
EDataType datatype = EDataType::kNoType_t, int configsize = 0, char *configarray = nullptr);

/**
 * Compress a single source buffer into `tgt_number` target buffers (a "precision cascade").
 *
 * \param cxlevels     Array with one compression level per target. The implementation requires every
 *                     level to be >= 1, except that the last of several targets may use level 0.
 * \param srcsize      Pointer to the size of the source buffer; inputs that are too small are rejected.
 * \param src          Source buffer.
 * \param tgtsizes     Array of `tgt_number` pointers to the target buffer sizes (forwarded to the backend).
 * \param tgts         Array of `tgt_number` target buffers (forwarded to the backend).
 * \param tgt_number   Number of targets described by `cxlevels`, `tgtsizes` and `tgts`.
 * \param irep         Output status; set to 0 when the request is rejected by the argument checks.
 * \param datatype     Element type hint forwarded to the BLAST backend.
 * \param configsize   Currently unused by the implementation.
 * \param configarray  Currently unused by the implementation.
 *
 * The unnamed algorithm parameter selects the backend; only kBLAST is dispatched by the implementation.
 */
extern "C" void R__zipPrecisionCascade(int *cxlevels, int *srcsize, char *src, int **tgtsizes, char **tgts, int tgt_number, int *irep, ROOT::RCompressionSetting::EAlgorithm::EValues,
EDataType datatype = EDataType::kNoType_t, int configsize = 0, char *configarray = nullptr);

/**
* This is a historical definition, prior to ROOT supporting multiple algorithms in a single file. Use
* R__zipMultipleAlgorithm instead.
Expand All @@ -33,6 +36,8 @@ extern "C" void R__zip(int cxlevel, int *srcsize, char *src, int *tgtsize, char

extern "C" void R__unzip(int *srcsize, unsigned char *src, int *tgtsize, unsigned char *tgt, int *irep, int configsize = 0, char *configarray = nullptr);

/**
 * Decompress a set of `src_number` precision-cascade sources into one target buffer.
 *
 * Before dispatching, the implementation checks every source: it must hold at least a full header,
 * the header must be valid, the compressed length recorded in the header plus the header size must
 * equal `*srcsizes[i]`, and all sources must record the same uncompressed size. `*tgtsize` must be
 * at least that uncompressed size. On any failed check `*irep` is left at 0 and a message is printed
 * to stderr. Only sources carrying a BLAST header are actually decompressed (via R__unzipBLAST).
 *
 * \param srcsizes     Array of `src_number` pointers to the source buffer sizes.
 * \param srcs         Array of `src_number` source buffers.
 * \param tgtsize      Pointer to the capacity of the target buffer.
 * \param tgt          Target buffer.
 * \param src_number   Number of sources provided (passed by reference).
 * \param irep         Output status; reset to 0 on entry.
 * \param configsize   Currently unused by the implementation.
 * \param configarray  Currently unused by the implementation.
 */
extern "C" void R__unzipPrecisionCascade(int **srcsizes, unsigned char **srcs, int *tgtsize, unsigned char *tgt, int& src_number, int *irep, int configsize = 0, char *configarray = nullptr);

extern "C" int R__unzip_header(int *srcsize, unsigned char *src, int *tgtsize);

enum { kMAXZIPBUF = 0xffffff };
Expand Down
103 changes: 100 additions & 3 deletions core/zip/src/RZip.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ void R__zipMultipleAlgorithm(int cxlevel, int *srcsize, char *src, int *tgtsize,
} else if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kZSTD) {
R__zipZSTD(cxlevel, srcsize, src, tgtsize, tgt, irep);
} else if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kBLAST) {
R__zipBLAST(cxlevel, srcsize, src, tgtsize, tgt, irep, datatype);
R__zipBLAST(&cxlevel, srcsize, src, &tgtsize, &tgt, 1, irep, datatype);
} else if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kOldCompressionAlgo || compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kUseGlobal) {
R__zipOld(cxlevel, srcsize, src, tgtsize, tgt, irep);
} else {
Expand All @@ -118,6 +118,31 @@ void R__zipMultipleAlgorithm(int cxlevel, int *srcsize, char *src, int *tgtsize,
}
}

/**
 * Compress `src` into a cascade of `tgt_number` target buffers.
 *
 * \param cxlevels              One compression level per target. Every level must be >= 1, except
 *                              that the last of several targets may be 0.
 * \param srcsize               Pointer to the source size; sources smaller than `1 + HDRSIZE + 1`
 *                              bytes are rejected.
 * \param src                   Source buffer.
 * \param tgtsizes              Per-target pointers to the target sizes, forwarded to the backend.
 * \param tgts                  Per-target output buffers, forwarded to the backend.
 * \param tgt_number            Number of targets; must be at least 1.
 * \param irep                  Output status. Set to 0 whenever the request is rejected here
 *                              (bad arguments or unsupported algorithm); otherwise it is whatever
 *                              the backend reports.
 * \param compressionAlgorithm  Requested backend; only kBLAST is dispatched.
 * \param datatype              Element type hint forwarded to the BLAST backend.
 *
 * The trailing `configsize` / `configarray` parameters are accepted for interface symmetry but unused.
 */
void R__zipPrecisionCascade(int* cxlevels, int *srcsize, char *src, int **tgtsizes, char **tgts, int tgt_number, int *irep, ROOT::RCompressionSetting::EAlgorithm::EValues compressionAlgorithm,
                            EDataType datatype /* = kNoType_t */,
                            int /* configsize = 0 */, char * /* configarray = nullptr */)
{
   // With no target there is nothing to validate or to compress into; fail
   // explicitly instead of handing an empty cascade to the backend.
   if (tgt_number < 1) {
      *irep = 0;
      return;
   }

   if (*srcsize < 1 + HDRSIZE + 1) {
      *irep = 0;
      return;
   }

   for (int tgt_idx = 0; tgt_idx < tgt_number; tgt_idx++) {
      // A level of 0 is only acceptable for the last of multiple (not just one) targets;
      // every other target needs a strictly positive level.
      const bool isLastOfSeveral = (tgt_idx > 0 && tgt_idx == tgt_number - 1);
      const int cxlevel_min = isLastOfSeveral ? 0 : 1;
      if (cxlevels[tgt_idx] < cxlevel_min) {
         *irep = 0;
         return;
      }
   }

   if (compressionAlgorithm == ROOT::RCompressionSetting::EAlgorithm::kBLAST) {
      R__zipBLAST(cxlevels, srcsize, src, tgtsizes, tgts, tgt_number, irep, datatype);
   } else {
      // No other algorithm handles a precision cascade. Report failure rather than
      // returning with *irep untouched, which the caller would read as a stale status.
      *irep = 0;
   }
}

// The very old algorithm for backward compatibility
// 0 for selecting with R__ZipMode in a backward compatible way
// 3 for selecting in other cases
Expand Down Expand Up @@ -339,7 +364,7 @@ void R__unzip(int *srcsize, uch *src, int *tgtsize, uch *tgt, int *irep, int /*
long ibufcnt, obufcnt;

*irep = 0L;

/* C H E C K H E A D E R */

if (*srcsize < HDRSIZE) {
Expand Down Expand Up @@ -383,7 +408,7 @@ void R__unzip(int *srcsize, uch *src, int *tgtsize, uch *tgt, int *irep, int /*
R__unzipZSTD(srcsize, src, tgtsize, tgt, irep);
return;
} else if (is_valid_header_blast(src)) {
R__unzipBLAST(srcsize, src, tgtsize, tgt, irep);
R__unzipBLAST(&srcsize, &src, tgtsize, tgt, 1, irep);
return;
}

Expand All @@ -405,6 +430,78 @@ void R__unzip(int *srcsize, uch *src, int *tgtsize, uch *tgt, int *irep, int /*
*irep = isize;
}

/**
 * Decompress `src_number` precision-cascade sources into the single buffer `tgt`.
 *
 * Every source header is validated before any decompression takes place:
 *  - the source must be at least HDRSIZE bytes long and carry a valid header;
 *  - the compressed length stored in the header plus HDRSIZE must equal the source size;
 *  - all sources must record the same uncompressed size, which must fit in `*tgtsize`.
 *
 * \param srcsizes    Per-source pointers to the source sizes.
 * \param srcs        Per-source compressed buffers.
 * \param tgtsize     Pointer to the capacity of `tgt`.
 * \param tgt         Output buffer.
 * \param src_number  Number of sources; must be at least 1.
 * \param irep        Output status. Reset to 0 on entry and left at 0 on every failure;
 *                    on success it is whatever R__unzipBLAST reports.
 *
 * The trailing `configsize` / `configarray` parameters are accepted for interface symmetry but unused.
 */
void R__unzipPrecisionCascade(int **srcsizes, uch **srcs, int *tgtsize, uch *tgt, int& src_number, int *irep, int /* configsize = 0 */, char * /* configarray = nullptr */)
{
   *irep = 0;

   // The algorithm dispatch below inspects srcs[0], so an empty source list must be
   // rejected up front to avoid reading past the end of the array.
   if (src_number < 1) {
      fprintf(stderr, "R__unzipPrecisionCascade: no sources provided\n");
      return;
   }

   /* This check can be done here, or in R__unzipBLAST, depending on whether
      the check may apply to other [future] PrecisionCascade algorithms
   if (src_number > srcs[0][10]) {
      src_number = srcs[0][10]; // more sources than we need?!? Ignore extras for now
   } else if (src_number < srcs[0][10] && srcs[src_number-1][2] == 0) {
      // This is a simple check that full precision is only ever possible with all sources from compression.
      // However, it is possible that full precision was not saved at compression,
      // so even using all saved sources may not return full precision
      fprintf(stderr, "R__unzipPrecisionCascade: too few sources provided (%d) to obtain full precision (requires at least %d sources)", src_number, srcs[0][10]);
      return;
   }
   */

   const long obufcnt = *tgtsize; // capacity of the output buffer
   long isize = 0;                // uncompressed size announced by the first source

   for (int src_idx = 0; src_idx < src_number; src_idx++) {

      /*   C H E C K   H E A D E R   */

      int *srcsize = srcsizes[src_idx];
      if (*srcsize < HDRSIZE) {
         fprintf(stderr, "R__unzipPrecisionCascade: too small source (index %d)\n",src_idx);
         return;
      }

      uch *src = srcs[src_idx];
      if (!is_valid_header(src)) {
         fprintf(stderr, "Error R__unzipPrecisionCascade: error in header\n");
         return;
      }

      // Bytes 3-5: compressed payload length; bytes 6-8: uncompressed length (both little-endian).
      const long ibufcnt = (long)src[3] | ((long)src[4] << 8) | ((long)src[5] << 16);
      const long isize_temp = (long)src[6] | ((long)src[7] << 8) | ((long)src[8] << 16);

      if (src_idx) {
         // Every further source must describe the same uncompressed payload as the first one.
         if (isize_temp != isize) {
            fprintf(stderr, "R__unzipPrecisionCascade: mismatching source headers\n");
            return;
         }
      } else {
         if (obufcnt < isize_temp) {
            fprintf(stderr, "R__unzipPrecisionCascade: too small target\n");
            return;
         }
         isize = isize_temp;
      }

      if (ibufcnt + HDRSIZE != *srcsize) {
         fprintf(stderr, "R__unzipPrecisionCascade: discrepancy in source length\n");
         return;
      }
   }

   /* Only BLAST currently implements a precision cascade */
   if (is_valid_header_blast(srcs[0])) {
      R__unzipBLAST(srcsizes, srcs, tgtsize, tgt, src_number, irep);
      return;
   }

   // Nothing has been written to tgt at this point, so reporting `isize` decompressed
   // bytes would hand the caller an unfilled buffer flagged as success. Leave *irep at 0.
   fprintf(stderr, "R__unzipPrecisionCascade: unsupported compression algorithm for precision cascade\n");
}

void R__unzipZLIB(int *srcsize, unsigned char *src, int *tgtsize, unsigned char *tgt, int *irep)
{
z_stream stream; /* decompression stream */
Expand Down