Skip to content

Commit

Permalink
WIP checkpoint commit
Browse files Browse the repository at this point in the history
* Add safety
* But: this does involve copying the dictionary:
    It would be nice to somehow take advantage of copy-on-write (CoW) on Perls
      that support it, to avoid copying a possibly multi-megabyte dictionary.
    Given that the dictionary can be passed as a ref (to avoid Perl itself copying the buffer),
      I still wonder whether it is possible to (optionally) steal the buffer from the
      provided dictionary.
    Although the multi-megabyte copy isn't actually that big of a deal.
  • Loading branch information
anall committed Dec 16, 2020
1 parent 16f28f3 commit dd55729
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 40 deletions.
80 changes: 44 additions & 36 deletions Lzma.xs
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,22 @@ typedef struct di_stream {

} di_stream;

/* Wrapper around a liblzma filter description that also carries the Perl
 * scalar backing a preset dictionary. The Lzma::Filter::* typemaps point at
 * this struct, and Lzma::Filter::DESTROY releases both members. */
typedef struct di_filter {
/* Filter id plus a pointer to its options struct; the options are
 * allocated with ZMALLOC by the constructors and released with Safefree. */
lzma_filter filter;
/* Private copy (newSVsv) of the preset dictionary, set only by the
 * PresetDict constructor; its reference count is dropped in DESTROY.
 * NOTE(review): presumably left NULL by ZMALLOC for filters without a
 * dictionary -- confirm ZMALLOC zero-fills. */
SV* dict;
} di_filter;

typedef di_stream * deflateStream ;
typedef di_stream * Compress__Raw__Lzma ;
typedef di_stream * Compress__Raw__Lzma__Encoder ;
typedef di_stream * Compress__Raw__Lzma__Decoder ;

typedef lzma_filter * Lzma__Filter ;
typedef lzma_filter * Lzma__Filter__Lzma;
typedef lzma_filter * Lzma__Filter__Lzma__PresetDict;
typedef lzma_filter * Lzma__Filter__BCJ ;
typedef lzma_filter * Lzma__Filter__Delta ;
typedef lzma_filter * Lzma__Filter__SubBlock ;
typedef di_filter * Lzma__Filter ;
typedef di_filter * Lzma__Filter__Lzma;
typedef di_filter * Lzma__Filter__Lzma__PresetDict;
typedef di_filter * Lzma__Filter__BCJ ;
typedef di_filter * Lzma__Filter__Delta ;
typedef di_filter * Lzma__Filter__SubBlock ;

typedef di_stream * inflateStream ;
typedef lzma_options_lzma * Compress__Raw__Lzma__Options;
Expand Down Expand Up @@ -364,13 +369,13 @@ setupFilters(di_stream* s, AV* filters, const char* properties)
{
SV * fptr = (SV*) * av_fetch(f, i, FALSE) ;
IV tmp = SvIV((SV*)SvRV(fptr));
lzma_filter* filter = INT2PTR(lzma_filter*, tmp);
di_filter* filter = INT2PTR(di_filter*, tmp);

/* Keep a reference to the filter so it doesn't get destroyed */
s->sv_filters[i] = newSVsv(fptr) ;

s->filters[i].id = filter->id;
s->filters[i].options = filter->options;
s->filters[i].id = filter->filter.id;
s->filters[i].options = filter->filter.options;
}
}

Expand Down Expand Up @@ -1346,16 +1351,18 @@ int
id(filter)
Lzma::Filter filter
CODE:
RETVAL = filter->id;
RETVAL = filter->filter.id;
OUTPUT:
RETVAL

void
DESTROY(s)
Lzma::Filter s
CODE:
if (s->options)
Safefree(s->options) ;
if (s->filter.options)
Safefree(s->filter.options) ;
if (s->dict)
SvREFCNT_dec(s->dict);
Safefree(s) ;


Expand All @@ -1374,10 +1381,10 @@ _mk(want_lzma2, dict_size, lc, lp, pb, mode, nice_len, mf, depth)
uint32_t depth
CODE:
lzma_options_lzma* p;
ZMALLOC(RETVAL, lzma_filter) ;
RETVAL->id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->options;
ZMALLOC(RETVAL, di_filter) ;
RETVAL->filter.id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->filter.options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->filter.options;
setDefaultOptions(p);
p->dict_size = dict_size ;
p->lc = lc ;
Expand Down Expand Up @@ -1406,15 +1413,16 @@ _mk(want_lzma2, dict_size, lc, lp, pb, mode, nice_len, mf, depth, preset_dict)
SV* preset_dict
CODE:
lzma_options_lzma* p;
ZMALLOC(RETVAL, lzma_filter) ;
RETVAL->id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->options;
ZMALLOC(RETVAL, di_filter) ;
RETVAL->filter.id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->filter.options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->filter.options;
setDefaultOptions(p);
// FIXME: actually steal ownership of the buffer in the SV
if ( preset_dict ) {
size_t preset_len;
p->preset_dict = SvPV(preset_dict, preset_len);
RETVAL->dict = newSVsv(preset_dict);

size_t preset_len = 0;
p->preset_dict = (void *)SvPVbyte_force(RETVAL->dict,preset_len);
p->preset_dict_size = preset_len;
}
p->dict_size = dict_size ;
Expand All @@ -1434,10 +1442,10 @@ _mkPreset(want_lzma2, preset)
uint32_t preset
CODE:
lzma_options_lzma* p;
ZMALLOC(RETVAL, lzma_filter) ;
RETVAL->id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->options;
ZMALLOC(RETVAL, di_filter) ;
RETVAL->filter.id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->filter.options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->filter.options;
lzma_lzma_preset(p, preset);
OUTPUT:
RETVAL
Expand All @@ -1449,10 +1457,10 @@ _mk(id, offset=0)
int id
int offset
CODE:
ZMALLOC(RETVAL, lzma_filter) ;
ZMALLOC(RETVAL->options, lzma_options_bcj) ;
RETVAL->id = id;
((lzma_options_bcj*)(RETVAL->options))->start_offset = offset;
ZMALLOC(RETVAL, di_filter) ;
ZMALLOC(RETVAL->filter.options, lzma_options_bcj) ;
RETVAL->filter.id = id;
((lzma_options_bcj*)(RETVAL->filter.options))->start_offset = offset;
OUTPUT:
RETVAL

Expand All @@ -1463,11 +1471,11 @@ _mk(type=LZMA_DELTA_TYPE_BYTE, dist=LZMA_DELTA_DIST_MIN)
lzma_delta_type type
uint32_t dist
CODE:
ZMALLOC(RETVAL, lzma_filter) ;
ZMALLOC(RETVAL->options, lzma_options_delta) ;
RETVAL->id = LZMA_FILTER_DELTA;
((lzma_options_delta*)(RETVAL->options))->type = type;
((lzma_options_delta*)(RETVAL->options))->dist = dist;
ZMALLOC(RETVAL, di_filter) ;
ZMALLOC(RETVAL->filter.options, lzma_options_delta) ;
RETVAL->filter.id = LZMA_FILTER_DELTA;
((lzma_options_delta*)(RETVAL->filter.options))->type = type;
((lzma_options_delta*)(RETVAL->filter.options))->dist = dist;
OUTPUT:
RETVAL

Expand Down
3 changes: 2 additions & 1 deletion lib/Compress/Raw/Lzma.pm
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,7 @@ sub Lzma::Filter::Lzma::PresetDict::mk
die "Nice $Nice not in range 2-273"
if $Nice < 2 || $Nice > 273;

my $PresetDict = $got->value('PresetDict');
my $obj = Lzma::Filter::Lzma::PresetDict::_mk($type,
$DictSize,
$Lc,
Expand All @@ -938,7 +939,7 @@ sub Lzma::Filter::Lzma::PresetDict::mk
$Nice,
$Mf,
$got->value('Depth'),
$got->value('PresetDict'),
ref($PresetDict) eq 'SCALAR' ? $$PresetDict : $PresetDict,
);

bless $obj, $pkg
Expand Down
7 changes: 4 additions & 3 deletions t/10preset_dict.t
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ use Test::More tests => 15;

BEGIN { use_ok('Compress::Raw::Lzma', 2); }

my $dict = "sphinx of black" . (join("",map { chr(int(rand(26))+ord('a')) } (0..16000) )) . " quartz judge my vow";
my $dict = "sphinx of black quartz judge my vow";
my $to_compress = "sphinx of black quartz judge my vow" x 100;

my $filter = Lzma::Filter::Lzma2::PresetDict(
PresetDict => $dict,
PresetDict => \$dict,
DictSize => 1024 * 1024 * 8,
Lc => 0,
Lp => 3,
Expand All @@ -38,7 +38,6 @@ my $filter_no_dict = Lzma::Filter::Lzma2(
Mf => LZMA_MF_HC4,
Depth => 77);


my ($x,$err,$status);
my $out_no_dict;
{
Expand Down Expand Up @@ -69,6 +68,8 @@ my $out_dict;
cmp_ok length($out_dict), '<', length($out_no_dict), " compressed w/ dictionary is shorter than without";
}

substr($dict,0,2) = 'xx'; # clobber the dictionary, just to make sure this doesn't break anything

my $out_decompressed;
{
my ($x,$err,$status);
Expand Down

0 comments on commit dd55729

Please sign in to comment.