Skip to content

Commit

Permalink
Merge pull request #6 from anall/wip/preset_dictionary
Browse files Browse the repository at this point in the history
Expose liblzma's 'preset_dict' feature
  • Loading branch information
pmqs authored Dec 18, 2020
2 parents 91be320 + 68fe695 commit 3432a76
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 32 deletions.
78 changes: 49 additions & 29 deletions Lzma.xs
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,21 @@ typedef struct di_stream {

} di_stream;

typedef struct di_filter {
lzma_filter filter;
SV* dict;
} di_filter;

typedef di_stream * deflateStream ;
typedef di_stream * Compress__Raw__Lzma ;
typedef di_stream * Compress__Raw__Lzma__Encoder ;
typedef di_stream * Compress__Raw__Lzma__Decoder ;

typedef lzma_filter * Lzma__Filter ;
typedef lzma_filter * Lzma__Filter__Lzma;
typedef lzma_filter * Lzma__Filter__BCJ ;
typedef lzma_filter * Lzma__Filter__Delta ;
typedef lzma_filter * Lzma__Filter__SubBlock ;
typedef di_filter * Lzma__Filter ;
typedef di_filter * Lzma__Filter__Lzma;
typedef di_filter * Lzma__Filter__BCJ ;
typedef di_filter * Lzma__Filter__Delta ;
typedef di_filter * Lzma__Filter__SubBlock ;

typedef di_stream * inflateStream ;
typedef lzma_options_lzma * Compress__Raw__Lzma__Options;
Expand Down Expand Up @@ -363,13 +368,13 @@ setupFilters(di_stream* s, AV* filters, const char* properties)
{
SV * fptr = (SV*) * av_fetch(f, i, FALSE) ;
IV tmp = SvIV((SV*)SvRV(fptr));
lzma_filter* filter = INT2PTR(lzma_filter*, tmp);
di_filter* filter = INT2PTR(di_filter*, tmp);

/* Keep a reference to the filter so it doesn't get destroyed */
s->sv_filters[i] = newSVsv(fptr) ;

s->filters[i].id = filter->id;
s->filters[i].options = filter->options;
s->filters[i].id = filter->filter.id;
s->filters[i].options = filter->filter.options;
}
}

Expand Down Expand Up @@ -1345,23 +1350,25 @@ int
id(filter)
Lzma::Filter filter
CODE:
RETVAL = filter->id;
RETVAL = filter->filter.id;
OUTPUT:
RETVAL

void
DESTROY(s)
Lzma::Filter s
CODE:
if (s->options)
Safefree(s->options) ;
if (s->filter.options)
Safefree(s->filter.options) ;
if (s->dict)
SvREFCNT_dec(s->dict);
Safefree(s) ;


MODULE = Lzma::Filter::Lzma PACKAGE = Lzma::Filter::Lzma

Lzma::Filter::Lzma
_mk(want_lzma2, dict_size, lc, lp, pb, mode, nice_len, mf, depth)
_mk(want_lzma2, dict_size, lc, lp, pb, mode, nice_len, mf, depth, preset_dict)
bool want_lzma2
uint32_t dict_size
uint32_t lc
Expand All @@ -1371,13 +1378,26 @@ _mk(want_lzma2, dict_size, lc, lp, pb, mode, nice_len, mf, depth)
uint32_t nice_len
lzma_match_finder mf
uint32_t depth
SV* preset_dict
CODE:
lzma_options_lzma* p;
ZMALLOC(RETVAL, lzma_filter) ;
RETVAL->id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->options;
ZMALLOC(RETVAL, di_filter) ;
RETVAL->filter.id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->filter.options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->filter.options;
setDefaultOptions(p);

RETVAL->dict = newSVsv( deRef(preset_dict, "preset dict") );

size_t preset_len = 0;
p->preset_dict = (void *)SvPVbyte_force(RETVAL->dict,preset_len);
p->preset_dict_size = preset_len;
if ( p->preset_dict_size == 0 ) {
SvREFCNT_dec(RETVAL->dict);
p->preset_dict = NULL;
RETVAL->dict = NULL;
}

p->dict_size = dict_size ;
p->lc = lc ;
p->lp = lp ;
Expand All @@ -1395,10 +1415,10 @@ _mkPreset(want_lzma2, preset)
uint32_t preset
CODE:
lzma_options_lzma* p;
ZMALLOC(RETVAL, lzma_filter) ;
RETVAL->id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->options;
ZMALLOC(RETVAL, di_filter) ;
RETVAL->filter.id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->filter.options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->filter.options;
lzma_lzma_preset(p, preset);
OUTPUT:
RETVAL
Expand All @@ -1410,10 +1430,10 @@ _mk(id, offset=0)
int id
int offset
CODE:
ZMALLOC(RETVAL, lzma_filter) ;
ZMALLOC(RETVAL->options, lzma_options_bcj) ;
RETVAL->id = id;
((lzma_options_bcj*)(RETVAL->options))->start_offset = offset;
ZMALLOC(RETVAL, di_filter) ;
ZMALLOC(RETVAL->filter.options, lzma_options_bcj) ;
RETVAL->filter.id = id;
((lzma_options_bcj*)(RETVAL->filter.options))->start_offset = offset;
OUTPUT:
RETVAL

Expand All @@ -1424,11 +1444,11 @@ _mk(type=LZMA_DELTA_TYPE_BYTE, dist=LZMA_DELTA_DIST_MIN)
lzma_delta_type type
uint32_t dist
CODE:
ZMALLOC(RETVAL, lzma_filter) ;
ZMALLOC(RETVAL->options, lzma_options_delta) ;
RETVAL->id = LZMA_FILTER_DELTA;
((lzma_options_delta*)(RETVAL->options))->type = type;
((lzma_options_delta*)(RETVAL->options))->dist = dist;
ZMALLOC(RETVAL, di_filter) ;
ZMALLOC(RETVAL->filter.options, lzma_options_delta) ;
RETVAL->filter.id = LZMA_FILTER_DELTA;
((lzma_options_delta*)(RETVAL->filter.options))->type = type;
((lzma_options_delta*)(RETVAL->filter.options))->dist = dist;
OUTPUT:
RETVAL

Expand Down
17 changes: 14 additions & 3 deletions lib/Compress/Raw/Lzma.pm
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

package Compress::Raw::Lzma;

use strict ;
Expand Down Expand Up @@ -789,7 +788,7 @@ sub Lzma::Filter::Lzma::mk
my $got = Compress::Raw::Lzma::ParseParameters(0,
{
'DictSize' => [1, 1, Parse_unsigned(), LZMA_DICT_SIZE_DEFAULT()],
#'PreserDict' => [1, 1, Parse_unsigned(), undef],
'PresetDict' => [1, 1, Parse_string(), undef],
'Lc' => [1, 1, Parse_unsigned(), LZMA_LC_DEFAULT()],
'Lp' => [1, 1, Parse_unsigned(), LZMA_LP_DEFAULT()],
'Pb' => [1, 1, Parse_unsigned(), LZMA_PB_DEFAULT()],
Expand Down Expand Up @@ -847,6 +846,7 @@ sub Lzma::Filter::Lzma::mk
$Nice,
$Mf,
$got->value('Depth'),
$got->value('PresetDict'),
);

bless $obj, $pkg
Expand Down Expand Up @@ -882,7 +882,6 @@ sub Lzma::Filter::Lzma1::Preset
Lzma::Filter::Lzma::mkPreset(0, @_);
}


@Lzma::Filter::Lzma2::ISA = qw(Lzma::Filter::Lzma);
sub Lzma::Filter::Lzma2
{
Expand Down Expand Up @@ -1522,6 +1521,18 @@ least C<LZMA_DICT_SIZE_MIN>.
Defaults to C<LZMA_DICT_SIZE_DEFAULT>.
=item PresetDict => $dict
Provide an initial dictionary. This value is used to initialize the LZ77 history window.
This feature only works correctly with raw encoding and decoding.
You may not be able to decode other formats that have been encoded with a preset dictionary.
C<$dict> should contain typical strings that occur in the files being compressed,
with the most probable strings near the end of the preset dictionary.
If C<$dict> is larger than C<DictSize>, only the last C<DictSize> bytes are processed.
=item Lc => $value
Number of literal context bits.
Expand Down
86 changes: 86 additions & 0 deletions t/10preset_dict.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# When PERL_CORE is set in the environment, move into the t/ directory
# (if one exists) and replace @INC with the two relative library paths.
BEGIN {
    if ($ENV{PERL_CORE}) {
        -d 't' and chdir 't';
        @INC = qw(../lib lib/compress);
    }
}

use lib qw(t);
use strict;
use warnings;
use bytes;

use Test::More tests => 15;

BEGIN { use_ok('Compress::Raw::Lzma', 2); }

# Sample data: the input is the dictionary phrase repeated 100 times, so a
# preset dictionary should give the encoder a head start.
my $dict        = "sphinx of black quartz judge my vow";
my $to_compress = $dict x 100;

# Options common to both filters, kept as an ordered list so each filter
# receives them in the same sequence.
my @shared_options = (
    DictSize => 1024 * 1024 * 8,
    Lc       => 0,
    Lp       => 3,
    Pb       => LZMA_PB_MAX,
    Mode     => LZMA_MODE_NORMAL,
    Nice     => 128,
    Mf       => LZMA_MF_HC4,
    Depth    => 77,
);

# Filter primed with the preset dictionary (passed by reference).
my $filter = Lzma::Filter::Lzma2(PresetDict => \$dict, @shared_options);

# Identical filter without a preset dictionary, used as the baseline.
my $filter_no_dict = Lzma::Filter::Lzma2(@shared_options);

my ($x, $err, $status);

# Baseline: raw-encode the input with the dictionary-less filter. The
# compressed bytes accumulate in $out_no_dict for the size comparison later.
my $out_no_dict;
{
    (my $enc, $err) = Compress::Raw::Lzma::RawEncoder->new(
        Filter       => [$filter_no_dict],
        AppendOutput => 1,
    );
    ok($enc);
    cmp_ok($err, '==', LZMA_OK, " status is LZMA_OK");

    # Encode a copy so the original input is left untouched.
    my $tmp = $to_compress;
    $status = $enc->code($tmp, $out_no_dict);
    cmp_ok($status, '==', LZMA_OK, " status is LZMA_OK");

    cmp_ok(
        $enc->flush($out_no_dict), '==', LZMA_STREAM_END,
        " flush returned LZMA_STREAM_END"
    );
}

# Raw-encode the same input with the preset-dictionary filter and check that
# the result is smaller than the baseline produced without a dictionary.
my $out_dict;
{
    my ($x, $err, $status);
    (my $enc, $err) = Compress::Raw::Lzma::RawEncoder->new(
        Filter       => [$filter],
        AppendOutput => 1,
    );
    ok($enc);
    cmp_ok($err, '==', LZMA_OK, " status is LZMA_OK");

    # Encode a copy so the original input is left untouched.
    my $tmp = $to_compress;
    $status = $enc->code($tmp, $out_dict);
    cmp_ok($status, '==', LZMA_OK, " status is LZMA_OK");

    cmp_ok(
        $enc->flush($out_dict), '==', LZMA_STREAM_END,
        " flush returned LZMA_STREAM_END"
    );

    cmp_ok(
        length($out_dict), '<', length($out_no_dict),
        " compressed w/ dictionary is shorter than without"
    );
}

# Clobber the caller's copy of the dictionary before decoding, to check that
# later changes to $dict do not break a filter that was built from it.
substr($dict,0,2) = 'xx';

# Decode the dictionary-compressed stream with the same filter and confirm
# that the round trip reproduces the original input exactly.
my $out_decompressed;
{
    my ($err, $status);
    (my $dec, $err) = Compress::Raw::Lzma::RawDecoder->new(Filter => [$filter], AppendOutput => 1);
    ok $dec;
    cmp_ok $err, '==', LZMA_OK, " status is LZMA_OK";

    $status = $dec->code($out_dict, $out_decompressed);
    # The expected value and the test description are separate arguments.
    cmp_ok $status, '==', LZMA_STREAM_END, " status is LZMA_STREAM_END";

    is length($out_decompressed), length($to_compress);
    ok $out_decompressed eq $to_compress;
}

0 comments on commit 3432a76

Please sign in to comment.