Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose liblzma's 'preset_dict' feature #6

Merged
merged 1 commit into from
Dec 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 49 additions & 29 deletions Lzma.xs
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,21 @@ typedef struct di_stream {

} di_stream;

typedef struct di_filter {
lzma_filter filter;
SV* dict;
} di_filter;

typedef di_stream * deflateStream ;
typedef di_stream * Compress__Raw__Lzma ;
typedef di_stream * Compress__Raw__Lzma__Encoder ;
typedef di_stream * Compress__Raw__Lzma__Decoder ;

typedef lzma_filter * Lzma__Filter ;
typedef lzma_filter * Lzma__Filter__Lzma;
typedef lzma_filter * Lzma__Filter__BCJ ;
typedef lzma_filter * Lzma__Filter__Delta ;
typedef lzma_filter * Lzma__Filter__SubBlock ;
typedef di_filter * Lzma__Filter ;
typedef di_filter * Lzma__Filter__Lzma;
typedef di_filter * Lzma__Filter__BCJ ;
typedef di_filter * Lzma__Filter__Delta ;
typedef di_filter * Lzma__Filter__SubBlock ;

typedef di_stream * inflateStream ;
typedef lzma_options_lzma * Compress__Raw__Lzma__Options;
Expand Down Expand Up @@ -363,13 +368,13 @@ setupFilters(di_stream* s, AV* filters, const char* properties)
{
SV * fptr = (SV*) * av_fetch(f, i, FALSE) ;
IV tmp = SvIV((SV*)SvRV(fptr));
lzma_filter* filter = INT2PTR(lzma_filter*, tmp);
di_filter* filter = INT2PTR(di_filter*, tmp);

/* Keep a reference to the filter so it doesn't get destroyed */
s->sv_filters[i] = newSVsv(fptr) ;

s->filters[i].id = filter->id;
s->filters[i].options = filter->options;
s->filters[i].id = filter->filter.id;
s->filters[i].options = filter->filter.options;
}
}

Expand Down Expand Up @@ -1345,23 +1350,25 @@ int
id(filter)
Lzma::Filter filter
CODE:
RETVAL = filter->id;
RETVAL = filter->filter.id;
OUTPUT:
RETVAL

void
DESTROY(s)
Lzma::Filter s
CODE:
if (s->options)
Safefree(s->options) ;
if (s->filter.options)
Safefree(s->filter.options) ;
if (s->dict)
SvREFCNT_dec(s->dict);
Safefree(s) ;


MODULE = Lzma::Filter::Lzma PACKAGE = Lzma::Filter::Lzma

Lzma::Filter::Lzma
_mk(want_lzma2, dict_size, lc, lp, pb, mode, nice_len, mf, depth)
_mk(want_lzma2, dict_size, lc, lp, pb, mode, nice_len, mf, depth, preset_dict)
bool want_lzma2
uint32_t dict_size
uint32_t lc
Expand All @@ -1371,13 +1378,26 @@ _mk(want_lzma2, dict_size, lc, lp, pb, mode, nice_len, mf, depth)
uint32_t nice_len
lzma_match_finder mf
uint32_t depth
SV* preset_dict
CODE:
lzma_options_lzma* p;
ZMALLOC(RETVAL, lzma_filter) ;
RETVAL->id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->options;
ZMALLOC(RETVAL, di_filter) ;
RETVAL->filter.id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->filter.options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->filter.options;
setDefaultOptions(p);

RETVAL->dict = newSVsv( deRef(preset_dict, "preset dict") );

size_t preset_len = 0;
p->preset_dict = (void *)SvPVbyte_force(RETVAL->dict,preset_len);
p->preset_dict_size = preset_len;
if ( p->preset_dict_size == 0 ) {
SvREFCNT_dec(RETVAL->dict);
p->preset_dict = NULL;
RETVAL->dict = NULL;
}

p->dict_size = dict_size ;
p->lc = lc ;
p->lp = lp ;
Expand All @@ -1395,10 +1415,10 @@ _mkPreset(want_lzma2, preset)
uint32_t preset
CODE:
lzma_options_lzma* p;
ZMALLOC(RETVAL, lzma_filter) ;
RETVAL->id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->options;
ZMALLOC(RETVAL, di_filter) ;
RETVAL->filter.id = want_lzma2 ? LZMA_FILTER_LZMA2 : LZMA_FILTER_LZMA1 ;
ZMALLOC(RETVAL->filter.options, lzma_options_lzma) ;
p = (lzma_options_lzma*)RETVAL->filter.options;
lzma_lzma_preset(p, preset);
OUTPUT:
RETVAL
Expand All @@ -1410,10 +1430,10 @@ _mk(id, offset=0)
int id
int offset
CODE:
ZMALLOC(RETVAL, lzma_filter) ;
ZMALLOC(RETVAL->options, lzma_options_bcj) ;
RETVAL->id = id;
((lzma_options_bcj*)(RETVAL->options))->start_offset = offset;
ZMALLOC(RETVAL, di_filter) ;
ZMALLOC(RETVAL->filter.options, lzma_options_bcj) ;
RETVAL->filter.id = id;
((lzma_options_bcj*)(RETVAL->filter.options))->start_offset = offset;
OUTPUT:
RETVAL

Expand All @@ -1424,11 +1444,11 @@ _mk(type=LZMA_DELTA_TYPE_BYTE, dist=LZMA_DELTA_DIST_MIN)
lzma_delta_type type
uint32_t dist
CODE:
ZMALLOC(RETVAL, lzma_filter) ;
ZMALLOC(RETVAL->options, lzma_options_delta) ;
RETVAL->id = LZMA_FILTER_DELTA;
((lzma_options_delta*)(RETVAL->options))->type = type;
((lzma_options_delta*)(RETVAL->options))->dist = dist;
ZMALLOC(RETVAL, di_filter) ;
ZMALLOC(RETVAL->filter.options, lzma_options_delta) ;
RETVAL->filter.id = LZMA_FILTER_DELTA;
((lzma_options_delta*)(RETVAL->filter.options))->type = type;
((lzma_options_delta*)(RETVAL->filter.options))->dist = dist;
OUTPUT:
RETVAL

Expand Down
17 changes: 14 additions & 3 deletions lib/Compress/Raw/Lzma.pm
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

package Compress::Raw::Lzma;

use strict ;
Expand Down Expand Up @@ -789,7 +788,7 @@ sub Lzma::Filter::Lzma::mk
my $got = Compress::Raw::Lzma::ParseParameters(0,
{
'DictSize' => [1, 1, Parse_unsigned(), LZMA_DICT_SIZE_DEFAULT()],
#'PreserDict' => [1, 1, Parse_unsigned(), undef],
'PresetDict' => [1, 1, Parse_string(), undef],
'Lc' => [1, 1, Parse_unsigned(), LZMA_LC_DEFAULT()],
'Lp' => [1, 1, Parse_unsigned(), LZMA_LP_DEFAULT()],
'Pb' => [1, 1, Parse_unsigned(), LZMA_PB_DEFAULT()],
Expand Down Expand Up @@ -847,6 +846,7 @@ sub Lzma::Filter::Lzma::mk
$Nice,
$Mf,
$got->value('Depth'),
$got->value('PresetDict'),
);

bless $obj, $pkg
Expand Down Expand Up @@ -882,7 +882,6 @@ sub Lzma::Filter::Lzma1::Preset
Lzma::Filter::Lzma::mkPreset(0, @_);
}


@Lzma::Filter::Lzma2::ISA = qw(Lzma::Filter::Lzma);
sub Lzma::Filter::Lzma2
{
Expand Down Expand Up @@ -1522,6 +1521,18 @@ least C<LZMA_DICT_SIZE_MIN>.
Defaults to C<LZMA_DICT_SIZE_DEFAULT>.
=item PresetDict => $dict
Provide an initial dictionary. This value is used to initialize the LZ77 history window.
This feature only works correctly with raw encoding and decoding.
You may not be able to decode other formats that have been encoded with a preset dictionary.
C<$dict> should contain typical strings that occur in the files being compressed,
with the most probable strings near the end of the preset dictionary.
If C<$dict> is larger than C<DictSize>, only the last C<DictSize> bytes are processed.
=item Lc => $value
Number of literal context bits.
Expand Down
86 changes: 86 additions & 0 deletions t/10preset_dict.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir 't' if -d 't';
        @INC = ("../lib", "lib/compress");
    }
}

# Exercises the PresetDict option of Lzma::Filter::Lzma2:
#   1. raw-encode the same payload with and without a preset dictionary,
#   2. check that the dictionary-assisted output is the smaller of the two,
#   3. modify the caller's dictionary scalar, then raw-decode with the same
#      filter object and check that the payload round-trips.

use lib qw(t);
use strict;
use warnings;
use bytes;

use Test::More tests => 15;

BEGIN { use_ok('Compress::Raw::Lzma', 2); }

my $dict        = "sphinx of black quartz judge my vow";
my $to_compress = "sphinx of black quartz judge my vow" x 100;

# Options shared by both filters, so the preset dictionary is the only
# difference between the two encodings being compared.
my @lzma2_options = (
    DictSize => 1024 * 1024 * 8,
    Lc       => 0,
    Lp       => 3,
    Pb       => LZMA_PB_MAX,
    Mode     => LZMA_MODE_NORMAL,
    Nice     => 128,
    Mf       => LZMA_MF_HC4,
    Depth    => 77,
);

my $filter         = Lzma::Filter::Lzma2(PresetDict => \$dict, @lzma2_options);
my $filter_no_dict = Lzma::Filter::Lzma2(@lzma2_options);

# Baseline: encode without a preset dictionary.
my $out_no_dict;
{
    my ($enc, $err) = Compress::Raw::Lzma::RawEncoder->new(
        Filter       => [$filter_no_dict],
        AppendOutput => 1,
    );
    ok $enc, "created raw encoder without preset dictionary";
    cmp_ok $err, '==', LZMA_OK, "  status is LZMA_OK";

    # Encode a copy so $to_compress stays intact for the final comparison.
    my $tmp    = $to_compress;
    my $status = $enc->code($tmp, $out_no_dict);
    cmp_ok $status, '==', LZMA_OK, "  status is LZMA_OK";

    cmp_ok $enc->flush($out_no_dict), '==', LZMA_STREAM_END,
        "  flush returned LZMA_STREAM_END";
}

# Encode the same payload with the preset dictionary.
my $out_dict;
{
    my ($enc, $err) = Compress::Raw::Lzma::RawEncoder->new(
        Filter       => [$filter],
        AppendOutput => 1,
    );
    ok $enc, "created raw encoder with preset dictionary";
    cmp_ok $err, '==', LZMA_OK, "  status is LZMA_OK";

    my $tmp    = $to_compress;
    my $status = $enc->code($tmp, $out_dict);
    cmp_ok $status, '==', LZMA_OK, "  status is LZMA_OK";

    cmp_ok $enc->flush($out_dict), '==', LZMA_STREAM_END,
        "  flush returned LZMA_STREAM_END";

    cmp_ok length($out_dict), '<', length($out_no_dict),
        "  compressed w/ dictionary is shorter than without";
}

# Clobber the caller's dictionary, just to make sure this doesn't break
# anything: decoding below must still succeed with the same filter object.
substr($dict, 0, 2) = 'xx';

# Decode the dictionary-assisted stream.
my $out_decompressed;
{
    my ($dec, $err) = Compress::Raw::Lzma::RawDecoder->new(
        Filter       => [$filter],
        AppendOutput => 1,
    );
    ok $dec, "created raw decoder with preset dictionary";
    cmp_ok $err, '==', LZMA_OK, "  status is LZMA_OK";

    my $status = $dec->code($out_dict, $out_decompressed);
    # The comma before the description was missing in the original line.
    cmp_ok $status, '==', LZMA_STREAM_END, "  status is LZMA_STREAM_END";

    is length($out_decompressed), length($to_compress),
        "  decompressed length matches the original";
    ok $out_decompressed eq $to_compress,
        "  decompressed data matches the original";
}