From 18f691803eeff894458b1c5661702459eb4b874c Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Thu, 25 Feb 2021 21:59:26 +0100 Subject: [PATCH] Support snappy de/compress_raw_into (#40) * Support snappy de/compress_raw_into * drop snappy de/compress_vec for raw; update benchmarks --- benchmarks/README.md | 180 +++++++++++++++++++++------------------ benchmarks/test_bench.py | 26 ++++++ src/lib.rs | 21 +++++ src/snappy.rs | 89 ++++++++++++++----- tests/test_variants.py | 25 ++++++ 5 files changed, 238 insertions(+), 103 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index ffe5285f..381032ab 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -106,67 +106,75 @@ There are two types here, `framed` and `raw`; the recommended one being snappy's `make bench-snappy-framed` ```bash ------------------------------------------------------------------------------------------------------------- benchmark: 24 tests ------------------------------------------------------------------------------------------------------------ -Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_snappy_framed[Mark.Twain-Tom.Sawyer.txt-cramjam] 66.9240 (1.0) 154.2760 (1.0) 72.3902 (1.0) 9.7916 (1.0) 69.3480 (1.0) 3.1895 (1.0) 529;887 13,814.0175 (1.0) 5297 1 -test_snappy_framed[Mark.Twain-Tom.Sawyer.txt-snappy] 127.9920 (1.91) 269.7400 (1.75) 136.3288 (1.88) 15.0409 (1.54) 129.3530 (1.87) 7.1885 (2.25) 496;644 7,335.2064 (0.53) 4257 1 -test_snappy_framed[alice29.txt-cramjam] 668.5400 (9.99) 1,039.1390 (6.74) 704.8739 (9.74) 41.9223 (4.28) 692.8285 (9.99) 43.8280 (13.74) 153;70 1,418.6935 (0.10) 1362 1 -test_snappy_framed[alice29.txt-snappy] 1,369.6490 (20.47) 1,804.7350 (11.70) 1,451.3804 (20.05) 73.8900 (7.55) 1,436.7360 (20.72) 64.1737 (20.12) 164;51 688.9993 (0.05) 657 1 -test_snappy_framed[asyoulik.txt-cramjam] 594.3560 (8.88) 909.6040 (5.90) 654.6089 (9.04) 58.4995 (5.97) 635.0080 (9.16) 35.4340 (11.11) 229;170 1,527.6298 (0.11) 1505 1 -test_snappy_framed[asyoulik.txt-snappy] 1,159.2780 (17.32) 1,648.2730 (10.68) 1,226.9825 (16.95) 69.1631 (7.06) 1,208.1210 (17.42) 62.4248 (19.57) 100;62 815.0076 (0.06) 801 1 -test_snappy_framed[fireworks.jpeg-cramjam] 82.0640 (1.23) 188.1610 (1.22) 93.4016 (1.29) 11.2305 (1.15) 89.0435 (1.28) 7.2770 (2.28) 1139;1063 10,706.4547 (0.78) 9054 1 -test_snappy_framed[fireworks.jpeg-snappy] 642.6270 (9.60) 1,080.5030 (7.00) 691.3958 (9.55) 46.4952 (4.75) 683.1805 (9.85) 41.4210 (12.99) 406;100 1,446.3496 (0.10) 1374 1 -test_snappy_framed[geo.protodata-cramjam] 207.2620 (3.10) 404.8150 (2.62) 221.4278 (3.06) 21.8556 (2.23) 213.4810 (3.08) 13.1203 (4.11) 504;529 4,516.1454 (0.33) 4301 1 -test_snappy_framed[geo.protodata-snappy] 727.0740 (10.86) 1,081.0720 (7.01) 774.2563 (10.70) 51.2997 (5.24) 760.7860 (10.97) 44.8170 (14.05) 134;97 1,291.5619 (0.09) 1094 1 -test_snappy_framed[html-cramjam] 209.9630 (3.14) 379.6720 (2.46) 223.6982 (3.09) 18.5359 (1.89) 217.6590 (3.14) 13.9565 (4.38) 501;360 4,470.3094 (0.32) 3931 1 -test_snappy_framed[html-snappy] 662.3100 (9.90) 1,010.6180 (6.55) 708.9370 (9.79) 51.5825 (5.27) 696.5880 (10.04) 47.8210 (14.99) 191;106 1,410.5626 (0.10) 1404 1 -test_snappy_framed[html_x_4-cramjam] 813.9490 (12.16) 1,234.0220 (8.00) 875.8345 (12.10) 64.5237 (6.59) 859.8725 (12.40) 51.4810 (16.14) 138;83 1,141.7682 (0.08) 1122 1 -test_snappy_framed[html_x_4-snappy] 2,665.8330 (39.83) 3,384.9470 (21.94) 2,865.7697 (39.59) 118.5848 (12.11) 2,843.3000 (41.00) 136.0955 (42.67) 97;13 348.9464 (0.03) 337 1 -test_snappy_framed[kppkn.gtb-cramjam] 569.1570 (8.50) 941.9720 (6.11) 612.0818 (8.46) 48.9091 (5.00) 598.0705 (8.62) 40.8400 (12.80) 145;100 1,633.7685 (0.12) 1518 1 -test_snappy_framed[kppkn.gtb-snappy] 1,418.3590 (21.19) 1,969.5570 (12.77) 1,541.2999 (21.29) 97.1448 (9.92) 1,517.0740 (21.88) 86.4275 (27.10) 132;59 648.8030 (0.05) 648 1 -test_snappy_framed[lcet10.txt-cramjam] 1,760.2000 (26.30) 2,350.9470 (15.24) 1,882.3709 (26.00) 99.5655 (10.17) 1,852.9640 (26.72) 107.8530 (33.82) 110;25 531.2449 (0.04) 473 1 -test_snappy_framed[lcet10.txt-snappy] 3,777.4390 (56.44) 4,641.5480 (30.09) 4,027.3592 (55.63) 167.2311 (17.08) 3,996.9500 (57.64) 213.6315 (66.98) 69;7 248.3017 (0.02) 233 1 -test_snappy_framed[paper-100k.pdf-cramjam] 79.9450 (1.19) 166.5010 (1.08) 87.5469 (1.21) 9.9207 (1.01) 84.0460 (1.21) 3.9595 (1.24) 1020;1387 11,422.4499 (0.83) 9037 1 -test_snappy_framed[paper-100k.pdf-snappy] 541.7030 (8.09) 832.1680 (5.39) 572.2008 (7.90) 37.8356 (3.86) 562.3680 (8.11) 40.0157 (12.55) 178;77 1,747.6383 (0.13) 1701 1 -test_snappy_framed[plrabn12.txt-cramjam] 2,624.9240 (39.22) 3,795.2400 (24.60) 2,784.9409 (38.47) 156.2531 (15.96) 2,745.7185 (39.59) 123.5140 (38.73) 37;29 359.0741 (0.03) 350 1 -test_snappy_framed[plrabn12.txt-snappy] 5,029.2020 (75.15) 6,492.6980 (42.08) 5,319.8571 (73.49) 213.0427 (21.76) 5,263.8185 (75.90) 233.6150 (73.25) 36;8 187.9750 (0.01) 186 1 -test_snappy_framed[urls.10K-cramjam] 2,206.7890 (32.97) 3,433.8090 (22.26) 2,489.4787 (34.39) 195.0847 (19.92) 2,530.7845 (36.49) 303.4110 (95.13) 132;3 401.6905 (0.03) 386 1 -test_snappy_framed[urls.10K-snappy] 5,900.9280 (88.17) 7,273.0480 (47.14) 6,206.1277 (85.73) 222.2105 (22.69) 6,145.8630 (88.62) 200.1830 (62.76) 32;10 161.1311 (0.01) 157 1 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------------------------------------- benchmark: 28 tests ------------------------------------------------------------------------------------------------------------------------- +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_snappy_framed[Mark.Twain-Tom.Sawyer.txt-cramjam] 66.3110 (1.0) 132.0670 (1.0) 71.5088 (1.0) 8.0543 (1.0) 68.8310 (1.0) 2.5820 (1.0) 472;777 13,984.2887 (1.0) 4909 1 +test_snappy_framed[Mark.Twain-Tom.Sawyer.txt-snappy] 128.1280 (1.93) 280.2570 (2.12) 141.0132 (1.97) 13.1797 (1.64) 136.6060 (1.98) 10.3185 (4.00) 445;314 7,091.5369 (0.51) 3901 1 +test_snappy_framed[alice29.txt-cramjam] 667.8120 (10.07) 1,122.6380 (8.50) 728.5935 (10.19) 70.0799 (8.70) 709.3380 (10.31) 42.2965 (16.38) 152;155 1,372.5075 (0.10) 1407 1 +test_snappy_framed[alice29.txt-snappy] 1,363.7070 (20.57) 1,933.3930 (14.64) 1,463.5245 (20.47) 79.0356 (9.81) 1,446.3070 (21.01) 80.3440 (31.12) 101;31 683.2820 (0.05) 544 1 +test_snappy_framed[asyoulik.txt-cramjam] 580.7250 (8.76) 1,038.6950 (7.86) 614.7546 (8.60) 42.1717 (5.24) 604.4420 (8.78) 39.3620 (15.24) 152;95 1,626.6652 (0.12) 1648 1 +test_snappy_framed[asyoulik.txt-snappy] 1,158.6120 (17.47) 1,628.6620 (12.33) 1,227.1376 (17.16) 68.0189 (8.45) 1,213.2820 (17.63) 52.7795 (20.44) 89;68 814.9045 (0.06) 780 1 +test_snappy_framed[fifty-four-mb-random-cramjam] 77,210.1300 (>1000.0) 81,672.0980 (618.41) 78,480.3618 (>1000.0) 1,149.2224 (142.68) 78,176.9650 (>1000.0) 885.1638 (342.82) 2;1 12.7420 (0.00) 13 1 +test_snappy_framed[fifty-four-mb-random-snappy] 5,559,679.8660 (>1000.0) 8,568,313.0780 (>1000.0) 7,917,293.5574 (>1000.0) 1,319,007.0957 (>1000.0) 8,465,963.1640 (>1000.0) 830,941.5408 (>1000.0) 1;1 0.1263 (0.00) 5 1 +test_snappy_framed[fifty-four-mb-repeating-cramjam] 53,625.9030 (808.70) 56,103.9790 (424.81) 54,536.0511 (762.65) 865.3173 (107.44) 54,204.6480 (787.50) 1,152.4509 (446.34) 5;0 18.3365 (0.00) 18 1 +test_snappy_framed[fifty-four-mb-repeating-snappy] 408,793.3940 (>1000.0) 413,610.6820 (>1000.0) 411,025.2038 (>1000.0) 2,027.2435 (251.70) 410,251.0240 (>1000.0) 3,348.1738 (>1000.0) 2;0 2.4329 (0.00) 5 1 +test_snappy_framed[fireworks.jpeg-cramjam] 93.1590 (1.40) 229.4800 (1.74) 102.4807 (1.43) 11.1360 (1.38) 99.3520 (1.44) 6.0148 (2.33) 863;919 9,757.9334 (0.70) 8747 1 +test_snappy_framed[fireworks.jpeg-snappy] 634.3260 (9.57) 969.7800 (7.34) 670.6240 (9.38) 41.6387 (5.17) 659.5460 (9.58) 43.4297 (16.82) 158;81 1,491.1486 (0.11) 1377 1 +test_snappy_framed[geo.protodata-cramjam] 202.6670 (3.06) 448.9580 (3.40) 216.7288 (3.03) 19.7819 (2.46) 211.0980 (3.07) 12.7265 (4.93) 409;402 4,614.0605 (0.33) 4327 1 +test_snappy_framed[geo.protodata-snappy] 725.8820 (10.95) 1,141.0820 (8.64) 772.4960 (10.80) 44.3410 (5.51) 768.2020 (11.16) 35.2232 (13.64) 269;96 1,294.5051 (0.09) 1283 1 +test_snappy_framed[html-cramjam] 210.3770 (3.17) 385.4390 (2.92) 223.8841 (3.13) 20.1802 (2.51) 213.4705 (3.10) 14.0190 (5.43) 456;391 4,466.5962 (0.32) 3874 1 +test_snappy_framed[html-snappy] 661.9320 (9.98) 1,073.4110 (8.13) 699.2566 (9.78) 44.5358 (5.53) 687.3445 (9.99) 46.4350 (17.98) 151;66 1,430.0901 (0.10) 1360 1 +test_snappy_framed[html_x_4-cramjam] 799.6210 (12.06) 1,471.1290 (11.14) 876.7533 (12.26) 94.8879 (11.78) 851.4875 (12.37) 51.6310 (20.00) 118;123 1,140.5717 (0.08) 1202 1 +test_snappy_framed[html_x_4-snappy] 2,718.3700 (40.99) 3,591.7680 (27.20) 2,945.9552 (41.20) 133.4797 (16.57) 2,910.0090 (42.28) 125.9050 (48.76) 77;28 339.4485 (0.02) 334 1 +test_snappy_framed[kppkn.gtb-cramjam] 570.4850 (8.60) 942.4960 (7.14) 604.2190 (8.45) 37.0589 (4.60) 599.5330 (8.71) 39.6085 (15.34) 157;79 1,655.0290 (0.12) 1568 1 +test_snappy_framed[kppkn.gtb-snappy] 1,418.7900 (21.40) 1,968.2600 (14.90) 1,498.6198 (20.96) 70.0844 (8.70) 1,487.2870 (21.61) 58.7050 (22.74) 144;47 667.2807 (0.05) 619 1 +test_snappy_framed[lcet10.txt-cramjam] 1,753.5210 (26.44) 2,618.3510 (19.83) 1,927.7128 (26.96) 161.5183 (20.05) 1,880.7385 (27.32) 111.6060 (43.22) 78;55 518.7495 (0.04) 494 1 +test_snappy_framed[lcet10.txt-snappy] 3,772.7230 (56.89) 4,574.4210 (34.64) 4,001.1636 (55.95) 142.0375 (17.63) 3,985.7400 (57.91) 172.0220 (66.62) 65;9 249.9273 (0.02) 239 1 +test_snappy_framed[paper-100k.pdf-cramjam] 71.3860 (1.08) 282.0590 (2.14) 81.4524 (1.14) 10.4778 (1.30) 79.0920 (1.15) 6.5700 (2.54) 918;899 12,277.1087 (0.88) 9596 1 +test_snappy_framed[paper-100k.pdf-snappy] 533.3570 (8.04) 821.8800 (6.22) 563.8659 (7.89) 35.0754 (4.35) 556.9420 (8.09) 38.9318 (15.08) 146;75 1,773.4712 (0.13) 1609 1 +test_snappy_framed[plrabn12.txt-cramjam] 2,542.3630 (38.34) 3,281.8630 (24.85) 2,737.9968 (38.29) 111.7024 (13.87) 2,720.9985 (39.53) 138.1680 (53.51) 89;11 365.2305 (0.03) 354 1 +test_snappy_framed[plrabn12.txt-snappy] 5,096.0520 (76.85) 6,126.9320 (46.39) 5,441.2705 (76.09) 190.2210 (23.62) 5,427.8250 (78.86) 247.3365 (95.79) 53;3 183.7806 (0.01) 181 1 +test_snappy_framed[urls.10K-cramjam] 2,274.0300 (34.29) 3,110.8600 (23.56) 2,521.7336 (35.26) 130.5882 (16.21) 2,493.3860 (36.22) 139.6460 (54.08) 103;21 396.5526 (0.03) 396 1 +test_snappy_framed[urls.10K-snappy] 5,976.5190 (90.13) 6,779.0760 (51.33) 6,312.5501 (88.28) 177.9607 (22.10) 6,286.4100 (91.33) 233.4860 (90.43) 49;1 158.4146 (0.01) 152 1 +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` `make bench-snappy-raw` ```bash ----------------------------------------------------------------------------------------------------------- benchmark: 24 tests ---------------------------------------------------------------------------------------------------------- -Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ -test_snappy_raw[Mark.Twain-Tom.Sawyer.txt-cramjam] 52.4700 (3.41) 115.3440 (2.15) 56.0561 (3.36) 6.2831 (2.30) 53.4030 (3.35) 3.1860 (5.31) 497;515 17,839.2709 (0.30) 4861 1 -test_snappy_raw[Mark.Twain-Tom.Sawyer.txt-snappy] 52.9360 (3.44) 110.8180 (2.07) 56.3054 (3.38) 6.1002 (2.23) 54.0580 (3.40) 3.0085 (5.01) 1021;1043 17,760.2996 (0.30) 11152 1 -test_snappy_raw[alice29.txt-cramjam] 611.5550 (39.69) 960.9500 (17.93) 646.3615 (38.78) 47.3504 (17.32) 634.9345 (39.88) 39.8180 (66.36) 115;86 1,547.1217 (0.03) 1582 1 -test_snappy_raw[alice29.txt-snappy] 599.5750 (38.91) 943.2070 (17.59) 630.4942 (37.82) 37.0993 (13.57) 623.3310 (39.15) 36.2815 (60.47) 104;66 1,586.0575 (0.03) 1433 1 -test_snappy_raw[asyoulik.txt-cramjam] 541.9610 (35.17) 900.4050 (16.80) 570.1357 (34.20) 37.2299 (13.62) 563.6110 (35.40) 35.4925 (59.16) 132;73 1,753.9685 (0.03) 1660 1 -test_snappy_raw[asyoulik.txt-snappy] 532.0560 (34.53) 829.2950 (15.47) 557.7337 (33.46) 31.0406 (11.36) 552.0020 (34.67) 33.1413 (55.24) 163;69 1,792.9705 (0.03) 1805 1 -test_snappy_raw[fireworks.jpeg-cramjam] 40.8240 (2.65) 86.5680 (1.61) 43.1699 (2.59) 4.9490 (1.81) 41.2540 (2.59) 1.9842 (3.31) 768;868 23,164.3098 (0.39) 8581 1 -test_snappy_raw[fireworks.jpeg-snappy] 15.4080 (1.0) 53.6070 (1.0) 16.6693 (1.0) 2.7334 (1.0) 15.9200 (1.0) 0.6000 (1.0) 2163;2428 59,990.6697 (1.0) 37582 1 -test_snappy_raw[geo.protodata-cramjam] 162.4500 (10.54) 319.1430 (5.95) 171.8914 (10.31) 16.4284 (6.01) 164.2120 (10.31) 9.8320 (16.39) 472;475 5,817.6275 (0.10) 5195 1 -test_snappy_raw[geo.protodata-snappy] 142.5780 (9.25) 289.1950 (5.39) 151.9618 (9.12) 14.7480 (5.40) 146.5465 (9.21) 8.7910 (14.65) 496;513 6,580.6001 (0.11) 5564 1 -test_snappy_raw[html-cramjam] 171.6680 (11.14) 353.2230 (6.59) 182.8124 (10.97) 16.0733 (5.88) 177.6490 (11.16) 10.3890 (17.32) 445;445 5,470.0872 (0.09) 4338 1 -test_snappy_raw[html-snappy] 156.5440 (10.16) 289.5440 (5.40) 166.6782 (10.00) 15.6719 (5.73) 159.3490 (10.01) 9.7975 (16.33) 555;545 5,999.5841 (0.10) 5796 1 -test_snappy_raw[html_x_4-cramjam] 687.9800 (44.65) 1,191.3240 (22.22) 732.5265 (43.94) 61.2486 (22.41) 719.6200 (45.20) 37.0050 (61.68) 73;73 1,365.1383 (0.02) 1154 1 -test_snappy_raw[html_x_4-snappy] 635.2680 (41.23) 1,000.9560 (18.67) 665.0148 (39.89) 33.7608 (12.35) 658.7670 (41.38) 38.5295 (64.22) 132;46 1,503.7259 (0.03) 1424 1 -test_snappy_raw[kppkn.gtb-cramjam] 513.0010 (33.29) 876.9870 (16.36) 538.6927 (32.32) 37.5722 (13.75) 529.6960 (33.27) 32.9270 (54.88) 126;76 1,856.3459 (0.03) 1614 1 -test_snappy_raw[kppkn.gtb-snappy] 504.0010 (32.71) 819.8180 (15.29) 531.0207 (31.86) 36.3322 (13.29) 521.1320 (32.73) 33.4015 (55.67) 136;80 1,883.1657 (0.03) 1605 1 -test_snappy_raw[lcet10.txt-cramjam] 1,621.1720 (105.22) 2,280.9690 (42.55) 1,702.4589 (102.13) 88.1738 (32.26) 1,684.9780 (105.84) 74.5165 (124.20) 53;40 587.3857 (0.01) 563 1 -test_snappy_raw[lcet10.txt-snappy] 1,590.3340 (103.21) 2,250.7950 (41.99) 1,675.8838 (100.54) 87.7583 (32.11) 1,661.6440 (104.37) 76.0752 (126.79) 52;35 596.7001 (0.01) 591 1 -test_snappy_raw[paper-100k.pdf-cramjam] 39.0380 (2.53) 83.4620 (1.56) 41.1788 (2.47) 4.3254 (1.58) 39.5485 (2.48) 2.2180 (3.70) 857;895 24,284.3381 (0.40) 10786 1 -test_snappy_raw[paper-100k.pdf-snappy] 20.1120 (1.31) 109.0120 (2.03) 21.8335 (1.31) 3.4175 (1.25) 20.9960 (1.32) 0.8850 (1.48) 1381;1503 45,801.1219 (0.76) 21602 1 -test_snappy_raw[plrabn12.txt-cramjam] 2,218.0820 (143.96) 3,440.0420 (64.17) 2,339.6884 (140.36) 136.2661 (49.85) 2,309.2965 (145.06) 95.7930 (159.66) 35;31 427.4073 (0.01) 354 1 -test_snappy_raw[plrabn12.txt-snappy] 2,316.6310 (150.35) 3,451.9990 (64.39) 2,440.0261 (146.38) 149.6083 (54.73) 2,407.0390 (151.20) 97.8185 (163.03) 30;27 409.8317 (0.01) 417 1 -test_snappy_raw[urls.10K-cramjam] 1,908.7650 (123.88) 2,981.4070 (55.62) 2,040.6282 (122.42) 160.7180 (58.80) 2,011.1260 (126.33) 81.4197 (135.70) 28;40 490.0452 (0.01) 423 1 -test_snappy_raw[urls.10K-snappy] 1,940.1910 (125.92) 3,101.1850 (57.85) 2,052.7867 (123.15) 146.2709 (53.51) 2,021.3160 (126.97) 89.7110 (149.52) 25;25 487.1427 (0.01) 394 1 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------- benchmark: 28 tests -------------------------------------------------------------------------------------------------------------- +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +test_snappy_raw[Mark.Twain-Tom.Sawyer.txt-cramjam] 51.3180 (3.37) 106.5710 (2.12) 54.9889 (3.29) 6.7249 (2.09) 52.6220 (3.32) 1.8468 (2.57) 627;721 18,185.5023 (0.30) 5755 1 +test_snappy_raw[Mark.Twain-Tom.Sawyer.txt-snappy] 52.5980 (3.45) 110.6400 (2.20) 56.2056 (3.36) 6.7379 (2.10) 53.6730 (3.39) 1.6682 (2.32) 1269;1560 17,791.8193 (0.30) 11281 1 +test_snappy_raw[alice29.txt-cramjam] 599.5640 (39.35) 955.8480 (18.98) 634.3983 (37.97) 39.8069 (12.38) 624.0110 (39.38) 40.4120 (56.29) 134;81 1,576.2968 (0.03) 1469 1 +test_snappy_raw[alice29.txt-snappy] 600.3480 (39.41) 1,046.0080 (20.77) 641.8216 (38.42) 52.8951 (16.45) 626.3260 (39.52) 41.8230 (58.25) 148;121 1,558.0655 (0.03) 1631 1 +test_snappy_raw[asyoulik.txt-cramjam] 531.3770 (34.88) 950.9300 (18.88) 567.7280 (33.98) 53.5173 (16.65) 553.5235 (34.93) 38.8605 (54.13) 150;129 1,761.4068 (0.03) 1664 1 +test_snappy_raw[asyoulik.txt-snappy] 532.8600 (34.98) 826.0870 (16.41) 566.0961 (33.88) 42.7028 (13.28) 553.0790 (34.90) 38.2127 (53.23) 97;70 1,766.4846 (0.03) 981 1 +test_snappy_raw[fifty-four-mb-random-cramjam] 64,900.1350 (>1000.0) 66,259.3000 (>1000.0) 65,499.6381 (>1000.0) 488.8944 (152.06) 65,556.7080 (>1000.0) 900.7005 (>1000.0) 9;0 15.2673 (0.00) 16 1 +test_snappy_raw[fifty-four-mb-random-snappy] 55,702.4500 (>1000.0) 63,684.2990 (>1000.0) 56,684.4748 (>1000.0) 1,802.6270 (560.68) 56,194.9530 (>1000.0) 807.8321 (>1000.0) 1;1 17.6415 (0.00) 18 1 +test_snappy_raw[fifty-four-mb-repeating-cramjam] 61,115.8000 (>1000.0) 63,571.1790 (>1000.0) 62,403.5144 (>1000.0) 668.1779 (207.83) 62,227.2180 (>1000.0) 969.2140 (>1000.0) 7;0 16.0247 (0.00) 17 1 +test_snappy_raw[fifty-four-mb-repeating-snappy] 32,122.7550 (>1000.0) 33,752.5920 (670.29) 32,768.2958 (>1000.0) 441.6249 (137.36) 32,726.9150 (>1000.0) 592.4183 (825.17) 12;0 30.5173 (0.00) 31 1 +test_snappy_raw[fireworks.jpeg-cramjam] 24.0780 (1.58) 185.5780 (3.69) 25.9138 (1.55) 4.1937 (1.30) 24.5250 (1.55) 0.8781 (1.22) 2273;2403 38,589.5241 (0.64) 25542 1 +test_snappy_raw[fireworks.jpeg-snappy] 15.2350 (1.0) 50.3550 (1.0) 16.7069 (1.0) 3.2151 (1.0) 15.8470 (1.0) 0.8130 (1.13) 1175;1223 59,855.6622 (1.0) 15115 1 +test_snappy_raw[geo.protodata-cramjam] 155.5390 (10.21) 383.3330 (7.61) 165.6040 (9.91) 16.6777 (5.19) 160.6820 (10.14) 9.3510 (13.02) 576;655 6,038.5013 (0.10) 5400 1 +test_snappy_raw[geo.protodata-snappy] 142.9101 (9.38) 268.0700 (5.32) 152.4274 (9.12) 12.7332 (3.96) 147.1700 (9.29) 8.9320 (12.44) 669;569 6,560.5007 (0.11) 6050 1 +test_snappy_raw[html-cramjam] 165.7090 (10.88) 331.3710 (6.58) 176.3145 (10.55) 16.6429 (5.18) 171.4330 (10.82) 10.0080 (13.94) 573;606 5,671.6825 (0.09) 5450 1 +test_snappy_raw[html-snappy] 156.6590 (10.28) 376.6970 (7.48) 166.9049 (9.99) 16.5424 (5.15) 162.0930 (10.23) 9.4865 (13.21) 576;624 5,991.4352 (0.10) 5383 1 +test_snappy_raw[html_x_4-cramjam] 662.5010 (43.49) 1,178.5590 (23.40) 702.4728 (42.05) 58.7407 (18.27) 685.1415 (43.23) 41.8875 (58.34) 110;94 1,423.5426 (0.02) 1140 1 +test_snappy_raw[html_x_4-snappy] 632.7050 (41.53) 1,083.1810 (21.51) 676.2055 (40.47) 59.6778 (18.56) 658.1900 (41.53) 43.0540 (59.97) 147;126 1,478.8404 (0.02) 1453 1 +test_snappy_raw[kppkn.gtb-cramjam] 500.7480 (32.87) 863.8660 (17.16) 528.4980 (31.63) 37.5560 (11.68) 517.9300 (32.68) 32.8555 (45.76) 143;96 1,892.1548 (0.03) 1692 1 +test_snappy_raw[kppkn.gtb-snappy] 504.1240 (33.09) 855.1690 (16.98) 532.1999 (31.86) 36.5428 (11.37) 522.2600 (32.96) 34.3139 (47.80) 160;103 1,878.9933 (0.03) 1834 1 +test_snappy_raw[lcet10.txt-cramjam] 1,585.7740 (104.09) 2,621.0520 (52.05) 1,683.1401 (100.75) 123.0068 (38.26) 1,654.2825 (104.39) 82.9070 (115.48) 45;40 594.1276 (0.01) 538 1 +test_snappy_raw[lcet10.txt-snappy] 1,590.9740 (104.43) 2,517.1320 (49.99) 1,686.2719 (100.93) 104.7156 (32.57) 1,664.4925 (105.04) 74.3340 (103.54) 58;42 593.0242 (0.01) 582 1 +test_snappy_raw[paper-100k.pdf-cramjam] 27.2180 (1.79) 198.5880 (3.94) 29.5554 (1.77) 4.9856 (1.55) 27.9830 (1.77) 0.9690 (1.35) 2051;2213 33,834.7155 (0.57) 20315 1 +test_snappy_raw[paper-100k.pdf-snappy] 20.1430 (1.32) 57.0260 (1.13) 21.8590 (1.31) 3.2314 (1.01) 21.0150 (1.33) 0.7179 (1.0) 1753;1970 45,747.6927 (0.76) 26477 1 +test_snappy_raw[plrabn12.txt-cramjam] 2,393.2550 (157.09) 3,470.2860 (68.92) 2,615.1149 (156.53) 126.0164 (39.20) 2,584.6890 (163.10) 128.8117 (179.42) 59;17 382.3924 (0.01) 385 1 +test_snappy_raw[plrabn12.txt-snappy] 2,185.9910 (143.48) 3,105.9010 (61.68) 2,312.9505 (138.44) 122.6518 (38.15) 2,280.8130 (143.93) 94.9631 (132.27) 48;32 432.3482 (0.01) 394 1 +test_snappy_raw[urls.10K-cramjam] 1,851.8370 (121.55) 2,722.8100 (54.07) 1,961.6530 (117.42) 100.0348 (31.11) 1,936.3300 (122.19) 84.8327 (118.16) 67;27 509.7741 (0.01) 413 1 +test_snappy_raw[urls.10K-snappy] 1,813.8860 (119.06) 2,805.6660 (55.72) 2,047.6586 (122.56) 121.4122 (37.76) 2,024.1400 (127.73) 93.0140 (129.56) 75;44 488.3627 (0.01) 498 1 +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ ``` @@ -179,34 +187,38 @@ Again, since basically no variants implement similar functionality as we saw in benchmarks, this benchmark is specific to `cramjam` ```bash ------------------------------------------------------------------------------------------------------------------------- benchmark: 24 tests ------------------------------------------------------------------------------------------------------------------------- -Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_cramjam_snappy_de_compress_into[Mark.Twain-Tom.Sawyer.txt-compress_into] 39.8240 (1.87) 85.1870 (1.49) 42.1991 (1.85) 4.7949 (1.36) 40.4410 (1.84) 1.5060 (2.22) 1579;1739 23,697.2108 (0.54) 18429 1 -test_cramjam_snappy_de_compress_into[Mark.Twain-Tom.Sawyer.txt-decompress_into] 21.2630 (1.0) 57.2360 (1.0) 22.8284 (1.0) 3.5271 (1.0) 21.9730 (1.0) 0.6780 (1.0) 776;888 43,805.1557 (1.0) 9271 1 -test_cramjam_snappy_de_compress_into[alice29.txt-compress_into] 462.6910 (21.76) 812.9580 (14.20) 508.4205 (22.27) 37.7018 (10.69) 496.9830 (22.62) 26.8805 (39.65) 215;177 1,966.8760 (0.04) 1976 1 -test_cramjam_snappy_de_compress_into[alice29.txt-decompress_into] 190.4900 (8.96) 446.4370 (7.80) 210.2874 (9.21) 25.0303 (7.10) 202.8200 (9.23) 16.8353 (24.83) 274;244 4,755.3978 (0.11) 3229 1 -test_cramjam_snappy_de_compress_into[asyoulik.txt-compress_into] 405.8760 (19.09) 697.8110 (12.19) 430.5061 (18.86) 33.1445 (9.40) 421.2690 (19.17) 25.4420 (37.52) 187;147 2,322.8476 (0.05) 2260 1 -test_cramjam_snappy_de_compress_into[asyoulik.txt-decompress_into] 166.7260 (7.84) 345.2520 (6.03) 175.3985 (7.68) 15.2901 (4.34) 168.3880 (7.66) 9.8003 (14.45) 552;511 5,701.3018 (0.13) 5725 1 -test_cramjam_snappy_de_compress_into[fireworks.jpeg-compress_into] 44.3380 (2.09) 98.5800 (1.72) 47.4147 (2.08) 5.4927 (1.56) 45.6420 (2.08) 2.0510 (3.03) 1932;2103 21,090.4861 (0.48) 19150 1 -test_cramjam_snappy_de_compress_into[fireworks.jpeg-decompress_into] 30.1790 (1.42) 75.7750 (1.32) 32.0674 (1.40) 4.0658 (1.15) 30.7100 (1.40) 1.1880 (1.75) 2248;2630 31,184.3311 (0.71) 28385 1 -test_cramjam_snappy_de_compress_into[geo.protodata-compress_into] 121.2680 (5.70) 400.9240 (7.00) 130.3795 (5.71) 15.5604 (4.41) 125.4560 (5.71) 7.5100 (11.08) 808;1079 7,669.9154 (0.18) 7686 1 -test_cramjam_snappy_de_compress_into[geo.protodata-decompress_into] 61.8930 (2.91) 135.6580 (2.37) 65.3579 (2.86) 7.0477 (2.00) 62.6300 (2.85) 2.0933 (3.09) 1215;1502 15,300.3577 (0.35) 13125 1 -test_cramjam_snappy_de_compress_into[html-compress_into] 128.4720 (6.04) 257.3410 (4.50) 136.2710 (5.97) 12.7074 (3.60) 130.5210 (5.94) 7.5215 (11.09) 854;877 7,338.3187 (0.17) 7303 1 -test_cramjam_snappy_de_compress_into[html-decompress_into] 62.6920 (2.95) 173.2130 (3.03) 70.8147 (3.10) 11.3212 (3.21) 66.6290 (3.03) 6.4820 (9.56) 1799;1844 14,121.3670 (0.32) 12717 1 -test_cramjam_snappy_de_compress_into[html_x_4-compress_into] 511.0130 (24.03) 891.8790 (15.58) 549.7244 (24.08) 41.6672 (11.81) 542.8110 (24.70) 33.1573 (48.90) 150;109 1,819.0935 (0.04) 1447 1 -test_cramjam_snappy_de_compress_into[html_x_4-decompress_into] 258.6400 (12.16) 541.0200 (9.45) 292.8625 (12.83) 37.7322 (10.70) 276.6950 (12.59) 27.8205 (41.03) 338;317 3,414.5720 (0.08) 2543 1 -test_cramjam_snappy_de_compress_into[kppkn.gtb-compress_into] 359.4940 (16.91) 634.0840 (11.08) 385.2166 (16.87) 29.2549 (8.29) 381.5820 (17.37) 28.8383 (42.53) 265;155 2,595.9421 (0.06) 2735 1 -test_cramjam_snappy_de_compress_into[kppkn.gtb-decompress_into] 197.0240 (9.27) 405.3810 (7.08) 209.6119 (9.18) 21.6750 (6.15) 203.1435 (9.25) 12.2140 (18.01) 460;523 4,770.7209 (0.11) 4784 1 -test_cramjam_snappy_de_compress_into[lcet10.txt-compress_into] 1,220.0120 (57.38) 1,754.6500 (30.66) 1,354.5943 (59.34) 97.3167 (27.59) 1,317.4960 (59.96) 82.2825 (121.36) 126;69 738.2284 (0.02) 692 1 -test_cramjam_snappy_de_compress_into[lcet10.txt-decompress_into] 500.1370 (23.52) 904.8270 (15.81) 548.9960 (24.05) 51.5708 (14.62) 534.0950 (24.31) 34.9655 (51.57) 165;156 1,821.5069 (0.04) 1645 1 -test_cramjam_snappy_de_compress_into[paper-100k.pdf-compress_into] 38.5840 (1.81) 253.0600 (4.42) 41.1213 (1.80) 5.1050 (1.45) 39.6210 (1.80) 1.7125 (2.53) 1968;2146 24,318.3137 (0.56) 21464 1 -test_cramjam_snappy_de_compress_into[paper-100k.pdf-decompress_into] 30.2860 (1.42) 75.8210 (1.32) 32.1635 (1.41) 4.2167 (1.20) 30.7870 (1.40) 1.1800 (1.74) 2233;2458 31,091.0992 (0.71) 26565 1 -test_cramjam_snappy_de_compress_into[plrabn12.txt-compress_into] 1,650.9060 (77.64) 2,430.3730 (42.46) 1,770.4333 (77.55) 99.3839 (28.18) 1,755.6610 (79.90) 93.0350 (137.22) 104;35 564.8335 (0.01) 569 1 -test_cramjam_snappy_de_compress_into[plrabn12.txt-decompress_into] 670.7310 (31.54) 1,143.7440 (19.98) 713.4885 (31.25) 64.8742 (18.39) 695.7285 (31.66) 43.1620 (63.66) 72;69 1,401.5642 (0.03) 1078 1 -test_cramjam_snappy_de_compress_into[urls.10K-compress_into] 1,449.2240 (68.16) 2,248.9360 (39.29) 1,585.3133 (69.44) 128.9953 (36.57) 1,550.0310 (70.54) 93.1475 (137.39) 83;54 630.7902 (0.01) 607 1 -test_cramjam_snappy_de_compress_into[urls.10K-decompress_into] 611.5930 (28.76) 1,102.2170 (19.26) 660.0188 (28.91) 50.9816 (14.45) 650.2550 (29.59) 38.4440 (56.70) 133;94 1,515.1083 (0.03) 1110 1 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +--------------------------------------------------------------------------------------------------------------------------- benchmark: 28 tests --------------------------------------------------------------------------------------------------------------------------- +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_cramjam_snappy_de_compress_into[Mark.Twain-Tom.Sawyer.txt-compress_into] 39.5430 (1.89) 226.3750 (4.12) 42.8299 (1.93) 6.0547 (2.06) 41.0640 (1.93) 1.4650 (1.96) 2225;2454 23,348.1766 (0.52) 18846 1 +test_cramjam_snappy_de_compress_into[Mark.Twain-Tom.Sawyer.txt-decompress_into] 20.8690 (1.0) 54.9380 (1.0) 22.1814 (1.0) 2.9375 (1.0) 21.2590 (1.0) 0.7462 (1.0) 463;481 45,082.7264 (1.0) 7705 1 +test_cramjam_snappy_de_compress_into[alice29.txt-compress_into] 463.9240 (22.23) 777.6850 (14.16) 492.9410 (22.22) 32.7011 (11.13) 483.5000 (22.74) 31.2150 (41.83) 197;115 2,028.6405 (0.04) 2053 1 +test_cramjam_snappy_de_compress_into[alice29.txt-decompress_into] 186.3790 (8.93) 382.7770 (6.97) 199.2822 (8.98) 21.8619 (7.44) 192.5040 (9.06) 11.0680 (14.83) 436;575 5,018.0092 (0.11) 4754 1 +test_cramjam_snappy_de_compress_into[asyoulik.txt-compress_into] 412.3180 (19.76) 713.2160 (12.98) 438.1621 (19.75) 36.8647 (12.55) 426.4940 (20.06) 29.6507 (39.73) 226;170 2,282.2606 (0.05) 2321 1 +test_cramjam_snappy_de_compress_into[asyoulik.txt-decompress_into] 163.3530 (7.83) 314.8180 (5.73) 173.4995 (7.82) 18.4431 (6.28) 165.1880 (7.77) 9.7200 (13.03) 567;637 5,763.7058 (0.13) 5530 1 +test_cramjam_snappy_de_compress_into[fifty-four-mb-random-compress_into] 21,370.5300 (>1000.0) 23,421.1310 (426.32) 22,271.0823 (>1000.0) 581.2127 (197.86) 22,076.6660 (>1000.0) 839.1198 (>1000.0) 9;0 44.9013 (0.00) 25 1 +test_cramjam_snappy_de_compress_into[fifty-four-mb-random-decompress_into] 14,683.7930 (703.62) 15,945.4020 (290.24) 15,056.4776 (678.79) 266.1213 (90.59) 15,000.5550 (705.61) 379.6751 (508.79) 7;1 66.4166 (0.00) 30 1 +test_cramjam_snappy_de_compress_into[fifty-four-mb-repeating-compress_into] 14,899.9090 (713.97) 16,099.5890 (293.05) 15,429.4449 (695.60) 276.9102 (94.27) 15,329.7200 (721.09) 336.6940 (451.19) 20;0 64.8111 (0.00) 63 1 +test_cramjam_snappy_de_compress_into[fifty-four-mb-repeating-decompress_into] 17,915.9630 (858.50) 19,089.1940 (347.47) 18,389.6426 (829.06) 278.4235 (94.78) 18,373.7230 (864.28) 418.8805 (561.32) 11;0 54.3784 (0.00) 29 1 +test_cramjam_snappy_de_compress_into[fireworks.jpeg-compress_into] 43.9390 (2.11) 118.4420 (2.16) 47.8558 (2.16) 5.6904 (1.94) 46.2460 (2.18) 1.9707 (2.64) 1996;2197 20,896.1152 (0.46) 19715 1 +test_cramjam_snappy_de_compress_into[fireworks.jpeg-decompress_into] 29.7500 (1.43) 73.8140 (1.34) 31.8771 (1.44) 3.9980 (1.36) 30.5820 (1.44) 1.2860 (1.72) 2432;2732 31,370.4800 (0.70) 27124 1 +test_cramjam_snappy_de_compress_into[geo.protodata-compress_into] 121.8950 (5.84) 260.3290 (4.74) 129.7199 (5.85) 12.8547 (4.38) 124.3525 (5.85) 7.0450 (9.44) 799;848 7,708.9151 (0.17) 7196 1 +test_cramjam_snappy_de_compress_into[geo.protodata-decompress_into] 61.7630 (2.96) 152.9090 (2.78) 65.6442 (2.96) 7.3924 (2.52) 62.6280 (2.95) 2.0600 (2.76) 1269;1729 15,233.6341 (0.34) 12907 1 +test_cramjam_snappy_de_compress_into[html-compress_into] 129.4560 (6.20) 252.4930 (4.60) 138.6755 (6.25) 14.4065 (4.90) 133.9925 (6.30) 7.5980 (10.18) 834;1040 7,211.0811 (0.16) 7318 1 +test_cramjam_snappy_de_compress_into[html-decompress_into] 62.3400 (2.99) 154.2940 (2.81) 66.4681 (3.00) 8.0530 (2.74) 64.3110 (3.03) 3.5030 (4.69) 1064;1480 15,044.8144 (0.33) 11675 1 +test_cramjam_snappy_de_compress_into[html_x_4-compress_into] 517.2050 (24.78) 888.9890 (16.18) 570.9151 (25.74) 50.9777 (17.35) 554.5600 (26.09) 44.1403 (59.15) 353;194 1,751.5740 (0.04) 1857 1 +test_cramjam_snappy_de_compress_into[html_x_4-decompress_into] 257.5690 (12.34) 529.3130 (9.63) 283.4639 (12.78) 37.2057 (12.67) 266.7790 (12.55) 26.8391 (35.97) 301;283 3,527.7862 (0.08) 2446 1 +test_cramjam_snappy_de_compress_into[kppkn.gtb-compress_into] 363.2850 (17.41) 611.8500 (11.14) 383.6018 (17.29) 26.0204 (8.86) 375.6370 (17.67) 23.2070 (31.10) 295;182 2,606.8697 (0.06) 2684 1 +test_cramjam_snappy_de_compress_into[kppkn.gtb-decompress_into] 195.4920 (9.37) 389.6650 (7.09) 206.6503 (9.32) 18.2140 (6.20) 201.5670 (9.48) 11.5785 (15.52) 518;513 4,839.0933 (0.11) 4671 1 +test_cramjam_snappy_de_compress_into[lcet10.txt-compress_into] 1,230.0970 (58.94) 1,987.3830 (36.18) 1,324.3744 (59.71) 91.6116 (31.19) 1,300.6140 (61.18) 70.7862 (94.86) 91;53 755.0735 (0.02) 671 1 +test_cramjam_snappy_de_compress_into[lcet10.txt-decompress_into] 489.8850 (23.47) 866.4870 (15.77) 531.4879 (23.96) 58.0469 (19.76) 516.8210 (24.31) 32.6948 (43.81) 126;136 1,881.5104 (0.04) 1385 1 +test_cramjam_snappy_de_compress_into[paper-100k.pdf-compress_into] 38.2830 (1.83) 97.7700 (1.78) 42.4763 (1.91) 5.6442 (1.92) 40.7560 (1.92) 2.0365 (2.73) 2347;2564 23,542.5644 (0.52) 19856 1 +test_cramjam_snappy_de_compress_into[paper-100k.pdf-decompress_into] 29.9950 (1.44) 80.4800 (1.46) 32.0370 (1.44) 4.0740 (1.39) 31.0030 (1.46) 0.9880 (1.32) 2199;2610 31,213.8860 (0.69) 26692 1 +test_cramjam_snappy_de_compress_into[plrabn12.txt-compress_into] 1,670.0190 (80.02) 2,474.3870 (45.04) 1,797.8845 (81.05) 104.9018 (35.71) 1,773.9745 (83.45) 89.2560 (119.61) 104;39 556.2093 (0.01) 580 1 +test_cramjam_snappy_de_compress_into[plrabn12.txt-decompress_into] 657.2370 (31.49) 1,143.2750 (20.81) 709.0987 (31.97) 61.9860 (21.10) 697.8510 (32.83) 33.1955 (44.48) 87;104 1,410.2409 (0.03) 1088 1 +test_cramjam_snappy_de_compress_into[urls.10K-compress_into] 1,454.5130 (69.70) 2,186.5280 (39.80) 1,609.2278 (72.55) 129.4296 (44.06) 1,569.5480 (73.83) 100.0898 (134.13) 54;36 621.4161 (0.01) 467 1 +test_cramjam_snappy_de_compress_into[urls.10K-decompress_into] 612.5320 (29.35) 1,063.2690 (19.35) 670.1230 (30.21) 62.6793 (21.34) 654.0500 (30.77) 43.8632 (58.78) 104;89 1,492.2634 (0.03) 1135 1 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` #### Lz4 diff --git a/benchmarks/test_bench.py b/benchmarks/test_bench.py index 83fe2cef..9cfae1ee 100644 --- a/benchmarks/test_bench.py +++ b/benchmarks/test_bench.py @@ -12,6 +12,31 @@ ] +class FiftyFourMbRepeating: + """ + 54mb of data, where the first 54bytes are repeated 1000000 times. + """ + + name = "fifty-four-mb-repeating" + + def read_bytes(self): + return b"oh what a beautiful morning, oh what a beautiful day!!" * 1000000 + + +class FiftyFourMbRandom: + """ + 54mb of data, all random + """ + + name = "fifty-four-mb-random" + + def read_bytes(self): + return np.random.randint(0, 255, size=54000000, dtype=np.uint8).tobytes() + + +FILES.extend([FiftyFourMbRepeating(), FiftyFourMbRandom()]) + + def round_trip(compress, decompress, data, **kwargs): return decompress(compress(data, **kwargs)) @@ -71,6 +96,7 @@ def test_snappy_framed(benchmark, file, use_cramjam: bool): data=data, ) + @pytest.mark.parametrize("op", ("decompress_into", "compress_into")) @pytest.mark.parametrize("file", FILES, ids=lambda val: val.name) def test_cramjam_snappy_de_compress_into(benchmark, op, file): diff --git a/src/lib.rs b/src/lib.rs index c5a42c5b..8f3c03cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -275,4 +275,25 @@ mod tests { test_variant!(brotli, compressed_len = 729, level = None); test_variant!(deflate, compressed_len = 157174, level = None); test_variant!(zstd, compressed_len = 4990, level = None); + + #[test] + fn test_snappy_raw_into_round_trip() { + let data = gen_data(); + let max_compress_len = snap::raw::max_compress_len(data.len()); + let mut compressed_buffer = vec![0; max_compress_len]; + + let n_bytes = + crate::snappy::internal::compress_raw_into(&data, &mut Cursor::new(&mut compressed_buffer)).unwrap(); + assert_eq!(n_bytes, 2563328); // raw compressed len + + let decompress_len = snap::raw::decompress_len(&compressed_buffer[..n_bytes]).unwrap(); + let mut decompressed_buffer = vec![0; decompress_len]; + let n_bytes = crate::snappy::internal::decompress_raw_into( + &compressed_buffer[..n_bytes], + &mut Cursor::new(&mut decompressed_buffer), + ) + .unwrap(); + assert_eq!(n_bytes, data.len()); + assert_eq!(&data, &decompressed_buffer[..n_bytes]); + } } diff --git a/src/snappy.rs b/src/snappy.rs index fb443f89..e7074eff 100644 --- a/src/snappy.rs +++ b/src/snappy.rs @@ -14,6 +14,10 @@ pub fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(decompress_raw, m)?)?; m.add_function(wrap_pyfunction!(compress_into, m)?)?; m.add_function(wrap_pyfunction!(decompress_into, m)?)?; + m.add_function(wrap_pyfunction!(compress_raw_into, m)?)?; + m.add_function(wrap_pyfunction!(decompress_raw_into, m)?)?; + m.add_function(wrap_pyfunction!(compress_raw_max_len, m)?)?; + m.add_function(wrap_pyfunction!(decompress_raw_len, m)?)?; Ok(()) } @@ -53,14 +57,22 @@ pub fn compress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option(py: Python<'a>, data: BytesType<'a>) -> PyResult> { + let output_len = to_py_err!(DecompressionError -> snap::raw::decompress_len(data.as_bytes()))?; + match data { - BytesType::Bytes(input) => { - let out = to_py_err!(DecompressionError -> self::internal::decompress_raw(input.as_bytes()))?; - Ok(BytesType::Bytes(PyBytes::new(py, &out))) + BytesType::Bytes(_) => { + let pybytes = PyBytes::new_with(py, output_len, |output| { + to_py_err!(DecompressionError -> self::internal::decompress_raw_into(data.as_bytes(), &mut Cursor::new(output)))?; + Ok(()) + })?; + Ok(BytesType::Bytes(pybytes)) } - BytesType::ByteArray(input) => { - let out = to_py_err!(DecompressionError -> self::internal::decompress_raw(unsafe { input.as_bytes() }))?; - Ok(BytesType::ByteArray(PyByteArray::new(py, &out))) + BytesType::ByteArray(_) => { + let pybytes = PyByteArray::new_with(py, output_len, |output| { + to_py_err!(DecompressionError -> self::internal::decompress_raw_into(data.as_bytes(), &mut Cursor::new(output)))?; + Ok(()) + })?; + Ok(BytesType::ByteArray(pybytes)) } } } @@ -75,14 +87,25 @@ pub fn decompress_raw<'a>(py: Python<'a>, data: BytesType<'a>) -> PyResult(py: Python<'a>, data: BytesType<'a>) -> PyResult> { + let output_len = snap::raw::max_compress_len(data.len()); + match data { - BytesType::Bytes(input) => { - let out = to_py_err!(CompressionError -> self::internal::compress_raw(input.as_bytes()))?; - Ok(BytesType::Bytes(PyBytes::new(py, &out))) + BytesType::Bytes(_) => { + let mut output = vec![0; output_len]; + let n_bytes = to_py_err!(CompressionError -> self::internal::compress_raw_into(data.as_bytes(), &mut Cursor::new(output.as_mut_slice())))?; + output.truncate(n_bytes); + Ok(BytesType::Bytes(PyBytes::new(py, &output))) } - BytesType::ByteArray(input) => { - let out = to_py_err!(CompressionError -> self::internal::compress_raw(unsafe { input.as_bytes() }))?; - Ok(BytesType::ByteArray(PyByteArray::new(py, &out))) + BytesType::ByteArray(_) => { + let mut actual_size = 0; + let pybytes = PyByteArray::new_with(py, output_len, |output| { + let mut cursor = Cursor::new(output); + actual_size = + to_py_err!(CompressionError -> self::internal::compress_raw_into(data.as_bytes(), &mut cursor))?; + Ok(()) + })?; + pybytes.resize(actual_size)?; + Ok(BytesType::ByteArray(pybytes)) } } } @@ -99,21 +122,49 @@ pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyAr crate::generic_into!(decompress(data -> array)) } +/// Compress raw format directly into an output buffer +#[pyfunction] +pub fn compress_raw_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &PyArray1) -> PyResult { + crate::generic_into!(compress_raw_into(data -> array)) +} + +/// Decompress raw format directly into an output buffer +#[pyfunction] +pub fn decompress_raw_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &PyArray1) -> PyResult { + crate::generic_into!(decompress_raw_into(data -> array)) +} + +/// Get the expected max compressed length for snappy raw compression; this is the size +/// of buffer that should be passed to `compress_raw_into` +#[pyfunction] +pub fn compress_raw_max_len<'a>(_py: Python<'a>, data: BytesType<'a>) -> usize { + snap::raw::max_compress_len(data.len()) +} + +/// Get the decompressed length for the given data. This is the size of buffer +/// that should be passed to `decompress_raw_into` +#[pyfunction] +pub fn decompress_raw_len<'a>(_py: Python<'a>, data: BytesType<'a>) -> PyResult { + to_py_err!(DecompressionError -> snap::raw::decompress_len(data.as_bytes())) +} + pub(crate) mod internal { use snap::raw::{Decoder, Encoder}; use snap::read::{FrameDecoder, FrameEncoder}; - use std::io::{Error, Write}; + use std::io::{Cursor, Error, Write}; - /// Decompress snappy data raw - pub fn decompress_raw(data: &[u8]) -> Result, snap::Error> { + /// Decompress snappy data raw into a mutable slice + pub fn decompress_raw_into(input: &[u8], output: &mut Cursor<&mut [u8]>) -> Result { let mut decoder = Decoder::new(); - decoder.decompress_vec(data) + let buffer = output.get_mut(); + decoder.decompress(input, *buffer) } - /// Compress snappy data raw - pub fn compress_raw(data: &[u8]) -> Result, snap::Error> { + /// Compress snappy data raw into a mutable slice + pub fn compress_raw_into(input: &[u8], output: &mut Cursor<&mut [u8]>) -> Result { let mut encoder = Encoder::new(); - encoder.compress_vec(data) + let buffer = output.get_mut(); + encoder.compress(input, buffer) } /// Decompress snappy data framed diff --git a/tests/test_variants.py b/tests/test_variants.py index 5ab8b37e..026e0b9d 100644 --- a/tests/test_variants.py +++ b/tests/test_variants.py @@ -57,3 +57,28 @@ def test_variants_de_compress_into(variant_str): decompressed_size = variant.decompress_into(compressed, decompressed_array) decompressed = decompressed_array[:decompressed_size].tobytes() assert same_same(decompressed, data) + + +def test_variant_snappy_raw_into(): + """ + A little more special than other de/compress_into variants, as the underlying + snappy raw api makes a hard expectation that its calculated len is used. + """ + data = b"oh what a beautiful morning, oh what a beautiful day!!" * 1000000 + + compressed_size = cramjam.snappy.compress_raw_max_len(data) + compressed_buffer = np.zeros(compressed_size, dtype=np.uint8) + n_bytes = cramjam.snappy.compress_raw_into(data, compressed_buffer) + assert n_bytes == 2563328 + + decompressed_size = cramjam.snappy.decompress_raw_len( + compressed_buffer[:n_bytes].tobytes() + ) + assert decompressed_size == len(data) + decompressed_buffer = np.zeros(decompressed_size, dtype=np.uint8) + n_bytes = cramjam.snappy.decompress_raw_into( + compressed_buffer[:n_bytes].tobytes(), decompressed_buffer + ) + assert n_bytes == len(data) + + assert same_same(decompressed_buffer[:n_bytes], data)