-
Notifications
You must be signed in to change notification settings - Fork 241
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduce ParsingOptions.FilterProvider and BaseFilterProvider and make CcittFaxCompressionType a byte
- Loading branch information
Showing
8 changed files
with
242 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
namespace UglyToad.PdfPig.Tests.Integration | ||
{ | ||
using PdfPig.Filters; | ||
using PdfPig.Tokens; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
|
||
public class FilterTests | ||
{ | ||
// Full path of the "Integration/Documents" folder, located three levels above the
// application base directory. Resolved lazily on first access.
private static readonly Lazy<string> DocumentFolder = new Lazy<string>(() =>
{
    var relativeFolder = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents");
    return Path.GetFullPath(relativeFolder);
});

// File names that GetAllDocuments leaves out of the test data.
private static readonly HashSet<string> _documentsToIgnore = new HashSet<string>
{
    "issue_671.pdf",
    "GHOSTSCRIPT-698363-0.pdf",
    "ErcotFacts.pdf"
};
|
||
/// <summary>
/// Opens the document with <see cref="MyFilterProvider"/> and asserts that every image whose
/// single named filter is neither Flate nor LZW cannot be converted to PNG.
/// Images without a <c>Filter</c> name entry are not checked.
/// </summary>
/// <param name="documentName">File name of the document inside the documents folder.</param>
[Theory]
[MemberData(nameof(GetAllDocuments))]
public void NoImageDecoding(string documentName)
{
    // The member data only carries the short file name (so it reads well in the
    // test explorer); rebuild the full path here.
    documentName = Path.Combine(DocumentFolder.Value, documentName);

    var parsingOptions = new ParsingOptions
    {
        UseLenientParsing = true,
        FilterProvider = MyFilterProvider.Instance
    };

    using var document = PdfDocument.Open(documentName, parsingOptions);

    // Page numbers passed to GetPage are 1-based.
    for (var pageNumber = 1; pageNumber <= document.NumberOfPages; pageNumber++)
    {
        var page = document.GetPage(pageNumber);

        foreach (var pdfImage in page.GetImages())
        {
            // Skip images whose Filter entry cannot be read as a single name.
            if (!pdfImage.ImageDictionary.TryGet(NameToken.Filter, out NameToken filter))
            {
                continue;
            }

            // Flate and LZW are still backed by real filters in MyFilterProvider.
            var filterName = filter.Data;
            var isStillDecodable = filterName.Equals(NameToken.FlateDecode.Data)
                                   || filterName.Equals(NameToken.FlateDecodeAbbreviation.Data)
                                   || filterName.Equals(NameToken.LzwDecode.Data)
                                   || filterName.Equals(NameToken.LzwDecodeAbbreviation.Data);

            if (isStillDecodable)
            {
                continue;
            }

            Assert.False(pdfImage.TryGetPng(out _));
        }
    }
}
|
||
/// <summary>
/// Placeholder <see cref="IFilter"/> that reports itself as unsupported and never decodes anything.
/// </summary>
public sealed class NoFilter : IFilter
{
    /// <summary>
    /// Always <c>false</c>.
    /// </summary>
    public bool IsSupported
    {
        get { return false; }
    }

    /// <summary>
    /// Not implemented; always throws.
    /// </summary>
    /// <exception cref="NotImplementedException">Thrown on every call.</exception>
    public ReadOnlyMemory<byte> Decode(ReadOnlySpan<byte> input, DictionaryToken streamDictionary, int filterIndex)
        => throw new NotImplementedException();
}
|
||
/// <summary>
/// Filter provider used by the test: the CCITT fax, DCT, JBIG2 and JPX filter names are mapped
/// to a <see cref="NoFilter"/>, while the ASCII85, ASCIIHex, Flate, RunLength and LZW names
/// are mapped to their regular filter implementations.
/// </summary>
public class MyFilterProvider : BaseFilterProvider
{
    /// <summary>
    /// The single instance of this provider.
    /// </summary>
    public static readonly IFilterProvider Instance = new MyFilterProvider();

    /// <summary>
    /// Create the provider with the filter table built by <see cref="GetDictionary"/>.
    /// </summary>
    protected MyFilterProvider() : base(GetDictionary())
    {
    }

    /// <summary>
    /// Build the filter-name to filter-instance table. Full names and their abbreviations
    /// share the same filter instance.
    /// </summary>
    private static Dictionary<string, IFilter> GetDictionary()
    {
        var ascii85 = new Ascii85Filter();
        var asciiHex = new AsciiHexDecodeFilter();
        var flate = new FlateFilter();
        var runLength = new RunLengthFilter();
        var lzw = new LzwFilter();

        // Shared stand-in for every filter this provider does not decode.
        var noFilter = new NoFilter();

        return new Dictionary<string, IFilter>
        {
            { NameToken.Ascii85Decode.Data, ascii85 },
            { NameToken.Ascii85DecodeAbbreviation.Data, ascii85 },
            { NameToken.AsciiHexDecode.Data, asciiHex },
            { NameToken.AsciiHexDecodeAbbreviation.Data, asciiHex },
            { NameToken.CcittfaxDecode.Data, noFilter },
            { NameToken.CcittfaxDecodeAbbreviation.Data, noFilter },
            { NameToken.DctDecode.Data, noFilter },
            { NameToken.DctDecodeAbbreviation.Data, noFilter },
            { NameToken.FlateDecode.Data, flate },
            { NameToken.FlateDecodeAbbreviation.Data, flate },
            { NameToken.Jbig2Decode.Data, noFilter },
            { NameToken.JpxDecode.Data, noFilter },
            { NameToken.RunLengthDecode.Data, runLength },
            { NameToken.RunLengthDecodeAbbreviation.Data, runLength },
            // Use .Data explicitly, like every other entry, rather than relying on
            // the implicit NameToken-to-string conversion.
            { NameToken.LzwDecode.Data, lzw },
            { NameToken.LzwDecodeAbbreviation.Data, lzw }
        };
    }
}
|
||
/// <summary>
/// Test data: the file name of every <c>*.pdf</c> in the documents folder, except those
/// listed in <c>_documentsToIgnore</c>. Each entry is a single-element argument array.
/// </summary>
public static IEnumerable<object[]> GetAllDocuments
{
    get
    {
        var files = Directory.GetFiles(DocumentFolder.Value, "*.pdf");

        // Return the short name so it is readable in the test explorer.
        // Ignored documents are matched on the exact file name with a hash lookup,
        // rather than a culture-sensitive suffix scan that could also exclude
        // unrelated files whose names merely end with an ignored name.
        return files
            .Select(path => Path.GetFileName(path))
            .Where(fileName => !_documentsToIgnore.Contains(fileName))
            .Select(fileName => new object[] { fileName });
    }
}
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
namespace UglyToad.PdfPig.Filters | ||
{ | ||
using Core; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using Tokens; | ||
using Util; | ||
|
||
/// <summary> | ||
/// Base abstract class for FilterProvider. | ||
/// </summary> | ||
public abstract class BaseFilterProvider : IFilterProvider | ||
{ | ||
/// <summary> | ||
/// Dictionary of filters. | ||
/// </summary> | ||
protected readonly IReadOnlyDictionary<string, IFilter> FilterInstances; | ||
|
||
/// <summary>
/// Create a new <see cref="BaseFilterProvider"/> with the given filters.
/// </summary>
/// <param name="filterInstances">The available filters, keyed by filter name.</param>
/// <exception cref="ArgumentNullException"><paramref name="filterInstances"/> is null.</exception>
protected BaseFilterProvider(IReadOnlyDictionary<string, IFilter> filterInstances)
{
    // Fail at construction instead of with a NullReferenceException on first lookup.
    FilterInstances = filterInstances ?? throw new ArgumentNullException(nameof(filterInstances));
}
|
||
/// <inheritdoc />
/// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
/// <exception cref="PdfDocumentFormatException">
/// The filter entry is neither a name nor an array, or the array contains an element that is not a name.
/// </exception>
/// <exception cref="NotSupportedException">A named filter is not registered with this provider.</exception>
public IReadOnlyList<IFilter> GetFilters(DictionaryToken dictionary)
{
    if (dictionary is null)
    {
        throw new ArgumentNullException(nameof(dictionary));
    }

    // The filter entry is looked up under "Filter" and its alternative key "F".
    var token = dictionary.GetObjectOrDefault(NameToken.Filter, NameToken.F);

    switch (token)
    {
        case null:
            // No filter entry at all.
            return Array.Empty<IFilter>();
        case ArrayToken filters:
            var result = new IFilter[filters.Data.Count];
            for (var i = 0; i < filters.Data.Count; i++)
            {
                var filterToken = filters.Data[i];

                // Report a malformed array as a document format problem, in line with the
                // default branch below, instead of leaking an InvalidCastException.
                if (!(filterToken is NameToken filterName))
                {
                    throw new PdfDocumentFormatException($"The filter at index {i} of the stream's filter array was not a valid object. Expected name, instead got: {filterToken}.");
                }

                result[i] = GetFilterStrict(filterName.Data);
            }

            return result;
        case NameToken name:
            return new[] { GetFilterStrict(name.Data) };
        default:
            throw new PdfDocumentFormatException($"The filter for the stream was not a valid object. Expected name or array, instead got: {token}.");
    }
}
|
||
/// <inheritdoc />
public IReadOnlyList<IFilter> GetNamedFilters(IReadOnlyList<NameToken> names)
{
    if (names is null)
    {
        throw new ArgumentNullException(nameof(names));
    }

    // Resolve every name in order; an unregistered name makes GetFilterStrict throw.
    return names.Select(filterName => GetFilterStrict(filterName)).ToList();
}
|
||
/// <summary>
/// Look up the filter registered under <paramref name="name"/>.
/// </summary>
/// <exception cref="NotSupportedException">No filter is registered under that name.</exception>
private IFilter GetFilterStrict(string name)
{
    if (FilterInstances.TryGetValue(name, out var filter))
    {
        return filter;
    }

    throw new NotSupportedException($"The filter with the name {name} is not supported yet. Please raise an issue.");
}
|
||
/// <inheritdoc />
public IReadOnlyList<IFilter> GetAllFilters()
{
    // Several names can map to the same filter, so duplicates are removed.
    var uniqueFilters = FilterInstances.Values.Distinct();

    return uniqueFilters.ToList();
}
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters