Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ignore missing metadata group length field #269

Merged
merged 11 commits into from
Jun 17, 2023
17 changes: 13 additions & 4 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,11 @@ type ParseOption func(*parseOptSet)

// parseOptSet represents the flattened option set after all ParseOptions have been applied.
type parseOptSet struct {
skipMetadataReadOnNewParserInit bool
allowMismatchPixelDataLength bool
skipPixelData bool
skipProcessingPixelDataValue bool
skipMetadataReadOnNewParserInit bool
allowMismatchPixelDataLength bool
skipPixelData bool
skipProcessingPixelDataValue bool
allowErrorMetaElementGroupLength bool
faustoespinal-philips marked this conversation as resolved.
Show resolved Hide resolved
}

func toParseOptSet(opts ...ParseOption) parseOptSet {
Expand All @@ -243,6 +244,14 @@ func AllowMismatchPixelDataLength() ParseOption {
}
}

// AllowMissingMetaElementGroupLength allows parser to work around missing metaelement group length tag (0x0002,0x0000) by reading elements only
// in group 2.
func AllowMissingMetaElementGroupLength() ParseOption {
return func(set *parseOptSet) {
set.allowErrorMetaElementGroupLength = true
}
}

// SkipMetadataReadOnNewParserInit makes NewParser skip trying to parse metadata. This will make the Parser default to implicit little endian byte order.
// Any metatata tags found in the dataset will still be available when parsing.
func SkipMetadataReadOnNewParserInit() ParseOption {
Expand Down
8 changes: 8 additions & 0 deletions parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,14 @@ func TestParseFile_SkipProcessingPixelDataValue(t *testing.T) {
}
})
})
faustoespinal-philips marked this conversation as resolved.
Show resolved Hide resolved
t.Run("WithAllowErrorMetaElementGroupLength", func(t *testing.T) {
runForEveryTestFile(t, func(t *testing.T, filename string) {
dataset, err := dicom.ParseFile(filename, nil, dicom.AllowMissingMetaElementGroupLength())
if err != nil {
t.Errorf("Unexpected error parsing dataset: %v", dataset)
}
})
})
}

// BenchmarkParse runs sanity benchmarks over the sample files in testdata.
Expand Down
68 changes: 45 additions & 23 deletions read.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,6 @@ func (r *reader) readTag() (*tag.Tag, error) {
if gerr == nil && eerr == nil {
return &tag.Tag{Group: group, Element: element}, nil
}
// If the error is an io.EOF, return the error directly instead of the compound error later.
faustoespinal-philips marked this conversation as resolved.
Show resolved Hide resolved
// Once we target go1.20 we can use errors.Join: https://pkg.go.dev/errors#Join
if errors.Is(gerr, io.EOF) {
return nil, gerr
}
if errors.Is(eerr, io.EOF) {
return nil, eerr
}
return nil, fmt.Errorf("error reading tag: %v %v", gerr, eerr)
}

Expand Down Expand Up @@ -167,28 +159,58 @@ func (r *reader) readHeader() ([]*Element, error) {
return nil, err
}

metaElems := []*Element{maybeMetaLen} // TODO: maybe set capacity to a reasonable initial size
metaElementGroupLengthDefined := true
if maybeMetaLen.Tag != tag.FileMetaInformationGroupLength || maybeMetaLen.Value.ValueType() != Ints {
return nil, ErrorMetaElementGroupLength
// MetaInformationGroupLength is not present or of the wrong value type.
if !r.opts.allowErrorMetaElementGroupLength {
faustoespinal-philips marked this conversation as resolved.
Show resolved Hide resolved
return nil, ErrorMetaElementGroupLength
}
metaElementGroupLengthDefined = false
}

metaLen := maybeMetaLen.Value.GetValue().([]int)[0]
if metaElementGroupLengthDefined {
metaLen := maybeMetaLen.Value.GetValue().([]int)[0]

metaElems := []*Element{maybeMetaLen} // TODO: maybe set capacity to a reasonable initial size

// Read the metadata elements
err = r.rawReader.PushLimit(int64(metaLen))
if err != nil {
return nil, err
}
defer r.rawReader.PopLimit()
for !r.rawReader.IsLimitExhausted() {
elem, err := r.readElement(nil, nil)
// Read the metadata elements
err = r.rawReader.PushLimit(int64(metaLen))
if err != nil {
// TODO: see if we can skip over malformed elements somehow
return nil, err
}
// log.Printf("Metadata Element: %s\n", elem)
metaElems = append(metaElems, elem)
defer r.rawReader.PopLimit()
for !r.rawReader.IsLimitExhausted() {
elem, err := r.readElement(nil, nil)
if err != nil {
// TODO: see if we can skip over malformed elements somehow
return nil, err
}
// log.Printf("Metadata Element: %s\n", elem)
metaElems = append(metaElems, elem)
}
} else {
// We cannot use the limit functionality
debug.Log("Proceeding without metadata group length")
for {
// Lets peek into the tag field until we get to end-of-header
tag_bytes, err := r.rawReader.Peek(4)
faustoespinal-philips marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, ErrorMetaElementGroupLength
}
tg := tag.Tag{}
buff := bytes.NewBuffer(tag_bytes)
binary.Read(buff, binary.LittleEndian, &tg)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what would happen here if in the future an extra field were added to the tag struct in the future? It also sounds like unexported fields may lead to Read panicing (if we were to add on in the future)

It's a bit more verbose, but we may want to consider something like

if err := binary.Read(buff, binary.LittleEndian, &tg.Group); err != nil {
// handle err
}
if err := binary.Read(buff, binary.LittleEndian, &tg.Element); err != nil {
//handle err
}

Also don't forget to handle the errors on binary.Read as well :).

WDYT?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Certainly I can change it. I guess your point is that tag could change in a way that makes it incompatible with direct mapping from the DICOM header...

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, structs can/do change and if it did, it could lead to an unintentional breakage here. If the tests are good enough it's possible that regression would be protected, but it seems safer to do the slightly more verbose way to me for now!

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we make this update? Thanks!

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, not sure if this update was applied (it's not showing up for me). basically instead of reading into the tag.Tag directly, reading each field of the tag

if err := binary.Read(buff, binary.LittleEndian, &tg.Group); err != nil {
// handle err
}
if err := binary.Read(buff, binary.LittleEndian, &tg.Element); err != nil {
//handle err
}

debug.Logf("header-tag: %v", tg)
// Only read group 2 data
if tg.Group != 0x0002 {
break
}
elem, err := r.readElement(nil, nil)
if err != nil {
// TODO: see if we can skip over malformed elements somehow
return nil, err
}
metaElems = append(metaElems, elem)
}
}
return metaElems, nil
}
Expand Down
135 changes: 135 additions & 0 deletions read_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,141 @@ func TestReadPixelData_SkipPixelData(t *testing.T) {
}
}

// Used to encode the data from the generated headers.
type headerData struct {
// The byte encoded header data.
HeaderBytes *bytes.Buffer
// The decoded elements conforming the header.
Elements []*Element
}

// Write a collection of elements and return them as an encoded buffer of bytes.
func writeElements(elements []*Element) ([]byte, error) {
buff := bytes.Buffer{}
dcmWriter := NewWriter(&buff)
dcmWriter.SetTransferSyntax(binary.LittleEndian, true)

for _, e := range elements {
err := dcmWriter.WriteElement(e)
if err != nil {
return nil, err
}
}
data := buff.Bytes()
return data, nil
}

// Returns a fake DICOM group 2 header with the FileMetaInformationGroupLength tag missing (0x0002,0x0000).
func headerWithNoFileMetaInformationGroupLength() (*headerData, error) {
headerData := new(headerData)

elements := []*Element{
mustNewElement(tag.MediaStorageSOPClassUID, []string{"SecondaryCapture"}),
mustNewElement(tag.MediaStorageSOPInstanceUID, []string{"1.3.6.1.4.1.35190.4.1.20210608.607733549593"}),
mustNewElement(tag.TransferSyntaxUID, []string{"=RLELossless"}),
mustNewElement(tag.ImplementationClassUID, []string{"1.6.6.1.4.1.9590.100.1.0.100.4.0"}),
mustNewElement(tag.SOPInstanceUID, []string{"1.3.6.1.4.1.35190.4.1.20210608.607733549593"}),
}
data, err := writeElements(elements)
if err != nil {
return nil, err
}

// Construct valid DICOM header preamble.
magicWord := []byte("DICM")
preamble := make([]byte, 128)
preamble = append(preamble, magicWord...)
headerBytes := append(preamble, data...)
headerData.HeaderBytes = bytes.NewBuffer(headerBytes)
headerData.Elements = elements[0 : len(elements)-1]
return headerData, nil
}

// Returns a fake DICOM group 2 header with a FileMetaInformationGroupLength tag (0x0002,0x0000).
func headerWithFileMetaInformationGroupLength() (*headerData, error) {
headerData := new(headerData)

sopInstanceUidElement := mustNewElement(tag.SOPInstanceUID, []string{"1.3.6.1.4.1.35190.4.1.20210608.607733549593"})
elements := []*Element{
mustNewElement(tag.FileMetaInformationVersion, []byte{0x00, 0x01}),
mustNewElement(tag.MediaStorageSOPClassUID, []string{"1.2.276.0.7230010.3.1.0.1"}),
mustNewElement(tag.MediaStorageSOPInstanceUID, []string{"1.3.6.1.4.1.35190.4.1.20210608.607733549593"}),
mustNewElement(tag.TransferSyntaxUID, []string{"=RLELossless"}),
mustNewElement(tag.ImplementationClassUID, []string{"1.2.276.0.7230010.3.0.3.6.7"}),
mustNewElement(tag.ImplementationVersionName, []string{"OFFIS_DCMTK_367"}),
}
dataHeader, err := writeElements(elements)
if err != nil {
return nil, err
}
fileMetaInfoElement := mustNewElement(tag.FileMetaInformationGroupLength, []int{len(dataHeader)})
dataFileMetaInfo, err := writeElements([]*Element{fileMetaInfoElement})
if err != nil {
return nil, err
}
dataSopInstanceUid, err := writeElements([]*Element{sopInstanceUidElement})
if err != nil {
return nil, err
}
data := append(dataFileMetaInfo, dataHeader...)
data = append(data, dataSopInstanceUid...)

// Construct valid DICOM header preamble.
magicWord := []byte("DICM")
preamble := make([]byte, 128)
preamble = append(preamble, magicWord...)
headerBytes := append(preamble, data...)
headerData.HeaderBytes = bytes.NewBuffer(headerBytes)
headerData.Elements = append([]*Element{fileMetaInfoElement}, elements...)
return headerData, nil
}

func TestReadHeader_TryAllowErrorMetaElementGroupLength(t *testing.T) {
opts := parseOptSet{allowErrorMetaElementGroupLength: true}

t.Run("NoFileMetaInformationGroupLength", func(t *testing.T) {
dcmheaderNoInfoGrpLen, err := headerWithNoFileMetaInformationGroupLength()
if err != nil {
t.Fatalf("unsuccesful generation of fake header data")
} else {
r := &reader{
rawReader: dicomio.NewReader(bufio.NewReader(dcmheaderNoInfoGrpLen.HeaderBytes), binary.LittleEndian, int64(dcmheaderNoInfoGrpLen.HeaderBytes.Len())),
opts: opts,
}
r.rawReader.SetTransferSyntax(binary.LittleEndian, true)
parsedElements, err := r.readHeader()
faustoespinal-philips marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
t.Errorf("unsuccessful readHeader when parse option %v is turned on and header has no MetaElementGroupLength tag", opts.allowErrorMetaElementGroupLength)
}
// Ensure dataset read from readHeader and the test header are the same except for the ValueLength field.
if diff := cmp.Diff(parsedElements, dcmheaderNoInfoGrpLen.Elements, cmp.AllowUnexported(allValues...), cmpopts.IgnoreFields(Element{}, "ValueLength")); diff != "" {
t.Errorf("Elements parsed from test header do not match: %v", diff)
}
}
})

t.Run("WithFileMetaInformationGroupLength", func(t *testing.T) {
dcmHeaderInfoGrpLen, err := headerWithFileMetaInformationGroupLength()
if err != nil {
t.Fatalf("unsuccesful generation of fake header data with FileMetaInformationGroupLength")
} else {
r := &reader{
rawReader: dicomio.NewReader(bufio.NewReader(dcmHeaderInfoGrpLen.HeaderBytes), binary.LittleEndian, int64(dcmHeaderInfoGrpLen.HeaderBytes.Len())),
opts: opts,
}
r.rawReader.SetTransferSyntax(binary.LittleEndian, true)
parsedElements, err := r.readHeader()
if err != nil {
t.Errorf("unsuccesful readHeader when parse option %v is turned on and header has no MetaElementGroupLength tag", opts.allowErrorMetaElementGroupLength)
}
// Ensure dataset read from readHeader and the test header are the same except for the ValueLength field.
if diff := cmp.Diff(parsedElements, dcmHeaderInfoGrpLen.Elements, cmp.AllowUnexported(allValues...), cmpopts.IgnoreFields(Element{}, "ValueLength")); diff != "" {
t.Errorf("Elements parsed from test header do not match: %v", diff)
}
}
})
}

func TestReadPixelData_TrySkipProcessingPixelDataValue(t *testing.T) {
opts := parseOptSet{skipProcessingPixelDataValue: true}
valueBytes := []byte{1, 2, 3, 4, 5, 6}
Expand Down