From 68000aa262388b22d58e0381998cf25f4efa3dc6 Mon Sep 17 00:00:00 2001 From: mondrake Date: Tue, 23 Jan 2024 17:25:13 +0100 Subject: [PATCH] Dev 240122 (#78) --- .github/workflows/tests.yml | 2 +- specs/Jpeg/Jpeg.yaml | 2 + specs/Media.yaml | 1 - specs/Tiff/Tiff.yaml | 2 + src/Block/Exif/Exif.php | 9 +- src/Block/Jpeg.php | 154 +-------------------------------- src/Block/Tiff.php | 138 ----------------------------- src/Command/CompileCommand.php | 1 + src/ItemDefinition.php | 2 +- src/Media.php | 20 ++--- src/MediaTypeResolver.php | 12 +-- src/Model/BlockBase.php | 29 +++---- src/Parser/Jpeg/Jpeg.php | 149 +++++++++++++++++++++++++++++++ src/Parser/ParserBase.php | 14 +++ src/Parser/Tiff/Tiff.php | 147 +++++++++++++++++++++++++++++++ 15 files changed, 354 insertions(+), 328 deletions(-) create mode 100644 src/Parser/Jpeg/Jpeg.php create mode 100644 src/Parser/ParserBase.php create mode 100644 src/Parser/Tiff/Tiff.php diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 234da600c..1be551dd0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -51,7 +51,7 @@ jobs: php bin/fileeye-mediaprobe --version # Remove the comment to the lines below during development, to let # re-compile the specification PHP file from the YAML files. - # php bin/fileeye-mediaprobe compile + php bin/fileeye-mediaprobe compile - name: Image file dumps continue-on-error: true diff --git a/specs/Jpeg/Jpeg.yaml b/specs/Jpeg/Jpeg.yaml index 5d1ad4452..0187cef76 100644 --- a/specs/Jpeg/Jpeg.yaml +++ b/specs/Jpeg/Jpeg.yaml @@ -1,6 +1,8 @@ collection: Jpeg\Jpeg title: JPEG image class: FileEye\MediaProbe\Block\Jpeg +parser: FileEye\MediaProbe\Parser\Jpeg\Jpeg +writer: FileEye\MediaProbe\Writer\Jpeg\Jpeg DOMNode: jpeg items: 0xC0: diff --git a/specs/Media.yaml b/specs/Media.yaml index fde60d234..8c301a63a 100644 --- a/specs/Media.yaml +++ b/specs/Media.yaml @@ -1,4 +1,3 @@ collection: Media -class: FileEye\MediaProbe\Media DOMNode: media items: {} diff --git a/specs/Tiff/Tiff.yaml b/specs/Tiff/Tiff.yaml index 413782ac9..57b06c0f0 100644 --- a/specs/Tiff/Tiff.yaml +++ b/specs/Tiff/Tiff.yaml @@ -1,6 +1,8 @@ collection: Tiff\Tiff title: 'TIFF image data' class: FileEye\MediaProbe\Block\Tiff +parser: FileEye\MediaProbe\Parser\Tiff\Tiff +writer: FileEye\MediaProbe\Writer\Tiff\Tiff DOMNode: tiff items: 0: diff --git a/src/Block/Exif/Exif.php b/src/Block/Exif/Exif.php index be51e0e03..2c140a061 100644 --- a/src/Block/Exif/Exif.php +++ b/src/Block/Exif/Exif.php @@ -34,8 +34,13 @@ class Exif extends BlockBase protected function doParseData(DataElement $data): void { assert($this->debugInfo(['dataElement' => $data])); - if (Tiff::getTiffSegmentByteOrder($data, strlen(self::EXIF_HEADER)) !== null) { - $tiff = new ItemDefinition(CollectionFactory::get('Tiff\Tiff')); + + $tiff = new ItemDefinition( + collection: CollectionFactory::get('Tiff\Tiff'), + ); + $tiffParser = $tiff->collection->getPropertyValue('parser'); + + if ($tiffParser::getTiffSegmentByteOrder($data, strlen(self::EXIF_HEADER)) !== null) { $this->addBlock($tiff)->parseData($data, strlen(self::EXIF_HEADER), $data->getSize() - strlen(self::EXIF_HEADER)); } else { // We store the data as normal JPEG content if it could not be diff --git a/src/Block/Jpeg.php b/src/Block/Jpeg.php index 0961c43b3..d9ed4bd01 100644 --- a/src/Block/Jpeg.php +++ b/src/Block/Jpeg.php @@ -2,15 +2,15 @@ namespace FileEye\MediaProbe\Block; -use FileEye\MediaProbe\Model\BlockBase; use FileEye\MediaProbe\Collection\CollectionFactory; use FileEye\MediaProbe\Data\DataElement; use FileEye\MediaProbe\Data\DataException; +use FileEye\MediaProbe\Data\DataFormat; use FileEye\MediaProbe\Data\DataWindow; use FileEye\MediaProbe\Entry\Core\Undefined; use FileEye\MediaProbe\ItemDefinition; -use FileEye\MediaProbe\Data\DataFormat; use FileEye\MediaProbe\MediaProbe; +use FileEye\MediaProbe\Model\BlockBase; use FileEye\MediaProbe\Utility\ConvertBytes; /** @@ -27,154 +27,4 @@ class Jpeg extends BlockBase * JPEG header. */ const JPEG_HEADER = "\xFF\xD8\xFF"; - - /** - * {@inheritdoc} - */ - protected function doParseData(DataElement $data): void - { - assert($this->debugInfo(['dataElement' => $data])); - - // JPEG data is stored in big-endian format. - $data->setByteOrder(ConvertBytes::BIG_ENDIAN); - - // Run through the data to parse the segments in the image. After each - // segment is parsed, the offset will be moved forward, and after the - // last segment we will terminate. - $offset = 0; - while ($offset < $data->getSize()) { - // Get the next JPEG segment id offset. - try { - $new_offset = $this->getJpegSegmentIdOffset($data, $offset); - $segment_id = $segment_id ?? 0; - if ($new_offset !== $offset) { - // Add any trailing data from previous segment in a - // RawData block. - $this->error('Unexpected data found at end of JPEG segment {id}/{hexid} @ offset {offset}, size {size}', [ - 'id' => $segment_id, - 'hexid' => '0x' . strtoupper(dechex($segment_id)), - 'offset' => $data->getAbsoluteOffset($offset), - 'size' => $new_offset - $offset, - ]); - $trail = new ItemDefinition( - CollectionFactory::get('RawData', ['name' => 'trail']), - DataFormat::BYTE, - $offset - ); - $this->addBlock($trail)->parseData($data, $offset, $new_offset - $offset); - } - $offset = $new_offset; - } catch (DataException $e) { - $this->error($e->getMessage()); - return; - } - - // Get the JPEG segment id. - $segment_id = $data->getByte($offset + 1); - - // Warn if an unidentified segment is detected. - if (!in_array($segment_id, $this->getCollection()->listItemIds())) { - $this->warning('Invalid JPEG marker {id}/{hexid} found @ offset {offset}', [ - 'id' => $segment_id, - 'hexid' => '0x' . strtoupper(dechex($segment_id)), - 'offset' => $data->getAbsoluteOffset($offset), - ]); - } - - // Get the JPEG segment size. - $segment_collection = $this->getCollection()->getItemCollection($segment_id); - switch ($segment_collection->getPropertyValue('payload')) { - case 'none': - // The data window size is the JPEG delimiter byte and the - // segment identifier byte. - $segment_size = 2; - break; - case 'variable': - // Read the length of the segment. The data window size - // includes the JPEG delimiter byte, the segment identifier - // byte and two bytes used to store the segment length. - $segment_size = $data->getShort($offset + 2) + 2; - break; - case 'fixed': - // The data window size includes the JPEG delimiter byte - // and the segment identifier byte. - $segment_size = $segment_collection->getPropertyValue('components') + 2; - break; - case 'scan': - // In case of image scan segment, the window is to the end - // of the data. - $segment_size = null; - break; - } - - // Parse the MediaProbe JPEG segment data. - $segment_definition = new ItemDefinition($segment_collection); - $segment = $this->addBlock($segment_definition); - $segment->parseData($data, $offset, $segment_size); - - // Position to end of the segment. - $offset += $segment->getSize(); - } - - // Fail if SOS is missing. - if (!$this->getElement("jpegSegment[@name='SOS']")) { - $this->error('Missing SOS (Start Of Scan) JPEG marker'); - } - - // Fail if EOI is missing. - if (!$this->getElement("jpegSegment[@name='EOI']")) { - $this->error('Missing EOI (End Of Image) JPEG marker'); - } - } - - /** - * Determines the offset where the next JPEG segment id is found. - * - * JPEG sections start with 0xFF. The first byte that is not 0xFF is a - * marker (hopefully). - * - * @param DataElement $data_element - * The data element to be checked. - * @param int $offset - * The starting offset in the data element. - * - * @return int - * The found offset. - * - * @throws DataException - * In case of marker not found. - */ - protected function getJpegSegmentIdOffset(DataElement $data_element, int $offset): int - { - for ($i = $offset; $i < $offset + 128; $i++) { - if ($data_element->getByte($i) === Jpeg::JPEG_DELIMITER && $data_element->getByte($i + 1) !== Jpeg::JPEG_DELIMITER) { - return $i; - } - } - throw new DataException('JPEG marker not found @%d', $data_element->getAbsoluteOffset($offset)); - } - - /** - * Returns the MIME type of the image. - * - * @returns string - */ - public function getMimeType(): string - { - return 'image/jpeg'; - } - - /** - * Determines if the data is a JPEG image. - * - * @param DataElement $data_element - * The data element to be checked. - * - * @return bool - * TRUE if the data element is a JPEG image. - */ - public static function isDataMatchingFormat(DataElement $data_element): bool - { - return $data_element->getBytes(0, 3) === static::JPEG_HEADER; - } } diff --git a/src/Block/Tiff.php b/src/Block/Tiff.php index affd7009b..aa4763209 100644 --- a/src/Block/Tiff.php +++ b/src/Block/Tiff.php @@ -27,21 +27,9 @@ class Tiff extends BlockBase /** * The byte order of this TIFF segment. - * - * @var int */ protected int $byteOrder; - /** - * Returns the MIME type of the image. - * - * @returns string - */ - public function getMimeType(): string - { - return 'image/tiff'; - } - public function setByteOrder(int $byteOrder): self { $this->byteOrder = $byteOrder; @@ -53,84 +41,6 @@ public function getByteOrder(): int return $this->byteOrder; } - /** - * {@inheritdoc} - */ - protected function doParseData(DataElement $data): void - { - // Determine the byte order of the TIFF data. - $this->setByteOrder(self::getTiffSegmentByteOrder($data)); - $data->setByteOrder($this->getByteOrder()); - - assert($this->debugInfo(['dataElement' => $data])); - - // Starting IFD will be at offset 4 (2 bytes for byte order + 2 for header). - $ifd_offset = $data->getLong(4); - - // If the offset to first IFD is higher than 8, then there may be an - // image scan (TIFF) in between. Store that in a RawData block. - if ($ifd_offset > 8) { - $scan = new ItemDefinition( - CollectionFactory::get('RawData', ['name' => 'scan']), - DataFormat::BYTE, - $ifd_offset - 8 - ); - $this->addBlock($scan)->parseData($data, 8, $ifd_offset - 8); - } - - // Loops through IFDs. In fact we should only have IFD0 and IFD1. - for ($i = 0; $i <= 1; $i++) { - // Check data is accessible, warn otherwise. - if ($ifd_offset >= $data->getSize() || $ifd_offset + 4 > $data->getSize()) { - $this->warning( - 'Could not determine number of entries for {item}, overflow', - ['item' => $this->getCollection()->getItemCollection($i)->getPropertyValue('name')] - ); - continue; - } - - // Find number of tags in IFD and warn if not enough data to read them. - $ifd_tags_count = $data->getShort($ifd_offset); - if ($ifd_offset + $ifd_tags_count * 4 > $data->getSize()) { - $this->warning( - 'Invalid data for {item}', - ['item' => $this->getCollection()->getItemCollection($i)->getPropertyValue('name')] - ); - continue; - } - - // Create and load the IFDs. Note that the data element cannot - // be split in windows since any pointer will refer to the - // entire segment space. - $ifd_class = $this->getCollection()->getItemCollection($i)->getPropertyValue('class'); - $ifd_item = new ItemDefinition($this->getCollection()->getItemCollection($i), DataFormat::LONG, $ifd_tags_count, $ifd_offset, 0, $i); - $ifd = new $ifd_class($ifd_item, $this); - try { - $ifd->parseData($data); - } catch (DataException $e) { - $this->error('Error processing {ifd_name}: {msg}.', [ - 'ifd_name' => $this->getCollection()->getItemCollection($i)->getPropertyValue('name'), - 'msg' => $e->getMessage(), - ]); - continue; - } - - // Offset to next IFD. - $ifd_offset = $data->getLong($ifd_offset + $ifd_tags_count * 12 + 2); - - // If next IFD offset is 0 we are finished. - if ($ifd_offset === 0) { - break; - } - - // IFD1 shouldn't link further. - if ($i === 1) { - $this->error('IFD1 should not link to another IFD'); - break; - } - } - } - /** * {@inheritdoc} */ @@ -181,54 +91,6 @@ public function toBytes($order = ConvertBytes::LITTLE_ENDIAN, $offset = 0): stri return $bytes; } - /** - * Determines if the data is a TIFF image. - * - * @param DataElement $data_element - * The data element to be checked. - * - * @return bool - */ - public static function isDataMatchingFormat(DataElement $data_element): bool - { - return static::getTiffSegmentByteOrder($data_element) !== null; - } - - /** - * Returns the byte order of a TIFF segment. - * - * @return int|null - * The byte order of the TIFF segment in case data is a TIFF block, null - * otherwise. - */ - public static function getTiffSegmentByteOrder(DataElement $data_element, int $offset = 0): ?int - { - // There must be at least 8 bytes available: 2 bytes for the byte - // order, 2 bytes for the TIFF header, and 4 bytes for the offset to - // the first IFD. - if ($data_element->getSize() - $offset < 8) { - return null; - } - - // Byte order. - $order_string = $data_element->getBytes($offset, 2); - if ($order_string === 'II') { - $order = ConvertBytes::LITTLE_ENDIAN; - } elseif ($order_string === 'MM') { - $order = ConvertBytes::BIG_ENDIAN; - } else { - return null; - } - - // Verify the TIFF header. - $magic_string = $data_element->getBytes($offset + 2, 2); - if (ConvertBytes::toShort($magic_string, $order) !== self::TIFF_HEADER) { - return null; - } - - return $order; - } - public function collectInfo(array $context = []): array { $info = []; diff --git a/src/Command/CompileCommand.php b/src/Command/CompileCommand.php index 334223813..a3e81268d 100644 --- a/src/Command/CompileCommand.php +++ b/src/Command/CompileCommand.php @@ -41,6 +41,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int { $compiler = new SpecCompiler(); $compiler->compile($input->getArgument('spec-dir'), $input->getArgument('resource-dir')); + $output->writeln('Compile OK'); return(0); } } diff --git a/src/ItemDefinition.php b/src/ItemDefinition.php index d3b36b138..cfa415bb4 100644 --- a/src/ItemDefinition.php +++ b/src/ItemDefinition.php @@ -25,7 +25,7 @@ class ItemDefinition * The sequence of the item on its parent list. */ public function __construct( - protected CollectionInterface $collection, + public readonly CollectionInterface $collection, protected int $format = DataFormat::BYTE, protected int $valuesCount = 1, protected int $dataOffset = 0, diff --git a/src/Media.php b/src/Media.php index 37cb9a679..393e55402 100644 --- a/src/Media.php +++ b/src/Media.php @@ -112,12 +112,15 @@ public static function parseFromFile(string $path, ?LoggerInterface $externalLog */ public static function parse(DataElement $dataElement, ?LoggerInterface $externalLogger = null, ?string $failLevel = null): Media { - // Determine the media format. - $mediaType = new ItemDefinition(MediaTypeResolver::fromDataElement($dataElement)); + // Determine the media type. Throws MediaProbeException if not determinable. + $mediaType = new ItemDefinition( + collection: MediaTypeResolver::fromDataElement($dataElement), + ); - // Build the Media object and its immediate child, that represents the - // media format. Then parse the media according to the media format. + // Build the Media object and its immediate child, that represents the actual media. Then + // parse the media according to the media format. $media = new static($externalLogger, $failLevel); + $media->setAttribute('mimeType', (string) $mediaType->collection->getPropertyValue('item')); $media->getStopwatch()->start('media-parsing'); assert($media->debugInfo(['dataElement' => $dataElement])); $media->addBlock($mediaType)->parseData($dataElement); @@ -126,19 +129,12 @@ public static function parse(DataElement $dataElement, ?LoggerInterface $externa return $media; } - /** - * @todo remove, replace by parser - */ - protected function doParseData(DataElement $data): void - { - } - /** * Determines the MIME type of the media. */ public function getMimeType(): string { - return $this->getElement('*')->getMimeType(); + return $this->getAttribute('mimeType'); } /** diff --git a/src/MediaTypeResolver.php b/src/MediaTypeResolver.php index c112c8760..a9a257e05 100644 --- a/src/MediaTypeResolver.php +++ b/src/MediaTypeResolver.php @@ -24,15 +24,15 @@ class MediaTypeResolver */ public static function fromDataElement(DataElement $dataElement): CollectionInterface { - $mediaCollection = CollectionFactory::get('MediaType'); + $mediaTypes = CollectionFactory::get('MediaType'); // Loop through the 'Media' collection items, each of which defines a media format // collection, and checks if the media matches the format. When a match is found, return // the media format collection. - foreach ($mediaCollection->listItemIds() as $typeItem) { - $typeCollection = $mediaCollection->getItemCollection($typeItem); - $class = $typeCollection->getPropertyValue('class'); - if ($class::isDataMatchingFormat($dataElement)) { - return $typeCollection; + foreach ($mediaTypes->listItemIds() as $id) { + $type = $mediaTypes->getItemCollection($id); + $parser = $type->getPropertyValue('parser'); + if ($parser::isDataMatchingMediaType($dataElement)) { + return $type; } } throw new MediaProbeException('Media type not managed by MediaProbe'); diff --git a/src/Model/BlockBase.php b/src/Model/BlockBase.php index 5923874db..9050caf0d 100644 --- a/src/Model/BlockBase.php +++ b/src/Model/BlockBase.php @@ -98,39 +98,38 @@ public function getFormat(): int /** * Parse data into a MediaProbe block. * - * @param DataElement $data_element + * @param DataElement $dataElement * The data element that will provide the data. */ - public function parseData(DataElement $data_element, int $start = 0, ?int $size = null): void + public function parseData(DataElement $dataElement, int $start = 0, ?int $size = null): void { - $data = new DataWindow($data_element, $start, $size); + $data = new DataWindow($dataElement, $start, $size); $this->size = $data->getSize(); - $this->doParseData($data); + if ($this->getCollection()->hasProperty('parser')) { + $parserClass = $this->getCollection()->getPropertyValue('parser'); + $parser = new $parserClass($this); + $parser->parseData($data); + } else { + // @todo remove this when full parser model in place. + $this->doParseData($data); + } // Invoke post-parse callbacks. $this->executePostParseCallbacks($data); } - /** - * Parse data into a MediaProbe block. - * - * @param DataElement $data_element - * The data element that will provide the data. - */ - abstract protected function doParseData(DataElement $data); - /** * Invoke post-parse callbacks. * - * @param \FileEye\MediaProbe\Data\DataElement $data_element + * @param \FileEye\MediaProbe\Data\DataElement $dataElement * @todo */ - protected function executePostParseCallbacks(DataElement $data_element): static + protected function executePostParseCallbacks(DataElement $dataElement): static { $post_load_callbacks = $this->getCollection()->getPropertyValue('postParse'); if (!empty($post_load_callbacks)) { foreach ($post_load_callbacks as $callback) { - call_user_func($callback, $data_element, $this); + call_user_func($callback, $dataElement, $this); } } return $this; diff --git a/src/Parser/Jpeg/Jpeg.php b/src/Parser/Jpeg/Jpeg.php new file mode 100644 index 000000000..a5f89df8b --- /dev/null +++ b/src/Parser/Jpeg/Jpeg.php @@ -0,0 +1,149 @@ +block->debugInfo(['dataElement' => $data])); + + // JPEG data is stored in big-endian format. + $data->setByteOrder(ConvertBytes::BIG_ENDIAN); + + // Run through the data to parse the segments in the image. After each + // segment is parsed, the offset will be moved forward, and after the + // last segment we will terminate. + $offset = 0; + while ($offset < $data->getSize()) { + // Get the next JPEG segment id offset. + try { + $new_offset = $this->getJpegSegmentIdOffset($data, $offset); + $segment_id = $segment_id ?? 0; + if ($new_offset !== $offset) { + // Add any trailing data from previous segment in a + // RawData block. + $this->block->error('Unexpected data found at end of JPEG segment {id}/{hexid} @ offset {offset}, size {size}', [ + 'id' => $segment_id, + 'hexid' => '0x' . strtoupper(dechex($segment_id)), + 'offset' => $data->getAbsoluteOffset($offset), + 'size' => $new_offset - $offset, + ]); + $trail = new ItemDefinition( + CollectionFactory::get('RawData', ['name' => 'trail']), + DataFormat::BYTE, + $offset + ); + $this->block->addBlock($trail)->parseData($data, $offset, $new_offset - $offset); + } + $offset = $new_offset; + } catch (DataException $e) { + $this->block->error($e->getMessage()); + return; + } + + // Get the JPEG segment id. + $segment_id = $data->getByte($offset + 1); + + // Warn if an unidentified segment is detected. + if (!in_array($segment_id, $this->block->getCollection()->listItemIds())) { + $this->block->warning('Invalid JPEG marker {id}/{hexid} found @ offset {offset}', [ + 'id' => $segment_id, + 'hexid' => '0x' . strtoupper(dechex($segment_id)), + 'offset' => $data->getAbsoluteOffset($offset), + ]); + } + + // Get the JPEG segment size. + $segment_collection = $this->block->getCollection()->getItemCollection($segment_id); + $segment_size = match ($segment_collection->getPropertyValue('payload')) { + // The data window size is the JPEG delimiter byte and the segment identifier byte. + 'none' => 2, + // Read the length of the segment. The data window size includes the JPEG delimiter + // byte, the segment identifier byte and two bytes used to store the segment + // length. + 'variable' => $data->getShort($offset + 2) + 2, + // The data window size includes the JPEG delimiter byte and the segment identifier + // byte. + 'fixed' => $segment_collection->getPropertyValue('components') + 2, + // In case of image scan segment, the window is to the end of the data. + 'scan' => null, + }; + + // Parse the MediaProbe JPEG segment data. + $segment_definition = new ItemDefinition($segment_collection); + $segment = $this->block->addBlock($segment_definition); + $segment->parseData($data, $offset, $segment_size); + + // Position to end of the segment. + $offset += $segment->getSize(); + } + + // Fail if SOS is missing. + if (!$this->block->getElement("jpegSegment[@name='SOS']")) { + $this->block->error('Missing SOS (Start Of Scan) JPEG marker'); + } + + // Fail if EOI is missing. + if (!$this->block->getElement("jpegSegment[@name='EOI']")) { + $this->block->error('Missing EOI (End Of Image) JPEG marker'); + } + } + + /** + * Determines the offset where the next JPEG segment id is found. + * + * JPEG sections start with 0xFF. The first byte that is not 0xFF is a + * marker (hopefully). + * + * @param DataElement $data_element + * The data element to be checked. + * @param int $offset + * The starting offset in the data element. + * + * @return int + * The found offset. + * + * @throws DataException + * In case of marker not found. + */ + protected function getJpegSegmentIdOffset(DataElement $data_element, int $offset): int + { + for ($i = $offset; $i < $offset + 128; $i++) { + if ($data_element->getByte($i) === JpegBlock::JPEG_DELIMITER && $data_element->getByte($i + 1) !== JpegBlock::JPEG_DELIMITER) { + return $i; + } + } + throw new DataException('JPEG marker not found @%d', $data_element->getAbsoluteOffset($offset)); + } + + /** + * Determines if the data is a JPEG image. + * + * @param DataElement $data_element + * The data element to be checked. + * + * @return bool + * TRUE if the data element is a JPEG image. + */ + public static function isDataMatchingMediaType(DataElement $data_element): bool + { + return $data_element->getBytes(0, 3) === JpegBlock::JPEG_HEADER; + } +} diff --git a/src/Parser/ParserBase.php b/src/Parser/ParserBase.php new file mode 100644 index 000000000..c1c484351 --- /dev/null +++ b/src/Parser/ParserBase.php @@ -0,0 +1,14 @@ +block->setByteOrder($byteOrder); + $data->setByteOrder($byteOrder); + + assert($this->block->debugInfo(['dataElement' => $data])); + + // Starting IFD will be at offset 4 (2 bytes for byte order + 2 for header). + $ifd_offset = $data->getLong(4); + + // If the offset to first IFD is higher than 8, then there may be an + // image scan (TIFF) in between. Store that in a RawData block. + if ($ifd_offset > 8) { + $scan = new ItemDefinition( + collection: CollectionFactory::get('RawData', ['name' => 'scan']), + format: DataFormat::BYTE, + valuesCount: $ifd_offset - 8, + ); + $this->block->addBlock($scan)->parseData($data, 8, $ifd_offset - 8); + } + + // Loops through IFDs. In fact we should only have IFD0 and IFD1. + for ($i = 0; $i <= 1; $i++) { + // Check data is accessible, warn otherwise. + if ($ifd_offset >= $data->getSize() || $ifd_offset + 4 > $data->getSize()) { + $this->block->warning( + 'Could not determine number of entries for {item}, overflow', + ['item' => $this->block->getCollection()->getItemCollection($i)->getPropertyValue('name')] + ); + continue; + } + + // Find number of tags in IFD and warn if not enough data to read them. + $ifd_tags_count = $data->getShort($ifd_offset); + if ($ifd_offset + $ifd_tags_count * 4 > $data->getSize()) { + $this->block->warning( + 'Invalid data for {item}', + ['item' => $this->block->getCollection()->getItemCollection($i)->getPropertyValue('name')] + ); + continue; + } + + // Create and load the IFDs. Note that the data element cannot + // be split in windows since any pointer will refer to the + // entire segment space. + $ifd_class = $this->block->getCollection()->getItemCollection($i)->getPropertyValue('class'); + $ifd_item = new ItemDefinition($this->block->getCollection()->getItemCollection($i), DataFormat::LONG, $ifd_tags_count, $ifd_offset, 0, $i); + $ifd = new $ifd_class($ifd_item, $this->block); + try { + $ifd->parseData($data); + } catch (DataException $e) { + $this->block->error('Error processing {ifd_name}: {msg}.', [ + 'ifd_name' => $this->block->getCollection()->getItemCollection($i)->getPropertyValue('name'), + 'msg' => $e->getMessage(), + ]); + continue; + } + + // Offset to next IFD. + $ifd_offset = $data->getLong($ifd_offset + $ifd_tags_count * 12 + 2); + + // If next IFD offset is 0 we are finished. + if ($ifd_offset === 0) { + break; + } + + // IFD1 shouldn't link further. + if ($i === 1) { + $this->block->error('IFD1 should not link to another IFD'); + break; + } + } + } + + /** + * Determines if the data is a TIFF image. + * + * @param DataElement $data_element + * The data element to be checked. + * + * @return bool + */ + public static function isDataMatchingMediaType(DataElement $data_element): bool + { + return static::getTiffSegmentByteOrder($data_element) !== null; + } + + /** + * Returns the byte order of a TIFF segment. + * + * @return int|null + * The byte order of the TIFF segment in case data is a TIFF block, null + * otherwise. + */ + public static function getTiffSegmentByteOrder(DataElement $data_element, int $offset = 0): ?int + { + // There must be at least 8 bytes available: 2 bytes for the byte + // order, 2 bytes for the TIFF header, and 4 bytes for the offset to + // the first IFD. + if ($data_element->getSize() - $offset < 8) { + return null; + } + + // Byte order. + $order_string = $data_element->getBytes($offset, 2); + if ($order_string === 'II') { + $order = ConvertBytes::LITTLE_ENDIAN; + } elseif ($order_string === 'MM') { + $order = ConvertBytes::BIG_ENDIAN; + } else { + return null; + } + + // Verify the TIFF header. + $magic_string = $data_element->getBytes($offset + 2, 2); + if (ConvertBytes::toShort($magic_string, $order) !== TiffBlock::TIFF_HEADER) { + return null; + } + + return $order; + } + +}