Skip to content

Commit

Permalink
Dev 240122 (#78)
Browse files Browse the repository at this point in the history
  • Loading branch information
mondrake authored Jan 23, 2024
1 parent 7232d0a commit 68000aa
Show file tree
Hide file tree
Showing 15 changed files with 354 additions and 328 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
php bin/fileeye-mediaprobe --version
# Uncomment the line below during development to re-compile the
# specification PHP file from the YAML files.
# php bin/fileeye-mediaprobe compile
php bin/fileeye-mediaprobe compile
- name: Image file dumps
continue-on-error: true
Expand Down
2 changes: 2 additions & 0 deletions specs/Jpeg/Jpeg.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
collection: Jpeg\Jpeg
title: JPEG image
class: FileEye\MediaProbe\Block\Jpeg
parser: FileEye\MediaProbe\Parser\Jpeg\Jpeg
writer: FileEye\MediaProbe\Writer\Jpeg\Jpeg
DOMNode: jpeg
items:
0xC0:
Expand Down
1 change: 0 additions & 1 deletion specs/Media.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
collection: Media
class: FileEye\MediaProbe\Media
DOMNode: media
items: {}
2 changes: 2 additions & 0 deletions specs/Tiff/Tiff.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
collection: Tiff\Tiff
title: 'TIFF image data'
class: FileEye\MediaProbe\Block\Tiff
parser: FileEye\MediaProbe\Parser\Tiff\Tiff
writer: FileEye\MediaProbe\Writer\Tiff\Tiff
DOMNode: tiff
items:
0:
Expand Down
9 changes: 7 additions & 2 deletions src/Block/Exif/Exif.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,13 @@ class Exif extends BlockBase
protected function doParseData(DataElement $data): void
{
assert($this->debugInfo(['dataElement' => $data]));
if (Tiff::getTiffSegmentByteOrder($data, strlen(self::EXIF_HEADER)) !== null) {
$tiff = new ItemDefinition(CollectionFactory::get('Tiff\Tiff'));

$tiff = new ItemDefinition(
collection: CollectionFactory::get('Tiff\Tiff'),
);
$tiffParser = $tiff->collection->getPropertyValue('parser');

if ($tiffParser::getTiffSegmentByteOrder($data, strlen(self::EXIF_HEADER)) !== null) {
$this->addBlock($tiff)->parseData($data, strlen(self::EXIF_HEADER), $data->getSize() - strlen(self::EXIF_HEADER));
} else {
// We store the data as normal JPEG content if it could not be
Expand Down
154 changes: 2 additions & 152 deletions src/Block/Jpeg.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@

namespace FileEye\MediaProbe\Block;

use FileEye\MediaProbe\Model\BlockBase;
use FileEye\MediaProbe\Collection\CollectionFactory;
use FileEye\MediaProbe\Data\DataElement;
use FileEye\MediaProbe\Data\DataException;
use FileEye\MediaProbe\Data\DataFormat;
use FileEye\MediaProbe\Data\DataWindow;
use FileEye\MediaProbe\Entry\Core\Undefined;
use FileEye\MediaProbe\ItemDefinition;
use FileEye\MediaProbe\Data\DataFormat;
use FileEye\MediaProbe\MediaProbe;
use FileEye\MediaProbe\Model\BlockBase;
use FileEye\MediaProbe\Utility\ConvertBytes;

/**
Expand All @@ -27,154 +27,4 @@ class Jpeg extends BlockBase
* JPEG header.
*/
const JPEG_HEADER = "\xFF\xD8\xFF";

/**
* {@inheritdoc}
*/
protected function doParseData(DataElement $data): void
{
// Walks the JPEG byte stream segment by segment, adding one child block
// per segment found (plus a RawData 'trail' block for any stray bytes
// between two segments), then logs an error if no SOS or no EOI segment
// element can be found among the children.
assert($this->debugInfo(['dataElement' => $data]));

// JPEG data is stored in big-endian format.
$data->setByteOrder(ConvertBytes::BIG_ENDIAN);

// Run through the data to parse the segments in the image. After each
// segment is parsed, the offset will be moved forward, and after the
// last segment we will terminate.
$offset = 0;
while ($offset < $data->getSize()) {
// Get the next JPEG segment id offset.
try {
$new_offset = $this->getJpegSegmentIdOffset($data, $offset);
// $segment_id is only assigned further down in the loop body, so on
// the first iteration it is still unset and 0 is used in the message.
$segment_id = $segment_id ?? 0;
if ($new_offset !== $offset) {
// Add any trailing data from previous segment in a
// RawData block.
$this->error('Unexpected data found at end of JPEG segment {id}/{hexid} @ offset {offset}, size {size}', [
'id' => $segment_id,
'hexid' => '0x' . strtoupper(dechex($segment_id)),
'offset' => $data->getAbsoluteOffset($offset),
'size' => $new_offset - $offset,
]);
$trail = new ItemDefinition(
CollectionFactory::get('RawData', ['name' => 'trail']),
DataFormat::BYTE,
$offset
);
$this->addBlock($trail)->parseData($data, $offset, $new_offset - $offset);
}
$offset = $new_offset;
} catch (DataException $e) {
// A DataException raised while locating the next marker or while
// storing the trailing data ends parsing here; the SOS/EOI checks
// after the loop are skipped by this early return.
$this->error($e->getMessage());
return;
}

// Get the JPEG segment id.
$segment_id = $data->getByte($offset + 1);

// Warn if an unidentified segment is detected.
if (!in_array($segment_id, $this->getCollection()->listItemIds())) {
$this->warning('Invalid JPEG marker {id}/{hexid} found @ offset {offset}', [
'id' => $segment_id,
'hexid' => '0x' . strtoupper(dechex($segment_id)),
'offset' => $data->getAbsoluteOffset($offset),
]);
}

// Get the JPEG segment size.
// NOTE(review): the switch has no default case, so for any other
// 'payload' value $segment_size stays unset (first iteration) or keeps
// the value computed for the previous segment -- confirm that the specs
// only ever use these four values.
$segment_collection = $this->getCollection()->getItemCollection($segment_id);
switch ($segment_collection->getPropertyValue('payload')) {
case 'none':
// The data window size is the JPEG delimiter byte and the
// segment identifier byte.
$segment_size = 2;
break;
case 'variable':
// Read the length of the segment. The data window size
// includes the JPEG delimiter byte, the segment identifier
// byte and two bytes used to store the segment length.
$segment_size = $data->getShort($offset + 2) + 2;
break;
case 'fixed':
// The data window size includes the JPEG delimiter byte
// and the segment identifier byte.
$segment_size = $segment_collection->getPropertyValue('components') + 2;
break;
case 'scan':
// In case of image scan segment, the window is to the end
// of the data.
$segment_size = null;
break;
}

// Parse the MediaProbe JPEG segment data.
$segment_definition = new ItemDefinition($segment_collection);
$segment = $this->addBlock($segment_definition);
$segment->parseData($data, $offset, $segment_size);

// Position to end of the segment.
// The advance uses the size reported by the parsed segment block, not
// the $segment_size computed above.
$offset += $segment->getSize();
}

// Fail if SOS is missing.
if (!$this->getElement("jpegSegment[@name='SOS']")) {
$this->error('Missing SOS (Start Of Scan) JPEG marker');
}

// Fail if EOI is missing.
if (!$this->getElement("jpegSegment[@name='EOI']")) {
$this->error('Missing EOI (End Of Image) JPEG marker');
}
}

/**
* Determines the offset where the next JPEG segment id is found.
*
* JPEG sections start with 0xFF. The first byte that is not 0xFF is a
* marker (hopefully).
*
* @param DataElement $data_element
* The data element to be checked.
* @param int $offset
* The starting offset in the data element.
*
* @return int
* The found offset.
*
* @throws DataException
* In case of marker not found.
*/
protected function getJpegSegmentIdOffset(DataElement $data_element, int $offset): int
{
    // Scan at most 128 positions starting at $offset for a delimiter byte
    // that is immediately followed by a non-delimiter byte.
    $delimiter = Jpeg::JPEG_DELIMITER;
    $scanLimit = $offset + 128;
    $position = $offset;

    while ($position < $scanLimit) {
        // Not a delimiter at this position: move on without reading the
        // following byte.
        if ($data_element->getByte($position) !== $delimiter) {
            $position++;
            continue;
        }

        // Delimiter followed by something else: this is the offset we want.
        if ($data_element->getByte($position + 1) !== $delimiter) {
            return $position;
        }

        // Delimiter followed by another delimiter: keep scanning.
        $position++;
    }

    // Nothing found within the scan window.
    throw new DataException('JPEG marker not found @%d', $data_element->getAbsoluteOffset($offset));
}

/**
* Returns the MIME type of the image.
*
* @return string
*/
public function getMimeType(): string
{
    // The MIME type reported for this block is a fixed string.
    $mimeType = 'image/jpeg';

    return $mimeType;
}

/**
* Determines if the data is a JPEG image.
*
* @param DataElement $data_element
* The data element to be checked.
*
* @return bool
* TRUE if the data element is a JPEG image.
*/
public static function isDataMatchingFormat(DataElement $data_element): bool
{
    // Compare the first three bytes of the data with the JPEG header
    // constant (resolved through late static binding).
    $leadingBytes = $data_element->getBytes(0, 3);

    return static::JPEG_HEADER === $leadingBytes;
}
}
138 changes: 0 additions & 138 deletions src/Block/Tiff.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,9 @@ class Tiff extends BlockBase

/**
* The byte order of this TIFF segment.
*
* @var int
*/
protected int $byteOrder;

/**
* Returns the MIME type of the image.
*
* @return string
*/
public function getMimeType(): string
{
    // The MIME type reported for this block is a fixed string.
    $mimeType = 'image/tiff';

    return $mimeType;
}

public function setByteOrder(int $byteOrder): self
{
$this->byteOrder = $byteOrder;
Expand All @@ -53,84 +41,6 @@ public function getByteOrder(): int
return $this->byteOrder;
}

/**
* {@inheritdoc}
*/
protected function doParseData(DataElement $data): void
{
// Reads the offset of the first IFD from the TIFF header, stores any bytes
// lying between the 8-byte header and that IFD in a RawData 'scan' block,
// then parses at most two IFDs (items 0 and 1 of this block's collection),
// following the next-IFD pointer stored after each IFD's entries.

// Determine the byte order of the TIFF data.
// NOTE(review): getTiffSegmentByteOrder() is declared to return ?int while
// setByteOrder() takes an int -- presumably callers verify the format
// before parsing; confirm.
$this->setByteOrder(self::getTiffSegmentByteOrder($data));
$data->setByteOrder($this->getByteOrder());

assert($this->debugInfo(['dataElement' => $data]));

// Starting IFD will be at offset 4 (2 bytes for byte order + 2 for header).
$ifd_offset = $data->getLong(4);

// If the offset to first IFD is higher than 8, then there may be an
// image scan (TIFF) in between. Store that in a RawData block.
if ($ifd_offset > 8) {
$scan = new ItemDefinition(
CollectionFactory::get('RawData', ['name' => 'scan']),
DataFormat::BYTE,
$ifd_offset - 8
);
$this->addBlock($scan)->parseData($data, 8, $ifd_offset - 8);
}

// Loops through IFDs. In fact we should only have IFD0 and IFD1.
for ($i = 0; $i <= 1; $i++) {
// Check data is accessible, warn otherwise.
// Note that 'continue' leaves $ifd_offset untouched, so the next
// iteration re-checks the same offset for the next item.
if ($ifd_offset >= $data->getSize() || $ifd_offset + 4 > $data->getSize()) {
$this->warning(
'Could not determine number of entries for {item}, overflow',
['item' => $this->getCollection()->getItemCollection($i)->getPropertyValue('name')]
);
continue;
}

// Find number of tags in IFD and warn if not enough data to read them.
// NOTE(review): this bound counts 4 bytes per tag, whereas the next-IFD
// pointer below is read at $ifd_offset + $ifd_tags_count * 12 + 2; the
// check looks weaker than the read it guards -- confirm this is intended.
$ifd_tags_count = $data->getShort($ifd_offset);
if ($ifd_offset + $ifd_tags_count * 4 > $data->getSize()) {
$this->warning(
'Invalid data for {item}',
['item' => $this->getCollection()->getItemCollection($i)->getPropertyValue('name')]
);
continue;
}

// Create and load the IFDs. Note that the data element cannot
// be split in windows since any pointer will refer to the
// entire segment space.
// The IFD object is instantiated directly from the class name held in
// the item collection's 'class' property, with this block as parent
// argument, rather than through addBlock().
$ifd_class = $this->getCollection()->getItemCollection($i)->getPropertyValue('class');
$ifd_item = new ItemDefinition($this->getCollection()->getItemCollection($i), DataFormat::LONG, $ifd_tags_count, $ifd_offset, 0, $i);
$ifd = new $ifd_class($ifd_item, $this);
try {
$ifd->parseData($data);
} catch (DataException $e) {
// A failing IFD is logged and skipped; $ifd_offset is not advanced.
$this->error('Error processing {ifd_name}: {msg}.', [
'ifd_name' => $this->getCollection()->getItemCollection($i)->getPropertyValue('name'),
'msg' => $e->getMessage(),
]);
continue;
}

// Offset to next IFD.
$ifd_offset = $data->getLong($ifd_offset + $ifd_tags_count * 12 + 2);

// If next IFD offset is 0 we are finished.
if ($ifd_offset === 0) {
break;
}

// IFD1 shouldn't link further.
if ($i === 1) {
$this->error('IFD1 should not link to another IFD');
break;
}
}
}

/**
* {@inheritdoc}
*/
Expand Down Expand Up @@ -181,54 +91,6 @@ public function toBytes($order = ConvertBytes::LITTLE_ENDIAN, $offset = 0): stri
return $bytes;
}

/**
* Determines if the data is a TIFF image.
*
* @param DataElement $data_element
* The data element to be checked.
*
* @return bool
*/
public static function isDataMatchingFormat(DataElement $data_element): bool
{
    // The data is considered TIFF when a byte order can be determined
    // from its header.
    $detectedOrder = static::getTiffSegmentByteOrder($data_element);

    return !is_null($detectedOrder);
}

/**
* Returns the byte order of a TIFF segment.
*
* @return int|null
* The byte order of the TIFF segment in case data is a TIFF block, null
* otherwise.
*/
public static function getTiffSegmentByteOrder(DataElement $data_element, int $offset = 0): ?int
{
    // A TIFF header needs 8 bytes from $offset: 2 for the byte order
    // mark, 2 for the magic number and 4 for the first IFD offset.
    $available = $data_element->getSize() - $offset;
    if ($available < 8) {
        return null;
    }

    // Map the two-character byte order mark to its constant; anything
    // other than 'II' or 'MM' means this is not a TIFF segment.
    $byteOrder = match ($data_element->getBytes($offset, 2)) {
        'II' => ConvertBytes::LITTLE_ENDIAN,
        'MM' => ConvertBytes::BIG_ENDIAN,
        default => null,
    };
    if ($byteOrder === null) {
        return null;
    }

    // The two bytes after the mark, decoded in the detected byte order,
    // must equal the TIFF header constant.
    $magic = ConvertBytes::toShort($data_element->getBytes($offset + 2, 2), $byteOrder);

    return $magic === self::TIFF_HEADER ? $byteOrder : null;
}

public function collectInfo(array $context = []): array
{
$info = [];
Expand Down
1 change: 1 addition & 0 deletions src/Command/CompileCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int
{
$compiler = new SpecCompiler();
$compiler->compile($input->getArgument('spec-dir'), $input->getArgument('resource-dir'));
$output->writeln('Compile OK');
return(0);
}
}
2 changes: 1 addition & 1 deletion src/ItemDefinition.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class ItemDefinition
* The sequence of the item on its parent list.
*/
public function __construct(
protected CollectionInterface $collection,
public readonly CollectionInterface $collection,
protected int $format = DataFormat::BYTE,
protected int $valuesCount = 1,
protected int $dataOffset = 0,
Expand Down
Loading

0 comments on commit 68000aa

Please sign in to comment.