Skip to content

Commit

Permalink
Refactor Xlsx Properties Reader code into a separate class (PHPOffice…
Browse files Browse the repository at this point in the history
…#1001)

* Unit tests for refactoring Spreadsheet properties
* Refactor Xlsx Properties Reader code into a separate class
  • Loading branch information
Mark Baker authored and guillaume-ro-fr committed Jun 12, 2019
1 parent 9f286d1 commit 9bbb8dc
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 55 deletions.
61 changes: 6 additions & 55 deletions src/PhpSpreadsheet/Reader/Xlsx.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use PhpOffice\PhpSpreadsheet\NamedRange;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Reader\Xlsx\Chart;
use PhpOffice\PhpSpreadsheet\Reader\Xlsx\Properties as PropertyReader;
use PhpOffice\PhpSpreadsheet\ReferenceHelper;
use PhpOffice\PhpSpreadsheet\RichText\RichText;
use PhpOffice\PhpSpreadsheet\Settings;
Expand Down Expand Up @@ -456,70 +457,20 @@ public function load($pFilename)
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);

$propertyReader = new PropertyReader($this->securityScanner, $excel->getProperties());
foreach ($rels->Relationship as $rel) {
switch ($rel['Type']) {
case 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties':
$xmlCore = simplexml_load_string(
$this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
if (is_object($xmlCore)) {
$xmlCore->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');
$xmlCore->registerXPathNamespace('dcterms', 'http://purl.org/dc/terms/');
$xmlCore->registerXPathNamespace('cp', 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties');
$docProps = $excel->getProperties();
$docProps->setCreator((string) self::getArrayItem($xmlCore->xpath('dc:creator')));
$docProps->setLastModifiedBy((string) self::getArrayItem($xmlCore->xpath('cp:lastModifiedBy')));
$docProps->setCreated(strtotime(self::getArrayItem($xmlCore->xpath('dcterms:created')))); //! respect xsi:type
$docProps->setModified(strtotime(self::getArrayItem($xmlCore->xpath('dcterms:modified')))); //! respect xsi:type
$docProps->setTitle((string) self::getArrayItem($xmlCore->xpath('dc:title')));
$docProps->setDescription((string) self::getArrayItem($xmlCore->xpath('dc:description')));
$docProps->setSubject((string) self::getArrayItem($xmlCore->xpath('dc:subject')));
$docProps->setKeywords((string) self::getArrayItem($xmlCore->xpath('cp:keywords')));
$docProps->setCategory((string) self::getArrayItem($xmlCore->xpath('cp:category')));
}
$propertyReader->readCoreProperties($this->getFromZipArchive($zip, "{$rel['Target']}"));

break;
case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties':
$xmlCore = simplexml_load_string(
$this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
if (is_object($xmlCore)) {
$docProps = $excel->getProperties();
if (isset($xmlCore->Company)) {
$docProps->setCompany((string) $xmlCore->Company);
}
if (isset($xmlCore->Manager)) {
$docProps->setManager((string) $xmlCore->Manager);
}
}
$propertyReader->readExtendedProperties($this->getFromZipArchive($zip, "{$rel['Target']}"));

break;
case 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties':
$xmlCore = simplexml_load_string(
$this->securityScanner->scan($this->getFromZipArchive($zip, "{$rel['Target']}")),
'SimpleXMLElement',
Settings::getLibXmlLoaderOptions()
);
if (is_object($xmlCore)) {
$docProps = $excel->getProperties();
/** @var SimpleXMLElement $xmlProperty */
foreach ($xmlCore as $xmlProperty) {
$cellDataOfficeAttributes = $xmlProperty->attributes();
if (isset($cellDataOfficeAttributes['name'])) {
$propertyName = (string) $cellDataOfficeAttributes['name'];
$cellDataOfficeChildren = $xmlProperty->children('http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes');
$attributeType = $cellDataOfficeChildren->getName();
$attributeValue = (string) $cellDataOfficeChildren->{$attributeType};
$attributeValue = Properties::convertProperty($attributeValue, $attributeType);
$attributeType = Properties::convertPropertyType($attributeType);
$docProps->setCustomProperty($propertyName, $attributeValue, $attributeType);
}
}
}
$propertyReader->readCustomProperties($this->getFromZipArchive($zip, "{$rel['Target']}"));

break;
//Ribbon
Expand Down
91 changes: 91 additions & 0 deletions src/PhpSpreadsheet/Reader/Xlsx/Properties.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
<?php

namespace PhpOffice\PhpSpreadsheet\Reader\Xlsx;

use PhpOffice\PhpSpreadsheet\Document\Properties as DocumentProperties;
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
use PhpOffice\PhpSpreadsheet\Settings;

/**
 * Reads the document-property parts of an Xlsx package (core, extended and
 * custom properties) and copies the values found there onto the supplied
 * DocumentProperties instance.
 */
class Properties
{
    /**
     * Scanner applied to every XML payload before it is parsed.
     *
     * @var XmlScanner
     */
    private $securityScanner;

    /**
     * Target object that receives the property values.
     *
     * @var DocumentProperties
     */
    private $docProps;

    /**
     * @param XmlScanner $securityScanner scanner run over raw XML before parsing
     * @param DocumentProperties $docProps properties object to populate
     */
    public function __construct(XmlScanner $securityScanner, DocumentProperties $docProps)
    {
        $this->securityScanner = $securityScanner;
        $this->docProps = $docProps;
    }

    /**
     * Scan the raw XML with the security scanner and parse the result.
     *
     * @param string $propertyData raw XML of a property part
     *
     * @return false|\SimpleXMLElement parsed document, or false when parsing fails
     */
    private function extractPropertyData($propertyData)
    {
        return simplexml_load_string(
            $this->securityScanner->scan($propertyData),
            'SimpleXMLElement',
            Settings::getLibXmlLoaderOptions()
        );
    }

    /**
     * Read the core properties (creator, dates, title, ...) from the given XML.
     *
     * Text properties that are absent from the XML are set to an empty string.
     * The created/modified setters are only called when the XML contains a
     * date that strtotime() can parse.
     *
     * @param string $propertyData raw XML of the core-properties part
     */
    public function readCoreProperties($propertyData)
    {
        $xmlCore = $this->extractPropertyData($propertyData);

        if (!is_object($xmlCore)) {
            return;
        }

        $xmlCore->registerXPathNamespace('dc', 'http://purl.org/dc/elements/1.1/');
        $xmlCore->registerXPathNamespace('dcterms', 'http://purl.org/dc/terms/');
        $xmlCore->registerXPathNamespace('cp', 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties');

        $this->docProps->setCreator((string) self::getArrayItem($xmlCore->xpath('dc:creator')));
        $this->docProps->setLastModifiedBy((string) self::getArrayItem($xmlCore->xpath('cp:lastModifiedBy')));

        //! respect xsi:type
        $created = self::getTimestamp($xmlCore->xpath('dcterms:created'));
        if ($created !== null) {
            $this->docProps->setCreated($created);
        }

        //! respect xsi:type
        $modified = self::getTimestamp($xmlCore->xpath('dcterms:modified'));
        if ($modified !== null) {
            $this->docProps->setModified($modified);
        }

        $this->docProps->setTitle((string) self::getArrayItem($xmlCore->xpath('dc:title')));
        $this->docProps->setDescription((string) self::getArrayItem($xmlCore->xpath('dc:description')));
        $this->docProps->setSubject((string) self::getArrayItem($xmlCore->xpath('dc:subject')));
        $this->docProps->setKeywords((string) self::getArrayItem($xmlCore->xpath('cp:keywords')));
        $this->docProps->setCategory((string) self::getArrayItem($xmlCore->xpath('cp:category')));
    }

    /**
     * Read the extended properties (Company, Manager) from the given XML.
     *
     * Each property is only set when its element is present.
     *
     * @param string $propertyData raw XML of the extended-properties part
     */
    public function readExtendedProperties($propertyData)
    {
        $xmlCore = $this->extractPropertyData($propertyData);

        if (is_object($xmlCore)) {
            if (isset($xmlCore->Company)) {
                $this->docProps->setCompany((string) $xmlCore->Company);
            }
            if (isset($xmlCore->Manager)) {
                $this->docProps->setManager((string) $xmlCore->Manager);
            }
        }
    }

    /**
     * Read the custom properties from the given XML.
     *
     * Every child element carrying a "name" attribute is registered as a
     * custom property; children without that attribute are ignored.
     *
     * @param string $propertyData raw XML of the custom-properties part
     */
    public function readCustomProperties($propertyData)
    {
        $xmlCore = $this->extractPropertyData($propertyData);

        if (is_object($xmlCore)) {
            foreach ($xmlCore as $xmlProperty) {
                /** @var \SimpleXMLElement $xmlProperty */
                $cellDataOfficeAttributes = $xmlProperty->attributes();
                if (isset($cellDataOfficeAttributes['name'])) {
                    $propertyName = (string) $cellDataOfficeAttributes['name'];
                    $cellDataOfficeChildren = $xmlProperty->children('http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes');

                    // The name of the first typed child element is used as the value type.
                    $attributeType = $cellDataOfficeChildren->getName();
                    $attributeValue = (string) $cellDataOfficeChildren->{$attributeType};
                    $attributeValue = DocumentProperties::convertProperty($attributeValue, $attributeType);
                    $attributeType = DocumentProperties::convertPropertyType($attributeType);
                    $this->docProps->setCustomProperty($propertyName, $attributeValue, $attributeType);
                }
            }
        }
    }

    /**
     * Convert the first node of an XPath result into a Unix timestamp.
     *
     * @param mixed $nodes result of SimpleXMLElement::xpath()
     *
     * @return null|int timestamp, or null when there is no node or its text
     *                  cannot be parsed by strtotime()
     */
    private static function getTimestamp($nodes)
    {
        $node = self::getArrayItem($nodes);
        if ($node === null) {
            return null;
        }

        // Explicit cast: strtotime() expects a string, not a SimpleXMLElement.
        $timestamp = strtotime((string) $node);

        return ($timestamp === false) ? null : $timestamp;
    }

    /**
     * Fetch one entry from an XPath result.
     *
     * SimpleXMLElement::xpath() does not always return an array, so anything
     * that is not an array is treated the same as a missing entry.
     *
     * @param mixed $array result of SimpleXMLElement::xpath()
     * @param int|string $key index of the wanted entry
     *
     * @return mixed the entry, or null when it does not exist
     */
    private static function getArrayItem($array, $key = 0)
    {
        return (is_array($array) && isset($array[$key])) ? $array[$key] : null;
    }
}
25 changes: 25 additions & 0 deletions tests/PhpSpreadsheetTests/Reader/XlsxTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,37 @@

namespace PhpOffice\PhpSpreadsheetTests\Reader;

use PhpOffice\PhpSpreadsheet\Document\Properties;
use PhpOffice\PhpSpreadsheet\Reader\Xlsx;
use PhpOffice\PhpSpreadsheet\Shared\File;
use PHPUnit\Framework\TestCase;

class XlsxTest extends TestCase
{
/**
 * Load the property test workbook and verify that its core, extended and
 * custom document properties are available on the loaded spreadsheet.
 */
public function testLoadWorkbookProperties()
{
    $workbook = (new Xlsx())->load('./data/Reader/XLSX/propertyTest.xlsx');
    $docProps = $workbook->getProperties();

    // Core properties.
    $this->assertEquals('Mark Baker', $docProps->getCreator());
    $this->assertEquals('Unit Testing', $docProps->getTitle());
    $this->assertEquals('Property Test', $docProps->getSubject());

    // Extended properties.
    $this->assertEquals('PHPOffice', $docProps->getCompany());
    $this->assertEquals('The Big Boss', $docProps->getManager());

    // Custom properties: the returned list is flipped so the property name
    // can be checked as an array key.
    $customNames = $docProps->getCustomProperties();
    $this->assertInternalType('array', $customNames);
    $namesAsKeys = array_flip($customNames);
    $this->assertArrayHasKey('Publisher', $namesAsKeys);
    $this->assertTrue($docProps->isCustomPropertySet('Publisher'));
    $this->assertEquals(
        Properties::PROPERTY_TYPE_STRING,
        $docProps->getCustomPropertyType('Publisher')
    );
    $this->assertEquals(
        'PHPOffice Suite',
        $docProps->getCustomPropertyValue('Publisher')
    );
}

/**
* Test load Xlsx file without cell reference.
*/
Expand Down
Binary file added tests/data/Reader/XLSX/propertyTest.xlsx
Binary file not shown.

0 comments on commit 9bbb8dc

Please sign in to comment.