diff --git a/src/PhpSpreadsheet/Document/Properties.php b/src/PhpSpreadsheet/Document/Properties.php index 162e818036..302afee79e 100644 --- a/src/PhpSpreadsheet/Document/Properties.php +++ b/src/PhpSpreadsheet/Document/Properties.php @@ -107,6 +107,8 @@ class Properties */ private $customProperties = []; + private string $hyperlinkBase = ''; + /** * Create a new Document Properties instance. */ @@ -534,4 +536,16 @@ public static function convertPropertyType(string $propertyType): string { return self::PROPERTY_TYPE_ARRAY[$propertyType] ?? self::PROPERTY_TYPE_UNKNOWN; } + + public function getHyperlinkBase(): string + { + return $this->hyperlinkBase; + } + + public function setHyperlinkBase(string $hyperlinkBase): self + { + $this->hyperlinkBase = $hyperlinkBase; + + return $this; + } } diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index 377a8add06..fa3db908ef 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -8,6 +8,7 @@ use DOMText; use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Cell\DataType; +use PhpOffice\PhpSpreadsheet\Document\Properties; use PhpOffice\PhpSpreadsheet\Helper\Dimension as CssDimension; use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner; use PhpOffice\PhpSpreadsheet\Spreadsheet; @@ -685,10 +686,94 @@ public function loadIntoExisting($filename, Spreadsheet $spreadsheet) if ($loaded === false) { throw new Exception('Failed to load ' . $filename . ' as a DOM Document', 0, $e ?? null); } + self::loadProperties($dom, $spreadsheet); return $this->loadDocument($dom, $spreadsheet); } + private static function loadProperties(DOMDocument $dom, Spreadsheet $spreadsheet): void + { + $properties = $spreadsheet->getProperties(); + foreach ($dom->getElementsByTagName('meta') as $meta) { + $metaContent = (string) $meta->getAttribute('content'); + if ($metaContent !== '') { + $metaName = (string) $meta->getAttribute('name'); + switch ($metaName) { + case 'author': + $properties->setCreator($metaContent); + + break; + case 'category': + $properties->setCategory($metaContent); + + break; + case 'company': + $properties->setCompany($metaContent); + + break; + case 'created': + $properties->setCreated($metaContent); + + break; + case 'description': + $properties->setDescription($metaContent); + + break; + case 'keywords': + $properties->setKeywords($metaContent); + + break; + case 'lastModifiedBy': + $properties->setLastModifiedBy($metaContent); + + break; + case 'manager': + $properties->setManager($metaContent); + + break; + case 'modified': + $properties->setModified($metaContent); + + break; + case 'subject': + $properties->setSubject($metaContent); + + break; + case 'title': + $properties->setTitle($metaContent); + + break; + default: + if (preg_match('/^custom[.](bool|date|float|int|string)[.](.+)$/', $metaName, $matches) === 1) { + switch ($matches[1]) { + case 'bool': + $properties->setCustomProperty($matches[2], (bool) $metaContent, Properties::PROPERTY_TYPE_BOOLEAN); + + break; + case 'float': + $properties->setCustomProperty($matches[2], (float) $metaContent, Properties::PROPERTY_TYPE_FLOAT); + + break; + case 'int': + $properties->setCustomProperty($matches[2], (int) $metaContent, Properties::PROPERTY_TYPE_INTEGER); + + break; + case 'date': + $properties->setCustomProperty($matches[2], $metaContent, Properties::PROPERTY_TYPE_DATE); + + break; + default: // string + $properties->setCustomProperty($matches[2], $metaContent, Properties::PROPERTY_TYPE_STRING); + } + } + } + } + } + if (!empty($dom->baseURI)) { + $properties->setHyperlinkBase($dom->baseURI); + } + } + private static function replaceNonAscii(array $matches): string { return '&#' . mb_ord($matches[0], 'UTF-8') . ';'; @@ -719,8 +804,10 @@ public function loadFromString($content, ?Spreadsheet $spreadsheet = null): Spre if ($loaded === false) { throw new Exception('Failed to load content as a DOM Document', 0, $e ?? null); } + $spreadsheet = $spreadsheet ?? new Spreadsheet(); + self::loadProperties($dom, $spreadsheet); - return $this->loadDocument($dom, $spreadsheet ?? new Spreadsheet()); + return $this->loadDocument($dom, $spreadsheet); } /** diff --git a/src/PhpSpreadsheet/Reader/Xlsx/Properties.php b/src/PhpSpreadsheet/Reader/Xlsx/Properties.php index 72addffd5b..0d4701afac 100644 --- a/src/PhpSpreadsheet/Reader/Xlsx/Properties.php +++ b/src/PhpSpreadsheet/Reader/Xlsx/Properties.php @@ -73,6 +73,9 @@ public function readExtendedProperties(string $propertyData): void if (isset($xmlCore->Manager)) { $this->docProps->setManager((string) $xmlCore->Manager); } + if (isset($xmlCore->HyperlinkBase)) { + $this->docProps->setHyperlinkBase((string) $xmlCore->HyperlinkBase); + } } } diff --git a/src/PhpSpreadsheet/Reader/Xml/Properties.php b/src/PhpSpreadsheet/Reader/Xml/Properties.php index f0346ed02f..e216c254da 100644 --- a/src/PhpSpreadsheet/Reader/Xml/Properties.php +++ b/src/PhpSpreadsheet/Reader/Xml/Properties.php @@ -92,6 +92,10 @@ protected function processStandardProperty( case 'Manager': $docProps->setManager($stringValue); + break; + case 'HyperlinkBase': + $docProps->setHyperlinkBase($stringValue); + break; case 'Keywords': $docProps->setKeywords($stringValue); @@ -110,17 +114,10 @@ protected function processCustomProperty( ?SimpleXMLElement $propertyValue, SimpleXMLElement $propertyAttributes ): void { - $propertyType = DocumentProperties::PROPERTY_TYPE_UNKNOWN; - switch ((string) $propertyAttributes) { - case 'string': - $propertyType = DocumentProperties::PROPERTY_TYPE_STRING; - $propertyValue = trim((string) $propertyValue); - - break; case 'boolean': $propertyType = DocumentProperties::PROPERTY_TYPE_BOOLEAN; - $propertyValue = (bool) $propertyValue; + $propertyValue = (bool) (string) $propertyValue; break; case 'integer': @@ -134,9 +131,15 @@ protected function processCustomProperty( break; case 'dateTime.tz': + case 'dateTime.iso8601tz': $propertyType = DocumentProperties::PROPERTY_TYPE_DATE; $propertyValue = trim((string) $propertyValue); + break; + default: + $propertyType = DocumentProperties::PROPERTY_TYPE_STRING; + $propertyValue = trim((string) $propertyValue); + break; } diff --git a/src/PhpSpreadsheet/Writer/Html.php b/src/PhpSpreadsheet/Writer/Html.php index 575197aecb..842998f9eb 100644 --- a/src/PhpSpreadsheet/Writer/Html.php +++ b/src/PhpSpreadsheet/Writer/Html.php @@ -7,9 +7,11 @@ use PhpOffice\PhpSpreadsheet\Cell\Cell; use PhpOffice\PhpSpreadsheet\Cell\Coordinate; use PhpOffice\PhpSpreadsheet\Chart\Chart; +use PhpOffice\PhpSpreadsheet\Document\Properties; use PhpOffice\PhpSpreadsheet\RichText\RichText; use PhpOffice\PhpSpreadsheet\RichText\Run; use PhpOffice\PhpSpreadsheet\Settings; +use PhpOffice\PhpSpreadsheet\Shared\Date; use PhpOffice\PhpSpreadsheet\Shared\Drawing as SharedDrawing; use PhpOffice\PhpSpreadsheet\Shared\File; use PhpOffice\PhpSpreadsheet\Shared\Font as SharedFont; @@ -342,13 +344,21 @@ public function writeAllSheets() private static function generateMeta(?string $val, string $desc): string { - return $val + return ($val || $val === '0') ? (' ' . PHP_EOL) : ''; } public const BODY_LINE = ' ' . PHP_EOL; + private const CUSTOM_TO_META = [ + Properties::PROPERTY_TYPE_BOOLEAN => 'bool', + Properties::PROPERTY_TYPE_DATE => 'date', + Properties::PROPERTY_TYPE_FLOAT => 'float', + Properties::PROPERTY_TYPE_INTEGER => 'int', + Properties::PROPERTY_TYPE_STRING => 'string', + ]; + /** * Generate HTML header. * @@ -374,6 +384,36 @@ public function generateHTMLHeader($includeStyles = false) $html .= self::generateMeta($properties->getCategory(), 'category'); $html .= self::generateMeta($properties->getCompany(), 'company'); $html .= self::generateMeta($properties->getManager(), 'manager'); + $html .= self::generateMeta($properties->getLastModifiedBy(), 'lastModifiedBy'); + $date = Date::dateTimeFromTimestamp((string) $properties->getCreated()); + $date->setTimeZone(Date::getDefaultOrLocalTimeZone()); + $html .= self::generateMeta($date->format(DATE_W3C), 'created'); + $date = Date::dateTimeFromTimestamp((string) $properties->getModified()); + $date->setTimeZone(Date::getDefaultOrLocalTimeZone()); + $html .= self::generateMeta($date->format(DATE_W3C), 'modified'); + + $customProperties = $properties->getCustomProperties(); + foreach ($customProperties as $customProperty) { + $propertyValue = $properties->getCustomPropertyValue($customProperty); + $propertyType = $properties->getCustomPropertyType($customProperty); + $propertyQualifier = self::CUSTOM_TO_META[$propertyType] ?? null; + if ($propertyQualifier !== null) { + if ($propertyType === Properties::PROPERTY_TYPE_BOOLEAN) { + $propertyValue = $propertyValue ? '1' : '0'; + } elseif ($propertyType === Properties::PROPERTY_TYPE_DATE) { + $date = Date::dateTimeFromTimestamp((string) $propertyValue); + $date->setTimeZone(Date::getDefaultOrLocalTimeZone()); + $propertyValue = $date->format(DATE_W3C); + } else { + $propertyValue = (string) $propertyValue; + } + $html .= self::generateMeta($propertyValue, "custom.$propertyQualifier.$customProperty"); + } + } + + if (!empty($properties->getHyperlinkBase())) { + $html .= ' ' . PHP_EOL; + } $html .= $includeStyles ? $this->generateStyles(true) : $this->generatePageDeclarations(true); diff --git a/src/PhpSpreadsheet/Writer/Xls/Worksheet.php b/src/PhpSpreadsheet/Writer/Xls/Worksheet.php index 9f23bd365e..aeedd08e77 100644 --- a/src/PhpSpreadsheet/Writer/Xls/Worksheet.php +++ b/src/PhpSpreadsheet/Writer/Xls/Worksheet.php @@ -503,6 +503,8 @@ public function close(): void $this->writeMergedCells(); // Hyperlinks + $phpParent = $phpSheet->getParent(); + $hyperlinkbase = ($phpParent === null) ? '' : $phpParent->getProperties()->getHyperlinkBase(); foreach ($phpSheet->getHyperLinkCollection() as $coordinate => $hyperlink) { [$column, $row] = Coordinate::indexesFromString($coordinate); @@ -513,6 +515,11 @@ public function close(): void $url = str_replace('sheet://', 'internal:', $url); } elseif (preg_match('/^(http:|https:|ftp:|mailto:)/', $url)) { // URL + } elseif (!empty($hyperlinkbase) && preg_match('~^([A-Za-z]:)?[/\\\\]~', $url) !== 1) { + $url = "$hyperlinkbase$url"; + if (preg_match('/^(http:|https:|ftp:|mailto:)/', $url) !== 1) { + $url = 'external:' . $url; + } } else { // external (local file) $url = 'external:' . $url; diff --git a/src/PhpSpreadsheet/Writer/Xlsx/DocProps.php b/src/PhpSpreadsheet/Writer/Xlsx/DocProps.php index 8902826a19..8c33f59326 100644 --- a/src/PhpSpreadsheet/Writer/Xlsx/DocProps.php +++ b/src/PhpSpreadsheet/Writer/Xlsx/DocProps.php @@ -93,6 +93,9 @@ public function writeDocPropsApp(Spreadsheet $spreadsheet) // SharedDoc $objWriter->writeElement('SharedDoc', 'false'); + // HyperlinkBase + $objWriter->writeElement('HyperlinkBase', $spreadsheet->getProperties()->getHyperlinkBase()); + // HyperlinksChanged $objWriter->writeElement('HyperlinksChanged', 'false'); diff --git a/tests/PhpSpreadsheetTests/Reader/Xml/XmlPropertiesTest.php b/tests/PhpSpreadsheetTests/Reader/Xml/XmlPropertiesTest.php new file mode 100644 index 0000000000..8b4a225d3f --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Xml/XmlPropertiesTest.php @@ -0,0 +1,187 @@ +load($this->filename); + + $properties = $spreadsheet->getProperties(); + self::assertSame('title', $properties->getTitle()); + self::assertSame('topic', $properties->getSubject()); + self::assertSame('author', $properties->getCreator()); + self::assertSame('keyword1, keyword2', $properties->getKeywords()); + self::assertSame('no comment', $properties->getDescription()); + self::assertSame('last author', $properties->getLastModifiedBy()); + $expected = self::timestampToInt('2023-05-18T11:21:43Z'); + self::assertEquals($expected, $properties->getCreated()); + $expected = self::timestampToInt('2023-05-18T11:30:00Z'); + self::assertEquals($expected, $properties->getModified()); + self::assertSame('category', $properties->getCategory()); + self::assertSame('manager', $properties->getManager()); + self::assertSame('company', $properties->getCompany()); + + self::assertSame('https://phpspreadsheet.readthedocs.io/en/latest/', $properties->getHyperlinkBase()); + + self::assertSame('TheString', $properties->getCustomPropertyValue('StringProperty')); + self::assertSame(12345, $properties->getCustomPropertyValue('NumberProperty')); + $expected = self::timestampToInt('2023-05-18T10:00:00Z'); + self::assertEquals($expected, $properties->getCustomPropertyValue('DateProperty')); + $expected = self::timestampToInt('2023-05-19T11:00:00Z'); + self::assertEquals($expected, $properties->getCustomPropertyValue('DateProperty2')); + self::assertTrue($properties->getCustomPropertyValue('BooleanPropertyTrue')); + self::assertFalse($properties->getCustomPropertyValue('BooleanPropertyFalse')); + self::assertEqualsWithDelta(1.2345, $properties->getCustomPropertyValue('FloatProperty'), 1E-8); + + $sheet = $spreadsheet->getActiveSheet(); + // Note that relative links don't actually work in XML format. + // It will, however, work just fine in the Xlsx and Html copies. + $hyperlink = $sheet->getCell('A1')->getHyperlink(); + self::assertSame('references/features-cross-reference/', $hyperlink->getUrl()); + // Same comment as for cell above. + self::assertSame('topics/accessing-cells/', $sheet->getCell('A2')->getCalculatedValue()); + // No problem for absolute links. + $hyperlink = $sheet->getCell('A3')->getHyperlink(); + self::assertSame('https://www.google.com/', $hyperlink->getUrl()); + self::assertSame('https://www.yahoo.com', $sheet->getCell('A4')->getCalculatedValue()); + + $reloadedSpreadsheet = $this->writeAndReload($spreadsheet, 'Xlsx'); + $spreadsheet->disconnectWorksheets(); + + $properties = $reloadedSpreadsheet->getProperties(); + self::assertSame('title', $properties->getTitle()); + self::assertSame('topic', $properties->getSubject()); + self::assertSame('author', $properties->getCreator()); + self::assertSame('keyword1, keyword2', $properties->getKeywords()); + self::assertSame('no comment', $properties->getDescription()); + self::assertSame('last author', $properties->getLastModifiedBy()); + $expected = self::timestampToInt('2023-05-18T11:21:43Z'); + self::assertEquals($expected, $properties->getCreated()); + $expected = self::timestampToInt('2023-05-18T11:30:00Z'); + self::assertEquals($expected, $properties->getModified()); + self::assertSame('category', $properties->getCategory()); + self::assertSame('manager', $properties->getManager()); + self::assertSame('company', $properties->getCompany()); + + self::assertSame('https://phpspreadsheet.readthedocs.io/en/latest/', $properties->getHyperlinkBase()); + + self::assertSame('TheString', $properties->getCustomPropertyValue('StringProperty')); + self::assertSame(12345, $properties->getCustomPropertyValue('NumberProperty')); + // Note that Xlsx will ignore the time part when displaying + // the property. + $expected = self::timestampToInt('2023-05-18T10:00:00Z'); + self::assertEquals($expected, $properties->getCustomPropertyValue('DateProperty')); + $expected = self::timestampToInt('2023-05-19T11:00:00Z'); + self::assertEquals($expected, $properties->getCustomPropertyValue('DateProperty2')); + self::assertTrue($properties->getCustomPropertyValue('BooleanPropertyTrue')); + self::assertFalse($properties->getCustomPropertyValue('BooleanPropertyFalse')); + self::assertEqualsWithDelta(1.2345, $properties->getCustomPropertyValue('FloatProperty'), 1E-8); + + $sheet = $reloadedSpreadsheet->getActiveSheet(); + // Note that relative links don't actually work in XML format. + // It will, however, work just fine in the Xlsx and Html copies. + $hyperlink = $sheet->getCell('A1')->getHyperlink(); + self::assertSame('references/features-cross-reference/', $hyperlink->getUrl()); + // Same comment as for cell above. + self::assertSame('topics/accessing-cells/', $sheet->getCell('A2')->getCalculatedValue()); + // No problem for absolute links. + $hyperlink = $sheet->getCell('A3')->getHyperlink(); + self::assertSame('https://www.google.com/', $hyperlink->getUrl()); + self::assertSame('https://www.yahoo.com', $sheet->getCell('A4')->getCalculatedValue()); + + $reloadedSpreadsheet->disconnectWorksheets(); + } + + public function testPropertiesHtml(): void + { + $reader = new Xml(); + $spreadsheet = $reader->load($this->filename); + + $reloadedSpreadsheet = $this->writeAndReload($spreadsheet, 'Html'); + $spreadsheet->disconnectWorksheets(); + + $properties = $reloadedSpreadsheet->getProperties(); + self::assertSame('https://phpspreadsheet.readthedocs.io/en/latest/', $properties->getHyperlinkBase()); + + self::assertSame('title', $properties->getTitle()); + self::assertSame('topic', $properties->getSubject()); + self::assertSame('author', $properties->getCreator()); + self::assertSame('keyword1, keyword2', $properties->getKeywords()); + self::assertSame('no comment', $properties->getDescription()); + self::assertSame('last author', $properties->getLastModifiedBy()); + $expected = self::timestampToInt('2023-05-18T11:21:43Z'); + self::assertEquals($expected, $properties->getCreated()); + $expected = self::timestampToInt('2023-05-18T11:30:00Z'); + self::assertEquals($expected, $properties->getModified()); + self::assertSame('category', $properties->getCategory()); + self::assertSame('manager', $properties->getManager()); + self::assertSame('company', $properties->getCompany()); + + self::assertSame('TheString', $properties->getCustomPropertyValue('StringProperty')); + self::assertSame(12345, $properties->getCustomPropertyValue('NumberProperty')); + $expected = self::timestampToInt('2023-05-18T10:00:00Z'); + self::assertEquals($expected, $properties->getCustomPropertyValue('DateProperty')); + $expected = self::timestampToInt('2023-05-19T11:00:00Z'); + self::assertEquals($expected, $properties->getCustomPropertyValue('DateProperty2')); + self::assertTrue($properties->getCustomPropertyValue('BooleanPropertyTrue')); + self::assertFalse($properties->getCustomPropertyValue('BooleanPropertyFalse')); + self::assertEqualsWithDelta(1.2345, $properties->getCustomPropertyValue('FloatProperty'), 1E-8); + + $sheet = $reloadedSpreadsheet->getActiveSheet(); + // Note that relative links don't actually work in XML format. + // It will, however, work just fine in the Xlsx and Html copies. + $hyperlink = $sheet->getCell('A1')->getHyperlink(); + self::assertSame('references/features-cross-reference/', $hyperlink->getUrl()); + // Same comment as for cell above. + self::assertSame('topics/accessing-cells/', $sheet->getCell('A2')->getCalculatedValue()); + // No problem for absolute links. + $hyperlink = $sheet->getCell('A3')->getHyperlink(); + self::assertSame('https://www.google.com/', $hyperlink->getUrl()); + self::assertSame('https://www.yahoo.com', $sheet->getCell('A4')->getCalculatedValue()); + + $reloadedSpreadsheet->disconnectWorksheets(); + } + + public function testHyperlinksXls(): void + { + $reader = new Xml(); + $spreadsheet = $reader->load($this->filename); + + $reloadedSpreadsheet = $this->writeAndReload($spreadsheet, 'Xls'); + $spreadsheet->disconnectWorksheets(); + + $sheet = $reloadedSpreadsheet->getActiveSheet(); + // Note that relative links don't actually work in XML format. + // However, Xls Writer will convert relative to absolute. + $hyperlink = $sheet->getCell('A1')->getHyperlink(); + self::assertSame('https://phpspreadsheet.readthedocs.io/en/latest/references/features-cross-reference/', $hyperlink->getUrl()); + // Xls writer does not get involved in function call. + // However, hyperlink does get updated somewhere. + //self::assertSame('topics/accessing-cells/', $sheet->getCell('A2')->getCalculatedValue()); + $hyperlink = $sheet->getCell('A2')->getHyperlink(); + self::assertSame('https://phpspreadsheet.readthedocs.io/en/latest/topics/accessing-cells/', $hyperlink->getUrl()); + // No problem for absolute links. + $hyperlink = $sheet->getCell('A3')->getHyperlink(); + self::assertSame('https://www.google.com/', $hyperlink->getUrl()); + self::assertSame('https://www.yahoo.com', $sheet->getCell('A4')->getCalculatedValue()); + + $reloadedSpreadsheet->disconnectWorksheets(); + } + + private static function timestampToInt(string $timestamp): string + { + $dto = new DateTimeImmutable($timestamp); + + return $dto->format('U'); + } +} diff --git a/tests/data/Reader/Xml/hyperlinkbase.xml b/tests/data/Reader/Xml/hyperlinkbase.xml new file mode 100644 index 0000000000..e3448ee994 --- /dev/null +++ b/tests/data/Reader/Xml/hyperlinkbase.xml @@ -0,0 +1,91 @@ + + + + + title + topic + author + keyword1, keyword2 + no comment + last author + 2023-05-18T11:21:43Z + 2023-05-18T11:30:00Z + category + manager + company + https://phpspreadsheet.readthedocs.io/en/latest/ + 16.00 + + + TheString + 12345 + 2023-05-18T10:00:00Z + 2023-05-19T11:00:00Z + 1 + 0 + 1.2345 + + + + + + 6820 + 19200 + 32767 + 32767 + False + False + + + + + + + + + references/features-cross-reference/ + + + topics/accessing-cells/ + + + https://www.google.com + + + https://www.yahoo.com + +
+ + + + + + 3 + 3 + + + False + False + +
+