Skip to content

Commit

Permalink
Make HTML checks more strict
Browse files Browse the repository at this point in the history
  • Loading branch information
kamazee authored and PowerKiKi committed Nov 16, 2016
1 parent 928b592 commit 408da0c
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 6 deletions.
59 changes: 53 additions & 6 deletions src/PhpSpreadsheet/Reader/HTML.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@
/** PhpSpreadsheet root directory */
class HTML extends BaseReader implements IReader
{
/**
* Number of bytes sampled from the file (used as the fread() length)
* when determining whether it is HTML or not
*/
const TEST_SAMPLE_SIZE = 2048;

/**
* Input encoding
*
Expand Down Expand Up @@ -126,14 +131,56 @@ public function __construct()
*/
protected function isValidFormat()
{
// NOTE(review): this hunk looks like a diff view with the +/- markers stripped, so
// pre-change and post-change lines appear interleaved — confirm against the
// repository before reusing it. Presumably the single-sample check immediately
// below (through the first `return true;`) is the implementation being removed.
// Reading 2048 bytes should be enough to validate that the format is HTML
$data = fread($this->fileHandle, 2048);
if ((strpos($data, '<') !== false) &&
(strlen($data) !== strlen(strip_tags($data)))) {
return true;
// Presumably the replacement check starts here: sample the head of the file.
$beginning = $this->readBeginning();

// Reject when the first non-whitespace character of the head sample is not '<'.
if (!self::startsWithTag($beginning)) {
return false;
}

// Reject when strip_tags() leaves the length of the head sample unchanged.
if (!self::containsTags($beginning)) {
return false;
}

// Reject when the last non-whitespace character of the tail sample is not '>'.
if (!self::endsWithTag($this->readEnding())) {
return false;
}

// NOTE(review): two consecutive returns — presumably the old `return false;`
// (removed) followed by the new `return true;` (added); verify.
return false;
return true;
}

/**
 * Read the leading sample of the file.
 *
 * Moves the handle back to offset 0 and reads up to TEST_SAMPLE_SIZE bytes.
 *
 * @return string|false whatever fread() returns for the head of the file
 */
private function readBeginning()
{
    $handle = $this->fileHandle;

    // Always sample from the very start, regardless of earlier reads.
    fseek($handle, 0, SEEK_SET);
    $head = fread($handle, self::TEST_SAMPLE_SIZE);

    return $head;
}

/**
 * Read the trailing sample of the file.
 *
 * Returns at most TEST_SAMPLE_SIZE bytes taken from the end of the file.
 * A file shorter than the sample size is returned in full, and an empty
 * (or unreadable) file yields an empty string.
 *
 * @return string
 */
private function readEnding()
{
    $meta = stream_get_meta_data($this->fileHandle);
    $filename = $meta['uri'];

    $size = filesize($filename);
    if ($size === false || $size === 0) {
        // Nothing to sample; also avoids asking fread() for zero bytes.
        return '';
    }

    // Clamp the sample to the file size. Without this, a short file produces
    // a negative offset, fseek() fails, the pointer stays wherever the
    // previous read left it, and the "ending" comes back empty.
    $blockSize = min(self::TEST_SAMPLE_SIZE, $size);

    fseek($this->fileHandle, $size - $blockSize);

    $ending = fread($this->fileHandle, $blockSize);

    return $ending === false ? '' : $ending;
}

/**
 * Tell whether the first non-whitespace character of the sample is '<'.
 *
 * @param string $data sample to inspect
 *
 * @return bool
 */
private static function startsWithTag($data)
{
    $trimmed = trim($data);
    if ($trimmed === '') {
        return false;
    }

    return $trimmed[0] === '<';
}

/**
 * Tell whether the last non-whitespace character of the sample is '>'.
 *
 * @param string $data sample to inspect
 *
 * @return bool
 */
private static function endsWithTag($data)
{
    $trimmed = trim($data);
    $length = strlen($trimmed);

    return $length > 0 && $trimmed[$length - 1] === '>';
}

/**
 * Tell whether strip_tags() removes anything from the sample.
 *
 * @param string $data sample to inspect
 *
 * @return bool true when the stripped sample is shorter or longer than the input
 */
private static function containsTags($data)
{
    $withoutTags = strip_tags($data);
    $tagsWereRemoved = strlen($data) !== strlen($withoutTags);

    return $tagsWereRemoved;
}

/**
Expand Down
19 changes: 19 additions & 0 deletions tests/PhpSpreadsheetTests/Reader/HTMLTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?php

namespace PhpOffice\PhpSpreadsheetTests\Reader;

use PhpOffice\PhpSpreadsheet\Reader\HTML;

/**
 * Checks that the HTML reader does not claim files that are not HTML.
 */
class HTMLTest extends \PHPUnit_Framework_TestCase
{
    /**
     * A CSV file whose content contains an angle bracket must not be
     * reported as readable by the HTML reader.
     */
    public function testCsvWithAngleBracket()
    {
        $csvPath = __DIR__ . '/../../data/Reader/HTML/csv_with_angle_bracket.csv';
        $reader = $this->getInstance();

        $this->assertFalse($reader->canRead($csvPath));
    }

    /**
     * Build the reader under test.
     *
     * @return HTML
     */
    private function getInstance()
    {
        $reader = new HTML();

        return $reader;
    }
}
1 change: 1 addition & 0 deletions tests/data/Reader/HTML/csv_with_angle_bracket.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Collection Name,Number of items with weight <= 50kg

0 comments on commit 408da0c

Please sign in to comment.