Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use HtmlString to distinguish between HTML and plain text strings #1368

Merged
merged 1 commit into from
Feb 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

### Customization changes
- Custom spout parameter declarations should now use constants from `Parameter` interface. ([#1409](https://github.com/fossar/selfoss/pull/1409))
- Custom spouts are expected to pass `HtmlString` object to items’ title and content. ([#1368](https://github.com/fossar/selfoss/pull/1368))

### Other changes
- `tidy` PHP extension is now required if you want to use “Content extractor” spout. ([#1392](https://github.com/fossar/selfoss/pull/1392))
Expand Down
4 changes: 2 additions & 2 deletions docs/api-description.json
Original file line number Diff line number Diff line change
Expand Up @@ -951,12 +951,12 @@
"example": "2013-04-07T13:43:00+01:00"
},
"title": {
"description": "the title of the article",
"description": "The title of the article, can contain HTML tags",
"type": "string",
"example": "FTTH: Google Fiber für eine neue Großstadt"
},
"content": {
"description": "the full content of the article",
"description": "The full content of the article as a HTML fragment.",
"type": "string",
"example": "\n<p>Das 1-GBit/s-Angebot Google Fiber kommt nach Austin, die Hauptstadt des US-Bundesstaates Texas..."
},
Expand Down
9 changes: 6 additions & 3 deletions src/daos/mysql/Items.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
use DateTime;
use DateTimeImmutable;
use helpers\Configuration;
use helpers\HtmlString;
use Monolog\Logger;

/**
Expand Down Expand Up @@ -103,6 +104,8 @@ public function unstarr(int $id): void {

/**
* add new item
*
* @param array{datetime: \DateTimeInterface, title: HtmlString, content: HtmlString, thumbnail: ?string, icon: ?string, source: int, uid: string, link: string, author: ?string} $values
*/
public function add(array $values): void {
$this->database->exec('INSERT INTO ' . $this->configuration->dbPrefix . 'items (
Expand Down Expand Up @@ -131,9 +134,9 @@ public function add(array $values): void {
:author
)',
[
':datetime' => $values['datetime'],
':title' => $values['title'],
':content' => $values['content'],
':datetime' => $values['datetime']->format('Y-m-d H:i:s'),
':title' => $values['title']->getRaw(),
':content' => $values['content']->getRaw(),
':thumbnail' => $values['thumbnail'],
':icon' => $values['icon'],
':unread' => 1,
Expand Down
39 changes: 20 additions & 19 deletions src/helpers/ContentLoader.php
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,10 @@ public function fetch($source): void {
$sourceIconUrl = null;
$itemsSeen = [];
foreach ($items as $item) {
$titlePlainText = $item->getTitle()->getPlainText();
// item already in database?
if (isset($itemsFound[$item->getId()])) {
$this->logger->debug('item "' . $item->getTitle() . '" already in database.');
$this->logger->debug('item "' . $titlePlainText . '" already in database.');
$itemsSeen[] = $itemsFound[$item->getId()];
continue;
}
Expand All @@ -163,7 +164,7 @@ public function fetch($source): void {
$itemDate = new \DateTimeImmutable();
}
if ($itemDate < $minDate) {
$this->logger->debug('item "' . $item->getTitle() . '" (' . $itemDate->format(\DateTime::ATOM) . ') older than ' . $this->configuration->itemsLifetime . ' days');
$this->logger->debug('item "' . $titlePlainText . '" (' . $itemDate->format(\DateTime::ATOM) . ') older than ' . $this->configuration->itemsLifetime . ' days');
continue;
}

Expand All @@ -174,7 +175,7 @@ public function fetch($source): void {
}

// insert new item
$this->logger->debug('start insertion of new item "' . $item->getTitle() . '"');
$this->logger->debug('start insertion of new item "' . $titlePlainText . '"');

try {
// fetch content
Expand All @@ -184,14 +185,14 @@ public function fetch($source): void {
$content = $this->sanitizeContent($content);
} catch (\Throwable $e) {
$content = 'Error: Content not fetched. Reason: ' . $e->getMessage();
$this->logger->error('Can not fetch "' . $item->getTitle() . '"', ['exception' => $e]);
$this->logger->error('Can not fetch "' . $titlePlainText . '"', ['exception' => $e]);
}

// sanitize title
$title = trim($this->sanitizeField($item->getTitle()));
$title = HtmlString::fromRaw(trim($this->sanitizeField($item->getTitle())->getRaw()));

// Check sanitized title against filter
if ($this->filter($source, $title, $content) === false) {
if ($this->filter($source, $title->getRaw(), $content->getRaw()) === false) {
continue;
}

Expand All @@ -201,7 +202,7 @@ public function fetch($source): void {
'title' => $title,
'content' => $content,
'source' => $source['id'],
'datetime' => $itemDate->format('Y-m-d H:i:s'),
'datetime' => $itemDate,
'uid' => $item->getId(),
'link' => htmLawed($item->getLink(), ['deny_attribute' => '*', 'elements' => '-*']),
'author' => $item->getAuthor(),
Expand Down Expand Up @@ -313,13 +314,13 @@ protected function filter($source, string $title, string $content): bool {
/**
* Sanitize content for preventing XSS attacks.
*
* @param string $content content of the given feed
* @param HtmlString $content content of the given feed
*
* @return mixed|string sanitized content
* @return HtmlString sanitized content
*/
protected function sanitizeContent(string $content) {
return htmLawed(
$content,
protected function sanitizeContent(HtmlString $content): HtmlString {
return HtmlString::fromRaw(htmLawed(
$content->getRaw(),
[
'safe' => 1,
'deny_attribute' => '* -alt -title -src -href -target',
Expand All @@ -329,24 +330,24 @@ protected function sanitizeContent(string $content) {
'elements' => 'div,p,ul,li,a,img,dl,dt,dd,h1,h2,h3,h4,h5,h6,ol,br,table,tr,td,blockquote,pre,ins,del,th,thead,tbody,b,i,strong,em,tt,sub,sup,s,strike,code',
],
'img=width, height'
);
));
}

/**
* Sanitize a simple field
*
* @param string $value content of the given field
* @param HtmlString $value content of the given field
*
* @return mixed|string sanitized content
* @return HtmlString sanitized content
*/
protected function sanitizeField(string $value) {
return htmLawed(
$value,
protected function sanitizeField(HtmlString $value): HtmlString {
return HtmlString::fromRaw(htmLawed(
$value->getRaw(),
[
'deny_attribute' => '* -href -title -target',
'elements' => 'a,br,ins,del,b,i,strong,em,tt,sub,sup,s,code',
]
);
));
}

/**
Expand Down
45 changes: 45 additions & 0 deletions src/helpers/HtmlString.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<?php

declare(strict_types=1);

namespace helpers;

/**
* A string wrapper representing a HTML fragment.
*/
class HtmlString {
/** @var string */
private $content;

private function __construct(string $content) {
$this->content = $content;
}

/**
* Creates a new HtmlString from a string containing a HTML fragment.
*/
public static function fromRaw(string $content): self {
return new self($content);
}

/**
* Creates a new HtmlString from a plain text string.
*/
public static function fromPlainText(string $content): self {
return new self(htmlspecialchars($content));
}

/**
* Returns a HTML fragment represented by the object.
*/
public function getRaw(): string {
return $this->content;
}

/**
* Returns a plain text without any HTML tags.
*/
public function getPlainText(): string {
return htmlspecialchars_decode(strip_tags($this->content));
}
}
2 changes: 1 addition & 1 deletion src/helpers/ImageUtils.php
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ public static function findFirstImageSource(string $html): ?string {
if (stripos($html, '<img') !== false) {
$imgsrc_regex = '#<\s*img [^\>]*src\s*=\s*(["\'])(.*?)\1#im';
if (preg_match($imgsrc_regex, $html, $matches)) {
return $matches[2];
return htmlspecialchars_decode($matches[2]);
} else {
return null;
}
Expand Down
17 changes: 9 additions & 8 deletions src/spouts/Item.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace spouts;

use DateTimeInterface;
use helpers\HtmlString;

/**
* Value object representing a source item (e.g. an article).
Expand All @@ -15,10 +16,10 @@ class Item {
/** @var string an unique id for this item */
private $id;

/** @var string title */
/** @var HtmlString title */
private $title;

/** @var string content */
/** @var HtmlString content */
private $content;

/** @var ?string thumbnail */
Expand All @@ -44,8 +45,8 @@ class Item {
*/
public function __construct(
string $id,
string $title,
string $content,
HtmlString $title,
HtmlString $content,
?string $thumbnail,
?string $icon,
string $link,
Expand Down Expand Up @@ -89,14 +90,14 @@ public function withId(string $id): self {
* If the spout allows HTML in the title, HTML special chars are expected to be decoded by the spout
* (for instance when the spout feed is XML).
*/
public function getTitle(): string {
public function getTitle(): HtmlString {
return $this->title;
}

/**
* @return static
*/
public function withTitle(string $title): self {
public function withTitle(HtmlString $title): self {
$modified = clone $this;
$modified->title = $title;

Expand All @@ -109,14 +110,14 @@ public function withTitle(string $title): self {
* HTML special chars are expected to be decoded by the spout
* (for instance when the spout feed is XML).
*/
public function getContent(): string {
public function getContent(): HtmlString {
return $this->content;
}

/**
* @return static
*/
public function withContent(string $content): self {
public function withContent(HtmlString $content): self {
$modified = clone $this;
$modified->content = $content;

Expand Down
13 changes: 7 additions & 6 deletions src/spouts/facebook/page.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace spouts\facebook;

use GuzzleHttp\Psr7\Uri;
use helpers\HtmlString;
use helpers\WebClient;
use spouts\Item;
use spouts\Parameter;
Expand Down Expand Up @@ -107,7 +108,7 @@ public function getIcon(): ?string {
public function getItems(): iterable {
foreach ($this->items as $item) {
$id = $item['id'];
$title = mb_strlen($item['message']) > 80 ? mb_substr($item['message'], 0, 100) . '…' : $item['message'];
$title = HtmlString::fromPlainText((mb_strlen($item['message']) > 80 ? mb_substr($item['message'], 0, 100) . '…' : $item['message']));
$content = $this->getPostContent($item);
$thumbnail = null;
$icon = null;
Expand All @@ -131,25 +132,25 @@ public function getItems(): iterable {
}
}

private function getPostContent(array $item): string {
$message = $item['message'];
private function getPostContent(array $item): HtmlString {
$message = htmlspecialchars($item['message']);

if (isset($item['attachments']) && count($item['attachments']['data']) > 0) {
foreach ($item['attachments']['data'] as $media) {
if ($media['type'] === 'photo') {
$message .= '<figure>' . PHP_EOL;
$message .= '<a href="' . $media['target']['url'] . '"><img src="' . $media['media']['image']['src'] . '" alt=""></a>' . PHP_EOL;
$message .= '<a href="' . htmlspecialchars($media['target']['url'], ENT_QUOTES) . '"><img src="' . htmlspecialchars($media['media']['image']['src'], ENT_QUOTES) . '" alt=""></a>' . PHP_EOL;

// Some photos will have the same description, no need to display it twice
if ($media['description'] !== $item['message']) {
$message .= '<figcaption>' . $media['description'] . '</figcaption>' . PHP_EOL;
$message .= '<figcaption>' . htmlspecialchars($media['description']) . '</figcaption>' . PHP_EOL;
}

$message .= '</figure>' . PHP_EOL;
}
}
}

return $message;
return HtmlString::fromRaw($message);
}
}
5 changes: 3 additions & 2 deletions src/spouts/github/commits.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace spouts\github;

use helpers\HtmlString;
use helpers\WebClient;
use spouts\Item;
use spouts\Parameter;
Expand Down Expand Up @@ -107,8 +108,8 @@ public function getItems(): iterable {
$message = $item['commit']['message'];

$id = $item['sha'];
$title = htmlspecialchars(self::cutTitle($message));
$content = nl2br(htmlspecialchars($message), false);
$title = HtmlString::fromPlainText(self::cutTitle($message));
$content = HtmlString::fromRaw(nl2br(htmlspecialchars($message), false));
$thumbnail = null;
$icon = null;
$link = $item['html_url'];
Expand Down
Loading