From 61c9370a36493f3eaa0b2109f556585a67cf3fb1 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Wed, 9 Nov 2016 18:57:02 +0100 Subject: [PATCH] Use web-thumbnailer to retrieve thumbnails * requires PHP 5.6 * use blazy on linklist since a lot more thumbs are retrieved * thumbnails can be disabled * thumbs size is now 120x120 * thumbs are now cropped to fit the expected size Fixes #345 #425 #487 #543 #588 #590 --- application/PageBuilder.php | 3 + application/Thumbnailer.php | 48 +++ application/config/ConfigManager.php | 4 + composer.json | 5 +- inc/shaarli.css | 8 +- inc/web-thumbnailer.json | 8 + index.php | 498 ++++----------------------- tests/ThumbnailerTest.php | 44 +++ tpl/configure.html | 12 + tpl/linklist.html | 15 +- tpl/picwall.html | 6 +- 11 files changed, 217 insertions(+), 434 deletions(-) create mode 100644 application/Thumbnailer.php create mode 100644 inc/web-thumbnailer.json create mode 100644 tests/ThumbnailerTest.php diff --git a/application/PageBuilder.php b/application/PageBuilder.php index 32c7f9f18..52340889d 100644 --- a/application/PageBuilder.php +++ b/application/PageBuilder.php @@ -78,6 +78,9 @@ private function initialize() $this->tpl->assign('showatom', $this->conf->get('feed.show_atom', false)); $this->tpl->assign('hide_timestamps', $this->conf->get('privacy.hide_timestamps', false)); $this->tpl->assign('token', getToken($this->conf)); + $this->tpl->assign('thumbnails_enabled', $this->conf->get('thumbnails.enabled')); + $this->tpl->assign('thumbnails_width', $this->conf->get('thumbnails.width')); + $this->tpl->assign('thumbnails_height', $this->conf->get('thumbnails.height')); // To be removed with a proper theme configuration. $this->tpl->assign('conf', $this->conf); } diff --git a/application/Thumbnailer.php b/application/Thumbnailer.php new file mode 100644 index 000000000..5aa7bdc5d --- /dev/null +++ b/application/Thumbnailer.php @@ -0,0 +1,48 @@ +conf = $conf; + $this->wt = new WebThumbnailer(); + \WebThumbnailer\Application\ConfigManager::addFile('inc/web-thumbnailer.json'); + $this->wt->maxWidth($this->conf->get('thumbnails.width')) + ->maxHeight($this->conf->get('thumbnails.height')) + ->crop(true); + } + + /** + * Retrieve a thumbnail for given URL + * + * @param string $url where to look for a thumbnail. + * + * @return bool|string The thumbnail relative cache file path, or false if none has been found. + */ + public function get($url) + { + return $this->wt->thumbnail($url); + } +} diff --git a/application/config/ConfigManager.php b/application/config/ConfigManager.php index f5f753f8f..f6e236ce4 100644 --- a/application/config/ConfigManager.php +++ b/application/config/ConfigManager.php @@ -310,6 +310,10 @@ protected function setDefaultValues() $this->setEmpty('general.links_per_page', 20); $this->setEmpty('general.enabled_plugins', array('qrcode')); + $this->setEmpty('thumbnails.enabled', true); + $this->setEmpty('thumbnails.width', 120); + $this->setEmpty('thumbnails.height', 120); + $this->setEmpty('updates.check_updates', false); $this->setEmpty('updates.check_updates_branch', 'stable'); $this->setEmpty('updates.check_updates_interval', 86400); diff --git a/composer.json b/composer.json index f7d26a315..ffb4bb122 100644 --- a/composer.json +++ b/composer.json @@ -10,9 +10,10 @@ }, "keywords": ["bookmark", "link", "share", "web"], "require": { - "php": ">=5.3.4", + "php": ">=5.6", "shaarli/netscape-bookmark-parser": "1.*", - "erusev/parsedown": "1.6" + "erusev/parsedown": "1.6", + "arthurhoaro/web-thumbnailer": "dev-master" }, "require-dev": { "phpmd/phpmd" : "@stable", diff --git a/inc/shaarli.css b/inc/shaarli.css index 5808320cb..8f5550e7d 100644 --- a/inc/shaarli.css +++ b/inc/shaarli.css @@ -687,8 +687,8 @@ em { position: relative; display: table-cell; vertical-align: middle; - width: 90px; - height: 90px; + width: 120px; + height: 120px; overflow: hidden; text-align: center; float: left; @@ -725,9 +725,9 @@ em { position: absolute; top: 0; left: 0; - width: 90px; + width: 120px; font-weight: bold; - font-size: 8pt; + font-size: 9pt; color: #fff; text-align: left; background-color: transparent; diff --git a/inc/web-thumbnailer.json b/inc/web-thumbnailer.json new file mode 100644 index 000000000..57e3d6795 --- /dev/null +++ b/inc/web-thumbnailer.json @@ -0,0 +1,8 @@ +{ + "settings": { + "default": { + "_comment": "infinite cache", + "cache_duration": -1 + } + } +} \ No newline at end of file diff --git a/index.php b/index.php index 84282b8dc..050ef6d76 100644 --- a/index.php +++ b/index.php @@ -79,6 +79,7 @@ require_once 'application/PluginManager.php'; require_once 'application/Router.php'; require_once 'application/Updater.php'; +require_once 'application/Thumbnailer.php'; // Ensure the PHP version is supported try { @@ -824,20 +825,51 @@ function renderPage($conf, $pluginManager) // -------- Picture wall if ($targetPage == Router::$PAGE_PICWALL) { + if (! $conf->get('thumbnails.enabled')) { + header('Location: ?'); + exit; + } + // Optionally filter the results: $links = $LINKSDB->filterSearch($_GET); $linksToDisplay = array(); + $thumbnailer = new Thumbnailer($conf); + + + $cpt = 0; // Get only links which have a thumbnail. foreach($links as $link) { - $permalink='?'.escape(smallHash($link['linkdate'])); - $thumb=lazyThumbnail($conf, $link['url'],$permalink); - if ($thumb!='') // Only output links which have a thumbnail. - { - $link['thumbnail']=$thumb; // Thumbnail HTML code. - $linksToDisplay[]=$link; // Add to array. + // Not a note, + // and (never retrieved yet or no valid cache file) + if ($link['url'][0] != '?' + && (! isset($link['thumbnail']) || ($link['thumbnail'] !== false && ! is_file($link['thumbnail']))) + ) { + $link['thumbnail'] = $thumbnailer->get($link['url']); + // FIXME! we really need to get rid of ArrayAccess... + $item = $LINKSDB[$link['linkdate']]; + $item['thumbnail'] = $link['thumbnail']; + $LINKSDB[$link['linkdate']] = $item; + $updateDB = true; + $cpt++; } + + if (isset($link['thumbnail']) && $link['thumbnail'] !== false) { + $linksToDisplay[] = $link; // Add to array. + } + + // If we retrieved new thumbnails, we update the database every 20 links. + // Downloading everything the first time may take a very long time + if (!empty($updateDB) && $cpt == 20) { + $LINKSDB->save($conf->get('resource.page_cache')); + $updateDB = false; + $cpt = 0; + } + } + + if (!empty($updateDB)) { + $LINKSDB->save($conf->get('resource.page_cache')); } $data = array( @@ -1153,6 +1185,8 @@ function renderPage($conf, $pluginManager) $conf->set('feed.rss_permalinks', !empty($_POST['enableRssPermalinks'])); $conf->set('updates.check_updates', !empty($_POST['updateCheck'])); $conf->set('privacy.hide_public_links', !empty($_POST['hidePublicLinks'])); + $conf->set('thumbnails.enabled', !empty($_POST['enableThumbnails'])); + try { $conf->write(isLoggedIn()); } @@ -1273,13 +1307,18 @@ function renderPage($conf, $pluginManager) 'private' => (isset($_POST['lf_private']) ? 1 : 0), 'linkdate' => $linkdate, 'updated' => $updated, - 'tags' => str_replace(',', ' ', $tags) + 'tags' => str_replace(',', ' ', $tags), ); // If title is empty, use the URL as title. if ($link['title'] == '') { $link['title'] = $link['url']; } + if ($conf->get('thumbnails.enabled')) { + $thumbnailer = new Thumbnailer($conf); + $link['thumbnail'] = $thumbnailer->get($url); + } + $pluginManager->executeHooks('save_link', $link); $LINKSDB[$linkdate] = $link; @@ -1589,7 +1628,7 @@ function($a, $b) { return $a['order'] - $b['order']; } * @param ConfigManager $conf Configuration Manager instance. * @param PluginManager $pluginManager Plugin Manager instance. */ -function buildLinkList($PAGE,$LINKSDB, $conf, $pluginManager) +function buildLinkList($PAGE, $LINKSDB, $conf, $pluginManager) { // Used in templates $searchtags = !empty($_GET['searchtags']) ? escape($_GET['searchtags']) : ''; @@ -1616,8 +1655,6 @@ function buildLinkList($PAGE,$LINKSDB, $conf, $pluginManager) $keys[] = $key; } - - // Select articles according to paging. $pagecount = ceil(count($keys) / $_SESSION['LINKS_PER_PAGE']); $pagecount = $pagecount == 0 ? 1 : $pagecount; @@ -1627,6 +1664,11 @@ function buildLinkList($PAGE,$LINKSDB, $conf, $pluginManager) // Start index. $i = ($page-1) * $_SESSION['LINKS_PER_PAGE']; $end = $i + $_SESSION['LINKS_PER_PAGE']; + + if ($conf->get('thumbnails.enabled')) { + $thumbnailer = new Thumbnailer($conf); + } + $linkDisp = array(); while ($i<$end && $iget('thumbnails.enabled') && $link['url'][0] != '?' + && (! isset($link['thumbnail']) || ($link['thumbnail'] !== false && ! is_file($link['thumbnail']))) + ) { + $link['thumbnail'] = $thumbnailer->get($link['url']); + // FIXME! we really need to get rid of ArrayAccess... + $item = $LINKSDB[$keys[$i]]; + $item['thumbnail'] = $link['thumbnail']; + $LINKSDB[$keys[$i]] = $item; + $updateDB = true; + } + // Check for both signs of a note: starting with ? and 7 chars long. - if ($link['url'][0] === '?' && - strlen($link['url']) === 7) { + if ($link['url'][0] === '?' && strlen($link['url']) === 7) { $link['url'] = index_url($_SERVER) . $link['url']; } @@ -1656,6 +1711,11 @@ function buildLinkList($PAGE,$LINKSDB, $conf, $pluginManager) $i++; } + // If we retrieved new thumbnails, we update the database. + if (!empty($updateDB)) { + $LINKSDB->save($conf->get('resource.page_cache')); + } + // Compute paging navigation $searchtagsUrl = empty($searchtags) ? '' : '&searchtags=' . urlencode($searchtags); $searchtermUrl = empty($searchterm) ? '' : '&searchterm=' . urlencode($searchterm); @@ -1696,194 +1756,6 @@ function buildLinkList($PAGE,$LINKSDB, $conf, $pluginManager) return; } -/** - * Compute the thumbnail for a link. - * - * With a link to the original URL. - * Understands various services (youtube.com...) - * Input: $url = URL for which the thumbnail must be found. - * $href = if provided, this URL will be followed instead of $url - * Returns an associative array with thumbnail attributes (src,href,width,height,style,alt) - * Some of them may be missing. - * Return an empty array if no thumbnail available. - * - * @param ConfigManager $conf Configuration Manager instance. - * @param string $url - * @param string|bool $href - * - * @return array - */ -function computeThumbnail($conf, $url, $href = false) -{ - if (!$conf->get('thumbnail.enable_thumbnails')) return array(); - if ($href==false) $href=$url; - - // For most hosts, the URL of the thumbnail can be easily deduced from the URL of the link. - // (e.g. http://www.youtube.com/watch?v=spVypYk4kto ---> http://img.youtube.com/vi/spVypYk4kto/default.jpg ) - // ^^^^^^^^^^^ ^^^^^^^^^^^ - $domain = parse_url($url,PHP_URL_HOST); - if ($domain=='youtube.com' || $domain=='www.youtube.com') - { - parse_str(parse_url($url,PHP_URL_QUERY), $params); // Extract video ID and get thumbnail - if (!empty($params['v'])) return array('src'=>'https://img.youtube.com/vi/'.$params['v'].'/default.jpg', - 'href'=>$href,'width'=>'120','height'=>'90','alt'=>'YouTube thumbnail'); - } - if ($domain=='youtu.be') // Youtube short links - { - $path = parse_url($url,PHP_URL_PATH); - return array('src'=>'https://img.youtube.com/vi'.$path.'/default.jpg', - 'href'=>$href,'width'=>'120','height'=>'90','alt'=>'YouTube thumbnail'); - } - if ($domain=='pix.toile-libre.org') // pix.toile-libre.org image hosting - { - parse_str(parse_url($url,PHP_URL_QUERY), $params); // Extract image filename. - if (!empty($params) && !empty($params['img'])) return array('src'=>'http://pix.toile-libre.org/upload/thumb/'.urlencode($params['img']), - 'href'=>$href,'style'=>'max-width:120px; max-height:150px','alt'=>'pix.toile-libre.org thumbnail'); - } - - if ($domain=='imgur.com') - { - $path = parse_url($url,PHP_URL_PATH); - if (startsWith($path,'/a/')) return array(); // Thumbnails for albums are not available. - if (startsWith($path,'/r/')) return array('src'=>'https://i.imgur.com/'.basename($path).'s.jpg', - 'href'=>$href,'width'=>'90','height'=>'90','alt'=>'imgur.com thumbnail'); - if (startsWith($path,'/gallery/')) return array('src'=>'https://i.imgur.com'.substr($path,8).'s.jpg', - 'href'=>$href,'width'=>'90','height'=>'90','alt'=>'imgur.com thumbnail'); - - if (substr_count($path,'/')==1) return array('src'=>'https://i.imgur.com/'.substr($path,1).'s.jpg', - 'href'=>$href,'width'=>'90','height'=>'90','alt'=>'imgur.com thumbnail'); - } - if ($domain=='i.imgur.com') - { - $pi = pathinfo(parse_url($url,PHP_URL_PATH)); - if (!empty($pi['filename'])) return array('src'=>'https://i.imgur.com/'.$pi['filename'].'s.jpg', - 'href'=>$href,'width'=>'90','height'=>'90','alt'=>'imgur.com thumbnail'); - } - if ($domain=='dailymotion.com' || $domain=='www.dailymotion.com') - { - if (strpos($url,'dailymotion.com/video/')!==false) - { - $thumburl=str_replace('dailymotion.com/video/','dailymotion.com/thumbnail/video/',$url); - return array('src'=>$thumburl, - 'href'=>$href,'width'=>'120','style'=>'height:auto;','alt'=>'DailyMotion thumbnail'); - } - } - if (endsWith($domain,'.imageshack.us')) - { - $ext=strtolower(pathinfo($url,PATHINFO_EXTENSION)); - if ($ext=='jpg' || $ext=='jpeg' || $ext=='png' || $ext=='gif') - { - $thumburl = substr($url,0,strlen($url)-strlen($ext)).'th.'.$ext; - return array('src'=>$thumburl, - 'href'=>$href,'width'=>'120','style'=>'height:auto;','alt'=>'imageshack.us thumbnail'); - } - } - - // Some other hosts are SLOW AS HELL and usually require an extra HTTP request to get the thumbnail URL. - // So we deport the thumbnail generation in order not to slow down page generation - // (and we also cache the thumbnail) - - if (! $conf->get('thumbnail.enable_localcache')) return array(); // If local cache is disabled, no thumbnails for services which require the use a local cache. - - if ($domain=='flickr.com' || endsWith($domain,'.flickr.com') - || $domain=='vimeo.com' - || $domain=='ted.com' || endsWith($domain,'.ted.com') - || $domain=='xkcd.com' || endsWith($domain,'.xkcd.com') - ) - { - if ($domain=='vimeo.com') - { // Make sure this vimeo URL points to a video (/xxx... where xxx is numeric) - $path = parse_url($url,PHP_URL_PATH); - if (!preg_match('!/\d+.+?!',$path)) return array(); // This is not a single video URL. - } - if ($domain=='xkcd.com' || endsWith($domain,'.xkcd.com')) - { // Make sure this URL points to a single comic (/xxx... where xxx is numeric) - $path = parse_url($url,PHP_URL_PATH); - if (!preg_match('!/\d+.+?!',$path)) return array(); - } - if ($domain=='ted.com' || endsWith($domain,'.ted.com')) - { // Make sure this TED URL points to a video (/talks/...) - $path = parse_url($url,PHP_URL_PATH); - if ("/talks/" !== substr($path,0,7)) return array(); // This is not a single video URL. - } - $sign = hash_hmac('sha256', $url, $conf->get('credentials.salt')); // We use the salt to sign data (it's random, secret, and specific to each installation) - return array('src'=>index_url($_SERVER).'?do=genthumbnail&hmac='.$sign.'&url='.urlencode($url), - 'href'=>$href,'width'=>'120','style'=>'height:auto;','alt'=>'thumbnail'); - } - - // For all other, we try to make a thumbnail of links ending with .jpg/jpeg/png/gif - // Technically speaking, we should download ALL links and check their Content-Type to see if they are images. - // But using the extension will do. - $ext=strtolower(pathinfo($url,PATHINFO_EXTENSION)); - if ($ext=='jpg' || $ext=='jpeg' || $ext=='png' || $ext=='gif') - { - $sign = hash_hmac('sha256', $url, $conf->get('credentials.salt')); // We use the salt to sign data (it's random, secret, and specific to each installation) - return array('src'=>index_url($_SERVER).'?do=genthumbnail&hmac='.$sign.'&url='.urlencode($url), - 'href'=>$href,'width'=>'120','style'=>'height:auto;','alt'=>'thumbnail'); - } - return array(); // No thumbnail. - -} - - -// Returns the HTML code to display a thumbnail for a link -// with a link to the original URL. -// Understands various services (youtube.com...) -// Input: $url = URL for which the thumbnail must be found. -// $href = if provided, this URL will be followed instead of $url -// Returns '' if no thumbnail available. -function thumbnail($url,$href=false) -{ - // FIXME! - global $conf; - $t = computeThumbnail($conf, $url,$href); - if (count($t)==0) return ''; // Empty array = no thumbnail for this URL. - - $html=''; - - // Lazy image - $html.='get('credentials.salt')); - if ($sign!=$_GET['hmac']) die('Naughty boy!'); - - $cacheDir = $conf->get('resource.thumbnails_cache', 'cache'); - // Let's see if we don't already have the image for this URL in the cache. - $thumbname=hash('sha1',$_GET['url']).'.jpg'; - if (is_file($cacheDir .'/'. $thumbname)) - { // We have the thumbnail, just serve it: - header('Content-Type: image/jpeg'); - echo file_get_contents($cacheDir .'/'. $thumbname); - return; - } - // We may also serve a blank image (if service did not respond) - $blankname=hash('sha1',$_GET['url']).'.gif'; - if (is_file($cacheDir .'/'. $blankname)) - { - header('Content-Type: image/gif'); - echo file_get_contents($cacheDir .'/'. $blankname); - return; - } - - // Otherwise, generate the thumbnail. - $url = $_GET['url']; - $domain = parse_url($url,PHP_URL_HOST); - - if ($domain=='flickr.com' || endsWith($domain,'.flickr.com')) - { - // Crude replacement to handle new flickr domain policy (They prefer www. now) - $url = str_replace('http://flickr.com/','http://www.flickr.com/',$url); - - // Is this a link to an image, or to a flickr page ? - $imageurl=''; - if (endsWith(parse_url($url, PHP_URL_PATH), '.jpg')) - { // This is a direct link to an image. e.g. http://farm1.staticflickr.com/5/5921913_ac83ed27bd_o.jpg - preg_match('!(http://farm\d+\.staticflickr\.com/\d+/\d+_\w+_)\w.jpg!',$url,$matches); - if (!empty($matches[1])) $imageurl=$matches[1].'m.jpg'; - } - else // This is a flickr page (html) - { - // Get the flickr html page. - list($headers, $content) = get_http_response($url, 20); - if (strpos($headers[0], '200 OK') !== false) - { - // flickr now nicely provides the URL of the thumbnail in each flickr page. - preg_match('! - if ($imageurl=='') - { - preg_match('! tag on that page - // http://www.ted.com/talks/mikko_hypponen_fighting_viruses_defending_the_net.html - // - list($headers, $content) = get_http_response($url, 5); - if (strpos($headers[0], '200 OK') !== false) { - // Extract the link to the thumbnail - preg_match('!link rel="image_src" href="(http://images.ted.com/images/ted/.+_\d+x\d+\.jpg)"!', $content, $matches); - if (!empty($matches[1])) - { // Let's download the image. - $imageurl=$matches[1]; - // No control on image size, so wait long enough - list($headers, $content) = get_http_response($imageurl, 20); - if (strpos($headers[0], '200 OK') !== false) { - $filepath = $cacheDir .'/'. $thumbname; - file_put_contents($filepath, $content); // Save image to cache. - if (resizeImage($filepath)) - { - header('Content-Type: image/jpeg'); - echo file_get_contents($filepath); - return; - } - } - } - } - } - - elseif ($domain=='xkcd.com' || endsWith($domain,'.xkcd.com')) - { - // There is no thumbnail available for xkcd comics, so download the whole image and resize it. - // http://xkcd.com/327/ - // <BLABLA> - list($headers, $content) = get_http_response($url, 5); - if (strpos($headers[0], '200 OK') !== false) { - // Extract the link to the thumbnail - preg_match('! Notify me when a new release is ready + + Enable thumbnails + + + + + diff --git a/tpl/linklist.html b/tpl/linklist.html index ddfd729ac..93f71f2aa 100644 --- a/tpl/linklist.html +++ b/tpl/linklist.html @@ -66,7 +66,16 @@ {loop="$links"} -
{$value.url|thumbnail}
+ {if="$thumbnails_enabled && !empty($value.thumbnail)"} + + {/if}