Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Markdown link rule-dess #4356

Merged
merged 4 commits into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions crates/api_common/src/post.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,13 +247,23 @@ pub struct GetSiteMetadataResponse {
#[cfg_attr(feature = "full", ts(export))]
/// Site metadata, from its opengraph tags.
pub struct LinkMetadata {
#[serde(flatten)]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should work okay without any breaking changes.

pub opengraph_data: OpenGraphData,
pub content_type: Option<String>,
#[serde(skip)]
pub thumbnail: Option<DbUrl>,
}

#[skip_serializing_none]
#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, Clone, Default)]
#[cfg_attr(feature = "full", derive(TS))]
#[cfg_attr(feature = "full", ts(export))]
/// Site metadata, from its opengraph tags.
pub struct OpenGraphData {
pub title: Option<String>,
pub description: Option<String>,
pub(crate) image: Option<DbUrl>,
pub embed_video_url: Option<DbUrl>,
pub content_type: Option<String>,
#[serde(skip)]
pub thumbnail: Option<DbUrl>,
}

#[skip_serializing_none]
Expand Down
86 changes: 52 additions & 34 deletions crates/api_common/src/request.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
use crate::{context::LemmyContext, post::LinkMetadata, utils::proxy_image_link};
use crate::{
context::LemmyContext,
post::{LinkMetadata, OpenGraphData},
utils::proxy_image_link,
};
use encoding::{all::encodings, DecoderTrap};
use lemmy_db_schema::newtypes::DbUrl;
use lemmy_utils::{
error::{LemmyError, LemmyErrorType},
settings::structs::Settings,
Expand Down Expand Up @@ -43,29 +48,28 @@ pub async fn fetch_link_metadata(
.get(CONTENT_TYPE)
.and_then(|h| h.to_str().ok())
.and_then(|h| h.parse().ok());
let is_image = content_type.as_ref().unwrap_or(&mime::TEXT_PLAIN).type_() == mime::IMAGE;

// Can't use .text() here, because it only checks the content header, not the actual bytes
// https://github.com/LemmyNet/lemmy/issues/1964
let html_bytes = response.bytes().await.map_err(LemmyError::from)?.to_vec();

let mut metadata = extract_opengraph_data(&html_bytes, url).unwrap_or_default();
let opengraph_data = extract_opengraph_data(&html_bytes, url).unwrap_or_default();
let thumbnail = extract_thumbnail_from_opengraph_data(
url,
&opengraph_data,
&content_type,
generate_thumbnail,
context,
)
.await;

metadata.content_type = content_type.map(|c| c.to_string());
if generate_thumbnail && is_image {
let image_url = metadata
.image
.as_ref()
.map(lemmy_db_schema::newtypes::DbUrl::inner)
.unwrap_or(url);
metadata.thumbnail = generate_pictrs_thumbnail(image_url, context)
.await
.ok()
.map(Into::into);
}

Ok(metadata)
Ok(LinkMetadata {
opengraph_data,
content_type: content_type.map(|c| c.to_string()),
thumbnail,
})
}

#[tracing::instrument(skip_all)]
pub async fn fetch_link_metadata_opt(
url: Option<&Url>,
Expand All @@ -81,7 +85,7 @@ pub async fn fetch_link_metadata_opt(
}

/// Extract site metadata from HTML Opengraph attributes.
fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result<LinkMetadata, LemmyError> {
fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result<OpenGraphData, LemmyError> {
let html = String::from_utf8_lossy(html_bytes);

// Make sure the first line is doctype html
Expand Down Expand Up @@ -137,16 +141,38 @@ fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result<LinkMetadata,
// join also works if the target URL is absolute
.and_then(|v| url.join(&v.url).ok());

Ok(LinkMetadata {
Ok(OpenGraphData {
title: og_title.or(page_title),
description: og_description.or(page_description),
image: og_image.map(Into::into),
embed_video_url: og_embed_url.map(Into::into),
content_type: None,
thumbnail: None,
})
}

/// Produce a thumbnail url for a fetched link, if one should be generated.
///
/// A thumbnail is only attempted when `generate_thumbnail` is set and `content_type` is an
/// `image/*` mime type; a missing content type counts as "not an image". The opengraph image is
/// used as the thumbnail source when present, otherwise the link `url` itself.
///
/// Returns `None` when no thumbnail was requested, the content is not an image, or
/// `generate_pictrs_thumbnail` fails (its error is discarded).
#[tracing::instrument(skip_all)]
pub async fn extract_thumbnail_from_opengraph_data(
  url: &Url,
  opengraph_data: &OpenGraphData,
  content_type: &Option<Mime>,
  generate_thumbnail: bool,
  context: &LemmyContext,
) -> Option<DbUrl> {
  let is_image = matches!(content_type, Some(m) if m.type_() == mime::IMAGE);
  if !(generate_thumbnail && is_image) {
    return None;
  }

  // Prefer the image advertised by the page, fall back to the linked url.
  let image_url = match &opengraph_data.image {
    Some(og_image) => og_image.inner(),
    None => url,
  };

  // Thumbnail generation is best-effort: a failure yields no thumbnail rather than an error.
  generate_pictrs_thumbnail(image_url, context)
    .await
    .ok()
    .map(Into::into)
}

#[derive(Deserialize, Debug)]
struct PictrsResponse {
files: Vec<PictrsFile>,
Expand Down Expand Up @@ -233,15 +259,7 @@ async fn generate_pictrs_thumbnail(
let pictrs_config = context.settings().pictrs_config()?;

if !pictrs_config.cache_external_link_previews {
return Ok(
proxy_image_link(
image_url.clone(),
context.settings().pictrs_config()?.image_proxy,
context,
)
.await?
.into(),
);
return Ok(proxy_image_link(image_url.clone(), context).await?.into());
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I commented in the other PR, but this makes it seem like we need to get rid of one of these two pictrs settings bools, since they do the same thing.

}

// fetch remote non-pictrs images for persistent thumbnail link
Expand Down Expand Up @@ -314,21 +332,21 @@ mod tests {
.unwrap();
assert_eq!(
Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()),
sample_res.title
sample_res.opengraph_data.title
);
assert_eq!(
Some("The F-Droid compatible repo at https://apt.izzysoft.de/fdroid/".to_string()),
sample_res.description
sample_res.opengraph_data.description
);
assert_eq!(
Some(
Url::parse("https://gitlab.com/uploads/-/system/project/avatar/4877469/iod_logo.png")
.unwrap()
.into()
),
sample_res.image
sample_res.opengraph_data.image
);
assert_eq!(None, sample_res.embed_video_url);
assert_eq!(None, sample_res.opengraph_data.embed_video_url);
assert_eq!(
Some(mime::TEXT_HTML_UTF_8.to_string()),
sample_res.content_type
Expand Down
49 changes: 23 additions & 26 deletions crates/api_common/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -866,28 +866,33 @@ pub async fn process_markdown_opt(
}
}

/// Rewrite a link to go through `/api/v3/image_proxy` endpoint. This is only for remote urls and
/// if image_proxy setting is enabled.
/// A wrapper for `proxy_image_link` for use in tests.
///
/// The parameter `image_proxy` is the config value of `pictrs.image_proxy`. It's necessary to pass
/// The parameter `force_image_proxy` is the config value of `pictrs.image_proxy`. It's necessary to pass
/// as separate parameter so it can be changed in tests.
pub(crate) async fn proxy_image_link(
pub(crate) async fn proxy_image_link_wrapper(
link: Url,
image_proxy: bool,
force_image_proxy: bool,
context: &LemmyContext,
) -> LemmyResult<DbUrl> {
// Don't rewrite links pointing to local domain.
if link.domain() == Some(&context.settings().hostname) || !image_proxy {
return Ok(link.into());
if link.domain() == Some(&context.settings().hostname) || !force_image_proxy {
Ok(link.into())
} else {
let proxied = format!(
"{}/api/v3/image_proxy?url={}",
context.settings().get_protocol_and_hostname(),
encode(link.as_str())
);
RemoteImage::create(&mut context.pool(), vec![link]).await?;
Ok(Url::parse(&proxied)?.into())
}
}

let proxied = format!(
"{}/api/v3/image_proxy?url={}",
context.settings().get_protocol_and_hostname(),
encode(link.as_str())
);
RemoteImage::create(&mut context.pool(), vec![link]).await?;
Ok(Url::parse(&proxied)?.into())
/// Rewrite a link to go through `/api/v3/image_proxy` endpoint. This is only for remote urls and
/// if image_proxy setting is enabled.
///
/// Reads `pictrs.image_proxy` from the settings and delegates to `proxy_image_link_wrapper`,
/// which returns the link unchanged when that value is `false` or when the link points to the
/// local domain.
///
/// # Errors
/// Propagates any error from `pictrs_config()` or from `proxy_image_link_wrapper`.
pub(crate) async fn proxy_image_link(link: Url, context: &LemmyContext) -> LemmyResult<DbUrl> {
  // Pass the configured value rather than a constant: a hard-coded `false` makes the wrapper
  // return every link unchanged, so nothing would ever be proxied.
  let image_proxy = context.settings().pictrs_config()?.image_proxy;
  proxy_image_link_wrapper(link, image_proxy, context).await
}

pub async fn proxy_image_link_opt_api(
Expand All @@ -910,13 +915,7 @@ pub async fn proxy_image_link_api(
None => None,
};
if let Some(l) = link {
proxy_image_link(
l.into(),
context.settings().pictrs_config()?.image_proxy,
context,
)
.await
.map(Some)
proxy_image_link(l.into(), context).await.map(Some)
} else {
Ok(link)
}
Expand All @@ -927,9 +926,7 @@ pub async fn proxy_image_link_opt_apub(
context: &LemmyContext,
) -> LemmyResult<Option<DbUrl>> {
if let Some(l) = link {
proxy_image_link(l, context.settings().pictrs_config()?.image_proxy, context)
.await
.map(Some)
proxy_image_link(l, context).await.map(Some)
} else {
Ok(None)
}
Expand Down Expand Up @@ -991,14 +988,14 @@ mod tests {

// image from local domain is unchanged
let local_url = Url::parse("http://lemmy-alpha/image.png").unwrap();
let proxied = proxy_image_link(local_url.clone(), true, &context)
let proxied = proxy_image_link_wrapper(local_url.clone(), true, &context)
.await
.unwrap();
assert_eq!(&local_url, proxied.inner());

// image from remote domain is proxied
let remote_image = Url::parse("http://lemmy-beta/image.png").unwrap();
let proxied = proxy_image_link(remote_image.clone(), true, &context)
let proxied = proxy_image_link_wrapper(remote_image.clone(), true, &context)
.await
.unwrap();
assert_eq!(
Expand Down
6 changes: 3 additions & 3 deletions crates/api_crud/src/post/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ pub async fn create_post(
.community_id(data.community_id)
.creator_id(local_user_view.person.id)
.nsfw(data.nsfw)
.embed_title(metadata.title)
.embed_description(metadata.description)
.embed_video_url(metadata.embed_video_url)
.embed_title(metadata.opengraph_data.title)
.embed_description(metadata.opengraph_data.description)
.embed_video_url(metadata.opengraph_data.embed_video_url)
.language_id(language_id)
.thumbnail_url(metadata.thumbnail)
.build();
Expand Down
6 changes: 3 additions & 3 deletions crates/api_crud/src/post/update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ pub async fn update_post(
Some(url) => {
let metadata = fetch_link_metadata(url, true, &context).await?;
(
Some(metadata.title),
Some(metadata.description),
Some(metadata.embed_video_url),
Some(metadata.opengraph_data.title),
Some(metadata.opengraph_data.description),
Some(metadata.opengraph_data.embed_video_url),
Some(metadata.thumbnail),
)
}
Expand Down
6 changes: 3 additions & 3 deletions crates/apub/src/objects/post.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,9 @@ impl Object for ApubPost {
updated: page.updated.map(Into::into),
deleted: Some(false),
nsfw: page.sensitive,
embed_title: metadata.title,
embed_description: metadata.description,
embed_video_url: metadata.embed_video_url,
embed_title: metadata.opengraph_data.title,
embed_description: metadata.opengraph_data.description,
embed_video_url: metadata.opengraph_data.embed_video_url,
thumbnail_url,
ap_id: Some(page.id.clone().into()),
local: Some(false),
Expand Down