Skip to content

Commit

Permalink
[NFT Metadata Crawler] Fix raw image uri parse error (#9628)
Browse files Browse the repository at this point in the history
* Fix raw_image_uri parse bug

* upsert
  • Loading branch information
just-in-chang authored Aug 13, 2023
1 parent 78d414d commit 36e37ab
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,14 @@ impl NFTMetadataCrawlerURIsQuery {
}

pub fn get_by_raw_image_uri(
token_uri: String,
raw_image_uri: String,
conn: &mut PooledConnection<ConnectionManager<PgConnection>>,
) -> anyhow::Result<Option<Self>> {
let mut op = || {
parsed_token_uris::table
.filter(parsed_token_uris::raw_image_uri.eq(raw_image_uri.clone()))
.filter(parsed_token_uris::token_uri.ne(token_uri.clone()))
.first::<NFTMetadataCrawlerURIsQuery>(conn)
.optional()
.map_err(Into::into)
Expand All @@ -85,12 +87,14 @@ impl NFTMetadataCrawlerURIsQuery {
}

pub fn get_by_raw_animation_uri(
token_uri: String,
raw_animation_uri: String,
conn: &mut PooledConnection<ConnectionManager<PgConnection>>,
) -> anyhow::Result<Option<Self>> {
let mut op = || {
parsed_token_uris::table
.filter(parsed_token_uris::raw_animation_uri.eq(raw_animation_uri.clone()))
.filter(parsed_token_uris::token_uri.ne(token_uri.clone()))
.first::<NFTMetadataCrawlerURIsQuery>(conn)
.optional()
.map_err(Into::into)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ pub fn upsert_uris(
cdn_animation_uri.eq(excluded(cdn_animation_uri)),
image_optimizer_retry_count.eq(excluded(image_optimizer_retry_count)),
json_parser_retry_count.eq(excluded(json_parser_retry_count)),
animation_optimizer_retry_count.eq(excluded(animation_optimizer_retry_count)),
));

let debug_query = diesel::debug_query::<diesel::pg::Pg, _>(&query).to_string();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,10 @@ impl ImageOptimizer {
) -> anyhow::Result<(Vec<u8>, ImageFormat)> {
let (_, size) = get_uri_metadata(uri.clone()).await?;
if size > max_file_size_bytes {
let error_msg = format!(
return Err(anyhow::anyhow!(format!(
"Image optimizer received file too large: {} bytes, skipping",
size
);
error!(uri = uri, "[NFT Metadata Crawler] {}", error_msg);
return Err(anyhow::anyhow!(error_msg));
)));
}

let op = || {
Expand Down Expand Up @@ -59,7 +57,7 @@ impl ImageOptimizer {
let img = image::load_from_memory(&img_bytes)
.context(format!("Failed to load image from memory: {} bytes", size))?;
let resized_image = resize(&img.to_rgb8(), 400, 400, FilterType::Gaussian);
Ok((Self::to_json_bytes(resized_image, image_quality)?, format))
Ok((Self::to_jpeg_bytes(resized_image, image_quality)?, format))
},
}
}
Expand All @@ -85,7 +83,7 @@ impl ImageOptimizer {
}

/// Converts image to JPEG bytes vector
fn to_json_bytes(
fn to_jpeg_bytes(
image_buffer: ImageBuffer<image::Rgb<u8>, Vec<u8>>,
image_quality: u8,
) -> anyhow::Result<Vec<u8>> {
Expand All @@ -94,7 +92,7 @@ impl ImageOptimizer {
match dynamic_image.write_to(&mut byte_store, ImageOutputFormat::Jpeg(image_quality)) {
Ok(_) => Ok(byte_store.into_inner()),
Err(e) => {
error!(error = ?e, "[NFT Metadata Crawler] Error converting image to bytes:: {} bytes", dynamic_image.as_bytes().len());
error!(error = ?e, "[NFT Metadata Crawler] Error converting image to bytes: {} bytes", dynamic_image.as_bytes().len());
Err(anyhow::anyhow!(e))
},
}
Expand Down
13 changes: 6 additions & 7 deletions ecosystem/nft-metadata-crawler-parser/src/utils/json_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,15 @@ impl JSONParser {
) -> anyhow::Result<(Option<String>, Option<String>, Value)> {
let (mime, size) = get_uri_metadata(uri.clone()).await?;
if ImageFormat::from_mime_type(mime.clone()).is_some() {
let error_msg = format!("JSON parser received image file: {}, skipping", mime);
error!(uri = uri, "[NFT Metadata Crawler] {}", error_msg);
return Err(anyhow::anyhow!(error_msg));
return Err(anyhow::anyhow!(format!(
"JSON parser received image file: {}, skipping",
mime
)));
} else if size > max_file_size_bytes {
let error_msg = format!(
return Err(anyhow::anyhow!(format!(
"JSON parser received file too large: {} bytes, skipping",
size
);
error!(uri = uri, "[NFT Metadata Crawler] {}", error_msg);
return Err(anyhow::anyhow!(error_msg));
)));
}

let op = || {
Expand Down
58 changes: 40 additions & 18 deletions ecosystem/nft-metadata-crawler-parser/src/worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,11 +263,14 @@ impl Worker {
// Deduplicate token_uri
// Proceed if force or if token_uri has not been parsed
if self.force
|| NFTMetadataCrawlerURIsQuery::get_by_token_uri(
self.token_uri.clone(),
&mut self.conn,
)?
.is_none()
|| NFTMetadataCrawlerURIsQuery::get_by_token_uri(self.token_uri.clone(), &mut self.conn)
.map_or(true, |uri| match uri {
Some(uris) => {
self.model.set_cdn_json_uri(uris.cdn_json_uri);
false
},
None => true,
})
{
// Parse token_uri
self.model.set_token_uri(self.token_uri.clone());
Expand Down Expand Up @@ -315,23 +318,33 @@ impl Worker {
.ok();
self.model.set_cdn_json_uri(cdn_json_uri);
}
}

// Commit model to Postgres
if let Err(e) = upsert_uris(&mut self.conn, self.model.clone()) {
error!(
last_transaction_version = self.last_transaction_version,
error = ?e,
"[NFT Metadata Crawler] Commit to Postgres failed"
);
}
// Commit model to Postgres
if let Err(e) = upsert_uris(&mut self.conn, self.model.clone()) {
error!(
last_transaction_version = self.last_transaction_version,
error = ?e,
"[NFT Metadata Crawler] Commit to Postgres failed"
);
}

// Deduplicate raw_image_uri
// Proceed with image optimization if force or if raw_image_uri has not been parsed
if self.force
|| self.model.get_raw_image_uri().map_or(true, |uri_option| {
NFTMetadataCrawlerURIsQuery::get_by_raw_image_uri(uri_option, &mut self.conn)
.map_or(true, |uri| uri.is_none())
NFTMetadataCrawlerURIsQuery::get_by_raw_image_uri(
self.token_uri.clone(),
uri_option,
&mut self.conn,
)
.map_or(true, |uri| match uri {
Some(uris) => {
self.model.set_cdn_image_uri(uris.cdn_image_uri);
false
},
None => true,
})
})
{
// Parse raw_image_uri, use token_uri if parsing fails
Expand Down Expand Up @@ -402,9 +415,18 @@ impl Worker {
let mut raw_animation_uri_option = self.model.get_raw_animation_uri();
if !self.force
&& raw_animation_uri_option.clone().map_or(true, |uri| {
NFTMetadataCrawlerURIsQuery::get_by_raw_animation_uri(uri, &mut self.conn)
.unwrap_or(None)
.is_some()
NFTMetadataCrawlerURIsQuery::get_by_raw_animation_uri(
self.token_uri.clone(),
uri,
&mut self.conn,
)
.map_or(true, |uri| match uri {
Some(uris) => {
self.model.set_cdn_animation_uri(uris.cdn_animation_uri);
true
},
None => true,
})
})
{
raw_animation_uri_option = None;
Expand Down

0 comments on commit 36e37ab

Please sign in to comment.