From 6fd0dcb3eb7da9b0f4becd7fdc8a9624034c41fa Mon Sep 17 00:00:00 2001 From: Pankaj Garg Date: Fri, 20 Oct 2023 15:41:24 -0700 Subject: [PATCH] Add support for fetching program in cargo registry (#33759) * Rename publisher.rs to crate_handler.rs * support for fetching program in cargo registry --- cargo-registry/src/crate_handler.rs | 356 ++++++++++++++++++++++++++++ cargo-registry/src/main.rs | 40 +++- cargo-registry/src/publisher.rs | 173 -------------- cargo-registry/src/sparse_index.rs | 29 ++- cli/src/program_v4.rs | 2 +- 5 files changed, 410 insertions(+), 190 deletions(-) create mode 100644 cargo-registry/src/crate_handler.rs delete mode 100644 cargo-registry/src/publisher.rs diff --git a/cargo-registry/src/crate_handler.rs b/cargo-registry/src/crate_handler.rs new file mode 100644 index 00000000000000..c55ab4ff07395f --- /dev/null +++ b/cargo-registry/src/crate_handler.rs @@ -0,0 +1,356 @@ +use { + crate::{ + client::{Client, RPCCommandConfig}, + sparse_index::{IndexEntry, RegistryIndex}, + }, + flate2::{ + read::{GzDecoder, GzEncoder}, + Compression, + }, + hyper::body::Bytes, + log::*, + serde::{Deserialize, Serialize}, + serde_json::from_slice, + sha2::{Digest, Sha256}, + solana_cli::program_v4::{process_deploy_program, process_dump, read_and_verify_elf}, + solana_sdk::{ + pubkey::Pubkey, + signature::{Keypair, Signer}, + signer::EncodableKey, + }, + std::{ + collections::BTreeMap, + fs, + io::{Cursor, Read}, + mem::size_of, + ops::Deref, + path::{Path, PathBuf}, + str::FromStr, + sync::Arc, + }, + tar::{Archive, Builder}, + tempfile::{tempdir, TempDir}, +}; + +pub(crate) type Error = Box; + +#[derive(Clone, Debug, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub(crate) enum DependencyType { + Dev, + Build, + Normal, +} + +#[allow(dead_code)] +#[derive(Clone, Debug, Deserialize, Serialize)] +pub(crate) struct Dependency { + pub name: String, + pub version_req: String, + pub features: Vec, + pub optional: bool, + pub default_features: bool, + pub target: Option, + pub kind: DependencyType, + pub registry: Option, + pub explicit_name_in_toml: Option, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +#[allow(unused)] +pub(crate) struct PackageMetaData { + pub name: String, + pub vers: String, + pub deps: Vec, + pub features: BTreeMap>, + pub authors: Vec, + pub description: Option, + pub documentation: Option, + pub homepage: Option, + pub readme: Option, + pub readme_file: Option, + pub keywords: Vec, + pub categories: Vec, + pub license: Option, + pub license_file: Option, + pub repository: Option, + pub badges: BTreeMap>, + pub links: Option, + pub rust_version: Option, +} + +impl PackageMetaData { + fn new(bytes: &Bytes) -> serde_json::Result<(PackageMetaData, usize)> { + let (json_length, sizeof_length) = Self::read_u32_length(bytes)?; + let end_of_meta_data = sizeof_length.saturating_add(json_length as usize); + let json_body = bytes.slice(sizeof_length..end_of_meta_data); + from_slice::(json_body.deref()).map(|data| (data, end_of_meta_data)) + } + + fn read_u32_length(bytes: &Bytes) -> serde_json::Result<(u32, usize)> { + let sizeof_length = size_of::(); + let length_le = bytes.slice(0..sizeof_length); + let length = + u32::from_le_bytes(length_le.deref().try_into().expect("Failed to read length")); + Ok((length, sizeof_length)) + } +} + +pub(crate) struct Program { + path: String, + id: Pubkey, + _tempdir: Arc, +} + +impl Program { + fn deploy(&self, client: Arc, signer: &dyn Signer) -> Result<(), Error> { + if self.id != signer.pubkey() { + return Err("Signer doesn't match program ID".into()); + } + + let program_data = read_and_verify_elf(self.path.as_ref()) + .map_err(|e| format!("failed to read the program: {}", e))?; + + let command_config = RPCCommandConfig::new(client.as_ref()); + + process_deploy_program( + client.rpc_client.clone(), + &command_config.0, + &program_data, + program_data.len() as u32, + &signer.pubkey(), + Some(signer), + ) + .map_err(|e| { + error!("Failed to deploy the program: {}", e); + format!("Failed to deploy the program: {}", e) + })?; + + Ok(()) + } + + fn dump(&self, client: Arc) -> Result<(), Error> { + info!("Fetching program {:?}", self.id); + let command_config = RPCCommandConfig::new(client.as_ref()); + + process_dump( + client.rpc_client.clone(), + command_config.0.commitment, + Some(self.id), + &self.path, + ) + .map_err(|e| { + error!("Failed to fetch the program: {}", e); + format!("Failed to fetch the program: {}", e) + })?; + + Ok(()) + } + + pub(crate) fn crate_name_to_program_id(crate_name: &str) -> Option { + crate_name + .split_once('-') + .and_then(|(_prefix, id_str)| Pubkey::from_str(id_str).ok()) + } +} + +impl From<&UnpackedCrate> for Program { + fn from(value: &UnpackedCrate) -> Self { + Self { + path: value.program_path.clone(), + id: value.program_id, + _tempdir: value.tempdir.clone(), + } + } +} + +pub(crate) struct CratePackage(pub(crate) Bytes); + +impl From for Result { + fn from(value: UnpackedCrate) -> Self { + let mut archive = Builder::new(Vec::new()); + archive.append_dir_all(".", value.tempdir.path())?; + let data = archive.into_inner()?; + let reader = Cursor::new(data); + let mut encoder = GzEncoder::new(reader, Compression::fast()); + let mut zipped_data = Vec::new(); + encoder.read_to_end(&mut zipped_data)?; + + let meta_str = serde_json::to_string(&value.meta)?; + + let sizeof_length = size_of::(); + let mut packed = Vec::with_capacity( + sizeof_length + .saturating_add(meta_str.len()) + .saturating_add(sizeof_length) + .saturating_add(zipped_data.len()), + ); + + packed[..sizeof_length].copy_from_slice(&u32::to_le_bytes(meta_str.len() as u32)); + let offset = sizeof_length; + let end = offset.saturating_add(meta_str.len()); + packed[offset..end].copy_from_slice(meta_str.as_bytes()); + let offset = end; + let end = offset.saturating_add(sizeof_length); + packed[offset..end].copy_from_slice(&u32::to_le_bytes(zipped_data.len() as u32)); + let offset = end; + packed[offset..].copy_from_slice(&zipped_data); + + Ok(CratePackage(Bytes::from(packed))) + } +} + +pub(crate) struct UnpackedCrate { + meta: PackageMetaData, + cksum: String, + tempdir: Arc, + program_path: String, + program_id: Pubkey, + keypair: Option, +} + +impl From for Result { + fn from(value: CratePackage) -> Self { + let bytes = value.0; + let (meta, offset) = PackageMetaData::new(&bytes)?; + + let (_crate_file_length, length_size) = + PackageMetaData::read_u32_length(&bytes.slice(offset..))?; + let crate_bytes = bytes.slice(offset.saturating_add(length_size)..); + let cksum = format!("{:x}", Sha256::digest(&crate_bytes)); + + let decoder = GzDecoder::new(crate_bytes.as_ref()); + let mut archive = Archive::new(decoder); + + let tempdir = tempdir()?; + archive.unpack(tempdir.path())?; + + let lib_name = UnpackedCrate::program_library_name(&tempdir, &meta)?; + + let program_path = + UnpackedCrate::make_path(&tempdir, &meta, format!("out/{}.so", lib_name)) + .into_os_string() + .into_string() + .map_err(|_| "Failed to get program file path")?; + + let keypair = Keypair::read_from_file(UnpackedCrate::make_path( + &tempdir, + &meta, + format!("out/{}-keypair.json", lib_name), + )) + .map_err(|e| format!("Failed to get keypair from the file: {}", e))?; + + Ok(UnpackedCrate { + meta, + cksum, + tempdir: Arc::new(tempdir), + program_path, + program_id: keypair.pubkey(), + keypair: Some(keypair), + }) + } +} + +impl UnpackedCrate { + pub(crate) fn publish( + &self, + client: Arc, + index: Arc, + ) -> Result<(), Error> { + let Some(signer) = &self.keypair else { + return Err("No signer provided for the program deployment".into()); + }; + + Program::from(self).deploy(client, signer)?; + + let mut entry: IndexEntry = self.meta.clone().into(); + entry.cksum = self.cksum.clone(); + index.insert_entry(entry)?; + + info!("Successfully deployed the program"); + Ok(()) + } + + pub(crate) fn fetch_index(id: Pubkey, client: Arc) -> Result { + let (_program, unpacked_crate) = Self::fetch_program(id, client)?; + + let mut entry: IndexEntry = unpacked_crate.meta.clone().into(); + entry.cksum = unpacked_crate.cksum.clone(); + + Ok(entry) + } + + #[allow(dead_code)] + pub(crate) fn fetch(id: Pubkey, client: Arc) -> Result { + let (_program, unpacked_crate) = Self::fetch_program(id, client)?; + UnpackedCrate::into(unpacked_crate) + } + + fn fetch_program(id: Pubkey, client: Arc) -> Result<(Program, UnpackedCrate), Error> { + let crate_obj = Self::new_empty(id)?; + let program = Program::from(&crate_obj); + program.dump(client)?; + + // Decompile the program + // Generate a Cargo.toml + + Ok((program, crate_obj)) + } + + fn new_empty(id: Pubkey) -> Result { + let meta = PackageMetaData { + name: id.to_string(), + vers: "0.1".to_string(), + deps: vec![], + features: BTreeMap::new(), + authors: vec![], + description: None, + documentation: None, + homepage: None, + readme: None, + readme_file: None, + keywords: vec![], + categories: vec![], + license: None, + license_file: None, + repository: None, + badges: BTreeMap::new(), + links: None, + rust_version: None, + }; + + let tempdir = tempdir()?; + + let program_path = Self::make_path(&tempdir, &meta, format!("out/{}.so", id)) + .into_os_string() + .into_string() + .map_err(|_| "Failed to get program file path")?; + + Ok(Self { + meta, + cksum: "".to_string(), + tempdir: Arc::new(tempdir), + program_path, + program_id: id, + keypair: None, + }) + } + + fn make_path>(tempdir: &TempDir, meta: &PackageMetaData, append: P) -> PathBuf { + let mut path = tempdir.path().to_path_buf(); + path.push(format!("{}-{}/", meta.name, meta.vers)); + path.push(append); + path + } + + fn program_library_name(tempdir: &TempDir, meta: &PackageMetaData) -> Result { + let toml_content = fs::read_to_string(Self::make_path(tempdir, meta, "Cargo.toml.orig"))?; + let toml = toml_content.parse::()?; + let library_name = toml + .get("lib") + .and_then(|v| v.get("name")) + .and_then(|v| v.as_str()) + .ok_or("Failed to get module name")?; + Ok(library_name.to_string()) + } +} diff --git a/cargo-registry/src/main.rs b/cargo-registry/src/main.rs index 419e8cf434202d..073b8e42cb609c 100644 --- a/cargo-registry/src/main.rs +++ b/cargo-registry/src/main.rs @@ -2,7 +2,7 @@ use { crate::{ client::Client, - publisher::{Error, Publisher}, + crate_handler::{CratePackage, Error, Program, UnpackedCrate}, sparse_index::RegistryIndex, }, hyper::{ @@ -18,7 +18,7 @@ use { }; mod client; -mod publisher; +mod crate_handler; mod response_builder; mod sparse_index; @@ -38,10 +38,14 @@ impl CargoRegistryService { match bytes { Ok(data) => { - let Ok(result) = tokio::task::spawn_blocking(move || { - Publisher::publish_crate(data, client, index) - }) - .await + let Ok(crate_object) = CratePackage(data).into() else { + return response_builder::error_response( + hyper::StatusCode::INTERNAL_SERVER_ERROR, + "Failed to parse the crate information", + ); + }; + let Ok(result) = + tokio::task::spawn_blocking(move || crate_object.publish(client, index)).await else { return response_builder::error_response( hyper::StatusCode::INTERNAL_SERVER_ERROR, @@ -74,6 +78,27 @@ impl CargoRegistryService { }) } + fn handle_download_crate_request( + path: &str, + _request: &hyper::Request, + client: Arc, + ) -> hyper::Response { + let Some((path, crate_name, _version)) = Self::get_crate_name_and_version(path) else { + return response_builder::error_in_parsing(); + }; + + if path.len() != PATH_PREFIX.len() { + return response_builder::error_incorrect_length(); + } + + let _package = Program::crate_name_to_program_id(crate_name) + .and_then(|id| UnpackedCrate::fetch(id, client).ok()); + + // Return the package to the caller in the response + + response_builder::error_not_implemented() + } + fn handle_yank_request( path: &str, _request: &hyper::Request, @@ -183,7 +208,7 @@ impl CargoRegistryService { } if path.starts_with(index.index_root.as_str()) { - return Ok(index.handler(request)); + return Ok(index.handler(request, client.clone())); } if !path.starts_with(PATH_PREFIX) { @@ -216,6 +241,7 @@ impl CargoRegistryService { Method::GET => match endpoint { "crates" => Self::handle_get_crates_request(path, &request), "owners" => Self::handle_get_owners_request(path, &request), + "download" => Self::handle_download_crate_request(path, &request, client.clone()), _ => response_builder::error_not_allowed(), }, Method::DELETE => match endpoint { diff --git a/cargo-registry/src/publisher.rs b/cargo-registry/src/publisher.rs deleted file mode 100644 index ea4c74a7251b67..00000000000000 --- a/cargo-registry/src/publisher.rs +++ /dev/null @@ -1,173 +0,0 @@ -use { - crate::{ - client::{Client, RPCCommandConfig}, - sparse_index::{IndexEntry, RegistryIndex}, - }, - flate2::read::GzDecoder, - hyper::body::Bytes, - log::*, - serde::{Deserialize, Serialize}, - serde_json::from_slice, - sha2::{Digest, Sha256}, - solana_cli::program_v4::{process_deploy_program, read_and_verify_elf}, - solana_sdk::{ - signature::{Keypair, Signer}, - signer::EncodableKey, - }, - std::{ - collections::BTreeMap, - fs, - mem::size_of, - ops::Deref, - path::{Path, PathBuf}, - sync::Arc, - }, - tar::Archive, - tempfile::{tempdir, TempDir}, -}; - -pub(crate) type Error = Box; - -#[derive(Debug, Deserialize, Serialize)] -#[serde(rename_all = "lowercase")] -pub(crate) enum DependencyType { - Dev, - Build, - Normal, -} - -#[allow(dead_code)] -#[derive(Debug, Deserialize)] -pub(crate) struct Dependency { - pub name: String, - pub version_req: String, - pub features: Vec, - pub optional: bool, - pub default_features: bool, - pub target: Option, - pub kind: DependencyType, - pub registry: Option, - pub explicit_name_in_toml: Option, -} - -#[derive(Debug, Deserialize)] -#[allow(unused)] -pub(crate) struct PackageMetaData { - pub name: String, - pub vers: String, - pub deps: Vec, - pub features: BTreeMap>, - pub authors: Vec, - pub description: Option, - pub documentation: Option, - pub homepage: Option, - pub readme: Option, - pub readme_file: Option, - pub keywords: Vec, - pub categories: Vec, - pub license: Option, - pub license_file: Option, - pub repository: Option, - pub badges: BTreeMap>, - pub links: Option, - pub rust_version: Option, -} - -impl PackageMetaData { - fn new(bytes: &Bytes) -> serde_json::Result<(PackageMetaData, usize)> { - let (json_length, sizeof_length) = Self::read_u32_length(bytes)?; - let end_of_meta_data = sizeof_length.saturating_add(json_length as usize); - let json_body = bytes.slice(sizeof_length..end_of_meta_data); - from_slice::(json_body.deref()).map(|data| (data, end_of_meta_data)) - } - - fn read_u32_length(bytes: &Bytes) -> serde_json::Result<(u32, usize)> { - let sizeof_length = size_of::(); - let length_le = bytes.slice(0..sizeof_length); - let length = - u32::from_le_bytes(length_le.deref().try_into().expect("Failed to read length")); - Ok((length, sizeof_length)) - } -} - -pub(crate) struct Publisher {} - -impl Publisher { - fn make_path>(tempdir: &TempDir, meta: &PackageMetaData, append: P) -> PathBuf { - let mut path = tempdir.path().to_path_buf(); - path.push(format!("{}-{}/", meta.name, meta.vers)); - path.push(append); - path - } - - fn program_library_name(tempdir: &TempDir, meta: &PackageMetaData) -> Result { - let toml_content = fs::read_to_string(Self::make_path(tempdir, meta, "Cargo.toml.orig"))?; - let toml = toml_content.parse::()?; - let library_name = toml - .get("lib") - .and_then(|v| v.get("name")) - .and_then(|v| v.as_str()) - .ok_or("Failed to get module name")?; - Ok(library_name.to_string()) - } - - pub(crate) fn publish_crate( - bytes: Bytes, - client: Arc, - index: Arc, - ) -> Result<(), Error> { - let (meta_data, offset) = PackageMetaData::new(&bytes)?; - - let (_crate_file_length, length_size) = - PackageMetaData::read_u32_length(&bytes.slice(offset..))?; - let crate_bytes = bytes.slice(offset.saturating_add(length_size)..); - let crate_cksum = format!("{:x}", Sha256::digest(&crate_bytes)); - - let decoder = GzDecoder::new(crate_bytes.as_ref()); - let mut archive = Archive::new(decoder); - - let tempdir = tempdir()?; - archive.unpack(tempdir.path())?; - - let command_config = RPCCommandConfig::new(client.as_ref()); - - let lib_name = Self::program_library_name(&tempdir, &meta_data)?; - - let program_path = Self::make_path(&tempdir, &meta_data, format!("out/{}.so", lib_name)) - .into_os_string() - .into_string() - .map_err(|_| "Failed to get program file path")?; - - let program_data = read_and_verify_elf(program_path.as_ref()) - .map_err(|e| format!("failed to read the program: {}", e))?; - - let program_keypair = Keypair::read_from_file(Self::make_path( - &tempdir, - &meta_data, - format!("out/{}-keypair.json", lib_name), - )) - .map_err(|e| format!("Failed to get keypair from the file: {}", e))?; - - info!("Deploying program at {:?}", program_keypair.pubkey()); - - process_deploy_program( - client.rpc_client.clone(), - &command_config.0, - &program_data, - program_data.len() as u32, - &program_keypair.pubkey(), - Some(&program_keypair), - ) - .map_err(|e| { - error!("Failed to deploy the program: {}", e); - format!("Failed to deploy the program: {}", e) - })?; - - let mut entry: IndexEntry = meta_data.into(); - entry.cksum = crate_cksum; - index.insert_entry(entry)?; - - info!("Successfully deployed the program"); - Ok(()) - } -} diff --git a/cargo-registry/src/sparse_index.rs b/cargo-registry/src/sparse_index.rs index e29a581c1c7819..68ff4bfac1c6a0 100644 --- a/cargo-registry/src/sparse_index.rs +++ b/cargo-registry/src/sparse_index.rs @@ -1,11 +1,15 @@ use { crate::{ - publisher::{Dependency, Error, PackageMetaData}, + client::Client, + crate_handler::{Dependency, Error, PackageMetaData, Program, UnpackedCrate}, response_builder, }, log::info, serde::{Deserialize, Serialize}, - std::{collections::BTreeMap, sync::RwLock}, + std::{ + collections::BTreeMap, + sync::{Arc, RwLock}, + }, }; #[derive(Debug, Default, Deserialize, Serialize)] @@ -94,6 +98,7 @@ impl RegistryIndex { pub(crate) fn handler( &self, request: hyper::Request, + client: Arc, ) -> hyper::Response { let path = request.uri().path(); let expected_root = self.index_root.as_str(); @@ -115,7 +120,7 @@ impl RegistryIndex { return response_builder::success_response_str(&self.config); } - self.handle_crate_lookup_request(path) + self.handle_crate_lookup_request(path, client) } pub(crate) fn insert_entry(&self, entry: IndexEntry) -> Result<(), Error> { @@ -150,7 +155,11 @@ impl RegistryIndex { .then_some(crate_name) } - fn handle_crate_lookup_request(&self, path: &str) -> hyper::Response { + fn handle_crate_lookup_request( + &self, + path: &str, + client: Arc, + ) -> hyper::Response { let Some(crate_name) = Self::get_crate_name_from_path(path) else { return response_builder::error_response( hyper::StatusCode::BAD_REQUEST, @@ -167,15 +176,17 @@ impl RegistryIndex { ); }; - let Some(entry) = read_index.get(crate_name) else { + let response = if let Some(entry) = read_index.get(crate_name) { + Some(serde_json::to_string(entry)) + } else { // The index currently doesn't contain the program entry. // Fetch the program information from the network using RPC client. - // In the meanwhile, return empty success response, so that the registry - // client continues to poll us for the index information. - return response_builder::success_response(); + Program::crate_name_to_program_id(crate_name) + .and_then(|id| UnpackedCrate::fetch_index(id, client).ok()) + .map(|entry| serde_json::to_string(&entry)) }; - let Ok(response) = serde_json::to_string(entry) else { + let Some(Ok(response)) = response else { return response_builder::error_response( hyper::StatusCode::INTERNAL_SERVER_ERROR, "Internal error. index entry is corrupted", diff --git a/cli/src/program_v4.rs b/cli/src/program_v4.rs index 41a8fa9de32b61..324f3040b83d4c 100644 --- a/cli/src/program_v4.rs +++ b/cli/src/program_v4.rs @@ -752,7 +752,7 @@ fn process_show( } } -fn process_dump( +pub fn process_dump( rpc_client: Arc, commitment: CommitmentConfig, account_pubkey: Option,