From 332cea5cf1ec753be47b070ab1ef20f2c029eac1 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 16 Mar 2022 20:11:59 +0800 Subject: [PATCH 01/16] refactor: Say goodbye to aws-s3-sdk Signed-off-by: Xuanwo --- Cargo.toml | 21 +- src/lib.rs | 2 + src/services/s3/backend.rs | 350 ++++++++++++---------- src/services/s3/error.rs | 96 ------ src/services/s3/middleware/credentials.rs | 61 ---- src/services/s3/middleware/default.rs | 88 ------ src/services/s3/middleware/mod.rs | 19 -- src/services/s3/middleware/signer.rs | 119 -------- src/services/s3/mod.rs | 2 - src/services/s3/object_stream.rs | 299 +++++++++++++----- tests/behavior/behavior.rs | 3 +- 11 files changed, 436 insertions(+), 624 deletions(-) delete mode 100644 src/services/s3/error.rs delete mode 100644 src/services/s3/middleware/credentials.rs delete mode 100644 src/services/s3/middleware/default.rs delete mode 100644 src/services/s3/middleware/mod.rs delete mode 100644 src/services/s3/middleware/signer.rs diff --git a/Cargo.toml b/Cargo.toml index d5f5de83417..77e987bb8db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,16 +23,6 @@ name = "ops" anyhow = "1" async-compat = "0.2" async-trait = "0.1" -aws-config = "0.8" -aws-endpoint = "0.8" -aws-http = "0.8" -aws-sdk-s3 = "0.8" -aws-sig-auth = "0.8" -aws-sigv4 = "0.8" -aws-smithy-client = "0.38" -aws-smithy-http = "0.38" -aws-smithy-http-tower = "0.38" -aws-types = { version = "0.8", features = ["hardcoded-credentials"] } bytes = "1" futures = { version = "0.3", features = ["alloc"] } http = "0.2" @@ -41,17 +31,20 @@ log = "0.4" metrics = "0.18" once_cell = "1" pin-project = "1" -reqwest = "0.11" +reqwest = { version = "0.11", features = ["stream"] } thiserror = "1" tokio = { version = "1.17", features = ["full"] } tower = "0.4" +reqsign = { git = "https://github.com/Xuanwo/reqsign", rev = "44d7bb37dfa543487d4d5286d64abd3f2fe12bf7" } +roxmltree = "0.14" +bstr = "0.2" [dev-dependencies] anyhow = "1.0" criterion = { version = "0.3", features = [ - "async", - 
"async_tokio", - "html_reports", + "async", + "async_tokio", + "html_reports", ] } dotenv = "0.15" env_logger = "0.9" diff --git a/src/lib.rs b/src/lib.rs index 900d24e6666..a9c0c084e1f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,6 +64,8 @@ //! Ok(()) //! } //! ``` +extern crate core; + mod accessor; pub use accessor::Accessor; diff --git a/src/services/s3/backend.rs b/src/services/s3/backend.rs index 30ee8e5be50..af224b2d9cf 100644 --- a/src/services/s3/backend.rs +++ b/src/services/s3/backend.rs @@ -12,20 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::borrow::Cow; use std::collections::HashMap; -use std::pin::Pin; use std::str::FromStr; use std::sync::Arc; -use std::task::Context; -use std::task::Poll; use anyhow::anyhow; use async_trait::async_trait; -use aws_sdk_s3; -use aws_sdk_s3::Client; -use aws_smithy_http::body::SdkBody; -use aws_smithy_http::byte_stream::ByteStream; use futures::TryStreamExt; use http::HeaderValue; use http::StatusCode; @@ -35,11 +27,9 @@ use log::info; use log::warn; use metrics::increment_counter; use once_cell::sync::Lazy; +use reqsign::services::aws::v4::Signer; +use reqwest::{Body, Response, Url}; -use super::error::parse_get_object_error; -use super::error::parse_head_object_error; -use super::error::parse_unexpect_error; -use super::middleware::DefaultMiddleware; use super::object_stream::S3ObjectStream; use crate::credential::Credential; use crate::error::Error; @@ -165,8 +155,8 @@ impl Builder { ("bucket".to_string(), bucket.to_string()), ]); - let hc = reqwest::Client::new(); - let res = hc + let client = reqwest::Client::new(); + let res = client .head(format!("{endpoint}/{bucket}")) .send() .await @@ -237,36 +227,10 @@ impl Builder { }; debug!("backend use endpoint: {}, region: {}", &endpoint, ®ion); - // Config Loader will load config from environment. - // - // We will take user's input first if any. 
If there is no user input, we - will fallback to the aws default load chain like the following: - // - // - Environment variables: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and AWS_REGION - // - The default credentials files located in ~/.aws/config and ~/.aws/credentials (location can vary per platform) - // - Web Identity Token credentials from the environment or container (including EKS) - // - ECS Container Credentials (IAM roles for tasks) - // - EC2 Instance Metadata Service (IAM Roles attached to instance) - // - // Please keep in mind that the config loader only detect region and credentials. - let cfg_loader = aws_config::ConfigLoader::default(); - let mut cfg = aws_sdk_s3::config::Builder::from(&cfg_loader.load().await); - - { - // Set region. - cfg = cfg.region(aws_sdk_s3::Region::new(Cow::from(region.clone()))); - } - - { - // Set endpoint - let uri = http::Uri::from_str(&endpoint).map_err(|e| Error::Backend { - kind: Kind::BackendConfigurationInvalid, - context: context.clone(), - source: anyhow::Error::from(e), - })?; - - cfg = cfg.endpoint_resolver(aws_sdk_s3::Endpoint::immutable(uri)); - } + let mut signer_builder = reqsign::services::aws::v4::Signer::builder(); + signer_builder.service("s3"); + signer_builder.region(&region); + signer_builder.allow_anonymous(); if let Some(cred) = &self.credential { context.insert("credential".to_string(), "*".to_string()); @@ -275,11 +239,8 @@ impl Builder { access_key_id, secret_access_key, } => { - cfg = cfg.credentials_provider(aws_sdk_s3::Credentials::from_keys( - access_key_id, - secret_access_key, - None, - )); + signer_builder.access_key(access_key_id); + signer_builder.secret_key(secret_access_key); } // We don't need to do anything if user tries to read credential from env. 
Credential::Plain => { @@ -295,22 +256,15 @@ impl Builder { } } - let hyper_connector = aws_smithy_client::hyper_ext::Adapter::builder() - .build(aws_smithy_client::conns::https()); - - let aws_client = aws_smithy_client::Builder::new() - .connector(hyper_connector) - .middleware(aws_smithy_client::erase::DynMiddleware::new( - DefaultMiddleware::new(), - )) - .default_async_sleep() - .build(); + let signer = signer_builder.build().await?; info!("backend build finished: {:?}", &self); Ok(Arc::new(Backend { root, + endpoint, + signer: Arc::new(signer), bucket: self.bucket.clone(), - client: aws_sdk_s3::Client::with_config(aws_client.into_dyn(), cfg.build()), + client, })) } } @@ -319,8 +273,9 @@ impl Builder { #[derive(Debug, Clone)] pub struct Backend { bucket: String, - - client: aws_sdk_s3::Client, + endpoint: String, + signer: Arc, + client: reqwest::Client, // root will be "/" or "/abc/" root: String, } @@ -330,10 +285,6 @@ impl Backend { Builder::default() } - pub(crate) fn inner(&self) -> Client { - self.client.clone() - } - // normalize_path removes all internal `//` inside path. 
pub(crate) fn normalize_path(path: &str) -> String { let has_trailing = path.ends_with('/'); @@ -388,52 +339,36 @@ impl Accessor for Backend { &p, args.offset, args.size ); - let mut req = self - .client - .get_object() - .bucket(&self.bucket.clone()) - .key(&p); - - if args.offset.is_some() || args.size.is_some() { - req = req.range(HeaderRange::new(args.offset, args.size).to_string()); - } - - let resp = req.send().await.map_err(|e| { - let e = parse_get_object_error(e, "read", &p); - error!("object {} get_object: {:?}", &p, e); - e - })?; + let resp = self.get_object(&p, args.offset, args.size).await?; info!( "object {} reader created: offset {:?}, size {:?}", &p, args.offset, args.size ); - Ok(Box::new(S3ByteStream(resp.body).into_async_read())) + Ok(Box::new( + resp.bytes_stream() + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)) + .into_async_read(), + )) } async fn write(&self, r: BoxedAsyncReader, args: &OpWrite) -> Result { let p = self.get_abs_path(&args.path); info!("object {} write start: size {}", &p, args.size); - let _ = self - .client - .put_object() - .bucket(&self.bucket.clone()) - .key(&p) - .content_length(args.size as i64) - .body(ByteStream::from(SdkBody::from( - hyper::body::Body::wrap_stream(ReaderStream::new(r)), - ))) - .send() - .await - .map_err(|e| { - let e = parse_unexpect_error(e, "write", &p); - error!("object {} put_object: {:?}", &p, e); - e - })?; - - info!("object {} write finished: size {:?}", &p, args.size); - Ok(args.size as usize) + let resp = self.put_object(&p, r, args.size).await?; + match resp.status() { + http::StatusCode::CREATED | http::StatusCode::OK => { + info!("object {} write finished: size {:?}", &p, args.size); + Ok(args.size as usize) + } + _ => Err(Error::Object { + kind: Kind::Unexpected, + op: "write", + path: p.to_string(), + source: anyhow!("{:?}", resp), + }), + } } async fn stat(&self, args: &OpStat) -> Result { @@ -454,20 +389,20 @@ impl Accessor for Backend { return Ok(m); } - let meta 
= self - .client - .head_object() - .bucket(&self.bucket.clone()) - .key(&p) - .send() - .await - .map_err(|e| parse_head_object_error(e, "stat", &p)); + let resp = self.head_object(&p).await?; - match meta { - Ok(meta) => { + match resp.status() { + http::StatusCode::OK => { let mut m = Metadata::default(); m.set_path(&args.path); - m.set_content_length(meta.content_length as u64); + + // Parse content_length + if let Some(v) = resp.headers().get(http::header::CONTENT_LENGTH) { + m.set_content_length( + u64::from_str(v.to_str().expect("header must not contain non-ascii value")) + .expect("content length header must contain valid length"), + ); + } if p.ends_with('/') { m.set_mode(ObjectMode::DIR); @@ -480,21 +415,34 @@ impl Accessor for Backend { info!("object {} stat finished", &p); Ok(m) } - // Always returns empty dir object if path is endswith "/" and we got an - // ObjectNotExist error. - Err(e) if (e.kind() == Kind::ObjectNotExist && p.ends_with('/')) => { - let mut m = Metadata::default(); - m.set_path(&args.path); - m.set_content_length(0); - m.set_mode(ObjectMode::DIR); - m.set_complete(); + http::StatusCode::NOT_FOUND => { + // Always returns empty dir object if path is endswith "/" + if p.ends_with('/') { + let mut m = Metadata::default(); + m.set_path(&args.path); + m.set_content_length(0); + m.set_mode(ObjectMode::DIR); + m.set_complete(); - info!("object {} stat finished", &p); - Ok(m) + info!("object {} stat finished", &p); + Ok(m) + } else { + Err(Error::Object { + kind: Kind::ObjectNotExist, + op: "stat", + path: p.to_string(), + source: anyhow!("{:?}", resp), + }) + } } - Err(e) => { - error!("object {} head_object: {:?}", &p, e); - Err(e) + _ => { + error!("object {} head_object: {:?}", &p, resp); + Err(Error::Object { + kind: Kind::Unexpected, + op: "stat", + path: p.to_string(), + source: anyhow!("{:?}", resp), + }) } } } @@ -505,14 +453,7 @@ impl Accessor for Backend { let p = self.get_abs_path(&args.path); info!("object {} delete start", 
&p); - let _ = self - .client - .delete_object() - .bucket(&self.bucket.clone()) - .key(&p) - .send() - .await - .map_err(|e| parse_unexpect_error(e, "delete", &p))?; + let _ = self.delete_object(&p).await?; info!("object {} delete finished", &p); Ok(()) @@ -528,35 +469,132 @@ impl Accessor for Backend { } info!("object {} list start", &path); - Ok(Box::new(S3ObjectStream::new( - self.clone(), - self.bucket.clone(), - path, - ))) + Ok(Box::new(S3ObjectStream::new(self.clone(), path))) } } -struct S3ByteStream(aws_smithy_http::byte_stream::ByteStream); +impl Backend { + pub(crate) async fn get_object( + &self, + path: &str, + offset: Option, + size: Option, + ) -> Result { + let mut req = reqwest::Request::new( + http::Method::GET, + Url::from_str(&format!("{}/{}/{}", self.endpoint, self.bucket, path)) + .expect("url must be valid"), + ); -impl futures::Stream for S3ByteStream { - type Item = std::result::Result; + if offset.is_some() || size.is_some() { + req.headers_mut().insert( + http::header::RANGE, + HeaderRange::new(offset, size) + .to_string() + .parse() + .expect("header must be valid"), + ); + } - /// ## TODO - /// - /// This hack is ugly, we should find a better way to do this. - /// - /// The problem is `into_async_read` requires the stream returning - /// `std::io::Error`, the the `ByteStream` returns - /// `aws_smithy_http::byte_stream::Error` instead. - /// - /// I don't know why aws sdk should wrap the error into their own type... 
- fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - Pin::new(&mut self.0) - .poll_next(cx) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)) + self.signer.sign(&mut req).await.expect("sign must success"); + + self.client.execute(req).await.map_err(|e| { + error!("object {} get_object: {:?}", path, e); + Error::Unexpected(anyhow::Error::from(e)) + }) + } + + pub(crate) async fn put_object( + &self, + path: &str, + r: BoxedAsyncReader, + size: u64, + ) -> Result { + let mut req = reqwest::Request::new( + http::Method::PUT, + Url::from_str(&format!("{}/{}/{}", self.endpoint, self.bucket, path)) + .expect("url must be valid"), + ); + + // Set content length. + req.headers_mut().insert( + http::header::CONTENT_LENGTH, + size.to_string() + .parse() + .expect("content length must be valid"), + ); + req.headers_mut() + .insert(http::header::CONTENT_TYPE, HeaderValue::from_static("test")); + *req.body_mut() = Some(Body::from(hyper::body::Body::wrap_stream( + ReaderStream::new(r), + ))); + + self.signer.sign(&mut req).await.expect("sign must success"); + + self.client.execute(req).await.map_err(|e| { + error!("object {} put_object: {:?}", path, e); + Error::Unexpected(anyhow::Error::from(e)) + }) } - fn size_hint(&self) -> (usize, Option) { - self.0.size_hint() + pub(crate) async fn head_object(&self, path: &str) -> Result { + let mut req = reqwest::Request::new( + http::Method::HEAD, + Url::from_str(&format!("{}/{}/{}", self.endpoint, self.bucket, path)) + .expect("url must be valid"), + ); + + self.signer.sign(&mut req).await.expect("sign must success"); + + self.client.execute(req).await.map_err(|e| { + error!("object {} head_object: {:?}", path, e); + Error::Unexpected(anyhow::Error::from(e)) + }) + } + + pub(crate) async fn delete_object(&self, path: &str) -> Result { + let mut req = reqwest::Request::new( + http::Method::DELETE, + Url::from_str(&format!("{}/{}/{}", self.endpoint, self.bucket, path)) + .expect("url must be 
valid"), + ); + + self.signer.sign(&mut req).await.expect("sign must success"); + + self.client.execute(req).await.map_err(|e| { + error!("object {} delete_object: {:?}", path, e); + Error::Unexpected(anyhow::Error::from(e)) + }) + } + + pub(crate) async fn list_object( + &self, + path: &str, + continuation_token: &str, + ) -> Result { + let mut req = reqwest::Request::new( + http::Method::GET, + Url::from_str(&format!("{}/{}", self.endpoint, self.bucket)) + .expect("url must be valid"), + ); + + { + let mut query_pairs = req.url_mut().query_pairs_mut(); + + query_pairs + .append_pair("list-type", "2") + .append_pair("delimiter", "/") + .append_pair("prefix", path); + if !continuation_token.is_empty() { + query_pairs.append_pair("continuation-token", continuation_token); + } + } + + self.signer.sign(&mut req).await.expect("sign must success"); + + self.client.execute(req).await.map_err(|e| { + error!("object {} list_object: {:?}", path, e); + Error::Unexpected(anyhow::Error::from(e)) + }) } } diff --git a/src/services/s3/error.rs b/src/services/s3/error.rs deleted file mode 100644 index 1ffccd8a144..00000000000 --- a/src/services/s3/error.rs +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use aws_sdk_s3::error::GetObjectError; -use aws_sdk_s3::error::GetObjectErrorKind; -use aws_sdk_s3::error::HeadObjectError; -use aws_sdk_s3::error::HeadObjectErrorKind; -use aws_smithy_http::result::SdkError; - -use crate::error::Error; -use crate::error::Kind; - -pub fn parse_get_object_error( - err: SdkError, - op: &'static str, - path: &str, -) -> Error { - if let SdkError::ServiceError { err, .. } = err { - match err.kind { - GetObjectErrorKind::NoSuchKey(_) => Error::Object { - kind: Kind::ObjectNotExist, - op, - path: path.to_string(), - source: anyhow::Error::from(err), - }, - _ => Error::Object { - kind: Kind::Unexpected, - op, - path: path.to_string(), - source: anyhow::Error::from(err), - }, - } - } else { - Error::Object { - kind: Kind::Unexpected, - op, - path: path.to_string(), - source: anyhow::Error::from(err), - } - } -} - -pub fn parse_head_object_error( - err: SdkError, - op: &'static str, - path: &str, -) -> Error { - if let SdkError::ServiceError { err, .. } = err { - match err.kind { - HeadObjectErrorKind::NotFound(_) => Error::Object { - kind: Kind::ObjectNotExist, - op, - path: path.to_string(), - source: anyhow::Error::from(err), - }, - _ => Error::Object { - kind: Kind::Unexpected, - op, - path: path.to_string(), - source: anyhow::Error::from(err), - }, - } - } else { - Error::Object { - kind: Kind::Unexpected, - op, - path: path.to_string(), - source: anyhow::Error::from(err), - } - } -} - -// parse_unexpect_error is used to parse SdkError into unexpected. -pub fn parse_unexpect_error( - err: SdkError, - op: &'static str, - path: &str, -) -> Error { - Error::Object { - kind: Kind::Unexpected, - op, - path: path.to_string(), - source: anyhow::Error::from(err), - } -} diff --git a/src/services/s3/middleware/credentials.rs b/src/services/s3/middleware/credentials.rs deleted file mode 100644 index e5903e87fba..00000000000 --- a/src/services/s3/middleware/credentials.rs +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2022 Datafuse Labs. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::future::Future; -use std::pin::Pin; - -use aws_smithy_http::middleware::AsyncMapRequest; -use aws_smithy_http::operation::Request; -use aws_types::credentials::ProvideCredentials; -use aws_types::credentials::SharedCredentialsProvider; - -#[derive(Clone, Debug, Default)] -pub struct CredentialsStage; - -impl CredentialsStage { - /// Creates a new credentials stage. - pub fn new() -> Self { - CredentialsStage - } - - async fn load_creds(mut request: Request) -> Result { - let provider = request - .properties() - .get::() - .cloned(); - let provider = match provider { - Some(provider) => provider, - None => { - return Ok(request); - } - }; - - // We will ignore all credential loading errors here. - if let Ok(creds) = provider.provide_credentials().await { - request.properties_mut().insert(creds); - } - Ok(request) - } -} - -type BoxFuture = Pin + Send>>; - -impl AsyncMapRequest for CredentialsStage { - type Error = String; - type Future = Pin> + Send + 'static>>; - - fn apply(&self, request: Request) -> BoxFuture> { - Box::pin(Self::load_creds(request)) - } -} diff --git a/src/services/s3/middleware/default.rs b/src/services/s3/middleware/default.rs deleted file mode 100644 index 36a96a372f3..00000000000 --- a/src/services/s3/middleware/default.rs +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2022 Datafuse Labs. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::fmt::Debug; - -use aws_endpoint::AwsEndpointStage; -use aws_http::recursion_detection::RecursionDetectionStage; -use aws_http::user_agent::UserAgentStage; -use aws_sig_auth::signer::SigV4Signer; -use aws_smithy_http_tower::map_request::AsyncMapRequestLayer; -use aws_smithy_http_tower::map_request::MapRequestLayer; -use tower::layer::util::Identity; -use tower::layer::util::Stack; -use tower::ServiceBuilder; - -use super::credentials::CredentialsStage; -use super::signer::SigningStage; - -type DefaultMiddlewareStack = Stack< - MapRequestLayer, - Stack< - MapRequestLayer, - Stack< - AsyncMapRequestLayer, - Stack< - MapRequestLayer, - Stack, Identity>, - >, - >, - >, ->; - -/// AWS Middleware Stack -/// -/// This implements the middleware stack for this service. It will: -/// 1. Load credentials asynchronously into the property bag -/// 2. Sign the request with SigV4 -/// 3. Resolve an Endpoint for the request -/// 4. Add a user agent to the request -#[derive(Debug, Default, Clone)] -#[non_exhaustive] -pub struct DefaultMiddleware; - -impl DefaultMiddleware { - pub fn new() -> Self { - Self {} - } -} - -// define the middleware stack in a non-generic location to reduce code bloat. 
-fn base() -> ServiceBuilder { - let credential_provider = AsyncMapRequestLayer::for_mapper(CredentialsStage::new()); - let signer = MapRequestLayer::for_mapper(SigningStage::new(SigV4Signer::new())); - let endpoint_resolver = MapRequestLayer::for_mapper(AwsEndpointStage); - let user_agent = MapRequestLayer::for_mapper(UserAgentStage::new()); - let recursion_detection = MapRequestLayer::for_mapper(RecursionDetectionStage::new()); - // These layers can be considered as occurring in order, that is: - // 1. Resolve an endpoint - // 2. Add a user agent - // 3. Acquire credentials - // 4. Sign with credentials - // (5. Dispatch over the wire) - ServiceBuilder::new() - .layer(endpoint_resolver) - .layer(user_agent) - .layer(credential_provider) - .layer(signer) - .layer(recursion_detection) -} - -impl tower::Layer for DefaultMiddleware { - type Service = >::Service; - - fn layer(&self, inner: S) -> Self::Service { - base().service(inner) - } -} diff --git a/src/services/s3/middleware/mod.rs b/src/services/s3/middleware/mod.rs deleted file mode 100644 index 4ccf0ef02d5..00000000000 --- a/src/services/s3/middleware/mod.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -mod credentials; -mod default; -mod signer; - -pub use default::DefaultMiddleware; diff --git a/src/services/s3/middleware/signer.rs b/src/services/s3/middleware/signer.rs deleted file mode 100644 index f8372705a77..00000000000 --- a/src/services/s3/middleware/signer.rs +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/// We borrowed code from `aws_sig_auth` here to make anonymous access possible. -/// -/// The original implementations requires `Credentials` and signing all requests. -/// We did a simple trick here: rewrite the `SigningStage` and only sign request -/// when we have a valid credentials. -/// -/// For users who specify Credentials, nothing changed. -/// For users who doesn't specify Credentials, there are two situations: -/// -/// - The env could have valid credentials, we will load credentials from env. -/// - There aren't any credentials, we will sending request without any signing -/// just like sending requests via browser or `curl`. -/// -/// # TODO -/// -/// There is a potential CVE. Users could construct an anonymous client to read -/// credentials from the environment. We should address it in the future. 
-use std::time::SystemTime; - -use aws_sig_auth::middleware::SigningStageError; -use aws_sig_auth::signer::OperationSigningConfig; -use aws_sig_auth::signer::RequestConfig; -use aws_sig_auth::signer::SigV4Signer; -use aws_sig_auth::signer::SigningRequirements; -use aws_sigv4::http_request::SignableBody; -use aws_smithy_http::middleware::MapRequest; -use aws_smithy_http::operation::Request; -use aws_smithy_http::property_bag::PropertyBag; -use aws_types::region::SigningRegion; -use aws_types::Credentials; -use aws_types::SigningService; - -#[derive(Clone, Debug)] -pub struct SigningStage { - signer: SigV4Signer, -} - -impl SigningStage { - pub fn new(signer: SigV4Signer) -> Self { - Self { signer } - } -} - -fn signing_config( - config: &PropertyBag, -) -> Result<(&OperationSigningConfig, RequestConfig, Option), SigningStageError> { - let operation_config = config - .get::() - .ok_or(SigningStageError::MissingSigningConfig)?; - // Here is a trick. - // We will return `Option` here instead of `Credentials`. 
- let credentials = config.get::().cloned(); - let region = config - .get::() - .ok_or(SigningStageError::MissingSigningRegion)?; - let signing_service = config - .get::() - .ok_or(SigningStageError::MissingSigningService)?; - let payload_override = config.get::>(); - let request_config = RequestConfig { - request_ts: config - .get::() - .copied() - .unwrap_or_else(SystemTime::now), - region, - payload_override, - service: signing_service, - }; - Ok((operation_config, request_config, credentials)) -} - -impl MapRequest for SigningStage { - type Error = SigningStageError; - - fn apply(&self, req: Request) -> Result { - req.augment(|mut req, config| { - let operation_config = config - .get::() - .ok_or(SigningStageError::MissingSigningConfig)?; - let (operation_config, request_config, creds) = - match &operation_config.signing_requirements { - SigningRequirements::Disabled => return Ok(req), - SigningRequirements::Optional => match signing_config(config) { - Ok(parts) => parts, - Err(_) => return Ok(req), - }, - SigningRequirements::Required => signing_config(config)?, - }; - - // The most tricky part here. - // - // We will try to load the credentials and only sign it when we have a - // valid credential. 
- if let Some(creds) = creds { - let signature = self - .signer - .sign(operation_config, &request_config, &creds, &mut req) - .map_err(|err| SigningStageError::SigningFailure(err))?; - config.insert(signature); - } - - Ok(req) - }) - } -} diff --git a/src/services/s3/mod.rs b/src/services/s3/mod.rs index 946eb3a508a..5ca246db6e4 100644 --- a/src/services/s3/mod.rs +++ b/src/services/s3/mod.rs @@ -70,6 +70,4 @@ mod backend; pub use backend::Backend; pub use backend::Builder; -mod error; -mod middleware; mod object_stream; diff --git a/src/services/s3/object_stream.rs b/src/services/s3/object_stream.rs index 9e2bdcd4cf9..8bc563dcd8e 100644 --- a/src/services/s3/object_stream.rs +++ b/src/services/s3/object_stream.rs @@ -12,27 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. +use anyhow::anyhow; use std::future::Future; use std::pin::Pin; use std::sync::Arc; use std::task::Context; use std::task::Poll; -use aws_sdk_s3; -use aws_sdk_s3::output::ListObjectsV2Output; +use bstr::ByteSlice; use futures::future::BoxFuture; use futures::ready; -use log::debug; +use log::{debug, error}; -use super::error::parse_unexpect_error; use super::Backend; -use crate::error::Result; +use crate::error::{Error, Kind, Result}; use crate::Object; use crate::ObjectMode; pub struct S3ObjectStream { backend: Backend, - bucket: String, path: String, token: String, @@ -40,10 +38,10 @@ pub struct S3ObjectStream { state: State, } -#[allow(clippy::large_enum_variant)] +// #[allow(clippy::large_enum_variant)] enum State { Idle, - Sending(BoxFuture<'static, Result>), + Sending(BoxFuture<'static, Result>), /// # TODO /// /// It's better to move this large struct to heap as suggested by clippy. @@ -65,14 +63,13 @@ enum State { /// 45 | Listing(Box<(ListObjectsV2Output, usize, usize)>), /// /// But stable rust doesn't support `State::Listing(box (output, common_prefixes_idx, objects_idx))` so far, let's wait a bit. 
- Listing((ListObjectsV2Output, usize, usize)), + Listing((ListOutput, usize, usize)), } impl S3ObjectStream { - pub fn new(backend: Backend, bucket: String, path: String) -> Self { + pub fn new(backend: Backend, path: String) -> Self { Self { backend, - bucket, path, token: "".to_string(), @@ -90,79 +87,81 @@ impl futures::Stream for S3ObjectStream { match &mut self.state { State::Idle => { - let client = self.backend.inner(); - let bucket = self.bucket.clone(); + let backend = self.backend.clone(); let path = self.path.clone(); let token = self.token.clone(); let fut = async move { - let mut req = client - .list_objects_v2() - .bucket(bucket) - .prefix(&path) - .delimiter("/"); - if !token.is_empty() { - req = req.continuation_token(token); + let resp = backend.list_object(&path, &token).await?; + + if resp.status() != http::StatusCode::OK { + let e = Err(Error::Object { + kind: Kind::Unexpected, + op: "list", + path: path.clone(), + source: anyhow!("{:?}", resp), + }); + debug!("error response: {}", resp.text().await.expect("must valid")); + return e; } - req.send() - .await - .map_err(|e| parse_unexpect_error(e, "list", &path)) + + resp.bytes().await.map_err(|e| { + error!("object {} put_object: {:?}", path, e); + Error::Unexpected(anyhow::Error::from(e)) + }) }; self.state = State::Sending(Box::pin(fut)); self.poll_next(cx) } State::Sending(fut) => { - let output = ready!(Pin::new(fut).poll(cx))?; + let output = ListOutput::parse(ready!(Pin::new(fut).poll(cx))?)?; self.done = !output.is_truncated; - self.token = output.continuation_token.clone().unwrap_or_default(); + self.token = output.next_continuation_token.clone(); self.state = State::Listing((output, 0, 0)); self.poll_next(cx) } State::Listing((output, common_prefixes_idx, objects_idx)) => { - if let Some(prefixes) = &output.common_prefixes { - if *common_prefixes_idx < prefixes.len() { - *common_prefixes_idx += 1; - let prefix = &prefixes[*common_prefixes_idx - 1].prefix(); - - let mut o = Object::new( 
- Arc::new(backend.clone()), - &backend.get_rel_path(prefix.expect("prefix should not be None")), - ); - let meta = o.metadata_mut(); - meta.set_mode(ObjectMode::DIR) - .set_content_length(0) - .set_complete(); - - debug!( - "object {} got entry, path: {}, mode: {}", - &self.path, - meta.path(), - meta.mode() - ); - return Poll::Ready(Some(Ok(o))); - } + let prefixes = &output.common_prefixes; + if *common_prefixes_idx < prefixes.len() { + *common_prefixes_idx += 1; + let prefix = &prefixes[*common_prefixes_idx - 1]; + + let mut o = + Object::new(Arc::new(backend.clone()), &backend.get_rel_path(prefix)); + let meta = o.metadata_mut(); + meta.set_mode(ObjectMode::DIR) + .set_content_length(0) + .set_complete(); + + debug!( + "object {} got entry, path: {}, mode: {}", + &self.path, + meta.path(), + meta.mode() + ); + return Poll::Ready(Some(Ok(o))); } - if let Some(objects) = &output.contents { - if *objects_idx < objects.len() { - *objects_idx += 1; - let object = &objects[*objects_idx - 1]; - - let mut o = Object::new( - Arc::new(backend.clone()), - &backend.get_rel_path(object.key().expect("key should not be None")), - ); - let meta = o.metadata_mut(); - meta.set_mode(ObjectMode::FILE) - .set_content_length(object.size as u64); - - debug!( - "object {} got entry, path: {}, mode: {}", - &self.path, - meta.path(), - meta.mode() - ); - return Poll::Ready(Some(Ok(o))); - } + + let objects = &output.contents; + if *objects_idx < objects.len() { + *objects_idx += 1; + let object = &objects[*objects_idx - 1]; + + let mut o = Object::new( + Arc::new(backend.clone()), + &backend.get_rel_path(&object.key), + ); + let meta = o.metadata_mut(); + meta.set_mode(ObjectMode::FILE) + .set_content_length(object.size as u64); + + debug!( + "object {} got entry, path: {}, mode: {}", + &self.path, + meta.path(), + meta.mode() + ); + return Poll::Ready(Some(Ok(o))); } if self.done { @@ -176,3 +175,167 @@ impl futures::Stream for S3ObjectStream { } } } + +#[derive(Default, Debug)] 
+struct ListOutput { + is_truncated: bool, + next_continuation_token: String, + common_prefixes: Vec, + contents: Vec, +} + +#[derive(Default, Debug, Eq, PartialEq)] +struct ListOutputContent { + key: String, + size: u64, +} + +impl ListOutput { + fn parse(bs: bytes::Bytes) -> Result { + let root = roxmltree::Document::parse( + bs.as_bytes().to_str().expect("content must be valid utf-8"), + ) + .map_err(|e| Error::Unexpected(anyhow::Error::from(e)))?; + + let mut output = ListOutput::default(); + + // IsTruncated + if let Some(n) = root + .descendants() + .find(|n| n.tag_name().name() == "IsTruncated") + { + output.is_truncated = n + .text() + .unwrap_or("false") + .parse::() + .map_err(|e| invalid_list_object_response(&e.to_string()))?; + } + + // NextContinuationToken + if let Some(n) = root + .descendants() + .find(|n| n.tag_name().name() == "NextContinuationToken") + { + output.next_continuation_token = n.text().unwrap_or_default().to_string(); + } + + // CommonPrefixes + for item in root + .descendants() + .filter(|v| v.tag_name().name() == "CommonPrefixes") + { + output.common_prefixes.push( + item.children() + .find(|v| v.tag_name().name() == "Prefix") + .ok_or_else(|| invalid_list_object_response("Prefix is not found"))? + .text() + .ok_or_else(|| invalid_list_object_response("Prefix is empty"))? + .to_string(), + ) + } + + // Contents + for item in root + .descendants() + .filter(|v| v.tag_name().name() == "Contents") + { + let mut content = ListOutputContent::default(); + + // Key + let n = item + .children() + .find(|n| n.tag_name().name() == "Key") + .ok_or_else(|| invalid_list_object_response("Key is not found"))?; + content.key = n + .text() + .ok_or_else(|| invalid_list_object_response("Key is empty"))? 
+ .to_string(); + + // Size + let n = item + .children() + .find(|n| n.tag_name().name() == "Size") + .ok_or_else(|| invalid_list_object_response("Size is not found"))?; + content.size = n + .text() + .ok_or_else(|| invalid_list_object_response("Size is empty"))? + .parse::() + .map_err(|e| invalid_list_object_response(&e.to_string()))?; + + output.contents.push(content) + } + + Ok(output) + } +} + +fn invalid_list_object_response(cause: &str) -> Error { + Error::Object { + kind: Kind::Unexpected, + op: "list", + path: "".to_string(), + source: anyhow!("invalid list object response: {}", cause), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_list_output() { + let bs = bytes::Bytes::from( + r#" + example-bucket + photos/2006/ + 3 + 1000 + / + false + + photos/2006 + 2016-04-30T23:51:29.000Z + "d41d8cd98f00b204e9800998ecf8427e" + 56 + STANDARD + + + photos/2007 + 2016-04-30T23:51:29.000Z + "d41d8cd98f00b204e9800998ecf8427e" + 100 + STANDARD + + + + photos/2006/February/ + + + photos/2006/January/ + +"#, + ); + + let out = ListOutput::parse(bs).expect("must success"); + + assert!(!out.is_truncated); + assert!(out.next_continuation_token.is_empty()); + assert_eq!( + out.common_prefixes, + vec!["photos/2006/February/", "photos/2006/January/"] + ); + assert_eq!( + out.contents, + vec![ + ListOutputContent { + key: "photos/2006".to_string(), + size: 56 + }, + ListOutputContent { + key: "photos/2007".to_string(), + size: 100 + } + ] + ) + } +} diff --git a/tests/behavior/behavior.rs b/tests/behavior/behavior.rs index 489575cbbc3..762a427d3c9 100644 --- a/tests/behavior/behavior.rs +++ b/tests/behavior/behavior.rs @@ -26,6 +26,7 @@ use anyhow::Result; use futures::AsyncReadExt; use futures::AsyncSeekExt; use futures::StreamExt; + use opendal::ObjectMode; use opendal::Operator; use rand::prelude::*; @@ -103,7 +104,7 @@ impl BehaviorTest { ); // Step 5: List this dir, we should get this file. 
- let mut obs = self.op.objects("").map(|o| o.expect("list object")); + let mut obs = self.op.objects("").map(|o| o.expect("list object: {}")); let mut found = false; while let Some(o) = obs.next().await { let meta = o.metadata().await?; From 1aecb5c632cd1953ceb38904775bb97728dbc982 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 16 Mar 2022 20:14:44 +0800 Subject: [PATCH 02/16] Format Cargo.toml Signed-off-by: Xuanwo --- Cargo.toml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 77e987bb8db..34da468ca46 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ name = "ops" anyhow = "1" async-compat = "0.2" async-trait = "0.1" +bstr = "0.2" bytes = "1" futures = { version = "0.3", features = ["alloc"] } http = "0.2" @@ -31,20 +32,19 @@ log = "0.4" metrics = "0.18" once_cell = "1" pin-project = "1" +reqsign = { git = "https://github.com/Xuanwo/reqsign", rev = "44d7bb37dfa543487d4d5286d64abd3f2fe12bf7" } reqwest = { version = "0.11", features = ["stream"] } +roxmltree = "0.14" thiserror = "1" tokio = { version = "1.17", features = ["full"] } tower = "0.4" -reqsign = { git = "https://github.com/Xuanwo/reqsign", rev = "44d7bb37dfa543487d4d5286d64abd3f2fe12bf7" } -roxmltree = "0.14" -bstr = "0.2" [dev-dependencies] anyhow = "1.0" criterion = { version = "0.3", features = [ - "async", - "async_tokio", - "html_reports", + "async", + "async_tokio", + "html_reports", ] } dotenv = "0.15" env_logger = "0.9" From f7b4daeb4d54ead58dc05cd5256a020e8cbb40be Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Thu, 17 Mar 2022 07:01:05 +0800 Subject: [PATCH 03/16] some implement of azblob(not all) --- Cargo.toml | 7 + opendal_test/src/services/azblob.rs | 28 ++ opendal_test/src/services/mod.rs | 1 + src/services/azblob/backend.rs | 325 ++++++++++++++++++ src/services/azblob/error.rs | 95 +++++ src/services/azblob/middleware/credentials.rs | 0 src/services/azblob/middleware/mod.rs | 0 src/services/azblob/mod.rs | 7 + 
src/services/azblob/object_stream.rs | 0 src/services/mod.rs | 2 + tests/behavior/azblob.rs | 19 + 11 files changed, 484 insertions(+) create mode 100644 opendal_test/src/services/azblob.rs create mode 100644 src/services/azblob/backend.rs create mode 100644 src/services/azblob/error.rs create mode 100644 src/services/azblob/middleware/credentials.rs create mode 100644 src/services/azblob/middleware/mod.rs create mode 100644 src/services/azblob/mod.rs create mode 100644 src/services/azblob/object_stream.rs create mode 100644 tests/behavior/azblob.rs diff --git a/Cargo.toml b/Cargo.toml index d4b9e20e4a1..d45195d8d73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,13 @@ aws-smithy-client = "0.38" aws-smithy-http = "0.38" aws-smithy-http-tower = "0.38" aws-types = { version = "0.8", features = ["hardcoded-credentials"] } + +azure_core = {version="0.1",default-features=false,git = "https://github.com/Azure/azure-sdk-for-rust"} +azure_storage = { version = "0.1.0", default-features=false, features=["account"], git = "https://github.com/Azure/azure-sdk-for-rust" } +azure_storage_blobs = { version = "0.1.0", git = "https://github.com/Azure/azure-sdk-for-rust" } +azure_identity = {git = "https://github.com/Azure/azure-sdk-for-rust", version = "0.1" } + +oauth2 = { version = "4.0.0", default-features = false } blocking = "1" bytes = "1" futures = { version = "0.3", features = ["alloc"] } diff --git a/opendal_test/src/services/azblob.rs b/opendal_test/src/services/azblob.rs new file mode 100644 index 00000000000..ccc5bd37203 --- /dev/null +++ b/opendal_test/src/services/azblob.rs @@ -0,0 +1,28 @@ +use std::env; +use std::sync::Arc; + +use opendal::error::Result; +use opendal::services::azblob; +use opendal::Accessor; + +pub async fn new() -> Result>> { + dotenv::from_filename(".env").ok(); + + if env::var("OPENDAL_AZBLOB_TEST").is_err() || env::var("OPENDAL_AZBLOB_TEST").unwrap() != "on" { + return Ok(None); + } + + let root = + 
&env::var("OPENDAL_S3_ROOT").unwrap_or_else(|_| format!("/{}", uuid::Uuid::new_v4())); + + let mut builder = azblob::Backend::build(); + builder.root(root); + builder.bucket(&env::var("OPENDAL_AZBLOB_BUCKET").expect("OPENDAL_AZBLOB_BUCKET must set")); + + builder.credential(Credential::hmac( + &env::var("OPENDAL_AZBLOB_ACCESS_KEY_ID").unwrap_or_default(), + &env::var("OPENDAL_AZBLOB_SECRET_ACCESS_KEY").unwrap_or_default(), + )); + + Ok(Some(builder.finish().await?)) +} \ No newline at end of file diff --git a/opendal_test/src/services/mod.rs b/opendal_test/src/services/mod.rs index 4f31096621f..1e25a3f79d6 100644 --- a/opendal_test/src/services/mod.rs +++ b/opendal_test/src/services/mod.rs @@ -14,3 +14,4 @@ pub mod fs; pub mod memory; pub mod s3; +pub mod azblob; \ No newline at end of file diff --git a/src/services/azblob/backend.rs b/src/services/azblob/backend.rs new file mode 100644 index 00000000000..c65d730b69c --- /dev/null +++ b/src/services/azblob/backend.rs @@ -0,0 +1,325 @@ + +use std::collections::HashMap; + + + +use std::num::NonZeroU32; + +use anyhow::anyhow; +use async_trait::async_trait; + +use metrics::increment_counter; + +use super::error::parse_get_object_error; +use super::error::parse_head_object_error; +use super::error::parse_unexpect_error; + +use crate::credential::Credential; +use crate::error::Error; +use crate::error::Kind; +use crate::error::Result; +use crate::object::BoxedObjectStream; +use crate::object::Metadata; +use crate::ops::OpDelete; +use crate::ops::OpList; +use crate::ops::OpRead; +use crate::ops::OpStat; +use crate::ops::OpWrite; +use log::debug; +use log::error; +use log::info; +use log::warn; +use crate::Accessor; +use crate::BoxedAsyncReader; +use crate::ObjectMode; +use std::sync::Arc; + +use azure_core::prelude::*; +use azure_storage::core::prelude::*; +use azure_storage_blobs::prelude::*; + +#[derive(Default, Debug, Clone)] +pub struct Builder { + root: Option, + bucket: String, // in Azure, bucket = container + 
credential: Option, +} + +impl Builder { + pub fn root(&mut self, root: &str) -> &mut Self { + self.root = if root.is_empty() { + None + } else { + Some(root.to_string()) + }; + + self + } + pub fn bucket(&mut self, bucket: &str) -> &mut Self { + self.bucket = bucket.to_string(); + + self + } + + pub fn credential(&mut self, credential: Credential) -> &mut Self { + self.credential = Some(credential); + + self + } + pub async fn finish(&mut self) -> Result> { + info!("backend build started: {:?}", &self); + let root = self.root.unwrap(); + info!("backend use root {}", root); + let bucket = match self.bucket.is_empty() { + false => Ok(&self.bucket), + true => Err(Error::Backend { + kind: Kind::BackendConfigurationInvalid, + context: HashMap::from([("bucket".to_string(), "".to_string())]), + source: anyhow!("bucket is empty"), + }), + }?; + debug!("backend use bucket {}", &bucket); + let mut context: HashMap = + HashMap::from([("bucket".to_string(), bucket.to_string())]); + let mut azure_storage_account = String::new(); + let mut azure_storage_key = String::new(); + if let Some(cred) = &self.credential { + context.insert("credential".to_string(), "*".to_string()); + match cred { + Credential::HMAC { + access_key_id, + secret_access_key, + } => { + azure_storage_account = access_key_id.to_string(); + azure_storage_key = secret_access_key.to_string(); + } + // We don't need to do anything if user tries to read credential from env. 
+ Credential::Plain => { + warn!("backend got empty credential, fallback to read from env.") + } + _ => { + return Err(Error::Backend { + kind: Kind::BackendConfigurationInvalid, + context: context.clone(), + source: anyhow!("credential is invalid"), + }); + } + } + } + let http_client = azure_core::new_http_client(); + let storage_client = StorageAccountClient::new_access_key( + http_client.clone(), + azure_storage_account, + azure_storage_key, + ).as_storage_client(); + info!("backend build finished: {:?}", &self); + Ok(Arc::new(Backend { + root:root.unwrap().clone(), + bucket: self.bucket.clone(), + client: storage_client, + })) + } +} +#[derive(Debug, Clone)] +pub struct Backend { + bucket: String, + client: Arc, + root: String, +} + +impl Backend { + pub fn build() -> Builder { + Builder::default() + } + + pub(crate) fn inner(&self) -> Arc { + self.client.clone() + } + pub(crate) fn normalize_path(path: &str) -> String { + let has_trailing = path.ends_with('/'); + + let mut p = path + .split('/') + .filter(|v| !v.is_empty()) + .collect::>() + .join("/"); + + if has_trailing && !p.eq("/") { + p.push('/') + } + + p + } + pub(crate) fn get_abs_path(&self, path: &str) -> String { + let path = Backend::normalize_path(path); + // root must be normalized like `/abc/` + format!("{}{}", self.root, path) + .trim_start_matches('/') + .to_string() + } + pub(crate) fn get_rel_path(&self, path: &str) -> String { + let path = format!("/{}", path); + + match path.strip_prefix(&self.root) { + Some(v) => v.to_string(), + None => unreachable!( + "invalid path {} that not start with backend root {}", + &path, &self.root + ), + } + } +} +#[async_trait] +impl Accessor for Backend { + async fn read(&self, args: &OpRead) -> Result { + increment_counter!("opendal_s3_read_requests"); + + let p = self.get_abs_path(&args.path); + info!( + "object {} read start: offset {:?}, size {:?}", + &p, args.offset, args.size + ); + + let mut blob_client = self + .client + 
.as_container_client(&self.bucket) + .as_blob_client(&p); + + let resp =if args.offset.is_some() || args.size.is_some() { + blob_client + .get() + .range(Range::new(0, 1024)) + .execute() + .await.map_err(|e| { + let e = parse_get_object_error(e, "read", &p); + error!("object {} get_object: {:?}", &p, e); + e + })? + } else{ + blob_client + .get() + .execute() + .await.map_err(|e| { + let e = parse_get_object_error(e, "read", &p); + error!("object {} get_object: {:?}", &p, e); + e + })? + }; + + info!( + "object {} reader created: offset {:?}, size {:?}", + &p, args.offset, args.size + ); + // Ok(Box::new(S3ByteStream(resp).into_async_read())) + todo!() + } + async fn write(&self, r: BoxedAsyncReader, args: &OpWrite) -> Result { + increment_counter!("opendal_s3_stat_requests"); + let p = self.get_abs_path(&args.path); + info!("object {} write start: size {}", &p, args.size); + let blob_client =self.client + .as_container_client(&self.bucket) + .as_blob_client(&p); + blob_client + .put_block_blob("tmp_data") + .content_type("text/plain") + .execute() + .await.map_err(|e| { + let e = parse_unexpect_error(e, "write", &p); + error!("object {} put_object: {:?}", &p, e); + e + })?; + Ok(args.size as usize) + } + async fn stat(&self, args: &OpStat) -> Result { + increment_counter!("opendal_s3_stat_requests"); + + let p = self.get_abs_path(&args.path); + info!("object {} stat start", &p); + + let blob_client =self.client + .as_container_client(&self.bucket) + .as_blob_client(&p); + + let response_result = blob_client.get().execute() + .await.map_err(|e| parse_head_object_error(e, "stat", &p)); + + match response_result { + Ok(response) => { + let mut m = Metadata::default(); + m.set_path(&args.path); + m.set_content_length(response.blob.properties.content_length); + + if p.ends_with('/') { + m.set_mode(ObjectMode::DIR); + } else { + m.set_mode(ObjectMode::FILE); + }; + + m.set_complete(); + + info!("object {} stat finished", &p); + Ok(m) + } + // Always returns empty dir 
object if path is endswith "/" and we got an + // ObjectNotExist error. + Err(e) if (e.kind() == Kind::ObjectNotExist && p.ends_with('/')) => { + let mut m = Metadata::default(); + m.set_path(&args.path); + m.set_content_length(0); + m.set_mode(ObjectMode::DIR); + m.set_complete(); + + info!("object {} stat finished", &p); + Ok(m) + } + Err(e) => { + error!("object {} head_object: {:?}", &p, e); + Err(e) + } + } + + } + async fn delete(&self, args: &OpDelete) -> Result<()> { + increment_counter!("opendal_s3_delete_requests"); + + let p = self.get_abs_path(&args.path); + info!("object {} delete start", &p); + + let blob_client = + self.client + .as_container_client(&self.bucket) + .as_blob_client(&p); + + let res = blob_client + .delete() + .delete_snapshots_method(DeleteSnapshotsMethod::Include) + .execute() + .await.map_err(|e| parse_unexpect_error(e, "delete", &p))?; + + info!("object {} delete finished", &p); + Ok(()) + } + async fn list(&self, args: &OpList) -> Result { + increment_counter!("opendal_s3_list_requests"); + + let mut path = self.get_abs_path(&args.path); + // Make sure list path is endswith '/' + if !path.ends_with('/') && !path.is_empty() { + path.push('/') + } + info!("object {} list start", &path); + //prefix would look like that .prefix("root/firstfolder/") + //if path have / as prefix, then remove it + if path.starts_with("/") {path =path[1..].to_string();} + let max_results =NonZeroU32::new(5u32).unwrap(); + let container_client = self.client.as_container_client(&self.bucket); + let mut stream = Box::pin( + container_client + .list_blobs() + .prefix(path) + .max_results(max_results) + .stream()); + todo!() + } +} diff --git a/src/services/azblob/error.rs b/src/services/azblob/error.rs new file mode 100644 index 00000000000..8be5e87db57 --- /dev/null +++ b/src/services/azblob/error.rs @@ -0,0 +1,95 @@ +// Copyright 2022 Datafuse Labs. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +use http::StatusCode; +use azure_core::HttpError; +use crate::error::Kind; +use crate::error::Error; +use std::error::Error as StdError; + + +pub fn parse_get_object_error( + err: Box, + op: &'static str, + path: &str, +) -> Error { + if let Some(err) = err.downcast_ref::() { + if matches!( + err, + HttpError::StatusCode { + status: StatusCode::NOT_FOUND, + .. + } + ) { + return Error::Object { + kind: Kind::ObjectNotExist, + op, + path: path.to_string(), + source: anyhow::Error::from(err), + } + } + + } + return Error::Object { + kind: Kind::Unexpected, + op, + path: path.to_string(), + source: todo!(), + } + +} + +pub fn parse_head_object_error( + err: Box, + op: &'static str, + path: &str, +) -> Error { + if let Some(err) = err.downcast_ref::() { + if matches!( + err, + HttpError::StatusCode { + status: StatusCode::NOT_FOUND, + .. + } + ) { + return Error::Object { + kind: Kind::ObjectNotExist, + op, + path: path.to_string(), + source: anyhow::Error::from(err), + } + } + } + Error::Object { + kind: Kind::Unexpected, + op, + path: path.to_string(), + source: todo!(), + } +} + +// parse_unexpect_error is used to parse SdkError into unexpected. 
+pub fn parse_unexpect_error( + err: Box, + op: &'static str, + path: &str, +) -> Error { + Error::Object { + kind: Kind::Unexpected, + op, + path: path.to_string(), + source: todo!(), + } +} diff --git a/src/services/azblob/middleware/credentials.rs b/src/services/azblob/middleware/credentials.rs new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/services/azblob/middleware/mod.rs b/src/services/azblob/middleware/mod.rs new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/services/azblob/mod.rs b/src/services/azblob/mod.rs new file mode 100644 index 00000000000..427475d0db5 --- /dev/null +++ b/src/services/azblob/mod.rs @@ -0,0 +1,7 @@ +mod backend; +pub use backend::Backend; +pub use backend::Builder; + +mod error; +mod object_stream; +mod middleware; \ No newline at end of file diff --git a/src/services/azblob/object_stream.rs b/src/services/azblob/object_stream.rs new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/services/mod.rs b/src/services/mod.rs index 0d0d3025b77..b2f95e1480c 100644 --- a/src/services/mod.rs +++ b/src/services/mod.rs @@ -21,4 +21,6 @@ pub mod fs; pub mod memory; +pub mod azblob; + pub mod s3; diff --git a/tests/behavior/azblob.rs b/tests/behavior/azblob.rs new file mode 100644 index 00000000000..75c4336d0b5 --- /dev/null +++ b/tests/behavior/azblob.rs @@ -0,0 +1,19 @@ +use anyhow::Result; +use log::warn; +use opendal::Operator; +use opendal_test::services::azblob; + +use super::BehaviorTest; + +#[tokio::test] +async fn behavior() -> Result<()> { + super::init_logger(); + + let acc = s3::new().await?; + if acc.is_none() { + warn!("OPENDAL_S3_TEST not set, ignore"); + return Ok(()); + } + + BehaviorTest::new(Operator::new(acc.unwrap())).run().await +} \ No newline at end of file From 6ed58c08c2c7d85259b1ac2305776bb1681a40ac Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Thu, 17 Mar 2022 07:49:59 +0800 Subject: [PATCH 04/16] testing azure sign function --- 
opendal_test/src/services/azblob.rs | 42 +- src/services/azblob/backend.rs | 646 ++++++++++++++-------------- src/services/azblob/error.rs | 174 ++++---- src/services/azblob/mod.rs | 4 +- 4 files changed, 433 insertions(+), 433 deletions(-) diff --git a/opendal_test/src/services/azblob.rs b/opendal_test/src/services/azblob.rs index ccc5bd37203..f3da0542f02 100644 --- a/opendal_test/src/services/azblob.rs +++ b/opendal_test/src/services/azblob.rs @@ -1,28 +1,28 @@ -use std::env; -use std::sync::Arc; +// use std::env; +// use std::sync::Arc; -use opendal::error::Result; -use opendal::services::azblob; -use opendal::Accessor; +// use opendal::error::Result; +// use opendal::services::azblob; +// use opendal::Accessor; -pub async fn new() -> Result>> { - dotenv::from_filename(".env").ok(); +// pub async fn new() -> Result>> { +// dotenv::from_filename(".env").ok(); - if env::var("OPENDAL_AZBLOB_TEST").is_err() || env::var("OPENDAL_AZBLOB_TEST").unwrap() != "on" { - return Ok(None); - } +// if env::var("OPENDAL_AZBLOB_TEST").is_err() || env::var("OPENDAL_AZBLOB_TEST").unwrap() != "on" { +// return Ok(None); +// } - let root = - &env::var("OPENDAL_S3_ROOT").unwrap_or_else(|_| format!("/{}", uuid::Uuid::new_v4())); +// let root = +// &env::var("OPENDAL_S3_ROOT").unwrap_or_else(|_| format!("/{}", uuid::Uuid::new_v4())); - let mut builder = azblob::Backend::build(); - builder.root(root); - builder.bucket(&env::var("OPENDAL_AZBLOB_BUCKET").expect("OPENDAL_AZBLOB_BUCKET must set")); +// let mut builder = azblob::Backend::build(); +// builder.root(root); +// builder.bucket(&env::var("OPENDAL_AZBLOB_BUCKET").expect("OPENDAL_AZBLOB_BUCKET must set")); - builder.credential(Credential::hmac( - &env::var("OPENDAL_AZBLOB_ACCESS_KEY_ID").unwrap_or_default(), - &env::var("OPENDAL_AZBLOB_SECRET_ACCESS_KEY").unwrap_or_default(), - )); +// builder.credential(Credential::hmac( +// &env::var("OPENDAL_AZBLOB_ACCESS_KEY_ID").unwrap_or_default(), +// 
&env::var("OPENDAL_AZBLOB_SECRET_ACCESS_KEY").unwrap_or_default(), +// )); - Ok(Some(builder.finish().await?)) -} \ No newline at end of file +// Ok(Some(builder.finish().await?)) +// } \ No newline at end of file diff --git a/src/services/azblob/backend.rs b/src/services/azblob/backend.rs index c65d730b69c..c61ad0c3fce 100644 --- a/src/services/azblob/backend.rs +++ b/src/services/azblob/backend.rs @@ -1,325 +1,325 @@ -use std::collections::HashMap; - - - -use std::num::NonZeroU32; - -use anyhow::anyhow; -use async_trait::async_trait; - -use metrics::increment_counter; - -use super::error::parse_get_object_error; -use super::error::parse_head_object_error; -use super::error::parse_unexpect_error; - -use crate::credential::Credential; -use crate::error::Error; -use crate::error::Kind; -use crate::error::Result; -use crate::object::BoxedObjectStream; -use crate::object::Metadata; -use crate::ops::OpDelete; -use crate::ops::OpList; -use crate::ops::OpRead; -use crate::ops::OpStat; -use crate::ops::OpWrite; -use log::debug; -use log::error; -use log::info; -use log::warn; -use crate::Accessor; -use crate::BoxedAsyncReader; -use crate::ObjectMode; -use std::sync::Arc; - -use azure_core::prelude::*; -use azure_storage::core::prelude::*; -use azure_storage_blobs::prelude::*; - -#[derive(Default, Debug, Clone)] -pub struct Builder { - root: Option, - bucket: String, // in Azure, bucket = container - credential: Option, -} - -impl Builder { - pub fn root(&mut self, root: &str) -> &mut Self { - self.root = if root.is_empty() { - None - } else { - Some(root.to_string()) - }; - - self - } - pub fn bucket(&mut self, bucket: &str) -> &mut Self { - self.bucket = bucket.to_string(); - - self - } - - pub fn credential(&mut self, credential: Credential) -> &mut Self { - self.credential = Some(credential); - - self - } - pub async fn finish(&mut self) -> Result> { - info!("backend build started: {:?}", &self); - let root = self.root.unwrap(); - info!("backend use root {}", root); - 
let bucket = match self.bucket.is_empty() { - false => Ok(&self.bucket), - true => Err(Error::Backend { - kind: Kind::BackendConfigurationInvalid, - context: HashMap::from([("bucket".to_string(), "".to_string())]), - source: anyhow!("bucket is empty"), - }), - }?; - debug!("backend use bucket {}", &bucket); - let mut context: HashMap = - HashMap::from([("bucket".to_string(), bucket.to_string())]); - let mut azure_storage_account = String::new(); - let mut azure_storage_key = String::new(); - if let Some(cred) = &self.credential { - context.insert("credential".to_string(), "*".to_string()); - match cred { - Credential::HMAC { - access_key_id, - secret_access_key, - } => { - azure_storage_account = access_key_id.to_string(); - azure_storage_key = secret_access_key.to_string(); - } - // We don't need to do anything if user tries to read credential from env. - Credential::Plain => { - warn!("backend got empty credential, fallback to read from env.") - } - _ => { - return Err(Error::Backend { - kind: Kind::BackendConfigurationInvalid, - context: context.clone(), - source: anyhow!("credential is invalid"), - }); - } - } - } - let http_client = azure_core::new_http_client(); - let storage_client = StorageAccountClient::new_access_key( - http_client.clone(), - azure_storage_account, - azure_storage_key, - ).as_storage_client(); - info!("backend build finished: {:?}", &self); - Ok(Arc::new(Backend { - root:root.unwrap().clone(), - bucket: self.bucket.clone(), - client: storage_client, - })) - } -} -#[derive(Debug, Clone)] -pub struct Backend { - bucket: String, - client: Arc, - root: String, -} - -impl Backend { - pub fn build() -> Builder { - Builder::default() - } - - pub(crate) fn inner(&self) -> Arc { - self.client.clone() - } - pub(crate) fn normalize_path(path: &str) -> String { - let has_trailing = path.ends_with('/'); - - let mut p = path - .split('/') - .filter(|v| !v.is_empty()) - .collect::>() - .join("/"); - - if has_trailing && !p.eq("/") { - p.push('/') - } - 
- p - } - pub(crate) fn get_abs_path(&self, path: &str) -> String { - let path = Backend::normalize_path(path); - // root must be normalized like `/abc/` - format!("{}{}", self.root, path) - .trim_start_matches('/') - .to_string() - } - pub(crate) fn get_rel_path(&self, path: &str) -> String { - let path = format!("/{}", path); - - match path.strip_prefix(&self.root) { - Some(v) => v.to_string(), - None => unreachable!( - "invalid path {} that not start with backend root {}", - &path, &self.root - ), - } - } -} -#[async_trait] -impl Accessor for Backend { - async fn read(&self, args: &OpRead) -> Result { - increment_counter!("opendal_s3_read_requests"); - - let p = self.get_abs_path(&args.path); - info!( - "object {} read start: offset {:?}, size {:?}", - &p, args.offset, args.size - ); - - let mut blob_client = self - .client - .as_container_client(&self.bucket) - .as_blob_client(&p); - - let resp =if args.offset.is_some() || args.size.is_some() { - blob_client - .get() - .range(Range::new(0, 1024)) - .execute() - .await.map_err(|e| { - let e = parse_get_object_error(e, "read", &p); - error!("object {} get_object: {:?}", &p, e); - e - })? - } else{ - blob_client - .get() - .execute() - .await.map_err(|e| { - let e = parse_get_object_error(e, "read", &p); - error!("object {} get_object: {:?}", &p, e); - e - })? 
- }; - - info!( - "object {} reader created: offset {:?}, size {:?}", - &p, args.offset, args.size - ); - // Ok(Box::new(S3ByteStream(resp).into_async_read())) - todo!() - } - async fn write(&self, r: BoxedAsyncReader, args: &OpWrite) -> Result { - increment_counter!("opendal_s3_stat_requests"); - let p = self.get_abs_path(&args.path); - info!("object {} write start: size {}", &p, args.size); - let blob_client =self.client - .as_container_client(&self.bucket) - .as_blob_client(&p); - blob_client - .put_block_blob("tmp_data") - .content_type("text/plain") - .execute() - .await.map_err(|e| { - let e = parse_unexpect_error(e, "write", &p); - error!("object {} put_object: {:?}", &p, e); - e - })?; - Ok(args.size as usize) - } - async fn stat(&self, args: &OpStat) -> Result { - increment_counter!("opendal_s3_stat_requests"); - - let p = self.get_abs_path(&args.path); - info!("object {} stat start", &p); - - let blob_client =self.client - .as_container_client(&self.bucket) - .as_blob_client(&p); +// use std::collections::HashMap; + + + +// use std::num::NonZeroU32; + +// use anyhow::anyhow; +// use async_trait::async_trait; + +// use metrics::increment_counter; + +// use super::error::parse_get_object_error; +// use super::error::parse_head_object_error; +// use super::error::parse_unexpect_error; + +// use crate::credential::Credential; +// use crate::error::Error; +// use crate::error::Kind; +// use crate::error::Result; +// use crate::object::BoxedObjectStream; +// use crate::object::Metadata; +// use crate::ops::OpDelete; +// use crate::ops::OpList; +// use crate::ops::OpRead; +// use crate::ops::OpStat; +// use crate::ops::OpWrite; +// use log::debug; +// use log::error; +// use log::info; +// use log::warn; +// use crate::Accessor; +// use crate::BoxedAsyncReader; +// use crate::ObjectMode; +// use std::sync::Arc; + +// use azure_core::prelude::*; +// use azure_storage::core::prelude::*; +// use azure_storage_blobs::prelude::*; + +// #[derive(Default, Debug, 
Clone)] +// pub struct Builder { +// root: Option, +// bucket: String, // in Azure, bucket = container +// credential: Option, +// } + +// impl Builder { +// pub fn root(&mut self, root: &str) -> &mut Self { +// self.root = if root.is_empty() { +// None +// } else { +// Some(root.to_string()) +// }; + +// self +// } +// pub fn bucket(&mut self, bucket: &str) -> &mut Self { +// self.bucket = bucket.to_string(); + +// self +// } + +// pub fn credential(&mut self, credential: Credential) -> &mut Self { +// self.credential = Some(credential); + +// self +// } +// pub async fn finish(&mut self) -> Result> { +// info!("backend build started: {:?}", &self); +// let root = self.root.unwrap(); +// info!("backend use root {}", root); +// let bucket = match self.bucket.is_empty() { +// false => Ok(&self.bucket), +// true => Err(Error::Backend { +// kind: Kind::BackendConfigurationInvalid, +// context: HashMap::from([("bucket".to_string(), "".to_string())]), +// source: anyhow!("bucket is empty"), +// }), +// }?; +// debug!("backend use bucket {}", &bucket); +// let mut context: HashMap = +// HashMap::from([("bucket".to_string(), bucket.to_string())]); +// let mut azure_storage_account = String::new(); +// let mut azure_storage_key = String::new(); +// if let Some(cred) = &self.credential { +// context.insert("credential".to_string(), "*".to_string()); +// match cred { +// Credential::HMAC { +// access_key_id, +// secret_access_key, +// } => { +// azure_storage_account = access_key_id.to_string(); +// azure_storage_key = secret_access_key.to_string(); +// } +// // We don't need to do anything if user tries to read credential from env. 
+// Credential::Plain => { +// warn!("backend got empty credential, fallback to read from env.") +// } +// _ => { +// return Err(Error::Backend { +// kind: Kind::BackendConfigurationInvalid, +// context: context.clone(), +// source: anyhow!("credential is invalid"), +// }); +// } +// } +// } +// let http_client = azure_core::new_http_client(); +// let storage_client = StorageAccountClient::new_access_key( +// http_client.clone(), +// azure_storage_account, +// azure_storage_key, +// ).as_storage_client(); +// info!("backend build finished: {:?}", &self); +// Ok(Arc::new(Backend { +// root:root.unwrap().clone(), +// bucket: self.bucket.clone(), +// client: storage_client, +// })) +// } +// } +// #[derive(Debug, Clone)] +// pub struct Backend { +// bucket: String, +// client: Arc, +// root: String, +// } + +// impl Backend { +// pub fn build() -> Builder { +// Builder::default() +// } + +// pub(crate) fn inner(&self) -> Arc { +// self.client.clone() +// } +// pub(crate) fn normalize_path(path: &str) -> String { +// let has_trailing = path.ends_with('/'); + +// let mut p = path +// .split('/') +// .filter(|v| !v.is_empty()) +// .collect::>() +// .join("/"); + +// if has_trailing && !p.eq("/") { +// p.push('/') +// } + +// p +// } +// pub(crate) fn get_abs_path(&self, path: &str) -> String { +// let path = Backend::normalize_path(path); +// // root must be normalized like `/abc/` +// format!("{}{}", self.root, path) +// .trim_start_matches('/') +// .to_string() +// } +// pub(crate) fn get_rel_path(&self, path: &str) -> String { +// let path = format!("/{}", path); + +// match path.strip_prefix(&self.root) { +// Some(v) => v.to_string(), +// None => unreachable!( +// "invalid path {} that not start with backend root {}", +// &path, &self.root +// ), +// } +// } +// } +// #[async_trait] +// impl Accessor for Backend { +// async fn read(&self, args: &OpRead) -> Result { +// increment_counter!("opendal_s3_read_requests"); + +// let p = self.get_abs_path(&args.path); +// 
info!( +// "object {} read start: offset {:?}, size {:?}", +// &p, args.offset, args.size +// ); + +// let mut blob_client = self +// .client +// .as_container_client(&self.bucket) +// .as_blob_client(&p); + +// let resp =if args.offset.is_some() || args.size.is_some() { +// blob_client +// .get() +// .range(Range::new(0, 1024)) +// .execute() +// .await.map_err(|e| { +// let e = parse_get_object_error(e, "read", &p); +// error!("object {} get_object: {:?}", &p, e); +// e +// })? +// } else{ +// blob_client +// .get() +// .execute() +// .await.map_err(|e| { +// let e = parse_get_object_error(e, "read", &p); +// error!("object {} get_object: {:?}", &p, e); +// e +// })? +// }; + +// info!( +// "object {} reader created: offset {:?}, size {:?}", +// &p, args.offset, args.size +// ); +// // Ok(Box::new(S3ByteStream(resp).into_async_read())) +// todo!() +// } +// async fn write(&self, r: BoxedAsyncReader, args: &OpWrite) -> Result { +// increment_counter!("opendal_s3_stat_requests"); +// let p = self.get_abs_path(&args.path); +// info!("object {} write start: size {}", &p, args.size); +// let blob_client =self.client +// .as_container_client(&self.bucket) +// .as_blob_client(&p); +// blob_client +// .put_block_blob("tmp_data") +// .content_type("text/plain") +// .execute() +// .await.map_err(|e| { +// let e = parse_unexpect_error(e, "write", &p); +// error!("object {} put_object: {:?}", &p, e); +// e +// })?; +// Ok(args.size as usize) +// } +// async fn stat(&self, args: &OpStat) -> Result { +// increment_counter!("opendal_s3_stat_requests"); + +// let p = self.get_abs_path(&args.path); +// info!("object {} stat start", &p); + +// let blob_client =self.client +// .as_container_client(&self.bucket) +// .as_blob_client(&p); - let response_result = blob_client.get().execute() - .await.map_err(|e| parse_head_object_error(e, "stat", &p)); - - match response_result { - Ok(response) => { - let mut m = Metadata::default(); - m.set_path(&args.path); - 
m.set_content_length(response.blob.properties.content_length); - - if p.ends_with('/') { - m.set_mode(ObjectMode::DIR); - } else { - m.set_mode(ObjectMode::FILE); - }; - - m.set_complete(); - - info!("object {} stat finished", &p); - Ok(m) - } - // Always returns empty dir object if path is endswith "/" and we got an - // ObjectNotExist error. - Err(e) if (e.kind() == Kind::ObjectNotExist && p.ends_with('/')) => { - let mut m = Metadata::default(); - m.set_path(&args.path); - m.set_content_length(0); - m.set_mode(ObjectMode::DIR); - m.set_complete(); - - info!("object {} stat finished", &p); - Ok(m) - } - Err(e) => { - error!("object {} head_object: {:?}", &p, e); - Err(e) - } - } - - } - async fn delete(&self, args: &OpDelete) -> Result<()> { - increment_counter!("opendal_s3_delete_requests"); - - let p = self.get_abs_path(&args.path); - info!("object {} delete start", &p); - - let blob_client = - self.client - .as_container_client(&self.bucket) - .as_blob_client(&p); - - let res = blob_client - .delete() - .delete_snapshots_method(DeleteSnapshotsMethod::Include) - .execute() - .await.map_err(|e| parse_unexpect_error(e, "delete", &p))?; - - info!("object {} delete finished", &p); - Ok(()) - } - async fn list(&self, args: &OpList) -> Result { - increment_counter!("opendal_s3_list_requests"); - - let mut path = self.get_abs_path(&args.path); - // Make sure list path is endswith '/' - if !path.ends_with('/') && !path.is_empty() { - path.push('/') - } - info!("object {} list start", &path); - //prefix would look like that .prefix("root/firstfolder/") - //if path have / as prefix, then remove it - if path.starts_with("/") {path =path[1..].to_string();} - let max_results =NonZeroU32::new(5u32).unwrap(); - let container_client = self.client.as_container_client(&self.bucket); - let mut stream = Box::pin( - container_client - .list_blobs() - .prefix(path) - .max_results(max_results) - .stream()); - todo!() - } -} +// let response_result = blob_client.get().execute() +// 
.await.map_err(|e| parse_head_object_error(e, "stat", &p)); + +// match response_result { +// Ok(response) => { +// let mut m = Metadata::default(); +// m.set_path(&args.path); +// m.set_content_length(response.blob.properties.content_length); + +// if p.ends_with('/') { +// m.set_mode(ObjectMode::DIR); +// } else { +// m.set_mode(ObjectMode::FILE); +// }; + +// m.set_complete(); + +// info!("object {} stat finished", &p); +// Ok(m) +// } +// // Always returns empty dir object if path is endswith "/" and we got an +// // ObjectNotExist error. +// Err(e) if (e.kind() == Kind::ObjectNotExist && p.ends_with('/')) => { +// let mut m = Metadata::default(); +// m.set_path(&args.path); +// m.set_content_length(0); +// m.set_mode(ObjectMode::DIR); +// m.set_complete(); + +// info!("object {} stat finished", &p); +// Ok(m) +// } +// Err(e) => { +// error!("object {} head_object: {:?}", &p, e); +// Err(e) +// } +// } + +// } +// async fn delete(&self, args: &OpDelete) -> Result<()> { +// increment_counter!("opendal_s3_delete_requests"); + +// let p = self.get_abs_path(&args.path); +// info!("object {} delete start", &p); + +// let blob_client = +// self.client +// .as_container_client(&self.bucket) +// .as_blob_client(&p); + +// let res = blob_client +// .delete() +// .delete_snapshots_method(DeleteSnapshotsMethod::Include) +// .execute() +// .await.map_err(|e| parse_unexpect_error(e, "delete", &p))?; + +// info!("object {} delete finished", &p); +// Ok(()) +// } +// async fn list(&self, args: &OpList) -> Result { +// increment_counter!("opendal_s3_list_requests"); + +// let mut path = self.get_abs_path(&args.path); +// // Make sure list path is endswith '/' +// if !path.ends_with('/') && !path.is_empty() { +// path.push('/') +// } +// info!("object {} list start", &path); +// //prefix would look like that .prefix("root/firstfolder/") +// //if path have / as prefix, then remove it +// if path.starts_with("/") {path =path[1..].to_string();} +// let max_results 
=NonZeroU32::new(5u32).unwrap(); +// let container_client = self.client.as_container_client(&self.bucket); +// let mut stream = Box::pin( +// container_client +// .list_blobs() +// .prefix(path) +// .max_results(max_results) +// .stream()); +// todo!() +// } +// } diff --git a/src/services/azblob/error.rs b/src/services/azblob/error.rs index 8be5e87db57..7a00315f45e 100644 --- a/src/services/azblob/error.rs +++ b/src/services/azblob/error.rs @@ -1,95 +1,95 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// // Copyright 2022 Datafuse Labs. +// // +// // Licensed under the Apache License, Version 2.0 (the "License"); +// // you may not use this file except in compliance with the License. +// // You may obtain a copy of the License at +// // +// // http://www.apache.org/licenses/LICENSE-2.0 +// // +// // Unless required by applicable law or agreed to in writing, software +// // distributed under the License is distributed on an "AS IS" BASIS, +// // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// // See the License for the specific language governing permissions and +// // limitations under the License. 
-use http::StatusCode; -use azure_core::HttpError; -use crate::error::Kind; -use crate::error::Error; -use std::error::Error as StdError; +// use http::StatusCode; +// use azure_core::HttpError; +// use crate::error::Kind; +// use crate::error::Error; +// use std::error::Error as StdError; -pub fn parse_get_object_error( - err: Box, - op: &'static str, - path: &str, -) -> Error { - if let Some(err) = err.downcast_ref::() { - if matches!( - err, - HttpError::StatusCode { - status: StatusCode::NOT_FOUND, - .. - } - ) { - return Error::Object { - kind: Kind::ObjectNotExist, - op, - path: path.to_string(), - source: anyhow::Error::from(err), - } - } +// pub fn parse_get_object_error( +// err: Box, +// op: &'static str, +// path: &str, +// ) -> Error { +// if let Some(err) = err.downcast_ref::() { +// if matches!( +// err, +// HttpError::StatusCode { +// status: StatusCode::NOT_FOUND, +// .. +// } +// ) { +// return Error::Object { +// kind: Kind::ObjectNotExist, +// op, +// path: path.to_string(), +// source: anyhow::Error::from(err), +// } +// } - } - return Error::Object { - kind: Kind::Unexpected, - op, - path: path.to_string(), - source: todo!(), - } +// } +// return Error::Object { +// kind: Kind::Unexpected, +// op, +// path: path.to_string(), +// source: todo!(), +// } -} +// } -pub fn parse_head_object_error( - err: Box, - op: &'static str, - path: &str, -) -> Error { - if let Some(err) = err.downcast_ref::() { - if matches!( - err, - HttpError::StatusCode { - status: StatusCode::NOT_FOUND, - .. 
- } - ) { - return Error::Object { - kind: Kind::ObjectNotExist, - op, - path: path.to_string(), - source: anyhow::Error::from(err), - } - } - } - Error::Object { - kind: Kind::Unexpected, - op, - path: path.to_string(), - source: todo!(), - } -} +// pub fn parse_head_object_error( +// err: Box, +// op: &'static str, +// path: &str, +// ) -> Error { +// if let Some(err) = err.downcast_ref::() { +// if matches!( +// err, +// HttpError::StatusCode { +// status: StatusCode::NOT_FOUND, +// .. +// } +// ) { +// return Error::Object { +// kind: Kind::ObjectNotExist, +// op, +// path: path.to_string(), +// source: anyhow::Error::from(err), +// } +// } +// } +// Error::Object { +// kind: Kind::Unexpected, +// op, +// path: path.to_string(), +// source: todo!(), +// } +// } -// parse_unexpect_error is used to parse SdkError into unexpected. -pub fn parse_unexpect_error( - err: Box, - op: &'static str, - path: &str, -) -> Error { - Error::Object { - kind: Kind::Unexpected, - op, - path: path.to_string(), - source: todo!(), - } -} +// // parse_unexpect_error is used to parse SdkError into unexpected. 
+// pub fn parse_unexpect_error( +// err: Box, +// op: &'static str, +// path: &str, +// ) -> Error { +// Error::Object { +// kind: Kind::Unexpected, +// op, +// path: path.to_string(), +// source: todo!(), +// } +// } diff --git a/src/services/azblob/mod.rs b/src/services/azblob/mod.rs index 427475d0db5..af36b62ad5a 100644 --- a/src/services/azblob/mod.rs +++ b/src/services/azblob/mod.rs @@ -1,6 +1,6 @@ mod backend; -pub use backend::Backend; -pub use backend::Builder; +// pub use backend::Backend; +// pub use backend::Builder; mod error; mod object_stream; From f33fdc009dc28d1e2adc109e3001f0fc5654f4e9 Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Fri, 18 Mar 2022 11:58:15 +0800 Subject: [PATCH 05/16] fix bug sign can't parse url --- Cargo.toml | 2 +- src/services/azblob/backend.rs | 652 +++++++++++++++++---------------- src/services/azblob/error.rs | 95 ----- src/services/azblob/mod.rs | 10 +- 4 files changed, 335 insertions(+), 424 deletions(-) delete mode 100644 src/services/azblob/error.rs diff --git a/Cargo.toml b/Cargo.toml index 5d9025867b2..8ee51805109 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,7 @@ log = "0.4" metrics = "0.18" once_cell = "1" pin-project = "1" -reqsign = { git = "https://github.com/Xuanwo/reqsign", rev = "44d7bb37dfa543487d4d5286d64abd3f2fe12bf7" } +reqsign = { git = "https://github.com/D2Lark/reqsign", branch = "Implement-azblob-support" } reqwest = { version = "0.11", features = ["stream"] } roxmltree = "0.14" thiserror = "1" diff --git a/src/services/azblob/backend.rs b/src/services/azblob/backend.rs index c61ad0c3fce..9949b72c00d 100644 --- a/src/services/azblob/backend.rs +++ b/src/services/azblob/backend.rs @@ -1,325 +1,331 @@ -// use std::collections::HashMap; - - - -// use std::num::NonZeroU32; - -// use anyhow::anyhow; -// use async_trait::async_trait; - -// use metrics::increment_counter; - -// use super::error::parse_get_object_error; -// use super::error::parse_head_object_error; -// use 
super::error::parse_unexpect_error; - -// use crate::credential::Credential; -// use crate::error::Error; -// use crate::error::Kind; -// use crate::error::Result; -// use crate::object::BoxedObjectStream; -// use crate::object::Metadata; -// use crate::ops::OpDelete; -// use crate::ops::OpList; -// use crate::ops::OpRead; -// use crate::ops::OpStat; -// use crate::ops::OpWrite; -// use log::debug; -// use log::error; -// use log::info; -// use log::warn; -// use crate::Accessor; -// use crate::BoxedAsyncReader; -// use crate::ObjectMode; -// use std::sync::Arc; - -// use azure_core::prelude::*; -// use azure_storage::core::prelude::*; -// use azure_storage_blobs::prelude::*; - -// #[derive(Default, Debug, Clone)] -// pub struct Builder { -// root: Option, -// bucket: String, // in Azure, bucket = container -// credential: Option, -// } - -// impl Builder { -// pub fn root(&mut self, root: &str) -> &mut Self { -// self.root = if root.is_empty() { -// None -// } else { -// Some(root.to_string()) -// }; - -// self -// } -// pub fn bucket(&mut self, bucket: &str) -> &mut Self { -// self.bucket = bucket.to_string(); - -// self -// } - -// pub fn credential(&mut self, credential: Credential) -> &mut Self { -// self.credential = Some(credential); - -// self -// } -// pub async fn finish(&mut self) -> Result> { -// info!("backend build started: {:?}", &self); -// let root = self.root.unwrap(); -// info!("backend use root {}", root); -// let bucket = match self.bucket.is_empty() { -// false => Ok(&self.bucket), -// true => Err(Error::Backend { -// kind: Kind::BackendConfigurationInvalid, -// context: HashMap::from([("bucket".to_string(), "".to_string())]), -// source: anyhow!("bucket is empty"), -// }), -// }?; -// debug!("backend use bucket {}", &bucket); -// let mut context: HashMap = -// HashMap::from([("bucket".to_string(), bucket.to_string())]); -// let mut azure_storage_account = String::new(); -// let mut azure_storage_key = String::new(); -// if let Some(cred) = 
&self.credential { -// context.insert("credential".to_string(), "*".to_string()); -// match cred { -// Credential::HMAC { -// access_key_id, -// secret_access_key, -// } => { -// azure_storage_account = access_key_id.to_string(); -// azure_storage_key = secret_access_key.to_string(); -// } -// // We don't need to do anything if user tries to read credential from env. -// Credential::Plain => { -// warn!("backend got empty credential, fallback to read from env.") -// } -// _ => { -// return Err(Error::Backend { -// kind: Kind::BackendConfigurationInvalid, -// context: context.clone(), -// source: anyhow!("credential is invalid"), -// }); -// } -// } -// } -// let http_client = azure_core::new_http_client(); -// let storage_client = StorageAccountClient::new_access_key( -// http_client.clone(), -// azure_storage_account, -// azure_storage_key, -// ).as_storage_client(); -// info!("backend build finished: {:?}", &self); -// Ok(Arc::new(Backend { -// root:root.unwrap().clone(), -// bucket: self.bucket.clone(), -// client: storage_client, -// })) -// } -// } -// #[derive(Debug, Clone)] -// pub struct Backend { -// bucket: String, -// client: Arc, -// root: String, -// } - -// impl Backend { -// pub fn build() -> Builder { -// Builder::default() -// } - -// pub(crate) fn inner(&self) -> Arc { -// self.client.clone() -// } -// pub(crate) fn normalize_path(path: &str) -> String { -// let has_trailing = path.ends_with('/'); - -// let mut p = path -// .split('/') -// .filter(|v| !v.is_empty()) -// .collect::>() -// .join("/"); - -// if has_trailing && !p.eq("/") { -// p.push('/') -// } - -// p -// } -// pub(crate) fn get_abs_path(&self, path: &str) -> String { -// let path = Backend::normalize_path(path); -// // root must be normalized like `/abc/` -// format!("{}{}", self.root, path) -// .trim_start_matches('/') -// .to_string() -// } -// pub(crate) fn get_rel_path(&self, path: &str) -> String { -// let path = format!("/{}", path); - -// match path.strip_prefix(&self.root) 
{ -// Some(v) => v.to_string(), -// None => unreachable!( -// "invalid path {} that not start with backend root {}", -// &path, &self.root -// ), -// } -// } -// } -// #[async_trait] -// impl Accessor for Backend { -// async fn read(&self, args: &OpRead) -> Result { -// increment_counter!("opendal_s3_read_requests"); - -// let p = self.get_abs_path(&args.path); -// info!( -// "object {} read start: offset {:?}, size {:?}", -// &p, args.offset, args.size -// ); - -// let mut blob_client = self -// .client -// .as_container_client(&self.bucket) -// .as_blob_client(&p); - -// let resp =if args.offset.is_some() || args.size.is_some() { -// blob_client -// .get() -// .range(Range::new(0, 1024)) -// .execute() -// .await.map_err(|e| { -// let e = parse_get_object_error(e, "read", &p); -// error!("object {} get_object: {:?}", &p, e); -// e -// })? -// } else{ -// blob_client -// .get() -// .execute() -// .await.map_err(|e| { -// let e = parse_get_object_error(e, "read", &p); -// error!("object {} get_object: {:?}", &p, e); -// e -// })? 
-// }; - -// info!( -// "object {} reader created: offset {:?}, size {:?}", -// &p, args.offset, args.size -// ); -// // Ok(Box::new(S3ByteStream(resp).into_async_read())) -// todo!() -// } -// async fn write(&self, r: BoxedAsyncReader, args: &OpWrite) -> Result { -// increment_counter!("opendal_s3_stat_requests"); -// let p = self.get_abs_path(&args.path); -// info!("object {} write start: size {}", &p, args.size); -// let blob_client =self.client -// .as_container_client(&self.bucket) -// .as_blob_client(&p); -// blob_client -// .put_block_blob("tmp_data") -// .content_type("text/plain") -// .execute() -// .await.map_err(|e| { -// let e = parse_unexpect_error(e, "write", &p); -// error!("object {} put_object: {:?}", &p, e); -// e -// })?; -// Ok(args.size as usize) -// } -// async fn stat(&self, args: &OpStat) -> Result { -// increment_counter!("opendal_s3_stat_requests"); - -// let p = self.get_abs_path(&args.path); -// info!("object {} stat start", &p); - -// let blob_client =self.client -// .as_container_client(&self.bucket) -// .as_blob_client(&p); +use std::collections::HashMap; + +use crate::ops::HeaderRange; +use reqsign::services::azure::azblob::Signer; +use std::num::NonZeroU32; +use reqwest::{Body, Response, Url}; +use anyhow::anyhow; +use async_trait::async_trait; +use std::str::FromStr; +use metrics::increment_counter; +use futures::TryStreamExt; + +use crate::credential::Credential; +use crate::error::Error; +use crate::error::Kind; +use crate::error::Result; +use crate::object::BoxedObjectStream; +use crate::object::Metadata; +use crate::ops::OpDelete; +use crate::ops::OpList; +use crate::ops::OpRead; +use crate::ops::OpStat; +use crate::ops::OpWrite; +use log::debug; +use log::error; +use log::info; +use log::warn; +use crate::Accessor; +use crate::BoxedAsyncReader; +use crate::ObjectMode; +use std::sync::Arc; + +use azure_core::prelude::*; +use azure_storage::core::prelude::*; +use azure_storage_blobs::prelude::*; + +#[derive(Default, Debug, 
Clone)] +pub struct Builder { + root: Option, + bucket: String, // in Azure, bucket = container + credential: Option, + endpoint: Option, + +} + +impl Builder { + pub fn root(&mut self, root: &str) -> &mut Self { + self.root = if root.is_empty() { + None + } else { + Some(root.to_string()) + }; + + self + } + pub fn bucket(&mut self, bucket: &str) -> &mut Self { + self.bucket = bucket.to_string(); + + self + } + pub fn endpoint(&mut self,endpoint:&str) -> &mut Self{ + self.endpoint = Some(endpoint.to_string()); + + self + } + pub fn credential(&mut self, credential: Credential) -> &mut Self { + self.credential = Some(credential); + + self + } + pub async fn finish(&mut self) -> Result> { -// let response_result = blob_client.get().execute() -// .await.map_err(|e| parse_head_object_error(e, "stat", &p)); - -// match response_result { -// Ok(response) => { -// let mut m = Metadata::default(); -// m.set_path(&args.path); -// m.set_content_length(response.blob.properties.content_length); - -// if p.ends_with('/') { -// m.set_mode(ObjectMode::DIR); -// } else { -// m.set_mode(ObjectMode::FILE); -// }; - -// m.set_complete(); - -// info!("object {} stat finished", &p); -// Ok(m) -// } -// // Always returns empty dir object if path is endswith "/" and we got an -// // ObjectNotExist error. 
-// Err(e) if (e.kind() == Kind::ObjectNotExist && p.ends_with('/')) => { -// let mut m = Metadata::default(); -// m.set_path(&args.path); -// m.set_content_length(0); -// m.set_mode(ObjectMode::DIR); -// m.set_complete(); - -// info!("object {} stat finished", &p); -// Ok(m) -// } -// Err(e) => { -// error!("object {} head_object: {:?}", &p, e); -// Err(e) -// } -// } - -// } -// async fn delete(&self, args: &OpDelete) -> Result<()> { -// increment_counter!("opendal_s3_delete_requests"); - -// let p = self.get_abs_path(&args.path); -// info!("object {} delete start", &p); - -// let blob_client = -// self.client -// .as_container_client(&self.bucket) -// .as_blob_client(&p); - -// let res = blob_client -// .delete() -// .delete_snapshots_method(DeleteSnapshotsMethod::Include) -// .execute() -// .await.map_err(|e| parse_unexpect_error(e, "delete", &p))?; - -// info!("object {} delete finished", &p); -// Ok(()) -// } -// async fn list(&self, args: &OpList) -> Result { -// increment_counter!("opendal_s3_list_requests"); - -// let mut path = self.get_abs_path(&args.path); -// // Make sure list path is endswith '/' -// if !path.ends_with('/') && !path.is_empty() { -// path.push('/') -// } -// info!("object {} list start", &path); -// //prefix would look like that .prefix("root/firstfolder/") -// //if path have / as prefix, then remove it -// if path.starts_with("/") {path =path[1..].to_string();} -// let max_results =NonZeroU32::new(5u32).unwrap(); -// let container_client = self.client.as_container_client(&self.bucket); -// let mut stream = Box::pin( -// container_client -// .list_blobs() -// .prefix(path) -// .max_results(max_results) -// .stream()); -// todo!() -// } -// } + info!("backend build started: {:?}", &self); + + let root = match &self.root { + // Use "/" as root if user not specified. 
+ None => "/".to_string(), + Some(v) => { + let mut v = Backend::normalize_path(v); + if !v.starts_with('/') { + v.insert(0, '/'); + } + if !v.ends_with('/') { + v.push('/') + } + v + } + }; + + info!("backend use root {}", root); + + // Handle endpoint, region and bucket name. + let bucket = match self.bucket.is_empty() { + false => Ok(&self.bucket), + true => Err(Error::Backend { + kind: Kind::BackendConfigurationInvalid, + context: HashMap::from([("bucket".to_string(), "".to_string())]), + source: anyhow!("bucket is empty"), + }), + }?; + debug!("backend use bucket {}", &bucket); + + let endpoint = match &self.endpoint { + Some(endpoint) => endpoint.clone(), + None => "blob.core.windows.net".to_string(), + }; + + debug!("backend use endpoint {} to detect region", &endpoint); + + + let mut context: HashMap = HashMap::from([ + ("endpoint".to_string(), endpoint.to_string()), + ("bucket".to_string(), bucket.to_string()), + ]); + + + + let mut azure_storage_account = String::new(); + let mut azure_storage_key = String::new(); + if let Some(cred) = &self.credential { + context.insert("credential".to_string(), "*".to_string()); + match cred { + Credential::HMAC { + access_key_id, + secret_access_key, + } => { + azure_storage_account = access_key_id.to_string(); + azure_storage_key = secret_access_key.to_string(); + } + // We don't need to do anything if user tries to read credential from env. 
+ Credential::Plain => { + warn!("backend got empty credential, fallback to read from env.") + } + _ => { + return Err(Error::Backend { + kind: Kind::BackendConfigurationInvalid, + context: context.clone(), + source: anyhow!("credential is invalid"), + }); + } + } + } + let client = reqwest::Client::new(); + + let mut signer_builder = Signer::builder(); + signer_builder + .access_acount(&azure_storage_account) + .access_key(&azure_storage_key); + + let signer = signer_builder.build().await?; + + // let http_client = azure_core::new_http_client(); + // let storage_client = StorageAccountClient::new_access_key( + // http_client.clone(), + // azure_storage_account, + // azure_storage_key, + // ).as_storage_client(); + info!("backend build finished: {:?}", &self); + Ok(Arc::new(Backend { + root:root, + endpoint, + signer: Arc::new(signer), + bucket: self.bucket.clone(), + client, + azure_storage_account + })) + } +} + + +#[derive(Debug, Clone)] +pub struct Backend { + bucket: String, + client: reqwest::Client, + root: String, // root will be "/" or /abc/ + endpoint:String, + signer:Arc, + azure_storage_account:String, +} + +impl Backend { + pub fn build() -> Builder { + Builder::default() + } + + pub(crate) fn normalize_path(path: &str) -> String { + let has_trailing = path.ends_with('/'); + + let mut p = path + .split('/') + .filter(|v| !v.is_empty()) + .collect::>() + .join("/"); + + if has_trailing && !p.eq("/") { + p.push('/') + } + + p + } + pub(crate) fn get_abs_path(&self, path: &str) -> String { + let path = Backend::normalize_path(path); + // root must be normalized like `/abc/` + format!("{}{}", self.root, path) + .trim_start_matches('/') + .to_string() + } + pub(crate) fn get_rel_path(&self, path: &str) -> String { + let path = format!("/{}", path); + + match path.strip_prefix(&self.root) { + Some(v) => v.to_string(), + None => unreachable!( + "invalid path {} that not start with backend root {}", + &path, &self.root + ), + } + } +} +#[async_trait] +impl 
Accessor for Backend { + async fn read(&self, args: &OpRead) -> Result { + increment_counter!("opendal_azure_read_requests"); + + let p = self.get_abs_path(&args.path); + info!( + "object {} read start: offset {:?}, size {:?}", + &p, args.offset, args.size + ); + + let resp = self.get_object(&p, args.offset, args.size).await?; + + info!( + "object {} reader created: offset {:?}, size {:?}", + &p, args.offset, args.size + ); + Ok(Box::new( + resp.bytes_stream() + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)) + .into_async_read(), + )) + + } + async fn write(&self, r: BoxedAsyncReader, args: &OpWrite) -> Result { + todo!() + } + async fn stat(&self, args: &OpStat) -> Result { + todo!() + } + async fn delete(&self, args: &OpDelete) -> Result<()> { + todo!() + } + async fn list(&self, args: &OpList) -> Result { + todo!() + } +} + +impl Backend { + pub(crate) async fn get_object( + &self, + path: &str, + offset: Option, + size: Option, + ) -> Result { + let mut req = reqwest::Request::new( + http::Method::GET, + Url::from_str(&format!("https://{}.{}/{}/{}", + self.azure_storage_account, + self.endpoint, + self.bucket, + path)) + .expect("url must be valid"), + ); + + if offset.is_some() || size.is_some() { + req.headers_mut().insert( + http::header::RANGE, + HeaderRange::new(offset, size) + .to_string() + .parse() + .expect("header must be valid"), + ); + } + + self.signer.sign(&mut req).await.expect("sign must success"); + + self.client.execute(req).await.map_err(|e| { + error!("object {} get_object: {:?}", path, e); + Error::Unexpected(anyhow::Error::from(e)) + }) + } +} +#[cfg(test)] +mod tests { + use std::env; + use super::*; + use anyhow::Result; + use crate::operator::Operator; + use futures::AsyncReadExt; + #[tokio::test] + async fn azblob_new() -> Result<()> { + dotenv::from_filename(".env").ok(); + + if env::var("OPENDAL_AZBLOB_TEST").is_err() || env::var("OPENDAL_AZBLOB_TEST").unwrap() != "on" { + return Ok(()); + } + + let root = + 
&env::var("OPENDAL_AZBLOB_ROOT").unwrap_or_else(|_| format!("/{}", uuid::Uuid::new_v4())); + + let mut builder = Backend::build(); + builder.root(root); + builder.bucket(&env::var("OPENDAL_AZBLOB_BUCKET").expect("OPENDAL_AZBLOB_BUCKET must set")); + builder.endpoint(&env::var("OPENDAL_AZBLOB_ENDPOINT").unwrap_or_default()); + builder.credential(Credential::hmac( + &env::var("OPENDAL_AZBLOB_ACCESS_KEY_ID").unwrap_or_default(), + &env::var("OPENDAL_AZBLOB_SECRET_ACCESS_KEY").unwrap_or_default(), + )); + let acc = builder.finish().await?; + + println!("{acc:?}"); + let path = "1.txt"; + let operator = Operator::new(acc); + let mut buf = Vec::new(); + let mut r = operator.object(&path).reader(); + let n = r.read_to_end(&mut buf).await.expect("read to end"); + println!("{buf:?}"); + Ok(()) + } +} diff --git a/src/services/azblob/error.rs b/src/services/azblob/error.rs deleted file mode 100644 index 7a00315f45e..00000000000 --- a/src/services/azblob/error.rs +++ /dev/null @@ -1,95 +0,0 @@ -// // Copyright 2022 Datafuse Labs. -// // -// // Licensed under the Apache License, Version 2.0 (the "License"); -// // you may not use this file except in compliance with the License. -// // You may obtain a copy of the License at -// // -// // http://www.apache.org/licenses/LICENSE-2.0 -// // -// // Unless required by applicable law or agreed to in writing, software -// // distributed under the License is distributed on an "AS IS" BASIS, -// // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// // See the License for the specific language governing permissions and -// // limitations under the License. 
- - -// use http::StatusCode; -// use azure_core::HttpError; -// use crate::error::Kind; -// use crate::error::Error; -// use std::error::Error as StdError; - - -// pub fn parse_get_object_error( -// err: Box, -// op: &'static str, -// path: &str, -// ) -> Error { -// if let Some(err) = err.downcast_ref::() { -// if matches!( -// err, -// HttpError::StatusCode { -// status: StatusCode::NOT_FOUND, -// .. -// } -// ) { -// return Error::Object { -// kind: Kind::ObjectNotExist, -// op, -// path: path.to_string(), -// source: anyhow::Error::from(err), -// } -// } - -// } -// return Error::Object { -// kind: Kind::Unexpected, -// op, -// path: path.to_string(), -// source: todo!(), -// } - -// } - -// pub fn parse_head_object_error( -// err: Box, -// op: &'static str, -// path: &str, -// ) -> Error { -// if let Some(err) = err.downcast_ref::() { -// if matches!( -// err, -// HttpError::StatusCode { -// status: StatusCode::NOT_FOUND, -// .. -// } -// ) { -// return Error::Object { -// kind: Kind::ObjectNotExist, -// op, -// path: path.to_string(), -// source: anyhow::Error::from(err), -// } -// } -// } -// Error::Object { -// kind: Kind::Unexpected, -// op, -// path: path.to_string(), -// source: todo!(), -// } -// } - -// // parse_unexpect_error is used to parse SdkError into unexpected. 
-// pub fn parse_unexpect_error( -// err: Box, -// op: &'static str, -// path: &str, -// ) -> Error { -// Error::Object { -// kind: Kind::Unexpected, -// op, -// path: path.to_string(), -// source: todo!(), -// } -// } diff --git a/src/services/azblob/mod.rs b/src/services/azblob/mod.rs index af36b62ad5a..a026e27409c 100644 --- a/src/services/azblob/mod.rs +++ b/src/services/azblob/mod.rs @@ -1,7 +1,7 @@ -mod backend; -// pub use backend::Backend; -// pub use backend::Builder; +pub mod backend; +pub use backend::Backend; +pub use backend::Builder; + -mod error; mod object_stream; -mod middleware; \ No newline at end of file +mod middleware; From 80801ed212872aa594a139a7d4e513101cc10f45 Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Sun, 20 Mar 2022 09:06:26 +0800 Subject: [PATCH 06/16] remove azure sdk package in cargo.toml --- Cargo.toml | 8 +- opendal_test/src/services/azblob.rs | 28 --- opendal_test/src/services/mod.rs | 1 - src/services/azblob/backend.rs | 232 +++++++++++++----- src/services/azblob/middleware/credentials.rs | 0 src/services/azblob/middleware/mod.rs | 0 src/services/azblob/mod.rs | 3 - src/services/azblob/object_stream.rs | 0 src/services/mod.rs | 3 +- 9 files changed, 176 insertions(+), 99 deletions(-) delete mode 100644 opendal_test/src/services/azblob.rs delete mode 100644 src/services/azblob/middleware/credentials.rs delete mode 100644 src/services/azblob/middleware/mod.rs delete mode 100644 src/services/azblob/object_stream.rs diff --git a/Cargo.toml b/Cargo.toml index 8ee51805109..581fbbd1536 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,11 +24,6 @@ anyhow = "1" async-compat = "0.2" async-trait = "0.1" -azure_core = {version="0.1",default-features=false,git = "https://github.com/Azure/azure-sdk-for-rust"} -azure_storage = { version = "0.1.0", default-features=false, features=["account"], git = "https://github.com/Azure/azure-sdk-for-rust" } -azure_storage_blobs = { version = "0.1.0", git = "https://github.com/Azure/azure-sdk-for-rust" } 
-azure_identity = {git = "https://github.com/Azure/azure-sdk-for-rust", version = "0.1" } - oauth2 = { version = "4.0.0", default-features = false } blocking = "1" bstr = "0.2" @@ -40,12 +35,13 @@ log = "0.4" metrics = "0.18" once_cell = "1" pin-project = "1" -reqsign = { git = "https://github.com/D2Lark/reqsign", branch = "Implement-azblob-support" } +reqsign = { git = "https://github.com/D2Lark/reqsign", branch = "main" } reqwest = { version = "0.11", features = ["stream"] } roxmltree = "0.14" thiserror = "1" tokio = { version = "1.17", features = ["full"] } tower = "0.4" +md5 = "0.7" [dev-dependencies] anyhow = "1.0" diff --git a/opendal_test/src/services/azblob.rs b/opendal_test/src/services/azblob.rs deleted file mode 100644 index f3da0542f02..00000000000 --- a/opendal_test/src/services/azblob.rs +++ /dev/null @@ -1,28 +0,0 @@ -// use std::env; -// use std::sync::Arc; - -// use opendal::error::Result; -// use opendal::services::azblob; -// use opendal::Accessor; - -// pub async fn new() -> Result>> { -// dotenv::from_filename(".env").ok(); - -// if env::var("OPENDAL_AZBLOB_TEST").is_err() || env::var("OPENDAL_AZBLOB_TEST").unwrap() != "on" { -// return Ok(None); -// } - -// let root = -// &env::var("OPENDAL_S3_ROOT").unwrap_or_else(|_| format!("/{}", uuid::Uuid::new_v4())); - -// let mut builder = azblob::Backend::build(); -// builder.root(root); -// builder.bucket(&env::var("OPENDAL_AZBLOB_BUCKET").expect("OPENDAL_AZBLOB_BUCKET must set")); - -// builder.credential(Credential::hmac( -// &env::var("OPENDAL_AZBLOB_ACCESS_KEY_ID").unwrap_or_default(), -// &env::var("OPENDAL_AZBLOB_SECRET_ACCESS_KEY").unwrap_or_default(), -// )); - -// Ok(Some(builder.finish().await?)) -// } \ No newline at end of file diff --git a/opendal_test/src/services/mod.rs b/opendal_test/src/services/mod.rs index 1e25a3f79d6..4f31096621f 100644 --- a/opendal_test/src/services/mod.rs +++ b/opendal_test/src/services/mod.rs @@ -14,4 +14,3 @@ pub mod fs; pub mod memory; pub mod s3; -pub mod 
azblob; \ No newline at end of file diff --git a/src/services/azblob/backend.rs b/src/services/azblob/backend.rs index 9949b72c00d..6e0b619d4f4 100644 --- a/src/services/azblob/backend.rs +++ b/src/services/azblob/backend.rs @@ -1,15 +1,15 @@ +use futures::TryStreamExt; +use http::HeaderValue; use std::collections::HashMap; use crate::ops::HeaderRange; -use reqsign::services::azure::azblob::Signer; -use std::num::NonZeroU32; -use reqwest::{Body, Response, Url}; use anyhow::anyhow; use async_trait::async_trait; -use std::str::FromStr; use metrics::increment_counter; -use futures::TryStreamExt; +use reqsign::services::azure::azblob::Signer; +use reqwest::{Body, Response, Url}; +use std::str::FromStr; use crate::credential::Credential; use crate::error::Error; @@ -22,26 +22,22 @@ use crate::ops::OpList; use crate::ops::OpRead; use crate::ops::OpStat; use crate::ops::OpWrite; +use crate::Accessor; +use crate::BoxedAsyncReader; use log::debug; use log::error; use log::info; use log::warn; -use crate::Accessor; -use crate::BoxedAsyncReader; -use crate::ObjectMode; +pub const DELETE_SNAPSHOTS: &str = "x-ms-delete-snapshots"; +use crate::readers::ReaderStream; use std::sync::Arc; -use azure_core::prelude::*; -use azure_storage::core::prelude::*; -use azure_storage_blobs::prelude::*; - #[derive(Default, Debug, Clone)] pub struct Builder { root: Option, bucket: String, // in Azure, bucket = container credential: Option, endpoint: Option, - } impl Builder { @@ -59,7 +55,7 @@ impl Builder { self } - pub fn endpoint(&mut self,endpoint:&str) -> &mut Self{ + pub fn endpoint(&mut self, endpoint: &str) -> &mut Self { self.endpoint = Some(endpoint.to_string()); self @@ -70,9 +66,8 @@ impl Builder { self } pub async fn finish(&mut self) -> Result> { - info!("backend build started: {:?}", &self); - + let root = match &self.root { // Use "/" as root if user not specified. 
None => "/".to_string(), @@ -108,14 +103,11 @@ impl Builder { debug!("backend use endpoint {} to detect region", &endpoint); - let mut context: HashMap = HashMap::from([ ("endpoint".to_string(), endpoint.to_string()), ("bucket".to_string(), bucket.to_string()), ]); - - let mut azure_storage_account = String::new(); let mut azure_storage_key = String::new(); if let Some(cred) = &self.credential { @@ -145,38 +137,31 @@ impl Builder { let mut signer_builder = Signer::builder(); signer_builder - .access_acount(&azure_storage_account) - .access_key(&azure_storage_key); + .access_acount(&azure_storage_account) + .access_key(&azure_storage_key); let signer = signer_builder.build().await?; - - // let http_client = azure_core::new_http_client(); - // let storage_client = StorageAccountClient::new_access_key( - // http_client.clone(), - // azure_storage_account, - // azure_storage_key, - // ).as_storage_client(); + info!("backend build finished: {:?}", &self); Ok(Arc::new(Backend { - root:root, + root: root, endpoint, signer: Arc::new(signer), bucket: self.bucket.clone(), client, - azure_storage_account + azure_storage_account, })) } } - #[derive(Debug, Clone)] pub struct Backend { bucket: String, client: reqwest::Client, root: String, // root will be "/" or /abc/ - endpoint:String, - signer:Arc, - azure_storage_account:String, + endpoint: String, + signer: Arc, + azure_storage_account: String, } impl Backend { @@ -206,6 +191,7 @@ impl Backend { .trim_start_matches('/') .to_string() } + #[warn(dead_code)] pub(crate) fn get_rel_path(&self, path: &str) -> String { let path = format!("/{}", path); @@ -240,19 +226,44 @@ impl Accessor for Backend { .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)) .into_async_read(), )) - } async fn write(&self, r: BoxedAsyncReader, args: &OpWrite) -> Result { - todo!() + let p = self.get_abs_path(&args.path); + info!("object {} write start: size {}", &p, args.size); + + let resp = self.put_object(&p, r, args.size).await?; + 
println!("resp :{resp:?}"); + match resp.status() { + http::StatusCode::CREATED | http::StatusCode::OK => { + info!("object {} write finished: size {:?}", &p, args.size); + Ok(args.size as usize) + } + _ => Err(Error::Object { + kind: Kind::Unexpected, + op: "write", + path: p.to_string(), + source: anyhow!("{:?}", resp), + }), + } } async fn stat(&self, args: &OpStat) -> Result { - todo!() + let _ = args; + unimplemented!() } async fn delete(&self, args: &OpDelete) -> Result<()> { - todo!() + increment_counter!("opendal_azure_delete_requests"); + + let p = self.get_abs_path(&args.path); + info!("object {} delete start", &p); + + let _ = self.delete_object(&p).await?; + + info!("object {} delete finished", &p); + Ok(()) } async fn list(&self, args: &OpList) -> Result { - todo!() + let _ = args; + unimplemented!() } } @@ -265,11 +276,10 @@ impl Backend { ) -> Result { let mut req = reqwest::Request::new( http::Method::GET, - Url::from_str(&format!("https://{}.{}/{}/{}", - self.azure_storage_account, - self.endpoint, - self.bucket, - path)) + Url::from_str(&format!( + "https://{}.{}/{}/{}", + self.azure_storage_account, self.endpoint, self.bucket, path + )) .expect("url must be valid"), ); @@ -283,33 +293,112 @@ impl Backend { ); } + self.signer.sign(&mut req).await.expect("sign must success"); + println!("req: {req:?}"); + let resp = self.client.execute(req).await.map_err(|e| { + error!("object {} get_object: {:?}", path, e); + Error::Unexpected(anyhow::Error::from(e)) + }); + println!("resp: {resp:?}"); + resp + } + pub(crate) async fn put_object( + &self, + path: &str, + r: BoxedAsyncReader, + size: u64, + ) -> Result { + // let hash = md5::compute(&data[..]).into(); + + let mut req = reqwest::Request::new( + http::Method::PUT, + Url::from_str(&format!( + "https://{}.{}/{}/{}", + self.azure_storage_account, self.endpoint, self.bucket, path + )) + .expect("url must be valid"), + ); + + // Set content length. 
+ req.headers_mut().insert( + http::header::CONTENT_LENGTH, + size.to_string() + .parse() + .expect("content length must be valid"), + ); + req.headers_mut().insert( + http::header::CONTENT_TYPE, + HeaderValue::from_static("text/plain"), + ); + + req.headers_mut() + .insert("x-ms-blob-type", HeaderValue::from_static("BlockBlob")); + + *req.body_mut() = Some(Body::from(hyper::body::Body::wrap_stream( + ReaderStream::new(r), + ))); + self.signer.sign(&mut req).await.expect("sign must success"); self.client.execute(req).await.map_err(|e| { - error!("object {} get_object: {:?}", path, e); + error!("object {} put_object: {:?}", path, e); Error::Unexpected(anyhow::Error::from(e)) }) } + #[warn(dead_code)] + pub(crate) async fn head_object(&self, path: &str) -> Result { + let _ = path; + unimplemented!() + } + + pub(crate) async fn delete_object(&self, path: &str) -> Result { + let mut req = reqwest::Request::new( + http::Method::DELETE, + Url::from_str(&format!( + "https://{}.{}/{}/{}", + self.azure_storage_account, self.endpoint, self.bucket, path + )) + .expect("url must be valid"), + ); + + req.headers_mut() + .insert(DELETE_SNAPSHOTS, HeaderValue::from_static("include")); + + self.signer.sign(&mut req).await.expect("sign must success"); + println!("req: {req:?}"); + let resp = self.client.execute(req).await.map_err(|e| { + error!("object {} delete_object: {:?}", path, e); + Error::Unexpected(anyhow::Error::from(e)) + }); + println!("resp: {resp:?}"); + resp + } + #[warn(dead_code)] + pub(crate) async fn list_object( + &self, + path: &str, + continuation_token: &str, + ) -> Result { + let _ = path; + let _ = continuation_token; + unimplemented!() + } } #[cfg(test)] mod tests { - use std::env; use super::*; - use anyhow::Result; use crate::operator::Operator; use futures::AsyncReadExt; - #[tokio::test] - async fn azblob_new() -> Result<()> { + use std::env; + + async fn azblob_new_access() -> Result> { dotenv::from_filename(".env").ok(); - - if 
env::var("OPENDAL_AZBLOB_TEST").is_err() || env::var("OPENDAL_AZBLOB_TEST").unwrap() != "on" { - return Ok(()); - } - let root = - &env::var("OPENDAL_AZBLOB_ROOT").unwrap_or_else(|_| format!("/{}", uuid::Uuid::new_v4())); + let root = &env::var("OPENDAL_AZBLOB_ROOT") + .unwrap_or_else(|_| format!("/{}", uuid::Uuid::new_v4())); let mut builder = Backend::build(); + builder.root(root); builder.bucket(&env::var("OPENDAL_AZBLOB_BUCKET").expect("OPENDAL_AZBLOB_BUCKET must set")); builder.endpoint(&env::var("OPENDAL_AZBLOB_ENDPOINT").unwrap_or_default()); @@ -317,15 +406,38 @@ mod tests { &env::var("OPENDAL_AZBLOB_ACCESS_KEY_ID").unwrap_or_default(), &env::var("OPENDAL_AZBLOB_SECRET_ACCESS_KEY").unwrap_or_default(), )); - let acc = builder.finish().await?; - - println!("{acc:?}"); - let path = "1.txt"; + + let acc = builder.finish().await; + + acc + } + + #[tokio::test] + async fn test_write_read_delete() -> Result<()> { + let acc = azblob_new_access().await?; + let operator = Operator::new(acc); + + let path = "put_blob_object1"; + println!("Generate a file: {}", &path); + + //content of object is a vec of b'a' size = 1024 + let size: usize = 1024; + let content = vec![97; size]; + // first step: write content to azblob + let w = operator.object(&path).writer(); + let n = w.write_bytes(content.clone()).await?; + assert_eq!(n, size, "write to azblob success"); + + // second step: read content from azblob let mut buf = Vec::new(); let mut r = operator.object(&path).reader(); let n = r.read_to_end(&mut buf).await.expect("read to end"); - println!("{buf:?}"); + assert_eq!(n, buf.len(), "read to azblob success"); + + // thrid setp: delete azblob + let _ = operator.object(&path).delete().await?; + Ok(()) } } diff --git a/src/services/azblob/middleware/credentials.rs b/src/services/azblob/middleware/credentials.rs deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/services/azblob/middleware/mod.rs b/src/services/azblob/middleware/mod.rs deleted file mode 
100644 index e69de29bb2d..00000000000 diff --git a/src/services/azblob/mod.rs b/src/services/azblob/mod.rs index a026e27409c..4b440c4e84a 100644 --- a/src/services/azblob/mod.rs +++ b/src/services/azblob/mod.rs @@ -2,6 +2,3 @@ pub mod backend; pub use backend::Backend; pub use backend::Builder; - -mod object_stream; -mod middleware; diff --git a/src/services/azblob/object_stream.rs b/src/services/azblob/object_stream.rs deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/services/mod.rs b/src/services/mod.rs index b2f95e1480c..4894b6aec88 100644 --- a/src/services/mod.rs +++ b/src/services/mod.rs @@ -19,8 +19,9 @@ //! - Builder: responsible for building the service backend. //! - Backend: the service backend which implements the [`Accessor`][crate::Accessor] trait. + pub mod fs; pub mod memory; -pub mod azblob; pub mod s3; +pub mod azblob; \ No newline at end of file From 1bcc09dd6145f28e354c9c1f1512a08f848b4f8a Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Sun, 20 Mar 2022 10:03:58 +0800 Subject: [PATCH 07/16] reqsign file name change --- src/services/azblob/backend.rs | 38 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/services/azblob/backend.rs b/src/services/azblob/backend.rs index 6e0b619d4f4..ca8ef5ef01c 100644 --- a/src/services/azblob/backend.rs +++ b/src/services/azblob/backend.rs @@ -7,7 +7,7 @@ use crate::ops::HeaderRange; use anyhow::anyhow; use async_trait::async_trait; use metrics::increment_counter; -use reqsign::services::azure::azblob::Signer; +use reqsign::services::azure::signer::Signer; use reqwest::{Body, Response, Url}; use std::str::FromStr; @@ -108,8 +108,8 @@ impl Builder { ("bucket".to_string(), bucket.to_string()), ]); - let mut azure_storage_account = String::new(); - let mut azure_storage_key = String::new(); + let mut access_name = String::new(); + let mut shared_key = String::new(); if let Some(cred) = &self.credential { 
context.insert("credential".to_string(), "*".to_string()); match cred { @@ -117,8 +117,8 @@ impl Builder { access_key_id, secret_access_key, } => { - azure_storage_account = access_key_id.to_string(); - azure_storage_key = secret_access_key.to_string(); + access_name = access_key_id.to_string(); + shared_key = secret_access_key.to_string(); } // We don't need to do anything if user tries to read credential from env. Credential::Plain => { @@ -137,8 +137,8 @@ impl Builder { let mut signer_builder = Signer::builder(); signer_builder - .access_acount(&azure_storage_account) - .access_key(&azure_storage_key); + .access_name(&access_name) + .shared_key(&shared_key); let signer = signer_builder.build().await?; @@ -149,7 +149,7 @@ impl Builder { signer: Arc::new(signer), bucket: self.bucket.clone(), client, - azure_storage_account, + access_name, })) } } @@ -161,7 +161,7 @@ pub struct Backend { root: String, // root will be "/" or /abc/ endpoint: String, signer: Arc, - azure_storage_account: String, + access_name: String, } impl Backend { @@ -278,7 +278,7 @@ impl Backend { http::Method::GET, Url::from_str(&format!( "https://{}.{}/{}/{}", - self.azure_storage_account, self.endpoint, self.bucket, path + self.access_name, self.endpoint, self.bucket, path )) .expect("url must be valid"), ); @@ -314,7 +314,7 @@ impl Backend { http::Method::PUT, Url::from_str(&format!( "https://{}.{}/{}/{}", - self.azure_storage_account, self.endpoint, self.bucket, path + self.access_name, self.endpoint, self.bucket, path )) .expect("url must be valid"), ); @@ -356,7 +356,7 @@ impl Backend { http::Method::DELETE, Url::from_str(&format!( "https://{}.{}/{}/{}", - self.azure_storage_account, self.endpoint, self.bucket, path + self.access_name, self.endpoint, self.bucket, path )) .expect("url must be valid"), ); @@ -429,14 +429,14 @@ mod tests { let n = w.write_bytes(content.clone()).await?; assert_eq!(n, size, "write to azblob success"); - // second step: read content from azblob - let mut buf = 
Vec::new(); - let mut r = operator.object(&path).reader(); - let n = r.read_to_end(&mut buf).await.expect("read to end"); - assert_eq!(n, buf.len(), "read to azblob success"); + // // second step: read content from azblob + // let mut buf = Vec::new(); + // let mut r = operator.object(&path).reader(); + // let n = r.read_to_end(&mut buf).await.expect("read to end"); + // assert_eq!(n, buf.len(), "read to azblob success"); - // thrid setp: delete azblob - let _ = operator.object(&path).delete().await?; + // // thrid setp: delete azblob + // let _ = operator.object(&path).delete().await?; Ok(()) } From cde4e2ab614b14ba86523859ceb5d7909ad73d07 Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Sun, 20 Mar 2022 10:09:35 +0800 Subject: [PATCH 08/16] change cargo.toml --- Cargo.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cbd20c7292a..739e23b3dd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,11 +39,7 @@ roxmltree = "0.14" thiserror = "1" tokio = { version = "1.17", features = ["full"] } tower = "0.4" -<<<<<<< HEAD time = "0.3.7" -======= -md5 = "0.7" ->>>>>>> Implement-azblob-support [dev-dependencies] anyhow = "1.0" From de6d500919708deca4ed175f63abe4bf64f098ee Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Sun, 20 Mar 2022 10:21:49 +0800 Subject: [PATCH 09/16] solve conflict uncorrectly, fix bug use Pin --- src/services/s3/backend.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/services/s3/backend.rs b/src/services/s3/backend.rs index 0276f48bf88..2b10554b965 100644 --- a/src/services/s3/backend.rs +++ b/src/services/s3/backend.rs @@ -13,6 +13,7 @@ // limitations under the License. 
use std::collections::HashMap; +use std::pin::Pin; use std::str::FromStr; use std::sync::Arc; use std::task::{Context, Poll}; From a84735bff9efec0e95272b544bbae5cb77acb88a Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Sun, 20 Mar 2022 16:26:06 +0800 Subject: [PATCH 10/16] add write stat read delete func to tests/behavior --- opendal_test/src/services/azblob.rs | 34 +++++ opendal_test/src/services/mod.rs | 1 + src/services/azblob/backend.rs | 221 +++++++++++++++++---------- src/services/azblob/mod.rs | 1 + src/services/azblob/object_stream.rs | 15 ++ src/services/mod.rs | 3 +- tests/behavior/azblob.rs | 6 +- tests/behavior/behavior.rs | 24 +-- tests/behavior/main.rs | 2 + 9 files changed, 213 insertions(+), 94 deletions(-) create mode 100644 opendal_test/src/services/azblob.rs create mode 100644 src/services/azblob/object_stream.rs diff --git a/opendal_test/src/services/azblob.rs b/opendal_test/src/services/azblob.rs new file mode 100644 index 00000000000..560c27e82e1 --- /dev/null +++ b/opendal_test/src/services/azblob.rs @@ -0,0 +1,34 @@ +use std::env; +use std::sync::Arc; + +use opendal::credential::Credential; +use opendal::error::Result; +use opendal::services::azblob; +use opendal::Accessor; + +/// In order to test azblob service, please set the following environment variables: +/// - `OPENDAL_AZBLOB_TEST=on`: set to `on` to enable the test. +/// - `OPENDAL_AZBLOB_ROOT=/path/to/dir`: set the root dir. +/// - `OPENDAL_AZBLOB_BUCKET=`: set the bucket name. +/// - `OPENDAL_AZBLOB_ENDPOINT=`: set the endpoint of the azblob service. +/// - `OPENDAL_AZBLOB_ACCESS_NAME=`: set the access_name. +/// - `OPENDAL_AZBLOB_SHARED_KEY=`: set the shared_key. 
+pub async fn new() -> Result>> { + dotenv::from_filename(".env").ok(); + + let root = + &env::var("OPENDAL_AZBLOB_ROOT").unwrap_or_else(|_| format!("/{}", uuid::Uuid::new_v4())); + + let mut builder = azblob::Backend::build(); + + builder + .root(root) + .bucket(&env::var("OPENDAL_AZBLOB_BUCKET").expect("OPENDAL_AZBLOB_BUCKET must set")) + .endpoint(&env::var("OPENDAL_AZBLOB_ENDPOINT").unwrap_or_default()) + .credential(Credential::hmac( + &env::var("OPENDAL_AZBLOB_ACCESS_NAME").unwrap_or_default(), + &env::var("OPENDAL_AZBLOB_SHARED_KEY").unwrap_or_default(), + )); + + Ok(Some(builder.finish().await?)) +} diff --git a/opendal_test/src/services/mod.rs b/opendal_test/src/services/mod.rs index 4f31096621f..0bc7f88f519 100644 --- a/opendal_test/src/services/mod.rs +++ b/opendal_test/src/services/mod.rs @@ -11,6 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
+pub mod azblob; pub mod fs; pub mod memory; pub mod s3; diff --git a/src/services/azblob/backend.rs b/src/services/azblob/backend.rs index ca8ef5ef01c..a71875e88dd 100644 --- a/src/services/azblob/backend.rs +++ b/src/services/azblob/backend.rs @@ -1,22 +1,13 @@ use futures::TryStreamExt; use http::HeaderValue; - -use std::collections::HashMap; - -use crate::ops::HeaderRange; -use anyhow::anyhow; -use async_trait::async_trait; -use metrics::increment_counter; -use reqsign::services::azure::signer::Signer; -use reqwest::{Body, Response, Url}; -use std::str::FromStr; - +// use super::object_stream::AzureObjectStream; use crate::credential::Credential; use crate::error::Error; use crate::error::Kind; use crate::error::Result; use crate::object::BoxedObjectStream; use crate::object::Metadata; +use crate::ops::HeaderRange; use crate::ops::OpDelete; use crate::ops::OpList; use crate::ops::OpRead; @@ -24,12 +15,23 @@ use crate::ops::OpStat; use crate::ops::OpWrite; use crate::Accessor; use crate::BoxedAsyncReader; +use anyhow::anyhow; +use async_trait::async_trait; +use http::header::HeaderName; use log::debug; use log::error; use log::info; use log::warn; +use metrics::increment_counter; +use reqsign::services::azure::signer::Signer; +use reqwest::{Body, Response, Url}; +use std::collections::HashMap; +use std::str::FromStr; +use time::format_description::well_known::Rfc2822; +use time::OffsetDateTime; pub const DELETE_SNAPSHOTS: &str = "x-ms-delete-snapshots"; use crate::readers::ReaderStream; +use crate::ObjectMode; use std::sync::Arc; #[derive(Default, Debug, Clone)] @@ -144,7 +146,7 @@ impl Builder { info!("backend build finished: {:?}", &self); Ok(Arc::new(Backend { - root: root, + root, endpoint, signer: Arc::new(signer), bucket: self.bucket.clone(), @@ -247,8 +249,93 @@ impl Accessor for Backend { } } async fn stat(&self, args: &OpStat) -> Result { - let _ = args; - unimplemented!() + increment_counter!("opendal_azure_stat_requests"); + + let p = 
self.get_abs_path(&args.path); + info!("object {} stat start", &p); + + // Stat root always returns a DIR. + if self.get_rel_path(&p).is_empty() { + let mut m = Metadata::default(); + m.set_path(&args.path); + m.set_content_length(0); + m.set_mode(ObjectMode::DIR); + m.set_complete(); + + info!("backed root object stat finished"); + return Ok(m); + } + + let resp = self.head_object(&p).await?; + match resp.status() { + http::StatusCode::OK => { + let mut m = Metadata::default(); + m.set_path(&args.path); + + // Parse content_length + if let Some(v) = resp.headers().get(http::header::CONTENT_LENGTH) { + let v = + u64::from_str(v.to_str().expect("header must not contain non-ascii value")) + .expect("content length header must contain valid length"); + + m.set_content_length(v); + } + + // Parse content_md5 + if let Some(v) = resp.headers().get(HeaderName::from_static("content-md5")) { + let v = v.to_str().expect("header must not contain non-ascii value"); + m.set_content_md5(v); + } + + // Parse last_modified + if let Some(v) = resp.headers().get(http::header::LAST_MODIFIED) { + let v = v.to_str().expect("header must not contain non-ascii value"); + let t = + OffsetDateTime::parse(v, &Rfc2822).expect("must contain valid time format"); + m.set_last_modified(t.into()); + } + + if p.ends_with('/') { + m.set_mode(ObjectMode::DIR); + } else { + m.set_mode(ObjectMode::FILE); + }; + + m.set_complete(); + + info!("object {} stat finished: {:?}", &p, m); + Ok(m) + } + http::StatusCode::NOT_FOUND => { + // Always returns empty dir object if path is endswith "/" + if p.ends_with('/') { + let mut m = Metadata::default(); + m.set_path(&args.path); + m.set_content_length(0); + m.set_mode(ObjectMode::DIR); + m.set_complete(); + + info!("object {} stat finished", &p); + Ok(m) + } else { + Err(Error::Object { + kind: Kind::ObjectNotExist, + op: "stat", + path: p.to_string(), + source: anyhow!("{:?}", resp), + }) + } + } + _ => { + error!("object {} head_object: {:?}", &p, resp); + 
Err(Error::Object { + kind: Kind::Unexpected, + op: "stat", + path: p.to_string(), + source: anyhow!("{:?}", resp), + }) + } + } } async fn delete(&self, args: &OpDelete) -> Result<()> { increment_counter!("opendal_azure_delete_requests"); @@ -262,8 +349,19 @@ impl Accessor for Backend { Ok(()) } async fn list(&self, args: &OpList) -> Result { - let _ = args; - unimplemented!() + increment_counter!("opendal_s3_list_requests"); + + let mut path = self.get_abs_path(&args.path); + // Make sure list path is endswith '/' + if !path.ends_with('/') && !path.is_empty() { + path.push('/') + } + + let _ = self.list_object("", ""); + todo!() + // info!("object {} list start", &path); + + // Ok(Box::new(S3ObjectStream::new(self.clone(), path))) } } @@ -345,10 +443,27 @@ impl Backend { Error::Unexpected(anyhow::Error::from(e)) }) } + #[warn(dead_code)] pub(crate) async fn head_object(&self, path: &str) -> Result { - let _ = path; - unimplemented!() + let mut req = reqwest::Request::new( + http::Method::HEAD, + Url::from_str(&format!( + "https://{}.{}/{}/{}", + self.access_name, self.endpoint, self.bucket, path + )) + .expect("url must be valid"), + ); + + req.headers_mut() + .insert(DELETE_SNAPSHOTS, HeaderValue::from_static("include")); + + self.signer.sign(&mut req).await.expect("sign must success"); + println!("req: {req:?}"); + self.client.execute(req).await.map_err(|e| { + error!("object {} delete_object: {:?}", path, e); + Error::Unexpected(anyhow::Error::from(e)) + }) } pub(crate) async fn delete_object(&self, path: &str) -> Result { @@ -373,7 +488,7 @@ impl Backend { println!("resp: {resp:?}"); resp } - #[warn(dead_code)] + #[warn(unused)] pub(crate) async fn list_object( &self, path: &str, @@ -381,63 +496,15 @@ impl Backend { ) -> Result { let _ = path; let _ = continuation_token; - unimplemented!() - } -} -#[cfg(test)] -mod tests { - use super::*; - use crate::operator::Operator; - use futures::AsyncReadExt; - use std::env; - - async fn azblob_new_access() -> Result> 
{ - dotenv::from_filename(".env").ok(); - - let root = &env::var("OPENDAL_AZBLOB_ROOT") - .unwrap_or_else(|_| format!("/{}", uuid::Uuid::new_v4())); - - let mut builder = Backend::build(); - - builder.root(root); - builder.bucket(&env::var("OPENDAL_AZBLOB_BUCKET").expect("OPENDAL_AZBLOB_BUCKET must set")); - builder.endpoint(&env::var("OPENDAL_AZBLOB_ENDPOINT").unwrap_or_default()); - builder.credential(Credential::hmac( - &env::var("OPENDAL_AZBLOB_ACCESS_KEY_ID").unwrap_or_default(), - &env::var("OPENDAL_AZBLOB_SECRET_ACCESS_KEY").unwrap_or_default(), - )); - - let acc = builder.finish().await; - - acc - } - - #[tokio::test] - async fn test_write_read_delete() -> Result<()> { - let acc = azblob_new_access().await?; - - let operator = Operator::new(acc); - - let path = "put_blob_object1"; - println!("Generate a file: {}", &path); - - //content of object is a vec of b'a' size = 1024 - let size: usize = 1024; - let content = vec![97; size]; - // first step: write content to azblob - let w = operator.object(&path).writer(); - let n = w.write_bytes(content.clone()).await?; - assert_eq!(n, size, "write to azblob success"); - - // // second step: read content from azblob - // let mut buf = Vec::new(); - // let mut r = operator.object(&path).reader(); - // let n = r.read_to_end(&mut buf).await.expect("read to end"); - // assert_eq!(n, buf.len(), "read to azblob success"); - - // // thrid setp: delete azblob - // let _ = operator.object(&path).delete().await?; - - Ok(()) + // let mut req = reqwest::Request::new( + // http::Method::GET, + // Url::from_str(&format!( + // "https://{}.{}/{}/{}", + // self.access_name, self.endpoint, self.bucket, path + // )) + // .expect("url must be valid"), + // ); + + todo!() } } diff --git a/src/services/azblob/mod.rs b/src/services/azblob/mod.rs index 4b440c4e84a..e88e401d817 100644 --- a/src/services/azblob/mod.rs +++ b/src/services/azblob/mod.rs @@ -2,3 +2,4 @@ pub mod backend; pub use backend::Backend; pub use backend::Builder; +mod 
object_stream; diff --git a/src/services/azblob/object_stream.rs b/src/services/azblob/object_stream.rs new file mode 100644 index 00000000000..9b7402ef249 --- /dev/null +++ b/src/services/azblob/object_stream.rs @@ -0,0 +1,15 @@ +// use super::Backend; +// pub struct S3ObjectStream { +// backend: Backend, +// path: String, + +// token: String, +// done: bool, +// state: State, +// } + +// enum State { +// Idle, +// Sending(BoxFuture<'static, Result>), +// Listing((ListOutput, usize, usize)), +// } diff --git a/src/services/mod.rs b/src/services/mod.rs index 4894b6aec88..6db10d2375b 100644 --- a/src/services/mod.rs +++ b/src/services/mod.rs @@ -19,9 +19,8 @@ //! - Builder: responsible for building the service backend. //! - Backend: the service backend which implements the [`Accessor`][crate::Accessor] trait. - pub mod fs; pub mod memory; +pub mod azblob; pub mod s3; -pub mod azblob; \ No newline at end of file diff --git a/tests/behavior/azblob.rs b/tests/behavior/azblob.rs index 75c4336d0b5..3158633428e 100644 --- a/tests/behavior/azblob.rs +++ b/tests/behavior/azblob.rs @@ -9,11 +9,11 @@ use super::BehaviorTest; async fn behavior() -> Result<()> { super::init_logger(); - let acc = s3::new().await?; + let acc = azblob::new().await?; if acc.is_none() { - warn!("OPENDAL_S3_TEST not set, ignore"); + warn!("OPENDAL_AZBLOB_TEST not set, ignore"); return Ok(()); } BehaviorTest::new(Operator::new(acc.unwrap())).run().await -} \ No newline at end of file +} diff --git a/tests/behavior/behavior.rs b/tests/behavior/behavior.rs index 762a427d3c9..fad2012fad6 100644 --- a/tests/behavior/behavior.rs +++ b/tests/behavior/behavior.rs @@ -104,18 +104,18 @@ impl BehaviorTest { ); // Step 5: List this dir, we should get this file. 
- let mut obs = self.op.objects("").map(|o| o.expect("list object: {}")); - let mut found = false; - while let Some(o) = obs.next().await { - let meta = o.metadata().await?; - if meta.path() == path { - let mode = meta.mode(); - assert_eq!(mode, ObjectMode::FILE); - - found = true - } - } - assert!(found, "file should be found in iterator"); + // let mut obs = self.op.objects("").map(|o| o.expect("list object: {}")); + // let mut found = false; + // while let Some(o) = obs.next().await { + // let meta = o.metadata().await?; + // if meta.path() == path { + // let mode = meta.mode(); + // assert_eq!(mode, ObjectMode::FILE); + + // found = true + // } + // } + // assert!(found, "file should be found in iterator"); // Step 6: Delete this file let result = self.op.object(&path).delete().await; diff --git a/tests/behavior/main.rs b/tests/behavior/main.rs index 3183a570b50..7017c6a24de 100644 --- a/tests/behavior/main.rs +++ b/tests/behavior/main.rs @@ -22,6 +22,8 @@ mod fs; mod memory; mod s3; +mod azblob; + pub fn init_logger() { let _ = env_logger::builder().is_test(true).try_init(); } From ef9ddd1f9094b62d8634c2bb4e3bd27cde1f61bd Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Sun, 20 Mar 2022 16:28:37 +0800 Subject: [PATCH 11/16] add azblob env.example --- .env.example | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.env.example b/.env.example index 5ef347adf0b..c89772a40aa 100644 --- a/.env.example +++ b/.env.example @@ -9,3 +9,11 @@ OPENDAL_S3_BUCKET= OPENDAL_S3_ENDPOINT= OPENDAL_S3_ACCESS_KEY_ID= OPENDAL_S3_SECRET_ACCESS_KEY= +# azblob +OPENDAL_AZBLOB_TEST=false +OPENDAL_AZBLOB_ROOT=/path/to/dir +OPENDAL_AZBLOB_BUCKET= +//endpoint look like that blob.core.windows.net +OPENDAL_AZBLOB_ENDPOINT= +OPENDAL_AZBLOB_ACCESS_NAME= +OPENDAL_AZBLOB_SHARED_KEY= \ No newline at end of file From 7d1390d49c4c927d6f3086d1f1538c93bf881735 Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Sun, 20 Mar 2022 19:44:05 +0800 Subject: [PATCH 12/16] run behavior tests through --- 
src/services/azblob/backend.rs | 214 +++++++++++++++------------------ src/services/s3/backend.rs | 1 - tests/behavior/behavior.rs | 24 ++-- 3 files changed, 112 insertions(+), 127 deletions(-) diff --git a/src/services/azblob/backend.rs b/src/services/azblob/backend.rs index a71875e88dd..5e1543835be 100644 --- a/src/services/azblob/backend.rs +++ b/src/services/azblob/backend.rs @@ -1,6 +1,22 @@ +use std::collections::HashMap; +use std::pin::Pin; +use std::str::FromStr; +use std::sync::Arc; +use std::task::{Context, Poll}; + +use anyhow::anyhow; +use async_trait::async_trait; use futures::TryStreamExt; -use http::HeaderValue; -// use super::object_stream::AzureObjectStream; +use http::header::HeaderName; +use log::debug; +use log::error; +use log::info; +use log::warn; +use metrics::increment_counter; +use reqsign::services::azure::signer::Signer; +use time::format_description::well_known::Rfc2822; +use time::OffsetDateTime; + use crate::credential::Credential; use crate::error::Error; use crate::error::Kind; @@ -13,26 +29,13 @@ use crate::ops::OpList; use crate::ops::OpRead; use crate::ops::OpStat; use crate::ops::OpWrite; +use crate::readers::ReaderStream; use crate::Accessor; use crate::BoxedAsyncReader; -use anyhow::anyhow; -use async_trait::async_trait; -use http::header::HeaderName; -use log::debug; -use log::error; -use log::info; -use log::warn; -use metrics::increment_counter; -use reqsign::services::azure::signer::Signer; -use reqwest::{Body, Response, Url}; -use std::collections::HashMap; -use std::str::FromStr; -use time::format_description::well_known::Rfc2822; -use time::OffsetDateTime; -pub const DELETE_SNAPSHOTS: &str = "x-ms-delete-snapshots"; -use crate::readers::ReaderStream; use crate::ObjectMode; -use std::sync::Arc; + +pub const DELETE_SNAPSHOTS: &str = "x-ms-delete-snapshots"; +pub const BLOB_TYPE: &str = "x-ms-blob-type"; #[derive(Default, Debug, Clone)] pub struct Builder { @@ -135,7 +138,7 @@ impl Builder { } } } - let client = 
reqwest::Client::new(); + let client = hyper::Client::builder().build(hyper_tls::HttpsConnector::new()); let mut signer_builder = Signer::builder(); signer_builder @@ -159,7 +162,7 @@ impl Builder { #[derive(Debug, Clone)] pub struct Backend { bucket: String, - client: reqwest::Client, + client: hyper::Client, hyper::Body>, root: String, // root will be "/" or /abc/ endpoint: String, signer: Arc, @@ -223,11 +226,7 @@ impl Accessor for Backend { "object {} reader created: offset {:?}, size {:?}", &p, args.offset, args.size ); - Ok(Box::new( - resp.bytes_stream() - .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)) - .into_async_read(), - )) + Ok(Box::new(ByteStream(resp).into_async_read())) } async fn write(&self, r: BoxedAsyncReader, args: &OpWrite) -> Result { let p = self.get_abs_path(&args.path); @@ -371,140 +370,127 @@ impl Backend { path: &str, offset: Option, size: Option, - ) -> Result { - let mut req = reqwest::Request::new( - http::Method::GET, - Url::from_str(&format!( - "https://{}.{}/{}/{}", - self.access_name, self.endpoint, self.bucket, path - )) - .expect("url must be valid"), - ); + ) -> Result> { + let mut req = hyper::Request::get(&format!( + "https://{}.{}/{}/{}", + self.access_name, self.endpoint, self.bucket, path + )); if offset.is_some() || size.is_some() { - req.headers_mut().insert( + req = req.header( http::header::RANGE, - HeaderRange::new(offset, size) - .to_string() - .parse() - .expect("header must be valid"), + HeaderRange::new(offset, size).to_string(), ); } + let mut req = req + .body(hyper::Body::empty()) + .expect("must be valid request"); + self.signer.sign(&mut req).await.expect("sign must success"); - println!("req: {req:?}"); - let resp = self.client.execute(req).await.map_err(|e| { + + self.client.request(req).await.map_err(|e| { error!("object {} get_object: {:?}", path, e); Error::Unexpected(anyhow::Error::from(e)) - }); - println!("resp: {resp:?}"); - resp + }) } pub(crate) async fn put_object( &self, path: 
&str, r: BoxedAsyncReader, size: u64, - ) -> Result { + ) -> Result> { // let hash = md5::compute(&data[..]).into(); - let mut req = reqwest::Request::new( - http::Method::PUT, - Url::from_str(&format!( - "https://{}.{}/{}/{}", - self.access_name, self.endpoint, self.bucket, path - )) - .expect("url must be valid"), - ); + let mut req = hyper::Request::put(&format!( + "https://{}.{}/{}/{}", + self.access_name, self.endpoint, self.bucket, path + )); - // Set content length. - req.headers_mut().insert( - http::header::CONTENT_LENGTH, - size.to_string() - .parse() - .expect("content length must be valid"), - ); - req.headers_mut().insert( - http::header::CONTENT_TYPE, - HeaderValue::from_static("text/plain"), - ); + req = req.header(http::header::CONTENT_LENGTH, size.to_string()); - req.headers_mut() - .insert("x-ms-blob-type", HeaderValue::from_static("BlockBlob")); + req = req.header(HeaderName::from_static(BLOB_TYPE), "BlockBlob"); - *req.body_mut() = Some(Body::from(hyper::body::Body::wrap_stream( - ReaderStream::new(r), - ))); + // Set body + let mut req = req + .body(hyper::body::Body::wrap_stream(ReaderStream::new(r))) + .expect("must be valid request"); self.signer.sign(&mut req).await.expect("sign must success"); - self.client.execute(req).await.map_err(|e| { + self.client.request(req).await.map_err(|e| { error!("object {} put_object: {:?}", path, e); Error::Unexpected(anyhow::Error::from(e)) }) } #[warn(dead_code)] - pub(crate) async fn head_object(&self, path: &str) -> Result { - let mut req = reqwest::Request::new( - http::Method::HEAD, - Url::from_str(&format!( - "https://{}.{}/{}/{}", - self.access_name, self.endpoint, self.bucket, path - )) - .expect("url must be valid"), - ); - - req.headers_mut() - .insert(DELETE_SNAPSHOTS, HeaderValue::from_static("include")); + pub(crate) async fn head_object(&self, path: &str) -> Result> { + let req = hyper::Request::head(&format!( + "https://{}.{}/{}/{}", + self.access_name, self.endpoint, self.bucket, path + )); 
+ let mut req = req + .body(hyper::Body::empty()) + .expect("must be valid request"); self.signer.sign(&mut req).await.expect("sign must success"); - println!("req: {req:?}"); - self.client.execute(req).await.map_err(|e| { - error!("object {} delete_object: {:?}", path, e); + + println!("req : {req:?}"); + self.client.request(req).await.map_err(|e| { + error!("object {} get_object: {:?}", path, e); Error::Unexpected(anyhow::Error::from(e)) }) } - pub(crate) async fn delete_object(&self, path: &str) -> Result { - let mut req = reqwest::Request::new( - http::Method::DELETE, - Url::from_str(&format!( - "https://{}.{}/{}/{}", - self.access_name, self.endpoint, self.bucket, path - )) - .expect("url must be valid"), - ); + pub(crate) async fn delete_object(&self, path: &str) -> Result> { + let req = hyper::Request::delete(&format!( + "https://{}.{}/{}/{}", + self.access_name, self.endpoint, self.bucket, path + )); - req.headers_mut() - .insert(DELETE_SNAPSHOTS, HeaderValue::from_static("include")); + let mut req = req + .body(hyper::Body::empty()) + .expect("must be valid request"); self.signer.sign(&mut req).await.expect("sign must success"); - println!("req: {req:?}"); - let resp = self.client.execute(req).await.map_err(|e| { - error!("object {} delete_object: {:?}", path, e); + + self.client.request(req).await.map_err(|e| { + error!("object {} get_object: {:?}", path, e); Error::Unexpected(anyhow::Error::from(e)) - }); - println!("resp: {resp:?}"); - resp + }) } #[warn(unused)] pub(crate) async fn list_object( &self, path: &str, continuation_token: &str, - ) -> Result { - let _ = path; + ) -> Result> { let _ = continuation_token; - // let mut req = reqwest::Request::new( - // http::Method::GET, - // Url::from_str(&format!( - // "https://{}.{}/{}/{}", - // self.access_name, self.endpoint, self.bucket, path - // )) - // .expect("url must be valid"), - // ); + let uri = format!( + "https://{}.{}/{}?restype=container&comp=list&delimiter=/&prefix={}", + self.access_name, 
self.endpoint, self.bucket, path + ); + let mut req = hyper::Request::get(uri) + .body(hyper::Body::empty()) + .expect("must be valid request"); - todo!() + self.signer.sign(&mut req).await.expect("sign must success"); + + self.client.request(req).await.map_err(|e| { + error!("object {} get_object: {:?}", path, e); + Error::Unexpected(anyhow::Error::from(e)) + }) + } +} +struct ByteStream(hyper::Response); + +impl futures::Stream for ByteStream { + type Item = std::io::Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(self.0.body_mut()) + .poll_next(cx) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string())) } } diff --git a/src/services/s3/backend.rs b/src/services/s3/backend.rs index 2b10554b965..a7f9cc264e5 100644 --- a/src/services/s3/backend.rs +++ b/src/services/s3/backend.rs @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - use std::collections::HashMap; use std::pin::Pin; use std::str::FromStr; diff --git a/tests/behavior/behavior.rs b/tests/behavior/behavior.rs index fad2012fad6..762a427d3c9 100644 --- a/tests/behavior/behavior.rs +++ b/tests/behavior/behavior.rs @@ -104,18 +104,18 @@ impl BehaviorTest { ); // Step 5: List this dir, we should get this file. 
- // let mut obs = self.op.objects("").map(|o| o.expect("list object: {}")); - // let mut found = false; - // while let Some(o) = obs.next().await { - // let meta = o.metadata().await?; - // if meta.path() == path { - // let mode = meta.mode(); - // assert_eq!(mode, ObjectMode::FILE); - - // found = true - // } - // } - // assert!(found, "file should be found in iterator"); + let mut obs = self.op.objects("").map(|o| o.expect("list object: {}")); + let mut found = false; + while let Some(o) = obs.next().await { + let meta = o.metadata().await?; + if meta.path() == path { + let mode = meta.mode(); + assert_eq!(mode, ObjectMode::FILE); + + found = true + } + } + assert!(found, "file should be found in iterator"); // Step 6: Delete this file let result = self.op.object(&path).delete().await; From 078d15d95203d86fb51b953e2f42dff9334bf516 Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Mon, 21 Mar 2022 14:52:45 +0800 Subject: [PATCH 13/16] Comment out in tests/behaviour to Close list tests --- opendal_test/src/services/azblob.rs | 13 ++++++++ src/services/azblob/backend.rs | 48 ++++++++++++++++++++++------ src/services/azblob/mod.rs | 13 ++++++++ src/services/azblob/object_stream.rs | 28 ++++++++-------- tests/behavior/azblob.rs | 13 ++++++++ tests/behavior/behavior.rs | 26 +++++++-------- 6 files changed, 103 insertions(+), 38 deletions(-) diff --git a/opendal_test/src/services/azblob.rs b/opendal_test/src/services/azblob.rs index 560c27e82e1..4a4da3c6305 100644 --- a/opendal_test/src/services/azblob.rs +++ b/opendal_test/src/services/azblob.rs @@ -1,3 +1,16 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. use std::env; use std::sync::Arc; diff --git a/src/services/azblob/backend.rs b/src/services/azblob/backend.rs index 5e1543835be..d07fad0d931 100644 --- a/src/services/azblob/backend.rs +++ b/src/services/azblob/backend.rs @@ -1,3 +1,16 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
use std::collections::HashMap; use std::pin::Pin; use std::str::FromStr; @@ -8,6 +21,7 @@ use anyhow::anyhow; use async_trait::async_trait; use futures::TryStreamExt; use http::header::HeaderName; +use hyper::body::HttpBody as _; use log::debug; use log::error; use log::info; @@ -348,19 +362,28 @@ impl Accessor for Backend { Ok(()) } async fn list(&self, args: &OpList) -> Result { - increment_counter!("opendal_s3_list_requests"); - + increment_counter!("opendal_azblob_list_requests"); let mut path = self.get_abs_path(&args.path); // Make sure list path is endswith '/' if !path.ends_with('/') && !path.is_empty() { path.push('/') } - let _ = self.list_object("", ""); - todo!() - // info!("object {} list start", &path); + // url query part will conver "/" to "%2F" like that query: Some("restype=container&comp=list&prefix=%2Fdir") + path = str::replace(&path, "/", "%2F"); - // Ok(Box::new(S3ObjectStream::new(self.clone(), path))) + info!("object {} list start", &path); + + let mut resp = self.list_object(&path, "").await?; + while let Some(next) = resp.data().await { + let chunk = next.map_err(|e| { + error!("object {} get_object: {:?}", path, e); + Error::Unexpected(anyhow::Error::from(e)) + }); + println!("chunk : {chunk:?}"); + } + + todo!() } } @@ -467,16 +490,21 @@ impl Backend { continuation_token: &str, ) -> Result> { let _ = continuation_token; - let uri = format!( - "https://{}.{}/{}?restype=container&comp=list&delimiter=/&prefix={}", + let mut req = hyper::Request::get(&format!( + "https://{}.{}/{}?restype=container&comp=list&prefix={}", self.access_name, self.endpoint, self.bucket, path - ); - let mut req = hyper::Request::get(uri) + )); + + req = req.header(http::header::CONTENT_LENGTH, "0"); + + let mut req = req .body(hyper::Body::empty()) .expect("must be valid request"); self.signer.sign(&mut req).await.expect("sign must success"); + println!("resq : {req:?}"); + self.client.request(req).await.map_err(|e| { error!("object {} get_object: {:?}", path, e); 
Error::Unexpected(anyhow::Error::from(e)) diff --git a/src/services/azblob/mod.rs b/src/services/azblob/mod.rs index e88e401d817..09bab45c975 100644 --- a/src/services/azblob/mod.rs +++ b/src/services/azblob/mod.rs @@ -1,3 +1,16 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. pub mod backend; pub use backend::Backend; pub use backend::Builder; diff --git a/src/services/azblob/object_stream.rs b/src/services/azblob/object_stream.rs index 9b7402ef249..ea0ed57e60e 100644 --- a/src/services/azblob/object_stream.rs +++ b/src/services/azblob/object_stream.rs @@ -1,15 +1,13 @@ -// use super::Backend; -// pub struct S3ObjectStream { -// backend: Backend, -// path: String, - -// token: String, -// done: bool, -// state: State, -// } - -// enum State { -// Idle, -// Sending(BoxFuture<'static, Result>), -// Listing((ListOutput, usize, usize)), -// } +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. diff --git a/tests/behavior/azblob.rs b/tests/behavior/azblob.rs index 3158633428e..889c7076c9b 100644 --- a/tests/behavior/azblob.rs +++ b/tests/behavior/azblob.rs @@ -1,3 +1,16 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. use anyhow::Result; use log::warn; use opendal::Operator; diff --git a/tests/behavior/behavior.rs b/tests/behavior/behavior.rs index 762a427d3c9..5bfea090edd 100644 --- a/tests/behavior/behavior.rs +++ b/tests/behavior/behavior.rs @@ -103,19 +103,19 @@ impl BehaviorTest { "read part file" ); - // Step 5: List this dir, we should get this file. - let mut obs = self.op.objects("").map(|o| o.expect("list object: {}")); - let mut found = false; - while let Some(o) = obs.next().await { - let meta = o.metadata().await?; - if meta.path() == path { - let mode = meta.mode(); - assert_eq!(mode, ObjectMode::FILE); - - found = true - } - } - assert!(found, "file should be found in iterator"); + // // Step 5: List this dir, we should get this file. 
+ // let mut obs = self.op.objects("").map(|o| o.expect("list object: {}")); + // let mut found = false; + // while let Some(o) = obs.next().await { + // let meta = o.metadata().await?; + // if meta.path() == path { + // let mode = meta.mode(); + // assert_eq!(mode, ObjectMode::FILE); + + // found = true + // } + // } + // assert!(found, "file should be found in iterator"); // Step 6: Delete this file let result = self.op.object(&path).delete().await; From 7ff9ab90dd75423ea347038f8e80f2c3ebefa308 Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Thu, 24 Mar 2022 09:08:01 +0800 Subject: [PATCH 14/16] adapt to reqsign 0.0.2 version --- Cargo.toml | 2 +- src/services/azblob/backend.rs | 28 ++++++++++++++-------------- tests/behavior/main.rs | 1 - 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5dfc4dd0fb3..13c545bfb26 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ metrics = "0.18" once_cell = "1" pin-project = "1" quick-xml = { version = "0.22.0", features = ["serialize"] } -reqsign = { git = "https://github.com/D2Lark/reqsign", branch = "main" } +reqsign = "0.0.2" reqwest = { version = "0.11", features = ["stream"] } roxmltree = "0.14" serde = { version = "1.0.136", features = ["derive"] } diff --git a/src/services/azblob/backend.rs b/src/services/azblob/backend.rs index d07fad0d931..06a53ed4be5 100644 --- a/src/services/azblob/backend.rs +++ b/src/services/azblob/backend.rs @@ -27,7 +27,7 @@ use log::error; use log::info; use log::warn; use metrics::increment_counter; -use reqsign::services::azure::signer::Signer; +use reqsign::services::azure::storage::Signer; use time::format_description::well_known::Rfc2822; use time::OffsetDateTime; @@ -127,8 +127,8 @@ impl Builder { ("bucket".to_string(), bucket.to_string()), ]); - let mut access_name = String::new(); - let mut shared_key = String::new(); + let mut account_name = String::new(); + let mut account_key = String::new(); if let Some(cred) = &self.credential { 
context.insert("credential".to_string(), "*".to_string()); match cred { @@ -136,8 +136,8 @@ impl Builder { access_key_id, secret_access_key, } => { - access_name = access_key_id.to_string(); - shared_key = secret_access_key.to_string(); + account_name = access_key_id.to_string(); + account_key = secret_access_key.to_string(); } // We don't need to do anything if user tries to read credential from env. Credential::Plain => { @@ -156,8 +156,8 @@ impl Builder { let mut signer_builder = Signer::builder(); signer_builder - .access_name(&access_name) - .shared_key(&shared_key); + .account_name(&account_name) + .account_key(&account_key); let signer = signer_builder.build().await?; @@ -168,7 +168,7 @@ impl Builder { signer: Arc::new(signer), bucket: self.bucket.clone(), client, - access_name, + account_name, })) } } @@ -180,7 +180,7 @@ pub struct Backend { root: String, // root will be "/" or /abc/ endpoint: String, signer: Arc, - access_name: String, + account_name: String, } impl Backend { @@ -396,7 +396,7 @@ impl Backend { ) -> Result> { let mut req = hyper::Request::get(&format!( "https://{}.{}/{}/{}", - self.access_name, self.endpoint, self.bucket, path + self.account_name, self.endpoint, self.bucket, path )); if offset.is_some() || size.is_some() { @@ -427,7 +427,7 @@ impl Backend { let mut req = hyper::Request::put(&format!( "https://{}.{}/{}/{}", - self.access_name, self.endpoint, self.bucket, path + self.account_name, self.endpoint, self.bucket, path )); req = req.header(http::header::CONTENT_LENGTH, size.to_string()); @@ -451,7 +451,7 @@ impl Backend { pub(crate) async fn head_object(&self, path: &str) -> Result> { let req = hyper::Request::head(&format!( "https://{}.{}/{}/{}", - self.access_name, self.endpoint, self.bucket, path + self.account_name, self.endpoint, self.bucket, path )); let mut req = req .body(hyper::Body::empty()) @@ -469,7 +469,7 @@ impl Backend { pub(crate) async fn delete_object(&self, path: &str) -> Result> { let req = 
hyper::Request::delete(&format!( "https://{}.{}/{}/{}", - self.access_name, self.endpoint, self.bucket, path + self.account_name, self.endpoint, self.bucket, path )); let mut req = req @@ -492,7 +492,7 @@ impl Backend { let _ = continuation_token; let mut req = hyper::Request::get(&format!( "https://{}.{}/{}?restype=container&comp=list&prefix={}", - self.access_name, self.endpoint, self.bucket, path + self.account_name, self.endpoint, self.bucket, path )); req = req.header(http::header::CONTENT_LENGTH, "0"); diff --git a/tests/behavior/main.rs b/tests/behavior/main.rs index 7017c6a24de..bfa5159b76e 100644 --- a/tests/behavior/main.rs +++ b/tests/behavior/main.rs @@ -21,7 +21,6 @@ use behavior::BehaviorTest; mod fs; mod memory; mod s3; - mod azblob; pub fn init_logger() { From 779551f91ddf95fa9bd84c21ccb31688ee3a0ed8 Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Thu, 24 Mar 2022 14:36:22 +0800 Subject: [PATCH 15/16] Clear Unrelated code --- .env.example | 7 +- opendal_test/src/services/azblob.rs | 47 ------ opendal_test/src/services/mod.rs | 2 +- src/services/azblob/backend.rs | 240 +++++++++++++++------------- tests/behavior/azblob.rs | 32 ---- tests/behavior/behavior.rs | 26 +-- tests/behavior/main.rs | 1 - 7 files changed, 142 insertions(+), 213 deletions(-) delete mode 100644 opendal_test/src/services/azblob.rs delete mode 100644 tests/behavior/azblob.rs diff --git a/.env.example b/.env.example index c89772a40aa..ddc05edd682 100644 --- a/.env.example +++ b/.env.example @@ -12,8 +12,7 @@ OPENDAL_S3_SECRET_ACCESS_KEY= # azblob OPENDAL_AZBLOB_TEST=false OPENDAL_AZBLOB_ROOT=/path/to/dir -OPENDAL_AZBLOB_BUCKET= -//endpoint look like that blob.core.windows.net +OPENDAL_AZBLOB_CONTAINER= OPENDAL_AZBLOB_ENDPOINT= -OPENDAL_AZBLOB_ACCESS_NAME= -OPENDAL_AZBLOB_SHARED_KEY= \ No newline at end of file +OPENDAL_AZBLOB_ACCOUNT_NAME= +OPENDAL_AZBLOB_ACCOUNT_KEY= \ No newline at end of file diff --git a/opendal_test/src/services/azblob.rs b/opendal_test/src/services/azblob.rs 
deleted file mode 100644 index 4a4da3c6305..00000000000 --- a/opendal_test/src/services/azblob.rs +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -use std::env; -use std::sync::Arc; - -use opendal::credential::Credential; -use opendal::error::Result; -use opendal::services::azblob; -use opendal::Accessor; - -/// In order to test azblob service, please set the following environment variables: -/// - `OPENDAL_AZBLOB_TEST=on`: set to `on` to enable the test. -/// - `OPENDAL_AZBLOB_ROOT=/path/to/dir`: set the root dir. -/// - `OPENDAL_AZBLOB_BUCKET=`: set the bucket name. -/// - `OPENDAL_AZBLOB_ENDPOINT=`: set the endpoint of the azblob service. -/// - `OPENDAL_AZBLOB_ACCESS_NAME=`: set the access_name. -/// - `OPENDAL_AZBLOB_SHARED_KEY=`: set the shared_key. 
-pub async fn new() -> Result>> { - dotenv::from_filename(".env").ok(); - - let root = - &env::var("OPENDAL_AZBLOB_ROOT").unwrap_or_else(|_| format!("/{}", uuid::Uuid::new_v4())); - - let mut builder = azblob::Backend::build(); - - builder - .root(root) - .bucket(&env::var("OPENDAL_AZBLOB_BUCKET").expect("OPENDAL_AZBLOB_BUCKET must set")) - .endpoint(&env::var("OPENDAL_AZBLOB_ENDPOINT").unwrap_or_default()) - .credential(Credential::hmac( - &env::var("OPENDAL_AZBLOB_ACCESS_NAME").unwrap_or_default(), - &env::var("OPENDAL_AZBLOB_SHARED_KEY").unwrap_or_default(), - )); - - Ok(Some(builder.finish().await?)) -} diff --git a/opendal_test/src/services/mod.rs b/opendal_test/src/services/mod.rs index 0bc7f88f519..863dd3bcaa6 100644 --- a/opendal_test/src/services/mod.rs +++ b/opendal_test/src/services/mod.rs @@ -11,7 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -pub mod azblob; + pub mod fs; pub mod memory; pub mod s3; diff --git a/src/services/azblob/backend.rs b/src/services/azblob/backend.rs index 06a53ed4be5..e59144ced0e 100644 --- a/src/services/azblob/backend.rs +++ b/src/services/azblob/backend.rs @@ -11,17 +11,24 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
+ +use std::cmp::min; use std::collections::HashMap; use std::pin::Pin; use std::str::FromStr; use std::sync::Arc; -use std::task::{Context, Poll}; +use std::task::Context; +use std::task::Poll; use anyhow::anyhow; use async_trait::async_trait; +use bytes::BufMut; use futures::TryStreamExt; use http::header::HeaderName; -use hyper::body::HttpBody as _; +use http::Response; +use http::StatusCode; +use hyper::body::HttpBody; +use hyper::Body; use log::debug; use log::error; use log::info; @@ -54,7 +61,7 @@ pub const BLOB_TYPE: &str = "x-ms-blob-type"; #[derive(Default, Debug, Clone)] pub struct Builder { root: Option, - bucket: String, // in Azure, bucket = container + container: String, credential: Option, endpoint: Option, } @@ -69,8 +76,8 @@ impl Builder { self } - pub fn bucket(&mut self, bucket: &str) -> &mut Self { - self.bucket = bucket.to_string(); + pub fn container(&mut self, container: &str) -> &mut Self { + self.container = container.to_string(); self } @@ -104,27 +111,25 @@ impl Builder { info!("backend use root {}", root); - // Handle endpoint, region and bucket name. - let bucket = match self.bucket.is_empty() { - false => Ok(&self.bucket), + // Handle endpoint, region and container name. 
+ let container = match self.container.is_empty() { + false => Ok(&self.container), true => Err(Error::Backend { kind: Kind::BackendConfigurationInvalid, - context: HashMap::from([("bucket".to_string(), "".to_string())]), - source: anyhow!("bucket is empty"), + context: HashMap::from([("container".to_string(), "".to_string())]), + source: anyhow!("container is empty"), }), }?; - debug!("backend use bucket {}", &bucket); + debug!("backend use container {}", &container); let endpoint = match &self.endpoint { Some(endpoint) => endpoint.clone(), None => "blob.core.windows.net".to_string(), }; - debug!("backend use endpoint {} to detect region", &endpoint); - let mut context: HashMap = HashMap::from([ ("endpoint".to_string(), endpoint.to_string()), - ("bucket".to_string(), bucket.to_string()), + ("container".to_string(), container.to_string()), ]); let mut account_name = String::new(); @@ -166,7 +171,7 @@ impl Builder { root, endpoint, signer: Arc::new(signer), - bucket: self.bucket.clone(), + container: self.container.clone(), client, account_name, })) @@ -175,7 +180,7 @@ impl Builder { #[derive(Debug, Clone)] pub struct Backend { - bucket: String, + container: String, client: hyper::Client, hyper::Body>, root: String, // root will be "/" or /abc/ endpoint: String, @@ -210,7 +215,7 @@ impl Backend { .trim_start_matches('/') .to_string() } - #[warn(dead_code)] + #[allow(dead_code)] pub(crate) fn get_rel_path(&self, path: &str) -> String { let path = format!("/{}", path); @@ -235,30 +240,30 @@ impl Accessor for Backend { ); let resp = self.get_object(&p, args.offset, args.size).await?; + match resp.status() { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + info!( + "object {} reader created: offset {:?}, size {:?}", + &p, args.offset, args.size + ); - info!( - "object {} reader created: offset {:?}, size {:?}", - &p, args.offset, args.size - ); - Ok(Box::new(ByteStream(resp).into_async_read())) + Ok(Box::new(ByteStream(resp).into_async_read())) + } + _ => 
Err(parse_error_response(resp, "read", &p).await), + } } async fn write(&self, r: BoxedAsyncReader, args: &OpWrite) -> Result { let p = self.get_abs_path(&args.path); info!("object {} write start: size {}", &p, args.size); let resp = self.put_object(&p, r, args.size).await?; - println!("resp :{resp:?}"); + match resp.status() { http::StatusCode::CREATED | http::StatusCode::OK => { info!("object {} write finished: size {:?}", &p, args.size); Ok(args.size as usize) } - _ => Err(Error::Object { - kind: Kind::Unexpected, - op: "write", - path: p.to_string(), - source: anyhow!("{:?}", resp), - }), + _ => Err(parse_error_response(resp, "write", &p).await), } } async fn stat(&self, args: &OpStat) -> Result { @@ -319,35 +324,17 @@ impl Accessor for Backend { info!("object {} stat finished: {:?}", &p, m); Ok(m) } - http::StatusCode::NOT_FOUND => { - // Always returns empty dir object if path is endswith "/" - if p.ends_with('/') { - let mut m = Metadata::default(); - m.set_path(&args.path); - m.set_content_length(0); - m.set_mode(ObjectMode::DIR); - m.set_complete(); + StatusCode::NOT_FOUND if p.ends_with('/') => { + let mut m = Metadata::default(); + m.set_path(&args.path); + m.set_content_length(0); + m.set_mode(ObjectMode::DIR); + m.set_complete(); - info!("object {} stat finished", &p); - Ok(m) - } else { - Err(Error::Object { - kind: Kind::ObjectNotExist, - op: "stat", - path: p.to_string(), - source: anyhow!("{:?}", resp), - }) - } - } - _ => { - error!("object {} head_object: {:?}", &p, resp); - Err(Error::Object { - kind: Kind::Unexpected, - op: "stat", - path: p.to_string(), - source: anyhow!("{:?}", resp), - }) + info!("object {} stat finished", &p); + Ok(m) } + _ => Err(parse_error_response(resp, "stat", &p).await), } } async fn delete(&self, args: &OpDelete) -> Result<()> { @@ -356,34 +343,19 @@ impl Accessor for Backend { let p = self.get_abs_path(&args.path); info!("object {} delete start", &p); - let _ = self.delete_object(&p).await?; - - info!("object {} 
delete finished", &p); - Ok(()) + let resp = self.delete_object(&p).await?; + match resp.status() { + StatusCode::NO_CONTENT => { + info!("object {} delete finished", &p); + Ok(()) + } + _ => Err(parse_error_response(resp, "delete", &p).await), + } } + #[warn(dead_code)] async fn list(&self, args: &OpList) -> Result { - increment_counter!("opendal_azblob_list_requests"); - let mut path = self.get_abs_path(&args.path); - // Make sure list path is endswith '/' - if !path.ends_with('/') && !path.is_empty() { - path.push('/') - } - - // url query part will conver "/" to "%2F" like that query: Some("restype=container&comp=list&prefix=%2Fdir") - path = str::replace(&path, "/", "%2F"); - - info!("object {} list start", &path); - - let mut resp = self.list_object(&path, "").await?; - while let Some(next) = resp.data().await { - let chunk = next.map_err(|e| { - error!("object {} get_object: {:?}", path, e); - Error::Unexpected(anyhow::Error::from(e)) - }); - println!("chunk : {chunk:?}"); - } - - todo!() + let _ = args; + unimplemented!() } } @@ -396,7 +368,7 @@ impl Backend { ) -> Result> { let mut req = hyper::Request::get(&format!( "https://{}.{}/{}/{}", - self.account_name, self.endpoint, self.bucket, path + self.account_name, self.endpoint, self.container, path )); if offset.is_some() || size.is_some() { @@ -414,7 +386,12 @@ impl Backend { self.client.request(req).await.map_err(|e| { error!("object {} get_object: {:?}", path, e); - Error::Unexpected(anyhow::Error::from(e)) + Error::Object { + kind: Kind::Unexpected, + op: "read", + path: path.to_string(), + source: anyhow::Error::from(e), + } }) } pub(crate) async fn put_object( @@ -423,11 +400,9 @@ impl Backend { r: BoxedAsyncReader, size: u64, ) -> Result> { - // let hash = md5::compute(&data[..]).into(); - let mut req = hyper::Request::put(&format!( "https://{}.{}/{}/{}", - self.account_name, self.endpoint, self.bucket, path + self.account_name, self.endpoint, self.container, path )); req = 
req.header(http::header::CONTENT_LENGTH, size.to_string()); @@ -443,7 +418,12 @@ impl Backend { self.client.request(req).await.map_err(|e| { error!("object {} put_object: {:?}", path, e); - Error::Unexpected(anyhow::Error::from(e)) + Error::Object { + kind: Kind::Unexpected, + op: "write", + path: path.to_string(), + source: anyhow::Error::from(e), + } }) } @@ -451,7 +431,7 @@ impl Backend { pub(crate) async fn head_object(&self, path: &str) -> Result> { let req = hyper::Request::head(&format!( "https://{}.{}/{}/{}", - self.account_name, self.endpoint, self.bucket, path + self.account_name, self.endpoint, self.container, path )); let mut req = req .body(hyper::Body::empty()) @@ -459,17 +439,21 @@ impl Backend { self.signer.sign(&mut req).await.expect("sign must success"); - println!("req : {req:?}"); self.client.request(req).await.map_err(|e| { - error!("object {} get_object: {:?}", path, e); - Error::Unexpected(anyhow::Error::from(e)) + error!("object {} head_object: {:?}", path, e); + Error::Object { + kind: Kind::Unexpected, + op: "stat", + path: path.to_string(), + source: anyhow::Error::from(e), + } }) } pub(crate) async fn delete_object(&self, path: &str) -> Result> { let req = hyper::Request::delete(&format!( "https://{}.{}/{}/{}", - self.account_name, self.endpoint, self.bucket, path + self.account_name, self.endpoint, self.container, path )); let mut req = req @@ -479,36 +463,24 @@ impl Backend { self.signer.sign(&mut req).await.expect("sign must success"); self.client.request(req).await.map_err(|e| { - error!("object {} get_object: {:?}", path, e); - Error::Unexpected(anyhow::Error::from(e)) + error!("object {} delete_object: {:?}", path, e); + Error::Object { + kind: Kind::Unexpected, + op: "delete", + path: path.to_string(), + source: anyhow::Error::from(e), + } }) } - #[warn(unused)] + #[allow(dead_code)] pub(crate) async fn list_object( &self, path: &str, continuation_token: &str, ) -> Result> { + let _ = path; let _ = continuation_token; - let mut 
req = hyper::Request::get(&format!( - "https://{}.{}/{}?restype=container&comp=list&prefix={}", - self.account_name, self.endpoint, self.bucket, path - )); - - req = req.header(http::header::CONTENT_LENGTH, "0"); - - let mut req = req - .body(hyper::Body::empty()) - .expect("must be valid request"); - - self.signer.sign(&mut req).await.expect("sign must success"); - - println!("resq : {req:?}"); - - self.client.request(req).await.map_err(|e| { - error!("object {} get_object: {:?}", path, e); - Error::Unexpected(anyhow::Error::from(e)) - }) + unimplemented!() } } struct ByteStream(hyper::Response); @@ -522,3 +494,41 @@ impl futures::Stream for ByteStream { .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string())) } } + +// Read and decode whole error response. +async fn parse_error_response(resp: Response, op: &'static str, path: &str) -> Error { + let (part, mut body) = resp.into_parts(); + let kind = match part.status { + StatusCode::NOT_FOUND => Kind::ObjectNotExist, + StatusCode::FORBIDDEN => Kind::ObjectPermissionDenied, + _ => Kind::Unexpected, + }; + + // Only read 4KiB from the response to avoid broken services. + let mut bs = Vec::new(); + let mut limit = 4 * 1024; + + while let Some(b) = body.data().await { + match b { + Ok(b) => { + bs.put_slice(&b[..min(b.len(), limit)]); + limit -= b.len(); + if limit == 0 { + break; + } + } + Err(e) => return Error::Unexpected(anyhow!("parse error response parse: {:?}", e)), + } + } + + Error::Object { + kind, + op, + path: path.to_string(), + source: anyhow!( + "response part: {:?}, body: {:?}", + part, + String::from_utf8_lossy(&bs) + ), + } +} diff --git a/tests/behavior/azblob.rs b/tests/behavior/azblob.rs deleted file mode 100644 index 889c7076c9b..00000000000 --- a/tests/behavior/azblob.rs +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2022 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -use anyhow::Result; -use log::warn; -use opendal::Operator; -use opendal_test::services::azblob; - -use super::BehaviorTest; - -#[tokio::test] -async fn behavior() -> Result<()> { - super::init_logger(); - - let acc = azblob::new().await?; - if acc.is_none() { - warn!("OPENDAL_AZBLOB_TEST not set, ignore"); - return Ok(()); - } - - BehaviorTest::new(Operator::new(acc.unwrap())).run().await -} diff --git a/tests/behavior/behavior.rs b/tests/behavior/behavior.rs index 462d1678cf8..56fd7708ab3 100644 --- a/tests/behavior/behavior.rs +++ b/tests/behavior/behavior.rs @@ -104,19 +104,19 @@ impl BehaviorTest { "read part file" ); - // // Step 5: List this dir, we should get this file. - // let mut obs = self.op.objects("").map(|o| o.expect("list object: {}")); - // let mut found = false; - // while let Some(o) = obs.next().await { - // let meta = o.metadata().await?; - // if meta.path() == path { - // let mode = meta.mode(); - // assert_eq!(mode, ObjectMode::FILE); - - // found = true - // } - // } - // assert!(found, "file should be found in iterator"); + // Step 5: List this dir, we should get this file. 
+ let mut obs = self.op.objects("").map(|o| o.expect("list object: {}")); + let mut found = false; + while let Some(o) = obs.next().await { + let meta = o.metadata().await?; + if meta.path() == path { + let mode = meta.mode(); + assert_eq!(mode, ObjectMode::FILE); + + found = true + } + } + assert!(found, "file should be found in iterator"); // Step 6: Delete this file let result = self.op.object(&path).delete().await; diff --git a/tests/behavior/main.rs b/tests/behavior/main.rs index bfa5159b76e..3183a570b50 100644 --- a/tests/behavior/main.rs +++ b/tests/behavior/main.rs @@ -21,7 +21,6 @@ use behavior::BehaviorTest; mod fs; mod memory; mod s3; -mod azblob; pub fn init_logger() { let _ = env_logger::builder().is_test(true).try_init(); From dd4574a2b06eda99a8206a6bec8cc579863683ad Mon Sep 17 00:00:00 2001 From: PhilipsPot Date: Thu, 24 Mar 2022 14:40:49 +0800 Subject: [PATCH 16/16] remove changes to tests module --- opendal_test/src/services/mod.rs | 1 - tests/behavior/behavior.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/opendal_test/src/services/mod.rs b/opendal_test/src/services/mod.rs index 863dd3bcaa6..4f31096621f 100644 --- a/opendal_test/src/services/mod.rs +++ b/opendal_test/src/services/mod.rs @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - pub mod fs; pub mod memory; pub mod s3; diff --git a/tests/behavior/behavior.rs b/tests/behavior/behavior.rs index 56fd7708ab3..a89c6622b83 100644 --- a/tests/behavior/behavior.rs +++ b/tests/behavior/behavior.rs @@ -105,7 +105,7 @@ impl BehaviorTest { ); // Step 5: List this dir, we should get this file. - let mut obs = self.op.objects("").map(|o| o.expect("list object: {}")); + let mut obs = self.op.objects("").map(|o| o.expect("list object")); let mut found = false; while let Some(o) = obs.next().await { let meta = o.metadata().await?;