From c1061b0ec4f63669d80360ab3c6f06321f861671 Mon Sep 17 00:00:00 2001
From: Matthias
Date: Sat, 4 Aug 2018 22:36:59 +0200
Subject: [PATCH 1/3] clippy lints

---
 src/lib.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 7bfeaed..6e2c52c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -183,7 +183,7 @@ impl ArchiveClient {
             .header("Content-Type", "application/x-www-form-urlencoded")
             .body(body.into())
             .unwrap();
-        let capture = self.client.request(req).map_err(|e|Error::Hyper(e)).and_then(move |resp| {
+        let capture = self.client.request(req).map_err(Error::Hyper).and_then(move |resp| {
             // get the url of the archived page
             let refresh = resp.headers().get("Refresh").and_then(|x| {
                 x.to_str()
@@ -210,7 +210,7 @@ impl ArchiveClient {
                 _ => {
                     // an err response body can be empty, contain Server Error or
                     // can directly contain the archived site, in that case we extract the archived_url
-                    let err_resp_handling = resp.into_body().concat2().map_err(|e|Error::Hyper(e)).and_then(move |ch| {
+                    let err_resp_handling = resp.into_body().concat2().map_err(Error::Hyper).and_then(move |ch| {
                         if let Ok(html) = ::std::str::from_utf8(&ch) {
                             if html.starts_with("

Server Error

") { return Box::new(self.capture(target_url.as_str())) @@ -259,11 +259,11 @@ impl ArchiveClient { self.client .request(req) - .map_err(|e| Error::Hyper(e)) + .map_err(Error::Hyper) .and_then(|res| { res.into_body() .concat2() - .map_err(|e| Error::Hyper(e)) + .map_err(Error::Hyper) .and_then(|ch| { ::std::str::from_utf8(&ch) .map_err(|_| Error::MissingToken) From 8c47fb833270c50ff8d8ea251812ee1c4ab3419a Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 5 Aug 2018 13:12:43 +0200 Subject: [PATCH 2/3] added capture_all support + bumped version --> 0.2.2 --- Cargo.toml | 9 ++++++-- README.md | 48 +++++++++++++++++++--------------------- src/lib.rs | 64 ++++++++++++++++++++++++++++++++++-------------------- 3 files changed, 70 insertions(+), 51 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 564ebea..016210e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,15 +1,20 @@ [package] authors = ["Matthias Seitz "] name = "archiveis" -version = "0.2.1" +version = "0.2.2" license = "MIT OR Apache-2.0" documentation = "https://docs.rs/archiveis" repository = "https://github.com/mattsse/archiveis-rs" readme = "README.md" +categories = ["command-line-utilities","api-bindings"] keywords = ["archive"] description = """ Archive websites online using the archive.is capturing service. """ +autobins = false +[[bin]] +name = "archiver" +path = "src/main.rs" [dependencies] hyper = "0.12.7" @@ -20,4 +25,4 @@ clap = "2.32.0" tokio-core = "0.1.17" [badges] -travis-ci = { repository = "mattsse/archiveis-rs" } +travis-ci = { repository = "MattsSe/archiveis-rs" } diff --git a/README.md b/README.md index 03a4e04..04c9337 100644 --- a/README.md +++ b/README.md @@ -40,12 +40,12 @@ fn main() { ### Archive mutliple urls archive.is uses a temporary token to validate a archive request. -The `ArchiveClient` `capture` function first obtains the token via a GET request. -The token is usually valid several minutes, and even if archive.is switches to a new token,the older ones are still valid. So if we need to archive multiple links, we can only need to obtain the token once and then invoke the capturing service directly with `capture_with_token` for each url. This can be done using the `future::join` functionality. -In the following case the designated `join_all` function is used to get Future of a `Vec`. - -An undesired sideeffect if the `join_all` is that this returns an Error if any of the futures failed. -The Capturing service should work fine in most cases but if individual error handling is desired, the capturing futures can be wrapped inside another `Result`. In an `And_Then` we can handle those failures. +The `ArchiveClient` `capture` function first obtains a new submit token via a GET request. +The token is usually valid several minutes, and even if archive.is switches to a new in the +meantime token,the older ones are still valid. So if we need to archive multiple links, +we can only need to obtain the token once and then invoke the capturing service directly with +`capture_with_token` for each url. `capture_all` returns a Vec of Results of every capturing +request, so every single capture request gets executed regardless of the success of prior requests. 
 ```rust
 extern crate archiveis;
 extern crate futures;
 extern crate tokio_core;

 use archiveis::ArchiveClient;
 use futures::future::{join_all, Future};
 use tokio_core::reactor::Core;

 fn main() {
     let mut core = Core::new().unwrap();
     let client = ArchiveClient::new(Some("archiveis (https://github.com/MattsSe/archiveis-rs)"));

     let urls = vec![
         "http://example.com/",
         "https://github.com/MattsSe/archiveis-rs",
         "https://crates.io",
     ];

-    let capture = client
-        .get_unique_token()
-        .and_then(|token| {
-            let mut futures = Vec::new();
-            for u in urls.into_iter() {
-                // optionally wrap the capturing result in another Result, to handle the failures in the next step
-                futures.push(client.capture_with_token(u, &token).then(|x| Ok(x)));
-            }
-            join_all(futures)
-        }).and_then(|archives| {
-            let failures: Vec<_> = archives.iter().map(Result::as_ref).filter(Result::is_err).map(Result::unwrap_err).collect();
-            if failures.is_empty() {
-                println!("all links successfully archived.");
-            } else {
-                for err in failures {
-                    if let archiveis::Error::MissingUrl(url) = err {
-                        println!("Failed to archive url: {}", url);
-                    }
+    let capture = client.capture_all(urls, None).and_then(|archives| {
+        let failures: Vec<_> = archives
+            .iter()
+            .map(Result::as_ref)
+            .filter(Result::is_err)
+            .map(Result::unwrap_err)
+            .collect();
+        if failures.is_empty() {
+            println!("all links successfully archived.");
+        } else {
+            for err in failures {
+                if let archiveis::Error::MissingUrl(url) = err {
+                    println!("Failed to archive url: {}", url);
                 }
             }
-            Ok(())
-        });
+        }
+        Ok(())
+    });

     core.run(capture).unwrap();
 }
 ```
diff --git a/src/lib.rs b/src/lib.rs
index 6e2c52c..7ff9cb5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -28,15 +28,12 @@
 //! ```
 //! ### Archive multiple urls
 //! archive.is uses a temporary token to validate a archive request.
-//! The `ArchiveClient` `capture` function first obtains the token via a GET request.
-//! The token is usually valid several minutes, and even if archive.is switches to a new token,the
-//! older ones are still valid. So if we need to archive multiple links, we can only need to obtain
-//! the token once and then invoke the capturing service directly with `capture_with_token` for each url.
-//! This can be done using the `future::join` functionality.
-//! In the following case the designated `join_all` function is used to get Future of a `Vec`.
-//! An undesired sideeffect if the `join_all` is that this returns an Error if any of the futures failed.
-//! The Capturing service should work fine in most cases but if individual error handling is desired, the
-//! capturing futures can be wrapped inside another `Result`. In an `And_Then` we can handle those failures.
+//! The `ArchiveClient` `capture` function first obtains a new submit token via a GET request.
+//! The token is usually valid for several minutes, and even if archive.is switches to a new token
+//! in the meantime, the older ones remain valid. So if we need to archive multiple links,
+//! we only need to obtain the token once and then invoke the capturing service directly with
+//! `capture_with_token` for each url. `capture_all` returns a Vec of Results, one for every capturing
+//! request, so every single capture request gets executed regardless of the success of prior requests.
 //!
 //! ```rust,no_run
 //! extern crate archiveis;
 //!
 //!     "https://crates.io",
 //! ];
 //!
-//! let capture = client
-//!     .get_unique_token()
-//!     .and_then(|token| {
-//!         let mut futures = Vec::new();
-//!         for u in urls.into_iter() {
-//!             // optionally wrap the capturing result in another Result, to handle the failures in the next step
-//!             futures.push(client.capture_with_token(u, &token).then(|x| Ok(x)));
-//!         }
-//!         join_all(futures)
-//!     }).and_then(|archives| {
-//!         let failures: Vec<_> = archives.iter().map(Result::as_ref).filter(Result::is_err).map(Result::unwrap_err).collect();
+//! let capture = client.capture_all(urls, None).and_then(|archives| {
+//!     let failures: Vec<_> = archives
+//!         .iter()
+//!         .map(Result::as_ref)
+//!         .filter(Result::is_err)
+//!         .map(Result::unwrap_err)
+//!         .collect();
 //!     if failures.is_empty() {
 //!         println!("all links successfully archived.");
 //!     } else {
-//!        for err in failures {
+//!         for err in failures {
 //!             if let archiveis::Error::MissingUrl(url) = err {
 //!                 println!("Failed to archive url: {}", url);
 //!             }
 //!         }
 //!     }
-//!    Ok(())
-//!    });
+//!     Ok(())
+//! });
 //! ```
 //!
@@ -136,6 +129,31 @@ impl ArchiveClient {
         }
     }

+    /// Invokes the archive.is capture service on each url supplied.
+    /// If no token was passed, a fresh token is obtained via `get_unique_token`;
+    /// afterwards all capture requests are joined in a single future that returns
+    /// a `Vec<Result<Archived, Error>>` which holds every result of the individual
+    /// capturing requests, so every single capture request gets executed regardless
+    /// of the success of prior requests.
+    pub fn capture_all<'a>(
+        &'a self,
+        urls: Vec<&'a str>,
+        token: Option<String>,
+    ) -> impl Future<Item = Vec<Result<Archived, Error>>, Error = Error> + 'a {
+        use futures::future::join_all;
+        let get_token: Box<Future<Item = String, Error = Error>> = match token {
+            Some(t) => Box::new(future::ok(t)),
+            _ => Box::new(self.get_unique_token()),
+        };
+        get_token.and_then(move |token| {
+            let mut futures = Vec::new();
+            for url in urls {
+                futures.push(self.capture_with_token(url, &token).then(Ok));
+            }
+            join_all(futures)
+        })
+    }
+
     /// Invokes the archive.is capture service.
     /// First it get's the current valid unique `submitid` by calling `get_unique_id`.
     /// Then it sends a new POST request to the archive.is submit endpoint with the `url` and the

From 71ef0f169d123e397b69e1359abbc0b0a2f3b71d Mon Sep 17 00:00:00 2001
From: Matthias
Date: Sun, 5 Aug 2018 13:22:50 +0200
Subject: [PATCH 3/3] created main for cli app

---
 src/main.rs | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 src/main.rs

diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..16a878a
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,4 @@
+fn main() {
+    println!("Nothing to see here yet.");
+    unimplemented!()
+}
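Patch 3 only adds a placeholder `main` for the `archiver` binary that patch 2 declares in Cargo.toml. Below is a rough, hypothetical sketch of how that binary could wire up the existing `ArchiveClient` and the new `capture_all`; the CLI flags, output format, and overall structure are assumptions for illustration, not code from this series. Passing `None` as the token lets `capture_all` fetch a single submit token and reuse it for every url.

```rust
// Hypothetical sketch for src/main.rs (the "archiver" binary); flag names and
// output are assumptions, not part of the patches above.
extern crate archiveis;
extern crate clap;
extern crate futures;
extern crate tokio_core;

use archiveis::ArchiveClient;
use clap::{App, Arg};
use futures::Future;
use tokio_core::reactor::Core;

fn main() {
    // Accept one or more urls as positional arguments.
    let matches = App::new("archiver")
        .about("Archive urls with the archive.is capturing service")
        .arg(
            Arg::with_name("urls")
                .required(true)
                .multiple(true)
                .help("The urls to capture"),
        ).get_matches();
    let urls: Vec<&str> = matches.values_of("urls").unwrap().collect();

    let mut core = Core::new().unwrap();
    let client = ArchiveClient::new(Some("archiveis (https://github.com/MattsSe/archiveis-rs)"));

    // A single submit token is fetched internally and shared across all capture requests.
    let work = client.capture_all(urls, None).and_then(|archives| {
        let (ok, failed): (Vec<_>, Vec<_>) = archives.iter().partition(|res| res.is_ok());
        println!("archived {} link(s), {} failure(s)", ok.len(), failed.len());
        Ok(())
    });
    core.run(work).unwrap();
}
```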