From b662b46a44a087af316953b10aee537397f2fa2a Mon Sep 17 00:00:00 2001
From: 4pr0n
Date: Thu, 13 Mar 2014 02:26:55 -0700
Subject: [PATCH] Deviantart support

---
 .../ripme/ripper/AbstractRipper.java          |   2 +-
 .../ripper/rippers/DeviantartRipper.java      | 204 ++++++++++++++++++
 .../ripper/rippers/DeviantartRipperTest.java  |  38 ++++
 3 files changed, 243 insertions(+), 1 deletion(-)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
 create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java

diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
index 7893bdb3e..952f8420d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractRipper.java
@@ -99,7 +99,7 @@ public void addURLToDownload(URL url, File saveAs) {
                 || itemsCompleted.containsKey(url)
                 || itemsErrored.containsKey(url)) {
             // Item is already downloaded/downloading, skip it.
-            logger.info(" Skipping " + url + " -- already attempted: " + Utils.removeCWD(saveAs));
+            logger.info("[!] Skipping " + url + " -- already attempted: " + Utils.removeCWD(saveAs));
             return;
         }
         itemsPending.put(url, saveAs);
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
new file mode 100644
index 000000000..f37b347e7
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
@@ -0,0 +1,204 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+import org.jsoup.Connection.Method;
+import org.jsoup.Connection.Response;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractRipper;
+import com.rarchives.ripme.utils.Utils;
+
+/**
+ * Ripper for deviantart.com galleries: walks the gallery pages, converts each
+ * thumbnail URL into a full-size image URL and queues it for download.
+ */
+public class DeviantartRipper extends AbstractRipper {
+
+    private static final String DOMAIN = "deviantart.com",
+                                HOST = "deviantart";
+
+    /** Pause in milliseconds applied after each gallery page fetch. */
+    private static final int SLEEP_TIME = 2000;
+    private static final Logger logger = Logger.getLogger(DeviantartRipper.class);
+
+    public DeviantartRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    /** Accepts any URL whose host ends with the deviantart domain. */
+    @Override
+    public boolean canRip(URL url) {
+        return url.getHost().endsWith(DOMAIN);
+    }
+
+    /** Strips the query string (everything from the first '?') from the URL. */
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        String u = url.toExternalForm();
+        u = u.replaceAll("\\?.*", "");
+        return new URL(u);
+    }
+
+    /**
+     * Fetches gallery pages one at a time, queues every thumbnail's full-size
+     * image for download, and follows the "offset" pagination link until no
+     * further page is advertised.
+     */
+    @Override
+    public void rip() throws IOException {
+        int index = 0;
+        String nextURL = this.url.toExternalForm();
+        while (nextURL != null) {
+            logger.info(" Retrieving " + nextURL);
+            Document doc = Jsoup.connect(nextURL)
+                                .userAgent(USER_AGENT)
+                                .get();
+            try {
+                Thread.sleep(SLEEP_TIME);
+            } catch (InterruptedException e) {
+                logger.error("[!] Interrupted while waiting for page to load", e);
+                // Restore the interrupt flag so callers can see the interruption
+                Thread.currentThread().interrupt();
+                break;
+            }
+            for (Element thumb : doc.select("a.thumb img")) {
+                String fullSize = thumbToFull(thumb.attr("src"));
+                URL pageURL;
+                try {
+                    pageURL = new URL(fullSize);
+                } catch (MalformedURLException e) {
+                    // fullSize is already converted; do not convert it a second time
+                    logger.error("[!] Invalid thumbnail image: " + fullSize);
+                    continue;
+                }
+                index++;
+                addURLToDownload(pageURL, String.format("%03d_", index));
+            }
+            nextURL = null;
+            // Another page exists only if a link advertises the offset we have reached
+            for (Element nextButton : doc.select("a.away")) {
+                if (nextButton.attr("href").contains("offset=" + index)) {
+                    nextURL = this.url.toExternalForm() + "?offset=" + index;
+                }
+            }
+        }
+        waitForThreads();
+    }
+
+    /**
+     * Converts a thumbnail URL into the matching full-size image URL by
+     * switching the "http://th" prefix to "http://fc" and dropping the fifth
+     * "/"-separated field of the URL.
+     *
+     * @param thumb thumbnail URL taken from the gallery page
+     * @return the converted URL; inputs with fewer than five fields keep all
+     *         of their fields
+     */
+    public static String thumbToFull(String thumb) {
+        thumb = thumb.replace("http://th", "http://fc");
+        List<String> fields = new ArrayList<String>(Arrays.asList(thumb.split("/")));
+        // Inputs with fewer than five fields have nothing to drop; avoid IndexOutOfBoundsException
+        if (fields.size() > 4) {
+            fields.remove(4);
+        }
+        StringBuilder result = new StringBuilder();
+        for (int i = 0; i < fields.size(); i++) {
+            if (i > 0) {
+                result.append("/");
+            }
+            result.append(fields.get(i));
+        }
+        return result.toString();
+    }
+
+    @Override
+    public String getHost() {
+        return HOST;
+    }
+
+    /**
+     * Derives the album identifier from a gallery URL: the username for a root
+     * gallery, or "username_galleryId" for a numbered sub-gallery.
+     *
+     * @throws MalformedURLException if the URL matches neither gallery format
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com(/gallery)?/?$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            // Root gallery
+            return m.group(1);
+        }
+        p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/gallery/([0-9]{1,}).*$");
+        m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            // Subgallery
+            return m.group(1) + "_" + m.group(2);
+        }
+        throw new MalformedURLException("Expected URL format: http://username.deviantart.com/[/gallery/#####], got: " + url);
+    }
+
+    /**
+     * Logs into deviant art. Not required to rip NSFW images.
+     * @return Map of cookies containing session data.
+     * @throws IOException if credentials are missing from the config or the login is rejected
+     */
+    @SuppressWarnings("unused")
+    private Map<String, String> loginToDeviantart() throws IOException {
+        // Populate postData fields
+        Map<String, String> postData = new HashMap<String, String>();
+        String username = Utils.getConfigString("deviantart.username", null);
+        String password = Utils.getConfigString("deviantart.password", null);
+        if (username == null || password == null) {
+            throw new IOException("could not find username or password in config");
+        }
+        Response resp = Jsoup.connect("http://www.deviantart.com/")
+                             .userAgent(USER_AGENT)
+                             .method(Method.GET)
+                             .execute();
+        for (Element input : resp.parse().select("form#form-login input[type=hidden]")) {
+            postData.put(input.attr("name"), input.attr("value"));
+        }
+        postData.put("username", username);
+        postData.put("password", password);
+        postData.put("remember_me", "1");
+
+        // Send login request
+        resp = Jsoup.connect("https://www.deviantart.com/users/login")
+                    .userAgent(USER_AGENT)
+                    .data(postData)
+                    .cookies(resp.cookies())
+                    .method(Method.POST)
+                    .execute();
+
+        // Assert we are logged in
+        if (resp.hasHeader("Location") && resp.header("Location").contains("password")) {
+            // Wrong password
+            throw new IOException("Wrong password");
+        }
+        if (resp.url().toExternalForm().contains("bad_form")) {
+            throw new IOException("Login form was incorrectly submitted");
+        }
+        if (resp.cookie("auth_secure") == null ||
+            resp.cookie("auth") == null) {
+            throw new IOException("No auth_secure or auth cookies received");
+        }
+        // We are logged in, save the cookies
+        return resp.cookies();
+    }
+
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java
new file mode 100644
index 000000000..01a879e0c
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java
@@ -0,0 +1,38 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.rarchives.ripme.ripper.rippers.DeviantartRipper;
+
+public class DeviantartRipperTest extends RippersTest {
+
+    public void testDeviantartAlbums() throws IOException {
+        if (!DOWNLOAD_CONTENT) {
+            return;
+        }
+        List<URL> contentURLs = new ArrayList<URL>();
+
+        // Small gallery
+        contentURLs.add(new URL("http://airgee.deviantart.com/gallery/"));
+        // NSFW gallery
+        contentURLs.add(new URL("http://faterkcx.deviantart.com/gallery/"));
+        // Multi-page NSFW
+        contentURLs.add(new URL("http://geekysica.deviantart.com/gallery/35209412"));
+
+        for (URL url : contentURLs) {
+            try {
+                DeviantartRipper ripper = new DeviantartRipper(url);
+                ripper.rip();
+                assert(ripper.getWorkingDir().listFiles().length > 1);
+                deleteDir(ripper.getWorkingDir());
+            } catch (Exception e) {
+                e.printStackTrace();
+                fail("Error while ripping URL " + url + ": " + e.getMessage());
+            }
+        }
+    }
+
+}