-
Notifications
You must be signed in to change notification settings - Fork 203
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
208 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
169 changes: 169 additions & 0 deletions
169
src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
package com.rarchives.ripme.ripper.rippers; | ||
|
||
import java.io.IOException; | ||
import java.net.MalformedURLException; | ||
import java.net.URL; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
import org.apache.log4j.Logger; | ||
import org.jsoup.Connection.Method; | ||
import org.jsoup.Connection.Response; | ||
import org.jsoup.Jsoup; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.nodes.Element; | ||
|
||
import com.rarchives.ripme.ripper.AbstractRipper; | ||
import com.rarchives.ripme.utils.Utils; | ||
|
||
public class DeviantartRipper extends AbstractRipper { | ||
|
||
private static final String DOMAIN = "deviantart.com", | ||
HOST = "deviantart"; | ||
|
||
private static final int SLEEP_TIME = 2000; | ||
private static final Logger logger = Logger.getLogger(DeviantartRipper.class); | ||
|
||
public DeviantartRipper(URL url) throws IOException { | ||
super(url); | ||
} | ||
|
||
@Override | ||
public boolean canRip(URL url) { | ||
return url.getHost().endsWith(DOMAIN); | ||
} | ||
|
||
@Override | ||
public URL sanitizeURL(URL url) throws MalformedURLException { | ||
String u = url.toExternalForm(); | ||
u = u.replaceAll("\\?.*", ""); | ||
return new URL(u); | ||
} | ||
|
||
@Override | ||
public void rip() throws IOException { | ||
int index = 0; | ||
String nextURL = this.url.toExternalForm(); | ||
while (nextURL != null) { | ||
logger.info(" Retrieving " + nextURL); | ||
Document doc = Jsoup.connect(nextURL) | ||
.userAgent(USER_AGENT) | ||
.get(); | ||
try { | ||
Thread.sleep(SLEEP_TIME); | ||
} catch (InterruptedException e) { | ||
logger.error("[!] Interrupted while waiting for page to load", e); | ||
break; | ||
} | ||
for (Element thumb : doc.select("a.thumb img")) { | ||
String fullSize = thumbToFull(thumb.attr("src")); | ||
URL pageURL; | ||
try { | ||
pageURL = new URL(fullSize); | ||
} catch (MalformedURLException e) { | ||
logger.error("[!] Invalid thumbnail image: " + thumbToFull(fullSize)); | ||
continue; | ||
} | ||
index++; | ||
addURLToDownload(pageURL, String.format("%03d_", index)); | ||
} | ||
nextURL = null; | ||
for (Element nextButton : doc.select("a.away")) { | ||
if (nextButton.attr("href").contains("offset=" + index)) { | ||
nextURL = this.url.toExternalForm() + "?offset=" + index; | ||
} | ||
} | ||
} | ||
waitForThreads(); | ||
} | ||
|
||
public static String thumbToFull(String thumb) { | ||
thumb = thumb.replace("http://th", "http://fc"); | ||
List<String> fields = new ArrayList<String>(Arrays.asList(thumb.split("/"))); | ||
fields.remove(4); | ||
StringBuilder result = new StringBuilder(); | ||
for (int i = 0; i < fields.size(); i++) { | ||
if (i > 0) { | ||
result.append("/"); | ||
} | ||
result.append(fields.get(i)); | ||
} | ||
return result.toString(); | ||
} | ||
|
||
@Override | ||
public String getHost() { | ||
return HOST; | ||
} | ||
|
||
@Override | ||
public String getGID(URL url) throws MalformedURLException { | ||
Pattern p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com(/gallery)?/?$"); | ||
Matcher m = p.matcher(url.toExternalForm()); | ||
if (m.matches()) { | ||
// Root gallery | ||
return m.group(1); | ||
} | ||
p = Pattern.compile("^https?://([a-zA-Z0-9\\-]{1,})\\.deviantart\\.com/gallery/([0-9]{1,}).*$"); | ||
m = p.matcher(url.toExternalForm()); | ||
if (m.matches()) { | ||
// Subgallery | ||
return m.group(1) + "_" + m.group(2); | ||
} | ||
throw new MalformedURLException("Expected URL format: http://username.deviantart.com/[/gallery/#####], got: " + url); | ||
} | ||
|
||
/** | ||
* Logs into deviant art. Not required to rip NSFW images. | ||
* @return Map of cookies containing session data. | ||
*/ | ||
@SuppressWarnings("unused") | ||
private Map<String, String> loginToDeviantart() throws IOException { | ||
// Populate postData fields | ||
Map<String,String> postData = new HashMap<String,String>(); | ||
String username = Utils.getConfigString("deviantart.username", null); | ||
String password = Utils.getConfigString("deviantart.password", null); | ||
if (username == null || password == null) { | ||
throw new IOException("could not find username or password in config"); | ||
} | ||
Response resp = Jsoup.connect("http://www.deviantart.com/") | ||
.userAgent(USER_AGENT) | ||
.method(Method.GET) | ||
.execute(); | ||
for (Element input : resp.parse().select("form#form-login input[type=hidden]")) { | ||
postData.put(input.attr("name"), input.attr("value")); | ||
} | ||
postData.put("username", username); | ||
postData.put("password", password); | ||
postData.put("remember_me", "1"); | ||
|
||
// Send login request | ||
resp = Jsoup.connect("https://www.deviantart.com/users/login") | ||
.userAgent(USER_AGENT) | ||
.data(postData) | ||
.cookies(resp.cookies()) | ||
.method(Method.POST) | ||
.execute(); | ||
|
||
// Assert we are logged in | ||
if (resp.hasHeader("Location") && resp.header("Location").contains("password")) { | ||
// Wrong password | ||
throw new IOException("Wrong pasword"); | ||
} | ||
if (resp.url().toExternalForm().contains("bad_form")) { | ||
throw new IOException("Login form was incorrectly submitted"); | ||
} | ||
if (resp.cookie("auth_secure") == null || | ||
resp.cookie("auth") == null) { | ||
throw new IOException("No auth_secure or auth cookies received"); | ||
} | ||
// We are logged in, save the cookies | ||
return resp.cookies(); | ||
} | ||
|
||
} |
38 changes: 38 additions & 0 deletions
38
src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
package com.rarchives.ripme.tst.ripper.rippers; | ||
|
||
import java.io.IOException; | ||
import java.net.URL; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import com.rarchives.ripme.ripper.rippers.DeviantartRipper; | ||
|
||
public class DeviantartRipperTest extends RippersTest { | ||
|
||
public void testDeviantartAlbums() throws IOException { | ||
if (!DOWNLOAD_CONTENT) { | ||
return; | ||
} | ||
List<URL> contentURLs = new ArrayList<URL>(); | ||
|
||
// Small gallery | ||
contentURLs.add(new URL("http://airgee.deviantart.com/gallery/")); | ||
// NSFW gallery | ||
contentURLs.add(new URL("http://faterkcx.deviantart.com/gallery/")); | ||
// Multi-page NSFW | ||
contentURLs.add(new URL("http://geekysica.deviantart.com/gallery/35209412")); | ||
|
||
for (URL url : contentURLs) { | ||
try { | ||
DeviantartRipper ripper = new DeviantartRipper(url); | ||
ripper.rip(); | ||
assert(ripper.getWorkingDir().listFiles().length > 1); | ||
deleteDir(ripper.getWorkingDir()); | ||
} catch (Exception e) { | ||
e.printStackTrace(); | ||
fail("Error while ripping URL " + url + ": " + e.getMessage()); | ||
} | ||
} | ||
} | ||
|
||
} |