From 2875ee8d41904ddd7e93dd062b7912cb30b6ad35 Mon Sep 17 00:00:00 2001
From: 4pr0n
Date: Tue, 22 Apr 2014 20:49:11 -0700
Subject: [PATCH] 1.0.25 - Added photobucket ripper #8

---
Review notes (not part of the commit message):
 * This patch was re-flowed from a copy whose line breaks and
   angle-bracketed text (XML tags, Java generics, "&quot;") had been
   lost. The context lines of the pom.xml and UpdateUtils.java hunks are
   a best-effort reconstruction -- verify them against the tree before
   applying.
 * The "index" blob id of PhotobucketRipper.java is the one recorded in
   the original patch and does not describe the revised file below.

 pom.xml                                       |   2 +-
 .../ripper/rippers/PhotobucketRipper.java     | 259 ++++++++++++++++++
 .../com/rarchives/ripme/ui/UpdateUtils.java   |   2 +-
 3 files changed, 261 insertions(+), 2 deletions(-)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java

diff --git a/pom.xml b/pom.xml
index 70c3d1453..87f32f0ef 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
   <groupId>com.rarchives.ripme</groupId>
   <artifactId>ripme</artifactId>
   <packaging>jar</packaging>
-  <version>1.0.24</version>
+  <version>1.0.25</version>
   <name>ripme</name>
   <url>http://rip.rarchives.com</url>
   <properties>
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
new file mode 100644
index 000000000..fe94eec47
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
@@ -0,0 +1,259 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+import org.json.JSONArray;
+import org.json.JSONObject;
+import org.jsoup.Connection.Response;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import com.rarchives.ripme.ripper.AlbumRipper;
+
+/**
+ * Rips a photobucket.com user library: every page of the album named by the
+ * URL, followed by each sub-album reported by the sub-album list component.
+ */
+public class PhotobucketRipper extends AlbumRipper {
+
+    private static final String DOMAIN = "photobucket.com",
+                                HOST   = "photobucket";
+    private static final Logger logger = Logger.getLogger(PhotobucketRipper.class);
+
+    // e.g. http://s844.photobucket.com/user/SpazzySpizzy/library/Lady%20Gaga?sort=3&page=1
+    private static final Pattern LIBRARY_URL = Pattern.compile(
+            "^https?://[a-zA-Z0-9]+\\.photobucket\\.com/user/([a-zA-Z0-9_\\-]+)/library.*$");
+    // Captures the JSON object that follows "collectionData: " in an inline script
+    private static final Pattern COLLECTION_DATA = Pattern.compile(
+            "^.*collectionData: (\\{.*\\}).*$", Pattern.DOTALL);
+
+    /** Last album page fetched; lets the first page be shared with getAlbumTitle. */
+    private Response pageResponse = null;
+
+    public PhotobucketRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return HOST;
+    }
+
+    /**
+     * Drops the query string (e.g. sort/page parameters) from an album URL.
+     *
+     * @param url album URL
+     * @return the URL without its query string, or {@code url} itself if it has none
+     * @throws MalformedURLException if the shortened URL cannot be parsed
+     */
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        logger.info(url);
+        String u = url.toExternalForm();
+        int query = u.indexOf('?');
+        if (query < 0) {
+            return url;
+        }
+        return new URL(u.substring(0, query));
+    }
+
+    /**
+     * Uses the library title shown on the album page as the album title and
+     * falls back to the superclass naming when the page cannot be fetched or
+     * has no title element.
+     */
+    public String getAlbumTitle(URL url) throws MalformedURLException {
+        try {
+            if (pageResponse == null) {
+                pageResponse = Jsoup.connect(url.toExternalForm()).execute();
+            }
+            Elements els = pageResponse.parse().select("div.libraryTitle > h1");
+            if (!els.isEmpty()) {
+                return els.get(0).text();
+            }
+            logger.warn("Could not find libraryTitle at " + url);
+        } catch (IOException e) {
+            // Best effort only: fall through to the default naming convention
+            logger.warn("Could not load album title from " + url, e);
+        }
+        return super.getAlbumTitle(url);
+    }
+
+    /**
+     * @return the user name embedded in a photobucket library URL
+     * @throws MalformedURLException if the URL is not a photobucket library URL
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Matcher m = LIBRARY_URL.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+        throw new MalformedURLException(
+                "Expected photobucket.com gallery formats: "
+                        + "http://x###.photobucket.com/user/username/library/..."
+                        + " Got: " + url);
+    }
+
+    /**
+     * Rips the album at this ripper's URL, then works through the queue of
+     * sub-albums it reports, ripping each sub-album at most once.
+     */
+    @Override
+    public void rip() throws IOException {
+        List<String> subsToRip = new ArrayList<String>(
+                ripAlbumAndGetSubalbums(this.url.toExternalForm()));
+        List<String> rippedSubs = new ArrayList<String>();
+
+        while (!subsToRip.isEmpty() && !isStopped()) {
+            try {
+                Thread.sleep(1000);
+            } catch (InterruptedException e) {
+                break;
+            }
+            String nextSub = subsToRip.remove(0);
+            rippedSubs.add(nextSub);
+            logger.info("Attempting to rip next subalbum: " + nextSub);
+            List<String> subalbums;
+            try {
+                pageResponse = null; // the cached page belongs to the previous album
+                subalbums = ripAlbumAndGetSubalbums(nextSub);
+            } catch (IOException e) {
+                logger.error("Error while ripping " + nextSub, e);
+                break;
+            }
+            for (String subalbum : subalbums) {
+                if (!subsToRip.contains(subalbum) && !rippedSubs.contains(subalbum)) {
+                    subsToRip.add(subalbum);
+                }
+            }
+        }
+        waitForThreads();
+    }
+
+    /**
+     * Queues every image of the album at {@code theUrl} for download, page by
+     * page, and returns the URLs of the album's sub-albums.
+     *
+     * @param theUrl album URL without a query string
+     * @return sub-album URLs; empty if the album data could not be read
+     * @throws IOException if an album page cannot be fetched
+     */
+    public List<String> ripAlbumAndGetSubalbums(String theUrl) throws IOException {
+        int filesIndex = 0,
+            filesTotal = 0,
+            pageIndex = 0;
+        String currentAlbumPath = null;
+        // Stays at theUrl while the cached first page is in use, so logging and
+        // the sub-album lookup still have a URL for single-page albums.
+        String url = theUrl;
+
+        while (pageIndex == 0 || filesIndex < filesTotal) {
+            if (isStopped()) {
+                break;
+            }
+            pageIndex++;
+            if (pageIndex > 1 || pageResponse == null) {
+                url = theUrl + String.format("?sort=3&page=%d", pageIndex);
+                logger.info(" Retrieving " + url);
+                pageResponse = Jsoup.connect(url).execute();
+            }
+            Document albumDoc = pageResponse.parse();
+            String jsonString = findCollectionJson(albumDoc);
+            if (jsonString == null) {
+                logger.error("Unable to find JSON data at URL: " + url);
+                break;
+            }
+            JSONObject json = new JSONObject(jsonString);
+            JSONObject items = json.getJSONObject("items");
+            JSONArray objects = items.getJSONArray("objects");
+            filesTotal = items.getInt("total");
+            currentAlbumPath = json.getString("currentAlbumPath");
+            if (objects.length() == 0) {
+                // "total" claims more files than the pages deliver; stop rather than loop forever
+                break;
+            }
+            for (int i = 0; i < objects.length(); i++) {
+                JSONObject object = objects.getJSONObject(i);
+                filesIndex++;
+                addURLToDownload(new URL(object.getString("fullsizeUrl")),
+                        "",
+                        object.getString("location"),
+                        albumDoc.location(),
+                        pageResponse.cookies());
+            }
+        }
+        if (currentAlbumPath == null) {
+            return new ArrayList<String>();
+        }
+        return getSubAlbums(url, currentAlbumPath);
+    }
+
+    /** Returns the collectionData JSON from the page's inline scripts, or null if absent. */
+    private static String findCollectionJson(Document albumDoc) {
+        for (Element script : albumDoc.select("script[type=text/javascript]")) {
+            String data = script.data();
+            // Only the script mentioning this marker carries the album info
+            if (!data.contains("libraryAlbumsPageCollectionData")) {
+                continue;
+            }
+            Matcher m = COLLECTION_DATA.matcher(data);
+            if (m.matches()) {
+                return m.group(1);
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Requests the Albums-SubalbumList component for an album and builds the
+     * sub-album URLs from its response.
+     *
+     * @param url              album page URL; supplies the subdomain and the referrer
+     * @param currentAlbumPath album path taken from the page's collection data
+     * @return absolute sub-album URLs; empty if the request fails
+     */
+    private List<String> getSubAlbums(String url, String currentAlbumPath) {
+        List<String> result = new ArrayList<String>();
+        String subdomain = url.substring(url.indexOf("://") + 3);
+        subdomain = subdomain.substring(0, subdomain.indexOf("."));
+        String apiUrl = "http://" + subdomain + ".photobucket.com/component/Albums-SubalbumList"
+                + "?deferCollapsed=true"
+                + "&albumPath=" + currentAlbumPath // e.g. %2Falbums%2Fab10%2FSpazzySpizzy
+                + "&json=1";
+        try {
+            logger.info("Loading " + apiUrl);
+            Document doc = Jsoup.connect(apiUrl)
+                    .ignoreContentType(true)
+                    .referrer(url)
+                    .get();
+            // The JSON comes back through jsoup's HTML serializer; undo its quote escaping
+            String jsonString = doc.body().html().replace("&quot;", "\"");
+            JSONObject json = new JSONObject(jsonString);
+            JSONArray subalbums = json.getJSONObject("body").getJSONArray("subAlbums");
+            for (int i = 0; i < subalbums.length(); i++) {
+                String suburl = "http://" + subdomain + ".photobucket.com"
+                        + subalbums.getJSONObject(i).getString("path");
+                result.add(suburl.replace(" ", "%20"));
+            }
+        } catch (IOException e) {
+            logger.error("Failed to get subalbums from " + apiUrl, e);
+        }
+        return result;
+    }
+
+    /** @return true if the URL's host is photobucket.com or one of its subdomains */
+    public boolean canRip(URL url) {
+        String host = url.getHost();
+        return host.equals(DOMAIN) || host.endsWith("." + DOMAIN);
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
index 3773a6534..031985c72 100644
--- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
+++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
@@ -19,7 +19,7 @@
 public class UpdateUtils {
 
     private static final Logger logger = Logger.getLogger(UpdateUtils.class);
-    private static final String DEFAULT_VERSION = "1.0.24";
+    private static final String DEFAULT_VERSION = "1.0.25";
     private static final String updateJsonURL = "http://rarchives.com/ripme.json";
     private static final String updateJarURL = "http://rarchives.com/ripme.jar";
     private static final String mainFileName = "ripme.jar";