package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.json.JSONArray; import org.json.JSONObject; import org.jsoup.Connection.Response; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AlbumRipper; public class PhotobucketRipper extends AlbumRipper { private static final String DOMAIN = "photobucket.com", HOST = "photobucket"; private Response pageResponse = null; public PhotobucketRipper(URL url) throws IOException { super(url); } @Override public String getHost() { return HOST; } public URL sanitizeURL(URL url) throws MalformedURLException { logger.info(url); String u = url.toExternalForm(); if (u.contains("?")) { u = u.substring(0, u.indexOf("?")); return new URL(u); } else { return url; } } public String getAlbumTitle(URL url) throws MalformedURLException { try { // Attempt to use album title as GID if (pageResponse == null) { pageResponse = Jsoup.connect(url.toExternalForm()).execute(); } Document albumDoc = pageResponse.parse(); Elements els = albumDoc.select("div.libraryTitle > h1"); if (els.size() == 0) { throw new IOException("Could not find libraryTitle at " + url); } return els.get(0).text(); } catch (IOException e) { // Fall back to default album naming convention } return super.getAlbumTitle(url); } @Override public String getGID(URL url) throws MalformedURLException { Pattern p; Matcher m; // http://s844.photobucket.com/user/SpazzySpizzy/library/Lady%20Gaga?sort=3&page=1 p = Pattern.compile("^https?://[a-zA-Z0-9]+\\.photobucket\\.com/user/([a-zA-Z0-9_\\-]+)/library.*$"); m = p.matcher(url.toExternalForm()); if (m.matches()) { return m.group(1); } throw new MalformedURLException( "Expected photobucket.com gallery formats: " + "http://x###.photobucket.com/username/library/..." + " Got: " + url); } @Override public void rip() throws IOException { List subalbums = ripAlbumAndGetSubalbums(this.url.toExternalForm()); List subsToRip = new ArrayList(), rippedSubs = new ArrayList(); for (String sub : subalbums) { subsToRip.add(sub); } while (subsToRip.size() > 0 && !isStopped()) { try { Thread.sleep(1000); } catch (InterruptedException e) { break; } String nextSub = subsToRip.remove(0); rippedSubs.add(nextSub); logger.info("Attempting to rip next subalbum: " + nextSub); try { pageResponse = null; subalbums = ripAlbumAndGetSubalbums(nextSub); } catch (IOException e) { logger.error("Error while ripping " + nextSub, e); break; } for (String subalbum : subalbums) { if (!subsToRip.contains(subalbum) && !rippedSubs.contains(subalbum)) { subsToRip.add(subalbum); } } } waitForThreads(); } public List ripAlbumAndGetSubalbums(String theUrl) throws IOException { int filesIndex = 0, filesTotal = 0, pageIndex = 0; String currentAlbumPath = null, url = null; while (pageIndex == 0 || filesIndex < filesTotal) { if (isStopped()) { break; } pageIndex++; if (pageIndex > 1 || pageResponse == null) { url = theUrl + String.format("?sort=3&page=", pageIndex); logger.info(" Retrieving " + url); pageResponse = Jsoup.connect(url).execute(); } Document albumDoc = pageResponse.parse(); // Retrieve JSON from request String jsonString = null; for (Element script : albumDoc.select("script[type=text/javascript]")) { String data = script.data(); if (!data.contains("libraryAlbumsPageCollectionData")) { continue; } // Ensure this chunk of javascript contains the album info // Grab the JSON Pattern p; Matcher m; p = Pattern.compile("^.*collectionData: (\\{.*\\}).*$", Pattern.DOTALL); m = p.matcher(data); if (m.matches()) { jsonString = m.group(1); break; } } if (jsonString == null) { logger.error("Unable to find JSON data at URL: " + url); break; } JSONObject json = new JSONObject(jsonString); JSONObject items = json.getJSONObject("items"); JSONArray objects = items.getJSONArray("objects"); filesTotal = items.getInt("total"); currentAlbumPath = json.getString("currentAlbumPath"); for (int i = 0; i < objects.length(); i++) { JSONObject object = objects.getJSONObject(i); String image = object.getString("fullsizeUrl"); filesIndex += 1; addURLToDownload(new URL(image), "", object.getString("location"), albumDoc.location(), pageResponse.cookies()); } } // Get subalbums if (url != null) { return getSubAlbums(url, currentAlbumPath); } else { return new ArrayList(); } } private List getSubAlbums(String url, String currentAlbumPath) { List result = new ArrayList(); String subdomain = url.substring(url.indexOf("://")+3); subdomain = subdomain.substring(0, subdomain.indexOf(".")); String apiUrl = "http://" + subdomain + ".photobucket.com/component/Albums-SubalbumList" + "?deferCollapsed=true" + "&albumPath=" + currentAlbumPath // %2Falbums%2Fab10%2FSpazzySpizzy" + "&json=1"; try { logger.info("Loading " + apiUrl); Document doc = Jsoup.connect(apiUrl) .ignoreContentType(true) .referrer(url) .get(); String jsonString = doc.body().html().replace(""", "\""); JSONObject json = new JSONObject(jsonString); JSONArray subalbums = json.getJSONObject("body").getJSONArray("subAlbums"); for (int i = 0; i < subalbums.length(); i++) { String suburl = "http://" + subdomain + ".photobucket.com" + subalbums.getJSONObject(i).getString("path"); suburl = suburl.replace(" ", "%20"); result.add(suburl); } } catch (IOException e) { logger.error("Failed to get subalbums from " + apiUrl, e); } return result; } public boolean canRip(URL url) { return url.getHost().endsWith(DOMAIN); } }